1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: the driver needs to parse the CB, but WREG must be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA, it checks that Gaudi is idle, sets DMA
47  * channel 0 to secured, executes the DMA and changes it back to non-secured.
48  * Currently, the driver doesn't use this DMA while compute jobs are
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
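
/*
 * Note: the split above is reflected in gaudi_set_fixed_properties() below -
 * the external (PCI DMA) queues get CB_ALLOC_KERNEL command buffers that the
 * driver parses and patches, while the internal engine queues (DMA 2-7, TPC,
 * MME, NIC) get CB_ALLOC_USER command buffers that are consumed directly.
 */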
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0xEE6B27FF /* 8 seconds */
99
100 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
101
102 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
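/* i.e. "0b" + 32 binary digits + the terminating NUL = 35 bytes */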
103
104 #define MONITOR_SOB_STRING_SIZE         256
105
106 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
107         GAUDI_QUEUE_ID_DMA_0_0,
108         GAUDI_QUEUE_ID_DMA_0_1,
109         GAUDI_QUEUE_ID_DMA_0_2,
110         GAUDI_QUEUE_ID_DMA_0_3,
111         GAUDI_QUEUE_ID_DMA_1_0,
112         GAUDI_QUEUE_ID_DMA_1_1,
113         GAUDI_QUEUE_ID_DMA_1_2,
114         GAUDI_QUEUE_ID_DMA_1_3
115 };
116
117 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
118                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
119                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
120                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
121                 "gaudi cpu eq"
122 };
123
124 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
125         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
126         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
127         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
128         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
129         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
130         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
131         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
132         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
133 };
134
135 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
136         [0] = GAUDI_QUEUE_ID_DMA_0_0,
137         [1] = GAUDI_QUEUE_ID_DMA_0_1,
138         [2] = GAUDI_QUEUE_ID_DMA_0_2,
139         [3] = GAUDI_QUEUE_ID_DMA_0_3,
140         [4] = GAUDI_QUEUE_ID_DMA_1_0,
141         [5] = GAUDI_QUEUE_ID_DMA_1_1,
142         [6] = GAUDI_QUEUE_ID_DMA_1_2,
143         [7] = GAUDI_QUEUE_ID_DMA_1_3,
144 };
145
146 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
147         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
148         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
149         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
150         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
151         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
152         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
153         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
154         [PACKET_FENCE]          = sizeof(struct packet_fence),
155         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
156         [PACKET_NOP]            = sizeof(struct packet_nop),
157         [PACKET_STOP]           = sizeof(struct packet_stop),
158         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
159         [PACKET_WAIT]           = sizeof(struct packet_wait),
160         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
161 };
162
163 static inline bool validate_packet_id(enum packet_id id)
164 {
165         switch (id) {
166         case PACKET_WREG_32:
167         case PACKET_WREG_BULK:
168         case PACKET_MSG_LONG:
169         case PACKET_MSG_SHORT:
170         case PACKET_CP_DMA:
171         case PACKET_REPEAT:
172         case PACKET_MSG_PROT:
173         case PACKET_FENCE:
174         case PACKET_LIN_DMA:
175         case PACKET_NOP:
176         case PACKET_STOP:
177         case PACKET_ARB_POINT:
178         case PACKET_WAIT:
179         case PACKET_LOAD_AND_EXE:
180                 return true;
181         default:
182                 return false;
183         }
184 }
185
186 static const char * const
187 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
188         "tpc_address_exceed_slm",
189         "tpc_div_by_0",
190         "tpc_spu_mac_overflow",
191         "tpc_spu_addsub_overflow",
192         "tpc_spu_abs_overflow",
193         "tpc_spu_fp_dst_nan_inf",
194         "tpc_spu_fp_dst_denorm",
195         "tpc_vpu_mac_overflow",
196         "tpc_vpu_addsub_overflow",
197         "tpc_vpu_abs_overflow",
198         "tpc_vpu_fp_dst_nan_inf",
199         "tpc_vpu_fp_dst_denorm",
200         "tpc_assertions",
201         "tpc_illegal_instruction",
202         "tpc_pc_wrap_around",
203         "tpc_qm_sw_err",
204         "tpc_hbw_rresp_err",
205         "tpc_hbw_bresp_err",
206         "tpc_lbw_rresp_err",
207         "tpc_lbw_bresp_err"
208 };
209
210 static const char * const
211 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
212         "PQ AXI HBW error",
213         "CQ AXI HBW error",
214         "CP AXI HBW error",
215         "CP error due to undefined OPCODE",
216         "CP encountered STOP OPCODE",
217         "CP AXI LBW error",
218         "CP WRREG32 or WRBULK returned error",
219         "N/A",
220         "FENCE 0 inc over max value and clipped",
221         "FENCE 1 inc over max value and clipped",
222         "FENCE 2 inc over max value and clipped",
223         "FENCE 3 inc over max value and clipped",
224         "FENCE 0 dec under min value and clipped",
225         "FENCE 1 dec under min value and clipped",
226         "FENCE 2 dec under min value and clipped",
227         "FENCE 3 dec under min value and clipped"
228 };
229
230 static const char * const
231 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
232         "Choice push while full error",
233         "Choice Q watchdog error",
234         "MSG AXI LBW returned with error"
235 };
236
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385         { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
386         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396
397 static s64 gaudi_state_dump_specs_props[] = {
398         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401         [SP_MON_OBJ_WR_ADDR_LOW] =
402                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403         [SP_MON_OBJ_WR_ADDR_HIGH] =
404                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425         [SP_FENCE0_CNT_OFFSET] =
426                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427         [SP_FENCE0_RDATA_OFFSET] =
428                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430         [SP_NUM_CORES] = 1,
431 };
432
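/* Per-queue engine ID lookup. The [first ... last] designators below are the
 * GNU C range-initializer extension the kernel is built with.
 */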
433 static const int gaudi_queue_id_to_engine_id[] = {
434         [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
435         [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
436         [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
437         [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
438         [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
439         [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
440         [GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
441         [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
442         [GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
443         [GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
444         [GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
445         [GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
446         [GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
447         [GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
448         [GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
449         [GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
450         [GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
451         [GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
452         [GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
453         [GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
454         [GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
455         [GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
456         [GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
457         [GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
458         [GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
459         [GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
460         [GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
461         [GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
462         [GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
463 };
464
465 /* The order here is opposite to the order of the indexing in the h/w.
466  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
467  */
468 static const char * const gaudi_sync_manager_names[] = {
469         "SYNC_MGR_E_N",
470         "SYNC_MGR_W_N",
471         "SYNC_MGR_E_S",
472         "SYNC_MGR_W_S",
473         NULL
474 };
475
476 struct ecc_info_extract_params {
477         u64 block_address;
478         u32 num_memories;
479         bool derr;
480 };
481
482 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
483                                                                 u64 phys_addr);
484 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
485                                         struct hl_cs_job *job);
486 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
487                                         u32 size, u64 val);
488 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
489                                         u32 num_regs, u32 val);
490 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
491                                 u32 tpc_id);
492 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
493 static int gaudi_cpucp_info_get(struct hl_device *hdev);
494 static void gaudi_disable_clock_gating(struct hl_device *hdev);
495 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
496 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
497                                 u32 size, bool eb);
498 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
499                                 struct hl_gen_wait_properties *prop);
500 static inline enum hl_collective_mode
501 get_collective_mode(struct hl_device *hdev, u32 queue_id)
502 {
503         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
504                 return HL_COLLECTIVE_MASTER;
505
506         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
507                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
508                 return HL_COLLECTIVE_SLAVE;
509
510         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
511                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
512                 return HL_COLLECTIVE_SLAVE;
513
514         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
515                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
516                 return HL_COLLECTIVE_SLAVE;
517
518         return HL_COLLECTIVE_NOT_SUPPORTED;
519 }
520
521 static inline void set_default_power_values(struct hl_device *hdev)
522 {
523         struct asic_fixed_properties *prop = &hdev->asic_prop;
524
525         if (hdev->card_type == cpucp_card_type_pmc) {
526                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
527
528                 if (prop->fw_security_enabled)
529                         prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
530                 else
531                         prop->dc_power_default = DC_POWER_DEFAULT_PMC;
532         } else {
533                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
534                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
535         }
536 }
537
538 static int gaudi_set_fixed_properties(struct hl_device *hdev)
539 {
540         struct asic_fixed_properties *prop = &hdev->asic_prop;
541         u32 num_sync_stream_queues = 0;
542         int i;
543
544         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
545         prop->hw_queues_props = kcalloc(prop->max_queues,
546                         sizeof(struct hw_queue_properties),
547                         GFP_KERNEL);
548
549         if (!prop->hw_queues_props)
550                 return -ENOMEM;
551
552         for (i = 0 ; i < prop->max_queues ; i++) {
553                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
554                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
555                         prop->hw_queues_props[i].driver_only = 0;
556                         prop->hw_queues_props[i].supports_sync_stream = 1;
557                         prop->hw_queues_props[i].cb_alloc_flags =
558                                 CB_ALLOC_KERNEL;
559                         num_sync_stream_queues++;
560                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
561                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
562                         prop->hw_queues_props[i].driver_only = 1;
563                         prop->hw_queues_props[i].supports_sync_stream = 0;
564                         prop->hw_queues_props[i].cb_alloc_flags =
565                                 CB_ALLOC_KERNEL;
566                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
567                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
568                         prop->hw_queues_props[i].driver_only = 0;
569                         prop->hw_queues_props[i].supports_sync_stream = 0;
570                         prop->hw_queues_props[i].cb_alloc_flags =
571                                 CB_ALLOC_USER;
572
573                 }
574                 prop->hw_queues_props[i].collective_mode =
575                                                 get_collective_mode(hdev, i);
576         }
577
578         prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
579         prop->cfg_base_address = CFG_BASE;
580         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
581         prop->host_base_address = HOST_PHYS_BASE;
582         prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
583         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
584         prop->completion_mode = HL_COMPLETION_MODE_JOB;
585         prop->collective_first_sob = 0;
586         prop->collective_first_mon = 0;
587
588         /* 2 SOBs per internal queue stream are reserved for collective */
589         prop->sync_stream_first_sob =
590                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
591                         * QMAN_STREAMS * HL_RSVD_SOBS;
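        /*
         * Worked example with hypothetical values (the real constants live in
         * gaudiP.h): if ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
         * came to 16, then with QMAN_STREAMS = 4 and HL_RSVD_SOBS = 2 the
         * first generic sync-stream SOB would be 16 * 4 * 2 = 128, i.e.
         * everything below it stays reserved for the collective SOB groups.
         */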
592
593         /* 1 monitor per internal queue stream is reserved for collective
594          * 2 monitors per external queue stream are reserved for collective
595          */
596         prop->sync_stream_first_mon =
597                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
598                         (NUMBER_OF_EXT_HW_QUEUES * 2);
599
600         prop->dram_base_address = DRAM_PHYS_BASE;
601         prop->dram_size = GAUDI_HBM_SIZE_32GB;
602         prop->dram_end_address = prop->dram_base_address + prop->dram_size;
603         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
604
605         prop->sram_base_address = SRAM_BASE_ADDR;
606         prop->sram_size = SRAM_SIZE;
607         prop->sram_end_address = prop->sram_base_address + prop->sram_size;
608         prop->sram_user_base_address =
609                         prop->sram_base_address + SRAM_USER_BASE_OFFSET;
610
611         prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
612         prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
613
614         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
615         if (hdev->pldm)
616                 prop->mmu_pgt_size = 0x800000; /* 8MB */
617         else
618                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
619         prop->mmu_pte_size = HL_PTE_SIZE;
620         prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
621         prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
622         prop->dram_page_size = PAGE_SIZE_2MB;
623         prop->device_mem_alloc_default_page_size = prop->dram_page_size;
624         prop->dram_supports_virtual_memory = false;
625
626         prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
627         prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
628         prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
629         prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
630         prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
631         prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
632         prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
633         prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
634         prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
635         prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
636         prop->pmmu.start_addr = VA_HOST_SPACE_START;
637         prop->pmmu.end_addr =
638                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
639         prop->pmmu.page_size = PAGE_SIZE_4KB;
640         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
641         prop->pmmu.last_mask = LAST_MASK;
642         /* TODO: will be duplicated until per-MMU props are implemented */
643         prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
644         prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
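        /* The hop shifts/masks above describe how a virtual address is split
         * into per-hop PTE indices; conceptually each hop index is
         * (va & hop_masks[i]) >> hop_shifts[i] (a sketch of the generic MMU
         * walk, not a Gaudi-specific statement).
         */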
645
646         /* PMMU and HPMMU are the same except for the page size */
647         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
648         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
649
650         /* shifts and masks are the same in PMMU and DMMU */
651         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
652         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
653         prop->dmmu.end_addr = VA_HOST_SPACE_END;
654         prop->dmmu.page_size = PAGE_SIZE_2MB;
655
656         prop->cfg_size = CFG_SIZE;
657         prop->max_asid = MAX_ASID;
658         prop->num_of_events = GAUDI_EVENT_SIZE;
659         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
660
661         set_default_power_values(hdev);
662
663         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
664         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
665
666         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
667         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
668
669         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
670                                         CARD_NAME_MAX_LEN);
671
672         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
673
674         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
675                         prop->sync_stream_first_sob +
676                         (num_sync_stream_queues * HL_RSVD_SOBS);
677         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
678                         prop->sync_stream_first_mon +
679                         (num_sync_stream_queues * HL_RSVD_MONS);
680
681         prop->first_available_user_interrupt = USHRT_MAX;
682
683         for (i = 0 ; i < HL_MAX_DCORES ; i++)
684                 prop->first_available_cq[i] = USHRT_MAX;
685
686         prop->fw_cpu_boot_dev_sts0_valid = false;
687         prop->fw_cpu_boot_dev_sts1_valid = false;
688         prop->hard_reset_done_by_fw = false;
689         prop->gic_interrupts_enable = true;
690
691         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
692
693         prop->clk_pll_index = HL_GAUDI_MME_PLL;
694         prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
695
696         prop->use_get_power_for_reset_history = true;
697
698         prop->configurable_stop_on_err = true;
699
700         prop->set_max_power_on_device_init = true;
701
702         prop->dma_mask = 48;
703
704         prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
705
706         return 0;
707 }
708
709 static int gaudi_pci_bars_map(struct hl_device *hdev)
710 {
711         static const char * const name[] = {"SRAM", "CFG", "HBM"};
712         bool is_wc[3] = {false, false, true};
713         int rc;
714
715         rc = hl_pci_bars_map(hdev, name, is_wc);
716         if (rc)
717                 return rc;
718
719         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
720                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
721
722         return 0;
723 }
724
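/* Re-point the HBM BAR window (inbound region 2) at 'addr' and return the
 * previous window base so the caller can restore it when done.
 */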
725 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
726 {
727         struct gaudi_device *gaudi = hdev->asic_specific;
728         struct hl_inbound_pci_region pci_region;
729         u64 old_addr = addr;
730         int rc;
731
732         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
733                 return old_addr;
734
735         if (hdev->asic_prop.iatu_done_by_fw)
736                 return U64_MAX;
737
738         /* Inbound Region 2 - Bar 4 - Point to HBM */
739         pci_region.mode = PCI_BAR_MATCH_MODE;
740         pci_region.bar = HBM_BAR_ID;
741         pci_region.addr = addr;
742         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
743         if (rc)
744                 return U64_MAX;
745
746         if (gaudi) {
747                 old_addr = gaudi->hbm_bar_cur_addr;
748                 gaudi->hbm_bar_cur_addr = addr;
749         }
750
751         return old_addr;
752 }
753
754 static int gaudi_init_iatu(struct hl_device *hdev)
755 {
756         struct hl_inbound_pci_region inbound_region;
757         struct hl_outbound_pci_region outbound_region;
758         int rc;
759
760         if (hdev->asic_prop.iatu_done_by_fw)
761                 return 0;
762
763         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
764         inbound_region.mode = PCI_BAR_MATCH_MODE;
765         inbound_region.bar = SRAM_BAR_ID;
766         inbound_region.addr = SRAM_BASE_ADDR;
767         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
768         if (rc)
769                 goto done;
770
771         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
772         inbound_region.mode = PCI_BAR_MATCH_MODE;
773         inbound_region.bar = CFG_BAR_ID;
774         inbound_region.addr = SPI_FLASH_BASE_ADDR;
775         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
776         if (rc)
777                 goto done;
778
779         /* Inbound Region 2 - Bar 4 - Point to HBM */
780         inbound_region.mode = PCI_BAR_MATCH_MODE;
781         inbound_region.bar = HBM_BAR_ID;
782         inbound_region.addr = DRAM_PHYS_BASE;
783         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
784         if (rc)
785                 goto done;
786
787         /* Outbound Region 0 - Point to Host */
788         outbound_region.addr = HOST_PHYS_BASE;
789         outbound_region.size = HOST_PHYS_SIZE;
790         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
791
792 done:
793         return rc;
794 }
795
796 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
797 {
798         return RREG32(mmHW_STATE);
799 }
800
801 static int gaudi_early_init(struct hl_device *hdev)
802 {
803         struct asic_fixed_properties *prop = &hdev->asic_prop;
804         struct pci_dev *pdev = hdev->pdev;
805         resource_size_t pci_bar_size;
806         u32 fw_boot_status;
807         int rc;
808
809         rc = gaudi_set_fixed_properties(hdev);
810         if (rc) {
811                 dev_err(hdev->dev, "Failed setting fixed properties\n");
812                 return rc;
813         }
814
815         /* Check BAR sizes */
816         pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
817
818         if (pci_bar_size != SRAM_BAR_SIZE) {
819                 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
820                         SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
821                 rc = -ENODEV;
822                 goto free_queue_props;
823         }
824
825         pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
826
827         if (pci_bar_size != CFG_BAR_SIZE) {
828                 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
829                         CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
830                 rc = -ENODEV;
831                 goto free_queue_props;
832         }
833
834         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
835         hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
836
837         /* If FW security is enabled at this point it means no access to ELBI */
838         if (hdev->asic_prop.fw_security_enabled) {
839                 hdev->asic_prop.iatu_done_by_fw = true;
840
841                 /*
842                  * The GIC security bit can ONLY be set by CPUCP, so at this
843                  * stage the decision can only be based on the PCI ID security.
844                  */
845                 hdev->asic_prop.gic_interrupts_enable = false;
846                 goto pci_init;
847         }
848
849         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
850                                 &fw_boot_status);
851         if (rc)
852                 goto free_queue_props;
853
854         /* Check whether FW is configuring iATU */
855         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
856                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
857                 hdev->asic_prop.iatu_done_by_fw = true;
858
859 pci_init:
860         rc = hl_pci_init(hdev);
861         if (rc)
862                 goto free_queue_props;
863
864         /* Before continuing with the initialization, we need to read the preboot
865          * version to determine whether we are running with security-enabled firmware
866          */
867         rc = hl_fw_read_preboot_status(hdev);
868         if (rc) {
869                 if (hdev->reset_on_preboot_fail)
870                         hdev->asic_funcs->hw_fini(hdev, true, false);
871                 goto pci_fini;
872         }
873
874         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
875                 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
876                 hdev->asic_funcs->hw_fini(hdev, true, false);
877         }
878
879         return 0;
880
881 pci_fini:
882         hl_pci_fini(hdev);
883 free_queue_props:
884         kfree(hdev->asic_prop.hw_queues_props);
885         return rc;
886 }
887
888 static int gaudi_early_fini(struct hl_device *hdev)
889 {
890         kfree(hdev->asic_prop.hw_queues_props);
891         hl_pci_fini(hdev);
892
893         return 0;
894 }
895
896 /**
897  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
898  *
899  * @hdev: pointer to hl_device structure
900  * Return: 0 on success, negative value for error.
901  */
902 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
903 {
904         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
905         struct asic_fixed_properties *prop = &hdev->asic_prop;
906         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
907         int rc;
908
909         if ((hdev->fw_components & FW_TYPE_LINUX) &&
910                         (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
911                 struct gaudi_device *gaudi = hdev->asic_specific;
912
913                 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
914                         return 0;
915
916                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
917
918                 if (rc)
919                         return rc;
920
921                 freq = pll_freq_arr[2];
922         } else {
923                 /* Backward compatibility */
924                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
925                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
926                 nr = RREG32(mmPSOC_CPU_PLL_NR);
927                 nf = RREG32(mmPSOC_CPU_PLL_NF);
928                 od = RREG32(mmPSOC_CPU_PLL_OD);
929
930                 if (div_sel == DIV_SEL_REF_CLK ||
931                                 div_sel == DIV_SEL_DIVIDED_REF) {
932                         if (div_sel == DIV_SEL_REF_CLK)
933                                 freq = PLL_REF_CLK;
934                         else
935                                 freq = PLL_REF_CLK / (div_fctr + 1);
936                 } else if (div_sel == DIV_SEL_PLL_CLK ||
937                         div_sel == DIV_SEL_DIVIDED_PLL) {
938                         pll_clk = PLL_REF_CLK * (nf + 1) /
939                                         ((nr + 1) * (od + 1));
940                         if (div_sel == DIV_SEL_PLL_CLK)
941                                 freq = pll_clk;
942                         else
943                                 freq = pll_clk / (div_fctr + 1);
944                 } else {
945                         dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
946                         freq = 0;
947                 }
948         }
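        /*
         * Numeric illustration of the legacy path above, using made-up
         * register values: with PLL_REF_CLK = 50 (MHz), nf = 79, nr = 0 and
         * od = 1, the PLL output is 50 * 80 / (1 * 2) = 2000 MHz; with
         * DIV_SEL_DIVIDED_PLL and div_fctr = 3 the resulting freq is
         * 2000 / 4 = 500 MHz.
         */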
949
950         prop->psoc_timestamp_frequency = freq;
951         prop->psoc_pci_pll_nr = nr;
952         prop->psoc_pci_pll_nf = nf;
953         prop->psoc_pci_pll_od = od;
954         prop->psoc_pci_pll_div_factor = div_fctr;
955
956         return 0;
957 }
958
959 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
960                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
961 {
962         struct asic_fixed_properties *prop = &hdev->asic_prop;
963         struct packet_lin_dma *init_tpc_mem_pkt;
964         struct hl_cs_job *job;
965         struct hl_cb *cb;
966         u64 dst_addr;
967         u32 cb_size, ctl;
968         u8 tpc_id;
969         int rc;
970
971         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
972         if (!cb)
973                 return -EFAULT;
974
975         init_tpc_mem_pkt = cb->kernel_address;
976         cb_size = sizeof(*init_tpc_mem_pkt);
977         memset(init_tpc_mem_pkt, 0, cb_size);
978
979         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
980
981         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
982         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
983         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
984         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
985
986         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
987
988         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
989
990         /* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
991         dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
992                                 round_up(prop->sram_user_base_address, SZ_8K));
993         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
994
995         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
996         if (!job) {
997                 dev_err(hdev->dev, "Failed to allocate a new job\n");
998                 rc = -ENOMEM;
999                 goto release_cb;
1000         }
1001
1002         job->id = 0;
1003         job->user_cb = cb;
1004         atomic_inc(&job->user_cb->cs_cnt);
1005         job->user_cb_size = cb_size;
1006         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1007         job->patched_cb = job->user_cb;
1008         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1009
1010         hl_debugfs_add_job(hdev, job);
1011
1012         rc = gaudi_send_job_on_qman0(hdev, job);
1013
1014         if (rc)
1015                 goto free_job;
1016
1017         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1018                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1019                 if (rc)
1020                         break;
1021         }
1022
1023 free_job:
1024         hl_userptr_delete_list(hdev, &job->userptr_list);
1025         hl_debugfs_remove_job(hdev, job);
1026         kfree(job);
1027         atomic_dec(&cb->cs_cnt);
1028
1029 release_cb:
1030         hl_cb_put(cb);
1031         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1032
1033         return rc;
1034 }
1035
1036 /*
1037  * gaudi_init_tpc_mem() - Initialize TPC memories.
1038  * @hdev: Pointer to hl_device structure.
1039  *
1040  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1041  *
1042  * Return: 0 for success, negative value for error.
1043  */
1044 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1045 {
1046         const struct firmware *fw;
1047         size_t fw_size;
1048         void *cpu_addr;
1049         dma_addr_t dma_handle;
1050         int rc, count = 5;
1051
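        /* request_firmware() may be interrupted by a signal while waiting for
         * the firmware to become available, in which case it returns -EINTR;
         * retry a few times before giving up.
         */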
1052 again:
1053         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1054         if (rc == -EINTR && count-- > 0) {
1055                 msleep(50);
1056                 goto again;
1057         }
1058
1059         if (rc) {
1060                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1061                                 GAUDI_TPC_FW_FILE);
1062                 goto out;
1063         }
1064
1065         fw_size = fw->size;
1066         cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1067         if (!cpu_addr) {
1068                 dev_err(hdev->dev,
1069                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1070                         fw_size);
1071                 rc = -ENOMEM;
1072                 goto out;
1073         }
1074
1075         memcpy(cpu_addr, fw->data, fw_size);
1076
1077         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1078
1079         hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1080
1081 out:
1082         release_firmware(fw);
1083         return rc;
1084 }
1085
1086 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1087 {
1088         struct gaudi_device *gaudi = hdev->asic_specific;
1089         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1090         struct hl_hw_queue *q;
1091         u32 i, sob_id, sob_group_id, queue_id;
1092
1093         /* Iterate through SOB groups and assign a SOB for each slave queue */
1094         sob_group_id =
1095                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1096         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1097
1098         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
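        /* Each NIC engine owns 4 consecutive queue IDs (one per stream), so
         * stepping by 4 * i lands on the same stream of the next NIC engine.
         */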
1099         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1100                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1101                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1102         }
1103
1104         /* Both DMA5 and TPC7 use the same resources since only a single
1105          * engine needs to participate in the reduction process
1106          */
1107         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1108         q = &hdev->kernel_queues[queue_id];
1109         q->sync_stream_prop.collective_sob_id =
1110                         sob_id + NIC_NUMBER_OF_ENGINES;
1111
1112         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1113         q = &hdev->kernel_queues[queue_id];
1114         q->sync_stream_prop.collective_sob_id =
1115                         sob_id + NIC_NUMBER_OF_ENGINES;
1116 }
1117
1118 static void gaudi_sob_group_hw_reset(struct kref *ref)
1119 {
1120         struct gaudi_hw_sob_group *hw_sob_group =
1121                 container_of(ref, struct gaudi_hw_sob_group, kref);
1122         struct hl_device *hdev = hw_sob_group->hdev;
1123         int i;
1124
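        /* SOB registers are 32-bit words laid out contiguously, hence the
         * 4-byte stride; clear every SOB belonging to this group.
         */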
1125         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1126                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1127                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1128
1129         kref_init(&hw_sob_group->kref);
1130 }
1131
1132 static void gaudi_sob_group_reset_error(struct kref *ref)
1133 {
1134         struct gaudi_hw_sob_group *hw_sob_group =
1135                 container_of(ref, struct gaudi_hw_sob_group, kref);
1136         struct hl_device *hdev = hw_sob_group->hdev;
1137
1138         dev_crit(hdev->dev,
1139                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1140                 hw_sob_group->base_sob_id);
1141 }
1142
1143 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1144 {
1145         struct gaudi_collective_properties *prop;
1146         int i;
1147
1148         prop = &gaudi->collective_props;
1149
1150         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1151
1152         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1153                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1154                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1155                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1156         /* Set the collective engine bit (i == NIC_NUMBER_OF_ENGINES after the loop) */
1157         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1158                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1159 }
1160
1161 static int gaudi_collective_init(struct hl_device *hdev)
1162 {
1163         u32 i, sob_id, reserved_sobs_per_group;
1164         struct gaudi_collective_properties *prop;
1165         struct gaudi_device *gaudi;
1166
1167         gaudi = hdev->asic_specific;
1168         prop = &gaudi->collective_props;
1169         sob_id = hdev->asic_prop.collective_first_sob;
1170
1171         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1172         reserved_sobs_per_group =
1173                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1174
1175         /* Init SOB groups */
1176         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1177                 prop->hw_sob_group[i].hdev = hdev;
1178                 prop->hw_sob_group[i].base_sob_id = sob_id;
1179                 sob_id += reserved_sobs_per_group;
1180                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1181         }
1182
1183         for (i = 0 ; i < QMAN_STREAMS; i++) {
1184                 prop->next_sob_group_val[i] = 1;
1185                 prop->curr_sob_group_idx[i] = 0;
1186                 gaudi_collective_map_sobs(hdev, i);
1187         }
1188
1189         gaudi_collective_mstr_sob_mask_set(gaudi);
1190
1191         return 0;
1192 }
1193
1194 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1195 {
1196         struct gaudi_device *gaudi = hdev->asic_specific;
1197         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1198
1199         kref_put(&cprop->hw_sob_group[sob_group].kref,
1200                                         gaudi_sob_group_hw_reset);
1201 }
1202
1203 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1204                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1205 {
1206         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1207         struct gaudi_collective_properties *cprop;
1208         struct hl_gen_wait_properties wait_prop;
1209         struct hl_sync_stream_properties *prop;
1210         struct gaudi_device *gaudi;
1211
1212         gaudi = hdev->asic_specific;
1213         cprop = &gaudi->collective_props;
1214         queue_id = job->hw_queue_id;
1215         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1216
1217         master_sob_base =
1218                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1219         master_monitor = prop->collective_mstr_mon_id[0];
1220
1221         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1222
1223         dev_dbg(hdev->dev,
1224                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1225                 master_sob_base, cprop->mstr_sob_mask[0],
1226                 cprop->next_sob_group_val[stream],
1227                 master_monitor, queue_id);
1228
1229         wait_prop.data = (void *) job->patched_cb;
1230         wait_prop.sob_base = master_sob_base;
1231         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1232         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1233         wait_prop.mon_id = master_monitor;
1234         wait_prop.q_idx = queue_id;
1235         wait_prop.size = cb_size;
1236         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1237
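        /* The slave SOBs span more than one monitor window (a monitor can
         * watch up to HL_MAX_SOBS_PER_MONITOR SOBs), so arm a second master
         * monitor for the next SOB range with its own mask.
         */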
1238         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1239         master_monitor = prop->collective_mstr_mon_id[1];
1240
1241         dev_dbg(hdev->dev,
1242                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1243                 master_sob_base, cprop->mstr_sob_mask[1],
1244                 cprop->next_sob_group_val[stream],
1245                 master_monitor, queue_id);
1246
1247         wait_prop.sob_base = master_sob_base;
1248         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1249         wait_prop.mon_id = master_monitor;
1250         wait_prop.size = cb_size;
1251         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1252 }
1253
1254 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1255                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1256 {
1257         struct hl_gen_wait_properties wait_prop;
1258         struct hl_sync_stream_properties *prop;
1259         u32 queue_id, cb_size = 0;
1260
1261         queue_id = job->hw_queue_id;
1262         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1263
1264         if (job->cs->encaps_signals) {
1265                 /* use the encaps signal handle stored earlier in the flow
1266                  * and set the SOB information from the encaps
1267                  * signals handle
1268                  */
1269                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1270                                                 cs_cmpl);
1271
1272                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1273                                 job->cs->sequence,
1274                                 cs_cmpl->hw_sob->sob_id,
1275                                 cs_cmpl->sob_val);
1276         }
1277
1278         /* Add to wait CBs using slave monitor */
1279         wait_prop.data = (void *) job->user_cb;
1280         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1281         wait_prop.sob_mask = 0x1;
1282         wait_prop.sob_val = cs_cmpl->sob_val;
1283         wait_prop.mon_id = prop->collective_slave_mon_id;
1284         wait_prop.q_idx = queue_id;
1285         wait_prop.size = cb_size;
1286
1287         dev_dbg(hdev->dev,
1288                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1289                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1290                 prop->collective_slave_mon_id, queue_id);
1291
1292         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1293
1294         dev_dbg(hdev->dev,
1295                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1296                 prop->collective_sob_id, queue_id);
1297
1298         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1299                         prop->collective_sob_id, cb_size, false);
1300 }
1301
1302 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1303 {
1304         struct hl_cs_compl *signal_cs_cmpl =
1305                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1306         struct hl_cs_compl *cs_cmpl =
1307                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1308         struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1309         struct gaudi_collective_properties *cprop;
1310         u32 stream, queue_id, sob_group_offset;
1311         struct gaudi_device *gaudi;
1312         struct hl_device *hdev;
1313         struct hl_cs_job *job;
1314         struct hl_ctx *ctx;
1315
1316         ctx = cs->ctx;
1317         hdev = ctx->hdev;
1318         gaudi = hdev->asic_specific;
1319         cprop = &gaudi->collective_props;
1320
1321         if (cs->encaps_signals) {
1322                 cs_cmpl->hw_sob = handle->hw_sob;
1323                 /* At this checkpoint we only need the hw_sob pointer
1324                  * for the completion check before starting to go over the
1325                  * master/slave jobs. The sob_value will be taken later on
1326                  * in gaudi_collective_slave_init_job, depending on each
1327                  * job's wait offset value.
1328                  */
1329                 cs_cmpl->sob_val = 0;
1330         } else {
1331                 /* copy the SOB id and value of the signal CS */
1332                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1333                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1334         }
1335
1336         /* Check again if the signal CS has already completed.
1337          * If it has, don't send any wait CS since the hw_sob
1338          * could already be in reset. If the signal has not completed,
1339          * take a refcount on the hw_sob to prevent resetting the SOB
1340          * while the wait CS is not yet submitted.
1341          * Note that this check is protected by two locks,
1342          * the hw queue lock and the completion object lock,
1343          * and the same completion object lock also protects
1344          * the hw_sob reset handler function.
1345          * The hw queue lock prevents the hw_sob refcount value,
1346          * changed by the signal/wait flows, from going out of sync.
1347          */
1348         spin_lock(&signal_cs_cmpl->lock);
1349
1350         if (completion_done(&cs->signal_fence->completion)) {
1351                 spin_unlock(&signal_cs_cmpl->lock);
1352                 return -EINVAL;
1353         }
1354         /* Increment kref since all slave queues are now waiting on it */
1355         kref_get(&cs_cmpl->hw_sob->kref);
1356
1357         spin_unlock(&signal_cs_cmpl->lock);
1358
1359         /* Calculate the stream from collective master queue (1st job) */
1360         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1361         stream = job->hw_queue_id % 4;
1362         sob_group_offset =
1363                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1364
1365         list_for_each_entry(job, &cs->job_list, cs_node) {
1366                 queue_id = job->hw_queue_id;
1367
1368                 if (hdev->kernel_queues[queue_id].collective_mode ==
1369                                 HL_COLLECTIVE_MASTER)
1370                         gaudi_collective_master_init_job(hdev, job, stream,
1371                                                 sob_group_offset);
1372                 else
1373                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1374         }
1375
1376         cs_cmpl->sob_group = sob_group_offset;
1377
1378         /* Handle sob group kref and wraparound */
1379         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1380         cprop->next_sob_group_val[stream]++;
1381
1382         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1383                 /*
1384                  * Decrement as we reached the max value.
1385                  * The release function won't be called here as we've
1386                  * just incremented the refcount.
1387                  */
1388                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1389                                 gaudi_sob_group_reset_error);
1390                 cprop->next_sob_group_val[stream] = 1;
1391                 /* only two SOBs are currently in use */
1392                 cprop->curr_sob_group_idx[stream] =
1393                         (cprop->curr_sob_group_idx[stream] + 1) &
1394                                                         (HL_RSVD_SOBS - 1);
1395
1396                 gaudi_collective_map_sobs(hdev, stream);
1397
1398                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1399                                 cprop->curr_sob_group_idx[stream], stream);
1400         }
1401
1402         mb();
1403         hl_fence_put(cs->signal_fence);
1404         cs->signal_fence = NULL;
1405
1406         return 0;
1407 }
1408
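/*
 * Worked example (illustrative; assumes a 128-byte device cache line and a
 * 16-byte packet_msg_prot): for user_cb_size = 8170, cacheline_end = 8192 and
 * additional_commands = 32; since 8170 + 32 > 8192, 8192 - 8170 + 32 = 54
 * extra bytes are reserved, so the two MSG_PROT packets start on a fresh
 * cache line. For user_cb_size = 8000, cacheline_end = 8064 and
 * 8000 + 32 <= 8064, so only the 32 bytes for the two packets are added.
 */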
1409 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1410 {
1411         u32 cacheline_end, additional_commands;
1412
1413         cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1414         additional_commands = sizeof(struct packet_msg_prot) * 2;
1415
1416         if (user_cb_size + additional_commands > cacheline_end)
1417                 return cacheline_end - user_cb_size + additional_commands;
1418         else
1419                 return additional_commands;
1420 }
1421
1422 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1423                 struct hl_ctx *ctx, struct hl_cs *cs,
1424                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1425                 u32 encaps_signal_offset)
1426 {
1427         struct hw_queue_properties *hw_queue_prop;
1428         struct hl_cs_counters_atomic *cntr;
1429         struct hl_cs_job *job;
1430         struct hl_cb *cb;
1431         u32 cb_size;
1432         bool patched_cb;
1433
1434         cntr = &hdev->aggregated_cs_counters;
1435
1436         if (mode == HL_COLLECTIVE_MASTER) {
1437                 /* CB size of collective master queue contains
1438                  * 4 msg short packets for monitor 1 configuration
1439                  * 1 fence packet
1440                  * 4 msg short packets for monitor 2 configuration
1441                  * 1 fence packet
1442                  * 2 msg prot packets for completion and MSI
1443                  */
1444                 cb_size = sizeof(struct packet_msg_short) * 8 +
1445                                 sizeof(struct packet_fence) * 2 +
1446                                 sizeof(struct packet_msg_prot) * 2;
1447                 patched_cb = true;
1448         } else {
1449                 /* CB size of collective slave queues contains
1450                  * 4 msg short packets for monitor configuration
1451                  * 1 fence packet
1452                  * 1 additional msg short packet for sob signal
1453                  */
1454                 cb_size = sizeof(struct packet_msg_short) * 5 +
1455                                 sizeof(struct packet_fence);
1456                 patched_cb = false;
1457         }
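        /*
         * Illustrative sizes, assuming 8-byte MSG_SHORT/FENCE packets and
         * 16-byte MSG_PROT packets: 8 * 8 + 2 * 8 + 2 * 16 = 112 bytes for a
         * master CB and 5 * 8 + 8 = 48 bytes for a slave CB.
         */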
1458
1459         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1460         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1461         if (!job) {
1462                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1463                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1464                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1465                 return -ENOMEM;
1466         }
1467
1468         /* Allocate an internal mapped CB for non-patched CBs */
1469         cb = hl_cb_kernel_create(hdev, cb_size,
1470                         hdev->mmu_enable && !patched_cb);
1471         if (!cb) {
1472                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1473                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1474                 kfree(job);
1475                 return -EFAULT;
1476         }
1477
1478         job->id = 0;
1479         job->cs = cs;
1480         job->user_cb = cb;
1481         atomic_inc(&job->user_cb->cs_cnt);
1482         job->user_cb_size = cb_size;
1483         job->hw_queue_id = queue_id;
1484
1485         /* Since it is guaranteed that the collective wait cs has only one
1486          * chunk, we can use this chunk to set the encapsulated signal offset
1487          * in the jobs.
1488          */
1489         if (cs->encaps_signals)
1490                 job->encaps_sig_wait_offset = encaps_signal_offset;
1491
1492         /*
1493          * No need for parsing, the user CB is the patched CB.
1494          * We call hl_cb_destroy() for two reasons - we don't need
1495          * the CB in the CB idr anymore, and to decrement its refcount,
1496          * which was incremented inside hl_cb_kernel_create().
1497          */
1498         if (patched_cb)
1499                 job->patched_cb = job->user_cb;
1500         else
1501                 job->patched_cb = NULL;
1502
1503         job->job_cb_size = job->user_cb_size;
1504         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1505
1506         /* Increment the refcount since we get a completion for external queues */
1507         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1508                 cs_get(cs);
1509
1510         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1511
1512         list_add_tail(&job->cs_node, &cs->job_list);
1513
1514         hl_debugfs_add_job(hdev, job);
1515
1516         return 0;
1517 }
1518
1519 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1520                 struct hl_ctx *ctx, struct hl_cs *cs,
1521                 u32 wait_queue_id, u32 collective_engine_id,
1522                 u32 encaps_signal_offset)
1523 {
1524         struct gaudi_device *gaudi = hdev->asic_specific;
1525         struct hw_queue_properties *hw_queue_prop;
1526         u32 queue_id, collective_queue, num_jobs;
1527         u32 stream, nic_queue, nic_idx = 0;
1528         bool skip;
1529         int i, rc = 0;
1530
1531         /* Verify wait queue id is configured as master */
1532         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1533         if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1534                 dev_err(hdev->dev,
1535                         "Queue %d is not configured as collective master\n",
1536                         wait_queue_id);
1537                 return -EINVAL;
1538         }
1539
1540         /* Verify engine id is supported */
1541         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1542                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1543                 dev_err(hdev->dev,
1544                         "Collective wait does not support engine %u\n",
1545                         collective_engine_id);
1546                 return -EINVAL;
1547         }
1548
1549         stream = wait_queue_id % 4;
1550
1551         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1552                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1553         else
1554                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1555
1556         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1557         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1558
1559         /* The first job goes to the collective master queue; it will wait
1560          * for the collective slave queues to finish execution.
1561          * The synchronization is done using two monitors:
1562          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1563          * and the reduction engine (DMA5/TPC7).
1564          *
1565          * The rest of the jobs go to the collective slave queues, which
1566          * will all wait for the user to signal sob 'cs_cmpl->sob_val'.
1567          */
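        /*
         * Illustrative layout, assuming NIC_NUMBER_OF_ENGINES is 10 and
         * NUMBER_OF_SOBS_IN_GRP is 11: for a wait queue on stream 0 with DMA5
         * as the reduction engine, job 0 goes to wait_queue_id (master), jobs
         * 1-10 go to GAUDI_QUEUE_ID_NIC_0_0 .. GAUDI_QUEUE_ID_NIC_9_0 (NICs
         * that are not initialized are skipped), and the last job goes to
         * GAUDI_QUEUE_ID_DMA_5_0.
         */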
1568         for (i = 0 ; i < num_jobs ; i++) {
1569                 if (i == 0) {
1570                         queue_id = wait_queue_id;
1571                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1572                                 HL_COLLECTIVE_MASTER, queue_id,
1573                                 wait_queue_id, encaps_signal_offset);
1574                 } else {
1575                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1576                                 if (gaudi->hw_cap_initialized &
1577                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1578                                         skip = false;
1579                                 else
1580                                         skip = true;
1581
1582                                 queue_id = nic_queue;
1583                                 nic_queue += 4;
1584                                 nic_idx++;
1585
1586                                 if (skip)
1587                                         continue;
1588                         } else {
1589                                 queue_id = collective_queue;
1590                         }
1591
1592                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1593                                 HL_COLLECTIVE_SLAVE, queue_id,
1594                                 wait_queue_id, encaps_signal_offset);
1595                 }
1596
1597                 if (rc)
1598                         return rc;
1599         }
1600
1601         return rc;
1602 }
1603
1604 static int gaudi_late_init(struct hl_device *hdev)
1605 {
1606         struct gaudi_device *gaudi = hdev->asic_specific;
1607         int rc;
1608
1609         rc = gaudi->cpucp_info_get(hdev);
1610         if (rc) {
1611                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1612                 return rc;
1613         }
1614
1615         if ((hdev->card_type == cpucp_card_type_pci) &&
1616                         (hdev->nic_ports_mask & 0x3)) {
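                /*
                 * On the PCI card only 8 of the 10 NIC ports are usable, so
                 * mask out ports 0-1 (bits 0-1, served by NIC0 QM0/QM1) and
                 * stop their QMANs below.
                 */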
1617                 dev_info(hdev->dev,
1618                         "PCI card detected, only 8 ports are enabled\n");
1619                 hdev->nic_ports_mask &= ~0x3;
1620
1621                 /* Stop and disable unused NIC QMANs */
1622                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1623                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1624                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1625
1626                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1627                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1628                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1629
1630                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1631                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1632
1633                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1634         }
1635
1636         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1637         if (rc) {
1638                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1639                 return rc;
1640         }
1641
1642         /* Scrub both SRAM and DRAM */
1643         rc = hdev->asic_funcs->scrub_device_mem(hdev);
1644         if (rc)
1645                 goto disable_pci_access;
1646
1647         rc = gaudi_fetch_psoc_frequency(hdev);
1648         if (rc) {
1649                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1650                 goto disable_pci_access;
1651         }
1652
1653         rc = gaudi_mmu_clear_pgt_range(hdev);
1654         if (rc) {
1655                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1656                 goto disable_pci_access;
1657         }
1658
1659         rc = gaudi_init_tpc_mem(hdev);
1660         if (rc) {
1661                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1662                 goto disable_pci_access;
1663         }
1664
1665         rc = gaudi_collective_init(hdev);
1666         if (rc) {
1667                 dev_err(hdev->dev, "Failed to init collective\n");
1668                 goto disable_pci_access;
1669         }
1670
1671         /* We only support a single ASID for the user, so for the sake of optimization, just
1672          * initialize the ASID once during device initialization with the fixed value of 1.
1673          */
1674         gaudi_mmu_prepare(hdev, 1);
1675
1676         hl_fw_set_pll_profile(hdev);
1677
1678         return 0;
1679
1680 disable_pci_access:
1681         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1682
1683         return rc;
1684 }
1685
1686 static void gaudi_late_fini(struct hl_device *hdev)
1687 {
1688         hl_hwmon_release_resources(hdev);
1689 }
1690
1691 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1692 {
1693         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1694         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1695         int i, j, rc = 0;
1696
1697         /*
1698          * The device CPU works with 40-bit addresses, and bit 39 must be set
1699          * to '1' when accessing the host.
1700          * Bits 49:39 of the full host address are saved for a later
1701          * configuration of the HW to perform extension to 50 bits.
1702          * Because there is a single HW register that holds the extension bits,
1703          * these bits must be identical across the entire allocated range.
1704          */
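        /*
         * In practice the check below requires the whole range to sit inside
         * a single 1ULL << 39 (512 GiB) aligned window: if the start address
         * lies just below such a boundary and the end address crosses it,
         * bits 49:39 of the two differ and another allocation attempt is made
         * (previous attempts are released at the end).
         */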
1705
1706         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1707                 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1708                                                                 &dma_addr_arr[i],
1709                                                                 GFP_KERNEL | __GFP_ZERO);
1710                 if (!virt_addr_arr[i]) {
1711                         rc = -ENOMEM;
1712                         goto free_dma_mem_arr;
1713                 }
1714
1715                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1716                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1717                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1718                         break;
1719         }
1720
1721         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1722                 dev_err(hdev->dev,
1723                         "MSBs of CPU accessible DMA memory are not identical across the entire range\n");
1724                 rc = -EFAULT;
1725                 goto free_dma_mem_arr;
1726         }
1727
1728         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1729         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1730         hdev->cpu_pci_msb_addr =
1731                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1732
1733         if (!hdev->asic_prop.fw_security_enabled)
1734                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1735
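        /*
         * Release every attempt before the successful one (index i). On the
         * error paths i either points at a failed (NULL) allocation or equals
         * GAUDI_ALLOC_CPU_MEM_RETRY_CNT, so all attempts are released.
         */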
1736 free_dma_mem_arr:
1737         for (j = 0 ; j < i ; j++)
1738                 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1739                                                 dma_addr_arr[j]);
1740
1741         return rc;
1742 }
1743
1744 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1745 {
1746         struct gaudi_device *gaudi = hdev->asic_specific;
1747         struct gaudi_internal_qman_info *q;
1748         u32 i;
1749
1750         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1751                 q = &gaudi->internal_qmans[i];
1752                 if (!q->pq_kernel_addr)
1753                         continue;
1754                 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1755         }
1756 }
1757
1758 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1759 {
1760         struct gaudi_device *gaudi = hdev->asic_specific;
1761         struct gaudi_internal_qman_info *q;
1762         int rc, i;
1763
1764         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1765                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1766                         continue;
1767
1768                 q = &gaudi->internal_qmans[i];
1769
1770                 switch (i) {
1771                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1772                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1773                         break;
1774                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1775                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1776                         break;
1777                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1778                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1779                         break;
1780                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1781                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1782                         break;
1783                 default:
1784                         dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1785                         rc = -EINVAL;
1786                         goto free_internal_qmans_pq_mem;
1787                 }
1788
1789                 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1790                                                                 GFP_KERNEL | __GFP_ZERO);
1791                 if (!q->pq_kernel_addr) {
1792                         rc = -ENOMEM;
1793                         goto free_internal_qmans_pq_mem;
1794                 }
1795         }
1796
1797         return 0;
1798
1799 free_internal_qmans_pq_mem:
1800         gaudi_free_internal_qmans_pq_mem(hdev);
1801         return rc;
1802 }
1803
1804 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1805 {
1806         struct asic_fixed_properties *prop = &hdev->asic_prop;
1807         struct pci_mem_region *region;
1808
1809         /* CFG */
1810         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1811         region->region_base = CFG_BASE;
1812         region->region_size = CFG_SIZE;
1813         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1814         region->bar_size = CFG_BAR_SIZE;
1815         region->bar_id = CFG_BAR_ID;
1816         region->used = 1;
1817
1818         /* SRAM */
1819         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1820         region->region_base = SRAM_BASE_ADDR;
1821         region->region_size = SRAM_SIZE;
1822         region->offset_in_bar = 0;
1823         region->bar_size = SRAM_BAR_SIZE;
1824         region->bar_id = SRAM_BAR_ID;
1825         region->used = 1;
1826
1827         /* DRAM */
1828         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1829         region->region_base = DRAM_PHYS_BASE;
1830         region->region_size = hdev->asic_prop.dram_size;
1831         region->offset_in_bar = 0;
1832         region->bar_size = prop->dram_pci_bar_size;
1833         region->bar_id = HBM_BAR_ID;
1834         region->used = 1;
1835
1836         /* SP SRAM */
1837         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1838         region->region_base = PSOC_SCRATCHPAD_ADDR;
1839         region->region_size = PSOC_SCRATCHPAD_SIZE;
1840         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1841         region->bar_size = CFG_BAR_SIZE;
1842         region->bar_id = CFG_BAR_ID;
1843         region->used = 1;
1844 }
1845
1846 static int gaudi_sw_init(struct hl_device *hdev)
1847 {
1848         struct gaudi_device *gaudi;
1849         u32 i, event_id = 0;
1850         int rc;
1851
1852         /* Allocate device structure */
1853         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1854         if (!gaudi)
1855                 return -ENOMEM;
1856
1857         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1858                 if (gaudi_irq_map_table[i].valid) {
1859                         if (event_id == GAUDI_EVENT_SIZE) {
1860                                 dev_err(hdev->dev,
1861                                         "Event array exceeds the limit of %u events\n",
1862                                         GAUDI_EVENT_SIZE);
1863                                 rc = -EINVAL;
1864                                 goto free_gaudi_device;
1865                         }
1866
1867                         gaudi->events[event_id++] =
1868                                         gaudi_irq_map_table[i].fc_id;
1869                 }
1870         }
1871
1872         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1873
1874         hdev->asic_specific = gaudi;
1875
1876         /* Create DMA pool for small allocations */
1877         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1878                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1879         if (!hdev->dma_pool) {
1880                 dev_err(hdev->dev, "failed to create DMA pool\n");
1881                 rc = -ENOMEM;
1882                 goto free_gaudi_device;
1883         }
1884
1885         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1886         if (rc)
1887                 goto free_dma_pool;
1888
1889         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1890         if (!hdev->cpu_accessible_dma_pool) {
1891                 dev_err(hdev->dev,
1892                         "Failed to create CPU accessible DMA pool\n");
1893                 rc = -ENOMEM;
1894                 goto free_cpu_dma_mem;
1895         }
1896
1897         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1898                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1899                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1900         if (rc) {
1901                 dev_err(hdev->dev,
1902                         "Failed to add memory to CPU accessible DMA pool\n");
1903                 rc = -EFAULT;
1904                 goto free_cpu_accessible_dma_pool;
1905         }
1906
1907         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1908         if (rc)
1909                 goto free_cpu_accessible_dma_pool;
1910
1911         spin_lock_init(&gaudi->hw_queues_lock);
1912
1913         hdev->supports_sync_stream = true;
1914         hdev->supports_coresight = true;
1915         hdev->supports_staged_submission = true;
1916         hdev->supports_wait_for_multi_cs = true;
1917
1918         hdev->asic_funcs->set_pci_memory_regions(hdev);
1919         hdev->stream_master_qid_arr =
1920                                 hdev->asic_funcs->get_stream_master_qid_arr();
1921         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1922
1923         return 0;
1924
1925 free_cpu_accessible_dma_pool:
1926         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1927 free_cpu_dma_mem:
1928         if (!hdev->asic_prop.fw_security_enabled)
1929                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1930                                         hdev->cpu_pci_msb_addr);
1931         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1932                                         hdev->cpu_accessible_dma_address);
1933 free_dma_pool:
1934         dma_pool_destroy(hdev->dma_pool);
1935 free_gaudi_device:
1936         kfree(gaudi);
1937         return rc;
1938 }
1939
1940 static int gaudi_sw_fini(struct hl_device *hdev)
1941 {
1942         struct gaudi_device *gaudi = hdev->asic_specific;
1943
1944         gaudi_free_internal_qmans_pq_mem(hdev);
1945
1946         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1947
1948         if (!hdev->asic_prop.fw_security_enabled)
1949                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1950                                         hdev->cpu_pci_msb_addr);
1951
1952         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1953                                         hdev->cpu_accessible_dma_address);
1954
1955         dma_pool_destroy(hdev->dma_pool);
1956
1957         kfree(gaudi);
1958
1959         return 0;
1960 }
1961
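/*
 * Single MSI mode handler: all completion queues and the event queue share
 * one interrupt vector, so every CQ and the EQ are checked on each interrupt.
 */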
1962 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1963 {
1964         struct hl_device *hdev = arg;
1965         int i;
1966
1967         if (hdev->disabled)
1968                 return IRQ_HANDLED;
1969
1970         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1971                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1972
1973         hl_irq_handler_eq(irq, &hdev->event_queue);
1974
1975         return IRQ_HANDLED;
1976 }
1977
1978 /*
1979  * For backward compatibility, new MSI interrupts should be set after the
1980  * existing CPU and NIC interrupts.
1981  */
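/*
 * Illustrative mapping (assuming GAUDI_EVENT_QUEUE_MSI_IDX == 8 and
 * NIC_NUMBER_OF_ENGINES == 10): completion queues 0-7 use MSI vectors 0-7,
 * the CPU event queue uses vector 8, and any other interrupt with
 * nr >= GAUDI_EVENT_QUEUE_MSI_IDX is shifted past the CPU and NIC vectors
 * to nr + NIC_NUMBER_OF_ENGINES + 1 (nr + 11 under these assumptions).
 */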
1982 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1983                                 bool cpu_eq)
1984 {
1985         int msi_vec;
1986
1987         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1988                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1989                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1990
1991         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1992                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1993
1994         return pci_irq_vector(hdev->pdev, msi_vec);
1995 }
1996
1997 static int gaudi_enable_msi_single(struct hl_device *hdev)
1998 {
1999         int rc, irq;
2000
2001         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2002
2003         irq = gaudi_pci_irq_vector(hdev, 0, false);
2004         rc = request_irq(irq, gaudi_irq_handler_single, 0,
2005                         "gaudi single msi", hdev);
2006         if (rc)
2007                 dev_err(hdev->dev,
2008                         "Failed to request single MSI IRQ\n");
2009
2010         return rc;
2011 }
2012
2013 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2014 {
2015         int cq_cnt = hdev->asic_prop.completion_queues_count;
2016         int rc, i, irq_cnt_init, irq;
2017
2018         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2019                 irq = gaudi_pci_irq_vector(hdev, i, false);
2020                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2021                                 &hdev->completion_queue[i]);
2022                 if (rc) {
2023                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2024                         goto free_irqs;
2025                 }
2026         }
2027
2028         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2029         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2030                                 &hdev->event_queue);
2031         if (rc) {
2032                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2033                 goto free_irqs;
2034         }
2035
2036         return 0;
2037
2038 free_irqs:
2039         for (i = 0 ; i < irq_cnt_init ; i++)
2040                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2041                                 &hdev->completion_queue[i]);
2042         return rc;
2043 }
2044
2045 static int gaudi_enable_msi(struct hl_device *hdev)
2046 {
2047         struct gaudi_device *gaudi = hdev->asic_specific;
2048         int rc;
2049
2050         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2051                 return 0;
2052
2053         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2054         if (rc < 0) {
2055                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2056                 return rc;
2057         }
2058
2059         if (rc < NUMBER_OF_INTERRUPTS) {
2060                 gaudi->multi_msi_mode = false;
2061                 rc = gaudi_enable_msi_single(hdev);
2062         } else {
2063                 gaudi->multi_msi_mode = true;
2064                 rc = gaudi_enable_msi_multi(hdev);
2065         }
2066
2067         if (rc)
2068                 goto free_pci_irq_vectors;
2069
2070         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2071
2072         return 0;
2073
2074 free_pci_irq_vectors:
2075         pci_free_irq_vectors(hdev->pdev);
2076         return rc;
2077 }
2078
2079 static void gaudi_sync_irqs(struct hl_device *hdev)
2080 {
2081         struct gaudi_device *gaudi = hdev->asic_specific;
2082         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2083
2084         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2085                 return;
2086
2087         /* Wait for all pending IRQs to finish */
2088         if (gaudi->multi_msi_mode) {
2089                 for (i = 0 ; i < cq_cnt ; i++)
2090                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2091
2092                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2093                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2094                                                 true));
2095         } else {
2096                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2097         }
2098 }
2099
2100 static void gaudi_disable_msi(struct hl_device *hdev)
2101 {
2102         struct gaudi_device *gaudi = hdev->asic_specific;
2103         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2104
2105         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2106                 return;
2107
2108         gaudi_sync_irqs(hdev);
2109
2110         if (gaudi->multi_msi_mode) {
2111                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2112                                                 true);
2113                 free_irq(irq, &hdev->event_queue);
2114
2115                 for (i = 0 ; i < cq_cnt ; i++) {
2116                         irq = gaudi_pci_irq_vector(hdev, i, false);
2117                         free_irq(irq, &hdev->completion_queue[i]);
2118                 }
2119         } else {
2120                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2121         }
2122
2123         pci_free_irq_vectors(hdev->pdev);
2124
2125         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2126 }
2127
2128 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2129 {
2130         struct gaudi_device *gaudi = hdev->asic_specific;
2131
2132         if (hdev->asic_prop.fw_security_enabled)
2133                 return;
2134
2135         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2136                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2137                 return;
2138
2139         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2140                 return;
2141
2142         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2143                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2144         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2145                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2146         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2147                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2148         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2149                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2150         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2151                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2152         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2153                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2154         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2155                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2156         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2157                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2158
2159         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2160                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2161         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2162                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2163         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2164                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2165         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2166                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2167         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2168                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2169         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2170                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2171         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2172                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2173         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2174                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2175
2176         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2177                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2178         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2179                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2180         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2181                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2182         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2183                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2184         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2185                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2186         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2187                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2188         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2189                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2190         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2191                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2192
2193         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2194 }
2195
2196 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2197 {
2198         struct gaudi_device *gaudi = hdev->asic_specific;
2199
2200         if (hdev->asic_prop.fw_security_enabled)
2201                 return;
2202
2203         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2204                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2205                 return;
2206
2207         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2208                 return;
2209
2210         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2211                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2213                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2215                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2217                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2219                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2221                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2223                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2225                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2226
2227         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2228                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2229         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2230                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2231         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2232                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2233         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2234                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2235         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2236                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2237         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2238                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2239         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2240                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2241         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2242                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2243
2244         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2245                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2246         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2247                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2248         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2249                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2250         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2251                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2252         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2253                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2254         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2255                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2256         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2257                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2258         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2259                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2260
2261         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2262 }
2263
2264 static void gaudi_init_e2e(struct hl_device *hdev)
2265 {
2266         if (hdev->asic_prop.fw_security_enabled)
2267                 return;
2268
2269         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2270                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2271                 return;
2272
2273         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2274         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2275         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2276         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2277
2278         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2279         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2280         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2281         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2282
2283         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2284         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2285         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2286         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2287
2288         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2289         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2290         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2291         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2292
2293         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2294         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2295         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2296         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2297
2298         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2299         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2300         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2301         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2302
2303         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2304         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2305         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2306         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2307
2308         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2309         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2310         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2311         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2312
2313         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2314         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2315         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2316         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2317
2318         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2319         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2320         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2321         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2322
2323         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2324         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2325         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2326         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2327
2328         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2329         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2330         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2331         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2332
2333         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2334         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2335         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2336         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2337
2338         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2339         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2340         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2341         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2342
2343         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2344         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2345         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2346         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2347
2348         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2349         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2350         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2351         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2352
2353         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2354         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2355         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2356         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2357
2358         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2359         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2360         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2361         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2362
2363         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2364         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2365         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2366         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2367
2368         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2369         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2370         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2371         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2372
2373         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2374         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2375         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2376         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2377
2378         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2379         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2380         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2381         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2382
2383         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2384         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2385         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2386         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2387
2388         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2389         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2390         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2391         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2392
2393         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2394                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2395         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2396                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2397
2398         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2399                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2400         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2401                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2402
2403         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2404                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2405         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2406                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2407
2408         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2409                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2410         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2411                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2412
2413         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2414                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2415         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2416                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2417
2418         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2419                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2420         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2421                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2422
2423         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2424                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2425         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2426                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2427
2428         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2429                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2430         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2431                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2432
2433         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2434                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2435         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2436                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2437
2438         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2439                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2440         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2441                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2442
2443         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2444                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2445         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2446                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2447
2448         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2449                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2450         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2451                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2452
2453         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2454                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2455         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2456                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2457
2458         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2459                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2460         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2461                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2462
2463         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2464                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2465         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2466                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2467
2468         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2469                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2470         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2471                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2472
2473         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2474                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2475         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2476                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2477
2478         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2479                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2480         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2481                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2482
2483         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2484                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2485         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2486                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2487
2488         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2489                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2490         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2491                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2492
2493         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2494                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2495         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2496                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2497
2498         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2499                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2500         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2501                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2502
2503         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2504                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2505         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2506                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2507
2508         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2509                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2510         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2511                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2512 }
2513
2514 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2515 {
2516         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2517
2518         if (hdev->asic_prop.fw_security_enabled)
2519                 return;
2520
2521         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2522                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2523                 return;
2524
2525         hbm0_wr = 0x33333333;
2526         hbm0_rd = 0x77777777;
2527         hbm1_wr = 0x55555555;
2528         hbm1_rd = 0xDDDDDDDD;
2529
2530         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2531         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2532         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2533         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2534
2535         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2536         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2537         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2538         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2539
2540         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2541         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2542         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2543         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2544
2545         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2546         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2547         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2548         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2549
2550         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2551                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2552                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2553         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2554                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2555                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2556         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2557                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2558                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2559         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2560                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2561                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2562
2563         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2564                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2565                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2566         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2567                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2568                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2569         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2570                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2571                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2572         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2573                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2574                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2575 }
2576
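/*
 * gaudi_init_golden_registers() - apply the one-time "golden" configuration:
 * E2E and HBM credits, masking of TPC arithmetic interrupts, TPC i-cache
 * fetch line count, clearing the first 128 bytes of SRAM for tensor DMA and
 * the MME EUs rollup count.
 */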
2577 static void gaudi_init_golden_registers(struct hl_device *hdev)
2578 {
2579         u32 tpc_offset;
2580         int tpc_id, i;
2581
2582         gaudi_init_e2e(hdev);
2583         gaudi_init_hbm_cred(hdev);
2584
2585         for (tpc_id = 0, tpc_offset = 0;
2586                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2587                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2588                 /* Mask all arithmetic interrupts from TPC */
2589                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2590                 /* Set 16 cache lines */
2591                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2592                                 ICACHE_FETCH_LINE_NUM, 2);
2593         }
2594
2595         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2596         for (i = 0 ; i < 128 ; i += 8)
2597                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2598
2599         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2600         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2601         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2602         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2603 }
2604
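/*
 * gaudi_init_pci_dma_qman() - configure a single stream of a PCI DMA QMAN:
 * PQ base and size (the PQ resides at qman_pq_addr), LDMA offsets and the CP
 * message base addresses pointing at the sync manager monitors and SOBs.
 * For stream 0 it also configures the QMAN-wide error (RAZWI) reporting,
 * arbiter watchdog timeout and protection bits.
 */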
2605 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2606                                         int qman_id, dma_addr_t qman_pq_addr)
2607 {
2608         struct cpu_dyn_regs *dyn_regs =
2609                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2610         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2611         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2612         u32 q_off, dma_qm_offset;
2613         u32 dma_qm_err_cfg, irq_handler_offset;
2614
2615         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2616
2617         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2618                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2619         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2620                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2621         so_base_en_lo = lower_32_bits(CFG_BASE +
2622                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2623         so_base_en_hi = upper_32_bits(CFG_BASE +
2624                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2625         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2626                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2627         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2628                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2629         so_base_ws_lo = lower_32_bits(CFG_BASE +
2630                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2631         so_base_ws_hi = upper_32_bits(CFG_BASE +
2632                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2633
2634         q_off = dma_qm_offset + qman_id * 4;
2635
2636         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2637         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2638
2639         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2640         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2641         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2642
2643         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2644         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2645                                                         QMAN_LDMA_SRC_OFFSET);
2646         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2647                                                         QMAN_LDMA_DST_OFFSET);
2648
2649         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2650         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2651         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2652         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2653         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2654         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2655         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2656         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2657
2658         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2659
2660         /* The following configuration is needed only once per QMAN */
2661         if (qman_id == 0) {
2662                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2663                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2664                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2665
2666                 /* Configure RAZWI IRQ */
2667                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2668                 if (hdev->stop_on_err)
2669                         dma_qm_err_cfg |=
2670                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2671
2672                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2673
2674                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2675                         lower_32_bits(CFG_BASE + irq_handler_offset));
2676                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2677                         upper_32_bits(CFG_BASE + irq_handler_offset));
2678
2679                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2680                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2681                                                                         dma_id);
2682
2683                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2684                                 QM_ARB_ERR_MSG_EN_MASK);
2685
2686                 /* Set timeout to maximum */
2687                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2688
2689                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2690                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2691
2692                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2693         }
2694 }
2695
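/*
 * gaudi_init_dma_core() - configure a DMA core: maximum outstanding reads,
 * LBW outstanding limit (WA for H3-2116), error message reporting towards
 * the interrupt handler, MMU bypass for the secured channel case, and
 * finally enable the core.
 */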
2696 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2697 {
2698         struct cpu_dyn_regs *dyn_regs =
2699                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2700         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2701         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2702         u32 irq_handler_offset;
2703
2704         /* Set to maximum possible according to physical size */
2705         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2706         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2707
2708         /* WA for H/W bug H3-2116 */
2709         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2710
2711         /* The STOP_ON bit implies the operation gets no completion upon RAZWI */
2712         if (hdev->stop_on_err)
2713                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2714
2715         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2716
2717         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2718                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2719                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2720
2721         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2722                 lower_32_bits(CFG_BASE + irq_handler_offset));
2723         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2724                 upper_32_bits(CFG_BASE + irq_handler_offset));
2725
2726         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2727                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2728         WREG32(mmDMA0_CORE_PROT + dma_offset,
2729                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2730         /* If the channel is secured, it should be in MMU bypass mode */
2731         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2732                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2733         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2734 }
2735
2736 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2737                                 u32 enable_mask)
2738 {
2739         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2740
2741         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2742 }
2743
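/*
 * gaudi_init_pci_dma_qmans() - initialize all PCI DMA channels: assign CQ
 * IDs and MSI vectors to their kernel queues, configure the QMAN streams
 * and the DMA cores, and enable the QMANs.
 */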
2744 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2745 {
2746         struct gaudi_device *gaudi = hdev->asic_specific;
2747         struct hl_hw_queue *q;
2748         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2749
2750         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2751                 return;
2752
2753         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2754                 dma_id = gaudi_dma_assignment[i];
2755                 /*
2756                  * For queues after the CPU Q, we need to add 1 to get the
2757                  * correct queue index. In addition, we need to account for
2758                  * the CPU EQ and the NIC IRQs to get the correct MSI vector.
2759                  */
2760                 if (dma_id > 1) {
2761                         cpu_skip = 1;
2762                         nic_skip = NIC_NUMBER_OF_ENGINES;
2763                 } else {
2764                         cpu_skip = 0;
2765                         nic_skip = 0;
2766                 }
2767
2768                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2769                         q_idx = 4 * dma_id + j + cpu_skip;
2770                         q = &hdev->kernel_queues[q_idx];
2771                         q->cq_id = cq_id++;
2772                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2773                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2774                                                 q->bus_address);
2775                 }
2776
2777                 gaudi_init_dma_core(hdev, dma_id);
2778
2779                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2780         }
2781
2782         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2783 }
2784
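/*
 * gaudi_init_hbm_dma_qman() - configure a single stream of an HBM DMA QMAN.
 * The upper CP streams (qman_id 0-3) get their PQ configured at
 * qman_base_addr, while the lower CP (qman_id 4) gets the QMAN-wide error
 * reporting, arbiter watchdog timeout and protection configuration.
 */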
2785 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2786                                         int qman_id, u64 qman_base_addr)
2787 {
2788         struct cpu_dyn_regs *dyn_regs =
2789                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2790         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2791         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2792         u32 dma_qm_err_cfg, irq_handler_offset;
2793         u32 q_off, dma_qm_offset;
2794
2795         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2796
2797         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2798                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2799         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2800                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2801         so_base_en_lo = lower_32_bits(CFG_BASE +
2802                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2803         so_base_en_hi = upper_32_bits(CFG_BASE +
2804                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2805         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2806                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2807         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2808                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2809         so_base_ws_lo = lower_32_bits(CFG_BASE +
2810                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2811         so_base_ws_hi = upper_32_bits(CFG_BASE +
2812                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2813
2814         q_off = dma_qm_offset + qman_id * 4;
2815
2816         if (qman_id < 4) {
2817                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2818                                         lower_32_bits(qman_base_addr));
2819                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2820                                         upper_32_bits(qman_base_addr));
2821
2822                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2823                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2824                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2825
2826                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2827                                                         QMAN_CPDMA_SIZE_OFFSET);
2828                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2829                                                         QMAN_CPDMA_SRC_OFFSET);
2830                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2831                                                         QMAN_CPDMA_DST_OFFSET);
2832         } else {
2833                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2834                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2835                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2836
2837                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2838                                                         QMAN_LDMA_SIZE_OFFSET);
2839                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2840                                                         QMAN_LDMA_SRC_OFFSET);
2841                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2842                                                         QMAN_LDMA_DST_OFFSET);
2843
2844                 /* Configure RAZWI IRQ */
2845                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2846                 if (hdev->stop_on_err)
2847                         dma_qm_err_cfg |=
2848                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2849
2850                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2851
2852                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2853                         lower_32_bits(CFG_BASE + irq_handler_offset));
2854                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2855                         upper_32_bits(CFG_BASE + irq_handler_offset));
2856
2857                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2858                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2859                                                                         dma_id);
2860
2861                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2862                                 QM_ARB_ERR_MSG_EN_MASK);
2863
2864                 /* Set timeout to maximum */
2865                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2866
2867                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2868                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2869                                 QMAN_INTERNAL_MAKE_TRUSTED);
2870         }
2871
2872         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2873         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2874         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2875         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2876
2877         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2878         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2879                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2880                                 mtr_base_ws_lo);
2881                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2882                                 mtr_base_ws_hi);
2883                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2884                                 so_base_ws_lo);
2885                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2886                                 so_base_ws_hi);
2887         }
2888 }
2889
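/*
 * gaudi_init_hbm_dma_qmans() - initialize all HBM DMA channels: configure
 * the four upper CP streams and the lower CP of each QMAN, set up the DMA
 * core and enable the QMAN.
 */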
2890 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2891 {
2892         struct gaudi_device *gaudi = hdev->asic_specific;
2893         struct gaudi_internal_qman_info *q;
2894         u64 qman_base_addr;
2895         int i, j, dma_id, internal_q_index;
2896
2897         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2898                 return;
2899
2900         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2901                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2902
2903                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2904                          /*
2905                           * Add the CPU queue in order to get the correct queue
2906                           * number, as all internal queues are placed after it
2907                           */
2908                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2909
2910                         q = &gaudi->internal_qmans[internal_q_index];
2911                         qman_base_addr = (u64) q->pq_dma_addr;
2912                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2913                                                 qman_base_addr);
2914                 }
2915
2916                 /* Initializing lower CP for HBM DMA QMAN */
2917                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2918
2919                 gaudi_init_dma_core(hdev, dma_id);
2920
2921                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2922         }
2923
2924         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2925 }
2926
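/*
 * gaudi_init_mme_qman() - configure a single stream of an MME QMAN,
 * analogous to the DMA QMAN setup: PQ base and size for the upper CP
 * streams, and error reporting, arbiter watchdog timeout and protection
 * for the lower CP.
 */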
2927 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2928                                         int qman_id, u64 qman_base_addr)
2929 {
2930         struct cpu_dyn_regs *dyn_regs =
2931                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2932         u32 mtr_base_lo, mtr_base_hi;
2933         u32 so_base_lo, so_base_hi;
2934         u32 irq_handler_offset;
2935         u32 q_off, mme_id;
2936         u32 mme_qm_err_cfg;
2937
2938         mtr_base_lo = lower_32_bits(CFG_BASE +
2939                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2940         mtr_base_hi = upper_32_bits(CFG_BASE +
2941                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2942         so_base_lo = lower_32_bits(CFG_BASE +
2943                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2944         so_base_hi = upper_32_bits(CFG_BASE +
2945                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2946
2947         q_off = mme_offset + qman_id * 4;
2948
2949         if (qman_id < 4) {
2950                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2951                                         lower_32_bits(qman_base_addr));
2952                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2953                                         upper_32_bits(qman_base_addr));
2954
2955                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2956                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2957                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2958
2959                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2960                                                         QMAN_CPDMA_SIZE_OFFSET);
2961                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2962                                                         QMAN_CPDMA_SRC_OFFSET);
2963                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2964                                                         QMAN_CPDMA_DST_OFFSET);
2965         } else {
2966                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2967                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2968                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2969
2970                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2971                                                         QMAN_LDMA_SIZE_OFFSET);
2972                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2973                                                         QMAN_LDMA_SRC_OFFSET);
2974                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2975                                                         QMAN_LDMA_DST_OFFSET);
2976
2977                 /* Configure RAZWI IRQ */
2978                 mme_id = mme_offset /
2979                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2980
2981                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2982                 if (hdev->stop_on_err)
2983                         mme_qm_err_cfg |=
2984                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2985
2986                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2987
2988                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2989                         lower_32_bits(CFG_BASE + irq_handler_offset));
2990                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2991                         upper_32_bits(CFG_BASE + irq_handler_offset));
2992
2993                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2994                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2995                                                                         mme_id);
2996
2997                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2998                                 QM_ARB_ERR_MSG_EN_MASK);
2999
3000                 /* Set timeout to maximum */
3001                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
3002
3003                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3004                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3005                                 QMAN_INTERNAL_MAKE_TRUSTED);
3006         }
3007
3008         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3009         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3010         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3011         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3012 }
3013
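/*
 * gaudi_init_mme_qmans() - initialize both MME QMAN blocks (MME0 and MME2),
 * including their lower CPs, and enable them.
 */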
3014 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3015 {
3016         struct gaudi_device *gaudi = hdev->asic_specific;
3017         struct gaudi_internal_qman_info *q;
3018         u64 qman_base_addr;
3019         u32 mme_offset;
3020         int i, internal_q_index;
3021
3022         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3023                 return;
3024
3025         /*
3026          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3027          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3028          */
3029
3030         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3031
3032         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3033                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3034                 q = &gaudi->internal_qmans[internal_q_index];
3035                 qman_base_addr = (u64) q->pq_dma_addr;
3036                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3037                                         qman_base_addr);
3038                 if (i == 3)
3039                         mme_offset = 0;
3040         }
3041
3042         /* Initializing lower CP for MME QMANs */
3043         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3044         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3045         gaudi_init_mme_qman(hdev, 0, 4, 0);
3046
3047         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3048         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3049
3050         gaudi->hw_cap_initialized |= HW_CAP_MME;
3051 }
3052
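/*
 * gaudi_init_tpc_qman() - configure a single stream of a TPC QMAN: PQ base
 * and size for the upper CP streams (qman_id 0-3), and error reporting,
 * arbiter watchdog timeout and protection for the lower CP (qman_id 4).
 */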
3053 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3054                                 int qman_id, u64 qman_base_addr)
3055 {
3056         struct cpu_dyn_regs *dyn_regs =
3057                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3058         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3059         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3060         u32 tpc_qm_err_cfg, irq_handler_offset;
3061         u32 q_off, tpc_id;
3062
3063         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3064                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3065         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3066                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3067         so_base_en_lo = lower_32_bits(CFG_BASE +
3068                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3069         so_base_en_hi = upper_32_bits(CFG_BASE +
3070                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3071         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3072                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3073         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3074                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3075         so_base_ws_lo = lower_32_bits(CFG_BASE +
3076                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3077         so_base_ws_hi = upper_32_bits(CFG_BASE +
3078                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3079
3080         q_off = tpc_offset + qman_id * 4;
3081
3082         tpc_id = tpc_offset /
3083                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3084
3085         if (qman_id < 4) {
3086                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3087                                         lower_32_bits(qman_base_addr));
3088                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3089                                         upper_32_bits(qman_base_addr));
3090
3091                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3092                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3093                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3094
3095                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3096                                                         QMAN_CPDMA_SIZE_OFFSET);
3097                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3098                                                         QMAN_CPDMA_SRC_OFFSET);
3099                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3100                                                         QMAN_CPDMA_DST_OFFSET);
3101         } else {
3102                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3103                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3104                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3105
3106                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3107                                                         QMAN_LDMA_SIZE_OFFSET);
3108                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3109                                                         QMAN_LDMA_SRC_OFFSET);
3110                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3111                                                         QMAN_LDMA_DST_OFFSET);
3112
3113                 /* Configure RAZWI IRQ */
3114                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3115                 if (hdev->stop_on_err)
3116                         tpc_qm_err_cfg |=
3117                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3118
3119                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3120
3121                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3122                         lower_32_bits(CFG_BASE + irq_handler_offset));
3123                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3124                         upper_32_bits(CFG_BASE + irq_handler_offset));
3125
3126                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3127                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3128                                                                         tpc_id);
3129
3130                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3131                                 QM_ARB_ERR_MSG_EN_MASK);
3132
3133                 /* Set timeout to maximum */
3134                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3135
3136                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3137                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3138                                 QMAN_INTERNAL_MAKE_TRUSTED);
3139         }
3140
3141         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3142         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3143         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3144         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3145
3146         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3147         if (tpc_id == 6) {
3148                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3149                                 mtr_base_ws_lo);
3150                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3151                                 mtr_base_ws_hi);
3152                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3153                                 so_base_ws_lo);
3154                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3155                                 so_base_ws_hi);
3156         }
3157 }
3158
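/*
 * gaudi_init_tpc_qmans() - initialize all TPC QMANs: configure the four
 * upper CP streams and the lower CP of each TPC, program the sync manager
 * base address and enable the QMAN and TPC channel.
 */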
3159 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3160 {
3161         struct gaudi_device *gaudi = hdev->asic_specific;
3162         struct gaudi_internal_qman_info *q;
3163         u64 qman_base_addr;
3164         u32 so_base_hi, tpc_offset = 0;
3165         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3166                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3167         int i, tpc_id, internal_q_index;
3168
3169         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3170                 return;
3171
3172         so_base_hi = upper_32_bits(CFG_BASE +
3173                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3174
3175         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3176                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3177                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3178                                                 tpc_id * QMAN_STREAMS + i;
3179                         q = &gaudi->internal_qmans[internal_q_index];
3180                         qman_base_addr = (u64) q->pq_dma_addr;
3181                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3182                                                 qman_base_addr);
3183
3184                         if (i == 3) {
3185                                 /* Initializing lower CP for TPC QMAN */
3186                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3187
3188                                 /* Enable the QMAN and TPC channel */
3189                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3190                                                 QMAN_TPC_ENABLE);
3191                         }
3192                 }
3193
3194                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3195                                 so_base_hi);
3196
3197                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3198
3199                 gaudi->hw_cap_initialized |=
3200                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3201         }
3202 }
3203
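/*
 * gaudi_init_nic_qman() - configure a single stream of a NIC QMAN: PQ base
 * and size, LDMA offsets and CP message base addresses. For stream 0 it also
 * configures the QMAN-wide error (RAZWI) reporting, arbiter watchdog timeout
 * and protection bits.
 */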
3204 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3205                                 int qman_id, u64 qman_base_addr, int nic_id)
3206 {
3207         struct cpu_dyn_regs *dyn_regs =
3208                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3209         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3210         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3211         u32 nic_qm_err_cfg, irq_handler_offset;
3212         u32 q_off;
3213
3214         mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3215                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3216         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3217                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3218         so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3219                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3220         so_base_en_hi = upper_32_bits(CFG_BASE +
3221                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3222         mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3223                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3224         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3225                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3226         so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3227                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3228         so_base_ws_hi = upper_32_bits(CFG_BASE +
3229                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3230
3231         q_off = nic_offset + qman_id * 4;
3232
3233         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3234         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3235
3236         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3237         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3238         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3239
3240         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3241                                                         QMAN_LDMA_SIZE_OFFSET);
3242         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3243                                                         QMAN_LDMA_SRC_OFFSET);
3244         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3245                                                         QMAN_LDMA_DST_OFFSET);
3246
3247         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3248         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3249         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3250         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3251
3252         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3253         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3254         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3255         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3256         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3257
3258         if (qman_id == 0) {
3259                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3260                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3261                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3262
3263                 /* Configure RAZWI IRQ */
3264                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3265                 if (hdev->stop_on_err)
3266                         nic_qm_err_cfg |=
3267                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3268
3269                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3270
3271                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3272                         lower_32_bits(CFG_BASE + irq_handler_offset));
3273                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3274                         upper_32_bits(CFG_BASE + irq_handler_offset));
3275
3276                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3277                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3278                                                                         nic_id);
3279
3280                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3281                                 QM_ARB_ERR_MSG_EN_MASK);
3282
3283                 /* Set timeout to maximum */
3284                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3285
3286                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3287                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3288                                 QMAN_INTERNAL_MAKE_TRUSTED);
3289         }
3290 }
3291
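/*
 * gaudi_init_nic_qmans() - initialize the QMANs of all NIC engines that are
 * present in the nic_ports_mask, and enable them.
 */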
3292 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3293 {
3294         struct gaudi_device *gaudi = hdev->asic_specific;
3295         struct gaudi_internal_qman_info *q;
3296         u64 qman_base_addr;
3297         u32 nic_offset = 0;
3298         u32 nic_delta_between_qmans =
3299                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3300         u32 nic_delta_between_nics =
3301                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3302         int i, nic_id, internal_q_index;
3303
3304         if (!hdev->nic_ports_mask)
3305                 return;
3306
3307         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3308                 return;
3309
3310         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3311
3312         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3313                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3314                         nic_offset += nic_delta_between_qmans;
3315                         if (nic_id & 1) {
3316                                 nic_offset -= (nic_delta_between_qmans * 2);
3317                                 nic_offset += nic_delta_between_nics;
3318                         }
3319                         continue;
3320                 }
3321
3322                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3323                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3324                                                 nic_id * QMAN_STREAMS + i;
3325                         q = &gaudi->internal_qmans[internal_q_index];
3326                         qman_base_addr = (u64) q->pq_dma_addr;
3327                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3328                                                 qman_base_addr, nic_id);
3329                 }
3330
3331                 /* Enable the QMAN */
3332                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3333
3334                 nic_offset += nic_delta_between_qmans;
3335                 if (nic_id & 1) {
3336                         nic_offset -= (nic_delta_between_qmans * 2);
3337                         nic_offset += nic_delta_between_nics;
3338                 }
3339
3340                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3341         }
3342 }
3343
3344 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3345 {
3346         struct gaudi_device *gaudi = hdev->asic_specific;
3347
3348         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3349                 return;
3350
3351         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3352         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3353         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3354 }
3355
3356 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3357 {
3358         struct gaudi_device *gaudi = hdev->asic_specific;
3359
3360         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3361                 return;
3362
3363         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3364         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3365         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3366         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3367         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3368 }
3369
3370 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3371 {
3372         struct gaudi_device *gaudi = hdev->asic_specific;
3373
3374         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3375                 return;
3376
3377         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3378         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3379 }
3380
3381 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3382 {
3383         struct gaudi_device *gaudi = hdev->asic_specific;
3384         u32 tpc_offset = 0;
3385         int tpc_id;
3386
3387         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3388                 return;
3389
3390         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3391                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3392                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3393         }
3394 }
3395
3396 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3397 {
3398         struct gaudi_device *gaudi = hdev->asic_specific;
3399         u32 nic_mask, nic_offset = 0;
3400         u32 nic_delta_between_qmans =
3401                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3402         u32 nic_delta_between_nics =
3403                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3404         int nic_id;
3405
3406         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3407                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3408
3409                 if (gaudi->hw_cap_initialized & nic_mask)
3410                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3411
3412                 nic_offset += nic_delta_between_qmans;
3413                 if (nic_id & 1) {
3414                         nic_offset -= (nic_delta_between_qmans * 2);
3415                         nic_offset += nic_delta_between_nics;
3416                 }
3417         }
3418 }
3419
3420 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3421 {
3422         struct gaudi_device *gaudi = hdev->asic_specific;
3423
3424         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3425                 return;
3426
3427         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3428         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3429         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3430         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3431 }
3432
3433 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3434 {
3435         struct gaudi_device *gaudi = hdev->asic_specific;
3436
3437         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3438                 return;
3439
3440         /* Stop CPs of HBM DMA QMANs */
3441
3442         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3443         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3444         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3445         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3446         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3447 }
3448
3449 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3450 {
3451         struct gaudi_device *gaudi = hdev->asic_specific;
3452
3453         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3454                 return;
3455
3456         /* Stop CPs of MME QMANs */
3457         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3458         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3459 }
3460
3461 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3462 {
3463         struct gaudi_device *gaudi = hdev->asic_specific;
3464
3465         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3466                 return;
3467
3468         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3469         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3470         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3471         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3472         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3473         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3474         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3475         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3476 }
3477
3478 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3479 {
3480         struct gaudi_device *gaudi = hdev->asic_specific;
3481
3482         /* Stop upper CPs of QMANs */
3483
3484         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3485                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3486                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3487                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3488                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3489
3490         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3491                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3492                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3493                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3494                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3495
3496         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3497                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3498                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3499                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3500                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3501
3502         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3503                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3504                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3505                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3506                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3507
3508         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3509                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3510                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3511                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3512                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3513
3514         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3515                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3516                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3517                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3518                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3519
3520         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3521                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3522                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3523                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3524                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3525
3526         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3527                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3528                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3529                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3530                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3531
3532         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3533                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3534                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3535                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3536                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3537
3538         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3539                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3540                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3541                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3542                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3543 }
3544
3545 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3546 {
3547         struct gaudi_device *gaudi = hdev->asic_specific;
3548
3549         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3550                 return;
3551
3552         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3553         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3554         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3555 }
3556
3557 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3558 {
3559         struct gaudi_device *gaudi = hdev->asic_specific;
3560
3561         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3562                 return;
3563
3564         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3565         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3566         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3567         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3568         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3569 }
3570
3571 static void gaudi_mme_stall(struct hl_device *hdev)
3572 {
3573         struct gaudi_device *gaudi = hdev->asic_specific;
3574
3575         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3576                 return;
3577
3578         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3579         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3580         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3581         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3582         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3583         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3584         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3585         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3586         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3587         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3588         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3589         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3590         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3591         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3592         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3593         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3594         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3595 }
3596
3597 static void gaudi_tpc_stall(struct hl_device *hdev)
3598 {
3599         struct gaudi_device *gaudi = hdev->asic_specific;
3600
3601         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3602                 return;
3603
3604         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3605         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3606         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3607         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3608         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3609         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3610         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3611         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3612 }
3613
3614 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3615 {
3616         u32 qman_offset;
3617         int i;
3618
3619         if (hdev->asic_prop.fw_security_enabled)
3620                 return;
3621
3622         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3623                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3624                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3625
3626                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3627         }
3628
3629         WREG32(mmMME0_QM_CGM_CFG, 0);
3630         WREG32(mmMME0_QM_CGM_CFG1, 0);
3631         WREG32(mmMME2_QM_CGM_CFG, 0);
3632         WREG32(mmMME2_QM_CGM_CFG1, 0);
3633
3634         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3635                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3636                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3637
3638                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3639         }
3640 }
3641
3642 static void gaudi_enable_timestamp(struct hl_device *hdev)
3643 {
3644         /* Disable the timestamp counter */
3645         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3646
3647         /* Zero the lower/upper parts of the 64-bit counter */
3648         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3649         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3650
3651         /* Enable the counter */
3652         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3653 }
3654
3655 static void gaudi_disable_timestamp(struct hl_device *hdev)
3656 {
3657         /* Disable the timestamp counter */
3658         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3659 }
3660
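/*
 * gaudi_halt_engines() - orderly engine halt: stop the QMAN CPs, wait, stall
 * the engines themselves, wait again, then disable the QMANs and the
 * timestamp counter. On a F/W-initiated reset the engines are skipped and
 * only MSI is disabled.
 */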
3661 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3662 {
3663         u32 wait_timeout_ms;
3664
3665         if (hdev->pldm)
3666                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3667         else
3668                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3669
3670         if (fw_reset)
3671                 goto skip_engines;
3672
3673         gaudi_stop_nic_qmans(hdev);
3674         gaudi_stop_mme_qmans(hdev);
3675         gaudi_stop_tpc_qmans(hdev);
3676         gaudi_stop_hbm_dma_qmans(hdev);
3677         gaudi_stop_pci_dma_qmans(hdev);
3678
3679         msleep(wait_timeout_ms);
3680
3681         gaudi_pci_dma_stall(hdev);
3682         gaudi_hbm_dma_stall(hdev);
3683         gaudi_tpc_stall(hdev);
3684         gaudi_mme_stall(hdev);
3685
3686         msleep(wait_timeout_ms);
3687
3688         gaudi_disable_nic_qmans(hdev);
3689         gaudi_disable_mme_qmans(hdev);
3690         gaudi_disable_tpc_qmans(hdev);
3691         gaudi_disable_hbm_dma_qmans(hdev);
3692         gaudi_disable_pci_dma_qmans(hdev);
3693
3694         gaudi_disable_timestamp(hdev);
3695
3696 skip_engines:
3697         gaudi_disable_msi(hdev);
3698 }
3699
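/*
 * gaudi_mmu_init() - set the hop0 page-table address for every ASID,
 * initialize the MMU cache management area, invalidate the MMU cache and
 * enable the MMU.
 */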
3700 static int gaudi_mmu_init(struct hl_device *hdev)
3701 {
3702         struct asic_fixed_properties *prop = &hdev->asic_prop;
3703         struct gaudi_device *gaudi = hdev->asic_specific;
3704         u64 hop0_addr;
3705         int rc, i;
3706
3707         if (!hdev->mmu_enable)
3708                 return 0;
3709
3710         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3711                 return 0;
3712
3713         for (i = 0 ; i < prop->max_asid ; i++) {
3714                 hop0_addr = prop->mmu_pgt_addr +
3715                                 (i * prop->mmu_hop_table_size);
3716
3717                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3718                 if (rc) {
3719                         dev_err(hdev->dev,
3720                                 "failed to set hop0 addr for asid %d\n", i);
3721                         goto err;
3722                 }
3723         }
3724
3725         /* Init the MMU cache management page */
3726         WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3727         WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3728
3729         /* mem cache invalidation */
3730         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3731
3732         hl_mmu_invalidate_cache(hdev, true, 0);
3733
3734         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3735         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3736
3737         WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3738
3739         /*
3740          * The H/W expects the first PI after init to be 1. After wraparound
3741          * we'll write 0.
3742          */
3743         gaudi->mmu_cache_inv_pi = 1;
3744
3745         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3746
3747         return 0;
3748
3749 err:
3750         return rc;
3751 }
3752
3753 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3754 {
3755         void __iomem *dst;
3756
3757         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3758
3759         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3760 }
3761
3762 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3763 {
3764         void __iomem *dst;
3765
3766         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3767
3768         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3769 }
3770
3771 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3772 {
3773         struct dynamic_fw_load_mgr *dynamic_loader;
3774         struct cpu_dyn_regs *dyn_regs;
3775
3776         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3777
3778         /*
3779          * Set initial values for a few specific dynamic regs; before the
3780          * first descriptor is read from the FW, these values have to be
3781          * hard-coded. In later stages of the protocol they are updated
3782          * automatically by reading the FW descriptor, so the data there
3783          * is always up-to-date.
3784          */
3785         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3786         dyn_regs->kmd_msg_to_cpu =
3787                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3788         dyn_regs->cpu_cmd_status_to_host =
3789                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3790
3791         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3792 }
3793
3794 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3795 {
3796         struct static_fw_load_mgr *static_loader;
3797
3798         static_loader = &hdev->fw_loader.static_loader;
3799
3800         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3801         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3802         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3803         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3804         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3805         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3806         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3807         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3808         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3809         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3810         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3811         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3812         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3813                         GAUDI_PLDM_RESET_WAIT_MSEC :
3814                         GAUDI_CPU_RESET_WAIT_MSEC;
3815 }
3816
3817 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3818 {
3819         struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3820
3821         pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3822         pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3823         pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3824         pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3825         pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3826         pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3827 }
3828
3829 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3830 {
3831         struct asic_fixed_properties *prop = &hdev->asic_prop;
3832         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3833
3834         /* fill common fields */
3835         fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3836         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3837         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3838         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3839         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3840         fw_loader->skip_bmc = !hdev->bmc_enable;
3841         fw_loader->sram_bar_id = SRAM_BAR_ID;
3842         fw_loader->dram_bar_id = HBM_BAR_ID;
3843
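             /* Pick the FW load flow: the dynamic loader talks to the FW
              * through the COMMS descriptor protocol, while the static loader
              * uses the fixed register map filled in above (legacy FWs).
              */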
3844         if (prop->dynamic_fw_load)
3845                 gaudi_init_dynamic_firmware_loader(hdev);
3846         else
3847                 gaudi_init_static_firmware_loader(hdev);
3848 }
3849
3850 static int gaudi_init_cpu(struct hl_device *hdev)
3851 {
3852         struct gaudi_device *gaudi = hdev->asic_specific;
3853         int rc;
3854
3855         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3856                 return 0;
3857
3858         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3859                 return 0;
3860
3861         /*
3862          * The device CPU works with 40-bit addresses.
3863          * This register sets the extension to 50 bits.
3864          */
3865         if (!hdev->asic_prop.fw_security_enabled)
3866                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3867
3868         rc = hl_fw_init_cpu(hdev);
3869
3870         if (rc)
3871                 return rc;
3872
3873         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3874
3875         return 0;
3876 }
3877
3878 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3879 {
3880         struct cpu_dyn_regs *dyn_regs =
3881                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3882         struct asic_fixed_properties *prop = &hdev->asic_prop;
3883         struct gaudi_device *gaudi = hdev->asic_specific;
3884         u32 status, irq_handler_offset;
3885         struct hl_eq *eq;
3886         struct hl_hw_queue *cpu_pq =
3887                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3888         int err;
3889
3890         if (!hdev->cpu_queues_enable)
3891                 return 0;
3892
3893         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3894                 return 0;
3895
3896         eq = &hdev->event_queue;
3897
3898         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3899         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3900
3901         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3902         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3903
3904         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3905                         lower_32_bits(hdev->cpu_accessible_dma_address));
3906         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3907                         upper_32_bits(hdev->cpu_accessible_dma_address));
3908
3909         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3910         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3911         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3912
3913         /* Used for EQ CI */
3914         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3915
3916         WREG32(mmCPU_IF_PF_PQ_PI, 0);
3917
3918         if (gaudi->multi_msi_mode)
3919                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3920         else
3921                 WREG32(mmCPU_IF_QUEUE_INIT,
3922                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3923
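             /* The QUEUE_INIT value also indicates to the FW whether the host
              * is using multiple MSI vectors or a single one, presumably so
              * that completions are signalled on the correct interrupt.
              */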
3924         irq_handler_offset = prop->gic_interrupts_enable ?
3925                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3926                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3927
3928         WREG32(irq_handler_offset,
3929                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3930
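             /* Kick the device CPU, either directly through its GIC SETSPI
              * register or through the FW-provided IRQ register, so it picks
              * up the queue configuration written above; then poll until it
              * reports PQ_INIT_STATUS_READY_FOR_HOST.
              */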
3931         err = hl_poll_timeout(
3932                 hdev,
3933                 mmCPU_IF_QUEUE_INIT,
3934                 status,
3935                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3936                 1000,
3937                 cpu_timeout);
3938
3939         if (err) {
3940                 dev_err(hdev->dev,
3941                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3942                 return -EIO;
3943         }
3944
3945         /* update FW application security bits */
3946         if (prop->fw_cpu_boot_dev_sts0_valid)
3947                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3948         if (prop->fw_cpu_boot_dev_sts1_valid)
3949                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3950
3951         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3952         return 0;
3953 }
3954
3955 static void gaudi_pre_hw_init(struct hl_device *hdev)
3956 {
3957         /* Perform read from the device to make sure device is up */
3958         RREG32(mmHW_STATE);
3959
3960         if (!hdev->asic_prop.fw_security_enabled) {
3961                 /* Set the access through PCI bars (Linux driver only) as
3962                  * secured
3963                  */
3964                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3965                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3966                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3967
3968                 /* Perform read to flush the waiting writes to ensure
3969                  * configuration was set in the device
3970                  */
3971                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3972         }
3973
3974         /*
3975          * Let's mark in the H/W that we have reached this point. We check
3976          * this value in the reset_before_init function to understand whether
3977          * we need to reset the chip before doing H/W init. This register is
3978          * cleared by the H/W upon H/W reset
3979          */
3980         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3981 }
3982
3983 static int gaudi_hw_init(struct hl_device *hdev)
3984 {
3985         struct gaudi_device *gaudi = hdev->asic_specific;
3986         int rc;
3987
3988         gaudi_pre_hw_init(hdev);
3989
3990         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3991          * So we set it here and if anyone tries to move it later to
3992          * a different address, there will be an error
3993          */
3994         if (hdev->asic_prop.iatu_done_by_fw)
3995                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3996
3997         /*
3998          * Before pushing u-boot/linux to the device, we need to set the
3999          * HBM bar to the DRAM base address.
4000          */
4001         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4002                 dev_err(hdev->dev,
4003                         "failed to map HBM bar to DRAM base address\n");
4004                 return -EIO;
4005         }
4006
4007         rc = gaudi_init_cpu(hdev);
4008         if (rc) {
4009                 dev_err(hdev->dev, "failed to initialize CPU\n");
4010                 return rc;
4011         }
4012
4013         /* In case the clock gating was enabled in preboot we need to disable
4014          * it here before touching the MME/TPC registers.
4015          */
4016         gaudi_disable_clock_gating(hdev);
4017
4018         /* SRAM scrambler must be initialized after CPU is running from HBM */
4019         gaudi_init_scrambler_sram(hdev);
4020
4021         /* This is here just in case we are working without CPU */
4022         gaudi_init_scrambler_hbm(hdev);
4023
4024         gaudi_init_golden_registers(hdev);
4025
4026         rc = gaudi_mmu_init(hdev);
4027         if (rc)
4028                 return rc;
4029
4030         gaudi_init_security(hdev);
4031
4032         gaudi_init_pci_dma_qmans(hdev);
4033
4034         gaudi_init_hbm_dma_qmans(hdev);
4035
4036         gaudi_init_mme_qmans(hdev);
4037
4038         gaudi_init_tpc_qmans(hdev);
4039
4040         gaudi_init_nic_qmans(hdev);
4041
4042         gaudi_enable_timestamp(hdev);
4043
4044         /* MSI must be enabled before CPU queues and NIC are initialized */
4045         rc = gaudi_enable_msi(hdev);
4046         if (rc)
4047                 goto disable_queues;
4048
4049         /* must be called after MSI was enabled */
4050         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4051         if (rc) {
4052                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4053                         rc);
4054                 goto disable_msi;
4055         }
4056
4057         /* Perform read from the device to flush all configuration */
4058         RREG32(mmHW_STATE);
4059
4060         return 0;
4061
4062 disable_msi:
4063         gaudi_disable_msi(hdev);
4064 disable_queues:
4065         gaudi_disable_mme_qmans(hdev);
4066         gaudi_disable_pci_dma_qmans(hdev);
4067
4068         return rc;
4069 }
4070
4071 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4072 {
4073         struct cpu_dyn_regs *dyn_regs =
4074                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4075         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4076         struct gaudi_device *gaudi = hdev->asic_specific;
4077         bool driver_performs_reset;
4078
4079         if (!hard_reset) {
4080                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4081                 return;
4082         }
4083
4084         if (hdev->pldm) {
4085                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4086                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4087         } else {
4088                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4089                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4090         }
4091
4092         if (fw_reset) {
4093                 dev_dbg(hdev->dev,
4094                         "Firmware performs HARD reset, going to wait %dms\n",
4095                         reset_timeout_ms);
4096
4097                 goto skip_reset;
4098         }
4099
4100         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4101                                         !hdev->asic_prop.hard_reset_done_by_fw);
4102
4103         /* Set the device to handle FLR by H/W, as we are going to put the
4104          * device CPU into halt mode
4105          */
4106         if (driver_performs_reset)
4107                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4108                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4109
4110         /* If linux is loaded in the device CPU we need to communicate with it
4111          * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4112          * registers in case of old F/Ws
4113          */
4114         if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4115                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4116                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4117                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4118
4119                 WREG32(irq_handler_offset,
4120                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4121
4122                 /* This is a hail-mary attempt to revive the card in the small chance that the
4123                  * f/w has experienced a watchdog event, which caused it to return back to preboot.
4124                  * In that case, triggering reset through GIC won't help. We need to trigger the
4125                  * reset as if Linux wasn't loaded.
4126                  *
4127                  * We do it only if the reset cause was HB, because that would be the indication
4128                  * of such an event.
4129                  *
4130                  * In case watchdog hasn't expired but we still got HB, then this won't do any
4131                  * damage.
4132                  */
4133                 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4134                         if (hdev->asic_prop.hard_reset_done_by_fw)
4135                                 hl_fw_ask_hard_reset_without_linux(hdev);
4136                         else
4137                                 hl_fw_ask_halt_machine_without_linux(hdev);
4138                 }
4139         } else {
4140                 if (hdev->asic_prop.hard_reset_done_by_fw)
4141                         hl_fw_ask_hard_reset_without_linux(hdev);
4142                 else
4143                         hl_fw_ask_halt_machine_without_linux(hdev);
4144         }
4145
4146         if (driver_performs_reset) {
4147
4148                 /* Configure the reset registers. Must be done as early as
4149                  * possible in case we fail during H/W initialization
4150                  */
4151                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4152                                                 (CFG_RST_H_DMA_MASK |
4153                                                 CFG_RST_H_MME_MASK |
4154                                                 CFG_RST_H_SM_MASK |
4155                                                 CFG_RST_H_TPC_7_MASK));
4156
4157                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4158
4159                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4160                                                 (CFG_RST_H_HBM_MASK |
4161                                                 CFG_RST_H_TPC_7_MASK |
4162                                                 CFG_RST_H_NIC_MASK |
4163                                                 CFG_RST_H_SM_MASK |
4164                                                 CFG_RST_H_DMA_MASK |
4165                                                 CFG_RST_H_MME_MASK |
4166                                                 CFG_RST_H_CPU_MASK |
4167                                                 CFG_RST_H_MMU_MASK));
4168
4169                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4170                                                 (CFG_RST_L_IF_MASK |
4171                                                 CFG_RST_L_PSOC_MASK |
4172                                                 CFG_RST_L_TPC_MASK));
4173
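                     /* Presumably gives the device CPU time to complete the
                      * halt requested above before the reset is asserted.
                      */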
4174                 msleep(cpu_timeout_ms);
4175
4176                 /* Tell ASIC not to re-initialize PCIe */
4177                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4178
4179                 /* Restart BTL/BLR upon hard-reset */
4180                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4181
4182                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4183                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4184
4185                 dev_dbg(hdev->dev,
4186                         "Issued HARD reset command, going to wait %dms\n",
4187                         reset_timeout_ms);
4188         } else {
4189                 dev_dbg(hdev->dev,
4190                         "Firmware performs HARD reset, going to wait %dms\n",
4191                         reset_timeout_ms);
4192         }
4193
4194 skip_reset:
4195         /*
4196          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4197          * itself is in reset. Need to wait until the reset is deasserted
4198          */
4199         msleep(reset_timeout_ms);
4200
4201         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4202         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4203                 dev_err(hdev->dev,
4204                         "Timeout while waiting for device to reset 0x%x\n",
4205                         status);
4206
4207         if (gaudi) {
4208                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4209                                                 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4210                                                 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4211                                                 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4212                                                 HW_CAP_HBM_SCRAMBLER);
4213
4214                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4215
4216                 hdev->device_cpu_is_halted = false;
4217         }
4218 }
4219
4220 static int gaudi_suspend(struct hl_device *hdev)
4221 {
4222         int rc;
4223
4224         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4225         if (rc)
4226                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4227
4228         return rc;
4229 }
4230
4231 static int gaudi_resume(struct hl_device *hdev)
4232 {
4233         return gaudi_init_iatu(hdev);
4234 }
4235
4236 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4237                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4238 {
4239         int rc;
4240
4241         vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4242                         VM_DONTCOPY | VM_NORESERVE);
4243
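             /* dma_addr was shifted by HOST_PHYS_BASE when the buffer was
              * allocated (see gaudi_dma_alloc_coherent()); subtract it back
              * to get the CPU-side DMA handle dma_mmap_coherent() expects.
              */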
4244         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4245                                 (dma_addr - HOST_PHYS_BASE), size);
4246         if (rc)
4247                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4248
4249         return rc;
4250 }
4251
4252 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4253 {
4254         struct cpu_dyn_regs *dyn_regs =
4255                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4256         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4257         struct gaudi_device *gaudi = hdev->asic_specific;
4258         bool invalid_queue = false;
4259         int dma_id;
4260
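             /* For the DMA queues, the doorbell is the per-QMAN PQ_PI_<n>
              * register. q_off selects one of the four PQs of that QMAN, each
              * PI register being 4 bytes apart. A minimal worked example: for
              * the third PQ of PCI DMA 1 (GAUDI_QUEUE_ID_DMA_0_2), q_off
              * resolves to dma_qm_offset + 2 * 4, so the write hits PQ_PI_2
              * of the QMAN assigned to GAUDI_PCI_DMA_1.
              */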
4261         switch (hw_queue_id) {
4262         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4263                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4264                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4265                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4266                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4267                 break;
4268
4269         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4270                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4271                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4272                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4273                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4274                 break;
4275
4276         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4277                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4278                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4279                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4280                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4281                 break;
4282
4283         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4284                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4285                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4286                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4287                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4288                 break;
4289
4290         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4291                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4292                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4293                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4294                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4295                 break;
4296
4297         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4298                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4299                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4300                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4301                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4302                 break;
4303
4304         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4305                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4306                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4307                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4308                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4309                 break;
4310
4311         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4312                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4313                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4314                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4315                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4316                 break;
4317
4318         case GAUDI_QUEUE_ID_CPU_PQ:
4319                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4320                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4321                 else
4322                         invalid_queue = true;
4323                 break;
4324
4325         case GAUDI_QUEUE_ID_MME_0_0:
4326                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4327                 break;
4328
4329         case GAUDI_QUEUE_ID_MME_0_1:
4330                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4331                 break;
4332
4333         case GAUDI_QUEUE_ID_MME_0_2:
4334                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4335                 break;
4336
4337         case GAUDI_QUEUE_ID_MME_0_3:
4338                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4339                 break;
4340
4341         case GAUDI_QUEUE_ID_MME_1_0:
4342                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4343                 break;
4344
4345         case GAUDI_QUEUE_ID_MME_1_1:
4346                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4347                 break;
4348
4349         case GAUDI_QUEUE_ID_MME_1_2:
4350                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4351                 break;
4352
4353         case GAUDI_QUEUE_ID_MME_1_3:
4354                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4355                 break;
4356
4357         case GAUDI_QUEUE_ID_TPC_0_0:
4358                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4359                 break;
4360
4361         case GAUDI_QUEUE_ID_TPC_0_1:
4362                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4363                 break;
4364
4365         case GAUDI_QUEUE_ID_TPC_0_2:
4366                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4367                 break;
4368
4369         case GAUDI_QUEUE_ID_TPC_0_3:
4370                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4371                 break;
4372
4373         case GAUDI_QUEUE_ID_TPC_1_0:
4374                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4375                 break;
4376
4377         case GAUDI_QUEUE_ID_TPC_1_1:
4378                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4379                 break;
4380
4381         case GAUDI_QUEUE_ID_TPC_1_2:
4382                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4383                 break;
4384
4385         case GAUDI_QUEUE_ID_TPC_1_3:
4386                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4387                 break;
4388
4389         case GAUDI_QUEUE_ID_TPC_2_0:
4390                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4391                 break;
4392
4393         case GAUDI_QUEUE_ID_TPC_2_1:
4394                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4395                 break;
4396
4397         case GAUDI_QUEUE_ID_TPC_2_2:
4398                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4399                 break;
4400
4401         case GAUDI_QUEUE_ID_TPC_2_3:
4402                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4403                 break;
4404
4405         case GAUDI_QUEUE_ID_TPC_3_0:
4406                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4407                 break;
4408
4409         case GAUDI_QUEUE_ID_TPC_3_1:
4410                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4411                 break;
4412
4413         case GAUDI_QUEUE_ID_TPC_3_2:
4414                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4415                 break;
4416
4417         case GAUDI_QUEUE_ID_TPC_3_3:
4418                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4419                 break;
4420
4421         case GAUDI_QUEUE_ID_TPC_4_0:
4422                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4423                 break;
4424
4425         case GAUDI_QUEUE_ID_TPC_4_1:
4426                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4427                 break;
4428
4429         case GAUDI_QUEUE_ID_TPC_4_2:
4430                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4431                 break;
4432
4433         case GAUDI_QUEUE_ID_TPC_4_3:
4434                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4435                 break;
4436
4437         case GAUDI_QUEUE_ID_TPC_5_0:
4438                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4439                 break;
4440
4441         case GAUDI_QUEUE_ID_TPC_5_1:
4442                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4443                 break;
4444
4445         case GAUDI_QUEUE_ID_TPC_5_2:
4446                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4447                 break;
4448
4449         case GAUDI_QUEUE_ID_TPC_5_3:
4450                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4451                 break;
4452
4453         case GAUDI_QUEUE_ID_TPC_6_0:
4454                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4455                 break;
4456
4457         case GAUDI_QUEUE_ID_TPC_6_1:
4458                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4459                 break;
4460
4461         case GAUDI_QUEUE_ID_TPC_6_2:
4462                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4463                 break;
4464
4465         case GAUDI_QUEUE_ID_TPC_6_3:
4466                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4467                 break;
4468
4469         case GAUDI_QUEUE_ID_TPC_7_0:
4470                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4471                 break;
4472
4473         case GAUDI_QUEUE_ID_TPC_7_1:
4474                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4475                 break;
4476
4477         case GAUDI_QUEUE_ID_TPC_7_2:
4478                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4479                 break;
4480
4481         case GAUDI_QUEUE_ID_TPC_7_3:
4482                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4483                 break;
4484
4485         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4486                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4487                         invalid_queue = true;
4488
4489                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4490                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4491                 break;
4492
4493         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4494                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4495                         invalid_queue = true;
4496
4497                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4498                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4499                 break;
4500
4501         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4502                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4503                         invalid_queue = true;
4504
4505                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4506                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4507                 break;
4508
4509         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4510                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4511                         invalid_queue = true;
4512
4513                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4514                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4515                 break;
4516
4517         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4518                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4519                         invalid_queue = true;
4520
4521                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4522                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4523                 break;
4524
4525         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4526                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4527                         invalid_queue = true;
4528
4529                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4530                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4531                 break;
4532
4533         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4534                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4535                         invalid_queue = true;
4536
4537                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4538                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4539                 break;
4540
4541         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4542                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4543                         invalid_queue = true;
4544
4545                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4546                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4547                 break;
4548
4549         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4550                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4551                         invalid_queue = true;
4552
4553                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4554                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4555                 break;
4556
4557         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4558                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4559                         invalid_queue = true;
4560
4561                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4562                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4563                 break;
4564
4565         default:
4566                 invalid_queue = true;
4567         }
4568
4569         if (invalid_queue) {
4570                 /* Should never get here */
4571                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4572                         hw_queue_id);
4573                 return;
4574         }
4575
4576         db_value = pi;
4577
4578         /* ring the doorbell */
4579         WREG32(db_reg_offset, db_value);
4580
4581         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4582                 /* make sure device CPU will read latest data from host */
4583                 mb();
4584
4585                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4586                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4587                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4588
4589                 WREG32(irq_handler_offset,
4590                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4591         }
4592 }
4593
4594 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4595                                 struct hl_bd *bd)
4596 {
4597         __le64 *pbd = (__le64 *) bd;
4598
4599         /* The QMANs are in host memory so a simple copy suffices */
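             /* struct hl_bd is 16 bytes (buffer pointer plus length/ctl), so
              * two 64-bit stores copy the whole descriptor into the PQ entry.
              */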
4600         pqe[0] = pbd[0];
4601         pqe[1] = pbd[1];
4602 }
4603
4604 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4605                                         dma_addr_t *dma_handle, gfp_t flags)
4606 {
4607         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4608                                                 dma_handle, flags);
4609
4610         /* Shift to the device's base physical address of host memory */
4611         if (kernel_addr)
4612                 *dma_handle += HOST_PHYS_BASE;
4613
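             /* HOST_PHYS_BASE is, as the name suggests, the base of the range
              * through which the device addresses host memory; adding it here
              * yields a device-side handle, and the free/mmap paths subtract
              * it back before handing the address to the DMA API.
              */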
4614         return kernel_addr;
4615 }
4616
4617 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4618                 void *cpu_addr, dma_addr_t dma_handle)
4619 {
4620         /* Cancel the device's base physical address of host memory */
4621         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4622
4623         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4624 }
4625
4626 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4627 {
4628         struct asic_fixed_properties *prop = &hdev->asic_prop;
4629         u64 cur_addr = prop->dram_user_base_address;
4630         u32 chunk_size, busy;
4631         int rc, dma_id;
4632
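             /* Each DMA core is programmed in memset mode (MEM_SET commit
              * bit), so SRC_BASE holds the 64-bit fill value rather than a
              * source address. The channels are kicked on consecutive chunks
              * of up to 2GB and then polled for completion before the next
              * round.
              */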
4633         while (cur_addr < prop->dram_end_address) {
4634                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4635                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4636
4637                         chunk_size =
4638                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4639
4640                         dev_dbg(hdev->dev,
4641                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4642                                 cur_addr, cur_addr + chunk_size);
4643
4644                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4645                                         lower_32_bits(val));
4646                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4647                                         upper_32_bits(val));
4648                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4649                                                 lower_32_bits(cur_addr));
4650                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4651                                                 upper_32_bits(cur_addr));
4652                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4653                                         chunk_size);
4654                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4655                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4656                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4657
4658                         cur_addr += chunk_size;
4659
4660                         if (cur_addr == prop->dram_end_address)
4661                                 break;
4662                 }
4663
4664                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4665                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4666
4667                         rc = hl_poll_timeout(
4668                                 hdev,
4669                                 mmDMA0_CORE_STS0 + dma_offset,
4670                                 busy,
4671                                 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4672                                 1000,
4673                                 HBM_SCRUBBING_TIMEOUT_US);
4674
4675                         if (rc) {
4676                                 dev_err(hdev->dev,
4677                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4678                                         dma_id);
4679                                 return -EIO;
4680                         }
4681                 }
4682         }
4683
4684         return 0;
4685 }
4686
4687 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4688 {
4689         struct asic_fixed_properties *prop = &hdev->asic_prop;
4690         u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4691                         min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4692         u64 addr, size, val = hdev->memory_scrub_val;
4693         ktime_t timeout;
4694         int rc = 0;
4695
4696         if (!hdev->memory_scrub)
4697                 return 0;
4698
4699         timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4700         while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4701                 if (ktime_compare(ktime_get(), timeout) > 0) {
4702                         dev_err(hdev->dev, "waiting for idle timeout\n");
4703                         return -ETIMEDOUT;
4704                 }
4705                 usleep_range((1000 >> 2) + 1, 1000);
4706         }
4707
4708         /* Scrub SRAM */
4709         addr = prop->sram_user_base_address;
4710         size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4711
4712         dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4713                         addr, addr + size, val);
4714         rc = gaudi_memset_device_memory(hdev, addr, size, val);
4715         if (rc) {
4716                 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4717                 return rc;
4718         }
4719
4720         /* Scrub HBM using all DMA channels in parallel */
4721         rc = gaudi_scrub_device_dram(hdev, val);
4722         if (rc) {
4723                 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4724                 return rc;
4725         }
4726
4727         return 0;
4728 }
4729
4730 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4731                                 u32 queue_id, dma_addr_t *dma_handle,
4732                                 u16 *queue_len)
4733 {
4734         struct gaudi_device *gaudi = hdev->asic_specific;
4735         struct gaudi_internal_qman_info *q;
4736
4737         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4738                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4739                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4740                 return NULL;
4741         }
4742
4743         q = &gaudi->internal_qmans[queue_id];
4744         *dma_handle = q->pq_dma_addr;
4745         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4746
4747         return q->pq_kernel_addr;
4748 }
4749
4750 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4751                                 u16 len, u32 timeout, u64 *result)
4752 {
4753         struct gaudi_device *gaudi = hdev->asic_specific;
4754
4755         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4756                 if (result)
4757                         *result = 0;
4758                 return 0;
4759         }
4760
4761         if (!timeout)
4762                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4763
4764         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4765                                                 timeout, result);
4766 }
4767
4768 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4769 {
4770         struct packet_msg_prot *fence_pkt;
4771         dma_addr_t pkt_dma_addr;
4772         u32 fence_val, tmp, timeout_usec;
4773         dma_addr_t fence_dma_addr;
4774         u32 *fence_ptr;
4775         int rc;
4776
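             /* Queue test flow: allocate a 4-byte fence buffer in host memory,
              * send a MSG_PROT packet on the tested queue that writes
              * GAUDI_QMAN0_FENCE_VAL to that buffer, and poll the buffer until
              * the value shows up or the timeout expires.
              */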
4777         if (hdev->pldm)
4778                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4779         else
4780                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4781
4782         fence_val = GAUDI_QMAN0_FENCE_VAL;
4783
4784         fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4785         if (!fence_ptr) {
4786                 dev_err(hdev->dev,
4787                         "Failed to allocate memory for H/W queue %d testing\n",
4788                         hw_queue_id);
4789                 return -ENOMEM;
4790         }
4791
4792         *fence_ptr = 0;
4793
4794         fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4795                                                 &pkt_dma_addr);
4796         if (!fence_pkt) {
4797                 dev_err(hdev->dev,
4798                         "Failed to allocate packet for H/W queue %d testing\n",
4799                         hw_queue_id);
4800                 rc = -ENOMEM;
4801                 goto free_fence_ptr;
4802         }
4803
4804         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4805         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4806         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4807
4808         fence_pkt->ctl = cpu_to_le32(tmp);
4809         fence_pkt->value = cpu_to_le32(fence_val);
4810         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4811
4812         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4813                                         sizeof(struct packet_msg_prot),
4814                                         pkt_dma_addr);
4815         if (rc) {
4816                 dev_err(hdev->dev,
4817                         "Failed to send fence packet to H/W queue %d\n",
4818                         hw_queue_id);
4819                 goto free_pkt;
4820         }
4821
4822         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4823                                         1000, timeout_usec, true);
4824
4825         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4826
4827         if (rc == -ETIMEDOUT) {
4828                 dev_err(hdev->dev,
4829                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4830                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4831                 rc = -EIO;
4832         }
4833
4834 free_pkt:
4835         hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4836 free_fence_ptr:
4837         hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4838         return rc;
4839 }
4840
4841 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4842 {
4843         struct gaudi_device *gaudi = hdev->asic_specific;
4844
4845         /*
4846          * check capability here as send_cpu_message() won't update the result
4847          * value if no capability
4848          */
4849         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4850                 return 0;
4851
4852         return hl_fw_test_cpu_queue(hdev);
4853 }
4854
4855 static int gaudi_test_queues(struct hl_device *hdev)
4856 {
4857         int i, rc, ret_val = 0;
4858
4859         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4860                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4861                         rc = gaudi_test_queue(hdev, i);
4862                         if (rc)
4863                                 ret_val = -EINVAL;
4864                 }
4865         }
4866
4867         rc = gaudi_test_cpu_queue(hdev);
4868         if (rc)
4869                 ret_val = -EINVAL;
4870
4871         return ret_val;
4872 }
4873
4874 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4875                 gfp_t mem_flags, dma_addr_t *dma_handle)
4876 {
4877         void *kernel_addr;
4878
4879         if (size > GAUDI_DMA_POOL_BLK_SIZE)
4880                 return NULL;
4881
4882         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4883
4884         /* Shift to the device's base physical address of host memory */
4885         if (kernel_addr)
4886                 *dma_handle += HOST_PHYS_BASE;
4887
4888         return kernel_addr;
4889 }
4890
4891 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4892                         dma_addr_t dma_addr)
4893 {
4894         /* Cancel the device's base physical address of host memory */
4895         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4896
4897         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4898 }
4899
4900 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4901                                         size_t size, dma_addr_t *dma_handle)
4902 {
4903         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4904 }
4905
4906 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4907                                                 size_t size, void *vaddr)
4908 {
4909         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4910 }
4911
4912 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4913 {
4914         struct scatterlist *sg, *sg_next_iter;
4915         u32 count, dma_desc_cnt;
4916         u64 len, len_next;
4917         dma_addr_t addr, addr_next;
4918
4919         dma_desc_cnt = 0;
4920
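             /* Count how many LIN_DMA packets are needed to cover the sg
              * table, merging DMA-contiguous entries as long as the combined
              * length stays within DMA_MAX_TRANSFER_SIZE. For example, two
              * adjacent 4KB entries at 0x1000 and 0x2000 collapse into a
              * single 8KB descriptor.
              */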
4921         for_each_sgtable_dma_sg(sgt, sg, count) {
4922                 len = sg_dma_len(sg);
4923                 addr = sg_dma_address(sg);
4924
4925                 if (len == 0)
4926                         break;
4927
4928                 while ((count + 1) < sgt->nents) {
4929                         sg_next_iter = sg_next(sg);
4930                         len_next = sg_dma_len(sg_next_iter);
4931                         addr_next = sg_dma_address(sg_next_iter);
4932
4933                         if (len_next == 0)
4934                                 break;
4935
4936                         if ((addr + len == addr_next) &&
4937                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4938                                 len += len_next;
4939                                 count++;
4940                                 sg = sg_next_iter;
4941                         } else {
4942                                 break;
4943                         }
4944                 }
4945
4946                 dma_desc_cnt++;
4947         }
4948
4949         return dma_desc_cnt * sizeof(struct packet_lin_dma);
4950 }
4951
4952 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4953                                 struct hl_cs_parser *parser,
4954                                 struct packet_lin_dma *user_dma_pkt,
4955                                 u64 addr, enum dma_data_direction dir)
4956 {
4957         struct hl_userptr *userptr;
4958         int rc;
4959
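             /* If this host range was already pinned for a previous packet in
              * the job, reuse it; otherwise pin the pages, DMA-map the
              * resulting sg table and account for the LIN_DMA packets it will
              * expand into.
              */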
4960         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4961                         parser->job_userptr_list, &userptr))
4962                 goto already_pinned;
4963
4964         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4965         if (!userptr)
4966                 return -ENOMEM;
4967
4968         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4969                                 userptr);
4970         if (rc)
4971                 goto free_userptr;
4972
4973         list_add_tail(&userptr->job_node, parser->job_userptr_list);
4974
4975         rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4976         if (rc) {
4977                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4978                 goto unpin_memory;
4979         }
4980
4981         userptr->dma_mapped = true;
4982         userptr->dir = dir;
4983
4984 already_pinned:
4985         parser->patched_cb_size +=
4986                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4987
4988         return 0;
4989
4990 unpin_memory:
4991         list_del(&userptr->job_node);
4992         hl_unpin_host_memory(hdev, userptr);
4993 free_userptr:
4994         kfree(userptr);
4995         return rc;
4996 }
4997
4998 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4999                                 struct hl_cs_parser *parser,
5000                                 struct packet_lin_dma *user_dma_pkt,
5001                                 bool src_in_host)
5002 {
5003         enum dma_data_direction dir;
5004         bool skip_host_mem_pin = false, user_memset;
5005         u64 addr;
5006         int rc = 0;
5007
5008         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5009                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5010                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5011
5012         if (src_in_host) {
5013                 if (user_memset)
5014                         skip_host_mem_pin = true;
5015
5016                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5017                 dir = DMA_TO_DEVICE;
5018                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5019         } else {
5020                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5021                 dir = DMA_FROM_DEVICE;
5022                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5023                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5024                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5025         }
5026
5027         if (skip_host_mem_pin)
5028                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5029         else
5030                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5031                                                 addr, dir);
5032
5033         return rc;
5034 }
5035
5036 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5037                                 struct hl_cs_parser *parser,
5038                                 struct packet_lin_dma *user_dma_pkt)
5039 {
5040         bool src_in_host = false;
5041         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5042                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5043                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5044
5045         dev_dbg(hdev->dev, "DMA packet details:\n");
5046         dev_dbg(hdev->dev, "source == 0x%llx\n",
5047                                 le64_to_cpu(user_dma_pkt->src_addr));
5048         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5049         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5050
5051         /*
5052          * Special handling for DMA with size 0. Bypass all validations
5053          * because no transactions will be done except for WR_COMP, which
5054          * is not a security issue
5055          */
5056         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5057                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5058                 return 0;
5059         }
5060
5061         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5062                 src_in_host = true;
5063
5064         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5065                                                 src_in_host);
5066 }
5067
5068 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5069                                         struct hl_cs_parser *parser,
5070                                         struct packet_load_and_exe *user_pkt)
5071 {
5072         u32 cfg;
5073
5074         cfg = le32_to_cpu(user_pkt->cfg);
5075
5076         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5077                 dev_err(hdev->dev,
5078                         "User not allowed to use Load and Execute\n");
5079                 return -EPERM;
5080         }
5081
5082         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5083
5084         return 0;
5085 }
5086
5087 static int gaudi_validate_cb(struct hl_device *hdev,
5088                         struct hl_cs_parser *parser, bool is_mmu)
5089 {
5090         u32 cb_parsed_length = 0;
5091         int rc = 0;
5092
5093         parser->patched_cb_size = 0;
5094
5095         /* user_cb_size is more than 0 so the loop will always be executed */
5096         while (cb_parsed_length < parser->user_cb_size) {
5097                 enum packet_id pkt_id;
5098                 u16 pkt_size;
5099                 struct gaudi_packet *user_pkt;
5100
5101                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5102
5103                 pkt_id = (enum packet_id) (
5104                                 (le64_to_cpu(user_pkt->header) &
5105                                 PACKET_HEADER_PACKET_ID_MASK) >>
5106                                         PACKET_HEADER_PACKET_ID_SHIFT);
5107
5108                 if (!validate_packet_id(pkt_id)) {
5109                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5110                         rc = -EINVAL;
5111                         break;
5112                 }
5113
5114                 pkt_size = gaudi_packet_sizes[pkt_id];
5115                 cb_parsed_length += pkt_size;
5116                 if (cb_parsed_length > parser->user_cb_size) {
5117                         dev_err(hdev->dev,
5118                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5119                         rc = -EINVAL;
5120                         break;
5121                 }
5122
5123                 switch (pkt_id) {
5124                 case PACKET_MSG_PROT:
5125                         dev_err(hdev->dev,
5126                                 "User not allowed to use MSG_PROT\n");
5127                         rc = -EPERM;
5128                         break;
5129
5130                 case PACKET_CP_DMA:
5131                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5132                         rc = -EPERM;
5133                         break;
5134
5135                 case PACKET_STOP:
5136                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5137                         rc = -EPERM;
5138                         break;
5139
5140                 case PACKET_WREG_BULK:
5141                         dev_err(hdev->dev,
5142                                 "User not allowed to use WREG_BULK\n");
5143                         rc = -EPERM;
5144                         break;
5145
5146                 case PACKET_LOAD_AND_EXE:
5147                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5148                                 (struct packet_load_and_exe *) user_pkt);
5149                         break;
5150
5151                 case PACKET_LIN_DMA:
5152                         parser->contains_dma_pkt = true;
5153                         if (is_mmu)
5154                                 parser->patched_cb_size += pkt_size;
5155                         else
5156                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5157                                         (struct packet_lin_dma *) user_pkt);
5158                         break;
5159
5160                 case PACKET_WREG_32:
5161                 case PACKET_MSG_LONG:
5162                 case PACKET_MSG_SHORT:
5163                 case PACKET_REPEAT:
5164                 case PACKET_FENCE:
5165                 case PACKET_NOP:
5166                 case PACKET_ARB_POINT:
5167                         parser->patched_cb_size += pkt_size;
5168                         break;
5169
5170                 default:
5171                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5172                                 pkt_id);
5173                         rc = -EINVAL;
5174                         break;
5175                 }
5176
5177                 if (rc)
5178                         break;
5179         }
5180
5181         /*
5182          * The new CB should have space at the end for:
5183          * 1. Optional NOP padding for cacheline alignment
5184          * 2. A MSG_PROT packet that will act as a completion packet
5185          * 3. A MSG_PROT packet that will generate the MSI interrupt
5186          */
5187         if (parser->completion)
5188                 parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5189                         parser->patched_cb_size);
5190
5191         return rc;
5192 }
5193
5194 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5195                                 struct hl_cs_parser *parser,
5196                                 struct packet_lin_dma *user_dma_pkt,
5197                                 struct packet_lin_dma *new_dma_pkt,
5198                                 u32 *new_dma_pkt_size)
5199 {
5200         struct hl_userptr *userptr;
5201         struct scatterlist *sg, *sg_next_iter;
5202         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5203         u64 len, len_next;
5204         dma_addr_t dma_addr, dma_addr_next;
5205         u64 device_memory_addr, addr;
5206         enum dma_data_direction dir;
5207         struct sg_table *sgt;
5208         bool src_in_host = false;
5209         bool skip_host_mem_pin = false;
5210         bool user_memset;
5211
5212         ctl = le32_to_cpu(user_dma_pkt->ctl);
5213
5214         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5215                 src_in_host = true;
5216
5217         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5218                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5219
5220         if (src_in_host) {
5221                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5222                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5223                 dir = DMA_TO_DEVICE;
5224                 if (user_memset)
5225                         skip_host_mem_pin = true;
5226         } else {
5227                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5228                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5229                 dir = DMA_FROM_DEVICE;
5230         }
5231
5232         if ((!skip_host_mem_pin) &&
5233                 (!hl_userptr_is_pinned(hdev, addr,
5234                                         le32_to_cpu(user_dma_pkt->tsize),
5235                                         parser->job_userptr_list, &userptr))) {
5236                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5237                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5238                 return -EFAULT;
5239         }
5240
5241         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5242                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5243                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5244                 return 0;
5245         }
5246
5247         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5248
5249         sgt = userptr->sgt;
5250         dma_desc_cnt = 0;
5251
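        /*
         * Expand the single user LIN_DMA packet into one descriptor per
         * DMA-mapped chunk, merging scatter-gather entries that are contiguous
         * in DMA address space (up to DMA_MAX_TRANSFER_SIZE) to keep the
         * descriptor count low.
         */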
5252         for_each_sgtable_dma_sg(sgt, sg, count) {
5253                 len = sg_dma_len(sg);
5254                 dma_addr = sg_dma_address(sg);
5255
5256                 if (len == 0)
5257                         break;
5258
5259                 while ((count + 1) < sgt->nents) {
5260                         sg_next_iter = sg_next(sg);
5261                         len_next = sg_dma_len(sg_next_iter);
5262                         dma_addr_next = sg_dma_address(sg_next_iter);
5263
5264                         if (len_next == 0)
5265                                 break;
5266
5267                         if ((dma_addr + len == dma_addr_next) &&
5268                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5269                                 len += len_next;
5270                                 count++;
5271                                 sg = sg_next_iter;
5272                         } else {
5273                                 break;
5274                         }
5275                 }
5276
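                /*
                 * Only the first generated descriptor keeps the user's
                 * engine-barrier (EB) bit. WR_COMP is cleared on every
                 * descriptor here and the user's setting is restored on the
                 * last descriptor after the loop, so a single completion write
                 * is issued for the whole transfer.
                 */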
5277                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5278                 if (likely(dma_desc_cnt))
5279                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5280                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5281                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5282                 new_dma_pkt->tsize = cpu_to_le32(len);
5283
5284                 if (dir == DMA_TO_DEVICE) {
5285                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5286                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5287                 } else {
5288                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5289                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5290                 }
5291
5292                 if (!user_memset)
5293                         device_memory_addr += len;
5294                 dma_desc_cnt++;
5295                 new_dma_pkt++;
5296         }
5297
5298         if (!dma_desc_cnt) {
5299                 dev_err(hdev->dev,
5300                         "No SG entries found while patching DMA packet\n");
5301                 return -EFAULT;
5302         }
5303
5304         /* Fix the last DMA packet - restore WR_COMP as the user set it */
5305         new_dma_pkt--;
5306         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5307
5308         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5309
5310         return 0;
5311 }
5312
5313 static int gaudi_patch_cb(struct hl_device *hdev,
5314                                 struct hl_cs_parser *parser)
5315 {
5316         u32 cb_parsed_length = 0;
5317         u32 cb_patched_cur_length = 0;
5318         int rc = 0;
5319
5320         /* user_cb_size is more than 0 so the loop will always be executed */
5321         while (cb_parsed_length < parser->user_cb_size) {
5322                 enum packet_id pkt_id;
5323                 u16 pkt_size;
5324                 u32 new_pkt_size = 0;
5325                 struct gaudi_packet *user_pkt, *kernel_pkt;
5326
5327                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5328                 kernel_pkt = parser->patched_cb->kernel_address +
5329                                         cb_patched_cur_length;
5330
5331                 pkt_id = (enum packet_id) (
5332                                 (le64_to_cpu(user_pkt->header) &
5333                                 PACKET_HEADER_PACKET_ID_MASK) >>
5334                                         PACKET_HEADER_PACKET_ID_SHIFT);
5335
5336                 if (!validate_packet_id(pkt_id)) {
5337                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5338                         rc = -EINVAL;
5339                         break;
5340                 }
5341
5342                 pkt_size = gaudi_packet_sizes[pkt_id];
5343                 cb_parsed_length += pkt_size;
5344                 if (cb_parsed_length > parser->user_cb_size) {
5345                         dev_err(hdev->dev,
5346                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5347                         rc = -EINVAL;
5348                         break;
5349                 }
5350
5351                 switch (pkt_id) {
5352                 case PACKET_LIN_DMA:
5353                         rc = gaudi_patch_dma_packet(hdev, parser,
5354                                         (struct packet_lin_dma *) user_pkt,
5355                                         (struct packet_lin_dma *) kernel_pkt,
5356                                         &new_pkt_size);
5357                         cb_patched_cur_length += new_pkt_size;
5358                         break;
5359
5360                 case PACKET_MSG_PROT:
5361                         dev_err(hdev->dev,
5362                                 "User not allowed to use MSG_PROT\n");
5363                         rc = -EPERM;
5364                         break;
5365
5366                 case PACKET_CP_DMA:
5367                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5368                         rc = -EPERM;
5369                         break;
5370
5371                 case PACKET_STOP:
5372                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5373                         rc = -EPERM;
5374                         break;
5375
5376                 case PACKET_WREG_32:
5377                 case PACKET_WREG_BULK:
5378                 case PACKET_MSG_LONG:
5379                 case PACKET_MSG_SHORT:
5380                 case PACKET_REPEAT:
5381                 case PACKET_FENCE:
5382                 case PACKET_NOP:
5383                 case PACKET_ARB_POINT:
5384                 case PACKET_LOAD_AND_EXE:
5385                         memcpy(kernel_pkt, user_pkt, pkt_size);
5386                         cb_patched_cur_length += pkt_size;
5387                         break;
5388
5389                 default:
5390                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5391                                 pkt_id);
5392                         rc = -EINVAL;
5393                         break;
5394                 }
5395
5396                 if (rc)
5397                         break;
5398         }
5399
5400         return rc;
5401 }
5402
5403 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5404                 struct hl_cs_parser *parser)
5405 {
5406         u64 handle;
5407         u32 patched_cb_size;
5408         struct hl_cb *user_cb;
5409         int rc;
5410
5411         /*
5412          * The new CB should have space at the end for:
5413          * 1. Optional NOP padding for cacheline alignment
5414          * 2. A MSG_PROT packet that will act as a completion packet
5415          * 3. A MSG_PROT packet that will generate the MSI interrupt
5416          */
5417         if (parser->completion)
5418                 parser->patched_cb_size = parser->user_cb_size +
5419                                 gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5420         else
5421                 parser->patched_cb_size = parser->user_cb_size;
5422
5423         rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5424                                 parser->patched_cb_size, false, false,
5425                                 &handle);
5426
5427         if (rc) {
5428                 dev_err(hdev->dev,
5429                         "Failed to allocate patched CB for DMA CS %d\n",
5430                         rc);
5431                 return rc;
5432         }
5433
5434         parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5435         /* hl_cb_get should never fail */
5436         if (!parser->patched_cb) {
5437                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5438                 rc = -EFAULT;
5439                 goto out;
5440         }
5441
5442         /*
5443          * We are protected from overflow because the check
5444          * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5445          * in the common code. That check is done only if is_kernel_allocated_cb is true.
5446          *
5447          * There is no option to reach here without going through that check because:
5448          * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5449          *    an external queue.
5450          * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5451          */
5452         memcpy(parser->patched_cb->kernel_address,
5453                 parser->user_cb->kernel_address,
5454                 parser->user_cb_size);
5455
5456         patched_cb_size = parser->patched_cb_size;
5457
5458         /* Validate patched CB instead of user CB */
5459         user_cb = parser->user_cb;
5460         parser->user_cb = parser->patched_cb;
5461         rc = gaudi_validate_cb(hdev, parser, true);
5462         parser->user_cb = user_cb;
5463
5464         if (rc) {
5465                 hl_cb_put(parser->patched_cb);
5466                 goto out;
5467         }
5468
5469         if (patched_cb_size != parser->patched_cb_size) {
5470                 dev_err(hdev->dev, "user CB size mismatch\n");
5471                 hl_cb_put(parser->patched_cb);
5472                 rc = -EINVAL;
5473                 goto out;
5474         }
5475
5476 out:
5477         /*
5478          * Always call cb destroy here because we still hold one reference
5479          * to it from the cb_get call above. After the job is completed,
5480          * cb_put will release it, but here we want to remove it from the
5481          * idr
5482          */
5483         hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5484
5485         return rc;
5486 }
5487
5488 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5489                 struct hl_cs_parser *parser)
5490 {
5491         u64 handle;
5492         int rc;
5493
5494         rc = gaudi_validate_cb(hdev, parser, false);
5495
5496         if (rc)
5497                 goto free_userptr;
5498
5499         rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5500                                 parser->patched_cb_size, false, false,
5501                                 &handle);
5502         if (rc) {
5503                 dev_err(hdev->dev,
5504                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5505                 goto free_userptr;
5506         }
5507
5508         parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5509         /* hl_cb_get should never fail here */
5510         if (!parser->patched_cb) {
5511                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5512                 rc = -EFAULT;
5513                 goto out;
5514         }
5515
5516         rc = gaudi_patch_cb(hdev, parser);
5517
5518         if (rc)
5519                 hl_cb_put(parser->patched_cb);
5520
5521 out:
5522         /*
5523          * Always call cb destroy here because we still hold one reference
5524          * to it from the cb_get call above. After the job is completed,
5525          * cb_put will release it, but here we want to remove it from the
5526          * idr
5527          */
5528         hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5529
5530 free_userptr:
5531         if (rc)
5532                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5533         return rc;
5534 }
5535
5536 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5537                                         struct hl_cs_parser *parser)
5538 {
5539         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5540         struct gaudi_device *gaudi = hdev->asic_specific;
5541         u32 nic_queue_offset, nic_mask_q_id;
5542
5543         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5544                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5545                 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5546                 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5547
5548                 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5549                         dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5550                         return -EINVAL;
5551                 }
5552         }
5553
5554         /* For internal queue jobs just check if CB address is valid */
5555         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5556                                         parser->user_cb_size,
5557                                         asic_prop->sram_user_base_address,
5558                                         asic_prop->sram_end_address))
5559                 return 0;
5560
5561         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5562                                         parser->user_cb_size,
5563                                         asic_prop->dram_user_base_address,
5564                                         asic_prop->dram_end_address))
5565                 return 0;
5566
5567         /* PMMU and HPMMU addresses are equal, check only one of them */
5568         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5569                                         parser->user_cb_size,
5570                                         asic_prop->pmmu.start_addr,
5571                                         asic_prop->pmmu.end_addr))
5572                 return 0;
5573
5574         dev_err(hdev->dev,
5575                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5576                 parser->user_cb, parser->user_cb_size);
5577
5578         return -EFAULT;
5579 }
5580
5581 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5582 {
5583         struct gaudi_device *gaudi = hdev->asic_specific;
5584
5585         if (parser->queue_type == QUEUE_TYPE_INT)
5586                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5587
5588         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5589                 return gaudi_parse_cb_mmu(hdev, parser);
5590         else
5591                 return gaudi_parse_cb_no_mmu(hdev, parser);
5592 }
5593
5594 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5595                                 u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5596                                 u32 msi_vec, bool eb)
5597 {
5598         struct gaudi_device *gaudi = hdev->asic_specific;
5599         struct packet_msg_prot *cq_pkt;
5600         struct packet_nop *cq_padding;
5601         u64 msi_addr;
5602         u32 tmp;
5603
5604         cq_padding = kernel_address + original_len;
5605         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5606
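        /*
         * Pad the gap between the end of the original CB contents and the two
         * trailing MSG_PROT packets with NOPs, so the MSG_PROTs sit at the
         * (cacheline-aligned) end of the patched CB.
         */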
5607         while ((void *)cq_padding < (void *)cq_pkt) {
5608                 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5609                 cq_padding++;
5610         }
5611
5612         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5613         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5614
5615         if (eb)
5616                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5617
5618         cq_pkt->ctl = cpu_to_le32(tmp);
5619         cq_pkt->value = cpu_to_le32(cq_val);
5620         cq_pkt->addr = cpu_to_le64(cq_addr);
5621
5622         cq_pkt++;
5623
5624         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5625         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5626         cq_pkt->ctl = cpu_to_le32(tmp);
5627         cq_pkt->value = cpu_to_le32(1);
5628
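        /*
         * The second MSG_PROT triggers the completion interrupt by writing to
         * the PCIe MSI request register, or to the per-vector MSI register
         * when multi-MSI mode is enabled.
         */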
5629         if (gaudi->multi_msi_mode)
5630                 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5631         else
5632                 msi_addr = mmPCIE_CORE_MSI_REQ;
5633
5634         cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5635 }
5636
5637 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5638 {
5639         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5640 }
5641
5642 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5643                                         u32 size, u64 val)
5644 {
5645         struct packet_lin_dma *lin_dma_pkt;
5646         struct hl_cs_job *job;
5647         u32 cb_size, ctl, err_cause;
5648         struct hl_cb *cb;
5649         int rc;
5650
5651         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5652         if (!cb)
5653                 return -EFAULT;
5654
5655         lin_dma_pkt = cb->kernel_address;
5656         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5657         cb_size = sizeof(*lin_dma_pkt);
5658
5659         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5660         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5661         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5662         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5663         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5664
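        /*
         * With the MEMSET bit set, src_addr carries the 64-bit fill value
         * rather than a source address, and the engine replicates it across
         * the destination range.
         */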
5665         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5666         lin_dma_pkt->src_addr = cpu_to_le64(val);
5667         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5668         lin_dma_pkt->tsize = cpu_to_le32(size);
5669
5670         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5671         if (!job) {
5672                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5673                 rc = -ENOMEM;
5674                 goto release_cb;
5675         }
5676
5677         /* Verify DMA is OK */
5678         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5679         if (err_cause && !hdev->init_done) {
5680                 dev_dbg(hdev->dev,
5681                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5682                         err_cause);
5683                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5684         }
5685
5686         job->id = 0;
5687         job->user_cb = cb;
5688         atomic_inc(&job->user_cb->cs_cnt);
5689         job->user_cb_size = cb_size;
5690         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5691         job->patched_cb = job->user_cb;
5692         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5693
5694         hl_debugfs_add_job(hdev, job);
5695
5696         rc = gaudi_send_job_on_qman0(hdev, job);
5697         hl_debugfs_remove_job(hdev, job);
5698         kfree(job);
5699         atomic_dec(&cb->cs_cnt);
5700
5701         /* Verify DMA is OK */
5702         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5703         if (err_cause) {
5704                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5705                 rc = -EIO;
5706                 if (!hdev->init_done) {
5707                         dev_dbg(hdev->dev,
5708                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5709                                 err_cause);
5710                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5711                 }
5712         }
5713
5714 release_cb:
5715         hl_cb_put(cb);
5716         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5717
5718         return rc;
5719 }
5720
5721 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5722                                         u32 num_regs, u32 val)
5723 {
5724         struct packet_msg_long *pkt;
5725         struct hl_cs_job *job;
5726         u32 cb_size, ctl;
5727         struct hl_cb *cb;
5728         int i, rc;
5729
5730         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5731
5732         if (cb_size > SZ_2M) {
5733                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5734                 return -ENOMEM;
5735         }
5736
5737         cb = hl_cb_kernel_create(hdev, cb_size, false);
5738         if (!cb)
5739                 return -EFAULT;
5740
5741         pkt = cb->kernel_address;
5742
5743         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5744         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5745         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5746         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5747         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5748
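        /*
         * Build one MSG_LONG packet per register; packet i writes val to
         * reg_base + (i * 4).
         */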
5749         for (i = 0; i < num_regs ; i++, pkt++) {
5750                 pkt->ctl = cpu_to_le32(ctl);
5751                 pkt->value = cpu_to_le32(val);
5752                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5753         }
5754
5755         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5756         if (!job) {
5757                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5758                 rc = -ENOMEM;
5759                 goto release_cb;
5760         }
5761
5762         job->id = 0;
5763         job->user_cb = cb;
5764         atomic_inc(&job->user_cb->cs_cnt);
5765         job->user_cb_size = cb_size;
5766         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5767         job->patched_cb = job->user_cb;
5768         job->job_cb_size = cb_size;
5769
5770         hl_debugfs_add_job(hdev, job);
5771
5772         rc = gaudi_send_job_on_qman0(hdev, job);
5773         hl_debugfs_remove_job(hdev, job);
5774         kfree(job);
5775         atomic_dec(&cb->cs_cnt);
5776
5777 release_cb:
5778         hl_cb_put(cb);
5779         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5780
5781         return rc;
5782 }
5783
5784 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5785 {
5786         u64 base_addr;
5787         u32 num_regs;
5788         int rc;
5789
5790         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5791         num_regs = NUM_OF_SOB_IN_BLOCK;
5792         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5793         if (rc) {
5794                 dev_err(hdev->dev, "failed resetting SM registers\n");
5795                 return rc;
5796         }
5797
5798         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5799         num_regs = NUM_OF_SOB_IN_BLOCK;
5800         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5801         if (rc) {
5802                 dev_err(hdev->dev, "failed resetting SM registers\n");
5803                 return rc;
5804         }
5805
5806         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5807         num_regs = NUM_OF_SOB_IN_BLOCK;
5808         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5809         if (rc) {
5810                 dev_err(hdev->dev, "failed resetting SM registers\n");
5811                 return rc;
5812         }
5813
5814         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5815         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5816         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5817         if (rc) {
5818                 dev_err(hdev->dev, "failed resetting SM registers\n");
5819                 return rc;
5820         }
5821
5822         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5823         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5824         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5825         if (rc) {
5826                 dev_err(hdev->dev, "failed resetting SM registers\n");
5827                 return rc;
5828         }
5829
5830         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5831         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5832         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5833         if (rc) {
5834                 dev_err(hdev->dev, "failed resetting SM registers\n");
5835                 return rc;
5836         }
5837
5838         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5839                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5840         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5841         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5842         if (rc) {
5843                 dev_err(hdev->dev, "failed resetting SM registers\n");
5844                 return rc;
5845         }
5846
5847         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5848                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5849         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5850         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5851         if (rc) {
5852                 dev_err(hdev->dev, "failed resetting SM registers\n");
5853                 return rc;
5854         }
5855
5856         return 0;
5857 }
5858
5859 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5860 {
5861         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5862                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5863         int i;
5864
5865         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5866                 u64 sob_addr = CFG_BASE +
5867                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5868                                 (i * sob_delta);
5869                 u32 dma_offset = i * DMA_CORE_OFFSET;
5870
5871                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5872                                 lower_32_bits(sob_addr));
5873                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5874                                 upper_32_bits(sob_addr));
5875                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5876
5877                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5878                  * modified by the user for SRAM reduction
5879                  */
5880                 if (i > 1)
5881                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5882                                                                 0x00000001);
5883         }
5884 }
5885
5886 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5887 {
5888         u32 qman_offset;
5889         int i;
5890
5891         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5892                 qman_offset = i * DMA_QMAN_OFFSET;
5893                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5894         }
5895
5896         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5897                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5898                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5899         }
5900
5901         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5902                 qman_offset = i * TPC_QMAN_OFFSET;
5903                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5904         }
5905
5906         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5907                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5908                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5909                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5910         }
5911 }
5912
5913 static int gaudi_restore_user_registers(struct hl_device *hdev)
5914 {
5915         int rc;
5916
5917         rc = gaudi_restore_sm_registers(hdev);
5918         if (rc)
5919                 return rc;
5920
5921         gaudi_restore_dma_registers(hdev);
5922         gaudi_restore_qm_registers(hdev);
5923
5924         return 0;
5925 }
5926
5927 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5928 {
5929         return 0;
5930 }
5931
5932 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5933 {
5934         u32 size = hdev->asic_prop.mmu_pgt_size +
5935                         hdev->asic_prop.mmu_cache_mng_size;
5936         struct gaudi_device *gaudi = hdev->asic_specific;
5937         u64 addr = hdev->asic_prop.mmu_pgt_addr;
5938
5939         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5940                 return 0;
5941
5942         return gaudi_memset_device_memory(hdev, addr, size, 0);
5943 }
5944
5945 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5946 {
5947
5948 }
5949
5950 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5951                                         u32 size_to_dma, dma_addr_t dma_addr)
5952 {
5953         u32 err_cause, val;
5954         u64 dma_offset;
5955         int rc;
5956
5957         dma_offset = dma_id * DMA_CORE_OFFSET;
5958
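        /*
         * Program the DMA core registers directly (bypassing its QMAN):
         * source, destination and size, then commit a linear transfer and
         * poll STS0 below until the engine is no longer busy.
         */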
5959         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5960         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5961         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5962         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5963         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5964         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5965                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5966
5967         rc = hl_poll_timeout(
5968                 hdev,
5969                 mmDMA0_CORE_STS0 + dma_offset,
5970                 val,
5971                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5972                 0,
5973                 1000000);
5974
5975         if (rc) {
5976                 dev_err(hdev->dev,
5977                         "DMA %d timed-out during reading of 0x%llx\n",
5978                         dma_id, addr);
5979                 return -EIO;
5980         }
5981
5982         /* Verify DMA is OK */
5983         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5984         if (err_cause) {
5985                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5986                 dev_dbg(hdev->dev,
5987                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5988                         err_cause);
5989                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5990
5991                 return -EIO;
5992         }
5993
5994         return 0;
5995 }
5996
5997 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5998                                 void *blob_addr)
5999 {
6000         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6001         u32 qm_glbl_sts0, qm_cgm_sts;
6002         u64 dma_offset, qm_offset;
6003         dma_addr_t dma_addr;
6004         void *kernel_addr;
6005         bool is_eng_idle;
6006         int rc = 0, dma_id;
6007
6008         kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
6009
6010         if (!kernel_addr)
6011                 return -ENOMEM;
6012
6013         hdev->asic_funcs->hw_queues_lock(hdev);
6014
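        /*
         * Prefer the first PCI DMA channel; if it is busy, fall back to the
         * second one checked below.
         */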
6015         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6016         dma_offset = dma_id * DMA_CORE_OFFSET;
6017         qm_offset = dma_id * DMA_QMAN_OFFSET;
6018         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6019         qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6020         qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6021         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6022                       IS_DMA_IDLE(dma_core_sts0);
6023
6024         if (!is_eng_idle) {
6025                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6026                 dma_offset = dma_id * DMA_CORE_OFFSET;
6027                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6028                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6029                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6030                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6031                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6032                               IS_DMA_IDLE(dma_core_sts0);
6033
6034                 if (!is_eng_idle) {
6035                         dev_err_ratelimited(hdev->dev,
6036                                 "Can't read via DMA because it is BUSY\n");
6037                         rc = -EAGAIN;
6038                         goto out;
6039                 }
6040         }
6041
6042         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6043         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6044                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6045
6046         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6047          * using the compute ctx ASID, if exists. If not, use the kernel ctx
6048          * ASID
6049          */
6050         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6051
6052         /* Verify DMA is OK */
6053         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6054         if (err_cause) {
6055                 dev_dbg(hdev->dev,
6056                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6057                         err_cause);
6058                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6059         }
6060
6061         pos = 0;
6062         size_left = size;
6063         size_to_dma = SZ_2M;
6064
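        /*
         * Read the target range in chunks of up to 2MB through the DMA bounce
         * buffer, copying each chunk into the caller's blob.
         */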
6065         while (size_left > 0) {
6066
6067                 if (size_left < SZ_2M)
6068                         size_to_dma = size_left;
6069
6070                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6071                                                 dma_addr);
6072                 if (rc)
6073                         break;
6074
6075                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6076
6077                 if (size_left <= SZ_2M)
6078                         break;
6079
6080                 pos += SZ_2M;
6081                 addr += SZ_2M;
6082                 size_left -= SZ_2M;
6083         }
6084
6085         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6086          * using the compute ctx ASID, if exists. If not, use the kernel ctx
6087          * ASID
6088          */
6089         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6090                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6091
6092         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6093
6094 out:
6095         hdev->asic_funcs->hw_queues_unlock(hdev);
6096
6097         hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6098
6099         return rc;
6100 }
6101
6102 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6103 {
6104         struct gaudi_device *gaudi = hdev->asic_specific;
6105
6106         if (hdev->reset_info.hard_reset_pending)
6107                 return U64_MAX;
6108
6109         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6110                         (addr - gaudi->hbm_bar_cur_addr));
6111 }
6112
6113 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6114 {
6115         struct gaudi_device *gaudi = hdev->asic_specific;
6116
6117         if (hdev->reset_info.hard_reset_pending)
6118                 return;
6119
6120         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6121                         (addr - gaudi->hbm_bar_cur_addr));
6122 }
6123
6124 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6125 {
6126         /* mask to zero the MMBP and ASID bits */
6127         WREG32_AND(reg, ~0x7FF);
6128         WREG32_OR(reg, asid);
6129 }
6130
6131 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6132 {
6133         struct gaudi_device *gaudi = hdev->asic_specific;
6134
6135         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6136                 return;
6137
6138         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6139                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6140                 return;
6141         }
6142
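        /*
         * Program the ASID into the non-secure AXI USER properties of every
         * DMA/TPC/MME/NIC QMAN and engine, so their memory accesses are
         * translated in this context's address space.
         */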
6143         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6144         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6145         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6146         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6147         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6148
6149         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6150         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6151         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6152         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6153         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6154
6155         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6156         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6157         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6158         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6159         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6160
6161         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6162         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6163         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6164         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6165         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6166
6167         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6168         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6169         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6170         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6171         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6172
6173         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6174         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6175         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6176         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6177         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6178
6179         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6180         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6181         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6182         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6183         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6184
6185         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6186         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6187         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6188         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6189         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6190
6191         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6192         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6193         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6194         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6195         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6196         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6197         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6198         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6199
6200         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6201         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6202         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6203         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6204         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6205         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6206         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6207
6208         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6209         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6210         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6211         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6212         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6213         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6214         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6215
6216         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6217         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6218         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6219         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6220         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6221         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6222         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6223
6224         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6225         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6226         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6227         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6228         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6229         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6230         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6231
6232         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6233         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6234         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6235         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6236         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6237         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6238         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6239
6240         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6241         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6242         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6243         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6244         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6245         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6246         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6247
6248         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6249         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6250         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6251         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6252         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6253         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6254         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6255
6256         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6257         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6258         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6259         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6260         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6261         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6262         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6263
6264         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6265         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6266         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6267         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6268         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6269         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6270         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6271         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6272         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6273         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6274
6275         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6276         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6277         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6278         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6279         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6280         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6281         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6282         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6283         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6284         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6285         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6286         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6287
6288         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6289                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6290                                 asid);
6291                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6292                                 asid);
6293                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6294                                 asid);
6295                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6296                                 asid);
6297                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6298                                 asid);
6299         }
6300
6301         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6302                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6303                                 asid);
6304                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6305                                 asid);
6306                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6307                                 asid);
6308                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6309                                 asid);
6310                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6311                                 asid);
6312         }
6313
6314         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6315                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6316                                 asid);
6317                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6318                                 asid);
6319                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6320                                 asid);
6321                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6322                                 asid);
6323                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6324                                 asid);
6325         }
6326
6327         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6328                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6329                                 asid);
6330                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6331                                 asid);
6332                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6333                                 asid);
6334                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6335                                 asid);
6336                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6337                                 asid);
6338         }
6339
6340         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6341                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6342                                 asid);
6343                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6344                                 asid);
6345                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6346                                 asid);
6347                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6348                                 asid);
6349                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6350                                 asid);
6351         }
6352
6353         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6354                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6355                                 asid);
6356                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6357                                 asid);
6358                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6359                                 asid);
6360                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6361                                 asid);
6362                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6363                                 asid);
6364         }
6365
6366         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6367                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6368                                 asid);
6369                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6370                                 asid);
6371                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6372                                 asid);
6373                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6374                                 asid);
6375                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6376                                 asid);
6377         }
6378
6379         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6380                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6381                                 asid);
6382                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6383                                 asid);
6384                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6385                                 asid);
6386                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6387                                 asid);
6388                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6389                                 asid);
6390         }
6391
6392         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6393                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6394                                 asid);
6395                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6396                                 asid);
6397                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6398                                 asid);
6399                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6400                                 asid);
6401                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6402                                 asid);
6403         }
6404
6405         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6406                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6407                                 asid);
6408                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6409                                 asid);
6410                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6411                                 asid);
6412                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6413                                 asid);
6414                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6415                                 asid);
6416         }
6417
6418         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6419         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6420 }
6421
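/*
 * Job completion on QMAN0 is detected with a fence: the last packet of the
 * patched CB is filled in as a MSG_PROT packet that writes
 * GAUDI_QMAN0_FENCE_VAL to a host address taken from the DMA pool, and after
 * submitting the CB the driver polls that address until the value shows up or
 * the timeout expires.
 */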
6422 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6423                 struct hl_cs_job *job)
6424 {
6425         struct packet_msg_prot *fence_pkt;
6426         u32 *fence_ptr;
6427         dma_addr_t fence_dma_addr;
6428         struct hl_cb *cb;
6429         u32 tmp, timeout, dma_offset;
6430         int rc;
6431
6432         if (hdev->pldm)
6433                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6434         else
6435                 timeout = HL_DEVICE_TIMEOUT_USEC;
6436
6437         fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6438         if (!fence_ptr) {
6439                 dev_err(hdev->dev,
6440                         "Failed to allocate fence memory for QMAN0\n");
6441                 return -ENOMEM;
6442         }
6443
6444         cb = job->patched_cb;
6445
6446         fence_pkt = cb->kernel_address +
6447                         job->job_cb_size - sizeof(struct packet_msg_prot);
6448
6449         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6450         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6451         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6452
6453         fence_pkt->ctl = cpu_to_le32(tmp);
6454         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6455         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6456
6457         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6458
6459         WREG32(mmDMA0_CORE_PROT + dma_offset,
6460                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6461
6462         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6463                                         job->job_cb_size, cb->bus_address);
6464         if (rc) {
6465                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6466                 goto free_fence_ptr;
6467         }
6468
6469         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6470                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6471                                 timeout, true);
6472
6473         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6474
6475         if (rc == -ETIMEDOUT) {
6476                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6477                 goto free_fence_ptr;
6478         }
6479
6480 free_fence_ptr:
6481         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6482
6483         hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6484         return rc;
6485 }
6486
6487 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6488 {
6489         if (event_type >= GAUDI_EVENT_SIZE)
6490                 goto event_not_supported;
6491
6492         if (!gaudi_irq_map_table[event_type].valid)
6493                 goto event_not_supported;
6494
6495         strscpy(desc, gaudi_irq_map_table[event_type].name, size);
6496
6497         return;
6498
6499 event_not_supported:
6500         snprintf(desc, size, "N/A");
6501 }
6502
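/*
 * Each DMA_IF RAZWI initiator ID below is shared by two DMA cores, so the
 * offending core is inferred from the per-core ERR_CAUSE registers: if only
 * one core of the pair reports an HBW read/write error it is named directly,
 * otherwise both candidates are reported.
 */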
6503 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6504                                                         bool is_write, u16 *engine_id_1,
6505                                                         u16 *engine_id_2)
6506 {
6507         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6508
6509         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6510                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6511
6512         switch (x_y) {
6513         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6514         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6515                 dma_id[0] = 0;
6516                 dma_id[1] = 2;
6517                 break;
6518         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6519         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6520                 dma_id[0] = 1;
6521                 dma_id[1] = 3;
6522                 break;
6523         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6524         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6525                 dma_id[0] = 4;
6526                 dma_id[1] = 6;
6527                 break;
6528         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6529         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6530                 dma_id[0] = 5;
6531                 dma_id[1] = 7;
6532                 break;
6533         default:
6534                 goto unknown_initiator;
6535         }
6536
6537         for (i = 0 ; i < 2 ; i++) {
6538                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6539                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6540         }
6541
6542         switch (x_y) {
6543         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6544         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6545                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6546                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6547                         return "DMA0";
6548                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6549                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6550                         return "DMA2";
6551                 } else {
6552                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6553                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6554                         return "DMA0 or DMA2";
6555                 }
6556         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6557         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6558                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6559                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6560                         return "DMA1";
6561                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6562                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6563                         return "DMA3";
6564                 } else {
6565                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6566                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6567                         return "DMA1 or DMA3";
6568                 }
6569         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6570         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6571                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6572                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6573                         return "DMA4";
6574                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6575                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6576                         return "DMA6";
6577                 } else {
6578                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6579                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6580                         return "DMA4 or DMA6";
6581                 }
6582         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6583         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6584                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6585                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6586                         return "DMA5";
6587                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6588                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6589                         return "DMA7";
6590                 } else {
6591                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6592                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6593                         return "DMA5 or DMA7";
6594                 }
6595         }
6596
6597 unknown_initiator:
6598         return "unknown initiator";
6599 }
6600
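/*
 * The RAZWI initiator is decoded from the MMU_UP_RAZWI_WRITE_ID /
 * MMU_UP_RAZWI_READ_ID register: the X/Y fields identify a location on the
 * routing grid and the AXI ID disambiguates between the engines
 * (TPC/NIC/PCI/CPU/PSOC) that share that location.
 */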
6601 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6602                                                         u16 *engine_id_1, u16 *engine_id_2)
6603 {
6604         u32 val, x_y, axi_id;
6605
6606         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6607                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6608         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6609                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6610         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6611                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6612
6613         switch (x_y) {
6614         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6615                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6616                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6617                         return "TPC0";
6618                 }
6619                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6620                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6621                         return "NIC0";
6622                 }
6623                 break;
6624         case RAZWI_INITIATOR_ID_X_Y_TPC1:
6625                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6626                 return "TPC1";
6627         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6628         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6629                 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6630                 return "MME0";
6631         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6632         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6633                 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6634                 return "MME1";
6635         case RAZWI_INITIATOR_ID_X_Y_TPC2:
6636                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6637                 return "TPC2";
6638         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6639                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6640                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6641                         return "TPC3";
6642                 }
6643                 /* PCI, CPU and PSOC do not have an engine id */
6644                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6645                         return "PCI";
6646                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6647                         return "CPU";
6648                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6649                         return "PSOC";
6650                 break;
6651         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6652         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6653         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6654         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6655         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6656         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6657         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6658         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6659                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6660                                 engine_id_1, engine_id_2);
6661         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6662                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6663                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6664                         return "TPC4";
6665                 }
6666                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6667                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6668                         return "NIC1";
6669                 }
6670                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6671                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6672                         return "NIC2";
6673                 }
6674                 break;
6675         case RAZWI_INITIATOR_ID_X_Y_TPC5:
6676                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6677                 return "TPC5";
6678         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6679         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6680                 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6681                 return "MME2";
6682         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6683         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6684                 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6685                 return "MME3";
6686         case RAZWI_INITIATOR_ID_X_Y_TPC6:
6687                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6688                 return "TPC6";
6689         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6690                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6691                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6692                         return "TPC7";
6693                 }
6694                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6695                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6696                         return "NIC4";
6697                 }
6698                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6699                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6700                         return "NIC5";
6701                 }
6702                 break;
6703         default:
6704                 break;
6705         }
6706
6707         dev_err(hdev->dev,
6708                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6709                 val,
6710                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6711                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6712                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6713                         RAZWI_INITIATOR_AXI_ID_MASK);
6714
6715         return "unknown initiator";
6716 }
6717
6718 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6719                                                 u16 *engine_id_2, bool *is_read, bool *is_write)
6720 {
6721
6722         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6723                 dev_err_ratelimited(hdev->dev,
6724                         "RAZWI event caused by illegal write of %s\n",
6725                         gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6726                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6727                 *is_write = true;
6728         }
6729
6730         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6731                 dev_err_ratelimited(hdev->dev,
6732                         "RAZWI event caused by illegal read of %s\n",
6733                         gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6734                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6735                 *is_read = true;
6736         }
6737 }
6738
6739 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6740 {
6741         struct gaudi_device *gaudi = hdev->asic_specific;
6742         u32 val;
6743
6744         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6745                 return;
6746
6747         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6748         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6749                 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6750                 *addr <<= 32;
6751                 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6752
6753                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6754                 hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6755
6756                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6757         }
6758
6759         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6760         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6761                 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6762                 *addr <<= 32;
6763                 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6764
6765                 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6766
6767                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6768         }
6769 }
6770
6771 /*
6772  *  +-------------------+------------------------------------------------------+
6773  *  | Configuration Reg |                     Description                      |
6774  *  |      Address      |                                                      |
6775  *  +-------------------+------------------------------------------------------+
6776  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6777  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6778  *  |                   |0xF34 memory wrappers 63:32                           |
6779  *  |                   |0xF38 memory wrappers 95:64                           |
6780  *  |                   |0xF3C memory wrappers 127:96                          |
6781  *  +-------------------+------------------------------------------------------+
6782  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6783  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6784  *  |                   |0xF44 memory wrappers 63:32                           |
6785  *  |                   |0xF48 memory wrappers 95:64                           |
6786  *  |                   |0xF4C memory wrappers 127:96                          |
6787  *  +-------------------+------------------------------------------------------+
6788  */
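/*
 * Illustrative example (values are hypothetical): a block with 90 memories
 * has num_mem_regs = 3. If the first non-zero indication register is the one
 * at offset 0xF34 and it reads 0x00000010, __ffs() returns 4 and the failing
 * wrapper index is 32 * 1 + 4 = 36, which is written to the MEM_SEL register
 * before reading back the ECC address and syndrome.
 */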
6789 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6790                 struct ecc_info_extract_params *params, u64 *ecc_address,
6791                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6792 {
6793         u32 i, num_mem_regs, reg, err_bit;
6794         u64 err_addr, err_word = 0;
6795
6796         num_mem_regs = params->num_memories / 32 +
6797                         ((params->num_memories % 32) ? 1 : 0);
6798
6799         if (params->block_address >= CFG_BASE)
6800                 params->block_address -= CFG_BASE;
6801
6802         if (params->derr)
6803                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6804         else
6805                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6806
6807         /* Set invalid wrapper index */
6808         *memory_wrapper_idx = 0xFF;
6809
6810         /* Iterate through memory wrappers, a single bit must be set */
6811         for (i = 0 ; i < num_mem_regs ; i++) {
6812                 /* each indication register covers 32 memory wrappers */
6813                 err_word = RREG32(err_addr + i * 4);
6814                 if (err_word) {
6815                         err_bit = __ffs(err_word);
6816                         *memory_wrapper_idx = err_bit + (32 * i);
6817                         break;
6818                 }
6819         }
6820
6821         if (*memory_wrapper_idx == 0xFF) {
6822                 dev_err(hdev->dev, "ECC error information cannot be found\n");
6823                 return -EINVAL;
6824         }
6825
6826         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6827                         *memory_wrapper_idx);
6828
6829         *ecc_address =
6830                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6831         *ecc_syndrom =
6832                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6833
6834         /* Clear error indication */
6835         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6836         if (params->derr)
6837                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6838         else
6839                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6840
6841         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6842
6843         return 0;
6844 }
6845
6846 /*
6847  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6848  *
6849  * @idx: the current pi/ci value
6850  * @q_len: the queue length (power of 2)
6851  *
6852  * @return the cyclically decremented index
6853  */
6854 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6855 {
6856         u32 mask = q_len - 1;
6857
6858         /*
6859          * A modular decrement is equivalent to adding (q_len - 1);
6860          * taking the LSBs afterwards makes sure the value stays in
6861          * the range [0, q_len - 1]
6862          */
6863         return (idx + q_len - 1) & mask;
6864 }
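/*
 * Example: with q_len = 1024 (mask = 0x3FF), gaudi_queue_idx_dec(0, 1024)
 * returns 1023 and gaudi_queue_idx_dec(512, 1024) returns 511.
 */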
6865
6866 /**
6867  * gaudi_handle_sw_config_stream_data - print SW config stream data
6868  *
6869  * @hdev: pointer to the habanalabs device structure
6870  * @stream: the QMAN's stream
6871  * @qman_base: base address of QMAN registers block
6872  * @event_mask: mask of the last events occurred
6873  * @event_mask: mask of the events that have occurred
6874 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6875                                                 u64 qman_base, u64 event_mask)
6876 {
6877         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6878         u32 cq_ptr_lo_off, size;
6879
6880         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6881
6882         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6883                                                 stream * cq_ptr_lo_off;
6884         cq_ptr_hi = cq_ptr_lo +
6885                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6886         cq_tsize = cq_ptr_lo +
6887                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6888
6889         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6890         size = RREG32(cq_tsize);
6891         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6892                                                         stream, cq_ptr, size);
6893
6894         if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6895                 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6896                 hdev->captured_err_info.undef_opcode.cq_size = size;
6897                 hdev->captured_err_info.undef_opcode.stream_id = stream;
6898         }
6899 }
6900
6901 /**
6902  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6903  *
6904  * @hdev: pointer to the habanalabs device structure
6905  * @qid_base: first QID of the QMAN (out of 4 streams)
6906  * @stream: the QMAN's stream
6907  * @qman_base: base address of QMAN registers block
6908  * @event_mask: mask of the events that have occurred
6909  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6910  */
6911 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6912                                                 u32 stream, u64 qman_base,
6913                                                 u64 event_mask,
6914                                                 bool pr_sw_conf)
6915 {
6916         u32 ci, qm_ci_stream_off, queue_len;
6917         struct hl_hw_queue *q;
6918         u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6919         int i;
6920
6921         q = &hdev->kernel_queues[qid_base + stream];
6922
6923         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6924         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6925                                                 stream * qm_ci_stream_off;
6926
6927         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6928                                         q->int_queue_len : HL_QUEUE_LENGTH;
6929
6930         hdev->asic_funcs->hw_queues_lock(hdev);
6931
6932         if (pr_sw_conf)
6933                 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6934
6935         ci = RREG32(pq_ci);
6936
6937                 /* we should start printing from ci - 1 */
6938         ci = gaudi_queue_idx_dec(ci, queue_len);
6939         memset(addr, 0, sizeof(addr));
6940
6941         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6942                 struct hl_bd *bd;
6943                 u32 len;
6944
6945                 bd = q->kernel_address;
6946                 bd += ci;
6947
6948                 len = le32_to_cpu(bd->len);
6949                 /* len 0 means uninitialized entry - break */
6950                 if (!len)
6951                         break;
6952
6953                 addr[i] = le64_to_cpu(bd->ptr);
6954
6955                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6956                                                         stream, ci, addr[i], len);
6957
6958                 /* get previous ci, wrap if needed */
6959                 ci = gaudi_queue_idx_dec(ci, queue_len);
6960         }
6961
6962         if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6963                 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6964                 u32 arr_idx = undef_opcode->cb_addr_streams_len;
6965
6966                 if (arr_idx == 0) {
6967                         undef_opcode->timestamp = ktime_get();
6968                         undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6969                 }
6970
6971                 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6972                 undef_opcode->cb_addr_streams_len++;
6973         }
6974
6975         hdev->asic_funcs->hw_queues_unlock(hdev);
6976 }
6977
6978 /**
6979  * handle_qman_data_on_err - extract QMAN data on error
6980  *
6981  * @hdev: pointer to the habanalabs device structure
6982  * @qid_base: first QID of the QMAN (out of 4 streams)
6983  * @stream: the QMAN's stream
6984  * @qman_base: base address of QMAN registers block
6985  * @event_mask: mask of the events that have occurred
6986  *
6987  * This function attempts to extract as much data as possible on a QMAN error.
6988  * For an upper CP, print the SW config stream data and the last 8 PQEs.
6989  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
6990  */
6991 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6992                                    u32 stream, u64 qman_base, u64 event_mask)
6993 {
6994         u32 i;
6995
6996         if (stream != QMAN_STREAMS) {
6997                 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6998                         qman_base, event_mask, true);
6999                 return;
7000         }
7001
7002         /* handle Lower-CP */
7003         gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
7004
7005         for (i = 0; i < QMAN_STREAMS; i++)
7006                 gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
7007                         qman_base, event_mask, false);
7008 }
7009
7010 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7011                                           const char *qm_name,
7012                                           u64 qman_base,
7013                                           u32 qid_base,
7014                                           u64 *event_mask)
7015 {
7016         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7017         u64 glbl_sts_addr, arb_err_addr;
7018         char reg_desc[32];
7019
7020         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7021         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7022
7023         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7024         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7025                 glbl_sts_clr_val = 0;
7026                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7027
7028                 if (!glbl_sts_val)
7029                         continue;
7030
7031                 if (i == QMAN_STREAMS)
7032                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7033                 else
7034                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7035
7036                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7037                         if (glbl_sts_val & BIT(j)) {
7038                                 dev_err_ratelimited(hdev->dev,
7039                                                 "%s %s. err cause: %s\n",
7040                                                 qm_name, reg_desc,
7041                                                 gaudi_qman_error_cause[j]);
7042                                 glbl_sts_clr_val |= BIT(j);
7043                         }
7044                 }
7045                 /* check for undefined opcode */
7046                 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
7047                                 hdev->captured_err_info.undef_opcode.write_enable) {
7048                         memset(&hdev->captured_err_info.undef_opcode, 0,
7049                                                 sizeof(hdev->captured_err_info.undef_opcode));
7050
7051                         hdev->captured_err_info.undef_opcode.write_enable = false;
7052                         *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
7053                 }
7054
7055                 /* Write 1 to clear errors */
7056                 if (!hdev->stop_on_err)
7057                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7058                 else
7059                         handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
7060         }
7061
7062         arb_err_val = RREG32(arb_err_addr);
7063
7064         if (!arb_err_val)
7065                 return;
7066
7067         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7068                 if (arb_err_val & BIT(j)) {
7069                         dev_err_ratelimited(hdev->dev,
7070                                         "%s ARB_ERR. err cause: %s\n",
7071                                         qm_name,
7072                                         gaudi_qman_arb_error_cause[j]);
7073                 }
7074         }
7075 }
7076
7077 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7078                 struct hl_eq_sm_sei_data *sei_data)
7079 {
7080         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7081
7082         /* Flip the bits as the enum is ordered in the opposite way */
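        /* (index ^ 0x3) maps 0<->3 and 1<->2, i.e. it reverses the 0..3 range */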
7083         index = (index ^ 0x3) & 0x3;
7084
7085         switch (sei_data->sei_cause) {
7086         case SM_SEI_SO_OVERFLOW:
7087                 dev_err_ratelimited(hdev->dev,
7088                         "%s SEI Error: SOB Group %u overflow/underflow",
7089                         gaudi_sync_manager_names[index],
7090                         le32_to_cpu(sei_data->sei_log));
7091                 break;
7092         case SM_SEI_LBW_4B_UNALIGNED:
7093                 dev_err_ratelimited(hdev->dev,
7094                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7095                         gaudi_sync_manager_names[index],
7096                         le32_to_cpu(sei_data->sei_log));
7097                 break;
7098         case SM_SEI_AXI_RESPONSE_ERR:
7099                 dev_err_ratelimited(hdev->dev,
7100                         "%s SEI Error: AXI ID %u response error",
7101                         gaudi_sync_manager_names[index],
7102                         le32_to_cpu(sei_data->sei_log));
7103                 break;
7104         default:
7105                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause, sei_log %u",
7106                                 le32_to_cpu(sei_data->sei_log));
7107                 break;
7108         }
7109 }
7110
7111 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7112                 struct hl_eq_ecc_data *ecc_data)
7113 {
7114         struct ecc_info_extract_params params;
7115         u64 ecc_address = 0, ecc_syndrom = 0;
7116         u8 index, memory_wrapper_idx = 0;
7117         bool extract_info_from_fw;
7118         int rc;
7119
7120         if (hdev->asic_prop.fw_security_enabled) {
7121                 extract_info_from_fw = true;
7122                 goto extract_ecc_info;
7123         }
7124
7125         switch (event_type) {
7126         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7127         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7128                 extract_info_from_fw = true;
7129                 break;
7130         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7131                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7132                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7133                 params.num_memories = 90;
7134                 params.derr = false;
7135                 extract_info_from_fw = false;
7136                 break;
7137         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7138                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7139                 params.block_address =
7140                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7141                 params.num_memories = 90;
7142                 params.derr = true;
7143                 extract_info_from_fw = false;
7144                 break;
7145         case GAUDI_EVENT_MME0_ACC_SERR:
7146         case GAUDI_EVENT_MME1_ACC_SERR:
7147         case GAUDI_EVENT_MME2_ACC_SERR:
7148         case GAUDI_EVENT_MME3_ACC_SERR:
7149                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7150                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7151                 params.num_memories = 128;
7152                 params.derr = false;
7153                 extract_info_from_fw = false;
7154                 break;
7155         case GAUDI_EVENT_MME0_ACC_DERR:
7156         case GAUDI_EVENT_MME1_ACC_DERR:
7157         case GAUDI_EVENT_MME2_ACC_DERR:
7158         case GAUDI_EVENT_MME3_ACC_DERR:
7159                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7160                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7161                 params.num_memories = 128;
7162                 params.derr = true;
7163                 extract_info_from_fw = false;
7164                 break;
7165         case GAUDI_EVENT_MME0_SBAB_SERR:
7166         case GAUDI_EVENT_MME1_SBAB_SERR:
7167         case GAUDI_EVENT_MME2_SBAB_SERR:
7168         case GAUDI_EVENT_MME3_SBAB_SERR:
7169                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7170                 params.block_address =
7171                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7172                 params.num_memories = 33;
7173                 params.derr = false;
7174                 extract_info_from_fw = false;
7175                 break;
7176         case GAUDI_EVENT_MME0_SBAB_DERR:
7177         case GAUDI_EVENT_MME1_SBAB_DERR:
7178         case GAUDI_EVENT_MME2_SBAB_DERR:
7179         case GAUDI_EVENT_MME3_SBAB_DERR:
7180                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7181                 params.block_address =
7182                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7183                 params.num_memories = 33;
7184                 params.derr = true;
7185                 extract_info_from_fw = false;
7186                 break;
7187         default:
7188                 return;
7189         }
7190
7191 extract_ecc_info:
7192         if (extract_info_from_fw) {
7193                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7194                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7195                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7196         } else {
7197                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7198                                 &ecc_syndrom, &memory_wrapper_idx);
7199                 if (rc)
7200                         return;
7201         }
7202
7203         dev_err(hdev->dev,
7204                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7205                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7206 }
7207
7208 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7209 {
7210         u64 qman_base;
7211         char desc[32];
7212         u32 qid_base;
7213         u8 index;
7214
7215         switch (event_type) {
7216         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7217                 index = event_type - GAUDI_EVENT_TPC0_QM;
7218                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7219                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7220                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7221                 break;
7222         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7223                 if (event_type == GAUDI_EVENT_MME0_QM) {
7224                         index = 0;
7225                         qid_base = GAUDI_QUEUE_ID_MME_0_0;
7226                 } else { /* event_type == GAUDI_EVENT_MME2_QM */
7227                         index = 2;
7228                         qid_base = GAUDI_QUEUE_ID_MME_1_0;
7229                 }
7230                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7231                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7232                 break;
7233         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7234                 index = event_type - GAUDI_EVENT_DMA0_QM;
7235                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7236                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7237                 if (index > 1)
7238                         qid_base++;
7239                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7240                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7241                 break;
7242         case GAUDI_EVENT_NIC0_QM0:
7243                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7244                 qman_base = mmNIC0_QM0_BASE;
7245                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7246                 break;
7247         case GAUDI_EVENT_NIC0_QM1:
7248                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7249                 qman_base = mmNIC0_QM1_BASE;
7250                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7251                 break;
7252         case GAUDI_EVENT_NIC1_QM0:
7253                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7254                 qman_base = mmNIC1_QM0_BASE;
7255                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7256                 break;
7257         case GAUDI_EVENT_NIC1_QM1:
7258                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7259                 qman_base = mmNIC1_QM1_BASE;
7260                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7261                 break;
7262         case GAUDI_EVENT_NIC2_QM0:
7263                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7264                 qman_base = mmNIC2_QM0_BASE;
7265                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7266                 break;
7267         case GAUDI_EVENT_NIC2_QM1:
7268                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7269                 qman_base = mmNIC2_QM1_BASE;
7270                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7271                 break;
7272         case GAUDI_EVENT_NIC3_QM0:
7273                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7274                 qman_base = mmNIC3_QM0_BASE;
7275                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7276                 break;
7277         case GAUDI_EVENT_NIC3_QM1:
7278                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7279                 qman_base = mmNIC3_QM1_BASE;
7280                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7281                 break;
7282         case GAUDI_EVENT_NIC4_QM0:
7283                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7284                 qman_base = mmNIC4_QM0_BASE;
7285                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7286                 break;
7287         case GAUDI_EVENT_NIC4_QM1:
7288                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7289                 qman_base = mmNIC4_QM1_BASE;
7290                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7291                 break;
7292         default:
7293                 return;
7294         }
7295
7296         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7297 }
7298
7299 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7300                                         bool razwi, u64 *event_mask)
7301 {
7302         bool is_read = false, is_write = false;
7303         u16 engine_id[2], num_of_razwi_eng = 0;
7304         char desc[64] = "";
7305         u64 razwi_addr = 0;
7306         u8 razwi_flags = 0;
7307
7308         /*
7309          * Initialize the engine ids as invalid by default; they get a valid value only if the
7310          * RAZWI was initiated by an engine that has an engine id.
7311          */
7312         engine_id[0] = HL_RAZWI_NA_ENG_ID;
7313         engine_id[1] = HL_RAZWI_NA_ENG_ID;
7314
7315         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7316         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7317                 event_type, desc);
7318
7319         if (razwi) {
7320                 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7321                                                 &is_write);
7322                 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7323
7324                 if (is_read)
7325                         razwi_flags |= HL_RAZWI_READ;
7326                 if (is_write)
7327                         razwi_flags |= HL_RAZWI_WRITE;
7328
7329                 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7330                         if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7331                                 num_of_razwi_eng = 2;
7332                         else
7333                                 num_of_razwi_eng = 1;
7334                 }
7335
7336                 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, razwi_flags,
7337                                 event_mask);
7338         }
7339 }
7340
7341 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7342                                         struct cpucp_pkt_sync_err *sync_err)
7343 {
7344         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7345
7346         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7347                 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7348 }
7349
7350 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7351                                         struct hl_eq_fw_alive *fw_alive)
7352 {
7353         dev_err(hdev->dev,
7354                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7355                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7356                 le32_to_cpu(fw_alive->process_id),
7357                 le32_to_cpu(fw_alive->thread_id),
7358                 le64_to_cpu(fw_alive->uptime_seconds));
7359 }
7360
7361 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7362                                                 void *data)
7363 {
7364         char desc[64] = "", *type;
7365         struct eq_nic_sei_event *eq_nic_sei = data;
7366         u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7367
7368         switch (eq_nic_sei->axi_error_cause) {
7369         case RXB:
7370                 type = "RXB";
7371                 break;
7372         case RXE:
7373                 type = "RXE";
7374                 break;
7375         case TXS:
7376                 type = "TXS";
7377                 break;
7378         case TXE:
7379                 type = "TXE";
7380                 break;
7381         case QPC_RESP:
7382                 type = "QPC_RESP";
7383                 break;
7384         case NON_AXI_ERR:
7385                 type = "NON_AXI_ERR";
7386                 break;
7387         case TMR:
7388                 type = "TMR";
7389                 break;
7390         default:
7391                 dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7392                         eq_nic_sei->axi_error_cause);
7393                 type = "N/A";
7394                 break;
7395         }
7396
7397         snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7398                         eq_nic_sei->id);
7399         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7400                 event_type, desc);
7401 }
7402
7403 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7404 {
7405         /* GAUDI doesn't support any reset except hard-reset */
7406         return -EPERM;
7407 }
7408
7409 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7410                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7411 {
7412         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7413         int rc = 0;
7414
7415         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7416                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7417                 if (!hbm_ecc_data) {
7418                         dev_err(hdev->dev, "No FW ECC data");
7419                         return 0;
7420                 }
7421
7422                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7423                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7424                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7425                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7426                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7427                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7428                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7429                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7430                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7431                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7432                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7433                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7434                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7435                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7436
7437                 dev_err(hdev->dev,
7438                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7439                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7440                 dev_err(hdev->dev,
7441                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7442                         device, ch, hbm_ecc_data->first_addr, type,
7443                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7444                         hbm_ecc_data->dec_cnt);
7445                 return 0;
7446         }
7447
7448         if (hdev->asic_prop.fw_security_enabled) {
7449                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7450                 return 0;
7451         }
7452
7453         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7454         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7455                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7456                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7457                 if (val) {
7458                         rc = -EIO;
7459                         dev_err(hdev->dev,
7460                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7461                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7462                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7463                                 (val >> 4) & 0x1);
7464
7465                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7466                         dev_err(hdev->dev,
7467                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7468                                 device, ch * 2,
7469                                 RREG32(base + ch * 0x1000 + 0x064),
7470                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7471                                 (val2 & 0xFF0000) >> 16,
7472                                 (val2 & 0xFF000000) >> 24);
7473                 }
7474
7475                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7476                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7477                 if (val) {
7478                         rc = -EIO;
7479                         dev_err(hdev->dev,
7480                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7481                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7482                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7483                                 (val >> 4) & 0x1);
7484
7485                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7486                         dev_err(hdev->dev,
7487                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7488                                 device, ch * 2 + 1,
7489                                 RREG32(base + ch * 0x1000 + 0x074),
7490                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7491                                 (val2 & 0xFF0000) >> 16,
7492                                 (val2 & 0xFF000000) >> 24);
7493                 }
7494
7495                 /* Clear interrupts */
7496                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7497                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7498                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7499                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7500                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7501                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7502         }
7503
7504         val  = RREG32(base + 0x8F30);
7505         val2 = RREG32(base + 0x8F34);
7506         if (val | val2) {
7507                 rc = -EIO;
7508                 dev_err(hdev->dev,
7509                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7510                         device, val, val2);
7511         }
7512         val  = RREG32(base + 0x8F40);
7513         val2 = RREG32(base + 0x8F44);
7514         if (val | val2) {
7515                 rc = -EIO;
7516                 dev_err(hdev->dev,
7517                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7518                         device, val, val2);
7519         }
7520
7521         return rc;
7522 }
7523
7524 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7525 {
7526         switch (hbm_event_type) {
7527         case GAUDI_EVENT_HBM0_SPI_0:
7528         case GAUDI_EVENT_HBM0_SPI_1:
7529                 return 0;
7530         case GAUDI_EVENT_HBM1_SPI_0:
7531         case GAUDI_EVENT_HBM1_SPI_1:
7532                 return 1;
7533         case GAUDI_EVENT_HBM2_SPI_0:
7534         case GAUDI_EVENT_HBM2_SPI_1:
7535                 return 2;
7536         case GAUDI_EVENT_HBM3_SPI_0:
7537         case GAUDI_EVENT_HBM3_SPI_1:
7538                 return 3;
7539         default:
7540                 break;
7541         }
7542
7543         /* Should never happen */
7544         return 0;
7545 }
7546
7547 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7548                                         char *interrupt_name)
7549 {
7550         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7551         bool soft_reset_required = false;
7552
7553         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7554                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7555
7556         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7557                 if (tpc_interrupts_cause & BIT(i)) {
7558                         dev_err_ratelimited(hdev->dev,
7559                                         "TPC%d_%s interrupt cause: %s\n",
7560                                         tpc_id, interrupt_name,
7561                                         gaudi_tpc_interrupts_cause[i]);
7562                         /* If this is a QM error, we need to soft-reset */
7563                         if (i == 15)
7564                                 soft_reset_required = true;
7565                 }
7566
7567         /* Clear interrupts */
7568         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7569
7570         return soft_reset_required;
7571 }
7572
7573 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7574 {
7575         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7576 }
7577
7578 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7579 {
7580         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7581 }
7582
7583 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7584 {
7585         ktime_t zero_time = ktime_set(0, 0);
7586
7587         mutex_lock(&hdev->clk_throttling.lock);
7588
7589         switch (event_type) {
7590         case GAUDI_EVENT_FIX_POWER_ENV_S:
7591                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7592                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7593                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7594                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7595                 dev_info_ratelimited(hdev->dev,
7596                         "Clock throttling due to power consumption\n");
7597                 break;
7598
7599         case GAUDI_EVENT_FIX_POWER_ENV_E:
7600                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7601                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7602                 dev_info_ratelimited(hdev->dev,
7603                         "Power envelope is safe, back to optimal clock\n");
7604                 break;
7605
7606         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7607                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7608                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7609                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7610                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7611                 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7612                 dev_info_ratelimited(hdev->dev,
7613                         "Clock throttling due to overheating\n");
7614                 break;
7615
7616         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7617                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7618                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7619                 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7620                 dev_info_ratelimited(hdev->dev,
7621                         "Thermal envelope is safe, back to optimal clock\n");
7622                 break;
7623
7624         default:
7625                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7626                         event_type);
7627                 break;
7628         }
7629
7630         mutex_unlock(&hdev->clk_throttling.lock);
7631 }
7632
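     /*
      * Main event-queue handler: classifies the FW event, prints/collects the
      * relevant error information, accumulates notifier event-mask bits and
      * decides whether the event requires a (hard) device reset.
      */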
7633 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7634 {
7635         struct gaudi_device *gaudi = hdev->asic_specific;
7636         u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7637         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7638         u32 fw_fatal_err_flag = 0, flags = 0;
7639         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7640                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7641         bool reset_required, reset_direct = false;
7642         u8 cause;
7643         int rc;
7644
7645         if (event_type >= GAUDI_EVENT_SIZE) {
7646                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
7647                                 event_type, GAUDI_EVENT_SIZE - 1);
7648                 return;
7649         }
7650
7651         gaudi->events_stat[event_type]++;
7652         gaudi->events_stat_aggregate[event_type]++;
7653
7654         switch (event_type) {
7655         case GAUDI_EVENT_PCIE_CORE_DERR:
7656         case GAUDI_EVENT_PCIE_IF_DERR:
7657         case GAUDI_EVENT_PCIE_PHY_DERR:
7658         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7659         case GAUDI_EVENT_MME0_ACC_DERR:
7660         case GAUDI_EVENT_MME0_SBAB_DERR:
7661         case GAUDI_EVENT_MME1_ACC_DERR:
7662         case GAUDI_EVENT_MME1_SBAB_DERR:
7663         case GAUDI_EVENT_MME2_ACC_DERR:
7664         case GAUDI_EVENT_MME2_SBAB_DERR:
7665         case GAUDI_EVENT_MME3_ACC_DERR:
7666         case GAUDI_EVENT_MME3_SBAB_DERR:
7667         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7668                 fallthrough;
7669         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7670         case GAUDI_EVENT_PSOC_MEM_DERR:
7671         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7672         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7673         case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7674         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7675         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7676         case GAUDI_EVENT_MMU_DERR:
7677         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7678                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7679                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7680                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7681                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7682                 goto reset_device;
7683
7684         case GAUDI_EVENT_GIC500:
7685         case GAUDI_EVENT_AXI_ECC:
7686         case GAUDI_EVENT_L2_RAM_ECC:
7687         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7688                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7689                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7690                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7691                 goto reset_device;
7692
7693         case GAUDI_EVENT_HBM0_SPI_0:
7694         case GAUDI_EVENT_HBM1_SPI_0:
7695         case GAUDI_EVENT_HBM2_SPI_0:
7696         case GAUDI_EVENT_HBM3_SPI_0:
7697                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7698                 gaudi_hbm_read_interrupts(hdev,
7699                                 gaudi_hbm_event_to_dev(event_type),
7700                                 &eq_entry->hbm_ecc_data);
7701                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7702                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7703                 goto reset_device;
7704
7705         case GAUDI_EVENT_HBM0_SPI_1:
7706         case GAUDI_EVENT_HBM1_SPI_1:
7707         case GAUDI_EVENT_HBM2_SPI_1:
7708         case GAUDI_EVENT_HBM3_SPI_1:
7709                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7710                 gaudi_hbm_read_interrupts(hdev,
7711                                 gaudi_hbm_event_to_dev(event_type),
7712                                 &eq_entry->hbm_ecc_data);
7713                 hl_fw_unmask_irq(hdev, event_type);
7714                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7715                 break;
7716
7717         case GAUDI_EVENT_TPC0_DEC:
7718         case GAUDI_EVENT_TPC1_DEC:
7719         case GAUDI_EVENT_TPC2_DEC:
7720         case GAUDI_EVENT_TPC3_DEC:
7721         case GAUDI_EVENT_TPC4_DEC:
7722         case GAUDI_EVENT_TPC5_DEC:
7723         case GAUDI_EVENT_TPC6_DEC:
7724         case GAUDI_EVENT_TPC7_DEC:
7725                 /* On a TPC DEC event, notify about a possible TPC assertion. There is
7726                  * no dedicated assertion event yet, so the FW reports assertions as TPC
7727                  * DEC events. The SW upper layer inspects an internal mapped area to
7728                  * determine whether the event is a TPC assertion or a "real" TPC DEC.
7729                  */
7730                 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7731                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7732                 reset_required = gaudi_tpc_read_interrupts(hdev,
7733                                         tpc_dec_event_to_tpc_id(event_type),
7734                                         "AXI_SLV_DEC_Error");
7735                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7736                 if (reset_required) {
7737                         dev_err(hdev->dev, "reset required due to %s\n",
7738                                 gaudi_irq_map_table[event_type].name);
7739
7740                         reset_direct = true;
7741                         goto reset_device;
7742                 } else {
7743                         hl_fw_unmask_irq(hdev, event_type);
7744                         event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7745                 }
7746                 break;
7747
7748         case GAUDI_EVENT_TPC0_KRN_ERR:
7749         case GAUDI_EVENT_TPC1_KRN_ERR:
7750         case GAUDI_EVENT_TPC2_KRN_ERR:
7751         case GAUDI_EVENT_TPC3_KRN_ERR:
7752         case GAUDI_EVENT_TPC4_KRN_ERR:
7753         case GAUDI_EVENT_TPC5_KRN_ERR:
7754         case GAUDI_EVENT_TPC6_KRN_ERR:
7755         case GAUDI_EVENT_TPC7_KRN_ERR:
7756                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7757                 reset_required = gaudi_tpc_read_interrupts(hdev,
7758                                         tpc_krn_event_to_tpc_id(event_type),
7759                                         "KRN_ERR");
7760                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7761                 if (reset_required) {
7762                         dev_err(hdev->dev, "reset required due to %s\n",
7763                                 gaudi_irq_map_table[event_type].name);
7764
7765                         reset_direct = true;
7766                         goto reset_device;
7767                 } else {
7768                         hl_fw_unmask_irq(hdev, event_type);
7769                         event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7770                 }
7771                 break;
7772
7773         case GAUDI_EVENT_PCIE_CORE_SERR:
7774         case GAUDI_EVENT_PCIE_IF_SERR:
7775         case GAUDI_EVENT_PCIE_PHY_SERR:
7776         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7777         case GAUDI_EVENT_MME0_ACC_SERR:
7778         case GAUDI_EVENT_MME0_SBAB_SERR:
7779         case GAUDI_EVENT_MME1_ACC_SERR:
7780         case GAUDI_EVENT_MME1_SBAB_SERR:
7781         case GAUDI_EVENT_MME2_ACC_SERR:
7782         case GAUDI_EVENT_MME2_SBAB_SERR:
7783         case GAUDI_EVENT_MME3_ACC_SERR:
7784         case GAUDI_EVENT_MME3_SBAB_SERR:
7785         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7786         case GAUDI_EVENT_CPU_IF_ECC_SERR:
7787         case GAUDI_EVENT_PSOC_MEM_SERR:
7788         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7789         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7790         case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7791         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7792         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7793                 fallthrough;
7794         case GAUDI_EVENT_MMU_SERR:
7795                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7796                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7797                 hl_fw_unmask_irq(hdev, event_type);
7798                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7799                 break;
7800
7801         case GAUDI_EVENT_PCIE_DEC:
7802         case GAUDI_EVENT_CPU_AXI_SPLITTER:
7803         case GAUDI_EVENT_PSOC_AXI_DEC:
7804         case GAUDI_EVENT_PSOC_PRSTN_FALL:
7805                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7806                 hl_fw_unmask_irq(hdev, event_type);
7807                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7808                 break;
7809
7810         case GAUDI_EVENT_MMU_PAGE_FAULT:
7811         case GAUDI_EVENT_MMU_WR_PERM:
7812                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7813                 hl_fw_unmask_irq(hdev, event_type);
7814                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7815                 break;
7816
7817         case GAUDI_EVENT_MME0_WBC_RSP:
7818         case GAUDI_EVENT_MME0_SBAB0_RSP:
7819         case GAUDI_EVENT_MME1_WBC_RSP:
7820         case GAUDI_EVENT_MME1_SBAB0_RSP:
7821         case GAUDI_EVENT_MME2_WBC_RSP:
7822         case GAUDI_EVENT_MME2_SBAB0_RSP:
7823         case GAUDI_EVENT_MME3_WBC_RSP:
7824         case GAUDI_EVENT_MME3_SBAB0_RSP:
7825         case GAUDI_EVENT_RAZWI_OR_ADC:
7826         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7827         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7828                 fallthrough;
7829         case GAUDI_EVENT_NIC0_QM0:
7830         case GAUDI_EVENT_NIC0_QM1:
7831         case GAUDI_EVENT_NIC1_QM0:
7832         case GAUDI_EVENT_NIC1_QM1:
7833         case GAUDI_EVENT_NIC2_QM0:
7834         case GAUDI_EVENT_NIC2_QM1:
7835         case GAUDI_EVENT_NIC3_QM0:
7836         case GAUDI_EVENT_NIC3_QM1:
7837         case GAUDI_EVENT_NIC4_QM0:
7838         case GAUDI_EVENT_NIC4_QM1:
7839         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7840         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7841                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7842                 gaudi_handle_qman_err(hdev, event_type, &event_mask);
7843                 hl_fw_unmask_irq(hdev, event_type);
7844                 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7845                 break;
7846
7847         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7848                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7849                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7850                 goto reset_device;
7851
7852         case GAUDI_EVENT_TPC0_BMON_SPMU:
7853         case GAUDI_EVENT_TPC1_BMON_SPMU:
7854         case GAUDI_EVENT_TPC2_BMON_SPMU:
7855         case GAUDI_EVENT_TPC3_BMON_SPMU:
7856         case GAUDI_EVENT_TPC4_BMON_SPMU:
7857         case GAUDI_EVENT_TPC5_BMON_SPMU:
7858         case GAUDI_EVENT_TPC6_BMON_SPMU:
7859         case GAUDI_EVENT_TPC7_BMON_SPMU:
7860         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7861                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7862                 hl_fw_unmask_irq(hdev, event_type);
7863                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7864                 break;
7865
7866         case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7867                 gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7868                 hl_fw_unmask_irq(hdev, event_type);
7869                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7870                 break;
7871
7872         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7873                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7874                 gaudi_print_sm_sei_info(hdev, event_type,
7875                                         &eq_entry->sm_sei_data);
7876                 rc = hl_state_dump(hdev);
7877                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7878                 if (rc)
7879                         dev_err(hdev->dev,
7880                                 "Error during system state dump %d\n", rc);
7881                 hl_fw_unmask_irq(hdev, event_type);
7882                 break;
7883
7884         case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7885                 break;
7886
7887         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7888                 gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7889                 hl_fw_unmask_irq(hdev, event_type);
7890                 break;
7891
7892         case GAUDI_EVENT_PSOC_GPIO_U16_0:
7893                 cause = data & 0xFF;
7894                 dev_err(hdev->dev,
7895                         "Received high temp H/W interrupt %d (cause %d)\n",
7896                         event_type, cause);
7897                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7898                 break;
7899
7900         case GAUDI_EVENT_DEV_RESET_REQ:
7901                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7902                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7903                 goto reset_device;
7904
7905         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7906                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7907                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7908                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7909                 goto reset_device;
7910
7911         case GAUDI_EVENT_FW_ALIVE_S:
7912                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7913                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7914                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7915                 goto reset_device;
7916
7917         default:
7918                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7919                                 event_type);
7920                 break;
7921         }
7922
7923         if (event_mask)
7924                 hl_notifier_event_send_all(hdev, event_mask);
7925
7926         return;
7927
7928 reset_device:
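             /*
              * Reset policy: with FW security enabled (and no direct reset
              * requested) do a hard reset that bypasses the request-to-FW step;
              * otherwise, if hard resets on FW events are enabled, schedule a
              * delayed hard reset; else only unmask the IRQ and notify.
              */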
7929         reset_required = true;
7930
7931         if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7932                 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7933
7934                 /* notify on device unavailable while the reset is triggered by FW */
7935                 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7936                                         HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7937         } else if (hdev->hard_reset_on_fw_events) {
7938                 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7939                 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7940         } else {
7941                 reset_required = false;
7942         }
7943
7944         if (reset_required) {
7945                 hl_device_cond_reset(hdev, flags, event_mask);
7946         } else {
7947                 hl_fw_unmask_irq(hdev, event_type);
7948                 /* The event notification still needs to be sent even though no reset is executed */
7949                 if (event_mask)
7950                         hl_notifier_event_send_all(hdev, event_mask);
7951         }
7952 }
7953
7954 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7955 {
7956         struct gaudi_device *gaudi = hdev->asic_specific;
7957
7958         if (aggregate) {
7959                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7960                 return gaudi->events_stat_aggregate;
7961         }
7962
7963         *size = (u32) sizeof(gaudi->events_stat);
7964         return gaudi->events_stat;
7965 }
7966
7967 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7968 {
7969         struct gaudi_device *gaudi = hdev->asic_specific;
7970         u32 status, timeout_usec;
7971         int rc;
7972
7973         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7974                 hdev->reset_info.hard_reset_pending)
7975                 return 0;
7976
7977         if (hdev->pldm)
7978                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7979         else
7980                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7981
7982         /* L0 & L1 invalidation */
7983         WREG32(mmSTLB_INV_PS, 3);
7984         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7985         WREG32(mmSTLB_INV_PS, 2);
7986
7987         rc = hl_poll_timeout(
7988                 hdev,
7989                 mmSTLB_INV_PS,
7990                 status,
7991                 !status,
7992                 1000,
7993                 timeout_usec);
7994
7995         WREG32(mmSTLB_INV_SET, 0);
7996
7997         return rc;
7998 }
7999
8000 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8001                                                 bool is_hard, u32 flags,
8002                                                 u32 asid, u64 va, u64 size)
8003 {
8004         /* Treat as invalidate all because there is no range invalidation
8005          * in Gaudi
8006          */
8007         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8008 }
8009
8010 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
8011 {
8012         u32 status, timeout_usec;
8013         int rc;
8014
8015         if (hdev->pldm)
8016                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8017         else
8018                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8019
8020         WREG32(MMU_ASID, asid);
8021         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8022         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
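             /*
              * Kick the MMU to latch the new hop0 address; the poll below
              * waits for the HW to clear bit 31 of MMU_BUSY.
              */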
8023         WREG32(MMU_BUSY, 0x80000000);
8024
8025         rc = hl_poll_timeout(
8026                 hdev,
8027                 MMU_BUSY,
8028                 status,
8029                 !(status & 0x80000000),
8030                 1000,
8031                 timeout_usec);
8032
8033         if (rc) {
8034                 dev_err(hdev->dev,
8035                         "Timeout during MMU hop0 config of asid %d\n", asid);
8036                 return rc;
8037         }
8038
8039         return 0;
8040 }
8041
8042 static int gaudi_send_heartbeat(struct hl_device *hdev)
8043 {
8044         struct gaudi_device *gaudi = hdev->asic_specific;
8045
8046         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8047                 return 0;
8048
8049         return hl_fw_send_heartbeat(hdev);
8050 }
8051
8052 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8053 {
8054         struct gaudi_device *gaudi = hdev->asic_specific;
8055         struct asic_fixed_properties *prop = &hdev->asic_prop;
8056         int rc;
8057
8058         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8059                 return 0;
8060
8061         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8062                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8063                                         mmCPU_BOOT_ERR1);
8064         if (rc)
8065                 return rc;
8066
8067         if (!strlen(prop->cpucp_info.card_name))
8068                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8069                                 CARD_NAME_MAX_LEN);
8070
8071         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8072
8073         set_default_power_values(hdev);
8074
8075         return 0;
8076 }
8077
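     /*
      * Returns true only if all engines are idle. When mask_arr is provided,
      * a bit is set for every busy engine so callers can report exactly which
      * engines are still active; when e is provided, a per-engine status
      * table is formatted into it.
      */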
8078 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8079                 struct engines_data *e)
8080 {
8081         struct gaudi_device *gaudi = hdev->asic_specific;
8082         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8083         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8084         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8085         unsigned long *mask = (unsigned long *)mask_arr;
8086         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8087         bool is_idle = true, is_eng_idle, is_slave;
8088         u64 offset;
8089         int i, dma_id, port;
8090
8091         if (e)
8092                 hl_engine_data_sprintf(e,
8093                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8094                         "---  -------  ------------  ----------  -------------\n");
8095
8096         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8097                 dma_id = gaudi_dma_assignment[i];
8098                 offset = dma_id * DMA_QMAN_OFFSET;
8099
8100                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8101                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8102                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8103                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8104                                 IS_DMA_IDLE(dma_core_sts0);
8105                 is_idle &= is_eng_idle;
8106
8107                 if (mask && !is_eng_idle)
8108                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8109                 if (e)
8110                         hl_engine_data_sprintf(e, fmt, dma_id,
8111                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8112                                 qm_cgm_sts, dma_core_sts0);
8113         }
8114
8115         if (e)
8116                 hl_engine_data_sprintf(e,
8117                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8118                         "---  -------  ------------  ----------  ----------\n");
8119
8120         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8121                 offset = i * TPC_QMAN_OFFSET;
8122                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8123                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8124                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8125                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8126                                 IS_TPC_IDLE(tpc_cfg_sts);
8127                 is_idle &= is_eng_idle;
8128
8129                 if (mask && !is_eng_idle)
8130                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8131                 if (e)
8132                         hl_engine_data_sprintf(e, fmt, i,
8133                                 is_eng_idle ? "Y" : "N",
8134                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8135         }
8136
8137         if (e)
8138                 hl_engine_data_sprintf(e,
8139                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8140                         "---  -------  ------------  ----------  -----------\n");
8141
8142         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8143                 offset = i * MME_QMAN_OFFSET;
8144                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8145                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8146
8147                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8148                 is_slave = i % 2;
8149                 if (!is_slave) {
8150                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8151                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8152                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8153                 }
8154
8155                 is_idle &= is_eng_idle;
8156
8157                 if (mask && !is_eng_idle)
8158                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8159                 if (e) {
8160                         if (!is_slave)
8161                                 hl_engine_data_sprintf(e, fmt, i,
8162                                         is_eng_idle ? "Y" : "N",
8163                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8164                         else
8165                                 hl_engine_data_sprintf(e, mme_slave_fmt, i,
8166                                         is_eng_idle ? "Y" : "N", "-",
8167                                         "-", mme_arch_sts);
8168                 }
8169         }
8170
8171         if (e)
8172                 hl_engine_data_sprintf(e,
8173                                 "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8174                                 "---  -------  ------------  ----------\n");
8175
8176         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8177                 offset = i * NIC_MACRO_QMAN_OFFSET;
8178                 port = 2 * i;
8179                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8180                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8181                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8182                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8183                         is_idle &= is_eng_idle;
8184
8185                         if (mask && !is_eng_idle)
8186                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8187                         if (e)
8188                                 hl_engine_data_sprintf(e, nic_fmt, port,
8189                                                 is_eng_idle ? "Y" : "N",
8190                                                 qm_glbl_sts0, qm_cgm_sts);
8191                 }
8192
8193                 port = 2 * i + 1;
8194                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8195                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8196                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8197                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8198                         is_idle &= is_eng_idle;
8199
8200                         if (mask && !is_eng_idle)
8201                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8202                         if (e)
8203                                 hl_engine_data_sprintf(e, nic_fmt, port,
8204                                                 is_eng_idle ? "Y" : "N",
8205                                                 qm_glbl_sts0, qm_cgm_sts);
8206                 }
8207         }
8208
8209         if (e)
8210                 hl_engine_data_sprintf(e, "\n");
8211
8212         return is_idle;
8213 }
8214
8215 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8216         __acquires(&gaudi->hw_queues_lock)
8217 {
8218         struct gaudi_device *gaudi = hdev->asic_specific;
8219
8220         spin_lock(&gaudi->hw_queues_lock);
8221 }
8222
8223 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8224         __releases(&gaudi->hw_queues_lock)
8225 {
8226         struct gaudi_device *gaudi = hdev->asic_specific;
8227
8228         spin_unlock(&gaudi->hw_queues_lock);
8229 }
8230
8231 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8232 {
8233         return hdev->pdev->device;
8234 }
8235
8236 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8237                                 size_t max_size)
8238 {
8239         struct gaudi_device *gaudi = hdev->asic_specific;
8240
8241         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8242                 return 0;
8243
8244         return hl_fw_get_eeprom_data(hdev, data, max_size);
8245 }
8246
8247 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8248 {
8249         struct gaudi_device *gaudi = hdev->asic_specific;
8250
8251         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8252                 return 0;
8253
8254         return hl_fw_get_monitor_dump(hdev, data);
8255 }
8256
8257 /*
8258  * this function should be used only during initialization and/or after reset,
8259  * when there are no active users.
8260  */
8261 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8262 {
8263         u64 kernel_timeout;
8264         u32 status, offset;
8265         int rc;
8266
8267         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8268
8269         if (hdev->pldm)
8270                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8271         else
8272                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8273
8274         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8275                         lower_32_bits(tpc_kernel));
8276         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8277                         upper_32_bits(tpc_kernel));
8278
8279         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8280                         lower_32_bits(tpc_kernel));
8281         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8282                         upper_32_bits(tpc_kernel));
8283         /* set a valid LUT pointer, content is of no significance */
8284         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8285                         lower_32_bits(tpc_kernel));
8286         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8287                         upper_32_bits(tpc_kernel));
8288
8289         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8290                         lower_32_bits(CFG_BASE +
8291                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8292
8293         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8294                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8295                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8296         /* wait a bit for the engine to start executing */
8297         usleep_range(1000, 1500);
8298
8299         /* wait until engine has finished executing */
8300         rc = hl_poll_timeout(
8301                 hdev,
8302                 mmTPC0_CFG_STATUS + offset,
8303                 status,
8304                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8305                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8306                 1000,
8307                 kernel_timeout);
8308
8309         if (rc) {
8310                 dev_err(hdev->dev,
8311                         "Timeout while waiting for TPC%d icache prefetch\n",
8312                         tpc_id);
8313                 return -EIO;
8314         }
8315
8316         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8317                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8318
8319         /* wait a bit for the engine to start executing */
8320         usleep_range(1000, 1500);
8321
8322         /* wait until engine has finished executing */
8323         rc = hl_poll_timeout(
8324                 hdev,
8325                 mmTPC0_CFG_STATUS + offset,
8326                 status,
8327                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8328                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8329                 1000,
8330                 kernel_timeout);
8331
8332         if (rc) {
8333                 dev_err(hdev->dev,
8334                         "Timeout while waiting for TPC%d vector pipe\n",
8335                         tpc_id);
8336                 return -EIO;
8337         }
8338
8339         rc = hl_poll_timeout(
8340                 hdev,
8341                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8342                 status,
8343                 (status == 0),
8344                 1000,
8345                 kernel_timeout);
8346
8347         if (rc) {
8348                 dev_err(hdev->dev,
8349                         "Timeout while waiting for TPC%d kernel to execute\n",
8350                         tpc_id);
8351                 return -EIO;
8352         }
8353
8354         return 0;
8355 }
8356
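     /*
      * Set up a host-resident pool of small internal command buffers and map
      * it into the context's device VA space. The pool's minimum allocation
      * order is sized to hold one collective sequence (five MSG_SHORT packets
      * plus a FENCE packet).
      */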
8357 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8358                 struct hl_ctx *ctx)
8359 {
8360         struct gaudi_device *gaudi = hdev->asic_specific;
8361         int min_alloc_order, rc, collective_cb_size;
8362
8363         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8364                 return 0;
8365
8366         hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8367                                                         HOST_SPACE_INTERNAL_CB_SZ,
8368                                                         &hdev->internal_cb_pool_dma_addr,
8369                                                         GFP_KERNEL | __GFP_ZERO);
8370
8371         if (!hdev->internal_cb_pool_virt_addr)
8372                 return -ENOMEM;
8373
8374         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8375                         sizeof(struct packet_fence);
8376         min_alloc_order = ilog2(collective_cb_size);
8377
8378         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8379         if (!hdev->internal_cb_pool) {
8380                 dev_err(hdev->dev,
8381                         "Failed to create internal CB pool\n");
8382                 rc = -ENOMEM;
8383                 goto free_internal_cb_pool;
8384         }
8385
8386         rc = gen_pool_add(hdev->internal_cb_pool,
8387                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8388                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8389         if (rc) {
8390                 dev_err(hdev->dev,
8391                         "Failed to add memory to internal CB pool\n");
8392                 rc = -EFAULT;
8393                 goto destroy_internal_cb_pool;
8394         }
8395
8396         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8397                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8398                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8399
8400         if (!hdev->internal_cb_va_base) {
8401                 rc = -ENOMEM;
8402                 goto destroy_internal_cb_pool;
8403         }
8404
8405         mutex_lock(&hdev->mmu_lock);
8406         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8407                         hdev->internal_cb_pool_dma_addr,
8408                         HOST_SPACE_INTERNAL_CB_SZ);
8409
8410         hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8411         mutex_unlock(&hdev->mmu_lock);
8412
8413         if (rc)
8414                 goto unreserve_internal_cb_pool;
8415
8416         return 0;
8417
8418 unreserve_internal_cb_pool:
8419         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8420                         HOST_SPACE_INTERNAL_CB_SZ);
8421 destroy_internal_cb_pool:
8422         gen_pool_destroy(hdev->internal_cb_pool);
8423 free_internal_cb_pool:
8424         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8425                                         hdev->internal_cb_pool_dma_addr);
8426
8427         return rc;
8428 }
8429
8430 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8431                 struct hl_ctx *ctx)
8432 {
8433         struct gaudi_device *gaudi = hdev->asic_specific;
8434
8435         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8436                 return;
8437
8438         mutex_lock(&hdev->mmu_lock);
8439         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8440                         HOST_SPACE_INTERNAL_CB_SZ);
8441         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8442                         HOST_SPACE_INTERNAL_CB_SZ);
8443         hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8444         mutex_unlock(&hdev->mmu_lock);
8445
8446         gen_pool_destroy(hdev->internal_cb_pool);
8447
8448         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8449                                         hdev->internal_cb_pool_dma_addr);
8450 }
8451
8452 static int gaudi_ctx_init(struct hl_ctx *ctx)
8453 {
8454         int rc;
8455
8456         if (ctx->asid == HL_KERNEL_ASID_ID)
8457                 return 0;
8458
8459         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8460         if (rc)
8461                 return rc;
8462
8463         rc = gaudi_restore_user_registers(ctx->hdev);
8464         if (rc)
8465                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8466
8467         return rc;
8468 }
8469
8470 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8471 {
8472         if (ctx->asid == HL_KERNEL_ASID_ID)
8473                 return;
8474
8475         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8476 }
8477
8478 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8479 {
8480         return 0;
8481 }
8482
8483 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8484 {
8485         return gaudi_cq_assignment[cq_idx];
8486 }
8487
8488 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8489 {
8490         return sizeof(struct packet_msg_short) +
8491                         sizeof(struct packet_msg_prot) * 2;
8492 }
8493
8494 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8495 {
8496         return sizeof(struct packet_msg_short) * 4 +
8497                         sizeof(struct packet_fence) +
8498                         sizeof(struct packet_msg_prot) * 2;
8499 }
8500
8501 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8502 {
8503         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8504 }
8505
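     /*
      * Append a single MSG_SHORT packet to the CB that increments the given
      * SOB by 1 (ADD mode, W_S SOB base). Returns the updated CB size.
      */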
8506 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8507                                 u32 size, bool eb)
8508 {
8509         struct hl_cb *cb = (struct hl_cb *) data;
8510         struct packet_msg_short *pkt;
8511         u32 value, ctl, pkt_size = sizeof(*pkt);
8512
8513         pkt = cb->kernel_address + size;
8514         memset(pkt, 0, pkt_size);
8515
8516         /* Inc by 1, Mode ADD */
8517         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8518         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8519
8520         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8521         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8522         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8523         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8524         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8525         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8526         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8527
8528         pkt->value = cpu_to_le32(value);
8529         pkt->ctl = cpu_to_le32(ctl);
8530
8531         return size + pkt_size;
8532 }
8533
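     /*
      * Build a MSG_SHORT packet that writes 'value' to the given offset,
      * relative to the W_S monitor base address. Returns the packet size.
      */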
8534 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8535                                         u16 addr)
8536 {
8537         u32 ctl, pkt_size = sizeof(*pkt);
8538
8539         memset(pkt, 0, pkt_size);
8540
8541         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8542         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8543         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8544         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8545         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8546         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8547
8548         pkt->value = cpu_to_le32(value);
8549         pkt->ctl = cpu_to_le32(ctl);
8550
8551         return pkt_size;
8552 }
8553
8554 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8555                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8556                 u16 sob_val, u16 mon_id)
8557 {
8558         u64 monitor_base;
8559         u32 ctl, value, pkt_size = sizeof(*pkt);
8560         u16 msg_addr_offset;
8561         u8 mask;
8562
8563         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8564                 dev_err(hdev->dev,
8565                         "sob_base %u (mask %#x) is not valid\n",
8566                         sob_base, sob_mask);
8567                 return 0;
8568         }
8569
8570         /*
8571          * monitor_base should be the content of the base0 address registers,
8572          * so it will be added to the msg short offsets
8573          */
8574         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8575
8576         msg_addr_offset =
8577                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8578                                 monitor_base;
8579
8580         memset(pkt, 0, pkt_size);
8581
8582         /* Monitor config packet: bind the monitor to a sync object */
8583         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8584         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8585         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8586                         0); /* GREATER OR EQUAL */
8587         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8588
8589         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8590         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8591         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8592         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8593         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8594         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8595         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8596
8597         pkt->value = cpu_to_le32(value);
8598         pkt->ctl = cpu_to_le32(ctl);
8599
8600         return pkt_size;
8601 }
8602
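     /*
      * Build a FENCE packet on fence counter 2: the CP stalls until the
      * counter reaches a target value of 1 and then decrements it by 1.
      */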
8603 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8604 {
8605         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8606
8607         memset(pkt, 0, pkt_size);
8608
8609         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8610         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8611         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8612
8613         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8614         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8615         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8616         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8617
8618         pkt->cfg = cpu_to_le32(cfg);
8619         pkt->ctl = cpu_to_le32(ctl);
8620
8621         return pkt_size;
8622 }
8623
8624 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8625 {
8626         u32 offset, nic_index;
8627
8628         switch (queue_id) {
8629         case GAUDI_QUEUE_ID_DMA_0_0:
8630                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8631                 break;
8632         case GAUDI_QUEUE_ID_DMA_0_1:
8633                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8634                 break;
8635         case GAUDI_QUEUE_ID_DMA_0_2:
8636                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8637                 break;
8638         case GAUDI_QUEUE_ID_DMA_0_3:
8639                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8640                 break;
8641         case GAUDI_QUEUE_ID_DMA_1_0:
8642                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8643                 break;
8644         case GAUDI_QUEUE_ID_DMA_1_1:
8645                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8646                 break;
8647         case GAUDI_QUEUE_ID_DMA_1_2:
8648                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8649                 break;
8650         case GAUDI_QUEUE_ID_DMA_1_3:
8651                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8652                 break;
8653         case GAUDI_QUEUE_ID_DMA_5_0:
8654                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8655                 break;
8656         case GAUDI_QUEUE_ID_DMA_5_1:
8657                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8658                 break;
8659         case GAUDI_QUEUE_ID_DMA_5_2:
8660                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8661                 break;
8662         case GAUDI_QUEUE_ID_DMA_5_3:
8663                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8664                 break;
8665         case GAUDI_QUEUE_ID_TPC_7_0:
8666                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8667                 break;
8668         case GAUDI_QUEUE_ID_TPC_7_1:
8669                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8670                 break;
8671         case GAUDI_QUEUE_ID_TPC_7_2:
8672                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8673                 break;
8674         case GAUDI_QUEUE_ID_TPC_7_3:
8675                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8676                 break;
8677         case GAUDI_QUEUE_ID_NIC_0_0:
8678         case GAUDI_QUEUE_ID_NIC_1_0:
8679         case GAUDI_QUEUE_ID_NIC_2_0:
8680         case GAUDI_QUEUE_ID_NIC_3_0:
8681         case GAUDI_QUEUE_ID_NIC_4_0:
8682         case GAUDI_QUEUE_ID_NIC_5_0:
8683         case GAUDI_QUEUE_ID_NIC_6_0:
8684         case GAUDI_QUEUE_ID_NIC_7_0:
8685         case GAUDI_QUEUE_ID_NIC_8_0:
8686         case GAUDI_QUEUE_ID_NIC_9_0:
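                 /*
                  * Each NIC engine exposes four streams, so dividing the queue
                  * offset by 4 gives the NIC engine index; each NIC macro then
                  * hosts two engines (QM0/QM1), selected by the low bit of the
                  * engine index.
                  */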
8687                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8688                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8689                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8690                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8691                 break;
8692         case GAUDI_QUEUE_ID_NIC_0_1:
8693         case GAUDI_QUEUE_ID_NIC_1_1:
8694         case GAUDI_QUEUE_ID_NIC_2_1:
8695         case GAUDI_QUEUE_ID_NIC_3_1:
8696         case GAUDI_QUEUE_ID_NIC_4_1:
8697         case GAUDI_QUEUE_ID_NIC_5_1:
8698         case GAUDI_QUEUE_ID_NIC_6_1:
8699         case GAUDI_QUEUE_ID_NIC_7_1:
8700         case GAUDI_QUEUE_ID_NIC_8_1:
8701         case GAUDI_QUEUE_ID_NIC_9_1:
8702                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8703                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8704                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8705                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8706                 break;
8707         case GAUDI_QUEUE_ID_NIC_0_2:
8708         case GAUDI_QUEUE_ID_NIC_1_2:
8709         case GAUDI_QUEUE_ID_NIC_2_2:
8710         case GAUDI_QUEUE_ID_NIC_3_2:
8711         case GAUDI_QUEUE_ID_NIC_4_2:
8712         case GAUDI_QUEUE_ID_NIC_5_2:
8713         case GAUDI_QUEUE_ID_NIC_6_2:
8714         case GAUDI_QUEUE_ID_NIC_7_2:
8715         case GAUDI_QUEUE_ID_NIC_8_2:
8716         case GAUDI_QUEUE_ID_NIC_9_2:
8717                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8718                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8719                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8720                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8721                 break;
8722         case GAUDI_QUEUE_ID_NIC_0_3:
8723         case GAUDI_QUEUE_ID_NIC_1_3:
8724         case GAUDI_QUEUE_ID_NIC_2_3:
8725         case GAUDI_QUEUE_ID_NIC_3_3:
8726         case GAUDI_QUEUE_ID_NIC_4_3:
8727         case GAUDI_QUEUE_ID_NIC_5_3:
8728         case GAUDI_QUEUE_ID_NIC_6_3:
8729         case GAUDI_QUEUE_ID_NIC_7_3:
8730         case GAUDI_QUEUE_ID_NIC_8_3:
8731         case GAUDI_QUEUE_ID_NIC_9_3:
8732                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8733                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8734                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8735                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8736                 break;
8737         default:
8738                 return -EINVAL;
8739         }
8740
8741         *addr = CFG_BASE + offset;
8742
8743         return 0;
8744 }
8745
8746 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8747 {
8748         u64 monitor_base;
8749         u32 size = 0;
8750         u16 msg_addr_offset;
8751
8752         /*
8753          * monitor_base should be the content of the base0 address registers,
8754          * so it will be added to the msg short offsets
8755          */
8756         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8757
8758         /* First monitor config packet: low address of the sync */
8759         msg_addr_offset =
8760                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8761                                 monitor_base;
8762
8763         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8764                                         msg_addr_offset);
8765
8766         /* Second monitor config packet: high address of the sync */
8767         msg_addr_offset =
8768                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8769                                 monitor_base;
8770
8771         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8772                                         msg_addr_offset);
8773
8774         /*
8775          * Third monitor config packet: the payload, i.e. what to write when the
8776          * sync triggers
8777          */
8778         msg_addr_offset =
8779                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8780                                 monitor_base;
8781
8782         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8783
8784         return size;
8785 }
8786
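     /*
      * Build a wait CB: three MSG_SHORT packets program the monitor payload
      * (fence address low/high and the value to write), a fourth arms the
      * monitor on the requested SOB group/value, and a final FENCE packet
      * makes the queue stall until the monitor fires.
      */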
8787 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8788                                 struct hl_gen_wait_properties *prop)
8789 {
8790         struct hl_cb *cb = (struct hl_cb *) prop->data;
8791         void *buf = cb->kernel_address;
8792         u64 fence_addr = 0;
8793         u32 size = prop->size;
8794
8795         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8796                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8797                                 prop->q_idx);
8798                 return 0;
8799         }
8800
8801         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8802         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8803                         prop->sob_mask, prop->sob_val, prop->mon_id);
8804         size += gaudi_add_fence_pkt(buf + size);
8805
8806         return size;
8807 }
8808
8809 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8810 {
8811         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8812
8813         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8814                 hw_sob->sob_id);
8815
8816         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8817                         hw_sob->sob_id * 4, 0);
8818
8819         kref_init(&hw_sob->kref);
8820 }
8821
8822 static u64 gaudi_get_device_time(struct hl_device *hdev)
8823 {
8824         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8825
8826         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8827 }
8828
8829 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8830                                 u32 *block_size, u32 *block_id)
8831 {
8832         return -EPERM;
8833 }
8834
8835 static int gaudi_block_mmap(struct hl_device *hdev,
8836                                 struct vm_area_struct *vma,
8837                                 u32 block_id, u32 block_size)
8838 {
8839         return -EPERM;
8840 }
8841
8842 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8843 {
8844         struct cpu_dyn_regs *dyn_regs =
8845                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8846         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8847                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8848                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
8849
8850         WREG32(irq_handler_offset,
8851                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8852 }
8853
8854 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8855 {
8856         return -EINVAL;
8857 }
8858
8859 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8860 {
8861         switch (pll_idx) {
8862         case HL_GAUDI_CPU_PLL: return CPU_PLL;
8863         case HL_GAUDI_PCI_PLL: return PCI_PLL;
8864         case HL_GAUDI_NIC_PLL: return NIC_PLL;
8865         case HL_GAUDI_DMA_PLL: return DMA_PLL;
8866         case HL_GAUDI_MESH_PLL: return MESH_PLL;
8867         case HL_GAUDI_MME_PLL: return MME_PLL;
8868         case HL_GAUDI_TPC_PLL: return TPC_PLL;
8869         case HL_GAUDI_IF_PLL: return IF_PLL;
8870         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8871         case HL_GAUDI_HBM_PLL: return HBM_PLL;
8872         default: return -EINVAL;
8873         }
8874 }
8875
8876 static int gaudi_add_sync_to_engine_map_entry(
8877         struct hl_sync_to_engine_map *map, u32 reg_value,
8878         enum hl_sync_engine_type engine_type, u32 engine_id)
8879 {
8880         struct hl_sync_to_engine_map_entry *entry;
8881
8882         /* The register value holds a partial address of the sync object and
8883          * is used as a unique identifier. To obtain it, the lower 32 bits of
8884          * CFG_BASE are subtracted from the value.
8885          */
8886         if (reg_value == 0 || reg_value == 0xffffffff)
8887                 return 0;
8888         reg_value -= lower_32_bits(CFG_BASE);
8889
8890         /* create a new hash entry */
8891         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8892         if (!entry)
8893                 return -ENOMEM;
8894         entry->engine_type = engine_type;
8895         entry->engine_id = engine_id;
8896         entry->sync_id = reg_value;
8897         hash_add(map->tb, &entry->node, reg_value);
8898
8899         return 0;
8900 }
8901
8902 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8903                                 struct hl_sync_to_engine_map *map)
8904 {
8905         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8906         int i, j, rc;
8907         u32 reg_value;
8908
8909         /* Iterate over TPC engines */
8910         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8911
8912                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8913                                         sds->props[SP_NEXT_TPC] * i);
8914
8915                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8916                                                         ENGINE_TPC, i);
8917                 if (rc)
8918                         goto free_sync_to_engine_map;
8919         }
8920
8921         /* Iterate over MME engines */
8922         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8923                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8924
8925                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8926                                                 sds->props[SP_NEXT_MME] * i +
8927                                                 j * sizeof(u32));
8928
8929                         rc = gaudi_add_sync_to_engine_map_entry(
8930                                 map, reg_value, ENGINE_MME,
8931                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8932                         if (rc)
8933                                 goto free_sync_to_engine_map;
8934                 }
8935         }
8936
8937         /* Iterate over DMA engines */
8938         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8939                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8940                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
8941                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8942                                                         ENGINE_DMA, i);
8943                 if (rc)
8944                         goto free_sync_to_engine_map;
8945         }
8946
8947         return 0;
8948
8949 free_sync_to_engine_map:
8950         hl_state_dump_free_sync_to_engine_map(map);
8951
8952         return rc;
8953 }
8954
8955 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8956 {
8957         return FIELD_GET(
8958                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8959                 mon->status);
8960 }
8961
8962 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8963 {
8964         const size_t max_write = 10;
8965         u32 gid, mask, sob;
8966         int i, offset;
8967
8968         /* Each monitored sync object ID is (8 * group_id + position of a
8969          * cleared bit in the mask); a worked example follows the function.
8970          */
8971         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8972                         mon->arm_data);
8973         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8974                         mon->arm_data);
8975
8976         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8977                 max_write; mask >>= 1, i++) {
8978                 if (!(mask & 1)) {
8979                         sob = gid * MONITOR_MAX_SOBS + i;
8980
8981                         if (offset > 0)
8982                                 offset += snprintf(sobs + offset, max_write,
8983                                                         ", ");
8984
8985                         offset += snprintf(sobs + offset, max_write, "%u", sob);
8986                 }
8987         }
8988 }
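
/*
 * Editor's worked example, not part of the upstream driver: with a
 * hypothetical arm value of group_id 5 and mask 0b11111100, bits 0 and 1 are
 * cleared, so the monitored sync objects are 5 * 8 + 0 = 40 and
 * 5 * 8 + 1 = 41, and gaudi_fill_sobs_from_mon() would emit "40, 41".
 */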
8989
8990 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8991                                 struct hl_device *hdev,
8992                                 struct hl_mon_state_dump *mon)
8993 {
8994         const char *name;
8995         char scratch_buf1[BIN_REG_STRING_SIZE],
8996                 scratch_buf2[BIN_REG_STRING_SIZE];
8997         char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8998
8999         name = hl_state_dump_get_monitor_name(hdev, mon);
9000         if (!name)
9001                 name = "";
9002
9003         gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9004
9005         return hl_snprintf_resize(
9006                 buf, size, offset,
9007                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9008                 mon->id, name,
9009                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9010                                 mon->arm_data),
9011                 hl_format_as_binary(
9012                         scratch_buf1, sizeof(scratch_buf1),
9013                         FIELD_GET(
9014                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9015                                 mon->arm_data)),
9016                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9017                                 mon->arm_data),
9018                 mon->wr_data,
9019                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9020                 hl_format_as_binary(
9021                         scratch_buf2, sizeof(scratch_buf2),
9022                         FIELD_GET(
9023                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9024                                 mon->status)),
9025                 monitored_sobs);
9026 }
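
/*
 * Editor's sketch, not part of the upstream driver: the 64-bit write address
 * printed above is simply the concatenation of the monitor's high and low
 * address registers; restated here for clarity.
 */
static inline u64 gaudi_example_mon_wr_addr(struct hl_mon_state_dump *mon)
{
        return (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low;
}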
9027
9028
9029 static int gaudi_print_fences_single_engine(
9030         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9031         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9032         size_t *size, size_t *offset)
9033 {
9034         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9035         int rc = -ENOMEM, i;
9036         u32 *statuses, *fences;
9037
9038         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9039                         sizeof(*statuses), GFP_KERNEL);
9040         if (!statuses)
9041                 goto out;
9042
9043         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9044                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9045                          sizeof(*fences), GFP_KERNEL);
9046         if (!fences)
9047                 goto free_status;
9048
9049         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9050                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9051
9052         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9053                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9054                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9055
9056         /* The actual print */
9057         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9058                 u32 fence_id;
9059                 u64 fence_cnt, fence_rdata;
9060                 const char *engine_name;
9061
9062                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9063                         statuses[i]))
9064                         continue;
9065
9066                 fence_id =
9067                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9068                 fence_cnt = base_offset + CFG_BASE +
9069                         sizeof(u32) *
9070                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9071                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9072                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9073                 engine_name = hl_sync_engine_to_string(engine_type);
9074
9075                 rc = hl_snprintf_resize(
9076                         buf, size, offset,
9077                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9078                         engine_name, engine_id,
9079                         i, fence_id,
9080                         fence_cnt, engine_name, engine_id, fence_id, i,
9081                         fence_rdata, engine_name, engine_id, fence_id, i,
9082                         fences[fence_id],
9083                         statuses[i]);
9084                 if (rc)
9085                         goto free_fences;
9086         }
9087
9088         rc = 0;
9089
9090 free_fences:
9091         kfree(fences);
9092 free_status:
9093         kfree(statuses);
9094 out:
9095         return rc;
9096 }
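
/*
 * Editor's sketch, not part of the upstream driver: the fence CNT address
 * printed above is derived from the stream index and fence id, and the RDATA
 * register is located at a fixed offset from it. The helper restates that
 * arithmetic with the same props indices used in
 * gaudi_print_fences_single_engine(); its arguments are hypothetical.
 */
static inline u64 gaudi_example_fence_rdata_addr(struct hl_state_dump_specs *sds,
                                                u64 base_offset, u32 stream_idx,
                                                u32 fence_id)
{
        u64 cnt_addr = base_offset + CFG_BASE + sizeof(u32) *
                (stream_idx + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);

        /* RDATA sits at CNT minus the CNT block offset plus the RDATA one */
        return cnt_addr - sds->props[SP_FENCE0_CNT_OFFSET] +
                        sds->props[SP_FENCE0_RDATA_OFFSET];
}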
9097
9098
9099 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9100         .monitor_valid = gaudi_monitor_valid,
9101         .print_single_monitor = gaudi_print_single_monitor,
9102         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9103         .print_fences_single_engine = gaudi_print_fences_single_engine,
9104 };
9105
9106 static void gaudi_state_dump_init(struct hl_device *hdev)
9107 {
9108         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9109         int i;
9110
9111         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9112                 hash_add(sds->so_id_to_str_tb,
9113                         &gaudi_so_id_to_str[i].node,
9114                         gaudi_so_id_to_str[i].id);
9115
9116         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9117                 hash_add(sds->monitor_id_to_str_tb,
9118                         &gaudi_monitor_id_to_str[i].node,
9119                         gaudi_monitor_id_to_str[i].id);
9120
9121         sds->props = gaudi_state_dump_specs_props;
9122
9123         sds->sync_namager_names = gaudi_sync_manager_names;
9124
9125         sds->funcs = gaudi_state_dump_funcs;
9126 }
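
/*
 * Editor's sketch, not part of the upstream driver: after
 * gaudi_state_dump_init() runs, the common state-dump code reaches the Gaudi
 * helpers only through hdev->state_dump_specs.funcs. A hypothetical caller of
 * that indirection could look like this:
 */
static int __maybe_unused gaudi_example_build_map(struct hl_device *hdev,
                                        struct hl_sync_to_engine_map *map)
{
        struct hl_state_dump_specs *sds = &hdev->state_dump_specs;

        /* Dispatches to gaudi_gen_sync_to_engine_map() registered above */
        return sds->funcs.gen_sync_to_engine_map(hdev, map);
}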
9127
9128 static u32 *gaudi_get_stream_master_qid_arr(void)
9129 {
9130         return gaudi_stream_master;
9131 }
9132
9133 static int gaudi_set_dram_properties(struct hl_device *hdev)
9134 {
9135         return 0;
9136 }
9137
9138 static int gaudi_set_binning_masks(struct hl_device *hdev)
9139 {
9140         return 0;
9141 }
9142
9143 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9144 {
9145 }
9146
9147 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9148 {
9149         struct hl_device *hdev = dev_get_drvdata(dev);
9150         struct cpucp_info *cpucp_info;
9151
9152         cpucp_info = &hdev->asic_prop.cpucp_info;
9153
9154         return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9155 }
9156
9157 static DEVICE_ATTR_RO(infineon_ver);
9158
9159 static struct attribute *gaudi_vrm_dev_attrs[] = {
9160         &dev_attr_infineon_ver.attr,
9161         NULL,
9162 };
9163
9164 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9165                                         struct attribute_group *dev_vrm_attr_grp)
9166 {
9167         hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9168         dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9169 }
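
/*
 * Editor's sketch, not part of the upstream driver: infineon_ver above follows
 * the standard DEVICE_ATTR_RO() pattern. A hypothetical extra read-only VRM
 * attribute would be wired up the same way; the name and the constant it
 * reports are made up purely for illustration. It would then be listed in
 * gaudi_vrm_dev_attrs[] before the terminating NULL entry.
 */
static ssize_t example_vrm_rev_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
{
        /* A real handler would read the value from hdev->asic_prop, as
         * infineon_ver_show() does above.
         */
        return sprintf(buf, "%#04x\n", 0x1234);
}

static DEVICE_ATTR_RO(example_vrm_rev);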
9170
9171 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9172 {
9173         return 0;
9174 }
9175
9176 static const struct hl_asic_funcs gaudi_funcs = {
9177         .early_init = gaudi_early_init,
9178         .early_fini = gaudi_early_fini,
9179         .late_init = gaudi_late_init,
9180         .late_fini = gaudi_late_fini,
9181         .sw_init = gaudi_sw_init,
9182         .sw_fini = gaudi_sw_fini,
9183         .hw_init = gaudi_hw_init,
9184         .hw_fini = gaudi_hw_fini,
9185         .halt_engines = gaudi_halt_engines,
9186         .suspend = gaudi_suspend,
9187         .resume = gaudi_resume,
9188         .mmap = gaudi_mmap,
9189         .ring_doorbell = gaudi_ring_doorbell,
9190         .pqe_write = gaudi_pqe_write,
9191         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9192         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9193         .scrub_device_mem = gaudi_scrub_device_mem,
9194         .scrub_device_dram = gaudi_scrub_device_dram,
9195         .get_int_queue_base = gaudi_get_int_queue_base,
9196         .test_queues = gaudi_test_queues,
9197         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9198         .asic_dma_pool_free = gaudi_dma_pool_free,
9199         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9200         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9201         .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
9202         .cs_parser = gaudi_cs_parser,
9203         .asic_dma_map_sgtable = hl_dma_map_sgtable,
9204         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9205         .update_eq_ci = gaudi_update_eq_ci,
9206         .context_switch = gaudi_context_switch,
9207         .restore_phase_topology = gaudi_restore_phase_topology,
9208         .debugfs_read_dma = gaudi_debugfs_read_dma,
9209         .add_device_attr = gaudi_add_device_attr,
9210         .handle_eqe = gaudi_handle_eqe,
9211         .get_events_stat = gaudi_get_events_stat,
9212         .read_pte = gaudi_read_pte,
9213         .write_pte = gaudi_write_pte,
9214         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9215         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9216         .mmu_prefetch_cache_range = NULL,
9217         .send_heartbeat = gaudi_send_heartbeat,
9218         .debug_coresight = gaudi_debug_coresight,
9219         .is_device_idle = gaudi_is_device_idle,
9220         .compute_reset_late_init = gaudi_compute_reset_late_init,
9221         .hw_queues_lock = gaudi_hw_queues_lock,
9222         .hw_queues_unlock = gaudi_hw_queues_unlock,
9223         .get_pci_id = gaudi_get_pci_id,
9224         .get_eeprom_data = gaudi_get_eeprom_data,
9225         .get_monitor_dump = gaudi_get_monitor_dump,
9226         .send_cpu_message = gaudi_send_cpu_message,
9227         .pci_bars_map = gaudi_pci_bars_map,
9228         .init_iatu = gaudi_init_iatu,
9229         .rreg = hl_rreg,
9230         .wreg = hl_wreg,
9231         .halt_coresight = gaudi_halt_coresight,
9232         .ctx_init = gaudi_ctx_init,
9233         .ctx_fini = gaudi_ctx_fini,
9234         .pre_schedule_cs = gaudi_pre_schedule_cs,
9235         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9236         .load_firmware_to_device = gaudi_load_firmware_to_device,
9237         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9238         .get_signal_cb_size = gaudi_get_signal_cb_size,
9239         .get_wait_cb_size = gaudi_get_wait_cb_size,
9240         .gen_signal_cb = gaudi_gen_signal_cb,
9241         .gen_wait_cb = gaudi_gen_wait_cb,
9242         .reset_sob = gaudi_reset_sob,
9243         .reset_sob_group = gaudi_reset_sob_group,
9244         .get_device_time = gaudi_get_device_time,
9245         .pb_print_security_errors = NULL,
9246         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9247         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9248         .get_dec_base_addr = NULL,
9249         .scramble_addr = hl_mmu_scramble_addr,
9250         .descramble_addr = hl_mmu_descramble_addr,
9251         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9252         .get_hw_block_id = gaudi_get_hw_block_id,
9253         .hw_block_mmap = gaudi_block_mmap,
9254         .enable_events_from_fw = gaudi_enable_events_from_fw,
9255         .ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9256         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9257         .init_firmware_preload_params = gaudi_init_firmware_preload_params,
9258         .init_firmware_loader = gaudi_init_firmware_loader,
9259         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9260         .state_dump_init = gaudi_state_dump_init,
9261         .get_sob_addr = gaudi_get_sob_addr,
9262         .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9263         .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9264         .check_if_razwi_happened = gaudi_check_if_razwi_happened,
9265         .mmu_get_real_page_size = hl_mmu_get_real_page_size,
9266         .access_dev_mem = hl_access_dev_mem,
9267         .set_dram_bar_base = gaudi_set_hbm_bar_base,
9268         .send_device_activity = gaudi_send_device_activity,
9269         .set_dram_properties = gaudi_set_dram_properties,
9270         .set_binning_masks = gaudi_set_binning_masks,
9271 };
9272
9273 /**
9274  * gaudi_set_asic_funcs - set GAUDI function pointers
9275  *
9276  * @hdev: pointer to hl_device structure
9277  *
9278  */
9279 void gaudi_set_asic_funcs(struct hl_device *hdev)
9280 {
9281         hdev->asic_funcs = &gaudi_funcs;
9282 }
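
/*
 * Editor's sketch, not part of the upstream driver: once gaudi_set_asic_funcs()
 * has run, the common habanalabs code calls the Gaudi helpers only through
 * hdev->asic_funcs. A hypothetical dispatch through that table could look
 * like this:
 */
static int __maybe_unused gaudi_example_dispatch(struct hl_device *hdev)
{
        /* Resolves to gaudi_send_heartbeat() for a Gaudi device */
        return hdev->asic_funcs->send_heartbeat(hdev);
}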