drivers/net/ethernet/mellanox/mlx5/core/health.c
/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kernel.h>
#include <linux/random.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
#include <linux/mlx5/driver.h>
#include <linux/kern_levels.h>
#include "mlx5_core.h"
#include "lib/eq.h"
#include "lib/mlx5.h"
#include "lib/events.h"
#include "lib/pci_vsc.h"
#include "lib/tout.h"
#include "diag/fw_tracer.h"
#include "diag/reporter_vnic.h"

enum {
        MAX_MISSES                      = 3,
};

enum {
        MLX5_DROP_HEALTH_WORK,
};

enum {
        MLX5_SENSOR_NO_ERR              = 0,
        MLX5_SENSOR_PCI_COMM_ERR        = 1,
        MLX5_SENSOR_PCI_ERR             = 2,
        MLX5_SENSOR_NIC_DISABLED        = 3,
        MLX5_SENSOR_NIC_SW_RESET        = 4,
        MLX5_SENSOR_FW_SYND_RFR         = 5,
};

enum {
        MLX5_SEVERITY_MASK              = 0x7,
        MLX5_SEVERITY_VALID_MASK        = 0x8,
};

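/* The NIC interface mode is reported in bits 8..10 of the cmdq_addr_l_sz word
 * of the initialization segment; the accessors below read and write that
 * 3-bit field.
 */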
u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
{
        return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
}

void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
{
        u32 cur_cmdq_addr_l_sz;

        cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz);
        iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) |
                    state << MLX5_NIC_IFC_OFFSET,
                    &dev->iseg->cmdq_addr_l_sz);
}

static bool sensor_pci_not_working(struct mlx5_core_dev *dev)
{
        struct mlx5_core_health *health = &dev->priv.health;
        struct health_buffer __iomem *h = health->health;

        /* Offline PCI reads return 0xffffffff */
        return (ioread32be(&h->fw_ver) == 0xffffffff);
}

static int mlx5_health_get_rfr(u8 rfr_severity)
{
        return rfr_severity >> MLX5_RFR_BIT_OFFSET;
}

static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
{
        struct mlx5_core_health *health = &dev->priv.health;
        struct health_buffer __iomem *h = health->health;
        u8 synd = ioread8(&h->synd);
        u8 rfr;

        rfr = mlx5_health_get_rfr(ioread8(&h->rfr_severity));

        if (rfr && synd)
                mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
        return rfr && synd;
}

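/* Walk the fatal-error sensors in order of severity and return the first one
 * that trips, or MLX5_SENSOR_NO_ERR when the device looks healthy.
 */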
u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev)
{
        if (sensor_pci_not_working(dev))
                return MLX5_SENSOR_PCI_COMM_ERR;
        if (pci_channel_offline(dev->pdev))
                return MLX5_SENSOR_PCI_ERR;
        if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
                return MLX5_SENSOR_NIC_DISABLED;
        if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET)
                return MLX5_SENSOR_NIC_SW_RESET;
        if (sensor_fw_synd_rfr(dev))
                return MLX5_SENSOR_FW_SYND_RFR;

        return MLX5_SENSOR_NO_ERR;
}

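/* Take (lock == true) or release (lock == false) the shared SW-reset
 * semaphore through the vendor-specific capability (VSC) gateway. Only a PF
 * may own the semaphore; -EBUSY from the semaphore space means another PF
 * already started the reset.
 */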
static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock)
{
        enum mlx5_vsc_state state;
        int ret;

        if (!mlx5_core_is_pf(dev))
                return -EBUSY;

        /* Try to lock GW access; this stage doesn't return
         * EBUSY because a locked GW does not mean that another
         * PF has already started the reset.
         */
        ret = mlx5_vsc_gw_lock(dev);
        if (ret == -EBUSY)
                return -EINVAL;
        if (ret)
                return ret;

        state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK;
        /* At this stage, if the return status == EBUSY, then we know
         * for sure that another PF started the reset, so don't allow
         * another reset.
         */
        ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state);
        if (ret)
                mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");

        /* Unlock GW access */
        mlx5_vsc_gw_unlock(dev);

        return ret;
}

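/* Trigger a SW reset by writing the SW_RESET NIC interface mode. Only done
 * when the initialization segment advertises reset support and the current
 * fatal sensor state indicates a reset could still help.
 */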
static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
{
        bool supported = (ioread32be(&dev->iseg->initializing) >>
                          MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
        u32 fatal_error;

        if (!supported)
                return false;

        /* The reset only needs to be issued by one PF. The health buffer is
         * shared between all functions and will be cleared during a reset.
         * Check again to avoid a redundant second reset. If the fatal error
         * was PCI related, a reset won't help.
         */
        fatal_error = mlx5_health_check_fatal_sensors(dev);
        if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
            fatal_error == MLX5_SENSOR_NIC_DISABLED ||
            fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
                mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.\n");
                return false;
        }

        mlx5_core_warn(dev, "Issuing FW Reset\n");
        /* Write the NIC interface field to initiate the reset; the command
         * interface address also resides here, so don't overwrite it.
         */
        mlx5_set_nic_state(dev, MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET);

        return true;
}

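/* Move the device to the internal-error state (flushing any outstanding
 * commands) when a fatal sensor has tripped or the caller forces it, then
 * notify listeners of the system error event.
 */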
static void enter_error_state(struct mlx5_core_dev *dev, bool force)
{
        if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
                dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
                mlx5_cmd_flush(dev);
        }

        mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
}

void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
{
        bool err_detected = false;

        /* Mark the device as fatal in order to abort FW commands */
        if ((mlx5_health_check_fatal_sensors(dev) || force) &&
            dev->state == MLX5_DEVICE_STATE_UP) {
                dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
                err_detected = true;
        }
        mutex_lock(&dev->intf_state_mutex);
        if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
                goto unlock; /* a previous error is still being handled */

        enter_error_state(dev, force);
unlock:
        mutex_unlock(&dev->intf_state_mutex);
}

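/* Handle a fatal error by taking the SW-reset semaphore, issuing the reset if
 * firmware requested one (RFR), and then waiting for the NIC interface to
 * report DISABLED before releasing the semaphore.
 */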
void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
{
        unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE);
        int lock = -EBUSY;

        mutex_lock(&dev->intf_state_mutex);
        if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
                goto unlock;

        mlx5_core_err(dev, "start\n");

        if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
                /* Get cr-dump and reset FW semaphore */
                lock = lock_sem_sw_reset(dev, true);

                if (lock == -EBUSY) {
                        delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP);
                        goto recover_from_sw_reset;
                }
                /* Execute SW reset */
                reset_fw_if_needed(dev);
        }

recover_from_sw_reset:
        /* Recover from SW reset */
        end = jiffies + msecs_to_jiffies(delay_ms);
        do {
                if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
                        break;
                if (pci_channel_offline(dev->pdev)) {
                        mlx5_core_err(dev, "PCI channel offline, stop waiting for NIC IFC\n");
                        goto unlock;
                }

                msleep(20);
        } while (!time_after(jiffies, end));

        if (mlx5_get_nic_state(dev) != MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED) {
                dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
                        mlx5_get_nic_state(dev), delay_ms);
        }

        /* Release FW semaphore if you are the lock owner */
        if (!lock)
                lock_sem_sw_reset(dev, false);

        mlx5_core_err(dev, "end\n");

unlock:
        mutex_unlock(&dev->intf_state_mutex);
}

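/* Log the current NIC interface mode, warning when it is not the DISABLED
 * state the recovery flow expects, and then disable the device.
 */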
static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
{
        u8 nic_interface = mlx5_get_nic_state(dev);

        switch (nic_interface) {
        case MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER:
                mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
                break;

        case MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED:
                mlx5_core_warn(dev, "starting teardown\n");
                break;

        case MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC:
                mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
                break;

        case MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET:
                /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases:
                 * 1. PCI has been disabled (i.e. PCI-AER, PF driver unloaded
                 *    and this is a VF); this is not recoverable by SW reset.
                 *    Logging of this is handled elsewhere.
                 * 2. FW reset has been issued by another function; the driver
                 *    can be reloaded to recover after the mode switches to
                 *    MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED.
                 */
                if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
                        mlx5_core_warn(dev, "NIC SW reset in progress\n");
                break;

        default:
                mlx5_core_warn(dev, "Expected to see disabled NIC but it has invalid value %d\n",
                               nic_interface);
        }

        mlx5_disable_device(dev);
}

int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
{
        unsigned long end;

        end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET));
        while (sensor_pci_not_working(dev)) {
                if (time_after(jiffies, end))
                        return -ETIMEDOUT;
                if (test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
                        mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n");
                        return -ENODEV;
                }
                if (pci_channel_offline(dev->pdev)) {
                        mlx5_core_err(dev, "PCI channel offline, stop waiting for PCI\n");
                        return -EACCES;
                }
                msleep(100);
        }
        return 0;
}

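/* Full recovery flow: tear the device down, wait for PCI reads to work again,
 * then reload. Returns -EIO if PCI never comes back or a fatal sensor is
 * still tripping after the reload.
 */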
static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
{
        mlx5_core_warn(dev, "handling bad device here\n");
        mlx5_handle_bad_state(dev);
        if (mlx5_health_wait_pci_up(dev)) {
                mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n");
                return -EIO;
        }
        mlx5_core_err(dev, "starting health recovery flow\n");
        if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) {
                mlx5_core_err(dev, "health recovery failed\n");
                return -EIO;
        }

        mlx5_core_info(dev, "health recovery succeeded\n");
        return 0;
}

static const char *hsynd_str(u8 synd)
{
        switch (synd) {
        case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_INTERNAL_ERR:
                return "firmware internal error";
        case MLX5_INITIAL_SEG_HEALTH_SYNDROME_DEAD_IRISC:
                return "irisc not responding";
        case MLX5_INITIAL_SEG_HEALTH_SYNDROME_HW_FATAL_ERR:
                return "unrecoverable hardware error";
        case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_CRC_ERR:
                return "firmware CRC error";
        case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_FETCH_PCI_ERR:
                return "ICM fetch PCI error";
        case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PAGE_ERR:
                return "HW fatal error";
        case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ASYNCHRONOUS_EQ_BUF_OVERRUN:
                return "async EQ buffer overrun";
        case MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_IN_ERR:
                return "EQ error";
        case MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV:
                return "Invalid EQ referenced";
        case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR:
                return "FFSER error";
        case MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR:
                return "High temperature";
        case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR:
                return "ICM fetch PCI data poisoned error";
        default:
                return "unrecognized error";
        }
}

static const char *mlx5_loglevel_str(int level)
{
        switch (level) {
        case LOGLEVEL_EMERG:
                return "EMERGENCY";
        case LOGLEVEL_ALERT:
                return "ALERT";
        case LOGLEVEL_CRIT:
                return "CRITICAL";
        case LOGLEVEL_ERR:
                return "ERROR";
        case LOGLEVEL_WARNING:
                return "WARNING";
        case LOGLEVEL_NOTICE:
                return "NOTICE";
        case LOGLEVEL_INFO:
                return "INFO";
        case LOGLEVEL_DEBUG:
                return "DEBUG";
        }
        return "Unknown log level";
}

static int mlx5_health_get_severity(u8 rfr_severity)
{
        return rfr_severity & MLX5_SEVERITY_VALID_MASK ?
               rfr_severity & MLX5_SEVERITY_MASK : LOGLEVEL_ERR;
}

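/* Dump the contents of the health buffer to the kernel log at the severity
 * the firmware reported (or LOGLEVEL_ERR when no valid severity is present).
 */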
static void print_health_info(struct mlx5_core_dev *dev)
{
        struct mlx5_core_health *health = &dev->priv.health;
        struct health_buffer __iomem *h = health->health;
        u8 rfr_severity;
        int severity;
        int i;

        /* If the syndrome is 0, the device is OK and there is no need to print the buffer */
        if (!ioread8(&h->synd))
                return;

        if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) {
                mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n");
                return;
        }

        rfr_severity = ioread8(&h->rfr_severity);
        severity = mlx5_health_get_severity(rfr_severity);
        mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n",
                 hsynd_str(ioread8(&h->synd)), severity, mlx5_loglevel_str(severity));

        for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
                mlx5_log(dev, severity, "assert_var[%d] 0x%08x\n", i,
                         ioread32be(h->assert_var + i));

        mlx5_log(dev, severity, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
        mlx5_log(dev, severity, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
        mlx5_log(dev, severity, "fw_ver %d.%d.%d\n", fw_rev_maj(dev), fw_rev_min(dev),
                 fw_rev_sub(dev));
        mlx5_log(dev, severity, "time %u\n", ioread32be(&h->time));
        mlx5_log(dev, severity, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
        mlx5_log(dev, severity, "rfr %d\n", mlx5_health_get_rfr(rfr_severity));
        mlx5_log(dev, severity, "severity %d (%s)\n", severity, mlx5_loglevel_str(severity));
        mlx5_log(dev, severity, "irisc_index %d\n", ioread8(&h->irisc_index));
        mlx5_log(dev, severity, "synd 0x%x: %s\n", ioread8(&h->synd),
                 hsynd_str(ioread8(&h->synd)));
        mlx5_log(dev, severity, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
        mlx5_log(dev, severity, "raw fw_ver 0x%08x\n", ioread32be(&h->fw_ver));
}

static int
mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
                          struct devlink_fmsg *fmsg,
                          struct netlink_ext_ack *extack)
{
        struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
        struct mlx5_core_health *health = &dev->priv.health;
        struct health_buffer __iomem *h = health->health;
        u8 synd = ioread8(&h->synd);

        devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd);
        if (!synd)
                return 0;

        devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd));

        return 0;
}

struct mlx5_fw_reporter_ctx {
        u8 err_synd;
        int miss_counter;
};

static void
mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg,
                               struct mlx5_fw_reporter_ctx *fw_reporter_ctx)
{
        devlink_fmsg_u8_pair_put(fmsg, "syndrome", fw_reporter_ctx->err_synd);
        devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter", fw_reporter_ctx->miss_counter);
}

static void
mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
                                       struct devlink_fmsg *fmsg)
{
        struct mlx5_core_health *health = &dev->priv.health;
        struct health_buffer __iomem *h = health->health;
        u8 rfr_severity;
        int i;

        if (!ioread8(&h->synd))
                return;

        devlink_fmsg_pair_nest_start(fmsg, "health buffer");
        devlink_fmsg_obj_nest_start(fmsg);
        devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var");
        for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
                devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i));
        devlink_fmsg_arr_pair_nest_end(fmsg);
        devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr",
                                  ioread32be(&h->assert_exit_ptr));
        devlink_fmsg_u32_pair_put(fmsg, "assert_callra",
                                  ioread32be(&h->assert_callra));
        devlink_fmsg_u32_pair_put(fmsg, "time", ioread32be(&h->time));
        devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id));
        rfr_severity = ioread8(&h->rfr_severity);
        devlink_fmsg_u8_pair_put(fmsg, "rfr", mlx5_health_get_rfr(rfr_severity));
        devlink_fmsg_u8_pair_put(fmsg, "severity", mlx5_health_get_severity(rfr_severity));
        devlink_fmsg_u8_pair_put(fmsg, "irisc_index", ioread8(&h->irisc_index));
        devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd));
        devlink_fmsg_u32_pair_put(fmsg, "ext_synd", ioread16be(&h->ext_synd));
        devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver", ioread32be(&h->fw_ver));
        devlink_fmsg_obj_nest_end(fmsg);
        devlink_fmsg_pair_nest_end(fmsg);
}

static int
mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
                      struct devlink_fmsg *fmsg, void *priv_ctx,
                      struct netlink_ext_ack *extack)
{
        struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
        int err;

        err = mlx5_fw_tracer_trigger_core_dump_general(dev);
        if (err)
                return err;

        if (priv_ctx) {
                struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;

                mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
        }

        mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg);

        return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg);
}

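/* Non-fatal health work: report a firmware syndrome or a stalled health
 * counter to the devlink "fw" reporter, if one was created.
 */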
static void mlx5_fw_reporter_err_work(struct work_struct *work)
{
        struct mlx5_fw_reporter_ctx fw_reporter_ctx;
        struct mlx5_core_health *health;

        health = container_of(work, struct mlx5_core_health, report_work);

        if (IS_ERR_OR_NULL(health->fw_reporter))
                return;

        fw_reporter_ctx.err_synd = health->synd;
        fw_reporter_ctx.miss_counter = health->miss_counter;
        if (fw_reporter_ctx.err_synd) {
                devlink_health_report(health->fw_reporter,
                                      "FW syndrome reported", &fw_reporter_ctx);
                return;
        }
        if (fw_reporter_ctx.miss_counter)
                devlink_health_report(health->fw_reporter,
                                      "FW miss counter reported",
                                      &fw_reporter_ctx);
}

static const struct devlink_health_reporter_ops mlx5_fw_reporter_pf_ops = {
                .name = "fw",
                .diagnose = mlx5_fw_reporter_diagnose,
                .dump = mlx5_fw_reporter_dump,
};

static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
                .name = "fw",
                .diagnose = mlx5_fw_reporter_diagnose,
};

static int
mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
                               void *priv_ctx,
                               struct netlink_ext_ack *extack)
{
        struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);

        return mlx5_health_try_recover(dev);
}

static int
mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
                            struct devlink_fmsg *fmsg, void *priv_ctx,
                            struct netlink_ext_ack *extack)
{
        struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
        u32 crdump_size = dev->priv.health.crdump_size;
        u32 *cr_data;
        int err;

        if (!mlx5_core_is_pf(dev))
                return -EPERM;

        cr_data = kvmalloc(crdump_size, GFP_KERNEL);
        if (!cr_data)
                return -ENOMEM;
        err = mlx5_crdump_collect(dev, cr_data);
        if (err)
                goto free_data;

        if (priv_ctx) {
                struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;

                mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
        }

        devlink_fmsg_binary_pair_put(fmsg, "crdump_data", cr_data, crdump_size);

free_data:
        kvfree(cr_data);
        return err;
}

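/* Fatal health work: latch the error state, then either recover directly
 * (when no fw_fatal reporter exists) or hand the event to the devlink
 * "fw_fatal" reporter. If devlink declines recovery because of the grace
 * period, unload the driver so it stops touching the broken device.
 */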
static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
{
        struct mlx5_fw_reporter_ctx fw_reporter_ctx;
        struct mlx5_core_health *health;
        struct mlx5_core_dev *dev;
        struct devlink *devlink;
        struct mlx5_priv *priv;

        health = container_of(work, struct mlx5_core_health, fatal_report_work);
        priv = container_of(health, struct mlx5_priv, health);
        dev = container_of(priv, struct mlx5_core_dev, priv);
        devlink = priv_to_devlink(dev);

        mutex_lock(&dev->intf_state_mutex);
        if (test_bit(MLX5_DROP_HEALTH_WORK, &health->flags)) {
                mlx5_core_err(dev, "health work is not permitted at this stage\n");
                mutex_unlock(&dev->intf_state_mutex);
                return;
        }
        mutex_unlock(&dev->intf_state_mutex);
        enter_error_state(dev, false);
        if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
                devl_lock(devlink);
                if (mlx5_health_try_recover(dev))
                        mlx5_core_err(dev, "health recovery failed\n");
                devl_unlock(devlink);
                return;
        }
        fw_reporter_ctx.err_synd = health->synd;
        fw_reporter_ctx.miss_counter = health->miss_counter;
        if (devlink_health_report(health->fw_fatal_reporter,
                                  "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) {
                /* If recovery wasn't performed, due to grace period,
                 * unload the driver. This ensures that the driver
                 * closes all its resources and it is not subjected to
                 * requests from the kernel.
                 */
                mlx5_core_err(dev, "Driver is in error state. Unloading\n");
                mlx5_unload_one(dev, false);
        }
}

static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_pf_ops = {
                .name = "fw_fatal",
                .recover = mlx5_fw_fatal_reporter_recover,
                .dump = mlx5_fw_fatal_reporter_dump,
};

static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
                .name = "fw_fatal",
                .recover = mlx5_fw_fatal_reporter_recover,
};

#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000
#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000
#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000
#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD

void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
{
        const struct devlink_health_reporter_ops *fw_fatal_ops;
        struct mlx5_core_health *health = &dev->priv.health;
        const struct devlink_health_reporter_ops *fw_ops;
        struct devlink *devlink = priv_to_devlink(dev);
        u64 grace_period;

        fw_fatal_ops = &mlx5_fw_fatal_reporter_pf_ops;
        fw_ops = &mlx5_fw_reporter_pf_ops;
        if (mlx5_core_is_ecpf(dev)) {
                grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD;
        } else if (mlx5_core_is_pf(dev)) {
                grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD;
        } else {
                /* VF or SF */
                grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD;
                fw_fatal_ops = &mlx5_fw_fatal_reporter_ops;
                fw_ops = &mlx5_fw_reporter_ops;
        }

        health->fw_reporter =
                devl_health_reporter_create(devlink, fw_ops, 0, dev);
        if (IS_ERR(health->fw_reporter))
                mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
                               PTR_ERR(health->fw_reporter));

        health->fw_fatal_reporter =
                devl_health_reporter_create(devlink,
                                            fw_fatal_ops,
                                            grace_period,
                                            dev);
        if (IS_ERR(health->fw_fatal_reporter))
                mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
                               PTR_ERR(health->fw_fatal_reporter));
}

static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev)
{
        struct mlx5_core_health *health = &dev->priv.health;

        if (!IS_ERR_OR_NULL(health->fw_reporter))
                devlink_health_reporter_destroy(health->fw_reporter);

        if (!IS_ERR_OR_NULL(health->fw_fatal_reporter))
                devlink_health_reporter_destroy(health->fw_fatal_reporter);
}

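/* Spread health polling across devices by adding up to one second of random
 * jitter to the configured poll interval.
 */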
static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev)
{
        unsigned long next;

        get_random_bytes(&next, sizeof(next));
        next %= HZ;
        next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL));

        return next;
}

void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
{
        struct mlx5_core_health *health = &dev->priv.health;

        if (!mlx5_dev_is_lightweight(dev))
                queue_work(health->wq, &health->fatal_report_work);
}

#define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60)
static void mlx5_health_log_ts_update(struct work_struct *work)
{
        struct delayed_work *dwork = to_delayed_work(work);
        u32 out[MLX5_ST_SZ_DW(mrtc_reg)] = {};
        u32 in[MLX5_ST_SZ_DW(mrtc_reg)] = {};
        struct mlx5_core_health *health;
        struct mlx5_core_dev *dev;
        struct mlx5_priv *priv;
        u64 now_us;

        health = container_of(dwork, struct mlx5_core_health, update_fw_log_ts_work);
        priv = container_of(health, struct mlx5_priv, health);
        dev = container_of(priv, struct mlx5_core_dev, priv);

        now_us = ktime_to_us(ktime_get_real());

        MLX5_SET(mrtc_reg, in, time_h, now_us >> 32);
        MLX5_SET(mrtc_reg, in, time_l, now_us & 0xFFFFFFFF);
        mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MRTC, 0, 1);

        queue_delayed_work(health->wq, &health->update_fw_log_ts_work,
                           msecs_to_jiffies(MLX5_MSEC_PER_HOUR));
}

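/* Periodic health poll: check the fatal sensors, track the firmware health
 * counter for stalls, and kick the appropriate reporter work when something
 * changes.
 */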
static void poll_health(struct timer_list *t)
{
        struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
        struct mlx5_core_health *health = &dev->priv.health;
        struct health_buffer __iomem *h = health->health;
        u32 fatal_error;
        u8 prev_synd;
        u32 count;

        if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
                goto out;

        fatal_error = mlx5_health_check_fatal_sensors(dev);

        if (fatal_error && !health->fatal_error) {
                mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
                dev->priv.health.fatal_error = fatal_error;
                print_health_info(dev);
                dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
                mlx5_trigger_health_work(dev);
                return;
        }

        count = ioread32be(health->health_counter);
        if (count == health->prev)
                ++health->miss_counter;
        else
                health->miss_counter = 0;

        health->prev = count;
        if (health->miss_counter == MAX_MISSES) {
                mlx5_core_err(dev, "device's health compromised - reached miss count\n");
                print_health_info(dev);
                queue_work(health->wq, &health->report_work);
        }

        prev_synd = health->synd;
        health->synd = ioread8(&h->synd);
        if (health->synd && health->synd != prev_synd)
                queue_work(health->wq, &health->report_work);

out:
        mod_timer(&health->timer, get_next_poll_jiffies(dev));
}

void mlx5_start_health_poll(struct mlx5_core_dev *dev)
{
        u64 poll_interval_ms = mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL);
        struct mlx5_core_health *health = &dev->priv.health;

        timer_setup(&health->timer, poll_health, 0);
        health->fatal_error = MLX5_SENSOR_NO_ERR;
        clear_bit(MLX5_DROP_HEALTH_WORK, &health->flags);
        health->health = &dev->iseg->health;
        health->health_counter = &dev->iseg->health_counter;

        health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
        add_timer(&health->timer);
}

void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
{
        struct mlx5_core_health *health = &dev->priv.health;

        if (disable_health)
                set_bit(MLX5_DROP_HEALTH_WORK, &health->flags);

        del_timer_sync(&health->timer);
}

void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev)
{
        struct mlx5_core_health *health = &dev->priv.health;

        if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
                queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
}

void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
{
        struct mlx5_core_health *health = &dev->priv.health;

        set_bit(MLX5_DROP_HEALTH_WORK, &health->flags);
        cancel_delayed_work_sync(&health->update_fw_log_ts_work);
        cancel_work_sync(&health->report_work);
        cancel_work_sync(&health->fatal_report_work);
}

void mlx5_health_cleanup(struct mlx5_core_dev *dev)
{
        struct mlx5_core_health *health = &dev->priv.health;

        cancel_delayed_work_sync(&health->update_fw_log_ts_work);
        destroy_workqueue(health->wq);
        mlx5_reporter_vnic_destroy(dev);
        mlx5_fw_reporters_destroy(dev);
}

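/* Allocate the health workqueue, create the devlink reporters and initialize
 * the health work items. Undone by mlx5_health_cleanup().
 */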
int mlx5_health_init(struct mlx5_core_dev *dev)
{
        struct devlink *devlink = priv_to_devlink(dev);
        struct mlx5_core_health *health;
        char *name;

        if (!mlx5_dev_is_lightweight(dev)) {
                devl_lock(devlink);
                mlx5_fw_reporters_create(dev);
                devl_unlock(devlink);
        }
        mlx5_reporter_vnic_create(dev);

        health = &dev->priv.health;
        name = kmalloc(64, GFP_KERNEL);
        if (!name)
                goto out_err;

        strcpy(name, "mlx5_health");
        strcat(name, dev_name(dev->device));
        health->wq = create_singlethread_workqueue(name);
        kfree(name);
        if (!health->wq)
                goto out_err;
        INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work);
        INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work);
        INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update);

        return 0;

out_err:
        mlx5_reporter_vnic_destroy(dev);
        mlx5_fw_reporters_destroy(dev);
        return -ENOMEM;
}