]> Git Repo - linux.git/blob - drivers/pci/pcie/err.c
Merge tag 'tilcdc-4.20' of https://github.com/jsarha/linux into drm-next
[linux.git] / drivers / pci / pcie / err.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * This file implements the error recovery as a core part of PCIe error
4  * reporting. When a PCIe error is delivered, an error message will be
5  * collected and printed to console, then, an error recovery procedure
6  * will be executed by following the PCI error recovery rules.
7  *
8  * Copyright (C) 2006 Intel Corp.
9  *      Tom Long Nguyen ([email protected])
10  *      Zhang Yanmin ([email protected])
11  */
12
13 #include <linux/pci.h>
14 #include <linux/module.h>
15 #include <linux/pci.h>
16 #include <linux/kernel.h>
17 #include <linux/errno.h>
18 #include <linux/aer.h>
19 #include "portdrv.h"
20 #include "../pci.h"
21
22 struct aer_broadcast_data {
23         enum pci_channel_state state;
24         enum pci_ers_result result;
25 };
26
27 static pci_ers_result_t merge_result(enum pci_ers_result orig,
28                                   enum pci_ers_result new)
29 {
30         if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
31                 return PCI_ERS_RESULT_NO_AER_DRIVER;
32
33         if (new == PCI_ERS_RESULT_NONE)
34                 return orig;
35
36         switch (orig) {
37         case PCI_ERS_RESULT_CAN_RECOVER:
38         case PCI_ERS_RESULT_RECOVERED:
39                 orig = new;
40                 break;
41         case PCI_ERS_RESULT_DISCONNECT:
42                 if (new == PCI_ERS_RESULT_NEED_RESET)
43                         orig = PCI_ERS_RESULT_NEED_RESET;
44                 break;
45         default:
46                 break;
47         }
48
49         return orig;
50 }
51
52 static int report_error_detected(struct pci_dev *dev, void *data)
53 {
54         pci_ers_result_t vote;
55         const struct pci_error_handlers *err_handler;
56         struct aer_broadcast_data *result_data;
57
58         result_data = (struct aer_broadcast_data *) data;
59
60         device_lock(&dev->dev);
61         dev->error_state = result_data->state;
62
63         if (!dev->driver ||
64                 !dev->driver->err_handler ||
65                 !dev->driver->err_handler->error_detected) {
66                 if (result_data->state == pci_channel_io_frozen &&
67                         dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
68                         /*
69                          * In case of fatal recovery, if one of down-
70                          * stream device has no driver. We might be
71                          * unable to recover because a later insmod
72                          * of a driver for this device is unaware of
73                          * its hw state.
74                          */
75                         pci_printk(KERN_DEBUG, dev, "device has %s\n",
76                                    dev->driver ?
77                                    "no AER-aware driver" : "no driver");
78                 }
79
80                 /*
81                  * If there's any device in the subtree that does not
82                  * have an error_detected callback, returning
83                  * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
84                  * the subsequent mmio_enabled/slot_reset/resume
85                  * callbacks of "any" device in the subtree. All the
86                  * devices in the subtree are left in the error state
87                  * without recovery.
88                  */
89
90                 if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
91                         vote = PCI_ERS_RESULT_NO_AER_DRIVER;
92                 else
93                         vote = PCI_ERS_RESULT_NONE;
94         } else {
95                 err_handler = dev->driver->err_handler;
96                 vote = err_handler->error_detected(dev, result_data->state);
97                 pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
98         }
99
100         result_data->result = merge_result(result_data->result, vote);
101         device_unlock(&dev->dev);
102         return 0;
103 }
104
105 static int report_mmio_enabled(struct pci_dev *dev, void *data)
106 {
107         pci_ers_result_t vote;
108         const struct pci_error_handlers *err_handler;
109         struct aer_broadcast_data *result_data;
110
111         result_data = (struct aer_broadcast_data *) data;
112
113         device_lock(&dev->dev);
114         if (!dev->driver ||
115                 !dev->driver->err_handler ||
116                 !dev->driver->err_handler->mmio_enabled)
117                 goto out;
118
119         err_handler = dev->driver->err_handler;
120         vote = err_handler->mmio_enabled(dev);
121         result_data->result = merge_result(result_data->result, vote);
122 out:
123         device_unlock(&dev->dev);
124         return 0;
125 }
126
127 static int report_slot_reset(struct pci_dev *dev, void *data)
128 {
129         pci_ers_result_t vote;
130         const struct pci_error_handlers *err_handler;
131         struct aer_broadcast_data *result_data;
132
133         result_data = (struct aer_broadcast_data *) data;
134
135         device_lock(&dev->dev);
136         if (!dev->driver ||
137                 !dev->driver->err_handler ||
138                 !dev->driver->err_handler->slot_reset)
139                 goto out;
140
141         err_handler = dev->driver->err_handler;
142         vote = err_handler->slot_reset(dev);
143         result_data->result = merge_result(result_data->result, vote);
144 out:
145         device_unlock(&dev->dev);
146         return 0;
147 }
148
149 static int report_resume(struct pci_dev *dev, void *data)
150 {
151         const struct pci_error_handlers *err_handler;
152
153         device_lock(&dev->dev);
154         dev->error_state = pci_channel_io_normal;
155
156         if (!dev->driver ||
157                 !dev->driver->err_handler ||
158                 !dev->driver->err_handler->resume)
159                 goto out;
160
161         err_handler = dev->driver->err_handler;
162         err_handler->resume(dev);
163         pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
164 out:
165         device_unlock(&dev->dev);
166         return 0;
167 }
168
169 /**
170  * default_reset_link - default reset function
171  * @dev: pointer to pci_dev data structure
172  *
173  * Invoked when performing link reset on a Downstream Port or a
174  * Root Port with no aer driver.
175  */
176 static pci_ers_result_t default_reset_link(struct pci_dev *dev)
177 {
178         int rc;
179
180         rc = pci_bridge_secondary_bus_reset(dev);
181         pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
182         return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
183 }
184
185 static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
186 {
187         struct pci_dev *udev;
188         pci_ers_result_t status;
189         struct pcie_port_service_driver *driver = NULL;
190
191         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
192                 /* Reset this port for all subordinates */
193                 udev = dev;
194         } else {
195                 /* Reset the upstream component (likely downstream port) */
196                 udev = dev->bus->self;
197         }
198
199         /* Use the aer driver of the component firstly */
200         driver = pcie_port_find_service(udev, service);
201
202         if (driver && driver->reset_link) {
203                 status = driver->reset_link(udev);
204         } else if (udev->has_secondary_link) {
205                 status = default_reset_link(udev);
206         } else {
207                 pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
208                         pci_name(udev));
209                 return PCI_ERS_RESULT_DISCONNECT;
210         }
211
212         if (status != PCI_ERS_RESULT_RECOVERED) {
213                 pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
214                         pci_name(udev));
215                 return PCI_ERS_RESULT_DISCONNECT;
216         }
217
218         return status;
219 }
220
221 /**
222  * broadcast_error_message - handle message broadcast to downstream drivers
223  * @dev: pointer to from where in a hierarchy message is broadcasted down
224  * @state: error state
225  * @error_mesg: message to print
226  * @cb: callback to be broadcasted
227  *
228  * Invoked during error recovery process. Once being invoked, the content
229  * of error severity will be broadcasted to all downstream drivers in a
230  * hierarchy in question.
231  */
232 static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
233         enum pci_channel_state state,
234         char *error_mesg,
235         int (*cb)(struct pci_dev *, void *))
236 {
237         struct aer_broadcast_data result_data;
238
239         pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg);
240         result_data.state = state;
241         if (cb == report_error_detected)
242                 result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
243         else
244                 result_data.result = PCI_ERS_RESULT_RECOVERED;
245
246         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
247                 /*
248                  * If the error is reported by a bridge, we think this error
249                  * is related to the downstream link of the bridge, so we
250                  * do error recovery on all subordinates of the bridge instead
251                  * of the bridge and clear the error status of the bridge.
252                  */
253                 if (cb == report_error_detected)
254                         dev->error_state = state;
255                 pci_walk_bus(dev->subordinate, cb, &result_data);
256                 if (cb == report_resume) {
257                         pci_aer_clear_device_status(dev);
258                         pci_cleanup_aer_uncorrect_error_status(dev);
259                         dev->error_state = pci_channel_io_normal;
260                 }
261         } else {
262                 /*
263                  * If the error is reported by an end point, we think this
264                  * error is related to the upstream link of the end point.
265                  * The error is non fatal so the bus is ok; just invoke
266                  * the callback for the function that logged the error.
267                  */
268                 cb(dev, &result_data);
269         }
270
271         return result_data.result;
272 }
273
274 /**
275  * pcie_do_fatal_recovery - handle fatal error recovery process
276  * @dev: pointer to a pci_dev data structure of agent detecting an error
277  *
278  * Invoked when an error is fatal. Once being invoked, removes the devices
279  * beneath this AER agent, followed by reset link e.g. secondary bus reset
280  * followed by re-enumeration of devices.
281  */
282 void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service)
283 {
284         struct pci_dev *udev;
285         struct pci_bus *parent;
286         struct pci_dev *pdev, *temp;
287         pci_ers_result_t result;
288
289         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
290                 udev = dev;
291         else
292                 udev = dev->bus->self;
293
294         parent = udev->subordinate;
295         pci_lock_rescan_remove();
296         pci_dev_get(dev);
297         list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
298                                          bus_list) {
299                 pci_dev_get(pdev);
300                 pci_dev_set_disconnected(pdev, NULL);
301                 if (pci_has_subordinate(pdev))
302                         pci_walk_bus(pdev->subordinate,
303                                      pci_dev_set_disconnected, NULL);
304                 pci_stop_and_remove_bus_device(pdev);
305                 pci_dev_put(pdev);
306         }
307
308         result = reset_link(udev, service);
309
310         if ((service == PCIE_PORT_SERVICE_AER) &&
311             (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) {
312                 /*
313                  * If the error is reported by a bridge, we think this error
314                  * is related to the downstream link of the bridge, so we
315                  * do error recovery on all subordinates of the bridge instead
316                  * of the bridge and clear the error status of the bridge.
317                  */
318                 pci_aer_clear_fatal_status(dev);
319                 pci_aer_clear_device_status(dev);
320         }
321
322         if (result == PCI_ERS_RESULT_RECOVERED) {
323                 if (pcie_wait_for_link(udev, true))
324                         pci_rescan_bus(udev->bus);
325                 pci_info(dev, "Device recovery from fatal error successful\n");
326         } else {
327                 pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
328                 pci_info(dev, "Device recovery from fatal error failed\n");
329         }
330
331         pci_dev_put(dev);
332         pci_unlock_rescan_remove();
333 }
334
335 /**
336  * pcie_do_nonfatal_recovery - handle nonfatal error recovery process
337  * @dev: pointer to a pci_dev data structure of agent detecting an error
338  *
339  * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
340  * error detected message to all downstream drivers within a hierarchy in
341  * question and return the returned code.
342  */
343 void pcie_do_nonfatal_recovery(struct pci_dev *dev)
344 {
345         pci_ers_result_t status;
346         enum pci_channel_state state;
347
348         state = pci_channel_io_normal;
349
350         status = broadcast_error_message(dev,
351                         state,
352                         "error_detected",
353                         report_error_detected);
354
355         if (status == PCI_ERS_RESULT_CAN_RECOVER)
356                 status = broadcast_error_message(dev,
357                                 state,
358                                 "mmio_enabled",
359                                 report_mmio_enabled);
360
361         if (status == PCI_ERS_RESULT_NEED_RESET) {
362                 /*
363                  * TODO: Should call platform-specific
364                  * functions to reset slot before calling
365                  * drivers' slot_reset callbacks?
366                  */
367                 status = broadcast_error_message(dev,
368                                 state,
369                                 "slot_reset",
370                                 report_slot_reset);
371         }
372
373         if (status != PCI_ERS_RESULT_RECOVERED)
374                 goto failed;
375
376         broadcast_error_message(dev,
377                                 state,
378                                 "resume",
379                                 report_resume);
380
381         pci_info(dev, "AER: Device recovery successful\n");
382         return;
383
384 failed:
385         pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
386
387         /* TODO: Should kernel panic here? */
388         pci_info(dev, "AER: Device recovery failed\n");
389 }
This page took 0.06253 seconds and 4 git commands to generate.