drivers/crypto/intel/qat/qat_common/adf_aer.c
// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
/* Copyright(c) 2014 - 2020 Intel Corporation */
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include "adf_accel_devices.h"
#include "adf_common_drv.h"
#include "adf_pfvf_pf_msg.h"

struct adf_fatal_error_data {
        struct adf_accel_dev *accel_dev;
        struct work_struct work;
};

static struct workqueue_struct *device_reset_wq;
static struct workqueue_struct *device_sriov_wq;

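/*
 * PCI AER error_detected() callback: quiesce the device after a reported
 * hardware error (stop arbitration, notify listeners and VFs, bring the
 * device down) and ask the AER core for a slot reset.
 */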
static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
                                           pci_channel_state_t state)
{
        struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);

        dev_info(&pdev->dev, "Acceleration driver hardware error detected.\n");
        if (!accel_dev) {
                dev_err(&pdev->dev, "Can't find acceleration device\n");
                return PCI_ERS_RESULT_DISCONNECT;
        }

        if (state == pci_channel_io_perm_failure) {
                dev_err(&pdev->dev, "Can't recover from device error\n");
                return PCI_ERS_RESULT_DISCONNECT;
        }

        set_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
        if (accel_dev->hw_device->exit_arb) {
                dev_dbg(&pdev->dev, "Disabling arbitration\n");
                accel_dev->hw_device->exit_arb(accel_dev);
        }
        adf_error_notifier(accel_dev);
        adf_pf2vf_notify_fatal_error(accel_dev);
        adf_dev_restarting_notify(accel_dev);
        adf_pf2vf_notify_restarting(accel_dev);
        adf_pf2vf_wait_for_restarting_complete(accel_dev);
        pci_clear_master(pdev);
        adf_dev_down(accel_dev, false);

        return PCI_ERS_RESULT_NEED_RESET;
}

/* reset dev data */
struct adf_reset_dev_data {
        int mode;
        struct adf_accel_dev *accel_dev;
        struct completion compl;
        struct work_struct reset_work;
};

/* sriov dev data */
struct adf_sriov_dev_data {
        struct adf_accel_dev *accel_dev;
        struct completion compl;
        struct work_struct sriov_work;
};

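/*
 * Reset the device by toggling the Secondary Bus Reset bit in the bridge
 * control register of the parent bridge (or of the device itself if it
 * has no parent).
 */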
void adf_reset_sbr(struct adf_accel_dev *accel_dev)
{
        struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
        struct pci_dev *parent = pdev->bus->self;
        u16 bridge_ctl = 0;

        if (!parent)
                parent = pdev;

        if (!pci_wait_for_pending_transaction(pdev))
                dev_info(&GET_DEV(accel_dev),
                         "Transaction still in progress. Proceeding\n");

        dev_info(&GET_DEV(accel_dev), "Secondary bus reset\n");

        pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl);
        bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET;
        pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl);
        msleep(100);
        bridge_ctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
        pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl);
        msleep(100);
}
EXPORT_SYMBOL_GPL(adf_reset_sbr);

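/* Reset the device with a PCIe Function Level Reset. */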
void adf_reset_flr(struct adf_accel_dev *accel_dev)
{
        pcie_flr(accel_to_pci_dev(accel_dev));
}
EXPORT_SYMBOL_GPL(adf_reset_flr);

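/*
 * Reset the device using the method selected by the HW-specific layer
 * (e.g. adf_reset_sbr() or adf_reset_flr()) and restore its PCI config
 * space afterwards.
 */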
void adf_dev_restore(struct adf_accel_dev *accel_dev)
{
        struct adf_hw_device_data *hw_device = accel_dev->hw_device;
        struct pci_dev *pdev = accel_to_pci_dev(accel_dev);

        if (hw_device->reset_device) {
                dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n",
                         accel_dev->accel_id);
                hw_device->reset_device(accel_dev);
                pci_restore_state(pdev);
                pci_save_state(pdev);
        }
}

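/* Worker that re-enables SR-IOV after a device reset, then signals completion. */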
static void adf_device_sriov_worker(struct work_struct *work)
{
        struct adf_sriov_dev_data *sriov_data =
                container_of(work, struct adf_sriov_dev_data, sriov_work);

        adf_reenable_sriov(sriov_data->accel_dev);
        complete(&sriov_data->compl);
}

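/*
 * Worker that restarts the device, re-enables SR-IOV and notifies the VFs.
 * Runs on device_reset_wq on behalf of adf_dev_aer_schedule_reset().
 */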
static void adf_device_reset_worker(struct work_struct *work)
{
        struct adf_reset_dev_data *reset_data =
                  container_of(work, struct adf_reset_dev_data, reset_work);
        struct adf_accel_dev *accel_dev = reset_data->accel_dev;
        unsigned long wait_jiffies = msecs_to_jiffies(10000);
        struct adf_sriov_dev_data sriov_data;

        adf_dev_restarting_notify(accel_dev);
        if (adf_dev_restart(accel_dev)) {
                /* The device hung and we can't restart it, so stop here */
                dev_err(&GET_DEV(accel_dev), "Restart device failed\n");
                if (reset_data->mode == ADF_DEV_RESET_ASYNC ||
                    completion_done(&reset_data->compl))
                        kfree(reset_data);
                WARN(1, "QAT: device restart failed. Device is unusable\n");
                return;
        }

        sriov_data.accel_dev = accel_dev;
        init_completion(&sriov_data.compl);
        INIT_WORK(&sriov_data.sriov_work, adf_device_sriov_worker);
        queue_work(device_sriov_wq, &sriov_data.sriov_work);
        if (wait_for_completion_timeout(&sriov_data.compl, wait_jiffies))
                adf_pf2vf_notify_restarted(accel_dev);

        adf_dev_restarted_notify(accel_dev);
        clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);

        /*
         * The dev is back alive. Notify the caller if in sync mode.
         *
         * If the device restart takes longer than expected,
         * adf_dev_aer_schedule_reset() can time out and return early. This
         * is detected by calling completion_done(), in which case the
         * reset_data structure must be freed here instead.
         */
        if (reset_data->mode == ADF_DEV_RESET_ASYNC ||
            completion_done(&reset_data->compl))
                kfree(reset_data);
        else
                complete(&reset_data->compl);
}

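/*
 * Queue a device reset on device_reset_wq. In ADF_DEV_RESET_SYNC mode,
 * wait up to 10 seconds for the reset worker to complete.
 */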
static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
                                      enum adf_dev_reset_mode mode)
{
        struct adf_reset_dev_data *reset_data;

        if (!adf_dev_started(accel_dev) ||
            test_bit(ADF_STATUS_RESTARTING, &accel_dev->status))
                return 0;

        set_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
        reset_data = kzalloc(sizeof(*reset_data), GFP_KERNEL);
        if (!reset_data)
                return -ENOMEM;
        reset_data->accel_dev = accel_dev;
        init_completion(&reset_data->compl);
        reset_data->mode = mode;
        INIT_WORK(&reset_data->reset_work, adf_device_reset_worker);
        queue_work(device_reset_wq, &reset_data->reset_work);

        /* If in sync mode wait for the result */
        if (mode == ADF_DEV_RESET_SYNC) {
                int ret = 0;
                /* Maximum device reset time is 10 seconds */
                unsigned long wait_jiffies = msecs_to_jiffies(10000);
                unsigned long timeout = wait_for_completion_timeout(
                                   &reset_data->compl, wait_jiffies);
                if (!timeout) {
                        dev_err(&GET_DEV(accel_dev),
                                "Reset device timeout expired\n");
                        ret = -EFAULT;
                } else {
                        kfree(reset_data);
                }
                return ret;
        }
        return 0;
}

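/*
 * PCI AER slot_reset() callback: restore PCI state, bring the device back
 * up and notify VFs and registered listeners that it has restarted.
 */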
static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev)
{
        struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
        int res = 0;

        if (!accel_dev) {
                pr_err("QAT: Can't find acceleration device\n");
                return PCI_ERS_RESULT_DISCONNECT;
        }

        if (!pdev->is_busmaster)
                pci_set_master(pdev);
        pci_restore_state(pdev);
        pci_save_state(pdev);
        res = adf_dev_up(accel_dev, false);
        if (res && res != -EALREADY)
                return PCI_ERS_RESULT_DISCONNECT;

        adf_reenable_sriov(accel_dev);
        adf_pf2vf_notify_restarted(accel_dev);
        adf_dev_restarted_notify(accel_dev);
        clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
        return PCI_ERS_RESULT_RECOVERED;
}

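/* PCI AER resume() callback: recovery has finished, just log it. */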
static void adf_resume(struct pci_dev *pdev)
{
        dev_info(&pdev->dev, "Acceleration driver reset completed\n");
        dev_info(&pdev->dev, "Device is up and running\n");
}

const struct pci_error_handlers adf_err_handler = {
        .error_detected = adf_error_detected,
        .slot_reset = adf_slot_reset,
        .resume = adf_resume,
};
EXPORT_SYMBOL_GPL(adf_err_handler);

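/* Schedule an asynchronous reset if automatic reset on error is enabled. */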
int adf_dev_autoreset(struct adf_accel_dev *accel_dev)
{
        if (accel_dev->autoreset_on_error)
                return adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_ASYNC);

        return 0;
}

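/*
 * Worker that handles a fatal device error: notify listeners and, on the
 * PF, stop arbitration, inform the VFs and trigger an automatic reset.
 */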
static void adf_notify_fatal_error_worker(struct work_struct *work)
{
        struct adf_fatal_error_data *wq_data =
                        container_of(work, struct adf_fatal_error_data, work);
        struct adf_accel_dev *accel_dev = wq_data->accel_dev;
        struct adf_hw_device_data *hw_device = accel_dev->hw_device;

        adf_error_notifier(accel_dev);

        if (!accel_dev->is_vf) {
                /* Disable arbitration to stop processing of new requests */
                if (accel_dev->autoreset_on_error && hw_device->exit_arb)
                        hw_device->exit_arb(accel_dev);
                if (accel_dev->pf.vf_info)
                        adf_pf2vf_notify_fatal_error(accel_dev);
                adf_dev_autoreset(accel_dev);
        }

        kfree(wq_data);
}

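/*
 * Report a fatal device error from a context that may be atomic (hence
 * GFP_ATOMIC below) by deferring the handling to the misc workqueue.
 */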
int adf_notify_fatal_error(struct adf_accel_dev *accel_dev)
{
        struct adf_fatal_error_data *wq_data;

        wq_data = kzalloc(sizeof(*wq_data), GFP_ATOMIC);
        if (!wq_data)
                return -ENOMEM;

        wq_data->accel_dev = accel_dev;
        INIT_WORK(&wq_data->work, adf_notify_fatal_error_worker);
        adf_misc_wq_queue_work(&wq_data->work);

        return 0;
}

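/* Allocate the workqueues used for device reset and SR-IOV re-enablement. */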
int adf_init_aer(void)
{
        device_reset_wq = alloc_workqueue("qat_device_reset_wq",
                                          WQ_MEM_RECLAIM, 0);
        if (!device_reset_wq)
                return -EFAULT;

        device_sriov_wq = alloc_workqueue("qat_device_sriov_wq", 0, 0);
        if (!device_sriov_wq) {
                /* Don't leak the reset workqueue on a partial failure */
                destroy_workqueue(device_reset_wq);
                device_reset_wq = NULL;
                return -EFAULT;
        }

        return 0;
}

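/* Destroy the reset and SR-IOV workqueues on module exit. */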
void adf_exit_aer(void)
{
        if (device_reset_wq)
                destroy_workqueue(device_reset_wq);
        device_reset_wq = NULL;

        if (device_sriov_wq)
                destroy_workqueue(device_sriov_wq);
        device_sriov_wq = NULL;
}