// SPDX-License-Identifier: GPL-2.0+
/*
 * PCIe bandwidth controller
 *
 * Copyright (C) 2019 Dell Inc
 * Copyright (C) 2023-2024 Intel Corporation
 *
 * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds
 * and notify the operating system when the Link Width or Speed changes. The
 * notification capability is required for all Root Ports and Downstream
 * Ports supporting Link Width wider than x1 and/or multiple Link Speeds.
 *
 * This service port driver hooks into the Bandwidth Notification interrupt
 * watching for changes or links becoming degraded in operation. It updates
 * the cached Current Link Speed that is exposed to user space through sysfs.
 */
20 #define dev_fmt(fmt) "bwctrl: " fmt
22 #include <linux/atomic.h>
23 #include <linux/bitops.h>
24 #include <linux/bits.h>
25 #include <linux/cleanup.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/mutex.h>
29 #include <linux/pci.h>
30 #include <linux/pci-bwctrl.h>
31 #include <linux/rwsem.h>
32 #include <linux/slab.h>
33 #include <linux/types.h>
39 * struct pcie_bwctrl_data - PCIe bandwidth controller
40 * @set_speed_mutex: Serializes link speed changes
41 * @lbms_count: Count for LBMS (since last reset)
42 * @cdev: Thermal cooling device associated with the port
44 struct pcie_bwctrl_data {
45 struct mutex set_speed_mutex;
47 struct thermal_cooling_device *cdev;
/*
 * Prevent port removal during LBMS count accessors and Link Speed changes.
 *
 * These have to be differentiated because pcie_bwctrl_change_speed() calls
 * pcie_retrain_link() which uses LBMS count reset accessor on success
 * (using just one rwsem triggers "possible recursive locking detected"
 * warning).
 */
static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem);
static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem);
61 static bool pcie_valid_speed(enum pci_bus_speed speed)
63 return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT);
66 static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed)
68 static const u8 speed_conv[] = {
69 [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT,
70 [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT,
71 [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT,
72 [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT,
73 [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT,
74 [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT,
77 if (WARN_ON_ONCE(!pcie_valid_speed(speed)))
80 return speed_conv[speed];
83 static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds)
85 return __fls(supported_speeds);
89 * pcie_bwctrl_select_speed - Select Target Link Speed
91 * @speed_req: Requested PCIe Link Speed
93 * Select Target Link Speed by take into account Supported Link Speeds of
94 * both the Root Port and the Endpoint.
96 * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.)
98 static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req)
100 struct pci_bus *bus = port->subordinate;
101 u8 desired_speeds, supported_speeds;
104 desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req),
105 __fls(PCI_EXP_LNKCAP2_SLS_2_5GB));
107 supported_speeds = port->supported_speeds;
109 down_read(&pci_bus_sem);
110 dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list);
112 supported_speeds &= dev->supported_speeds;
113 up_read(&pci_bus_sem);
115 if (!supported_speeds)
116 return PCI_EXP_LNKCAP2_SLS_2_5GB;
118 return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds);
121 static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt)
125 ret = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2,
126 PCI_EXP_LNKCTL2_TLS, target_speed);
127 if (ret != PCIBIOS_SUCCESSFUL)
128 return pcibios_err_to_errno(ret);
130 ret = pcie_retrain_link(port, use_lt);
135 * Ensure link speed updates also with platforms that have problems
136 * with notifications.
138 if (port->subordinate)
139 pcie_update_link_speed(port->subordinate);
145 * pcie_set_target_speed - Set downstream Link Speed for PCIe Port
147 * @speed_req: Requested PCIe Link Speed
148 * @use_lt: Wait for the LT or DLLLA bit to detect the end of link training
150 * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be
151 * adjusted downwards to the best speed supported by both the Port and PCIe
152 * Device underneath it.
156 * * -EINVAL - @speed_req is not a PCIe Link Speed
157 * * -ENODEV - @port is not controllable
158 * * -ETIMEDOUT - changing Link Speed took too long
159 * * -EAGAIN - Link Speed was changed but @speed_req was not achieved
161 int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req,
164 struct pci_bus *bus = port->subordinate;
168 if (WARN_ON_ONCE(!pcie_valid_speed(speed_req)))
171 if (bus && bus->cur_bus_speed == speed_req)
174 target_speed = pcie_bwctrl_select_speed(port, speed_req);
176 scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) {
177 struct pcie_bwctrl_data *data = port->link_bwctrl;
180 * port->link_bwctrl is NULL during initial scan when called
181 * e.g. from the Target Speed quirk.
184 mutex_lock(&data->set_speed_mutex);
186 ret = pcie_bwctrl_change_speed(port, target_speed, use_lt);
189 mutex_unlock(&data->set_speed_mutex);
193 * Despite setting higher speed into the Target Link Speed, empty
194 * bus won't train to 5GT+ speeds.
196 if (!ret && bus && bus->cur_bus_speed != speed_req &&
197 !list_empty(&bus->devices))
203 static void pcie_bwnotif_enable(struct pcie_device *srv)
205 struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
206 struct pci_dev *port = srv->port;
210 /* Count LBMS seen so far as one */
211 ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
212 if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS)
213 atomic_inc(&data->lbms_count);
215 pcie_capability_set_word(port, PCI_EXP_LNKCTL,
216 PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
217 pcie_capability_write_word(port, PCI_EXP_LNKSTA,
218 PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
221 * Update after enabling notifications & clearing status bits ensures
222 * link speed is up to date.
224 pcie_update_link_speed(port->subordinate);
227 static void pcie_bwnotif_disable(struct pci_dev *port)
229 pcie_capability_clear_word(port, PCI_EXP_LNKCTL,
230 PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
233 static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
235 struct pcie_device *srv = context;
236 struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
237 struct pci_dev *port = srv->port;
238 u16 link_status, events;
241 ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
242 if (ret != PCIBIOS_SUCCESSFUL)
245 events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
249 if (events & PCI_EXP_LNKSTA_LBMS)
250 atomic_inc(&data->lbms_count);
252 pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
255 * Interrupts will not be triggered from any further Link Speed
256 * change until LBMS is cleared by the write. Therefore, re-read the
257 * speed (inside pcie_update_link_speed()) after LBMS has been
258 * cleared to avoid missing link speed changes.
260 pcie_update_link_speed(port->subordinate);
265 void pcie_reset_lbms_count(struct pci_dev *port)
267 struct pcie_bwctrl_data *data;
269 guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
270 data = port->link_bwctrl;
272 atomic_set(&data->lbms_count, 0);
274 pcie_capability_write_word(port, PCI_EXP_LNKSTA,
275 PCI_EXP_LNKSTA_LBMS);
278 int pcie_lbms_count(struct pci_dev *port, unsigned long *val)
280 struct pcie_bwctrl_data *data;
282 guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
283 data = port->link_bwctrl;
287 *val = atomic_read(&data->lbms_count);
292 static int pcie_bwnotif_probe(struct pcie_device *srv)
294 struct pci_dev *port = srv->port;
297 struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device,
298 sizeof(*data), GFP_KERNEL);
302 ret = devm_mutex_init(&srv->device, &data->set_speed_mutex);
306 ret = devm_request_irq(&srv->device, srv->irq, pcie_bwnotif_irq,
307 IRQF_SHARED, "PCIe bwctrl", srv);
311 scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
312 scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
313 port->link_bwctrl = no_free_ptr(data);
314 pcie_bwnotif_enable(srv);
318 pci_dbg(port, "enabled with IRQ %d\n", srv->irq);
320 /* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */
321 port->link_bwctrl->cdev = pcie_cooling_device_register(port);
322 if (IS_ERR(port->link_bwctrl->cdev))
323 port->link_bwctrl->cdev = NULL;
328 static void pcie_bwnotif_remove(struct pcie_device *srv)
330 struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
332 pcie_cooling_device_unregister(data->cdev);
334 pcie_bwnotif_disable(srv->port);
336 scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem)
337 scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem)
338 srv->port->link_bwctrl = NULL;
341 static int pcie_bwnotif_suspend(struct pcie_device *srv)
343 pcie_bwnotif_disable(srv->port);
/* Resume callback: turn bandwidth notifications back on. Always returns 0. */
static int pcie_bwnotif_resume(struct pcie_device *srv)
{
	pcie_bwnotif_enable(srv);
	return 0;
}
353 static struct pcie_port_service_driver pcie_bwctrl_driver = {
354 .name = "pcie_bwctrl",
355 .port_type = PCIE_ANY_PORT,
356 .service = PCIE_PORT_SERVICE_BWCTRL,
357 .probe = pcie_bwnotif_probe,
358 .suspend = pcie_bwnotif_suspend,
359 .resume = pcie_bwnotif_resume,
360 .remove = pcie_bwnotif_remove,
363 int __init pcie_bwctrl_init(void)
365 return pcie_port_service_register(&pcie_bwctrl_driver);