Linux Audio

Check our new training course

Loading...
Note: File does not exist in v6.8.
  1// SPDX-License-Identifier: GPL-2.0+
  2/*
  3 * PCIe bandwidth controller
  4 *
  5 * Author: Alexandru Gagniuc <mr.nuke.me@gmail.com>
  6 *
  7 * Copyright (C) 2019 Dell Inc
  8 * Copyright (C) 2023-2024 Intel Corporation
  9 *
 10 * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds
 11 * and notify the operating system when the Link Width or Speed changes. The
 12 * notification capability is required for all Root Ports and Downstream
 13 * Ports supporting Link Width wider than x1 and/or multiple Link Speeds.
 14 *
 15 * This service port driver hooks into the Bandwidth Notification interrupt
 16 * watching for changes or links becoming degraded in operation. It updates
 17 * the cached Current Link Speed that is exposed to user space through sysfs.
 18 */
 19
 20#define dev_fmt(fmt) "bwctrl: " fmt
 21
 22#include <linux/atomic.h>
 23#include <linux/bitops.h>
 24#include <linux/bits.h>
 25#include <linux/cleanup.h>
 26#include <linux/errno.h>
 27#include <linux/interrupt.h>
 28#include <linux/mutex.h>
 29#include <linux/pci.h>
 30#include <linux/pci-bwctrl.h>
 31#include <linux/rwsem.h>
 32#include <linux/slab.h>
 33#include <linux/types.h>
 34
 35#include "../pci.h"
 36#include "portdrv.h"
 37
 38/**
 39 * struct pcie_bwctrl_data - PCIe bandwidth controller
 40 * @set_speed_mutex:	Serializes link speed changes
 41 * @lbms_count:		Count for LBMS (since last reset)
 42 * @cdev:		Thermal cooling device associated with the port
 43 */
 44struct pcie_bwctrl_data {
 45	struct mutex set_speed_mutex;
 46	atomic_t lbms_count;
 47	struct thermal_cooling_device *cdev;
 48};
 49
 50/*
 51 * Prevent port removal during LBMS count accessors and Link Speed changes.
 52 *
 53 * These have to be differentiated because pcie_bwctrl_change_speed() calls
 54 * pcie_retrain_link() which uses LBMS count reset accessor on success
 55 * (using just one rwsem triggers "possible recursive locking detected"
 56 * warning).
 57 */
 58static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem);
 59static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem);
 60
 61static bool pcie_valid_speed(enum pci_bus_speed speed)
 62{
 63	return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT);
 64}
 65
 66static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed)
 67{
 68	static const u8 speed_conv[] = {
 69		[PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT,
 70		[PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT,
 71		[PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT,
 72		[PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT,
 73		[PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT,
 74		[PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT,
 75	};
 76
 77	if (WARN_ON_ONCE(!pcie_valid_speed(speed)))
 78		return 0;
 79
 80	return speed_conv[speed];
 81}
 82
 83static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds)
 84{
 85	return __fls(supported_speeds);
 86}
 87
 88/**
 89 * pcie_bwctrl_select_speed - Select Target Link Speed
 90 * @port:	PCIe Port
 91 * @speed_req:	Requested PCIe Link Speed
 92 *
 93 * Select Target Link Speed by take into account Supported Link Speeds of
 94 * both the Root Port and the Endpoint.
 95 *
 96 * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.)
 97 */
 98static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req)
 99{
100	struct pci_bus *bus = port->subordinate;
101	u8 desired_speeds, supported_speeds;
102	struct pci_dev *dev;
103
104	desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req),
105				 __fls(PCI_EXP_LNKCAP2_SLS_2_5GB));
106
107	supported_speeds = port->supported_speeds;
108	if (bus) {
109		down_read(&pci_bus_sem);
110		dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list);
111		if (dev)
112			supported_speeds &= dev->supported_speeds;
113		up_read(&pci_bus_sem);
114	}
115	if (!supported_speeds)
116		return PCI_EXP_LNKCAP2_SLS_2_5GB;
117
118	return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds);
119}
120
121static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt)
122{
123	int ret;
124
125	ret = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2,
126						 PCI_EXP_LNKCTL2_TLS, target_speed);
127	if (ret != PCIBIOS_SUCCESSFUL)
128		return pcibios_err_to_errno(ret);
129
130	ret = pcie_retrain_link(port, use_lt);
131	if (ret < 0)
132		return ret;
133
134	/*
135	 * Ensure link speed updates also with platforms that have problems
136	 * with notifications.
137	 */
138	if (port->subordinate)
139		pcie_update_link_speed(port->subordinate);
140
141	return 0;
142}
143
144/**
145 * pcie_set_target_speed - Set downstream Link Speed for PCIe Port
146 * @port:	PCIe Port
147 * @speed_req:	Requested PCIe Link Speed
148 * @use_lt:	Wait for the LT or DLLLA bit to detect the end of link training
149 *
150 * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be
151 * adjusted downwards to the best speed supported by both the Port and PCIe
152 * Device underneath it.
153 *
154 * Return:
155 * * 0		- on success
156 * * -EINVAL	- @speed_req is not a PCIe Link Speed
157 * * -ENODEV	- @port is not controllable
158 * * -ETIMEDOUT	- changing Link Speed took too long
159 * * -EAGAIN	- Link Speed was changed but @speed_req was not achieved
160 */
161int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req,
162			  bool use_lt)
163{
164	struct pci_bus *bus = port->subordinate;
165	u16 target_speed;
166	int ret;
167
168	if (WARN_ON_ONCE(!pcie_valid_speed(speed_req)))
169		return -EINVAL;
170
171	if (bus && bus->cur_bus_speed == speed_req)
172		return 0;
173
174	target_speed = pcie_bwctrl_select_speed(port, speed_req);
175
176	scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) {
177		struct pcie_bwctrl_data *data = port->link_bwctrl;
178
179		/*
180		 * port->link_bwctrl is NULL during initial scan when called
181		 * e.g. from the Target Speed quirk.
182		 */
183		if (data)
184			mutex_lock(&data->set_speed_mutex);
185
186		ret = pcie_bwctrl_change_speed(port, target_speed, use_lt);
187
188		if (data)
189			mutex_unlock(&data->set_speed_mutex);
190	}
191
192	/*
193	 * Despite setting higher speed into the Target Link Speed, empty
194	 * bus won't train to 5GT+ speeds.
195	 */
196	if (!ret && bus && bus->cur_bus_speed != speed_req &&
197	    !list_empty(&bus->devices))
198		ret = -EAGAIN;
199
200	return ret;
201}
202
203static void pcie_bwnotif_enable(struct pcie_device *srv)
204{
205	struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
206	struct pci_dev *port = srv->port;
207	u16 link_status;
208	int ret;
209
210	/* Count LBMS seen so far as one */
211	ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
212	if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS)
213		atomic_inc(&data->lbms_count);
214
215	pcie_capability_set_word(port, PCI_EXP_LNKCTL,
216				 PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
217	pcie_capability_write_word(port, PCI_EXP_LNKSTA,
218				   PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
219
220	/*
221	 * Update after enabling notifications & clearing status bits ensures
222	 * link speed is up to date.
223	 */
224	pcie_update_link_speed(port->subordinate);
225}
226
227static void pcie_bwnotif_disable(struct pci_dev *port)
228{
229	pcie_capability_clear_word(port, PCI_EXP_LNKCTL,
230				   PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
231}
232
233static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
234{
235	struct pcie_device *srv = context;
236	struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
237	struct pci_dev *port = srv->port;
238	u16 link_status, events;
239	int ret;
240
241	ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
242	if (ret != PCIBIOS_SUCCESSFUL)
243		return IRQ_NONE;
244
245	events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
246	if (!events)
247		return IRQ_NONE;
248
249	if (events & PCI_EXP_LNKSTA_LBMS)
250		atomic_inc(&data->lbms_count);
251
252	pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
253
254	/*
255	 * Interrupts will not be triggered from any further Link Speed
256	 * change until LBMS is cleared by the write. Therefore, re-read the
257	 * speed (inside pcie_update_link_speed()) after LBMS has been
258	 * cleared to avoid missing link speed changes.
259	 */
260	pcie_update_link_speed(port->subordinate);
261
262	return IRQ_HANDLED;
263}
264
265void pcie_reset_lbms_count(struct pci_dev *port)
266{
267	struct pcie_bwctrl_data *data;
268
269	guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
270	data = port->link_bwctrl;
271	if (data)
272		atomic_set(&data->lbms_count, 0);
273	else
274		pcie_capability_write_word(port, PCI_EXP_LNKSTA,
275					   PCI_EXP_LNKSTA_LBMS);
276}
277
278int pcie_lbms_count(struct pci_dev *port, unsigned long *val)
279{
280	struct pcie_bwctrl_data *data;
281
282	guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
283	data = port->link_bwctrl;
284	if (!data)
285		return -ENOTTY;
286
287	*val = atomic_read(&data->lbms_count);
288
289	return 0;
290}
291
292static int pcie_bwnotif_probe(struct pcie_device *srv)
293{
294	struct pci_dev *port = srv->port;
295	int ret;
296
297	struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device,
298						     sizeof(*data), GFP_KERNEL);
299	if (!data)
300		return -ENOMEM;
301
302	ret = devm_mutex_init(&srv->device, &data->set_speed_mutex);
303	if (ret)
304		return ret;
305
306	scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
307		scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
308			port->link_bwctrl = data;
309
310			ret = request_irq(srv->irq, pcie_bwnotif_irq,
311					  IRQF_SHARED, "PCIe bwctrl", srv);
312			if (ret) {
313				port->link_bwctrl = NULL;
314				return ret;
315			}
316
317			pcie_bwnotif_enable(srv);
318		}
319	}
320
321	pci_dbg(port, "enabled with IRQ %d\n", srv->irq);
322
323	/* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */
324	port->link_bwctrl->cdev = pcie_cooling_device_register(port);
325	if (IS_ERR(port->link_bwctrl->cdev))
326		port->link_bwctrl->cdev = NULL;
327
328	return 0;
329}
330
331static void pcie_bwnotif_remove(struct pcie_device *srv)
332{
333	struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
334
335	pcie_cooling_device_unregister(data->cdev);
336
337	scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
338		scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
339			pcie_bwnotif_disable(srv->port);
340
341			free_irq(srv->irq, srv);
342
343			srv->port->link_bwctrl = NULL;
344		}
345	}
346}
347
348static int pcie_bwnotif_suspend(struct pcie_device *srv)
349{
350	pcie_bwnotif_disable(srv->port);
351	return 0;
352}
353
/* Resume callback: turn bandwidth notifications back on. Always returns 0. */
static int pcie_bwnotif_resume(struct pcie_device *srv)
{
	pcie_bwnotif_enable(srv);

	return 0;
}
359
360static struct pcie_port_service_driver pcie_bwctrl_driver = {
361	.name		= "pcie_bwctrl",
362	.port_type	= PCIE_ANY_PORT,
363	.service	= PCIE_PORT_SERVICE_BWCTRL,
364	.probe		= pcie_bwnotif_probe,
365	.suspend	= pcie_bwnotif_suspend,
366	.resume		= pcie_bwnotif_resume,
367	.remove		= pcie_bwnotif_remove,
368};
369
370int __init pcie_bwctrl_init(void)
371{
372	return pcie_port_service_register(&pcie_bwctrl_driver);
373}