  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * PCI Message Signaled Interrupt (MSI)
  4 *
  5 * Copyright (C) 2003-2004 Intel
  6 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
  7 * Copyright (C) 2016 Christoph Hellwig.
  8 */
  9#include <linux/err.h>
 10#include <linux/export.h>
 11#include <linux/irq.h>
 12
 13#include "../pci.h"
 14#include "msi.h"
 15
 16int pci_msi_enable = 1;
 17int pci_msi_ignore_mask;
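/*
 * pci_msi_enable is cleared by pci_no_msi() at the bottom of this file
 * (e.g. for the "pci=nomsi" command line option). pci_msi_ignore_mask is
 * set by environments such as Xen PV guests, where the hypervisor owns the
 * MSI mask bits; see also the note in msi_setup_msi_desc() below.
 */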
 18
 19/**
 20 * pci_msi_supported - check whether MSI may be enabled on a device
 21 * @dev: pointer to the pci_dev data structure of MSI device function
 22 * @nvec: how many MSIs have been requested?
 23 *
 24 * Look at global flags, the device itself, and its parent buses
  25 * to determine whether MSI or MSI-X is supported for the device. Return
  26 * 1 if supported, 0 otherwise.
 27 **/
 28static int pci_msi_supported(struct pci_dev *dev, int nvec)
 29{
 30	struct pci_bus *bus;
 31
 32	/* MSI must be globally enabled and supported by the device */
 33	if (!pci_msi_enable)
 34		return 0;
 35
 36	if (!dev || dev->no_msi)
 37		return 0;
 38
 39	/*
  40	 * You can't ask to have fewer than one MSI configured:
  41	 *  a) it makes no sense;
 42	 *  b) the list manipulation code assumes nvec >= 1.
 43	 */
 44	if (nvec < 1)
 45		return 0;
 46
 47	/*
 48	 * Any bridge which does NOT route MSI transactions from its
 49	 * secondary bus to its primary bus must set NO_MSI flag on
 50	 * the secondary pci_bus.
 51	 *
 52	 * The NO_MSI flag can either be set directly by:
 53	 * - arch-specific PCI host bus controller drivers (deprecated)
 54	 * - quirks for specific PCI bridges
 55	 *
 56	 * or indirectly by platform-specific PCI host bridge drivers by
 57	 * advertising the 'msi_domain' property, which results in
 58	 * the NO_MSI flag when no MSI domain is found for this bridge
 59	 * at probe time.
 60	 */
 61	for (bus = dev->bus; bus; bus = bus->parent)
 62		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
 63			return 0;
 64
 65	return 1;
 66}
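/*
 * Illustrative sketch (not part of this file): a bridge that cannot route
 * MSI writes from its secondary bus would typically get a quirk along these
 * lines, setting the flag tested in the loop above on its subordinate bus.
 * The function name and vendor/device IDs are placeholders.
 *
 *	static void quirk_no_msi_bridge(struct pci_dev *bridge)
 *	{
 *		if (bridge->subordinate)
 *			bridge->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
 *	}
 *	DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_EXAMPLE, 0x1234, quirk_no_msi_bridge);
 */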
 67
 68static void pcim_msi_release(void *pcidev)
 69{
 70	struct pci_dev *dev = pcidev;
 71
 72	dev->is_msi_managed = false;
 73	pci_free_irq_vectors(dev);
 74}
 75
 76/*
 77 * Needs to be separate from pcim_release to prevent an ordering problem
 78 * vs. msi_device_data_release() in the MSI core code.
 79 */
 80static int pcim_setup_msi_release(struct pci_dev *dev)
 81{
 82	int ret;
 83
 84	if (!pci_is_managed(dev) || dev->is_msi_managed)
 85		return 0;
 86
 87	ret = devm_add_action(&dev->dev, pcim_msi_release, dev);
 88	if (!ret)
 89		dev->is_msi_managed = true;
 90	return ret;
 91}
 92
 93/*
 94 * Ordering vs. devres: msi device data has to be installed first so that
 95 * pcim_msi_release() is invoked before it on device release.
 96 */
 97static int pci_setup_msi_context(struct pci_dev *dev)
 98{
 99	int ret = msi_setup_device_data(&dev->dev);
100
101	if (!ret)
102		ret = pcim_setup_msi_release(dev);
103	return ret;
104}
105
106/*
107 * Helper functions for mask/unmask and MSI message handling
108 */
109
110void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set)
111{
112	raw_spinlock_t *lock = &to_pci_dev(desc->dev)->msi_lock;
113	unsigned long flags;
114
115	if (!desc->pci.msi_attrib.can_mask)
116		return;
117
118	raw_spin_lock_irqsave(lock, flags);
119	desc->pci.msi_mask &= ~clear;
120	desc->pci.msi_mask |= set;
121	pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->pci.mask_pos,
122			       desc->pci.msi_mask);
123	raw_spin_unlock_irqrestore(lock, flags);
124}
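/*
 * For plain MSI the per-vector mask is a single 32-bit register in config
 * space (at PCI_MSI_MASK_32 or PCI_MSI_MASK_64), one bit per vector, so for
 * example pci_msi_update_mask(desc, 0, BIT(3)) masks the fourth vector of a
 * multi-MSI device. MSI-X entries are masked through the per-entry Vector
 * Control word in the table instead.
 */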
125
126/**
127 * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts
128 * @data:	pointer to irqdata associated to that interrupt
129 */
130void pci_msi_mask_irq(struct irq_data *data)
131{
132	struct msi_desc *desc = irq_data_get_msi_desc(data);
133
134	__pci_msi_mask_desc(desc, BIT(data->irq - desc->irq));
135}
136EXPORT_SYMBOL_GPL(pci_msi_mask_irq);
137
138/**
139 * pci_msi_unmask_irq - Generic IRQ chip callback to unmask PCI/MSI interrupts
140 * @data:	pointer to irqdata associated to that interrupt
141 */
142void pci_msi_unmask_irq(struct irq_data *data)
143{
144	struct msi_desc *desc = irq_data_get_msi_desc(data);
145
146	__pci_msi_unmask_desc(desc, BIT(data->irq - desc->irq));
147}
148EXPORT_SYMBOL_GPL(pci_msi_unmask_irq);
149
150void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
151{
152	struct pci_dev *dev = msi_desc_to_pci_dev(entry);
153
154	BUG_ON(dev->current_state != PCI_D0);
155
156	if (entry->pci.msi_attrib.is_msix) {
157		void __iomem *base = pci_msix_desc_addr(entry);
158
159		if (WARN_ON_ONCE(entry->pci.msi_attrib.is_virtual))
160			return;
161
162		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
163		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
164		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
165	} else {
166		int pos = dev->msi_cap;
167		u16 data;
168
169		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
170				      &msg->address_lo);
171		if (entry->pci.msi_attrib.is_64) {
172			pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
173					      &msg->address_hi);
174			pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
175		} else {
176			msg->address_hi = 0;
177			pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
178		}
179		msg->data = data;
180	}
181}
182
183static inline void pci_write_msg_msi(struct pci_dev *dev, struct msi_desc *desc,
184				     struct msi_msg *msg)
185{
186	int pos = dev->msi_cap;
187	u16 msgctl;
188
189	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
190	msgctl &= ~PCI_MSI_FLAGS_QSIZE;
191	msgctl |= desc->pci.msi_attrib.multiple << 4;
192	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);
193
194	pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, msg->address_lo);
195	if (desc->pci.msi_attrib.is_64) {
196		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,  msg->address_hi);
197		pci_write_config_word(dev, pos + PCI_MSI_DATA_64, msg->data);
198	} else {
199		pci_write_config_word(dev, pos + PCI_MSI_DATA_32, msg->data);
200	}
201	/* Ensure that the writes are visible in the device */
202	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
203}
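/*
 * Note on the QSIZE update above: PCI_MSI_FLAGS_QSIZE is the Multiple
 * Message Enable field, which holds log2 of the number of enabled vectors.
 * For example msi_attrib.multiple == 3 enables 8 vectors, and the device
 * then signals vector n by substituting n into the low 3 bits of the
 * Message Data value written here.
 */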
204
205static inline void pci_write_msg_msix(struct msi_desc *desc, struct msi_msg *msg)
206{
207	void __iomem *base = pci_msix_desc_addr(desc);
208	u32 ctrl = desc->pci.msix_ctrl;
209	bool unmasked = !(ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT);
210
211	if (desc->pci.msi_attrib.is_virtual)
212		return;
213	/*
214	 * The specification mandates that the entry is masked
215	 * when the message is modified:
216	 *
217	 * "If software changes the Address or Data value of an
218	 * entry while the entry is unmasked, the result is
219	 * undefined."
220	 */
221	if (unmasked)
222		pci_msix_write_vector_ctrl(desc, ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT);
223
224	writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
225	writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
226	writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
227
228	if (unmasked)
229		pci_msix_write_vector_ctrl(desc, ctrl);
230
231	/* Ensure that the writes are visible in the device */
232	readl(base + PCI_MSIX_ENTRY_DATA);
233}
234
235void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
236{
237	struct pci_dev *dev = msi_desc_to_pci_dev(entry);
238
239	if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) {
240		/* Don't touch the hardware now */
241	} else if (entry->pci.msi_attrib.is_msix) {
242		pci_write_msg_msix(entry, msg);
243	} else {
244		pci_write_msg_msi(dev, entry, msg);
245	}
246
247	entry->msg = *msg;
248
249	if (entry->write_msi_msg)
250		entry->write_msi_msg(entry, entry->write_msi_msg_data);
251}
252
253void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
254{
255	struct msi_desc *entry = irq_get_msi_desc(irq);
256
257	__pci_write_msi_msg(entry, msg);
258}
259EXPORT_SYMBOL_GPL(pci_write_msi_msg);
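/*
 * __pci_write_msi_msg() above is the workhorse used by the MSI irq domain
 * code; pci_write_msi_msg() is a thin wrapper for callers that only have a
 * Linux IRQ number and need the descriptor looked up first.
 */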
260
261
262/* PCI/MSI specific functionality */
263
264static void pci_intx_for_msi(struct pci_dev *dev, int enable)
265{
266	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
267		pci_intx(dev, enable);
268}
269
270static void pci_msi_set_enable(struct pci_dev *dev, int enable)
271{
272	u16 control;
273
274	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
275	control &= ~PCI_MSI_FLAGS_ENABLE;
276	if (enable)
277		control |= PCI_MSI_FLAGS_ENABLE;
278	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
279}
280
281static int msi_setup_msi_desc(struct pci_dev *dev, int nvec,
282			      struct irq_affinity_desc *masks)
283{
284	struct msi_desc desc;
285	u16 control;
286
287	/* MSI Entry Initialization */
288	memset(&desc, 0, sizeof(desc));
289
290	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
291	/* Lies, damned lies, and MSIs */
292	if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING)
293		control |= PCI_MSI_FLAGS_MASKBIT;
294	/* Respect XEN's mask disabling */
295	if (pci_msi_ignore_mask)
296		control &= ~PCI_MSI_FLAGS_MASKBIT;
297
298	desc.nvec_used			= nvec;
299	desc.pci.msi_attrib.is_64	= !!(control & PCI_MSI_FLAGS_64BIT);
300	desc.pci.msi_attrib.can_mask	= !!(control & PCI_MSI_FLAGS_MASKBIT);
301	desc.pci.msi_attrib.default_irq	= dev->irq;
302	desc.pci.msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
303	desc.pci.msi_attrib.multiple	= ilog2(__roundup_pow_of_two(nvec));
304	desc.affinity			= masks;
305
306	if (control & PCI_MSI_FLAGS_64BIT)
307		desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
308	else
309		desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_32;
310
311	/* Save the initial mask status */
312	if (desc.pci.msi_attrib.can_mask)
313		pci_read_config_dword(dev, desc.pci.mask_pos, &desc.pci.msi_mask);
314
315	return msi_insert_msi_desc(&dev->dev, &desc);
316}
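/*
 * MSI only supports power-of-two vector counts, so the "multiple" attribute
 * above is derived by rounding up: for example nvec == 5 gives
 * __roundup_pow_of_two(5) == 8 and ilog2(8) == 3, which later ends up in
 * the Multiple Message Enable field when the message is written.
 */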
317
318static int msi_verify_entries(struct pci_dev *dev)
319{
320	struct msi_desc *entry;
321
322	if (!dev->no_64bit_msi)
323		return 0;
324
325	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
326		if (entry->msg.address_hi) {
327			pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n",
328				entry->msg.address_hi, entry->msg.address_lo);
329			break;
330		}
331	}
332	return !entry ? 0 : -EIO;
333}
334
335/**
336 * msi_capability_init - configure device's MSI capability structure
337 * @dev: pointer to the pci_dev data structure of MSI device function
338 * @nvec: number of interrupts to allocate
339 * @affd: description of automatic IRQ affinity assignments (may be %NULL)
340 *
341 * Setup the MSI capability structure of the device with the requested
342 * number of interrupts.  A return value of zero indicates the successful
343 * setup of an entry with the new MSI IRQ.  A negative return value indicates
344 * an error, and a positive return value indicates the number of interrupts
345 * which could have been allocated.
346 */
347static int msi_capability_init(struct pci_dev *dev, int nvec,
348			       struct irq_affinity *affd)
349{
350	struct irq_affinity_desc *masks = NULL;
351	struct msi_desc *entry;
352	int ret;
353
354	/* Reject multi-MSI early on irq domain enabled architectures */
355	if (nvec > 1 && !pci_msi_domain_supports(dev, MSI_FLAG_MULTI_PCI_MSI, ALLOW_LEGACY))
356		return 1;
357
358	/*
359	 * Disable MSI during setup in the hardware, but mark it enabled
360	 * so that setup code can evaluate it.
361	 */
362	pci_msi_set_enable(dev, 0);
363	dev->msi_enabled = 1;
364
365	if (affd)
366		masks = irq_create_affinity_masks(nvec, affd);
367
368	msi_lock_descs(&dev->dev);
369	ret = msi_setup_msi_desc(dev, nvec, masks);
370	if (ret)
371		goto fail;
372
373	/* All MSIs are unmasked by default; mask them all */
374	entry = msi_first_desc(&dev->dev, MSI_DESC_ALL);
375	pci_msi_mask(entry, msi_multi_mask(entry));
376
377	/* Configure MSI capability structure */
378	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
379	if (ret)
380		goto err;
381
382	ret = msi_verify_entries(dev);
383	if (ret)
384		goto err;
385
386	/* Set MSI enabled bits	*/
387	pci_intx_for_msi(dev, 0);
388	pci_msi_set_enable(dev, 1);
389
390	pcibios_free_irq(dev);
391	dev->irq = entry->irq;
392	goto unlock;
393
394err:
395	pci_msi_unmask(entry, msi_multi_mask(entry));
396	pci_free_msi_irqs(dev);
397fail:
398	dev->msi_enabled = 0;
399unlock:
400	msi_unlock_descs(&dev->dev);
401	kfree(masks);
402	return ret;
403}
404
405int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
406			   struct irq_affinity *affd)
407{
408	int nvec;
409	int rc;
410
411	if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0)
412		return -EINVAL;
413
414	/* Check whether driver already requested MSI-X IRQs */
415	if (dev->msix_enabled) {
416		pci_info(dev, "can't enable MSI (MSI-X already enabled)\n");
417		return -EINVAL;
418	}
419
420	if (maxvec < minvec)
421		return -ERANGE;
422
423	if (WARN_ON_ONCE(dev->msi_enabled))
424		return -EINVAL;
425
426	nvec = pci_msi_vec_count(dev);
427	if (nvec < 0)
428		return nvec;
429	if (nvec < minvec)
430		return -ENOSPC;
431
432	if (nvec > maxvec)
433		nvec = maxvec;
434
435	rc = pci_setup_msi_context(dev);
436	if (rc)
437		return rc;
438
439	if (!pci_setup_msi_device_domain(dev))
440		return -ENODEV;
441
442	for (;;) {
443		if (affd) {
444			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
445			if (nvec < minvec)
446				return -ENOSPC;
447		}
448
449		rc = msi_capability_init(dev, nvec, affd);
450		if (rc == 0)
451			return nvec;
452
453		if (rc < 0)
454			return rc;
455		if (rc < minvec)
456			return -ENOSPC;
457
458		nvec = rc;
459	}
460}
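/*
 * Usage sketch (illustrative only): drivers normally reach
 * __pci_enable_msi_range() through pci_alloc_irq_vectors() rather than
 * calling it directly, e.g.:
 *
 *	nvec = pci_alloc_irq_vectors(pdev, 1, 8, PCI_IRQ_MSI);
 *	if (nvec < 0)
 *		return nvec;
 *	ret = request_irq(pci_irq_vector(pdev, 0), demo_handler, 0, "demo", pdev);
 *
 * "demo_handler" and "demo" are placeholders, not kernel symbols.
 */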
461
462/**
463 * pci_msi_vec_count - Return the number of MSI vectors a device can send
464 * @dev: device to report about
465 *
466 * This function returns the number of MSI vectors a device requested via
 467 * the Multiple Message Capable register. It returns a negative errno if the
 468 * device is not capable of sending MSI interrupts. Otherwise, the call succeeds
469 * and returns a power of two, up to a maximum of 2^5 (32), according to the
470 * MSI specification.
471 **/
472int pci_msi_vec_count(struct pci_dev *dev)
473{
474	int ret;
475	u16 msgctl;
476
477	if (!dev->msi_cap)
478		return -EINVAL;
479
480	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
481	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
482
483	return ret;
484}
485EXPORT_SYMBOL(pci_msi_vec_count);
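/*
 * Example of the decode above: PCI_MSI_FLAGS_QMASK covers the Multiple
 * Message Capable field (bits 3:1 of Message Control). A field value of 3
 * yields 1 << 3 = 8 vectors; the largest defined value, 5, yields 32.
 */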
486
487/*
488 * Architecture override returns true when the PCI MSI message should be
489 * written by the generic restore function.
490 */
491bool __weak arch_restore_msi_irqs(struct pci_dev *dev)
492{
493	return true;
494}
495
496void __pci_restore_msi_state(struct pci_dev *dev)
497{
498	struct msi_desc *entry;
499	u16 control;
500
501	if (!dev->msi_enabled)
502		return;
503
504	entry = irq_get_msi_desc(dev->irq);
505
506	pci_intx_for_msi(dev, 0);
507	pci_msi_set_enable(dev, 0);
508	if (arch_restore_msi_irqs(dev))
509		__pci_write_msi_msg(entry, &entry->msg);
510
511	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
512	pci_msi_update_mask(entry, 0, 0);
513	control &= ~PCI_MSI_FLAGS_QSIZE;
514	control |= (entry->pci.msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
515	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
516}
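/*
 * The restore helpers in this file are used when previously saved config
 * space is written back (for example after a suspend/resume cycle or a
 * function reset), so the MSI/MSI-X setup programmed at enable time is
 * re-established without going through the allocation paths again.
 */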
517
518void pci_msi_shutdown(struct pci_dev *dev)
519{
520	struct msi_desc *desc;
521
522	if (!pci_msi_enable || !dev || !dev->msi_enabled)
523		return;
524
525	pci_msi_set_enable(dev, 0);
526	pci_intx_for_msi(dev, 1);
527	dev->msi_enabled = 0;
528
 529	/* Leave the device with MSI unmasked, matching its initial state */
530	desc = msi_first_desc(&dev->dev, MSI_DESC_ALL);
531	if (!WARN_ON_ONCE(!desc))
532		pci_msi_unmask(desc, msi_multi_mask(desc));
533
534	/* Restore dev->irq to its default pin-assertion IRQ */
535	dev->irq = desc->pci.msi_attrib.default_irq;
536	pcibios_alloc_irq(dev);
537}
538
539/* PCI/MSI-X specific functionality */
540
541static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set)
542{
543	u16 ctrl;
544
545	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
546	ctrl &= ~clear;
547	ctrl |= set;
548	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl);
549}
550
551static void __iomem *msix_map_region(struct pci_dev *dev,
552				     unsigned int nr_entries)
553{
554	resource_size_t phys_addr;
555	u32 table_offset;
556	unsigned long flags;
557	u8 bir;
558
559	pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
560			      &table_offset);
561	bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
562	flags = pci_resource_flags(dev, bir);
563	if (!flags || (flags & IORESOURCE_UNSET))
564		return NULL;
565
566	table_offset &= PCI_MSIX_TABLE_OFFSET;
567	phys_addr = pci_resource_start(dev, bir) + table_offset;
568
569	return ioremap(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
570}
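/*
 * The MSI-X Table Offset/BIR register decoded above packs the BAR index
 * into its low 3 bits and the table offset into the remaining bits. For
 * example a value of 0x00002004 places the table at offset 0x2000 within
 * BAR 4; each of the nr_entries entries mapped here is PCI_MSIX_ENTRY_SIZE
 * (16) bytes.
 */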
571
572/**
573 * msix_prepare_msi_desc - Prepare a half initialized MSI descriptor for operation
574 * @dev:	The PCI device for which the descriptor is prepared
575 * @desc:	The MSI descriptor for preparation
576 *
577 * This is separate from msix_setup_msi_descs() below to handle dynamic
578 * allocations for MSI-X after initial enablement.
579 *
580 * Ideally the whole MSI-X setup would work that way, but there is no way to
581 * support this for the legacy arch_setup_msi_irqs() mechanism and for the
582 * fake irq domains like the x86 XEN one. Sigh...
583 *
584 * The descriptor is zeroed and only @desc::msi_index and @desc::affinity
585 * are set. When called from msix_setup_msi_descs() then the is_virtual
586 * attribute is initialized as well.
587 *
588 * Fill in the rest.
589 */
590void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc)
591{
592	desc->nvec_used				= 1;
593	desc->pci.msi_attrib.is_msix		= 1;
594	desc->pci.msi_attrib.is_64		= 1;
595	desc->pci.msi_attrib.default_irq	= dev->irq;
596	desc->pci.mask_base			= dev->msix_base;
597	desc->pci.msi_attrib.can_mask		= !pci_msi_ignore_mask &&
598						  !desc->pci.msi_attrib.is_virtual;
599
600	if (desc->pci.msi_attrib.can_mask) {
601		void __iomem *addr = pci_msix_desc_addr(desc);
602
603		desc->pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
604	}
605}
606
607static int msix_setup_msi_descs(struct pci_dev *dev, struct msix_entry *entries,
608				int nvec, struct irq_affinity_desc *masks)
609{
610	int ret = 0, i, vec_count = pci_msix_vec_count(dev);
611	struct irq_affinity_desc *curmsk;
612	struct msi_desc desc;
613
614	memset(&desc, 0, sizeof(desc));
615
616	for (i = 0, curmsk = masks; i < nvec; i++, curmsk++) {
617		desc.msi_index = entries ? entries[i].entry : i;
618		desc.affinity = masks ? curmsk : NULL;
619		desc.pci.msi_attrib.is_virtual = desc.msi_index >= vec_count;
620
621		msix_prepare_msi_desc(dev, &desc);
622
623		ret = msi_insert_msi_desc(&dev->dev, &desc);
624		if (ret)
625			break;
626	}
627	return ret;
628}
629
630static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries)
631{
632	struct msi_desc *desc;
633
634	if (entries) {
635		msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) {
636			entries->vector = desc->irq;
637			entries++;
638		}
639	}
640}
641
642static void msix_mask_all(void __iomem *base, int tsize)
643{
644	u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT;
645	int i;
646
647	if (pci_msi_ignore_mask)
648		return;
649
650	for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE)
651		writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
652}
653
654static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries,
655				 int nvec, struct irq_affinity *affd)
656{
657	struct irq_affinity_desc *masks = NULL;
658	int ret;
659
660	if (affd)
661		masks = irq_create_affinity_masks(nvec, affd);
662
663	msi_lock_descs(&dev->dev);
664	ret = msix_setup_msi_descs(dev, entries, nvec, masks);
665	if (ret)
666		goto out_free;
667
668	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
669	if (ret)
670		goto out_free;
671
672	/* Check if all MSI entries honor device restrictions */
673	ret = msi_verify_entries(dev);
674	if (ret)
675		goto out_free;
676
677	msix_update_entries(dev, entries);
678	goto out_unlock;
679
680out_free:
681	pci_free_msi_irqs(dev);
682out_unlock:
683	msi_unlock_descs(&dev->dev);
684	kfree(masks);
685	return ret;
686}
687
688/**
689 * msix_capability_init - configure device's MSI-X capability
690 * @dev: pointer to the pci_dev data structure of MSI-X device function
691 * @entries: pointer to an array of struct msix_entry entries
692 * @nvec: number of @entries
693 * @affd: Optional pointer to enable automatic affinity assignment
694 *
 695 * Setup the MSI-X capability structure of the device function with the
 696 * requested number of MSI-X IRQs. A return of zero indicates successful
 697 * setup of the requested MSI-X entries with allocated IRQs; non-zero otherwise.
698 **/
699static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
700				int nvec, struct irq_affinity *affd)
701{
702	int ret, tsize;
703	u16 control;
704
705	/*
706	 * Some devices require MSI-X to be enabled before the MSI-X
707	 * registers can be accessed.  Mask all the vectors to prevent
708	 * interrupts coming in before they're fully set up.
709	 */
710	pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL |
711				    PCI_MSIX_FLAGS_ENABLE);
712
713	/* Mark it enabled so setup functions can query it */
714	dev->msix_enabled = 1;
715
716	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
717	/* Request & Map MSI-X table region */
718	tsize = msix_table_size(control);
719	dev->msix_base = msix_map_region(dev, tsize);
720	if (!dev->msix_base) {
721		ret = -ENOMEM;
722		goto out_disable;
723	}
724
725	ret = msix_setup_interrupts(dev, entries, nvec, affd);
726	if (ret)
727		goto out_disable;
728
729	/* Disable INTX */
730	pci_intx_for_msi(dev, 0);
731
732	/*
733	 * Ensure that all table entries are masked to prevent
734	 * stale entries from firing in a crash kernel.
735	 *
 736	 * Done late to deal with a broken Marvell NVMe device
737	 * which takes the MSI-X mask bits into account even
738	 * when MSI-X is disabled, which prevents MSI delivery.
739	 */
740	msix_mask_all(dev->msix_base, tsize);
741	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
742
743	pcibios_free_irq(dev);
744	return 0;
745
746out_disable:
747	dev->msix_enabled = 0;
748	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0);
749
750	return ret;
751}
752
753static bool pci_msix_validate_entries(struct pci_dev *dev, struct msix_entry *entries,
754				      int nvec, int hwsize)
755{
756	bool nogap;
757	int i, j;
758
759	if (!entries)
760		return true;
761
762	nogap = pci_msi_domain_supports(dev, MSI_FLAG_MSIX_CONTIGUOUS, DENY_LEGACY);
763
764	for (i = 0; i < nvec; i++) {
765		/* Entry within hardware limit? */
766		if (entries[i].entry >= hwsize)
767			return false;
768
769		/* Check for duplicate entries */
770		for (j = i + 1; j < nvec; j++) {
771			if (entries[i].entry == entries[j].entry)
772				return false;
773		}
774		/* Check for unsupported gaps */
775		if (nogap && entries[i].entry != i)
776			return false;
777	}
778	return true;
779}
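/*
 * Worked example for the checks above, assuming a hardware table size of 8
 * and an irq domain that only supports contiguous entries: { {0}, {1}, {2} }
 * passes, while { {0}, {2} } (gap), { {0}, {0} } (duplicate) and { {9} }
 * (beyond hwsize) are all rejected.
 */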
780
781int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec,
782			    int maxvec, struct irq_affinity *affd, int flags)
783{
784	int hwsize, rc, nvec = maxvec;
785
786	if (maxvec < minvec)
787		return -ERANGE;
788
789	if (dev->msi_enabled) {
790		pci_info(dev, "can't enable MSI-X (MSI already enabled)\n");
791		return -EINVAL;
792	}
793
794	if (WARN_ON_ONCE(dev->msix_enabled))
795		return -EINVAL;
796
797	/* Check MSI-X early on irq domain enabled architectures */
798	if (!pci_msi_domain_supports(dev, MSI_FLAG_PCI_MSIX, ALLOW_LEGACY))
799		return -ENOTSUPP;
800
801	if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0)
802		return -EINVAL;
803
804	hwsize = pci_msix_vec_count(dev);
805	if (hwsize < 0)
806		return hwsize;
807
808	if (!pci_msix_validate_entries(dev, entries, nvec, hwsize))
809		return -EINVAL;
810
811	if (hwsize < nvec) {
812		/* Keep the IRQ virtual hackery working */
813		if (flags & PCI_IRQ_VIRTUAL)
814			hwsize = nvec;
815		else
816			nvec = hwsize;
817	}
818
819	if (nvec < minvec)
820		return -ENOSPC;
821
822	rc = pci_setup_msi_context(dev);
823	if (rc)
824		return rc;
825
826	if (!pci_setup_msix_device_domain(dev, hwsize))
827		return -ENODEV;
828
829	for (;;) {
830		if (affd) {
831			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
832			if (nvec < minvec)
833				return -ENOSPC;
834		}
835
836		rc = msix_capability_init(dev, entries, nvec, affd);
837		if (rc == 0)
838			return nvec;
839
840		if (rc < 0)
841			return rc;
842		if (rc < minvec)
843			return -ENOSPC;
844
845		nvec = rc;
846	}
847}
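/*
 * As with MSI, drivers typically use pci_alloc_irq_vectors(pdev, min, max,
 * PCI_IRQ_MSIX) plus pci_irq_vector() instead of passing an explicit
 * msix_entry array; the array form is mainly useful when specific table
 * entries have to be selected.
 */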
848
849void __pci_restore_msix_state(struct pci_dev *dev)
850{
851	struct msi_desc *entry;
852	bool write_msg;
853
854	if (!dev->msix_enabled)
855		return;
856
 857	/* Disable INTx and enable MSI-X with all table entries masked */
858	pci_intx_for_msi(dev, 0);
859	pci_msix_clear_and_set_ctrl(dev, 0,
860				PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
861
862	write_msg = arch_restore_msi_irqs(dev);
863
864	msi_lock_descs(&dev->dev);
865	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
866		if (write_msg)
867			__pci_write_msi_msg(entry, &entry->msg);
868		pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl);
869	}
870	msi_unlock_descs(&dev->dev);
871
872	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
873}
874
875void pci_msix_shutdown(struct pci_dev *dev)
876{
877	struct msi_desc *desc;
878
879	if (!pci_msi_enable || !dev || !dev->msix_enabled)
880		return;
881
882	if (pci_dev_is_disconnected(dev)) {
883		dev->msix_enabled = 0;
884		return;
885	}
886
 887	/* Leave the device with MSI-X vectors masked, matching their initial state */
888	msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL)
889		pci_msix_mask(desc);
890
891	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
892	pci_intx_for_msi(dev, 1);
893	dev->msix_enabled = 0;
894	pcibios_alloc_irq(dev);
895}
896
897/* Common interfaces */
898
899void pci_free_msi_irqs(struct pci_dev *dev)
900{
901	pci_msi_teardown_msi_irqs(dev);
902
903	if (dev->msix_base) {
904		iounmap(dev->msix_base);
905		dev->msix_base = NULL;
906	}
907}
908
909/* Misc. infrastructure */
910
911struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
912{
913	return to_pci_dev(desc->dev);
914}
915EXPORT_SYMBOL(msi_desc_to_pci_dev);
916
917void pci_no_msi(void)
918{
919	pci_msi_enable = 0;
920}