Loading...
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2014 Intel Corp.
4 * Author: Jiang Liu <jiang.liu@linux.intel.com>
5 *
6 * This file is licensed under GPLv2.
7 *
8 * This file contains common code to support Message Signaled Interrupts for
9 * PCI compatible and non PCI compatible devices.
10 */
11#include <linux/types.h>
12#include <linux/device.h>
13#include <linux/irq.h>
14#include <linux/irqdomain.h>
15#include <linux/msi.h>
16#include <linux/slab.h>
17#include <linux/sysfs.h>
18#include <linux/pci.h>
19
20#include "internals.h"
21
/**
 * struct msi_ctrl - MSI internal management control structure
 * @domid:	ID of the domain on which management operations should be done
 * @first:	First (hardware) slot index to operate on (inclusive)
 * @last:	Last (hardware) slot index to operate on (inclusive)
 * @nirqs:	The number of Linux interrupts to allocate. Can be larger
 *		than the range due to PCI/multi-MSI.
 *
 * msi_ctrl_valid() rejects a control structure when @domid is out of range
 * or when @first/@last are not ordered or exceed the hardware size of the
 * domain.
 */
struct msi_ctrl {
	unsigned int			domid;
	unsigned int			first;
	unsigned int			last;
	unsigned int			nirqs;
};
36
/*
 * Invalid Xarray index which is outside of any searchable range. It is
 * written to msi_device_data::__iter_idx to invalidate the index cached by
 * the descriptor iterator.
 */
#define MSI_XA_MAX_INDEX	(ULONG_MAX - 1)
/*
 * The maximum domain size. Also used as the hardware size when a domain
 * does not provide one.
 */
#define MSI_XA_DOMAIN_SIZE	(MSI_MAX_INDEX + 1)

/* Forward declarations */
static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl);
static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid);
static inline int msi_sysfs_create_group(struct device *dev);
45
46
47/**
48 * msi_alloc_desc - Allocate an initialized msi_desc
49 * @dev: Pointer to the device for which this is allocated
50 * @nvec: The number of vectors used in this entry
51 * @affinity: Optional pointer to an affinity mask array size of @nvec
52 *
53 * If @affinity is not %NULL then an affinity array[@nvec] is allocated
54 * and the affinity masks and flags from @affinity are copied.
55 *
56 * Return: pointer to allocated &msi_desc on success or %NULL on failure
57 */
58static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec,
59 const struct irq_affinity_desc *affinity)
60{
61 struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
62
63 if (!desc)
64 return NULL;
65
66 desc->dev = dev;
67 desc->nvec_used = nvec;
68 if (affinity) {
69 desc->affinity = kmemdup(affinity, nvec * sizeof(*desc->affinity), GFP_KERNEL);
70 if (!desc->affinity) {
71 kfree(desc);
72 return NULL;
73 }
74 }
75 return desc;
76}
77
/*
 * Free a descriptor together with its affinity array. The affinity pointer
 * is NULL when msi_alloc_desc() was invoked without affinity information,
 * which kfree() handles.
 */
static void msi_free_desc(struct msi_desc *desc)
{
	kfree(desc->affinity);
	kfree(desc);
}
83
84static int msi_insert_desc(struct device *dev, struct msi_desc *desc,
85 unsigned int domid, unsigned int index)
86{
87 struct msi_device_data *md = dev->msi.data;
88 struct xarray *xa = &md->__domains[domid].store;
89 unsigned int hwsize;
90 int ret;
91
92 hwsize = msi_domain_get_hwsize(dev, domid);
93
94 if (index == MSI_ANY_INDEX) {
95 struct xa_limit limit = { .min = 0, .max = hwsize - 1 };
96 unsigned int index;
97
98 /* Let the xarray allocate a free index within the limit */
99 ret = xa_alloc(xa, &index, desc, limit, GFP_KERNEL);
100 if (ret)
101 goto fail;
102
103 desc->msi_index = index;
104 return 0;
105 } else {
106 if (index >= hwsize) {
107 ret = -ERANGE;
108 goto fail;
109 }
110
111 desc->msi_index = index;
112 ret = xa_insert(xa, index, desc, GFP_KERNEL);
113 if (ret)
114 goto fail;
115 return 0;
116 }
117fail:
118 msi_free_desc(desc);
119 return ret;
120}
121
122/**
123 * msi_domain_insert_msi_desc - Allocate and initialize a MSI descriptor and
124 * insert it at @init_desc->msi_index
125 *
126 * @dev: Pointer to the device for which the descriptor is allocated
127 * @domid: The id of the interrupt domain to which the desriptor is added
128 * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor
129 *
130 * Return: 0 on success or an appropriate failure code.
131 */
132int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid,
133 struct msi_desc *init_desc)
134{
135 struct msi_desc *desc;
136
137 lockdep_assert_held(&dev->msi.data->mutex);
138
139 desc = msi_alloc_desc(dev, init_desc->nvec_used, init_desc->affinity);
140 if (!desc)
141 return -ENOMEM;
142
143 /* Copy type specific data to the new descriptor. */
144 desc->pci = init_desc->pci;
145
146 return msi_insert_desc(dev, desc, domid, init_desc->msi_index);
147}
148
149static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
150{
151 switch (filter) {
152 case MSI_DESC_ALL:
153 return true;
154 case MSI_DESC_NOTASSOCIATED:
155 return !desc->irq;
156 case MSI_DESC_ASSOCIATED:
157 return !!desc->irq;
158 }
159 WARN_ON_ONCE(1);
160 return false;
161}
162
163static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl)
164{
165 unsigned int hwsize;
166
167 if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS ||
168 (dev->msi.domain &&
169 !dev->msi.data->__domains[ctrl->domid].domain)))
170 return false;
171
172 hwsize = msi_domain_get_hwsize(dev, ctrl->domid);
173 if (WARN_ON_ONCE(ctrl->first > ctrl->last ||
174 ctrl->first >= hwsize ||
175 ctrl->last >= hwsize))
176 return false;
177 return true;
178}
179
180static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl)
181{
182 struct msi_desc *desc;
183 struct xarray *xa;
184 unsigned long idx;
185
186 lockdep_assert_held(&dev->msi.data->mutex);
187
188 if (!msi_ctrl_valid(dev, ctrl))
189 return;
190
191 xa = &dev->msi.data->__domains[ctrl->domid].store;
192 xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
193 xa_erase(xa, idx);
194
195 /* Leak the descriptor when it is still referenced */
196 if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
197 continue;
198 msi_free_desc(desc);
199 }
200}
201
202/**
203 * msi_domain_free_msi_descs_range - Free a range of MSI descriptors of a device in an irqdomain
204 * @dev: Device for which to free the descriptors
205 * @domid: Id of the domain to operate on
206 * @first: Index to start freeing from (inclusive)
207 * @last: Last index to be freed (inclusive)
208 */
209void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid,
210 unsigned int first, unsigned int last)
211{
212 struct msi_ctrl ctrl = {
213 .domid = domid,
214 .first = first,
215 .last = last,
216 };
217
218 msi_domain_free_descs(dev, &ctrl);
219}
220
221/**
222 * msi_domain_add_simple_msi_descs - Allocate and initialize MSI descriptors
223 * @dev: Pointer to the device for which the descriptors are allocated
224 * @ctrl: Allocation control struct
225 *
226 * Return: 0 on success or an appropriate failure code.
227 */
228static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *ctrl)
229{
230 struct msi_desc *desc;
231 unsigned int idx;
232 int ret;
233
234 lockdep_assert_held(&dev->msi.data->mutex);
235
236 if (!msi_ctrl_valid(dev, ctrl))
237 return -EINVAL;
238
239 for (idx = ctrl->first; idx <= ctrl->last; idx++) {
240 desc = msi_alloc_desc(dev, 1, NULL);
241 if (!desc)
242 goto fail_mem;
243 ret = msi_insert_desc(dev, desc, ctrl->domid, idx);
244 if (ret)
245 goto fail;
246 }
247 return 0;
248
249fail_mem:
250 ret = -ENOMEM;
251fail:
252 msi_domain_free_descs(dev, ctrl);
253 return ret;
254}
255
/* Copy the MSI message stored in the descriptor @entry to @msg */
void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	*msg = entry->msg;
}
260
/*
 * Copy the MSI message stored in the descriptor of Linux interrupt @irq
 * to @msg. The descriptor returned by irq_get_msi_desc() is used without
 * a NULL check.
 */
void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__get_cached_msi_msg(irq_get_msi_desc(irq), msg);
}
EXPORT_SYMBOL_GPL(get_cached_msi_msg);
268
269static void msi_device_data_release(struct device *dev, void *res)
270{
271 struct msi_device_data *md = res;
272 int i;
273
274 for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) {
275 msi_remove_device_irq_domain(dev, i);
276 WARN_ON_ONCE(!xa_empty(&md->__domains[i].store));
277 xa_destroy(&md->__domains[i].store);
278 }
279 dev->msi.data = NULL;
280}
281
282/**
283 * msi_setup_device_data - Setup MSI device data
284 * @dev: Device for which MSI device data should be set up
285 *
286 * Return: 0 on success, appropriate error code otherwise
287 *
288 * This can be called more than once for @dev. If the MSI device data is
289 * already allocated the call succeeds. The allocated memory is
290 * automatically released when the device is destroyed.
291 */
292int msi_setup_device_data(struct device *dev)
293{
294 struct msi_device_data *md;
295 int ret, i;
296
297 if (dev->msi.data)
298 return 0;
299
300 md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL);
301 if (!md)
302 return -ENOMEM;
303
304 ret = msi_sysfs_create_group(dev);
305 if (ret) {
306 devres_free(md);
307 return ret;
308 }
309
310 for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++)
311 xa_init_flags(&md->__domains[i].store, XA_FLAGS_ALLOC);
312
313 /*
314 * If @dev::msi::domain is set and is a global MSI domain, copy the
315 * pointer into the domain array so all code can operate on domain
316 * ids. The NULL pointer check is required to keep the legacy
317 * architecture specific PCI/MSI support working.
318 */
319 if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain))
320 md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain;
321
322 mutex_init(&md->mutex);
323 dev->msi.data = md;
324 devres_add(dev, md);
325 return 0;
326}
327
/**
 * msi_lock_descs - Lock the MSI descriptor storage of a device
 * @dev:	Device to operate on
 *
 * Takes the mutex in @dev::msi::data. The MSI device data is dereferenced
 * without a NULL check.
 */
void msi_lock_descs(struct device *dev)
{
	mutex_lock(&dev->msi.data->mutex);
}
EXPORT_SYMBOL_GPL(msi_lock_descs);
337
/**
 * msi_unlock_descs - Unlock the MSI descriptor storage of a device
 * @dev:	Device to operate on
 *
 * The index cached by the descriptor iterator is invalidated before the
 * mutex is released.
 */
void msi_unlock_descs(struct device *dev)
{
	/* Invalidate the index which was cached by the iterator */
	dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX;
	mutex_unlock(&dev->msi.data->mutex);
}
EXPORT_SYMBOL_GPL(msi_unlock_descs);
349
350static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid,
351 enum msi_desc_filter filter)
352{
353 struct xarray *xa = &md->__domains[domid].store;
354 struct msi_desc *desc;
355
356 xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) {
357 if (msi_desc_match(desc, filter))
358 return desc;
359 }
360 md->__iter_idx = MSI_XA_MAX_INDEX;
361 return NULL;
362}
363
/**
 * msi_domain_first_desc - Get the first MSI descriptor of an irqdomain associated to a device
 * @dev:	Device to operate on
 * @domid:	The id of the interrupt domain which should be walked.
 * @filter:	Descriptor state filter
 *
 * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs()
 * must be invoked before the call.
 *
 * Return: Pointer to the first MSI descriptor matching the search
 *	   criteria, NULL if none found.
 */
struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
				       enum msi_desc_filter filter)
{
	struct msi_device_data *md = dev->msi.data;

	/* Missing MSI device data or an out of range domain id is a caller bug */
	if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
		return NULL;

	lockdep_assert_held(&md->mutex);

	/* Restart the walk at index 0 */
	md->__iter_idx = 0;
	return msi_find_desc(md, domid, filter);
}
EXPORT_SYMBOL_GPL(msi_domain_first_desc);
390
/**
 * msi_next_desc - Get the next MSI descriptor of a device
 * @dev:	Device to operate on
 * @domid:	The id of the interrupt domain which should be walked.
 * @filter:	Descriptor state filter
 *
 * The first invocation of msi_next_desc() has to be preceded by a
 * successful invocation of msi_domain_first_desc(). Consecutive invocations
 * are only valid if the previous one was successful. All these operations
 * have to be done within the same MSI mutex held region.
 *
 * Return: Pointer to the next MSI descriptor matching the search
 *	   criteria, NULL if none found.
 */
struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid,
			       enum msi_desc_filter filter)
{
	struct msi_device_data *md = dev->msi.data;

	if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
		return NULL;

	lockdep_assert_held(&md->mutex);

	/*
	 * Stop when the cached index is at or beyond the last valid MSI
	 * index. This also covers the invalidated state MSI_XA_MAX_INDEX.
	 */
	if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX)
		return NULL;

	/* Continue the search behind the previously returned descriptor */
	md->__iter_idx++;
	return msi_find_desc(md, domid, filter);
}
EXPORT_SYMBOL_GPL(msi_next_desc);
422
423/**
424 * msi_domain_get_virq - Lookup the Linux interrupt number for a MSI index on a interrupt domain
425 * @dev: Device to operate on
426 * @domid: Domain ID of the interrupt domain associated to the device
427 * @index: MSI interrupt index to look for (0-based)
428 *
429 * Return: The Linux interrupt number on success (> 0), 0 if not found
430 */
431unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index)
432{
433 struct msi_desc *desc;
434 unsigned int ret = 0;
435 bool pcimsi = false;
436 struct xarray *xa;
437
438 if (!dev->msi.data)
439 return 0;
440
441 if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
442 return 0;
443
444 /* This check is only valid for the PCI default MSI domain */
445 if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN)
446 pcimsi = to_pci_dev(dev)->msi_enabled;
447
448 msi_lock_descs(dev);
449 xa = &dev->msi.data->__domains[domid].store;
450 desc = xa_load(xa, pcimsi ? 0 : index);
451 if (desc && desc->irq) {
452 /*
453 * PCI-MSI has only one descriptor for multiple interrupts.
454 * PCI-MSIX and platform MSI use a descriptor per
455 * interrupt.
456 */
457 if (pcimsi) {
458 if (index < desc->nvec_used)
459 ret = desc->irq + index;
460 } else {
461 ret = desc->irq;
462 }
463 }
464
465 msi_unlock_descs(dev);
466 return ret;
467}
468EXPORT_SYMBOL_GPL(msi_domain_get_virq);
469
470#ifdef CONFIG_SYSFS
/*
 * The "msi_irqs" group starts out without attributes. The per interrupt
 * files are added to and removed from the group by name in
 * msi_sysfs_populate_desc() and msi_sysfs_remove_desc().
 */
static struct attribute *msi_dev_attrs[] = {
	NULL
};

static const struct attribute_group msi_irqs_group = {
	.name	= "msi_irqs",
	.attrs	= msi_dev_attrs,
};
479
/* Create the device managed "msi_irqs" sysfs group for @dev */
static inline int msi_sysfs_create_group(struct device *dev)
{
	return devm_device_add_group(dev, &msi_irqs_group);
}
484
485static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
486 char *buf)
487{
488 /* MSI vs. MSIX is per device not per interrupt */
489 bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false;
490
491 return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi");
492}
493
494static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
495{
496 struct device_attribute *attrs = desc->sysfs_attrs;
497 int i;
498
499 if (!attrs)
500 return;
501
502 desc->sysfs_attrs = NULL;
503 for (i = 0; i < desc->nvec_used; i++) {
504 if (attrs[i].show)
505 sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
506 kfree(attrs[i].attr.name);
507 }
508 kfree(attrs);
509}
510
511static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
512{
513 struct device_attribute *attrs;
514 int ret, i;
515
516 attrs = kcalloc(desc->nvec_used, sizeof(*attrs), GFP_KERNEL);
517 if (!attrs)
518 return -ENOMEM;
519
520 desc->sysfs_attrs = attrs;
521 for (i = 0; i < desc->nvec_used; i++) {
522 sysfs_attr_init(&attrs[i].attr);
523 attrs[i].attr.name = kasprintf(GFP_KERNEL, "%d", desc->irq + i);
524 if (!attrs[i].attr.name) {
525 ret = -ENOMEM;
526 goto fail;
527 }
528
529 attrs[i].attr.mode = 0444;
530 attrs[i].show = msi_mode_show;
531
532 ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
533 if (ret) {
534 attrs[i].show = NULL;
535 goto fail;
536 }
537 }
538 return 0;
539
540fail:
541 msi_sysfs_remove_desc(dev, desc);
542 return ret;
543}
544
545#ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS
546/**
547 * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
548 * @dev: The device (PCI, platform etc) which will get sysfs entries
549 */
550int msi_device_populate_sysfs(struct device *dev)
551{
552 struct msi_desc *desc;
553 int ret;
554
555 msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) {
556 if (desc->sysfs_attrs)
557 continue;
558 ret = msi_sysfs_populate_desc(dev, desc);
559 if (ret)
560 return ret;
561 }
562 return 0;
563}
564
565/**
566 * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device
567 * @dev: The device (PCI, platform etc) for which to remove
568 * sysfs entries
569 */
570void msi_device_destroy_sysfs(struct device *dev)
571{
572 struct msi_desc *desc;
573
574 msi_for_each_desc(desc, dev, MSI_DESC_ALL)
575 msi_sysfs_remove_desc(dev, desc);
576}
#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */
578#else /* CONFIG_SYSFS */
/* Without sysfs support the helpers are no-ops which report success */
static inline int msi_sysfs_create_group(struct device *dev) { return 0; }
static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) { return 0; }
static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) { }
582#endif /* !CONFIG_SYSFS */
583
584static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid)
585{
586 struct irq_domain *domain;
587
588 lockdep_assert_held(&dev->msi.data->mutex);
589
590 if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS))
591 return NULL;
592
593 domain = dev->msi.data->__domains[domid].domain;
594 if (!domain)
595 return NULL;
596
597 if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain)))
598 return NULL;
599
600 return domain;
601}
602
603static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid)
604{
605 struct msi_domain_info *info;
606 struct irq_domain *domain;
607
608 domain = msi_get_device_domain(dev, domid);
609 if (domain) {
610 info = domain->host_data;
611 return info->hwsize;
612 }
613 /* No domain, default to MSI_XA_DOMAIN_SIZE */
614 return MSI_XA_DOMAIN_SIZE;
615}
616
617static inline void irq_chip_write_msi_msg(struct irq_data *data,
618 struct msi_msg *msg)
619{
620 data->chip->irq_write_msi_msg(data, msg);
621}
622
623static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
624{
625 struct msi_domain_info *info = domain->host_data;
626
627 /*
628 * If the MSI provider has messed with the second message and
629 * not advertized that it is level-capable, signal the breakage.
630 */
631 WARN_ON(!((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
632 (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)) &&
633 (msg[1].address_lo || msg[1].address_hi || msg[1].data));
634}
635
636/**
637 * msi_domain_set_affinity - Generic affinity setter function for MSI domains
638 * @irq_data: The irq data associated to the interrupt
639 * @mask: The affinity mask to set
640 * @force: Flag to enforce setting (disable online checks)
641 *
642 * Intended to be used by MSI interrupt controllers which are
643 * implemented with hierarchical domains.
644 *
645 * Return: IRQ_SET_MASK_* result code
646 */
647int msi_domain_set_affinity(struct irq_data *irq_data,
648 const struct cpumask *mask, bool force)
649{
650 struct irq_data *parent = irq_data->parent_data;
651 struct msi_msg msg[2] = { [1] = { }, };
652 int ret;
653
654 ret = parent->chip->irq_set_affinity(parent, mask, force);
655 if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
656 BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
657 msi_check_level(irq_data->domain, msg);
658 irq_chip_write_msi_msg(irq_data, msg);
659 }
660
661 return ret;
662}
663
/*
 * irq_domain_ops::activate callback: compose the MSI message for
 * @irq_data, sanity check it and write it via the interrupt chip.
 * @domain and @early are not used. Always returns 0; a failure to
 * compose the message is treated as a BUG.
 */
static int msi_domain_activate(struct irq_domain *domain,
			       struct irq_data *irq_data, bool early)
{
	/* The second message stays empty unless the composer fills it in */
	struct msi_msg msg[2] = { [1] = { }, };

	BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
	msi_check_level(irq_data->domain, msg);
	irq_chip_write_msi_msg(irq_data, msg);
	return 0;
}
674
/*
 * irq_domain_ops::deactivate callback: write an all zero MSI message pair
 * via the interrupt chip. @domain is not used.
 */
static void msi_domain_deactivate(struct irq_domain *domain,
				  struct irq_data *irq_data)
{
	struct msi_msg msg[2];

	memset(msg, 0, sizeof(msg));
	irq_chip_write_msi_msg(irq_data, msg);
}
683
684static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
685 unsigned int nr_irqs, void *arg)
686{
687 struct msi_domain_info *info = domain->host_data;
688 struct msi_domain_ops *ops = info->ops;
689 irq_hw_number_t hwirq = ops->get_hwirq(info, arg);
690 int i, ret;
691
692 if (irq_find_mapping(domain, hwirq) > 0)
693 return -EEXIST;
694
695 if (domain->parent) {
696 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
697 if (ret < 0)
698 return ret;
699 }
700
701 for (i = 0; i < nr_irqs; i++) {
702 ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
703 if (ret < 0) {
704 if (ops->msi_free) {
705 for (i--; i > 0; i--)
706 ops->msi_free(domain, info, virq + i);
707 }
708 irq_domain_free_irqs_top(domain, virq, nr_irqs);
709 return ret;
710 }
711 }
712
713 return 0;
714}
715
716static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
717 unsigned int nr_irqs)
718{
719 struct msi_domain_info *info = domain->host_data;
720 int i;
721
722 if (info->ops->msi_free) {
723 for (i = 0; i < nr_irqs; i++)
724 info->ops->msi_free(domain, info, virq + i);
725 }
726 irq_domain_free_irqs_top(domain, virq, nr_irqs);
727}
728
/* irq domain callbacks installed by __msi_create_irq_domain() */
static const struct irq_domain_ops msi_domain_ops = {
	.alloc		= msi_domain_alloc,
	.free		= msi_domain_free,
	.activate	= msi_domain_activate,
	.deactivate	= msi_domain_deactivate,
};
735
/* Default get_hwirq() op: the hardware interrupt number is taken from @arg */
static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
						msi_alloc_info_t *arg)
{
	return arg->hwirq;
}
741
/* Default msi_prepare() op: clear the allocation info and report success */
static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev,
				  int nvec, msi_alloc_info_t *arg)
{
	memset(arg, 0, sizeof(*arg));
	return 0;
}
748
/* Default set_desc() op: store the descriptor pointer in the allocation info */
static void msi_domain_ops_set_desc(msi_alloc_info_t *arg,
				    struct msi_desc *desc)
{
	arg->desc = desc;
}
754
755static int msi_domain_ops_init(struct irq_domain *domain,
756 struct msi_domain_info *info,
757 unsigned int virq, irq_hw_number_t hwirq,
758 msi_alloc_info_t *arg)
759{
760 irq_domain_set_hwirq_and_chip(domain, virq, hwirq, info->chip,
761 info->chip_data);
762 if (info->handler && info->handler_name) {
763 __irq_set_handler(virq, info->handler, 0, info->handler_name);
764 if (info->handler_data)
765 irq_set_handler_data(virq, info->handler_data);
766 }
767 return 0;
768}
769
/*
 * Default MSI domain ops. Used as a whole when a domain info provides no
 * ops, and as the source for filling in missing callbacks in
 * msi_domain_update_dom_ops().
 */
static struct msi_domain_ops msi_domain_ops_default = {
	.get_hwirq		= msi_domain_ops_get_hwirq,
	.msi_init		= msi_domain_ops_init,
	.msi_prepare		= msi_domain_ops_prepare,
	.set_desc		= msi_domain_ops_set_desc,
};
776
777static void msi_domain_update_dom_ops(struct msi_domain_info *info)
778{
779 struct msi_domain_ops *ops = info->ops;
780
781 if (ops == NULL) {
782 info->ops = &msi_domain_ops_default;
783 return;
784 }
785
786 if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS))
787 return;
788
789 if (ops->get_hwirq == NULL)
790 ops->get_hwirq = msi_domain_ops_default.get_hwirq;
791 if (ops->msi_init == NULL)
792 ops->msi_init = msi_domain_ops_default.msi_init;
793 if (ops->msi_prepare == NULL)
794 ops->msi_prepare = msi_domain_ops_default.msi_prepare;
795 if (ops->set_desc == NULL)
796 ops->set_desc = msi_domain_ops_default.set_desc;
797}
798
799static void msi_domain_update_chip_ops(struct msi_domain_info *info)
800{
801 struct irq_chip *chip = info->chip;
802
803 BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask);
804 if (!chip->irq_set_affinity)
805 chip->irq_set_affinity = msi_domain_set_affinity;
806}
807
808static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode,
809 struct msi_domain_info *info,
810 unsigned int flags,
811 struct irq_domain *parent)
812{
813 struct irq_domain *domain;
814
815 if (info->hwsize > MSI_XA_DOMAIN_SIZE)
816 return NULL;
817
818 /*
819 * Hardware size 0 is valid for backwards compatibility and for
820 * domains which are not backed by a hardware table. Grant the
821 * maximum index space.
822 */
823 if (!info->hwsize)
824 info->hwsize = MSI_XA_DOMAIN_SIZE;
825
826 msi_domain_update_dom_ops(info);
827 if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
828 msi_domain_update_chip_ops(info);
829
830 domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0,
831 fwnode, &msi_domain_ops, info);
832
833 if (domain) {
834 if (!domain->name && info->chip)
835 domain->name = info->chip->name;
836 irq_domain_update_bus_token(domain, info->bus_token);
837 }
838
839 return domain;
840}
841
/**
 * msi_create_irq_domain - Create an MSI interrupt domain
 * @fwnode:	Optional fwnode of the interrupt controller
 * @info:	MSI domain info
 * @parent:	Parent irq domain
 *
 * The domain is created without additional domain flags. @info can be
 * modified: missing ops are filled in and a hardware size of 0 is
 * replaced by the maximum size.
 *
 * Return: pointer to the created &struct irq_domain or %NULL on failure
 */
struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
					 struct msi_domain_info *info,
					 struct irq_domain *parent)
{
	return __msi_create_irq_domain(fwnode, info, 0, parent);
}
856
857/**
858 * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down
859 * in the domain hierarchy
860 * @dev: The device for which the domain should be created
861 * @domain: The domain in the hierarchy this op is being called on
862 * @msi_parent_domain: The IRQ_DOMAIN_FLAG_MSI_PARENT domain for the child to
863 * be created
864 * @msi_child_info: The MSI domain info of the IRQ_DOMAIN_FLAG_MSI_DEVICE
865 * domain to be created
866 *
867 * Return: true on success, false otherwise
868 *
869 * This is the most complex problem of per device MSI domains and the
870 * underlying interrupt domain hierarchy:
871 *
872 * The device domain to be initialized requests the broadest feature set
873 * possible and the underlying domain hierarchy puts restrictions on it.
874 *
875 * That's trivial for a simple parent->child relationship, but it gets
876 * interesting with an intermediate domain: root->parent->child. The
877 * intermediate 'parent' can expand the capabilities which the 'root'
878 * domain is providing. So that creates a classic hen and egg problem:
879 * Which entity is doing the restrictions/expansions?
880 *
881 * One solution is to let the root domain handle the initialization that's
882 * why there is the @domain and the @msi_parent_domain pointer.
883 */
884bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
885 struct irq_domain *msi_parent_domain,
886 struct msi_domain_info *msi_child_info)
887{
888 struct irq_domain *parent = domain->parent;
889
890 if (WARN_ON_ONCE(!parent || !parent->msi_parent_ops ||
891 !parent->msi_parent_ops->init_dev_msi_info))
892 return false;
893
894 return parent->msi_parent_ops->init_dev_msi_info(dev, parent, msi_parent_domain,
895 msi_child_info);
896}
897
898/**
899 * msi_create_device_irq_domain - Create a device MSI interrupt domain
900 * @dev: Pointer to the device
901 * @domid: Domain id
902 * @template: MSI domain info bundle used as template
903 * @hwsize: Maximum number of MSI table entries (0 if unknown or unlimited)
904 * @domain_data: Optional pointer to domain specific data which is set in
905 * msi_domain_info::data
906 * @chip_data: Optional pointer to chip specific data which is set in
907 * msi_domain_info::chip_data
908 *
909 * Return: True on success, false otherwise
910 *
911 * There is no firmware node required for this interface because the per
912 * device domains are software constructs which are actually closer to the
913 * hardware reality than any firmware can describe them.
914 *
915 * The domain name and the irq chip name for a MSI device domain are
916 * composed by: "$(PREFIX)$(CHIPNAME)-$(DEVNAME)"
917 *
918 * $PREFIX: Optional prefix provided by the underlying MSI parent domain
919 * via msi_parent_ops::prefix. If that pointer is NULL the prefix
920 * is empty.
921 * $CHIPNAME: The name of the irq_chip in @template
922 * $DEVNAME: The name of the device
923 *
924 * This results in understandable chip names and hardware interrupt numbers
925 * in e.g. /proc/interrupts
926 *
927 * PCI-MSI-0000:00:1c.0 0-edge Parent domain has no prefix
928 * IR-PCI-MSI-0000:00:1c.4 0-edge Same with interrupt remapping prefix 'IR-'
929 *
930 * IR-PCI-MSIX-0000:3d:00.0 0-edge Hardware interrupt numbers reflect
931 * IR-PCI-MSIX-0000:3d:00.0 1-edge the real MSI-X index on that device
932 * IR-PCI-MSIX-0000:3d:00.0 2-edge
933 *
934 * On IMS domains the hardware interrupt number is either a table entry
935 * index or a purely software managed index but it is guaranteed to be
936 * unique.
937 *
938 * The domain pointer is stored in @dev::msi::data::__irqdomains[]. All
939 * subsequent operations on the domain depend on the domain id.
940 *
941 * The domain is automatically freed when the device is removed via devres
942 * in the context of @dev::msi::data freeing, but it can also be
943 * independently removed via @msi_remove_device_irq_domain().
944 */
945bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
946 const struct msi_domain_template *template,
947 unsigned int hwsize, void *domain_data,
948 void *chip_data)
949{
950 struct irq_domain *domain, *parent = dev->msi.domain;
951 const struct msi_parent_ops *pops;
952 struct msi_domain_template *bundle;
953 struct fwnode_handle *fwnode;
954
955 if (!irq_domain_is_msi_parent(parent))
956 return false;
957
958 if (domid >= MSI_MAX_DEVICE_IRQDOMAINS)
959 return false;
960
961 bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL);
962 if (!bundle)
963 return false;
964
965 bundle->info.hwsize = hwsize;
966 bundle->info.chip = &bundle->chip;
967 bundle->info.ops = &bundle->ops;
968 bundle->info.data = domain_data;
969 bundle->info.chip_data = chip_data;
970
971 pops = parent->msi_parent_ops;
972 snprintf(bundle->name, sizeof(bundle->name), "%s%s-%s",
973 pops->prefix ? : "", bundle->chip.name, dev_name(dev));
974 bundle->chip.name = bundle->name;
975
976 fwnode = irq_domain_alloc_named_fwnode(bundle->name);
977 if (!fwnode)
978 goto free_bundle;
979
980 if (msi_setup_device_data(dev))
981 goto free_fwnode;
982
983 msi_lock_descs(dev);
984
985 if (WARN_ON_ONCE(msi_get_device_domain(dev, domid)))
986 goto fail;
987
988 if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info))
989 goto fail;
990
991 domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent);
992 if (!domain)
993 goto fail;
994
995 domain->dev = dev;
996 dev->msi.data->__domains[domid].domain = domain;
997 msi_unlock_descs(dev);
998 return true;
999
1000fail:
1001 msi_unlock_descs(dev);
1002free_fwnode:
1003 irq_domain_free_fwnode(fwnode);
1004free_bundle:
1005 kfree(bundle);
1006 return false;
1007}
1008
1009/**
1010 * msi_remove_device_irq_domain - Free a device MSI interrupt domain
1011 * @dev: Pointer to the device
1012 * @domid: Domain id
1013 */
1014void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
1015{
1016 struct fwnode_handle *fwnode = NULL;
1017 struct msi_domain_info *info;
1018 struct irq_domain *domain;
1019
1020 msi_lock_descs(dev);
1021
1022 domain = msi_get_device_domain(dev, domid);
1023
1024 if (!domain || !irq_domain_is_msi_device(domain))
1025 goto unlock;
1026
1027 dev->msi.data->__domains[domid].domain = NULL;
1028 info = domain->host_data;
1029 if (irq_domain_is_msi_device(domain))
1030 fwnode = domain->fwnode;
1031 irq_domain_remove(domain);
1032 irq_domain_free_fwnode(fwnode);
1033 kfree(container_of(info, struct msi_domain_template, info));
1034
1035unlock:
1036 msi_unlock_descs(dev);
1037}
1038
1039/**
1040 * msi_match_device_irq_domain - Match a device irq domain against a bus token
1041 * @dev: Pointer to the device
1042 * @domid: Domain id
1043 * @bus_token: Bus token to match against the domain bus token
1044 *
1045 * Return: True if device domain exists and bus tokens match.
1046 */
1047bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
1048 enum irq_domain_bus_token bus_token)
1049{
1050 struct msi_domain_info *info;
1051 struct irq_domain *domain;
1052 bool ret = false;
1053
1054 msi_lock_descs(dev);
1055 domain = msi_get_device_domain(dev, domid);
1056 if (domain && irq_domain_is_msi_device(domain)) {
1057 info = domain->host_data;
1058 ret = info->bus_token == bus_token;
1059 }
1060 msi_unlock_descs(dev);
1061 return ret;
1062}
1063
1064int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
1065 int nvec, msi_alloc_info_t *arg)
1066{
1067 struct msi_domain_info *info = domain->host_data;
1068 struct msi_domain_ops *ops = info->ops;
1069
1070 return ops->msi_prepare(domain, dev, nvec, arg);
1071}
1072
1073int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
1074 int virq_base, int nvec, msi_alloc_info_t *arg)
1075{
1076 struct msi_domain_info *info = domain->host_data;
1077 struct msi_domain_ops *ops = info->ops;
1078 struct msi_ctrl ctrl = {
1079 .domid = MSI_DEFAULT_DOMAIN,
1080 .first = virq_base,
1081 .last = virq_base + nvec - 1,
1082 };
1083 struct msi_desc *desc;
1084 struct xarray *xa;
1085 int ret, virq;
1086
1087 if (!msi_ctrl_valid(dev, &ctrl))
1088 return -EINVAL;
1089
1090 msi_lock_descs(dev);
1091 ret = msi_domain_add_simple_msi_descs(dev, &ctrl);
1092 if (ret)
1093 goto unlock;
1094
1095 xa = &dev->msi.data->__domains[ctrl.domid].store;
1096
1097 for (virq = virq_base; virq < virq_base + nvec; virq++) {
1098 desc = xa_load(xa, virq);
1099 desc->irq = virq;
1100
1101 ops->set_desc(arg, desc);
1102 ret = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg);
1103 if (ret)
1104 goto fail;
1105
1106 irq_set_msi_desc(virq, desc);
1107 }
1108 msi_unlock_descs(dev);
1109 return 0;
1110
1111fail:
1112 for (--virq; virq >= virq_base; virq--)
1113 irq_domain_free_irqs_common(domain, virq, 1);
1114 msi_domain_free_descs(dev, &ctrl);
1115unlock:
1116 msi_unlock_descs(dev);
1117 return ret;
1118}
1119
1120/*
1121 * Carefully check whether the device can use reservation mode. If
1122 * reservation mode is enabled then the early activation will assign a
1123 * dummy vector to the device. If the PCI/MSI device does not support
1124 * masking of the entry then this can result in spurious interrupts when
1125 * the device driver is not absolutely careful. But even then a malfunction
1126 * of the hardware could result in a spurious interrupt on the dummy vector
1127 * and render the device unusable. If the entry can be masked then the core
1128 * logic will prevent the spurious interrupt and reservation mode can be
1129 * used. For now reservation mode is restricted to PCI/MSI.
1130 */
1131static bool msi_check_reservation_mode(struct irq_domain *domain,
1132 struct msi_domain_info *info,
1133 struct device *dev)
1134{
1135 struct msi_desc *desc;
1136
1137 switch(domain->bus_token) {
1138 case DOMAIN_BUS_PCI_MSI:
1139 case DOMAIN_BUS_PCI_DEVICE_MSI:
1140 case DOMAIN_BUS_PCI_DEVICE_MSIX:
1141 case DOMAIN_BUS_VMD_MSI:
1142 break;
1143 default:
1144 return false;
1145 }
1146
1147 if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
1148 return false;
1149
1150 if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
1151 return false;
1152
1153 /*
1154 * Checking the first MSI descriptor is sufficient. MSIX supports
1155 * masking and MSI does so when the can_mask attribute is set.
1156 */
1157 desc = msi_first_desc(dev, MSI_DESC_ALL);
1158 return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask;
1159}
1160
1161static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc,
1162 int allocated)
1163{
1164 switch(domain->bus_token) {
1165 case DOMAIN_BUS_PCI_MSI:
1166 case DOMAIN_BUS_PCI_DEVICE_MSI:
1167 case DOMAIN_BUS_PCI_DEVICE_MSIX:
1168 case DOMAIN_BUS_VMD_MSI:
1169 if (IS_ENABLED(CONFIG_PCI_MSI))
1170 break;
1171 fallthrough;
1172 default:
1173 return -ENOSPC;
1174 }
1175
1176 /* Let a failed PCI multi MSI allocation retry */
1177 if (desc->nvec_used > 1)
1178 return 1;
1179
1180 /* If there was a successful allocation let the caller know */
1181 return allocated ? allocated : -ENOSPC;
1182}
1183
1184#define VIRQ_CAN_RESERVE 0x01
1185#define VIRQ_ACTIVATE 0x02
1186#define VIRQ_NOMASK_QUIRK 0x04
1187
1188static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags)
1189{
1190 struct irq_data *irqd = irq_domain_get_irq_data(domain, virq);
1191 int ret;
1192
1193 if (!(vflags & VIRQ_CAN_RESERVE)) {
1194 irqd_clr_can_reserve(irqd);
1195 if (vflags & VIRQ_NOMASK_QUIRK)
1196 irqd_set_msi_nomask_quirk(irqd);
1197
1198 /*
1199 * If the interrupt is managed but no CPU is available to
1200 * service it, shut it down until better times. Note that
1201 * we only do this on the !RESERVE path as x86 (the only
1202 * architecture using this flag) deals with this in a
1203 * different way by using a catch-all vector.
1204 */
1205 if ((vflags & VIRQ_ACTIVATE) &&
1206 irqd_affinity_is_managed(irqd) &&
1207 !cpumask_intersects(irq_data_get_affinity_mask(irqd),
1208 cpu_online_mask)) {
1209 irqd_set_managed_shutdown(irqd);
1210 return 0;
1211 }
1212 }
1213
1214 if (!(vflags & VIRQ_ACTIVATE))
1215 return 0;
1216
1217 ret = irq_domain_activate_irq(irqd, vflags & VIRQ_CAN_RESERVE);
1218 if (ret)
1219 return ret;
1220 /*
1221 * If the interrupt uses reservation mode, clear the activated bit
1222 * so request_irq() will assign the final vector.
1223 */
1224 if (vflags & VIRQ_CAN_RESERVE)
1225 irqd_clr_activated(irqd);
1226 return 0;
1227}
1228
1229static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain,
1230 struct msi_ctrl *ctrl)
1231{
1232 struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1233 struct msi_domain_info *info = domain->host_data;
1234 struct msi_domain_ops *ops = info->ops;
1235 unsigned int vflags = 0, allocated = 0;
1236 msi_alloc_info_t arg = { };
1237 struct msi_desc *desc;
1238 unsigned long idx;
1239 int i, ret, virq;
1240
1241 ret = msi_domain_prepare_irqs(domain, dev, ctrl->nirqs, &arg);
1242 if (ret)
1243 return ret;
1244
1245 /*
1246 * This flag is set by the PCI layer as we need to activate
1247 * the MSI entries before the PCI layer enables MSI in the
1248 * card. Otherwise the card latches a random msi message.
1249 */
1250 if (info->flags & MSI_FLAG_ACTIVATE_EARLY)
1251 vflags |= VIRQ_ACTIVATE;
1252
1253 /*
1254 * Interrupt can use a reserved vector and will not occupy
1255 * a real device vector until the interrupt is requested.
1256 */
1257 if (msi_check_reservation_mode(domain, info, dev)) {
1258 vflags |= VIRQ_CAN_RESERVE;
1259 /*
1260 * MSI affinity setting requires a special quirk (X86) when
1261 * reservation mode is active.
1262 */
1263 if (info->flags & MSI_FLAG_NOMASK_QUIRK)
1264 vflags |= VIRQ_NOMASK_QUIRK;
1265 }
1266
1267 xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1268 if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED))
1269 continue;
1270
1271 /* This should return -ECONFUSED... */
1272 if (WARN_ON_ONCE(allocated >= ctrl->nirqs))
1273 return -EINVAL;
1274
1275 if (ops->prepare_desc)
1276 ops->prepare_desc(domain, &arg, desc);
1277
1278 ops->set_desc(&arg, desc);
1279
1280 virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
1281 dev_to_node(dev), &arg, false,
1282 desc->affinity);
1283 if (virq < 0)
1284 return msi_handle_pci_fail(domain, desc, allocated);
1285
1286 for (i = 0; i < desc->nvec_used; i++) {
1287 irq_set_msi_desc_off(virq, i, desc);
1288 irq_debugfs_copy_devname(virq + i, dev);
1289 ret = msi_init_virq(domain, virq + i, vflags);
1290 if (ret)
1291 return ret;
1292 }
1293 if (info->flags & MSI_FLAG_DEV_SYSFS) {
1294 ret = msi_sysfs_populate_desc(dev, desc);
1295 if (ret)
1296 return ret;
1297 }
1298 allocated++;
1299 }
1300 return 0;
1301}
1302
1303static int msi_domain_alloc_simple_msi_descs(struct device *dev,
1304 struct msi_domain_info *info,
1305 struct msi_ctrl *ctrl)
1306{
1307 if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS))
1308 return 0;
1309
1310 return msi_domain_add_simple_msi_descs(dev, ctrl);
1311}
1312
1313static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
1314{
1315 struct msi_domain_info *info;
1316 struct msi_domain_ops *ops;
1317 struct irq_domain *domain;
1318 int ret;
1319
1320 if (!msi_ctrl_valid(dev, ctrl))
1321 return -EINVAL;
1322
1323 domain = msi_get_device_domain(dev, ctrl->domid);
1324 if (!domain)
1325 return -ENODEV;
1326
1327 info = domain->host_data;
1328
1329 ret = msi_domain_alloc_simple_msi_descs(dev, info, ctrl);
1330 if (ret)
1331 return ret;
1332
1333 ops = info->ops;
1334 if (ops->domain_alloc_irqs)
1335 return ops->domain_alloc_irqs(domain, dev, ctrl->nirqs);
1336
1337 return __msi_domain_alloc_irqs(dev, domain, ctrl);
1338}
1339
/*
 * Allocate the interrupts described by @ctrl. When the allocation returns
 * a non-zero value the range is handed to msi_domain_free_locked() before
 * that value is passed on to the caller.
 */
static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
{
	int ret;

	ret = __msi_domain_alloc_locked(dev, ctrl);
	if (!ret)
		return 0;

	msi_domain_free_locked(dev, ctrl);
	return ret;
}
1348
1349/**
1350 * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain
1351 * @dev: Pointer to device struct of the device for which the interrupts
1352 * are allocated
1353 * @domid: Id of the interrupt domain to operate on
1354 * @first: First index to allocate (inclusive)
1355 * @last: Last index to allocate (inclusive)
1356 *
1357 * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
1358 * pair. Use this for MSI irqdomains which implement their own descriptor
1359 * allocation/free.
1360 *
1361 * Return: %0 on success or an error code.
1362 */
1363int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid,
1364 unsigned int first, unsigned int last)
1365{
1366 struct msi_ctrl ctrl = {
1367 .domid = domid,
1368 .first = first,
1369 .last = last,
1370 .nirqs = last + 1 - first,
1371 };
1372
1373 return msi_domain_alloc_locked(dev, &ctrl);
1374}
1375
/**
 * msi_domain_alloc_irqs_range - Allocate a range of interrupts from a MSI
 *				 interrupt domain
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are allocated
 * @domid:	Id of the interrupt domain to operate on
 * @first:	First index to allocate (inclusive)
 * @last:	Last index to allocate (inclusive)
 *
 * Takes the MSI descriptor lock around the allocation.
 *
 * Return: %0 on success or an error code.
 */
int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
				unsigned int first, unsigned int last)
{
	int err;

	msi_lock_descs(dev);
	err = msi_domain_alloc_irqs_range_locked(dev, domid, first, last);
	msi_unlock_descs(dev);

	return err;
}
1396
1397/**
1398 * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain
1399 *
1400 * @dev: Pointer to device struct of the device for which the interrupts
1401 * are allocated
1402 * @domid: Id of the interrupt domain to operate on
1403 * @nirqs: The number of interrupts to allocate
1404 *
1405 * This function scans all MSI descriptors of the MSI domain and allocates interrupts
1406 * for all unassigned ones. That function is to be used for MSI domain usage where
1407 * the descriptor allocation is handled at the call site, e.g. PCI/MSI[X].
1408 *
1409 * Return: %0 on success or an error code.
1410 */
1411int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs)
1412{
1413 struct msi_ctrl ctrl = {
1414 .domid = domid,
1415 .first = 0,
1416 .last = msi_domain_get_hwsize(dev, domid) - 1,
1417 .nirqs = nirqs,
1418 };
1419
1420 return msi_domain_alloc_locked(dev, &ctrl);
1421}
1422
1423/**
1424 * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at
1425 * a given index - or at the next free index
1426 *
1427 * @dev: Pointer to device struct of the device for which the interrupts
1428 * are allocated
1429 * @domid: Id of the interrupt domain to operate on
1430 * @index: Index for allocation. If @index == %MSI_ANY_INDEX the allocation
1431 * uses the next free index.
1432 * @affdesc: Optional pointer to an interrupt affinity descriptor structure
1433 * @icookie: Optional pointer to a domain specific per instance cookie. If
1434 * non-NULL the content of the cookie is stored in msi_desc::data.
1435 * Must be NULL for MSI-X allocations
1436 *
1437 * This requires a MSI interrupt domain which lets the core code manage the
1438 * MSI descriptors.
1439 *
1440 * Return: struct msi_map
1441 *
1442 * On success msi_map::index contains the allocated index number and
1443 * msi_map::virq the corresponding Linux interrupt number
1444 *
1445 * On failure msi_map::index contains the error code and msi_map::virq
1446 * is %0.
1447 */
1448struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index,
1449 const struct irq_affinity_desc *affdesc,
1450 union msi_instance_cookie *icookie)
1451{
1452 struct msi_ctrl ctrl = { .domid = domid, .nirqs = 1, };
1453 struct irq_domain *domain;
1454 struct msi_map map = { };
1455 struct msi_desc *desc;
1456 int ret;
1457
1458 msi_lock_descs(dev);
1459 domain = msi_get_device_domain(dev, domid);
1460 if (!domain) {
1461 map.index = -ENODEV;
1462 goto unlock;
1463 }
1464
1465 desc = msi_alloc_desc(dev, 1, affdesc);
1466 if (!desc) {
1467 map.index = -ENOMEM;
1468 goto unlock;
1469 }
1470
1471 if (icookie)
1472 desc->data.icookie = *icookie;
1473
1474 ret = msi_insert_desc(dev, desc, domid, index);
1475 if (ret) {
1476 map.index = ret;
1477 goto unlock;
1478 }
1479
1480 ctrl.first = ctrl.last = desc->msi_index;
1481
1482 ret = __msi_domain_alloc_irqs(dev, domain, &ctrl);
1483 if (ret) {
1484 map.index = ret;
1485 msi_domain_free_locked(dev, &ctrl);
1486 } else {
1487 map.index = desc->msi_index;
1488 map.virq = desc->irq;
1489 }
1490unlock:
1491 msi_unlock_descs(dev);
1492 return map;
1493}
1494
1495static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain,
1496 struct msi_ctrl *ctrl)
1497{
1498 struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1499 struct msi_domain_info *info = domain->host_data;
1500 struct irq_data *irqd;
1501 struct msi_desc *desc;
1502 unsigned long idx;
1503 int i;
1504
1505 xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1506 /* Only handle MSI entries which have an interrupt associated */
1507 if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED))
1508 continue;
1509
1510 /* Make sure all interrupts are deactivated */
1511 for (i = 0; i < desc->nvec_used; i++) {
1512 irqd = irq_domain_get_irq_data(domain, desc->irq + i);
1513 if (irqd && irqd_is_activated(irqd))
1514 irq_domain_deactivate_irq(irqd);
1515 }
1516
1517 irq_domain_free_irqs(desc->irq, desc->nvec_used);
1518 if (info->flags & MSI_FLAG_DEV_SYSFS)
1519 msi_sysfs_remove_desc(dev, desc);
1520 desc->irq = 0;
1521 }
1522}
1523
1524static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl)
1525{
1526 struct msi_domain_info *info;
1527 struct msi_domain_ops *ops;
1528 struct irq_domain *domain;
1529
1530 if (!msi_ctrl_valid(dev, ctrl))
1531 return;
1532
1533 domain = msi_get_device_domain(dev, ctrl->domid);
1534 if (!domain)
1535 return;
1536
1537 info = domain->host_data;
1538 ops = info->ops;
1539
1540 if (ops->domain_free_irqs)
1541 ops->domain_free_irqs(domain, dev);
1542 else
1543 __msi_domain_free_irqs(dev, domain, ctrl);
1544
1545 if (ops->msi_post_free)
1546 ops->msi_post_free(domain, dev);
1547
1548 if (info->flags & MSI_FLAG_FREE_MSI_DESCS)
1549 msi_domain_free_descs(dev, ctrl);
1550}
1551
1552/**
1553 * msi_domain_free_irqs_range_locked - Free a range of interrupts from a MSI interrupt domain
1554 * associated to @dev with msi_lock held
1555 * @dev: Pointer to device struct of the device for which the interrupts
1556 * are freed
1557 * @domid: Id of the interrupt domain to operate on
1558 * @first: First index to free (inclusive)
1559 * @last: Last index to free (inclusive)
1560 */
1561void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
1562 unsigned int first, unsigned int last)
1563{
1564 struct msi_ctrl ctrl = {
1565 .domid = domid,
1566 .first = first,
1567 .last = last,
1568 };
1569 msi_domain_free_locked(dev, &ctrl);
1570}
1571
/**
 * msi_domain_free_irqs_range - Free a range of interrupts from a MSI
 *				interrupt domain associated to @dev
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	Id of the interrupt domain to operate on
 * @first:	First index to free (inclusive)
 * @last:	Last index to free (inclusive)
 *
 * Takes the MSI descriptor lock around the operation.
 */
void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
				unsigned int first, unsigned int last)
{
	msi_lock_descs(dev);

	msi_domain_free_irqs_range_locked(dev, domid, first, last);

	msi_unlock_descs(dev);
}
1588
/**
 * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt
 *				     domain associated to a device
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	The id of the domain to operate on
 *
 * The caller has to hold the MSI descriptor lock, i.e. this must be invoked
 * between msi_lock_descs() and msi_unlock_descs(). Use this for MSI
 * irqdomains which implement their own vector allocation.
 */
void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid)
{
	/* The last valid index of the hardware index space of the domain */
	unsigned int last = msi_domain_get_hwsize(dev, domid) - 1;

	msi_domain_free_irqs_range_locked(dev, domid, 0, last);
}
1605
/**
 * msi_domain_free_irqs_all - Free all interrupts from a MSI interrupt domain
 *			      associated to a device
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	The id of the domain to operate on
 *
 * Takes the MSI descriptor lock around the operation.
 */
void msi_domain_free_irqs_all(struct device *dev, unsigned int domid)
{
	msi_lock_descs(dev);

	msi_domain_free_irqs_all_locked(dev, domid);

	msi_unlock_descs(dev);
}
1619
1620/**
1621 * msi_get_domain_info - Get the MSI interrupt domain info for @domain
1622 * @domain: The interrupt domain to retrieve data from
1623 *
1624 * Return: the pointer to the msi_domain_info stored in @domain->host_data.
1625 */
1626struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
1627{
1628 return (struct msi_domain_info *)domain->host_data;
1629}
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2014 Intel Corp.
4 * Author: Jiang Liu <jiang.liu@linux.intel.com>
5 *
6 * This file is licensed under GPLv2.
7 *
8 * This file contains common code to support Message Signaled Interrupts for
9 * PCI compatible and non PCI compatible devices.
10 */
11#include <linux/device.h>
12#include <linux/irq.h>
13#include <linux/irqdomain.h>
14#include <linux/msi.h>
15#include <linux/mutex.h>
16#include <linux/pci.h>
17#include <linux/slab.h>
18#include <linux/sysfs.h>
19#include <linux/types.h>
20#include <linux/xarray.h>
21
22#include "internals.h"
23
24/**
25 * struct msi_device_data - MSI per device data
26 * @properties: MSI properties which are interesting to drivers
27 * @mutex: Mutex protecting the MSI descriptor store
28 * @__domains: Internal data for per device MSI domains
29 * @__iter_idx: Index to search the next entry for iterators
30 */
31struct msi_device_data {
32 unsigned long properties;
33 struct mutex mutex;
34 struct msi_dev_domain __domains[MSI_MAX_DEVICE_IRQDOMAINS];
35 unsigned long __iter_idx;
36};
37
/**
 * struct msi_ctrl - Control structure for MSI internal management operations
 * @domid:	ID of the domain on which the management operation is done
 * @first:	First (hardware) slot index to operate on
 * @last:	Last (hardware) slot index to operate on
 * @nirqs:	The number of Linux interrupts to allocate. Can be larger
 *		than the range due to PCI/multi-MSI.
 */
struct msi_ctrl {
	unsigned int			domid;
	unsigned int			first;
	unsigned int			last;
	unsigned int			nirqs;
};
52
/*
 * Xarray index which lies outside of any searchable range. Used to mark the
 * cached iterator index as invalid.
 */
#define MSI_XA_MAX_INDEX	(ULONG_MAX - 1)
/* The maximum size of a domain */
#define MSI_XA_DOMAIN_SIZE	(MSI_MAX_INDEX + 1)

/* Functions which are used before their definition further down */
static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl);
static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid);
static inline int msi_sysfs_create_group(struct device *dev);
61
62
63/**
64 * msi_alloc_desc - Allocate an initialized msi_desc
65 * @dev: Pointer to the device for which this is allocated
66 * @nvec: The number of vectors used in this entry
67 * @affinity: Optional pointer to an affinity mask array size of @nvec
68 *
69 * If @affinity is not %NULL then an affinity array[@nvec] is allocated
70 * and the affinity masks and flags from @affinity are copied.
71 *
72 * Return: pointer to allocated &msi_desc on success or %NULL on failure
73 */
74static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec,
75 const struct irq_affinity_desc *affinity)
76{
77 struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
78
79 if (!desc)
80 return NULL;
81
82 desc->dev = dev;
83 desc->nvec_used = nvec;
84 if (affinity) {
85 desc->affinity = kmemdup_array(affinity, nvec, sizeof(*desc->affinity), GFP_KERNEL);
86 if (!desc->affinity) {
87 kfree(desc);
88 return NULL;
89 }
90 }
91 return desc;
92}
93
94static void msi_free_desc(struct msi_desc *desc)
95{
96 kfree(desc->affinity);
97 kfree(desc);
98}
99
100static int msi_insert_desc(struct device *dev, struct msi_desc *desc,
101 unsigned int domid, unsigned int index)
102{
103 struct msi_device_data *md = dev->msi.data;
104 struct xarray *xa = &md->__domains[domid].store;
105 unsigned int hwsize;
106 int ret;
107
108 hwsize = msi_domain_get_hwsize(dev, domid);
109
110 if (index == MSI_ANY_INDEX) {
111 struct xa_limit limit = { .min = 0, .max = hwsize - 1 };
112 unsigned int index;
113
114 /* Let the xarray allocate a free index within the limit */
115 ret = xa_alloc(xa, &index, desc, limit, GFP_KERNEL);
116 if (ret)
117 goto fail;
118
119 desc->msi_index = index;
120 return 0;
121 } else {
122 if (index >= hwsize) {
123 ret = -ERANGE;
124 goto fail;
125 }
126
127 desc->msi_index = index;
128 ret = xa_insert(xa, index, desc, GFP_KERNEL);
129 if (ret)
130 goto fail;
131 return 0;
132 }
133fail:
134 msi_free_desc(desc);
135 return ret;
136}
137
138/**
139 * msi_domain_insert_msi_desc - Allocate and initialize a MSI descriptor and
140 * insert it at @init_desc->msi_index
141 *
142 * @dev: Pointer to the device for which the descriptor is allocated
143 * @domid: The id of the interrupt domain to which the desriptor is added
144 * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor
145 *
146 * Return: 0 on success or an appropriate failure code.
147 */
148int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid,
149 struct msi_desc *init_desc)
150{
151 struct msi_desc *desc;
152
153 lockdep_assert_held(&dev->msi.data->mutex);
154
155 desc = msi_alloc_desc(dev, init_desc->nvec_used, init_desc->affinity);
156 if (!desc)
157 return -ENOMEM;
158
159 /* Copy type specific data to the new descriptor. */
160 desc->pci = init_desc->pci;
161
162 return msi_insert_desc(dev, desc, domid, init_desc->msi_index);
163}
164
165static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
166{
167 switch (filter) {
168 case MSI_DESC_ALL:
169 return true;
170 case MSI_DESC_NOTASSOCIATED:
171 return !desc->irq;
172 case MSI_DESC_ASSOCIATED:
173 return !!desc->irq;
174 }
175 WARN_ON_ONCE(1);
176 return false;
177}
178
179static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl)
180{
181 unsigned int hwsize;
182
183 if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS ||
184 (dev->msi.domain &&
185 !dev->msi.data->__domains[ctrl->domid].domain)))
186 return false;
187
188 hwsize = msi_domain_get_hwsize(dev, ctrl->domid);
189 if (WARN_ON_ONCE(ctrl->first > ctrl->last ||
190 ctrl->first >= hwsize ||
191 ctrl->last >= hwsize))
192 return false;
193 return true;
194}
195
196static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl)
197{
198 struct msi_desc *desc;
199 struct xarray *xa;
200 unsigned long idx;
201
202 lockdep_assert_held(&dev->msi.data->mutex);
203
204 if (!msi_ctrl_valid(dev, ctrl))
205 return;
206
207 xa = &dev->msi.data->__domains[ctrl->domid].store;
208 xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
209 xa_erase(xa, idx);
210
211 /* Leak the descriptor when it is still referenced */
212 if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
213 continue;
214 msi_free_desc(desc);
215 }
216}
217
218/**
219 * msi_domain_free_msi_descs_range - Free a range of MSI descriptors of a device in an irqdomain
220 * @dev: Device for which to free the descriptors
221 * @domid: Id of the domain to operate on
222 * @first: Index to start freeing from (inclusive)
223 * @last: Last index to be freed (inclusive)
224 */
225void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid,
226 unsigned int first, unsigned int last)
227{
228 struct msi_ctrl ctrl = {
229 .domid = domid,
230 .first = first,
231 .last = last,
232 };
233
234 msi_domain_free_descs(dev, &ctrl);
235}
236
237/**
238 * msi_domain_add_simple_msi_descs - Allocate and initialize MSI descriptors
239 * @dev: Pointer to the device for which the descriptors are allocated
240 * @ctrl: Allocation control struct
241 *
242 * Return: 0 on success or an appropriate failure code.
243 */
244static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *ctrl)
245{
246 struct msi_desc *desc;
247 unsigned int idx;
248 int ret;
249
250 lockdep_assert_held(&dev->msi.data->mutex);
251
252 if (!msi_ctrl_valid(dev, ctrl))
253 return -EINVAL;
254
255 for (idx = ctrl->first; idx <= ctrl->last; idx++) {
256 desc = msi_alloc_desc(dev, 1, NULL);
257 if (!desc)
258 goto fail_mem;
259 ret = msi_insert_desc(dev, desc, ctrl->domid, idx);
260 if (ret)
261 goto fail;
262 }
263 return 0;
264
265fail_mem:
266 ret = -ENOMEM;
267fail:
268 msi_domain_free_descs(dev, ctrl);
269 return ret;
270}
271
272void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
273{
274 *msg = entry->msg;
275}
276
/*
 * Copy the MSI message which is stored in the MSI descriptor of Linux
 * interrupt @irq to @msg.
 */
void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__get_cached_msi_msg(irq_get_msi_desc(irq), msg);
}
EXPORT_SYMBOL_GPL(get_cached_msi_msg);
284
285static void msi_device_data_release(struct device *dev, void *res)
286{
287 struct msi_device_data *md = res;
288 int i;
289
290 for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) {
291 msi_remove_device_irq_domain(dev, i);
292 WARN_ON_ONCE(!xa_empty(&md->__domains[i].store));
293 xa_destroy(&md->__domains[i].store);
294 }
295 dev->msi.data = NULL;
296}
297
298/**
299 * msi_setup_device_data - Setup MSI device data
300 * @dev: Device for which MSI device data should be set up
301 *
302 * Return: 0 on success, appropriate error code otherwise
303 *
304 * This can be called more than once for @dev. If the MSI device data is
305 * already allocated the call succeeds. The allocated memory is
306 * automatically released when the device is destroyed.
307 */
308int msi_setup_device_data(struct device *dev)
309{
310 struct msi_device_data *md;
311 int ret, i;
312
313 if (dev->msi.data)
314 return 0;
315
316 md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL);
317 if (!md)
318 return -ENOMEM;
319
320 ret = msi_sysfs_create_group(dev);
321 if (ret) {
322 devres_free(md);
323 return ret;
324 }
325
326 for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++)
327 xa_init_flags(&md->__domains[i].store, XA_FLAGS_ALLOC);
328
329 /*
330 * If @dev::msi::domain is set and is a global MSI domain, copy the
331 * pointer into the domain array so all code can operate on domain
332 * ids. The NULL pointer check is required to keep the legacy
333 * architecture specific PCI/MSI support working.
334 */
335 if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain))
336 md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain;
337
338 mutex_init(&md->mutex);
339 dev->msi.data = md;
340 devres_add(dev, md);
341 return 0;
342}
343
344/**
345 * msi_lock_descs - Lock the MSI descriptor storage of a device
346 * @dev: Device to operate on
347 */
348void msi_lock_descs(struct device *dev)
349{
350 mutex_lock(&dev->msi.data->mutex);
351}
352EXPORT_SYMBOL_GPL(msi_lock_descs);
353
354/**
355 * msi_unlock_descs - Unlock the MSI descriptor storage of a device
356 * @dev: Device to operate on
357 */
358void msi_unlock_descs(struct device *dev)
359{
360 /* Invalidate the index which was cached by the iterator */
361 dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX;
362 mutex_unlock(&dev->msi.data->mutex);
363}
364EXPORT_SYMBOL_GPL(msi_unlock_descs);
365
366static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid,
367 enum msi_desc_filter filter)
368{
369 struct xarray *xa = &md->__domains[domid].store;
370 struct msi_desc *desc;
371
372 xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) {
373 if (msi_desc_match(desc, filter))
374 return desc;
375 }
376 md->__iter_idx = MSI_XA_MAX_INDEX;
377 return NULL;
378}
379
380/**
381 * msi_domain_first_desc - Get the first MSI descriptor of an irqdomain associated to a device
382 * @dev: Device to operate on
383 * @domid: The id of the interrupt domain which should be walked.
384 * @filter: Descriptor state filter
385 *
386 * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs()
387 * must be invoked before the call.
388 *
389 * Return: Pointer to the first MSI descriptor matching the search
390 * criteria, NULL if none found.
391 */
392struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
393 enum msi_desc_filter filter)
394{
395 struct msi_device_data *md = dev->msi.data;
396
397 if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
398 return NULL;
399
400 lockdep_assert_held(&md->mutex);
401
402 md->__iter_idx = 0;
403 return msi_find_desc(md, domid, filter);
404}
405EXPORT_SYMBOL_GPL(msi_domain_first_desc);
406
407/**
408 * msi_next_desc - Get the next MSI descriptor of a device
409 * @dev: Device to operate on
410 * @domid: The id of the interrupt domain which should be walked.
411 * @filter: Descriptor state filter
412 *
413 * The first invocation of msi_next_desc() has to be preceeded by a
414 * successful invocation of __msi_first_desc(). Consecutive invocations are
415 * only valid if the previous one was successful. All these operations have
416 * to be done within the same MSI mutex held region.
417 *
418 * Return: Pointer to the next MSI descriptor matching the search
419 * criteria, NULL if none found.
420 */
421struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid,
422 enum msi_desc_filter filter)
423{
424 struct msi_device_data *md = dev->msi.data;
425
426 if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
427 return NULL;
428
429 lockdep_assert_held(&md->mutex);
430
431 if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX)
432 return NULL;
433
434 md->__iter_idx++;
435 return msi_find_desc(md, domid, filter);
436}
437EXPORT_SYMBOL_GPL(msi_next_desc);
438
439/**
440 * msi_domain_get_virq - Lookup the Linux interrupt number for a MSI index on a interrupt domain
441 * @dev: Device to operate on
442 * @domid: Domain ID of the interrupt domain associated to the device
443 * @index: MSI interrupt index to look for (0-based)
444 *
445 * Return: The Linux interrupt number on success (> 0), 0 if not found
446 */
447unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index)
448{
449 struct msi_desc *desc;
450 unsigned int ret = 0;
451 bool pcimsi = false;
452 struct xarray *xa;
453
454 if (!dev->msi.data)
455 return 0;
456
457 if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
458 return 0;
459
460 /* This check is only valid for the PCI default MSI domain */
461 if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN)
462 pcimsi = to_pci_dev(dev)->msi_enabled;
463
464 msi_lock_descs(dev);
465 xa = &dev->msi.data->__domains[domid].store;
466 desc = xa_load(xa, pcimsi ? 0 : index);
467 if (desc && desc->irq) {
468 /*
469 * PCI-MSI has only one descriptor for multiple interrupts.
470 * PCI-MSIX and platform MSI use a descriptor per
471 * interrupt.
472 */
473 if (pcimsi) {
474 if (index < desc->nvec_used)
475 ret = desc->irq + index;
476 } else {
477 ret = desc->irq;
478 }
479 }
480
481 msi_unlock_descs(dev);
482 return ret;
483}
484EXPORT_SYMBOL_GPL(msi_domain_get_virq);
485
486#ifdef CONFIG_SYSFS
487static struct attribute *msi_dev_attrs[] = {
488 NULL
489};
490
491static const struct attribute_group msi_irqs_group = {
492 .name = "msi_irqs",
493 .attrs = msi_dev_attrs,
494};
495
496static inline int msi_sysfs_create_group(struct device *dev)
497{
498 return devm_device_add_group(dev, &msi_irqs_group);
499}
500
501static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
502 char *buf)
503{
504 /* MSI vs. MSIX is per device not per interrupt */
505 bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false;
506
507 return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi");
508}
509
510static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
511{
512 struct device_attribute *attrs = desc->sysfs_attrs;
513 int i;
514
515 if (!attrs)
516 return;
517
518 desc->sysfs_attrs = NULL;
519 for (i = 0; i < desc->nvec_used; i++) {
520 if (attrs[i].show)
521 sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
522 kfree(attrs[i].attr.name);
523 }
524 kfree(attrs);
525}
526
527static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
528{
529 struct device_attribute *attrs;
530 int ret, i;
531
532 attrs = kcalloc(desc->nvec_used, sizeof(*attrs), GFP_KERNEL);
533 if (!attrs)
534 return -ENOMEM;
535
536 desc->sysfs_attrs = attrs;
537 for (i = 0; i < desc->nvec_used; i++) {
538 sysfs_attr_init(&attrs[i].attr);
539 attrs[i].attr.name = kasprintf(GFP_KERNEL, "%d", desc->irq + i);
540 if (!attrs[i].attr.name) {
541 ret = -ENOMEM;
542 goto fail;
543 }
544
545 attrs[i].attr.mode = 0444;
546 attrs[i].show = msi_mode_show;
547
548 ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
549 if (ret) {
550 attrs[i].show = NULL;
551 goto fail;
552 }
553 }
554 return 0;
555
556fail:
557 msi_sysfs_remove_desc(dev, desc);
558 return ret;
559}
560
561#if defined(CONFIG_PCI_MSI_ARCH_FALLBACKS) || defined(CONFIG_PCI_XEN)
562/**
563 * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
564 * @dev: The device (PCI, platform etc) which will get sysfs entries
565 */
566int msi_device_populate_sysfs(struct device *dev)
567{
568 struct msi_desc *desc;
569 int ret;
570
571 msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) {
572 if (desc->sysfs_attrs)
573 continue;
574 ret = msi_sysfs_populate_desc(dev, desc);
575 if (ret)
576 return ret;
577 }
578 return 0;
579}
580
581/**
582 * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device
583 * @dev: The device (PCI, platform etc) for which to remove
584 * sysfs entries
585 */
586void msi_device_destroy_sysfs(struct device *dev)
587{
588 struct msi_desc *desc;
589
590 msi_for_each_desc(desc, dev, MSI_DESC_ALL)
591 msi_sysfs_remove_desc(dev, desc);
592}
593#endif /* CONFIG_PCI_MSI_ARCH_FALLBACK || CONFIG_PCI_XEN */
594#else /* CONFIG_SYSFS */
/* Stubs for kernels built without sysfs support: nothing to do */
static inline int msi_sysfs_create_group(struct device *dev)
{
	return 0;
}

static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
{
	return 0;
}

static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
{
}
598#endif /* !CONFIG_SYSFS */
599
600static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid)
601{
602 struct irq_domain *domain;
603
604 lockdep_assert_held(&dev->msi.data->mutex);
605
606 if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS))
607 return NULL;
608
609 domain = dev->msi.data->__domains[domid].domain;
610 if (!domain)
611 return NULL;
612
613 if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain)))
614 return NULL;
615
616 return domain;
617}
618
619static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid)
620{
621 struct msi_domain_info *info;
622 struct irq_domain *domain;
623
624 domain = msi_get_device_domain(dev, domid);
625 if (domain) {
626 info = domain->host_data;
627 return info->hwsize;
628 }
629 /* No domain, default to MSI_XA_DOMAIN_SIZE */
630 return MSI_XA_DOMAIN_SIZE;
631}
632
633static inline void irq_chip_write_msi_msg(struct irq_data *data,
634 struct msi_msg *msg)
635{
636 data->chip->irq_write_msi_msg(data, msg);
637}
638
639static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
640{
641 struct msi_domain_info *info = domain->host_data;
642
643 /*
644 * If the MSI provider has messed with the second message and
645 * not advertized that it is level-capable, signal the breakage.
646 */
647 WARN_ON(!((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
648 (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)) &&
649 (msg[1].address_lo || msg[1].address_hi || msg[1].data));
650}
651
652/**
653 * msi_domain_set_affinity - Generic affinity setter function for MSI domains
654 * @irq_data: The irq data associated to the interrupt
655 * @mask: The affinity mask to set
656 * @force: Flag to enforce setting (disable online checks)
657 *
658 * Intended to be used by MSI interrupt controllers which are
659 * implemented with hierarchical domains.
660 *
661 * Return: IRQ_SET_MASK_* result code
662 */
663int msi_domain_set_affinity(struct irq_data *irq_data,
664 const struct cpumask *mask, bool force)
665{
666 struct irq_data *parent = irq_data->parent_data;
667 struct msi_msg msg[2] = { [1] = { }, };
668 int ret;
669
670 ret = parent->chip->irq_set_affinity(parent, mask, force);
671 if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
672 BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
673 msi_check_level(irq_data->domain, msg);
674 irq_chip_write_msi_msg(irq_data, msg);
675 }
676
677 return ret;
678}
679
680static int msi_domain_activate(struct irq_domain *domain,
681 struct irq_data *irq_data, bool early)
682{
683 struct msi_msg msg[2] = { [1] = { }, };
684
685 BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
686 msi_check_level(irq_data->domain, msg);
687 irq_chip_write_msi_msg(irq_data, msg);
688 return 0;
689}
690
691static void msi_domain_deactivate(struct irq_domain *domain,
692 struct irq_data *irq_data)
693{
694 struct msi_msg msg[2];
695
696 memset(msg, 0, sizeof(msg));
697 irq_chip_write_msi_msg(irq_data, msg);
698}
699
700static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
701 unsigned int nr_irqs, void *arg)
702{
703 struct msi_domain_info *info = domain->host_data;
704 struct msi_domain_ops *ops = info->ops;
705 irq_hw_number_t hwirq = ops->get_hwirq(info, arg);
706 int i, ret;
707
708 if (irq_find_mapping(domain, hwirq) > 0)
709 return -EEXIST;
710
711 if (domain->parent) {
712 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
713 if (ret < 0)
714 return ret;
715 }
716
717 for (i = 0; i < nr_irqs; i++) {
718 ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
719 if (ret < 0) {
720 if (ops->msi_free) {
721 for (i--; i >= 0; i--)
722 ops->msi_free(domain, info, virq + i);
723 }
724 irq_domain_free_irqs_top(domain, virq, nr_irqs);
725 return ret;
726 }
727 }
728
729 return 0;
730}
731
732static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
733 unsigned int nr_irqs)
734{
735 struct msi_domain_info *info = domain->host_data;
736 int i;
737
738 if (info->ops->msi_free) {
739 for (i = 0; i < nr_irqs; i++)
740 info->ops->msi_free(domain, info, virq + i);
741 }
742 irq_domain_free_irqs_top(domain, virq, nr_irqs);
743}
744
745static int msi_domain_translate(struct irq_domain *domain, struct irq_fwspec *fwspec,
746 irq_hw_number_t *hwirq, unsigned int *type)
747{
748 struct msi_domain_info *info = domain->host_data;
749
750 /*
751 * This will catch allocations through the regular irqdomain path except
752 * for MSI domains which really support this, e.g. MBIGEN.
753 */
754 if (!info->ops->msi_translate)
755 return -ENOTSUPP;
756 return info->ops->msi_translate(domain, fwspec, hwirq, type);
757}
758
759static const struct irq_domain_ops msi_domain_ops = {
760 .alloc = msi_domain_alloc,
761 .free = msi_domain_free,
762 .activate = msi_domain_activate,
763 .deactivate = msi_domain_deactivate,
764 .translate = msi_domain_translate,
765};
766
767static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
768 msi_alloc_info_t *arg)
769{
770 return arg->hwirq;
771}
772
773static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev,
774 int nvec, msi_alloc_info_t *arg)
775{
776 memset(arg, 0, sizeof(*arg));
777 return 0;
778}
779
780static void msi_domain_ops_set_desc(msi_alloc_info_t *arg,
781 struct msi_desc *desc)
782{
783 arg->desc = desc;
784}
785
786static int msi_domain_ops_init(struct irq_domain *domain,
787 struct msi_domain_info *info,
788 unsigned int virq, irq_hw_number_t hwirq,
789 msi_alloc_info_t *arg)
790{
791 irq_domain_set_hwirq_and_chip(domain, virq, hwirq, info->chip,
792 info->chip_data);
793 if (info->handler && info->handler_name) {
794 __irq_set_handler(virq, info->handler, 0, info->handler_name);
795 if (info->handler_data)
796 irq_set_handler_data(virq, info->handler_data);
797 }
798 return 0;
799}
800
801static struct msi_domain_ops msi_domain_ops_default = {
802 .get_hwirq = msi_domain_ops_get_hwirq,
803 .msi_init = msi_domain_ops_init,
804 .msi_prepare = msi_domain_ops_prepare,
805 .set_desc = msi_domain_ops_set_desc,
806};
807
808static void msi_domain_update_dom_ops(struct msi_domain_info *info)
809{
810 struct msi_domain_ops *ops = info->ops;
811
812 if (ops == NULL) {
813 info->ops = &msi_domain_ops_default;
814 return;
815 }
816
817 if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS))
818 return;
819
820 if (ops->get_hwirq == NULL)
821 ops->get_hwirq = msi_domain_ops_default.get_hwirq;
822 if (ops->msi_init == NULL)
823 ops->msi_init = msi_domain_ops_default.msi_init;
824 if (ops->msi_prepare == NULL)
825 ops->msi_prepare = msi_domain_ops_default.msi_prepare;
826 if (ops->set_desc == NULL)
827 ops->set_desc = msi_domain_ops_default.set_desc;
828}
829
830static void msi_domain_update_chip_ops(struct msi_domain_info *info)
831{
832 struct irq_chip *chip = info->chip;
833
834 BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask);
835 if (!chip->irq_set_affinity && !(info->flags & MSI_FLAG_NO_AFFINITY))
836 chip->irq_set_affinity = msi_domain_set_affinity;
837}
838
839static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode,
840 struct msi_domain_info *info,
841 unsigned int flags,
842 struct irq_domain *parent)
843{
844 struct irq_domain *domain;
845
846 if (info->hwsize > MSI_XA_DOMAIN_SIZE)
847 return NULL;
848
849 /*
850 * Hardware size 0 is valid for backwards compatibility and for
851 * domains which are not backed by a hardware table. Grant the
852 * maximum index space.
853 */
854 if (!info->hwsize)
855 info->hwsize = MSI_XA_DOMAIN_SIZE;
856
857 msi_domain_update_dom_ops(info);
858 if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
859 msi_domain_update_chip_ops(info);
860
861 domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0,
862 fwnode, &msi_domain_ops, info);
863
864 if (domain) {
865 irq_domain_update_bus_token(domain, info->bus_token);
866 if (info->flags & MSI_FLAG_PARENT_PM_DEV)
867 domain->pm_dev = parent->pm_dev;
868 }
869
870 return domain;
871}
872
/**
 * msi_create_irq_domain - Create an MSI interrupt domain
 * @fwnode:	Optional fwnode of the interrupt controller
 * @info:	MSI domain info
 * @parent:	Parent irq domain
 *
 * The domain is created without any additional irq domain flags.
 *
 * Return: pointer to the created &struct irq_domain or %NULL on failure
 */
struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
					 struct msi_domain_info *info,
					 struct irq_domain *parent)
{
	const unsigned int no_extra_flags = 0;

	return __msi_create_irq_domain(fwnode, info, no_extra_flags, parent);
}
887
888/**
889 * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down
890 * in the domain hierarchy
891 * @dev: The device for which the domain should be created
892 * @domain: The domain in the hierarchy this op is being called on
893 * @msi_parent_domain: The IRQ_DOMAIN_FLAG_MSI_PARENT domain for the child to
894 * be created
895 * @msi_child_info: The MSI domain info of the IRQ_DOMAIN_FLAG_MSI_DEVICE
896 * domain to be created
897 *
898 * Return: true on success, false otherwise
899 *
900 * This is the most complex problem of per device MSI domains and the
901 * underlying interrupt domain hierarchy:
902 *
903 * The device domain to be initialized requests the broadest feature set
904 * possible and the underlying domain hierarchy puts restrictions on it.
905 *
906 * That's trivial for a simple parent->child relationship, but it gets
907 * interesting with an intermediate domain: root->parent->child. The
908 * intermediate 'parent' can expand the capabilities which the 'root'
909 * domain is providing. So that creates a classic hen and egg problem:
910 * Which entity is doing the restrictions/expansions?
911 *
912 * One solution is to let the root domain handle the initialization that's
913 * why there is the @domain and the @msi_parent_domain pointer.
914 */
915bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
916 struct irq_domain *msi_parent_domain,
917 struct msi_domain_info *msi_child_info)
918{
919 struct irq_domain *parent = domain->parent;
920
921 if (WARN_ON_ONCE(!parent || !parent->msi_parent_ops ||
922 !parent->msi_parent_ops->init_dev_msi_info))
923 return false;
924
925 return parent->msi_parent_ops->init_dev_msi_info(dev, parent, msi_parent_domain,
926 msi_child_info);
927}
928
929/**
930 * msi_create_device_irq_domain - Create a device MSI interrupt domain
931 * @dev: Pointer to the device
932 * @domid: Domain id
933 * @template: MSI domain info bundle used as template
934 * @hwsize: Maximum number of MSI table entries (0 if unknown or unlimited)
935 * @domain_data: Optional pointer to domain specific data which is set in
936 * msi_domain_info::data
937 * @chip_data: Optional pointer to chip specific data which is set in
938 * msi_domain_info::chip_data
939 *
940 * Return: True on success, false otherwise
941 *
942 * There is no firmware node required for this interface because the per
943 * device domains are software constructs which are actually closer to the
944 * hardware reality than any firmware can describe them.
945 *
946 * The domain name and the irq chip name for a MSI device domain are
947 * composed by: "$(PREFIX)$(CHIPNAME)-$(DEVNAME)"
948 *
949 * $PREFIX: Optional prefix provided by the underlying MSI parent domain
950 * via msi_parent_ops::prefix. If that pointer is NULL the prefix
951 * is empty.
952 * $CHIPNAME: The name of the irq_chip in @template
953 * $DEVNAME: The name of the device
954 *
955 * This results in understandable chip names and hardware interrupt numbers
956 * in e.g. /proc/interrupts
957 *
958 * PCI-MSI-0000:00:1c.0 0-edge Parent domain has no prefix
959 * IR-PCI-MSI-0000:00:1c.4 0-edge Same with interrupt remapping prefix 'IR-'
960 *
961 * IR-PCI-MSIX-0000:3d:00.0 0-edge Hardware interrupt numbers reflect
962 * IR-PCI-MSIX-0000:3d:00.0 1-edge the real MSI-X index on that device
963 * IR-PCI-MSIX-0000:3d:00.0 2-edge
964 *
965 * On IMS domains the hardware interrupt number is either a table entry
966 * index or a purely software managed index but it is guaranteed to be
967 * unique.
968 *
969 * The domain pointer is stored in @dev::msi::data::__irqdomains[]. All
970 * subsequent operations on the domain depend on the domain id.
971 *
972 * The domain is automatically freed when the device is removed via devres
973 * in the context of @dev::msi::data freeing, but it can also be
974 * independently removed via @msi_remove_device_irq_domain().
975 */
976bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
977 const struct msi_domain_template *template,
978 unsigned int hwsize, void *domain_data,
979 void *chip_data)
980{
981 struct irq_domain *domain, *parent = dev->msi.domain;
982 struct fwnode_handle *fwnode, *fwnalloced = NULL;
983 struct msi_domain_template *bundle;
984 const struct msi_parent_ops *pops;
985
986 if (!irq_domain_is_msi_parent(parent))
987 return false;
988
989 if (domid >= MSI_MAX_DEVICE_IRQDOMAINS)
990 return false;
991
992 bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL);
993 if (!bundle)
994 return false;
995
996 bundle->info.hwsize = hwsize;
997 bundle->info.chip = &bundle->chip;
998 bundle->info.ops = &bundle->ops;
999 bundle->info.data = domain_data;
1000 bundle->info.chip_data = chip_data;
1001
1002 pops = parent->msi_parent_ops;
1003 snprintf(bundle->name, sizeof(bundle->name), "%s%s-%s",
1004 pops->prefix ? : "", bundle->chip.name, dev_name(dev));
1005 bundle->chip.name = bundle->name;
1006
1007 /*
1008 * Using the device firmware node is required for wire to MSI
1009 * device domains so that the existing firmware results in a domain
1010 * match.
1011 * All other device domains like PCI/MSI use the named firmware
1012 * node as they are not guaranteed to have a fwnode. They are never
1013 * looked up and always handled in the context of the device.
1014 */
1015 if (bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE)
1016 fwnode = dev->fwnode;
1017 else
1018 fwnode = fwnalloced = irq_domain_alloc_named_fwnode(bundle->name);
1019
1020 if (!fwnode)
1021 goto free_bundle;
1022
1023 if (msi_setup_device_data(dev))
1024 goto free_fwnode;
1025
1026 msi_lock_descs(dev);
1027
1028 if (WARN_ON_ONCE(msi_get_device_domain(dev, domid)))
1029 goto fail;
1030
1031 if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info))
1032 goto fail;
1033
1034 domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent);
1035 if (!domain)
1036 goto fail;
1037
1038 domain->dev = dev;
1039 dev->msi.data->__domains[domid].domain = domain;
1040 msi_unlock_descs(dev);
1041 return true;
1042
1043fail:
1044 msi_unlock_descs(dev);
1045free_fwnode:
1046 irq_domain_free_fwnode(fwnalloced);
1047free_bundle:
1048 kfree(bundle);
1049 return false;
1050}
1051
1052/**
1053 * msi_remove_device_irq_domain - Free a device MSI interrupt domain
1054 * @dev: Pointer to the device
1055 * @domid: Domain id
1056 */
1057void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
1058{
1059 struct fwnode_handle *fwnode = NULL;
1060 struct msi_domain_info *info;
1061 struct irq_domain *domain;
1062
1063 msi_lock_descs(dev);
1064
1065 domain = msi_get_device_domain(dev, domid);
1066
1067 if (!domain || !irq_domain_is_msi_device(domain))
1068 goto unlock;
1069
1070 dev->msi.data->__domains[domid].domain = NULL;
1071 info = domain->host_data;
1072 if (irq_domain_is_msi_device(domain))
1073 fwnode = domain->fwnode;
1074 irq_domain_remove(domain);
1075 irq_domain_free_fwnode(fwnode);
1076 kfree(container_of(info, struct msi_domain_template, info));
1077
1078unlock:
1079 msi_unlock_descs(dev);
1080}
1081
1082/**
1083 * msi_match_device_irq_domain - Match a device irq domain against a bus token
1084 * @dev: Pointer to the device
1085 * @domid: Domain id
1086 * @bus_token: Bus token to match against the domain bus token
1087 *
1088 * Return: True if device domain exists and bus tokens match.
1089 */
1090bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
1091 enum irq_domain_bus_token bus_token)
1092{
1093 struct msi_domain_info *info;
1094 struct irq_domain *domain;
1095 bool ret = false;
1096
1097 msi_lock_descs(dev);
1098 domain = msi_get_device_domain(dev, domid);
1099 if (domain && irq_domain_is_msi_device(domain)) {
1100 info = domain->host_data;
1101 ret = info->bus_token == bus_token;
1102 }
1103 msi_unlock_descs(dev);
1104 return ret;
1105}
1106
1107static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
1108 int nvec, msi_alloc_info_t *arg)
1109{
1110 struct msi_domain_info *info = domain->host_data;
1111 struct msi_domain_ops *ops = info->ops;
1112
1113 return ops->msi_prepare(domain, dev, nvec, arg);
1114}
1115
1116/*
1117 * Carefully check whether the device can use reservation mode. If
1118 * reservation mode is enabled then the early activation will assign a
1119 * dummy vector to the device. If the PCI/MSI device does not support
1120 * masking of the entry then this can result in spurious interrupts when
1121 * the device driver is not absolutely careful. But even then a malfunction
1122 * of the hardware could result in a spurious interrupt on the dummy vector
1123 * and render the device unusable. If the entry can be masked then the core
1124 * logic will prevent the spurious interrupt and reservation mode can be
1125 * used. For now reservation mode is restricted to PCI/MSI.
1126 */
1127static bool msi_check_reservation_mode(struct irq_domain *domain,
1128 struct msi_domain_info *info,
1129 struct device *dev)
1130{
1131 struct msi_desc *desc;
1132
1133 switch(domain->bus_token) {
1134 case DOMAIN_BUS_PCI_MSI:
1135 case DOMAIN_BUS_PCI_DEVICE_MSI:
1136 case DOMAIN_BUS_PCI_DEVICE_MSIX:
1137 case DOMAIN_BUS_VMD_MSI:
1138 break;
1139 default:
1140 return false;
1141 }
1142
1143 if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
1144 return false;
1145
1146 if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
1147 return false;
1148
1149 /*
1150 * Checking the first MSI descriptor is sufficient. MSIX supports
1151 * masking and MSI does so when the can_mask attribute is set.
1152 */
1153 desc = msi_first_desc(dev, MSI_DESC_ALL);
1154 return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask;
1155}
1156
1157static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc,
1158 int allocated)
1159{
1160 switch(domain->bus_token) {
1161 case DOMAIN_BUS_PCI_MSI:
1162 case DOMAIN_BUS_PCI_DEVICE_MSI:
1163 case DOMAIN_BUS_PCI_DEVICE_MSIX:
1164 case DOMAIN_BUS_VMD_MSI:
1165 if (IS_ENABLED(CONFIG_PCI_MSI))
1166 break;
1167 fallthrough;
1168 default:
1169 return -ENOSPC;
1170 }
1171
1172 /* Let a failed PCI multi MSI allocation retry */
1173 if (desc->nvec_used > 1)
1174 return 1;
1175
1176 /* If there was a successful allocation let the caller know */
1177 return allocated ? allocated : -ENOSPC;
1178}
1179
1180#define VIRQ_CAN_RESERVE 0x01
1181#define VIRQ_ACTIVATE 0x02
1182
1183static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags)
1184{
1185 struct irq_data *irqd = irq_domain_get_irq_data(domain, virq);
1186 int ret;
1187
1188 if (!(vflags & VIRQ_CAN_RESERVE)) {
1189 irqd_clr_can_reserve(irqd);
1190
1191 /*
1192 * If the interrupt is managed but no CPU is available to
1193 * service it, shut it down until better times. Note that
1194 * we only do this on the !RESERVE path as x86 (the only
1195 * architecture using this flag) deals with this in a
1196 * different way by using a catch-all vector.
1197 */
1198 if ((vflags & VIRQ_ACTIVATE) &&
1199 irqd_affinity_is_managed(irqd) &&
1200 !cpumask_intersects(irq_data_get_affinity_mask(irqd),
1201 cpu_online_mask)) {
1202 irqd_set_managed_shutdown(irqd);
1203 return 0;
1204 }
1205 }
1206
1207 if (!(vflags & VIRQ_ACTIVATE))
1208 return 0;
1209
1210 ret = irq_domain_activate_irq(irqd, vflags & VIRQ_CAN_RESERVE);
1211 if (ret)
1212 return ret;
1213 /*
1214 * If the interrupt uses reservation mode, clear the activated bit
1215 * so request_irq() will assign the final vector.
1216 */
1217 if (vflags & VIRQ_CAN_RESERVE)
1218 irqd_clr_activated(irqd);
1219 return 0;
1220}
1221
1222static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain,
1223 struct msi_ctrl *ctrl)
1224{
1225 struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1226 struct msi_domain_info *info = domain->host_data;
1227 struct msi_domain_ops *ops = info->ops;
1228 unsigned int vflags = 0, allocated = 0;
1229 msi_alloc_info_t arg = { };
1230 struct msi_desc *desc;
1231 unsigned long idx;
1232 int i, ret, virq;
1233
1234 ret = msi_domain_prepare_irqs(domain, dev, ctrl->nirqs, &arg);
1235 if (ret)
1236 return ret;
1237
1238 /*
1239 * This flag is set by the PCI layer as we need to activate
1240 * the MSI entries before the PCI layer enables MSI in the
1241 * card. Otherwise the card latches a random msi message.
1242 */
1243 if (info->flags & MSI_FLAG_ACTIVATE_EARLY)
1244 vflags |= VIRQ_ACTIVATE;
1245
1246 /*
1247 * Interrupt can use a reserved vector and will not occupy
1248 * a real device vector until the interrupt is requested.
1249 */
1250 if (msi_check_reservation_mode(domain, info, dev))
1251 vflags |= VIRQ_CAN_RESERVE;
1252
1253 xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1254 if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED))
1255 continue;
1256
1257 /* This should return -ECONFUSED... */
1258 if (WARN_ON_ONCE(allocated >= ctrl->nirqs))
1259 return -EINVAL;
1260
1261 if (ops->prepare_desc)
1262 ops->prepare_desc(domain, &arg, desc);
1263
1264 ops->set_desc(&arg, desc);
1265
1266 virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
1267 dev_to_node(dev), &arg, false,
1268 desc->affinity);
1269 if (virq < 0)
1270 return msi_handle_pci_fail(domain, desc, allocated);
1271
1272 for (i = 0; i < desc->nvec_used; i++) {
1273 irq_set_msi_desc_off(virq, i, desc);
1274 irq_debugfs_copy_devname(virq + i, dev);
1275 ret = msi_init_virq(domain, virq + i, vflags);
1276 if (ret)
1277 return ret;
1278 }
1279 if (info->flags & MSI_FLAG_DEV_SYSFS) {
1280 ret = msi_sysfs_populate_desc(dev, desc);
1281 if (ret)
1282 return ret;
1283 }
1284 allocated++;
1285 }
1286 return 0;
1287}
1288
1289static int msi_domain_alloc_simple_msi_descs(struct device *dev,
1290 struct msi_domain_info *info,
1291 struct msi_ctrl *ctrl)
1292{
1293 if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS))
1294 return 0;
1295
1296 return msi_domain_add_simple_msi_descs(dev, ctrl);
1297}
1298
1299static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
1300{
1301 struct msi_domain_info *info;
1302 struct msi_domain_ops *ops;
1303 struct irq_domain *domain;
1304 int ret;
1305
1306 if (!msi_ctrl_valid(dev, ctrl))
1307 return -EINVAL;
1308
1309 domain = msi_get_device_domain(dev, ctrl->domid);
1310 if (!domain)
1311 return -ENODEV;
1312
1313 info = domain->host_data;
1314
1315 ret = msi_domain_alloc_simple_msi_descs(dev, info, ctrl);
1316 if (ret)
1317 return ret;
1318
1319 ops = info->ops;
1320 if (ops->domain_alloc_irqs)
1321 return ops->domain_alloc_irqs(domain, dev, ctrl->nirqs);
1322
1323 return __msi_domain_alloc_irqs(dev, domain, ctrl);
1324}
1325
/* Allocate the interrupts of @ctrl and free the range again on failure */
static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
{
	int ret;

	ret = __msi_domain_alloc_locked(dev, ctrl);
	if (!ret)
		return 0;

	msi_domain_free_locked(dev, ctrl);
	return ret;
}
1334
1335/**
1336 * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain
1337 * @dev: Pointer to device struct of the device for which the interrupts
1338 * are allocated
1339 * @domid: Id of the interrupt domain to operate on
1340 * @first: First index to allocate (inclusive)
1341 * @last: Last index to allocate (inclusive)
1342 *
1343 * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
1344 * pair. Use this for MSI irqdomains which implement their own descriptor
1345 * allocation/free.
1346 *
1347 * Return: %0 on success or an error code.
1348 */
1349int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid,
1350 unsigned int first, unsigned int last)
1351{
1352 struct msi_ctrl ctrl = {
1353 .domid = domid,
1354 .first = first,
1355 .last = last,
1356 .nirqs = last + 1 - first,
1357 };
1358
1359 return msi_domain_alloc_locked(dev, &ctrl);
1360}
1361
/**
 * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are allocated
 * @domid:	Id of the interrupt domain to operate on
 * @first:	First index to allocate (inclusive)
 * @last:	Last index to allocate (inclusive)
 *
 * Takes the MSI descriptor lock around the allocation.
 *
 * Return: %0 on success or an error code.
 */
int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
				unsigned int first, unsigned int last)
{
	int err;

	msi_lock_descs(dev);
	err = msi_domain_alloc_irqs_range_locked(dev, domid, first, last);
	msi_unlock_descs(dev);

	return err;
}
EXPORT_SYMBOL_GPL(msi_domain_alloc_irqs_range);
1383
1384/**
1385 * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain
1386 *
1387 * @dev: Pointer to device struct of the device for which the interrupts
1388 * are allocated
1389 * @domid: Id of the interrupt domain to operate on
1390 * @nirqs: The number of interrupts to allocate
1391 *
1392 * This function scans all MSI descriptors of the MSI domain and allocates interrupts
1393 * for all unassigned ones. That function is to be used for MSI domain usage where
1394 * the descriptor allocation is handled at the call site, e.g. PCI/MSI[X].
1395 *
1396 * Return: %0 on success or an error code.
1397 */
1398int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs)
1399{
1400 struct msi_ctrl ctrl = {
1401 .domid = domid,
1402 .first = 0,
1403 .last = msi_domain_get_hwsize(dev, domid) - 1,
1404 .nirqs = nirqs,
1405 };
1406
1407 return msi_domain_alloc_locked(dev, &ctrl);
1408}
1409
1410static struct msi_map __msi_domain_alloc_irq_at(struct device *dev, unsigned int domid,
1411 unsigned int index,
1412 const struct irq_affinity_desc *affdesc,
1413 union msi_instance_cookie *icookie)
1414{
1415 struct msi_ctrl ctrl = { .domid = domid, .nirqs = 1, };
1416 struct irq_domain *domain;
1417 struct msi_map map = { };
1418 struct msi_desc *desc;
1419 int ret;
1420
1421 domain = msi_get_device_domain(dev, domid);
1422 if (!domain) {
1423 map.index = -ENODEV;
1424 return map;
1425 }
1426
1427 desc = msi_alloc_desc(dev, 1, affdesc);
1428 if (!desc) {
1429 map.index = -ENOMEM;
1430 return map;
1431 }
1432
1433 if (icookie)
1434 desc->data.icookie = *icookie;
1435
1436 ret = msi_insert_desc(dev, desc, domid, index);
1437 if (ret) {
1438 map.index = ret;
1439 return map;
1440 }
1441
1442 ctrl.first = ctrl.last = desc->msi_index;
1443
1444 ret = __msi_domain_alloc_irqs(dev, domain, &ctrl);
1445 if (ret) {
1446 map.index = ret;
1447 msi_domain_free_locked(dev, &ctrl);
1448 } else {
1449 map.index = desc->msi_index;
1450 map.virq = desc->irq;
1451 }
1452 return map;
1453}
1454
1455/**
1456 * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at
1457 * a given index - or at the next free index
1458 *
1459 * @dev: Pointer to device struct of the device for which the interrupts
1460 * are allocated
1461 * @domid: Id of the interrupt domain to operate on
1462 * @index: Index for allocation. If @index == %MSI_ANY_INDEX the allocation
1463 * uses the next free index.
1464 * @affdesc: Optional pointer to an interrupt affinity descriptor structure
1465 * @icookie: Optional pointer to a domain specific per instance cookie. If
1466 * non-NULL the content of the cookie is stored in msi_desc::data.
1467 * Must be NULL for MSI-X allocations
1468 *
1469 * This requires a MSI interrupt domain which lets the core code manage the
1470 * MSI descriptors.
1471 *
1472 * Return: struct msi_map
1473 *
1474 * On success msi_map::index contains the allocated index number and
1475 * msi_map::virq the corresponding Linux interrupt number
1476 *
1477 * On failure msi_map::index contains the error code and msi_map::virq
1478 * is %0.
1479 */
1480struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index,
1481 const struct irq_affinity_desc *affdesc,
1482 union msi_instance_cookie *icookie)
1483{
1484 struct msi_map map;
1485
1486 msi_lock_descs(dev);
1487 map = __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie);
1488 msi_unlock_descs(dev);
1489 return map;
1490}
1491
1492/**
1493 * msi_device_domain_alloc_wired - Allocate a "wired" interrupt on @domain
1494 * @domain: The domain to allocate on
1495 * @hwirq: The hardware interrupt number to allocate for
1496 * @type: The interrupt type
1497 *
1498 * This weirdness supports wire to MSI controllers like MBIGEN.
1499 *
1500 * @hwirq is the hardware interrupt number which is handed in from
1501 * irq_create_fwspec_mapping(). As the wire to MSI domain is sparse, but
1502 * sized in firmware, the hardware interrupt number cannot be used as MSI
1503 * index. For the underlying irq chip the MSI index is irrelevant and
1504 * all it needs is the hardware interrupt number.
1505 *
1506 * To handle this the MSI index is allocated with MSI_ANY_INDEX and the
1507 * hardware interrupt number is stored along with the type information in
1508 * msi_desc::cookie so the underlying interrupt chip and domain code can
1509 * retrieve it.
1510 *
1511 * Return: The Linux interrupt number (> 0) or an error code
1512 */
1513int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
1514 unsigned int type)
1515{
1516 unsigned int domid = MSI_DEFAULT_DOMAIN;
1517 union msi_instance_cookie icookie = { };
1518 struct device *dev = domain->dev;
1519 struct msi_map map = { };
1520
1521 if (WARN_ON_ONCE(!dev || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
1522 return -EINVAL;
1523
1524 icookie.value = ((u64)type << 32) | hwirq;
1525
1526 msi_lock_descs(dev);
1527 if (WARN_ON_ONCE(msi_get_device_domain(dev, domid) != domain))
1528 map.index = -EINVAL;
1529 else
1530 map = __msi_domain_alloc_irq_at(dev, domid, MSI_ANY_INDEX, NULL, &icookie);
1531 msi_unlock_descs(dev);
1532
1533 return map.index >= 0 ? map.virq : map.index;
1534}
1535
1536static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain,
1537 struct msi_ctrl *ctrl)
1538{
1539 struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1540 struct msi_domain_info *info = domain->host_data;
1541 struct irq_data *irqd;
1542 struct msi_desc *desc;
1543 unsigned long idx;
1544 int i;
1545
1546 xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1547 /* Only handle MSI entries which have an interrupt associated */
1548 if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED))
1549 continue;
1550
1551 /* Make sure all interrupts are deactivated */
1552 for (i = 0; i < desc->nvec_used; i++) {
1553 irqd = irq_domain_get_irq_data(domain, desc->irq + i);
1554 if (irqd && irqd_is_activated(irqd))
1555 irq_domain_deactivate_irq(irqd);
1556 }
1557
1558 irq_domain_free_irqs(desc->irq, desc->nvec_used);
1559 if (info->flags & MSI_FLAG_DEV_SYSFS)
1560 msi_sysfs_remove_desc(dev, desc);
1561 desc->irq = 0;
1562 }
1563}
1564
1565static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl)
1566{
1567 struct msi_domain_info *info;
1568 struct msi_domain_ops *ops;
1569 struct irq_domain *domain;
1570
1571 if (!msi_ctrl_valid(dev, ctrl))
1572 return;
1573
1574 domain = msi_get_device_domain(dev, ctrl->domid);
1575 if (!domain)
1576 return;
1577
1578 info = domain->host_data;
1579 ops = info->ops;
1580
1581 if (ops->domain_free_irqs)
1582 ops->domain_free_irqs(domain, dev);
1583 else
1584 __msi_domain_free_irqs(dev, domain, ctrl);
1585
1586 if (ops->msi_post_free)
1587 ops->msi_post_free(domain, dev);
1588
1589 if (info->flags & MSI_FLAG_FREE_MSI_DESCS)
1590 msi_domain_free_descs(dev, ctrl);
1591}
1592
1593/**
1594 * msi_domain_free_irqs_range_locked - Free a range of interrupts from a MSI interrupt domain
1595 * associated to @dev with msi_lock held
1596 * @dev: Pointer to device struct of the device for which the interrupts
1597 * are freed
1598 * @domid: Id of the interrupt domain to operate on
1599 * @first: First index to free (inclusive)
1600 * @last: Last index to free (inclusive)
1601 */
1602void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
1603 unsigned int first, unsigned int last)
1604{
1605 struct msi_ctrl ctrl = {
1606 .domid = domid,
1607 .first = first,
1608 .last = last,
1609 };
1610 msi_domain_free_locked(dev, &ctrl);
1611}
1612
/**
 * msi_domain_free_irqs_range - Free a range of interrupts from a MSI interrupt domain
 *				associated to @dev
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	Id of the interrupt domain to operate on
 * @first:	First index to free (inclusive)
 * @last:	Last index to free (inclusive)
 *
 * Takes the MSI descriptor lock of @dev around
 * msi_domain_free_irqs_range_locked().
 */
void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
				unsigned int first, unsigned int last)
{
	msi_lock_descs(dev);
	msi_domain_free_irqs_range_locked(dev, domid, first, last);
	msi_unlock_descs(dev);
}

/**
 * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain
 *				     associated to a device
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	The id of the domain to operate on
 *
 * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
 * pair. Use this for MSI irqdomains which implement their own vector
 * allocation.
 */
void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid)
{
	/* Cover index 0 up to the last index of the domain's hardware size */
	msi_domain_free_irqs_range_locked(dev, domid, 0,
					  msi_domain_get_hwsize(dev, domid) - 1);
}

/**
 * msi_domain_free_irqs_all - Free all interrupts from a MSI interrupt domain
 *			      associated to a device
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	The id of the domain to operate on
 *
 * Takes the MSI descriptor lock of @dev around
 * msi_domain_free_irqs_all_locked().
 */
void msi_domain_free_irqs_all(struct device *dev, unsigned int domid)
{
	msi_lock_descs(dev);
	msi_domain_free_irqs_all_locked(dev, domid);
	msi_unlock_descs(dev);
}
EXPORT_SYMBOL_GPL(msi_domain_free_irqs_all);
1661
1662/**
1663 * msi_device_domain_free_wired - Free a wired interrupt in @domain
1664 * @domain: The domain to free the interrupt on
1665 * @virq: The Linux interrupt number to free
1666 *
1667 * This is the counterpart of msi_device_domain_alloc_wired() for the
1668 * weird wired to MSI converting domains.
1669 */
1670void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq)
1671{
1672 struct msi_desc *desc = irq_get_msi_desc(virq);
1673 struct device *dev = domain->dev;
1674
1675 if (WARN_ON_ONCE(!dev || !desc || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
1676 return;
1677
1678 msi_lock_descs(dev);
1679 if (!WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain)) {
1680 msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index,
1681 desc->msi_index);
1682 }
1683 msi_unlock_descs(dev);
1684}
1685
1686/**
1687 * msi_get_domain_info - Get the MSI interrupt domain info for @domain
1688 * @domain: The interrupt domain to retrieve data from
1689 *
1690 * Return: the pointer to the msi_domain_info stored in @domain->host_data.
1691 */
1692struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
1693{
1694 return (struct msi_domain_info *)domain->host_data;
1695}
1696
1697/**
1698 * msi_device_has_isolated_msi - True if the device has isolated MSI
1699 * @dev: The device to check
1700 *
1701 * Isolated MSI means that HW modeled by an irq_domain on the path from the
1702 * initiating device to the CPU will validate that the MSI message specifies an
1703 * interrupt number that the device is authorized to trigger. This must block
1704 * devices from triggering interrupts they are not authorized to trigger.
1705 * Currently authorization means the MSI vector is one assigned to the device.
1706 *
1707 * This is interesting for securing VFIO use cases where a rouge MSI (eg created
1708 * by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to
1709 * impact outside its security domain, eg userspace triggering interrupts on
1710 * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM
1711 * triggering interrupts on another VM.
1712 */
1713bool msi_device_has_isolated_msi(struct device *dev)
1714{
1715 struct irq_domain *domain = dev_get_msi_domain(dev);
1716
1717 for (; domain; domain = domain->parent)
1718 if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI)
1719 return true;
1720 return arch_is_isolated_msi();
1721}
1722EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi);