1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (C) 2014 Intel Corp.
6 * This file is licensed under GPLv2.
8 * This file contains common code to support Message Signaled Interrupts for
9 * PCI compatible and non PCI compatible devices.
11 #include <linux/types.h>
12 #include <linux/device.h>
13 #include <linux/irq.h>
14 #include <linux/irqdomain.h>
15 #include <linux/msi.h>
16 #include <linux/slab.h>
17 #include <linux/sysfs.h>
18 #include <linux/pci.h>
20 #include "internals.h"
23 * struct msi_ctrl - MSI internal management control structure
24 * @domid: ID of the domain on which management operations should be done
25 * @first: First (hardware) slot index to operate on
26 * @last: Last (hardware) slot index to operate on
27 * @nirqs: The number of Linux interrupts to allocate. Can be larger
28 * than the range due to PCI/multi-MSI.
37 /* Invalid Xarray index which is outside of any searchable range */
38 #define MSI_XA_MAX_INDEX (ULONG_MAX - 1)
39 /* The maximum domain size */
40 #define MSI_XA_DOMAIN_SIZE (MSI_MAX_INDEX + 1)
42 static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl);
43 static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid);
44 static inline int msi_sysfs_create_group(struct device *dev);
48 * msi_alloc_desc - Allocate an initialized msi_desc
49 * @dev: Pointer to the device for which this is allocated
50 * @nvec: The number of vectors used in this entry
51 * @affinity: Optional pointer to an affinity mask array of size @nvec
53 * If @affinity is not %NULL then an affinity array[@nvec] is allocated
54 * and the affinity masks and flags from @affinity are copied.
56 * Return: pointer to allocated &msi_desc on success or %NULL on failure
58 static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec,
59 const struct irq_affinity_desc *affinity)
61 struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
67 desc->nvec_used = nvec;
69 desc->affinity = kmemdup(affinity, nvec * sizeof(*desc->affinity), GFP_KERNEL);
70 if (!desc->affinity) {
78 static void msi_free_desc(struct msi_desc *desc)
80 kfree(desc->affinity);
84 static int msi_insert_desc(struct device *dev, struct msi_desc *desc,
85 unsigned int domid, unsigned int index)
87 struct msi_device_data *md = dev->msi.data;
88 struct xarray *xa = &md->__domains[domid].store;
92 hwsize = msi_domain_get_hwsize(dev, domid);
94 if (index == MSI_ANY_INDEX) {
95 struct xa_limit limit = { .min = 0, .max = hwsize - 1 };
98 /* Let the xarray allocate a free index within the limit */
99 ret = xa_alloc(xa, &index, desc, limit, GFP_KERNEL);
103 desc->msi_index = index;
106 if (index >= hwsize) {
111 desc->msi_index = index;
112 ret = xa_insert(xa, index, desc, GFP_KERNEL);
123 * msi_domain_insert_msi_desc - Allocate and initialize a MSI descriptor and
124 * insert it at @init_desc->msi_index
126 * @dev: Pointer to the device for which the descriptor is allocated
127 * @domid: The id of the interrupt domain to which the descriptor is added
128 * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor
130 * Return: 0 on success or an appropriate failure code.
132 int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid,
133 struct msi_desc *init_desc)
135 struct msi_desc *desc;
137 lockdep_assert_held(&dev->msi.data->mutex);
139 desc = msi_alloc_desc(dev, init_desc->nvec_used, init_desc->affinity);
143 /* Copy type specific data to the new descriptor. */
144 desc->pci = init_desc->pci;
146 return msi_insert_desc(dev, desc, domid, init_desc->msi_index);
149 static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
154 case MSI_DESC_NOTASSOCIATED:
156 case MSI_DESC_ASSOCIATED:
163 static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl)
167 if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS ||
169 !dev->msi.data->__domains[ctrl->domid].domain)))
172 hwsize = msi_domain_get_hwsize(dev, ctrl->domid);
173 if (WARN_ON_ONCE(ctrl->first > ctrl->last ||
174 ctrl->first >= hwsize ||
175 ctrl->last >= hwsize))
180 static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl)
182 struct msi_desc *desc;
186 lockdep_assert_held(&dev->msi.data->mutex);
188 if (!msi_ctrl_valid(dev, ctrl))
191 xa = &dev->msi.data->__domains[ctrl->domid].store;
192 xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
195 /* Leak the descriptor when it is still referenced */
196 if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
203 * msi_domain_free_msi_descs_range - Free a range of MSI descriptors of a device in an irqdomain
204 * @dev: Device for which to free the descriptors
205 * @domid: Id of the domain to operate on
206 * @first: Index to start freeing from (inclusive)
207 * @last: Last index to be freed (inclusive)
209 void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid,
210 unsigned int first, unsigned int last)
212 struct msi_ctrl ctrl = {
218 msi_domain_free_descs(dev, &ctrl);
222 * msi_domain_add_simple_msi_descs - Allocate and initialize MSI descriptors
223 * @dev: Pointer to the device for which the descriptors are allocated
224 * @ctrl: Allocation control struct
226 * Return: 0 on success or an appropriate failure code.
228 static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *ctrl)
230 struct msi_desc *desc;
234 lockdep_assert_held(&dev->msi.data->mutex);
236 if (!msi_ctrl_valid(dev, ctrl))
239 for (idx = ctrl->first; idx <= ctrl->last; idx++) {
240 desc = msi_alloc_desc(dev, 1, NULL);
243 ret = msi_insert_desc(dev, desc, ctrl->domid, idx);
252 msi_domain_free_descs(dev, ctrl);
256 void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
261 void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
263 struct msi_desc *entry = irq_get_msi_desc(irq);
265 __get_cached_msi_msg(entry, msg);
267 EXPORT_SYMBOL_GPL(get_cached_msi_msg);
269 static void msi_device_data_release(struct device *dev, void *res)
271 struct msi_device_data *md = res;
274 for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) {
275 msi_remove_device_irq_domain(dev, i);
276 WARN_ON_ONCE(!xa_empty(&md->__domains[i].store));
277 xa_destroy(&md->__domains[i].store);
279 dev->msi.data = NULL;
283 * msi_setup_device_data - Setup MSI device data
284 * @dev: Device for which MSI device data should be set up
286 * Return: 0 on success, appropriate error code otherwise
288 * This can be called more than once for @dev. If the MSI device data is
289 * already allocated the call succeeds. The allocated memory is
290 * automatically released when the device is destroyed.
292 int msi_setup_device_data(struct device *dev)
294 struct msi_device_data *md;
300 md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL);
304 ret = msi_sysfs_create_group(dev);
310 for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++)
311 xa_init_flags(&md->__domains[i].store, XA_FLAGS_ALLOC);
314 * If @dev::msi::domain is set and is a global MSI domain, copy the
315 * pointer into the domain array so all code can operate on domain
316 * ids. The NULL pointer check is required to keep the legacy
317 * architecture specific PCI/MSI support working.
319 if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain))
320 md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain;
322 mutex_init(&md->mutex);
329 * msi_lock_descs - Lock the MSI descriptor storage of a device
330 * @dev: Device to operate on
332 void msi_lock_descs(struct device *dev)
334 mutex_lock(&dev->msi.data->mutex);
336 EXPORT_SYMBOL_GPL(msi_lock_descs);
339 * msi_unlock_descs - Unlock the MSI descriptor storage of a device
340 * @dev: Device to operate on
342 void msi_unlock_descs(struct device *dev)
344 /* Invalidate the index which was cached by the iterator */
345 dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX;
346 mutex_unlock(&dev->msi.data->mutex);
348 EXPORT_SYMBOL_GPL(msi_unlock_descs);
350 static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid,
351 enum msi_desc_filter filter)
353 struct xarray *xa = &md->__domains[domid].store;
354 struct msi_desc *desc;
356 xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) {
357 if (msi_desc_match(desc, filter))
360 md->__iter_idx = MSI_XA_MAX_INDEX;
365 * msi_domain_first_desc - Get the first MSI descriptor of an irqdomain associated to a device
366 * @dev: Device to operate on
367 * @domid: The id of the interrupt domain which should be walked.
368 * @filter: Descriptor state filter
370 * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs()
371 * must be invoked before the call.
373 * Return: Pointer to the first MSI descriptor matching the search
374 * criteria, NULL if none found.
376 struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
377 enum msi_desc_filter filter)
379 struct msi_device_data *md = dev->msi.data;
381 if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
384 lockdep_assert_held(&md->mutex);
387 return msi_find_desc(md, domid, filter);
389 EXPORT_SYMBOL_GPL(msi_domain_first_desc);
392 * msi_next_desc - Get the next MSI descriptor of a device
393 * @dev: Device to operate on
394 * @domid: The id of the interrupt domain which should be walked.
395 * @filter: Descriptor state filter
397 * The first invocation of msi_next_desc() has to be preceded by a
398 * successful invocation of msi_domain_first_desc(). Consecutive invocations are
399 * only valid if the previous one was successful. All these operations have
400 * to be done within the same MSI mutex held region.
402 * Return: Pointer to the next MSI descriptor matching the search
403 * criteria, NULL if none found.
405 struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid,
406 enum msi_desc_filter filter)
408 struct msi_device_data *md = dev->msi.data;
410 if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
413 lockdep_assert_held(&md->mutex);
415 if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX)
419 return msi_find_desc(md, domid, filter);
421 EXPORT_SYMBOL_GPL(msi_next_desc);
424 * msi_domain_get_virq - Lookup the Linux interrupt number for a MSI index on an interrupt domain
425 * @dev: Device to operate on
426 * @domid: Domain ID of the interrupt domain associated to the device
427 * @index: MSI interrupt index to look for (0-based)
429 * Return: The Linux interrupt number on success (> 0), 0 if not found
431 unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index)
433 struct msi_desc *desc;
434 unsigned int ret = 0;
441 if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
444 /* This check is only valid for the PCI default MSI domain */
445 if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN)
446 pcimsi = to_pci_dev(dev)->msi_enabled;
449 xa = &dev->msi.data->__domains[domid].store;
450 desc = xa_load(xa, pcimsi ? 0 : index);
451 if (desc && desc->irq) {
453 * PCI-MSI has only one descriptor for multiple interrupts.
454 * PCI-MSIX and platform MSI use a descriptor per
458 if (index < desc->nvec_used)
459 ret = desc->irq + index;
465 msi_unlock_descs(dev);
468 EXPORT_SYMBOL_GPL(msi_domain_get_virq);
471 static struct attribute *msi_dev_attrs[] = {
475 static const struct attribute_group msi_irqs_group = {
477 .attrs = msi_dev_attrs,
480 static inline int msi_sysfs_create_group(struct device *dev)
482 return devm_device_add_group(dev, &msi_irqs_group);
485 static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
488 /* MSI vs. MSIX is per device not per interrupt */
489 bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false;
491 return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi");
494 static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
496 struct device_attribute *attrs = desc->sysfs_attrs;
502 desc->sysfs_attrs = NULL;
503 for (i = 0; i < desc->nvec_used; i++) {
505 sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
506 kfree(attrs[i].attr.name);
511 static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
513 struct device_attribute *attrs;
516 attrs = kcalloc(desc->nvec_used, sizeof(*attrs), GFP_KERNEL);
520 desc->sysfs_attrs = attrs;
521 for (i = 0; i < desc->nvec_used; i++) {
522 sysfs_attr_init(&attrs[i].attr);
523 attrs[i].attr.name = kasprintf(GFP_KERNEL, "%d", desc->irq + i);
524 if (!attrs[i].attr.name) {
529 attrs[i].attr.mode = 0444;
530 attrs[i].show = msi_mode_show;
532 ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
534 attrs[i].show = NULL;
541 msi_sysfs_remove_desc(dev, desc);
545 #ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS
547 * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
548 * @dev: The device (PCI, platform etc) which will get sysfs entries
550 int msi_device_populate_sysfs(struct device *dev)
552 struct msi_desc *desc;
555 msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) {
556 if (desc->sysfs_attrs)
558 ret = msi_sysfs_populate_desc(dev, desc);
566 * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device
567 * @dev: The device (PCI, platform etc) for which to remove
570 void msi_device_destroy_sysfs(struct device *dev)
572 struct msi_desc *desc;
574 msi_for_each_desc(desc, dev, MSI_DESC_ALL)
575 msi_sysfs_remove_desc(dev, desc);
577 #endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */
578 #else /* CONFIG_SYSFS */
579 static inline int msi_sysfs_create_group(struct device *dev) { return 0; }
580 static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) { return 0; }
581 static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) { }
582 #endif /* !CONFIG_SYSFS */
584 static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid)
586 struct irq_domain *domain;
588 lockdep_assert_held(&dev->msi.data->mutex);
590 if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS))
593 domain = dev->msi.data->__domains[domid].domain;
597 if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain)))
603 static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid)
605 struct msi_domain_info *info;
606 struct irq_domain *domain;
608 domain = msi_get_device_domain(dev, domid);
610 info = domain->host_data;
613 /* No domain, default to MSI_XA_DOMAIN_SIZE */
614 return MSI_XA_DOMAIN_SIZE;
617 static inline void irq_chip_write_msi_msg(struct irq_data *data,
620 data->chip->irq_write_msi_msg(data, msg);
623 static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
625 struct msi_domain_info *info = domain->host_data;
628 * If the MSI provider has messed with the second message and
629 * not advertized that it is level-capable, signal the breakage.
631 WARN_ON(!((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
632 (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)) &&
633 (msg[1].address_lo || msg[1].address_hi || msg[1].data));
637 * msi_domain_set_affinity - Generic affinity setter function for MSI domains
638 * @irq_data: The irq data associated to the interrupt
639 * @mask: The affinity mask to set
640 * @force: Flag to enforce setting (disable online checks)
642 * Intended to be used by MSI interrupt controllers which are
643 * implemented with hierarchical domains.
645 * Return: IRQ_SET_MASK_* result code
647 int msi_domain_set_affinity(struct irq_data *irq_data,
648 const struct cpumask *mask, bool force)
650 struct irq_data *parent = irq_data->parent_data;
651 struct msi_msg msg[2] = { [1] = { }, };
654 ret = parent->chip->irq_set_affinity(parent, mask, force);
655 if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
656 BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
657 msi_check_level(irq_data->domain, msg);
658 irq_chip_write_msi_msg(irq_data, msg);
664 static int msi_domain_activate(struct irq_domain *domain,
665 struct irq_data *irq_data, bool early)
667 struct msi_msg msg[2] = { [1] = { }, };
669 BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
670 msi_check_level(irq_data->domain, msg);
671 irq_chip_write_msi_msg(irq_data, msg);
675 static void msi_domain_deactivate(struct irq_domain *domain,
676 struct irq_data *irq_data)
678 struct msi_msg msg[2];
680 memset(msg, 0, sizeof(msg));
681 irq_chip_write_msi_msg(irq_data, msg);
684 static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
685 unsigned int nr_irqs, void *arg)
687 struct msi_domain_info *info = domain->host_data;
688 struct msi_domain_ops *ops = info->ops;
689 irq_hw_number_t hwirq = ops->get_hwirq(info, arg);
692 if (irq_find_mapping(domain, hwirq) > 0)
695 if (domain->parent) {
696 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
701 for (i = 0; i < nr_irqs; i++) {
702 ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
705 while (i-- > 0) /* unwind every interrupt msi_init() succeeded for, index 0 included */
706 ops->msi_free(domain, info, virq + i);
708 irq_domain_free_irqs_top(domain, virq, nr_irqs);
716 static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
717 unsigned int nr_irqs)
719 struct msi_domain_info *info = domain->host_data;
722 if (info->ops->msi_free) {
723 for (i = 0; i < nr_irqs; i++)
724 info->ops->msi_free(domain, info, virq + i);
726 irq_domain_free_irqs_top(domain, virq, nr_irqs);
729 static const struct irq_domain_ops msi_domain_ops = {
730 .alloc = msi_domain_alloc,
731 .free = msi_domain_free,
732 .activate = msi_domain_activate,
733 .deactivate = msi_domain_deactivate,
736 static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
737 msi_alloc_info_t *arg)
742 static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev,
743 int nvec, msi_alloc_info_t *arg)
745 memset(arg, 0, sizeof(*arg));
749 static void msi_domain_ops_set_desc(msi_alloc_info_t *arg,
750 struct msi_desc *desc)
755 static int msi_domain_ops_init(struct irq_domain *domain,
756 struct msi_domain_info *info,
757 unsigned int virq, irq_hw_number_t hwirq,
758 msi_alloc_info_t *arg)
760 irq_domain_set_hwirq_and_chip(domain, virq, hwirq, info->chip,
762 if (info->handler && info->handler_name) {
763 __irq_set_handler(virq, info->handler, 0, info->handler_name);
764 if (info->handler_data)
765 irq_set_handler_data(virq, info->handler_data);
770 static struct msi_domain_ops msi_domain_ops_default = {
771 .get_hwirq = msi_domain_ops_get_hwirq,
772 .msi_init = msi_domain_ops_init,
773 .msi_prepare = msi_domain_ops_prepare,
774 .set_desc = msi_domain_ops_set_desc,
777 static void msi_domain_update_dom_ops(struct msi_domain_info *info)
779 struct msi_domain_ops *ops = info->ops;
782 info->ops = &msi_domain_ops_default;
786 if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS))
789 if (ops->get_hwirq == NULL)
790 ops->get_hwirq = msi_domain_ops_default.get_hwirq;
791 if (ops->msi_init == NULL)
792 ops->msi_init = msi_domain_ops_default.msi_init;
793 if (ops->msi_prepare == NULL)
794 ops->msi_prepare = msi_domain_ops_default.msi_prepare;
795 if (ops->set_desc == NULL)
796 ops->set_desc = msi_domain_ops_default.set_desc;
799 static void msi_domain_update_chip_ops(struct msi_domain_info *info)
801 struct irq_chip *chip = info->chip;
803 BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask);
804 if (!chip->irq_set_affinity)
805 chip->irq_set_affinity = msi_domain_set_affinity;
808 static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode,
809 struct msi_domain_info *info,
811 struct irq_domain *parent)
813 struct irq_domain *domain;
815 if (info->hwsize > MSI_XA_DOMAIN_SIZE)
819 * Hardware size 0 is valid for backwards compatibility and for
820 * domains which are not backed by a hardware table. Grant the
821 * maximum index space.
824 info->hwsize = MSI_XA_DOMAIN_SIZE;
826 msi_domain_update_dom_ops(info);
827 if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
828 msi_domain_update_chip_ops(info);
830 domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0,
831 fwnode, &msi_domain_ops, info);
834 if (!domain->name && info->chip)
835 domain->name = info->chip->name;
836 irq_domain_update_bus_token(domain, info->bus_token);
843 * msi_create_irq_domain - Create an MSI interrupt domain
844 * @fwnode: Optional fwnode of the interrupt controller
845 * @info: MSI domain info
846 * @parent: Parent irq domain
848 * Return: pointer to the created &struct irq_domain or %NULL on failure
850 struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
851 struct msi_domain_info *info,
852 struct irq_domain *parent)
854 return __msi_create_irq_domain(fwnode, info, 0, parent);
858 * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down
859 * in the domain hierarchy
860 * @dev: The device for which the domain should be created
861 * @domain: The domain in the hierarchy this op is being called on
862 * @msi_parent_domain: The IRQ_DOMAIN_FLAG_MSI_PARENT domain for the child to
864 * @msi_child_info: The MSI domain info of the IRQ_DOMAIN_FLAG_MSI_DEVICE
865 * domain to be created
867 * Return: true on success, false otherwise
869 * This is the most complex problem of per device MSI domains and the
870 * underlying interrupt domain hierarchy:
872 * The device domain to be initialized requests the broadest feature set
873 * possible and the underlying domain hierarchy puts restrictions on it.
875 * That's trivial for a simple parent->child relationship, but it gets
876 * interesting with an intermediate domain: root->parent->child. The
877 * intermediate 'parent' can expand the capabilities which the 'root'
878 * domain is providing. So that creates a classic chicken and egg problem:
879 * Which entity is doing the restrictions/expansions?
881 * One solution is to let the root domain handle the initialization, which is
882 * why there are both the @domain and the @msi_parent_domain pointers.
884 bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
885 struct irq_domain *msi_parent_domain,
886 struct msi_domain_info *msi_child_info)
888 struct irq_domain *parent = domain->parent;
890 if (WARN_ON_ONCE(!parent || !parent->msi_parent_ops ||
891 !parent->msi_parent_ops->init_dev_msi_info))
894 return parent->msi_parent_ops->init_dev_msi_info(dev, parent, msi_parent_domain,
899 * msi_create_device_irq_domain - Create a device MSI interrupt domain
900 * @dev: Pointer to the device
902 * @template: MSI domain info bundle used as template
903 * @hwsize: Maximum number of MSI table entries (0 if unknown or unlimited)
904 * @domain_data: Optional pointer to domain specific data which is set in
905 * msi_domain_info::data
906 * @chip_data: Optional pointer to chip specific data which is set in
907 * msi_domain_info::chip_data
909 * Return: True on success, false otherwise
911 * There is no firmware node required for this interface because the per
912 * device domains are software constructs which are actually closer to the
913 * hardware reality than any firmware can describe them.
915 * The domain name and the irq chip name for a MSI device domain are
916 * composed by: "$(PREFIX)$(CHIPNAME)-$(DEVNAME)"
918 * $PREFIX: Optional prefix provided by the underlying MSI parent domain
919 * via msi_parent_ops::prefix. If that pointer is NULL the prefix
921 * $CHIPNAME: The name of the irq_chip in @template
922 * $DEVNAME: The name of the device
924 * This results in understandable chip names and hardware interrupt numbers
925 * in e.g. /proc/interrupts
927 * PCI-MSI-0000:00:1c.0 0-edge Parent domain has no prefix
928 * IR-PCI-MSI-0000:00:1c.4 0-edge Same with interrupt remapping prefix 'IR-'
930 * IR-PCI-MSIX-0000:3d:00.0 0-edge Hardware interrupt numbers reflect
931 * IR-PCI-MSIX-0000:3d:00.0 1-edge the real MSI-X index on that device
932 * IR-PCI-MSIX-0000:3d:00.0 2-edge
934 * On IMS domains the hardware interrupt number is either a table entry
935 * index or a purely software managed index but it is guaranteed to be
938 * The domain pointer is stored in @dev::msi::data::__domains[]. All
939 * subsequent operations on the domain depend on the domain id.
941 * The domain is automatically freed when the device is removed via devres
942 * in the context of @dev::msi::data freeing, but it can also be
943 * independently removed via msi_remove_device_irq_domain().
945 bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
946 const struct msi_domain_template *template,
947 unsigned int hwsize, void *domain_data,
950 struct irq_domain *domain, *parent = dev->msi.domain;
951 const struct msi_parent_ops *pops;
952 struct msi_domain_template *bundle;
953 struct fwnode_handle *fwnode;
955 if (!irq_domain_is_msi_parent(parent))
958 if (domid >= MSI_MAX_DEVICE_IRQDOMAINS)
961 bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL);
965 bundle->info.hwsize = hwsize;
966 bundle->info.chip = &bundle->chip;
967 bundle->info.ops = &bundle->ops;
968 bundle->info.data = domain_data;
969 bundle->info.chip_data = chip_data;
971 pops = parent->msi_parent_ops;
972 snprintf(bundle->name, sizeof(bundle->name), "%s%s-%s",
973 pops->prefix ? : "", bundle->chip.name, dev_name(dev));
974 bundle->chip.name = bundle->name;
976 fwnode = irq_domain_alloc_named_fwnode(bundle->name);
980 if (msi_setup_device_data(dev))
985 if (WARN_ON_ONCE(msi_get_device_domain(dev, domid)))
988 if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info))
991 domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent);
996 dev->msi.data->__domains[domid].domain = domain;
997 msi_unlock_descs(dev);
1001 msi_unlock_descs(dev);
1010 * msi_remove_device_irq_domain - Free a device MSI interrupt domain
1011 * @dev: Pointer to the device
1014 void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
1016 struct msi_domain_info *info;
1017 struct irq_domain *domain;
1019 msi_lock_descs(dev);
1021 domain = msi_get_device_domain(dev, domid);
1023 if (!domain || !irq_domain_is_msi_device(domain))
1026 dev->msi.data->__domains[domid].domain = NULL;
1027 info = domain->host_data;
1028 irq_domain_remove(domain);
1029 kfree(container_of(info, struct msi_domain_template, info));
1032 msi_unlock_descs(dev);
1036 * msi_match_device_irq_domain - Match a device irq domain against a bus token
1037 * @dev: Pointer to the device
1039 * @bus_token: Bus token to match against the domain bus token
1041 * Return: True if device domain exists and bus tokens match.
1043 bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
1044 enum irq_domain_bus_token bus_token)
1046 struct msi_domain_info *info;
1047 struct irq_domain *domain;
1050 msi_lock_descs(dev);
1051 domain = msi_get_device_domain(dev, domid);
1052 if (domain && irq_domain_is_msi_device(domain)) {
1053 info = domain->host_data;
1054 ret = info->bus_token == bus_token;
1056 msi_unlock_descs(dev);
1060 int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
1061 int nvec, msi_alloc_info_t *arg)
1063 struct msi_domain_info *info = domain->host_data;
1064 struct msi_domain_ops *ops = info->ops;
1066 return ops->msi_prepare(domain, dev, nvec, arg);
1069 int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
1070 int virq_base, int nvec, msi_alloc_info_t *arg)
1072 struct msi_domain_info *info = domain->host_data;
1073 struct msi_domain_ops *ops = info->ops;
1074 struct msi_ctrl ctrl = {
1075 .domid = MSI_DEFAULT_DOMAIN,
1077 .last = virq_base + nvec - 1,
1079 struct msi_desc *desc;
1083 if (!msi_ctrl_valid(dev, &ctrl))
1086 msi_lock_descs(dev);
1087 ret = msi_domain_add_simple_msi_descs(dev, &ctrl);
1091 xa = &dev->msi.data->__domains[ctrl.domid].store;
1093 for (virq = virq_base; virq < virq_base + nvec; virq++) {
1094 desc = xa_load(xa, virq);
1097 ops->set_desc(arg, desc);
1098 ret = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg);
1102 irq_set_msi_desc(virq, desc);
1104 msi_unlock_descs(dev);
1108 for (--virq; virq >= virq_base; virq--)
1109 irq_domain_free_irqs_common(domain, virq, 1);
1110 msi_domain_free_descs(dev, &ctrl);
1112 msi_unlock_descs(dev);
1117 * Carefully check whether the device can use reservation mode. If
1118 * reservation mode is enabled then the early activation will assign a
1119 * dummy vector to the device. If the PCI/MSI device does not support
1120 * masking of the entry then this can result in spurious interrupts when
1121 * the device driver is not absolutely careful. But even then a malfunction
1122 * of the hardware could result in a spurious interrupt on the dummy vector
1123 * and render the device unusable. If the entry can be masked then the core
1124 * logic will prevent the spurious interrupt and reservation mode can be
1125 * used. For now reservation mode is restricted to PCI/MSI.
1127 static bool msi_check_reservation_mode(struct irq_domain *domain,
1128 struct msi_domain_info *info,
1131 struct msi_desc *desc;
1133 switch(domain->bus_token) {
1134 case DOMAIN_BUS_PCI_MSI:
1135 case DOMAIN_BUS_PCI_DEVICE_MSI:
1136 case DOMAIN_BUS_PCI_DEVICE_MSIX:
1137 case DOMAIN_BUS_VMD_MSI:
1143 if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
1146 if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
1150 * Checking the first MSI descriptor is sufficient. MSIX supports
1151 * masking and MSI does so when the can_mask attribute is set.
1153 desc = msi_first_desc(dev, MSI_DESC_ALL);
1154 return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask;
1157 static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc,
1160 switch(domain->bus_token) {
1161 case DOMAIN_BUS_PCI_MSI:
1162 case DOMAIN_BUS_PCI_DEVICE_MSI:
1163 case DOMAIN_BUS_PCI_DEVICE_MSIX:
1164 case DOMAIN_BUS_VMD_MSI:
1165 if (IS_ENABLED(CONFIG_PCI_MSI))
1172 /* Let a failed PCI multi MSI allocation retry */
1173 if (desc->nvec_used > 1)
1176 /* If there was a successful allocation let the caller know */
1177 return allocated ? allocated : -ENOSPC;
1180 #define VIRQ_CAN_RESERVE 0x01
1181 #define VIRQ_ACTIVATE 0x02
1182 #define VIRQ_NOMASK_QUIRK 0x04
1184 static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags)
1186 struct irq_data *irqd = irq_domain_get_irq_data(domain, virq);
1189 if (!(vflags & VIRQ_CAN_RESERVE)) {
1190 irqd_clr_can_reserve(irqd);
1191 if (vflags & VIRQ_NOMASK_QUIRK)
1192 irqd_set_msi_nomask_quirk(irqd);
1195 * If the interrupt is managed but no CPU is available to
1196 * service it, shut it down until better times. Note that
1197 * we only do this on the !RESERVE path as x86 (the only
1198 * architecture using this flag) deals with this in a
1199 * different way by using a catch-all vector.
1201 if ((vflags & VIRQ_ACTIVATE) &&
1202 irqd_affinity_is_managed(irqd) &&
1203 !cpumask_intersects(irq_data_get_affinity_mask(irqd),
1205 irqd_set_managed_shutdown(irqd);
1210 if (!(vflags & VIRQ_ACTIVATE))
1213 ret = irq_domain_activate_irq(irqd, vflags & VIRQ_CAN_RESERVE);
1217 * If the interrupt uses reservation mode, clear the activated bit
1218 * so request_irq() will assign the final vector.
1220 if (vflags & VIRQ_CAN_RESERVE)
1221 irqd_clr_activated(irqd);
/*
 * __msi_domain_alloc_irqs - Allocate Linux interrupts for MSI descriptors
 * of @dev
 * @dev:	Device whose per-domain descriptor store is walked
 * @domain:	Interrupt domain to allocate from; domain->host_data supplies
 *		the msi_domain_info and through it the msi_domain_ops
 * @ctrl:	Selects the descriptor store (domid), the index range
 *		first to last which is walked and the interrupt budget (nirqs)
 *
 * NOTE(review): local declarations, error checks, return statements and
 * closing braces of this function are not visible in this listing. The
 * comments below describe the visible statements only.
 */
1225 static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain,
1226 struct msi_ctrl *ctrl)
1228 struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1229 struct msi_domain_info *info = domain->host_data;
1230 struct msi_domain_ops *ops = info->ops;
1231 unsigned int vflags = 0, allocated = 0;
1232 msi_alloc_info_t arg = { };
1233 struct msi_desc *desc;
/* The empty initialized arg is handed in together with the total count ctrl->nirqs. */
1237 ret = msi_domain_prepare_irqs(domain, dev, ctrl->nirqs, &arg);
1242 * This flag is set by the PCI layer as we need to activate
1243 * the MSI entries before the PCI layer enables MSI in the
1244 * card. Otherwise the card latches a random msi message.
1246 if (info->flags & MSI_FLAG_ACTIVATE_EARLY)
1247 vflags |= VIRQ_ACTIVATE;
1250 * Interrupt can use a reserved vector and will not occupy
1251 * a real device vector until the interrupt is requested.
1253 if (msi_check_reservation_mode(domain, info, dev)) {
1254 vflags |= VIRQ_CAN_RESERVE;
1256 * MSI affinity setting requires a special quirk (X86) when
1257 * reservation mode is active.
1259 if (info->flags & MSI_FLAG_NOMASK_QUIRK)
1260 vflags |= VIRQ_NOMASK_QUIRK;
/* Walk the descriptors stored at indices ctrl->first up to ctrl->last. */
1263 xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
/*
 * Filter on MSI_DESC_NOTASSOCIATED. NOTE(review): the statement executed
 * for descriptors which do not match is not visible in this listing.
 */
1264 if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED))
/*
 * allocated is checked against the budget ctrl->nirqs. NOTE(review): the
 * statement which advances allocated is not visible in this listing.
 */
1267 /* This should return -ECONFUSED... */
1268 if (WARN_ON_ONCE(allocated >= ctrl->nirqs))
/* prepare_desc is an optional callback and only invoked when it is set. */
1271 if (ops->prepare_desc)
1272 ops->prepare_desc(domain, &arg, desc);
1274 ops->set_desc(&arg, desc);
/*
 * Request desc->nvec_used interrupts for this descriptor, passing -1 as
 * the base and the node of @dev. The trailing argument of the call is
 * not visible in this listing.
 */
1276 virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
1277 dev_to_node(dev), &arg, false,
/*
 * The result of msi_handle_pci_fail() is returned to the caller.
 * NOTE(review): the condition guarding this return is not visible.
 */
1280 return msi_handle_pci_fail(domain, desc, allocated);
/*
 * Per vector: irq_set_msi_desc_off() receives the base virq and the
 * offset i separately, while the two following calls take virq + i.
 */
1282 for (i = 0; i < desc->nvec_used; i++) {
1283 irq_set_msi_desc_off(virq, i, desc);
1284 irq_debugfs_copy_devname(virq + i, dev);
1285 ret = msi_init_virq(domain, virq + i, vflags);
/* msi_sysfs_populate_desc() is only called when MSI_FLAG_DEV_SYSFS is set. */
1289 if (info->flags & MSI_FLAG_DEV_SYSFS) {
1290 ret = msi_sysfs_populate_desc(dev, desc);
/*
 * msi_domain_alloc_simple_msi_descs - Conditionally call
 * msi_domain_add_simple_msi_descs() for @dev and @ctrl
 *
 * The call is only reached when @info->flags has
 * MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS set and its result is returned unchanged.
 * NOTE(review): the value returned when the flag is clear is not visible
 * in this listing.
 */
1299 static int msi_domain_alloc_simple_msi_descs(struct device *dev,
1300 struct msi_domain_info *info,
1301 struct msi_ctrl *ctrl)
1303 if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS))
1306 return msi_domain_add_simple_msi_descs(dev, ctrl);
/*
 * __msi_domain_alloc_locked - Validate @ctrl, look up the domain and
 * dispatch the allocation
 *
 * Visible steps: msi_ctrl_valid() check, domain lookup by ctrl->domid,
 * msi_domain_alloc_simple_msi_descs(), then either the domain specific
 * domain_alloc_irqs() callback or the generic __msi_domain_alloc_irqs().
 * NOTE(review): the error returns after the checks and the assignment of
 * ops are not visible in this listing.
 */
1309 static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
1311 struct msi_domain_info *info;
1312 struct msi_domain_ops *ops;
1313 struct irq_domain *domain;
1316 if (!msi_ctrl_valid(dev, ctrl))
1319 domain = msi_get_device_domain(dev, ctrl->domid);
1323 info = domain->host_data;
1325 ret = msi_domain_alloc_simple_msi_descs(dev, info, ctrl);
/*
 * A domain specific callback takes precedence. It only receives the
 * interrupt count ctrl->nirqs, not the index range of @ctrl.
 */
1330 if (ops->domain_alloc_irqs)
1331 return ops->domain_alloc_irqs(domain, dev, ctrl->nirqs);
/* No callback provided: use the generic allocation with the full @ctrl. */
1333 return __msi_domain_alloc_irqs(dev, domain, ctrl);
/*
 * msi_domain_alloc_locked - Run __msi_domain_alloc_locked() and call
 * msi_domain_free_locked() with the same @ctrl
 *
 * NOTE(review): the condition under which msi_domain_free_locked() is
 * called (presumably a non-zero ret, to be confirmed) and the return
 * statement are not visible in this listing.
 */
1336 static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
1338 int ret = __msi_domain_alloc_locked(dev, ctrl);
1341 msi_domain_free_locked(dev, ctrl);
1346 * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain
1347 * @dev: Pointer to device struct of the device for which the interrupts are allocated
1349 * @domid: Id of the interrupt domain to operate on
1350 * @first: First index to allocate (inclusive)
1351 * @last: Last index to allocate (inclusive)
1353 * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
1354 * pair. Use this for MSI irqdomains which implement their own descriptor allocation and freeing.
1357 * Return: %0 on success or an error code.
1359 int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid,
1360 unsigned int first, unsigned int last)
/*
 * NOTE(review): only the nirqs member of the initializer is visible in
 * this listing; the remaining members are presumably filled from domid,
 * first and last - TODO confirm.
 */
1362 struct msi_ctrl ctrl = {
/* Both bounds are inclusive, hence the + 1 when computing the count. */
1366 .nirqs = last + 1 - first,
1369 return msi_domain_alloc_locked(dev, &ctrl);
1373 * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain
1374 * @dev: Pointer to device struct of the device for which the interrupts are allocated
1376 * @domid: Id of the interrupt domain to operate on
1377 * @first: First index to allocate (inclusive)
1378 * @last: Last index to allocate (inclusive)
1380 * Return: %0 on success or an error code.
1382 int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
1383 unsigned int first, unsigned int last)
/*
 * Wrap the _locked variant in the msi_lock_descs() / msi_unlock_descs()
 * pair. The declaration of ret and the return statement are not visible
 * in this listing.
 */
1387 msi_lock_descs(dev);
1388 ret = msi_domain_alloc_irqs_range_locked(dev, domid, first, last);
1389 msi_unlock_descs(dev);
1394 * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain
1396 * @dev: Pointer to device struct of the device for which the interrupts are allocated
1398 * @domid: Id of the interrupt domain to operate on
1399 * @nirqs: The number of interrupts to allocate
1401 * This function scans all MSI descriptors of the MSI domain and allocates interrupts
1402 * for all unassigned ones. This function is to be used for MSI domain usage where
1403 * the descriptor allocation is handled at the call site, e.g. PCI/MSI[X].
1405 * Return: %0 on success or an error code.
1407 int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs)
/*
 * NOTE(review): only the last member of the initializer is visible in this
 * listing; the other members are presumably filled from domid and nirqs -
 * TODO confirm.
 */
1409 struct msi_ctrl ctrl = {
/*
 * Upper bound is the hardware size of the domain minus one.
 * NOTE(review): msi_domain_get_hwsize() returns unsigned int, so a result
 * of 0 would wrap around here - confirm that the control validation
 * rejects that case.
 */
1412 .last = msi_domain_get_hwsize(dev, domid) - 1,
1416 return msi_domain_alloc_locked(dev, &ctrl);
1420 * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at
1421 * a given index - or at the next free index
1423 * @dev: Pointer to device struct of the device for which the interrupts are allocated
1425 * @domid: Id of the interrupt domain to operate on
1426 * @index: Index for allocation. If @index == %MSI_ANY_INDEX the allocation
1427 * uses the next free index.
1428 * @affdesc: Optional pointer to an interrupt affinity descriptor structure
1429 * @icookie: Optional pointer to a domain specific per instance cookie. If
1430 * non-NULL the content of the cookie is stored in msi_desc::data.
1431 * Must be NULL for MSI-X allocations
1433 * This requires a MSI interrupt domain which lets the core code manage the MSI descriptors.
1436 * Return: struct msi_map
1438 * On success msi_map::index contains the allocated index number and
1439 * msi_map::virq the corresponding Linux interrupt number
1441 * On failure msi_map::index contains the error code and msi_map::virq is set to 0.
1444 struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index,
1445 const struct irq_affinity_desc *affdesc,
1446 union msi_instance_cookie *icookie)
/*
 * NOTE(review): the conditions, labels, gotos and the return statement of
 * this function are not visible in this listing. The comments below
 * describe the visible statements only.
 */
/* Exactly one interrupt is requested; first and last are filled in later. */
1448 struct msi_ctrl ctrl = { .domid = domid, .nirqs = 1, };
1449 struct irq_domain *domain;
/* map starts out empty initialized, so map.virq is 0 unless assigned below. */
1450 struct msi_map map = { };
1451 struct msi_desc *desc;
1454 msi_lock_descs(dev);
1455 domain = msi_get_device_domain(dev, domid);
/* Error code reported through map.index; presumably when no domain was found. */
1457 map.index = -ENODEV;
/* One descriptor with a single vector and the optional affinity @affdesc. */
1461 desc = msi_alloc_desc(dev, 1, affdesc);
/* Error code reported through map.index; presumably when the allocation failed. */
1463 map.index = -ENOMEM;
/*
 * Copy the content of the cookie into the descriptor. @icookie is
 * documented as optional; the NULL check is not visible in this listing.
 */
1468 desc->data.icookie = *icookie;
1470 ret = msi_insert_desc(dev, desc, domid, index);
/*
 * Narrow the control range to the index stored in the descriptor, which
 * is read back from desc->msi_index instead of using @index directly.
 */
1476 ctrl.first = ctrl.last = desc->msi_index;
1478 ret = __msi_domain_alloc_irqs(dev, domain, &ctrl);
/* Undo via the free path; the guarding condition is not visible here. */
1481 msi_domain_free_locked(dev, &ctrl);
/* Report the index and the Linux interrupt number of the descriptor. */
1483 map.index = desc->msi_index;
1484 map.virq = desc->irq;
1487 msi_unlock_descs(dev);
/*
 * __msi_domain_free_irqs - Free the Linux interrupts of the MSI descriptors
 * of @dev in the index range described by @ctrl
 * @dev:	Device whose per-domain descriptor store is walked
 * @domain:	Interrupt domain the interrupts belong to; domain->host_data
 *		supplies the msi_domain_info
 * @ctrl:	Selects the descriptor store (domid) and the index range
 *		first to last which is walked
 *
 * NOTE(review): some declarations, the statement following the
 * msi_desc_match() check and the closing braces are not visible in this
 * listing.
 */
1491 static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain,
1492 struct msi_ctrl *ctrl)
1494 struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1495 struct msi_domain_info *info = domain->host_data;
1496 struct irq_data *irqd;
1497 struct msi_desc *desc;
1501 xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1502 /* Only handle MSI entries which have an interrupt associated */
1503 if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED))
1506 /* Make sure all interrupts are deactivated */
1507 for (i = 0; i < desc->nvec_used; i++) {
1508 irqd = irq_domain_get_irq_data(domain, desc->irq + i);
/* The lookup result may be NULL, so it is checked before use. */
1509 if (irqd && irqd_is_activated(irqd))
1510 irq_domain_deactivate_irq(irqd);
/* All desc->nvec_used interrupts starting at desc->irq are freed in one call. */
1513 irq_domain_free_irqs(desc->irq, desc->nvec_used);
/* Counterpart of the MSI_FLAG_DEV_SYSFS handling on the allocation side. */
1514 if (info->flags & MSI_FLAG_DEV_SYSFS)
1515 msi_sysfs_remove_desc(dev, desc);
/*
 * msi_domain_free_locked - Validate @ctrl, look up the domain and free the
 * interrupts and optionally the descriptors in the range of @ctrl
 *
 * NOTE(review): the early returns after the checks and the assignment of
 * ops are not visible in this listing.
 */
1520 static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl)
1522 struct msi_domain_info *info;
1523 struct msi_domain_ops *ops;
1524 struct irq_domain *domain;
1526 if (!msi_ctrl_valid(dev, ctrl))
1529 domain = msi_get_device_domain(dev, ctrl->domid);
1533 info = domain->host_data;
/*
 * The domain specific callback, when provided, is called with the domain
 * and the device only; @ctrl is not passed to it.
 * NOTE(review): whether the generic __msi_domain_free_irqs() below runs in
 * addition to or instead of the callback cannot be determined from this
 * listing.
 */
1536 if (ops->domain_free_irqs)
1537 ops->domain_free_irqs(domain, dev);
1539 __msi_domain_free_irqs(dev, domain, ctrl);
/* Optional callback invoked after the interrupts have been freed. */
1541 if (ops->msi_post_free)
1542 ops->msi_post_free(domain, dev);
/* Descriptors are only freed here when MSI_FLAG_FREE_MSI_DESCS is set. */
1544 if (info->flags & MSI_FLAG_FREE_MSI_DESCS)
1545 msi_domain_free_descs(dev, ctrl);
1549 * msi_domain_free_irqs_range_locked - Free a range of interrupts from a MSI interrupt domain
1550 * associated to @dev with msi_lock held
1551 * @dev: Pointer to device struct of the device for which the interrupts are freed
1553 * @domid: Id of the interrupt domain to operate on
1554 * @first: First index to free (inclusive)
1555 * @last: Last index to free (inclusive)
1557 void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
1558 unsigned int first, unsigned int last)
/*
 * NOTE(review): the members of the initializer are not visible in this
 * listing; presumably domid, first and last are copied in - TODO confirm.
 */
1560 struct msi_ctrl ctrl = {
/* All work is delegated to the common free path. */
1565 msi_domain_free_locked(dev, &ctrl);
1569 * msi_domain_free_irqs_range - Free a range of interrupts from a MSI interrupt domain
1570 * associated to @dev
1571 * @dev: Pointer to device struct of the device for which the interrupts are freed
1573 * @domid: Id of the interrupt domain to operate on
1574 * @first: First index to free (inclusive)
1575 * @last: Last index to free (inclusive)
1577 void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
1578 unsigned int first, unsigned int last)
/* Wrap the _locked variant in the msi_lock_descs() / msi_unlock_descs() pair. */
1580 msi_lock_descs(dev);
1581 msi_domain_free_irqs_range_locked(dev, domid, first, last);
1582 msi_unlock_descs(dev);
1586 * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain
1587 * associated to a device
1588 * @dev: Pointer to device struct of the device for which the interrupts are freed
1590 * @domid: The id of the domain to operate on
1592 * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
1593 * pair. Use this for MSI irqdomains which implement their own vector allocation.
1596 void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid)
/*
 * All is expressed as the index range from 0 up to the hardware size of
 * the domain minus one.
 * NOTE(review): msi_domain_get_hwsize() returns unsigned int, so a result
 * of 0 would wrap around here - confirm that the control validation in the
 * free path rejects that case.
 */
1598 msi_domain_free_irqs_range_locked(dev, domid, 0,
1599 msi_domain_get_hwsize(dev, domid) - 1);
1603 * msi_domain_free_irqs_all - Free all interrupts from a MSI interrupt domain
1604 * associated to a device
1605 * @dev: Pointer to device struct of the device for which the interrupts are freed
1607 * @domid: The id of the domain to operate on
1609 void msi_domain_free_irqs_all(struct device *dev, unsigned int domid)
/* Wrap the _locked variant in the msi_lock_descs() / msi_unlock_descs() pair. */
1611 msi_lock_descs(dev);
1612 msi_domain_free_irqs_all_locked(dev, domid);
1613 msi_unlock_descs(dev);
1617 * msi_get_domain_info - Get the MSI interrupt domain info for @domain
1618 * @domain: The interrupt domain to retrieve data from
1620 * Return: the pointer to the msi_domain_info stored in @domain->host_data.
1622 struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
1624 return (struct msi_domain_info *)domain->host_data;