   1#include <linux/interrupt.h>
   2#include <linux/dmar.h>
   3#include <linux/spinlock.h>
   4#include <linux/slab.h>
   5#include <linux/jiffies.h>
   6#include <linux/hpet.h>
   7#include <linux/pci.h>
   8#include <linux/irq.h>
   9#include <linux/intel-iommu.h>
  10#include <linux/acpi.h>
  11#include <asm/io_apic.h>
  12#include <asm/smp.h>
  13#include <asm/cpu.h>
  14#include <asm/irq_remapping.h>
  15#include <asm/pci-direct.h>
  16#include <asm/msidef.h>
  17
  18#include "irq_remapping.h"
  19
  20struct ioapic_scope {
  21	struct intel_iommu *iommu;
  22	unsigned int id;
  23	unsigned int bus;	/* PCI bus number */
  24	unsigned int devfn;	/* PCI devfn number */
  25};
  26
  27struct hpet_scope {
  28	struct intel_iommu *iommu;
  29	u8 id;
  30	unsigned int bus;
  31	unsigned int devfn;
  32};
  33
  34#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
  35#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
  36
  37static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
  38static struct hpet_scope ir_hpet[MAX_HPET_TBS];
  39static int ir_ioapic_num, ir_hpet_num;
  40
  41/*
  42 * Lock ordering:
  43 * ->dmar_global_lock
  44 *	->irq_2_ir_lock
  45 *		->qi->q_lock
  46 *	->iommu->register_lock
  47 * Note:
  48 * intel_irq_remap_ops.{supported,prepare,enable,disable,reenable} are called
   49 * in a single-threaded environment with interrupts disabled, so no need to take
  50 * the dmar_global_lock.
  51 */
  52static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
  53
  54static int __init parse_ioapics_under_ir(void);
  55
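/* Per-irq remapping state is kept in the irq_cfg chip data; NULL if there is none. */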
  56static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
  57{
  58	struct irq_cfg *cfg = irq_get_chip_data(irq);
  59	return cfg ? &cfg->irq_2_iommu : NULL;
  60}
  61
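/* Copy the remap table entry currently backing @irq into @entry; -1 if @irq is not remapped. */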
  62static int get_irte(int irq, struct irte *entry)
  63{
  64	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
  65	unsigned long flags;
  66	int index;
  67
  68	if (!entry || !irq_iommu)
  69		return -1;
  70
  71	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
  72
  73	index = irq_iommu->irte_index + irq_iommu->sub_handle;
  74	*entry = *(irq_iommu->iommu->ir_table->base + index);
  75
  76	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
  77	return 0;
  78}
  79
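/*
 * Reserve a power-of-two aligned block of @count IRTEs in @iommu's remap
 * table for @irq and record the base index in its irq_2_iommu state.
 * Returns the base index on success or a negative value on failure.
 */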
  80static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
  81{
  82	struct ir_table *table = iommu->ir_table;
  83	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
  84	struct irq_cfg *cfg = irq_get_chip_data(irq);
  85	unsigned int mask = 0;
  86	unsigned long flags;
  87	int index;
  88
  89	if (!count || !irq_iommu)
  90		return -1;
  91
  92	if (count > 1) {
  93		count = __roundup_pow_of_two(count);
  94		mask = ilog2(count);
  95	}
  96
  97	if (mask > ecap_max_handle_mask(iommu->ecap)) {
  98		printk(KERN_ERR
  99		       "Requested mask %x exceeds the max invalidation handle"
 100		       " mask value %Lx\n", mask,
 101		       ecap_max_handle_mask(iommu->ecap));
 102		return -1;
 103	}
 104
 105	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
 106	index = bitmap_find_free_region(table->bitmap,
 107					INTR_REMAP_TABLE_ENTRIES, mask);
 108	if (index < 0) {
 109		pr_warn("IR%d: can't allocate an IRTE\n", iommu->seq_id);
 110	} else {
 111		cfg->remapped = 1;
 112		irq_iommu->iommu = iommu;
 113		irq_iommu->irte_index =  index;
 114		irq_iommu->sub_handle = 0;
 115		irq_iommu->irte_mask = mask;
 116	}
 117	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 118
 119	return index;
 120}
 121
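/* Queue a selective Interrupt Entry Cache flush for the 2^mask IRTEs starting at @index. */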
 122static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
 123{
 124	struct qi_desc desc;
 125
 126	desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
 127		   | QI_IEC_SELECTIVE;
 128	desc.high = 0;
 129
 130	return qi_submit_sync(&desc, iommu);
 131}
 132
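/* Return the base IRTE index of @irq and report its sub_handle through @sub_handle. */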
 133static int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 134{
 135	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 136	unsigned long flags;
 137	int index;
 138
 139	if (!irq_iommu)
 140		return -1;
 141
 142	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
 143	*sub_handle = irq_iommu->sub_handle;
 144	index = irq_iommu->irte_index;
 145	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 146	return index;
 147}
 148
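/* Bind @irq to an already allocated IRTE block: record @iommu, the base @index and @subhandle. */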
 149static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 150{
 151	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 152	struct irq_cfg *cfg = irq_get_chip_data(irq);
 153	unsigned long flags;
 154
 155	if (!irq_iommu)
 156		return -1;
 157
 158	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
 159
 160	cfg->remapped = 1;
 161	irq_iommu->iommu = iommu;
 162	irq_iommu->irte_index = index;
 163	irq_iommu->sub_handle = subhandle;
 164	irq_iommu->irte_mask = 0;
 165
 166	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 167
 168	return 0;
 169}
 170
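/*
 * Overwrite the IRTE backing @irq with @irte_modified, flush the cache line
 * and invalidate the interrupt entry cache for that index.
 */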
 171static int modify_irte(int irq, struct irte *irte_modified)
 172{
 173	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 174	struct intel_iommu *iommu;
 175	unsigned long flags;
 176	struct irte *irte;
 177	int rc, index;
 178
 179	if (!irq_iommu)
 180		return -1;
 181
 182	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
 183
 184	iommu = irq_iommu->iommu;
 185
 186	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 187	irte = &iommu->ir_table->base[index];
 188
 189	set_64bit(&irte->low, irte_modified->low);
 190	set_64bit(&irte->high, irte_modified->high);
 191	__iommu_flush_cache(iommu, irte, sizeof(*irte));
 192
 193	rc = qi_flush_iec(iommu, index, 0);
 194	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 195
 196	return rc;
 197}
 198
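/* Lookup helpers: find the IOMMU that remaps a given HPET block, IO-APIC or PCI device. */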
 199static struct intel_iommu *map_hpet_to_ir(u8 hpet_id)
 200{
 201	int i;
 202
 203	for (i = 0; i < MAX_HPET_TBS; i++)
 204		if (ir_hpet[i].id == hpet_id)
 205			return ir_hpet[i].iommu;
 206	return NULL;
 207}
 208
 209static struct intel_iommu *map_ioapic_to_ir(int apic)
 210{
 211	int i;
 212
 213	for (i = 0; i < MAX_IO_APICS; i++)
 214		if (ir_ioapic[i].id == apic)
 215			return ir_ioapic[i].iommu;
 216	return NULL;
 217}
 218
 219static struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
 220{
 221	struct dmar_drhd_unit *drhd;
 222
 223	drhd = dmar_find_matched_drhd_unit(dev);
 224	if (!drhd)
 225		return NULL;
 226
 227	return drhd->iommu;
 228}
 229
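/*
 * Zero every IRTE in the block owned by @irq_iommu, release its bitmap
 * region and flush the interrupt entry cache.  Sub-handle users share the
 * block and therefore clear nothing.
 */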
 230static int clear_entries(struct irq_2_iommu *irq_iommu)
 231{
 232	struct irte *start, *entry, *end;
 233	struct intel_iommu *iommu;
 234	int index;
 235
 236	if (irq_iommu->sub_handle)
 237		return 0;
 238
 239	iommu = irq_iommu->iommu;
 240	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 241
 242	start = iommu->ir_table->base + index;
 243	end = start + (1 << irq_iommu->irte_mask);
 244
 245	for (entry = start; entry < end; entry++) {
 246		set_64bit(&entry->low, 0);
 247		set_64bit(&entry->high, 0);
 248	}
 249	bitmap_release_region(iommu->ir_table->bitmap, index,
 250			      irq_iommu->irte_mask);
 251
 252	return qi_flush_iec(iommu, index, irq_iommu->irte_mask);
 253}
 254
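/* Release the IRTEs used by @irq and reset its irq_2_iommu bookkeeping. */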
 255static int free_irte(int irq)
 256{
 257	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 258	unsigned long flags;
 259	int rc;
 260
 261	if (!irq_iommu)
 262		return -1;
 263
 264	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
 265
 266	rc = clear_entries(irq_iommu);
 267
 268	irq_iommu->iommu = NULL;
 269	irq_iommu->irte_index = 0;
 270	irq_iommu->sub_handle = 0;
 271	irq_iommu->irte_mask = 0;
 272
 273	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 274
 275	return rc;
 276}
 277
 278/*
 279 * source validation type
 280 */
 281#define SVT_NO_VERIFY		0x0  /* no verification is required */
 282#define SVT_VERIFY_SID_SQ	0x1  /* verify using SID and SQ fields */
 283#define SVT_VERIFY_BUS		0x2  /* verify bus of request-id */
 284
 285/*
 286 * source-id qualifier
 287 */
 288#define SQ_ALL_16	0x0  /* verify all 16 bits of request-id */
 289#define SQ_13_IGNORE_1	0x1  /* verify most significant 13 bits, ignore
 290			      * the third least significant bit
 291			      */
 292#define SQ_13_IGNORE_2	0x2  /* verify most significant 13 bits, ignore
 293			      * the second and third least significant bits
 294			      */
 295#define SQ_13_IGNORE_3	0x3  /* verify most significant 13 bits, ignore
  296			      * the three least significant bits
 297			      */
 298
 299/*
  300 * Set the SVT, SQ and SID fields of the IRTE to verify the
  301 * source-id of interrupt requests.
 302 */
 303static void set_irte_sid(struct irte *irte, unsigned int svt,
 304			 unsigned int sq, unsigned int sid)
 305{
 306	if (disable_sourceid_checking)
 307		svt = SVT_NO_VERIFY;
 308	irte->svt = svt;
 309	irte->sq = sq;
 310	irte->sid = sid;
 311}
 312
 313static int set_ioapic_sid(struct irte *irte, int apic)
 314{
 315	int i;
 316	u16 sid = 0;
 317
 318	if (!irte)
 319		return -1;
 320
 321	down_read(&dmar_global_lock);
 322	for (i = 0; i < MAX_IO_APICS; i++) {
 323		if (ir_ioapic[i].id == apic) {
 324			sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
 325			break;
 326		}
 327	}
 328	up_read(&dmar_global_lock);
 329
 330	if (sid == 0) {
 331		pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic);
 332		return -1;
 333	}
 334
 335	set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, sid);
 336
 337	return 0;
 338}
 339
 340static int set_hpet_sid(struct irte *irte, u8 id)
 341{
 342	int i;
 343	u16 sid = 0;
 344
 345	if (!irte)
 346		return -1;
 347
 348	down_read(&dmar_global_lock);
 349	for (i = 0; i < MAX_HPET_TBS; i++) {
 350		if (ir_hpet[i].id == id) {
 351			sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
 352			break;
 353		}
 354	}
 355	up_read(&dmar_global_lock);
 356
 357	if (sid == 0) {
 358		pr_warning("Failed to set source-id of HPET block (%d)\n", id);
 359		return -1;
 360	}
 361
 362	/*
 363	 * Should really use SQ_ALL_16. Some platforms are broken.
 364	 * While we figure out the right quirks for these broken platforms, use
 365	 * SQ_13_IGNORE_3 for now.
 366	 */
 367	set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, sid);
 368
 369	return 0;
 370}
 371
 372static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
 373{
 374	struct pci_dev *bridge;
 375
 376	if (!irte || !dev)
 377		return -1;
 378
 379	/* PCIe device or Root Complex integrated PCI device */
 380	if (pci_is_pcie(dev) || !dev->bus->parent) {
 381		set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
 382			     (dev->bus->number << 8) | dev->devfn);
 383		return 0;
 384	}
 385
 386	bridge = pci_find_upstream_pcie_bridge(dev);
 387	if (bridge) {
 388		if (pci_is_pcie(bridge))/* this is a PCIe-to-PCI/PCIX bridge */
 389			set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
 390				(bridge->bus->number << 8) | dev->bus->number);
 391		else /* this is a legacy PCI bridge */
 392			set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
 393				(bridge->bus->number << 8) | bridge->devfn);
 394	}
 395
 396	return 0;
 397}
 398
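/*
 * Point the IOMMU at its remap table (IRTA + SIRTP), globally invalidate
 * the interrupt entry cache, then enable remapping (IRE) while blocking
 * compatibility-format interrupts (CFI).
 */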
 399static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode)
 400{
 401	u64 addr;
 402	u32 sts;
 403	unsigned long flags;
 404
 405	addr = virt_to_phys((void *)iommu->ir_table->base);
 406
 407	raw_spin_lock_irqsave(&iommu->register_lock, flags);
 408
 409	dmar_writeq(iommu->reg + DMAR_IRTA_REG,
 410		    (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
 411
 412	/* Set interrupt-remapping table pointer */
 413	iommu->gcmd |= DMA_GCMD_SIRTP;
 414	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
 415
 416	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
 417		      readl, (sts & DMA_GSTS_IRTPS), sts);
 418	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
 419
 420	/*
 421	 * global invalidation of interrupt entry cache before enabling
 422	 * interrupt-remapping.
 423	 */
 424	qi_global_iec(iommu);
 425
 426	raw_spin_lock_irqsave(&iommu->register_lock, flags);
 427
 428	/* Enable interrupt-remapping */
 429	iommu->gcmd |= DMA_GCMD_IRE;
 430	iommu->gcmd &= ~DMA_GCMD_CFI;  /* Block compatibility-format MSIs */
 431	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
 432
 433	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
 434		      readl, (sts & DMA_GSTS_IRES), sts);
 435
 436	/*
 437	 * With CFI clear in the Global Command register, we should be
 438	 * protected from dangerous (i.e. compatibility) interrupts
 439	 * regardless of x2apic status.  Check just to be sure.
 440	 */
 441	if (sts & DMA_GSTS_CFIS)
 442		WARN(1, KERN_WARNING
 443			"Compatibility-format IRQs enabled despite intr remapping;\n"
 444			"you are vulnerable to IRQ injection.\n");
 445
 446	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
 447}
 448
 449
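/*
 * Allocate the software ir_table, the remap table pages and the allocation
 * bitmap for @iommu, then program the hardware via iommu_set_irq_remapping().
 */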
 450static int intel_setup_irq_remapping(struct intel_iommu *iommu, int mode)
 451{
 452	struct ir_table *ir_table;
 453	struct page *pages;
 454	unsigned long *bitmap;
 455
 456	ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
 457					     GFP_ATOMIC);
 458
 459	if (!iommu->ir_table)
 460		return -ENOMEM;
 461
 462	pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
 463				 INTR_REMAP_PAGE_ORDER);
 464
 465	if (!pages) {
 466		pr_err("IR%d: failed to allocate pages of order %d\n",
 467		       iommu->seq_id, INTR_REMAP_PAGE_ORDER);
 468		kfree(iommu->ir_table);
 469		return -ENOMEM;
 470	}
 471
 472	bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES),
 473			 sizeof(long), GFP_ATOMIC);
 474	if (bitmap == NULL) {
 475		pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id);
 476		__free_pages(pages, INTR_REMAP_PAGE_ORDER);
 477		kfree(ir_table);
 478		return -ENOMEM;
 479	}
 480
 481	ir_table->base = page_address(pages);
 482	ir_table->bitmap = bitmap;
 483
 484	iommu_set_irq_remapping(iommu, mode);
 485	return 0;
 486}
 487
 488/*
 489 * Disable Interrupt Remapping.
 490 */
 491static void iommu_disable_irq_remapping(struct intel_iommu *iommu)
 492{
 493	unsigned long flags;
 494	u32 sts;
 495
 496	if (!ecap_ir_support(iommu->ecap))
 497		return;
 498
 499	/*
 500	 * global invalidation of interrupt entry cache before disabling
 501	 * interrupt-remapping.
 502	 */
 503	qi_global_iec(iommu);
 504
 505	raw_spin_lock_irqsave(&iommu->register_lock, flags);
 506
 507	sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
 508	if (!(sts & DMA_GSTS_IRES))
 509		goto end;
 510
 511	iommu->gcmd &= ~DMA_GCMD_IRE;
 512	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
 513
 514	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
 515		      readl, !(sts & DMA_GSTS_IRES), sts);
 516
 517end:
 518	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
 519}
 520
 521static int __init dmar_x2apic_optout(void)
 522{
 523	struct acpi_table_dmar *dmar;
 524	dmar = (struct acpi_table_dmar *)dmar_tbl;
 525	if (!dmar || no_x2apic_optout)
 526		return 0;
 527	return dmar->flags & DMAR_X2APIC_OPT_OUT;
 528}
 529
 530static int __init intel_irq_remapping_supported(void)
 531{
 532	struct dmar_drhd_unit *drhd;
 533	struct intel_iommu *iommu;
 534
 535	if (disable_irq_remap)
 536		return 0;
 537	if (irq_remap_broken) {
 538		printk(KERN_WARNING
 539			"This system BIOS has enabled interrupt remapping\n"
 540			"on a chipset that contains an erratum making that\n"
 541			"feature unstable.  To maintain system stability\n"
 542			"interrupt remapping is being disabled.  Please\n"
 543			"contact your BIOS vendor for an update\n");
 544		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 545		disable_irq_remap = 1;
 546		return 0;
 547	}
 548
 549	if (!dmar_ir_support())
 550		return 0;
 551
 552	for_each_iommu(iommu, drhd)
 553		if (!ecap_ir_support(iommu->ecap))
 554			return 0;
 555
 556	return 1;
 557}
 558
 559static int __init intel_enable_irq_remapping(void)
 560{
 561	struct dmar_drhd_unit *drhd;
 562	struct intel_iommu *iommu;
 563	bool x2apic_present;
 564	int setup = 0;
 565	int eim = 0;
 566
 567	x2apic_present = x2apic_supported();
 568
 569	if (parse_ioapics_under_ir() != 1) {
  570		printk(KERN_INFO "Not enabling interrupt remapping\n");
 571		goto error;
 572	}
 573
 574	if (x2apic_present) {
 575		pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
 576
 577		eim = !dmar_x2apic_optout();
 578		if (!eim)
 579			printk(KERN_WARNING
 580				"Your BIOS is broken and requested that x2apic be disabled.\n"
 581				"This will slightly decrease performance.\n"
 582				"Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
 583	}
 584
 585	for_each_iommu(iommu, drhd) {
 586		/*
 587		 * If the queued invalidation is already initialized,
  588		 * we shouldn't disable it.
 589		 */
 590		if (iommu->qi)
 591			continue;
 592
 593		/*
 594		 * Clear previous faults.
 595		 */
 596		dmar_fault(-1, iommu);
 597
 598		/*
 599		 * Disable intr remapping and queued invalidation, if already
 600		 * enabled prior to OS handover.
 601		 */
 602		iommu_disable_irq_remapping(iommu);
 603
 604		dmar_disable_qi(iommu);
 605	}
 606
 607	/*
  608	 * Check for Interrupt-remapping support.
 609	 */
 610	for_each_iommu(iommu, drhd) {
 611		if (!ecap_ir_support(iommu->ecap))
 612			continue;
 613
 614		if (eim && !ecap_eim_support(iommu->ecap)) {
  615			printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD,"
  616			       " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
 617			goto error;
 618		}
 619	}
 620
 621	/*
  622	 * Enable queued invalidation for all the DRHDs.
 623	 */
 624	for_each_iommu(iommu, drhd) {
 625		int ret = dmar_enable_qi(iommu);
 626
 627		if (ret) {
  628			printk(KERN_ERR "DRHD %Lx: failed to enable queued"
  629			       " invalidation, ecap %Lx, ret %d\n",
 630			       drhd->reg_base_addr, iommu->ecap, ret);
 631			goto error;
 632		}
 633	}
 634
 635	/*
  636	 * Set up Interrupt-remapping for all the DRHDs now.
 637	 */
 638	for_each_iommu(iommu, drhd) {
 639		if (!ecap_ir_support(iommu->ecap))
 640			continue;
 641
 642		if (intel_setup_irq_remapping(iommu, eim))
 643			goto error;
 644
 645		setup = 1;
 646	}
 647
 648	if (!setup)
 649		goto error;
 650
 651	irq_remapping_enabled = 1;
 652
 653	/*
 654	 * VT-d has a different layout for IO-APIC entries when
 655	 * interrupt remapping is enabled. So it needs a special routine
 656	 * to print IO-APIC entries for debugging purposes too.
 657	 */
 658	x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries;
 659
 660	pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic");
 661
 662	return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
 663
 664error:
 665	/*
 666	 * handle error condition gracefully here!
 667	 */
 668
 669	if (x2apic_present)
 670		pr_warn("Failed to enable irq remapping.  You are vulnerable to irq-injection attacks.\n");
 671
 672	return -1;
 673}
 674
 675static void ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope,
 676				      struct intel_iommu *iommu)
 677{
 678	struct acpi_dmar_pci_path *path;
 679	u8 bus;
 680	int count;
 681
 682	bus = scope->bus;
 683	path = (struct acpi_dmar_pci_path *)(scope + 1);
 684	count = (scope->length - sizeof(struct acpi_dmar_device_scope))
 685		/ sizeof(struct acpi_dmar_pci_path);
 686
 687	while (--count > 0) {
 688		/*
  689		 * Access PCI config space directly because the PCI
  690		 * subsystem isn't initialized yet.
 691		 */
 692		bus = read_pci_config_byte(bus, path->device, path->function,
 693					   PCI_SECONDARY_BUS);
 694		path++;
 695	}
 696	ir_hpet[ir_hpet_num].bus   = bus;
 697	ir_hpet[ir_hpet_num].devfn = PCI_DEVFN(path->device, path->function);
 698	ir_hpet[ir_hpet_num].iommu = iommu;
 699	ir_hpet[ir_hpet_num].id    = scope->enumeration_id;
 700	ir_hpet_num++;
 701}
 702
 703static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
 704				      struct intel_iommu *iommu)
 705{
 706	struct acpi_dmar_pci_path *path;
 707	u8 bus;
 708	int count;
 709
 710	bus = scope->bus;
 711	path = (struct acpi_dmar_pci_path *)(scope + 1);
 712	count = (scope->length - sizeof(struct acpi_dmar_device_scope))
 713		/ sizeof(struct acpi_dmar_pci_path);
 714
 715	while (--count > 0) {
 716		/*
  717		 * Access PCI config space directly because the PCI
  718		 * subsystem isn't initialized yet.
 719		 */
 720		bus = read_pci_config_byte(bus, path->device, path->function,
 721					   PCI_SECONDARY_BUS);
 722		path++;
 723	}
 724
 725	ir_ioapic[ir_ioapic_num].bus   = bus;
 726	ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->device, path->function);
 727	ir_ioapic[ir_ioapic_num].iommu = iommu;
 728	ir_ioapic[ir_ioapic_num].id    = scope->enumeration_id;
 729	ir_ioapic_num++;
 730}
 731
 732static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header,
 733				      struct intel_iommu *iommu)
 734{
 735	struct acpi_dmar_hardware_unit *drhd;
 736	struct acpi_dmar_device_scope *scope;
 737	void *start, *end;
 738
 739	drhd = (struct acpi_dmar_hardware_unit *)header;
 740
 741	start = (void *)(drhd + 1);
 742	end = ((void *)drhd) + header->length;
 743
 744	while (start < end) {
 745		scope = start;
 746		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
 747			if (ir_ioapic_num == MAX_IO_APICS) {
 748				printk(KERN_WARNING "Exceeded Max IO APICS\n");
 749				return -1;
 750			}
 751
  752			printk(KERN_INFO "IOAPIC id %d under DRHD base"
  753			       " 0x%Lx IOMMU %d\n", scope->enumeration_id,
 754			       drhd->address, iommu->seq_id);
 755
 756			ir_parse_one_ioapic_scope(scope, iommu);
 757		} else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET) {
 758			if (ir_hpet_num == MAX_HPET_TBS) {
 759				printk(KERN_WARNING "Exceeded Max HPET blocks\n");
 760				return -1;
 761			}
 762
 763			printk(KERN_INFO "HPET id %d under DRHD base"
 764			       " 0x%Lx\n", scope->enumeration_id,
 765			       drhd->address);
 766
 767			ir_parse_one_hpet_scope(scope, iommu);
 768		}
 769		start += scope->length;
 770	}
 771
 772	return 0;
 773}
 774
 775/*
  776 * Find the association between IOAPICs and the Interrupt-remapping
  777 * hardware units that serve them.
 778 */
 779static int __init parse_ioapics_under_ir(void)
 780{
 781	struct dmar_drhd_unit *drhd;
 782	struct intel_iommu *iommu;
 783	int ir_supported = 0;
 784	int ioapic_idx;
 785
 786	for_each_iommu(iommu, drhd)
 787		if (ecap_ir_support(iommu->ecap)) {
 788			if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu))
 789				return -1;
 790
 791			ir_supported = 1;
 792		}
 793
 794	if (!ir_supported)
 795		return 0;
 796
 797	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
 798		int ioapic_id = mpc_ioapic_id(ioapic_idx);
 799		if (!map_ioapic_to_ir(ioapic_id)) {
 800			pr_err(FW_BUG "ioapic %d has no mapping iommu, "
 801			       "interrupt remapping will be disabled\n",
 802			       ioapic_id);
 803			return -1;
 804		}
 805	}
 806
 807	return 1;
 808}
 809
 810static int __init ir_dev_scope_init(void)
 811{
 812	int ret;
 813
 814	if (!irq_remapping_enabled)
 815		return 0;
 816
 817	down_write(&dmar_global_lock);
 818	ret = dmar_dev_scope_init();
 819	up_write(&dmar_global_lock);
 820
 821	return ret;
 822}
 823rootfs_initcall(ir_dev_scope_init);
 824
 825static void disable_irq_remapping(void)
 826{
 827	struct dmar_drhd_unit *drhd;
 828	struct intel_iommu *iommu = NULL;
 829
 830	/*
  831	 * Disable Interrupt-remapping for all the DRHDs now.
 832	 */
 833	for_each_iommu(iommu, drhd) {
 834		if (!ecap_ir_support(iommu->ecap))
 835			continue;
 836
 837		iommu_disable_irq_remapping(iommu);
 838	}
 839}
 840
 841static int reenable_irq_remapping(int eim)
 842{
 843	struct dmar_drhd_unit *drhd;
 844	int setup = 0;
 845	struct intel_iommu *iommu = NULL;
 846
 847	for_each_iommu(iommu, drhd)
 848		if (iommu->qi)
 849			dmar_reenable_qi(iommu);
 850
 851	/*
  852	 * Set up Interrupt-remapping for all the DRHDs now.
 853	 */
 854	for_each_iommu(iommu, drhd) {
 855		if (!ecap_ir_support(iommu->ecap))
 856			continue;
 857
  858		/* Set up interrupt remapping for iommu. */
 859		iommu_set_irq_remapping(iommu, eim);
 860		setup = 1;
 861	}
 862
 863	if (!setup)
 864		goto error;
 865
 866	return 0;
 867
 868error:
 869	/*
 870	 * handle error condition gracefully here!
 871	 */
 872	return -1;
 873}
 874
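/* Initialize an IRTE: present, redirection hint set, edge trigger, @vector routed to @dest. */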
 875static void prepare_irte(struct irte *irte, int vector,
 876			 unsigned int dest)
 877{
 878	memset(irte, 0, sizeof(*irte));
 879
 880	irte->present = 1;
 881	irte->dst_mode = apic->irq_dest_mode;
 882	/*
 883	 * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
  884	 * actual level or edge trigger will be set up in the IO-APIC
  885	 * RTE. This will help simplify level triggered irq migration.
  886	 * For more details, see the comments (in io_apic.c) explaining IO-APIC
  887	 * irq migration in the presence of interrupt-remapping.
  888	 */
 889	irte->trigger_mode = 0;
 890	irte->dlvry_mode = apic->irq_delivery_mode;
 891	irte->vector = vector;
 892	irte->dest_id = IRTE_DEST(dest);
 893	irte->redir_hint = 1;
 894}
 895
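/*
 * Allocate an IRTE for an IO-APIC pin, program it with the vector,
 * destination and source-id, and fill the RTE in remappable format
 * (format = 1, IRTE index, pin number as the "vector").
 */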
 896static int intel_setup_ioapic_entry(int irq,
 897				    struct IO_APIC_route_entry *route_entry,
 898				    unsigned int destination, int vector,
 899				    struct io_apic_irq_attr *attr)
 900{
 901	int ioapic_id = mpc_ioapic_id(attr->ioapic);
 902	struct intel_iommu *iommu;
 903	struct IR_IO_APIC_route_entry *entry;
 904	struct irte irte;
 905	int index;
 906
 907	down_read(&dmar_global_lock);
 908	iommu = map_ioapic_to_ir(ioapic_id);
 909	if (!iommu) {
 910		pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
 911		index = -ENODEV;
 912	} else {
 913		index = alloc_irte(iommu, irq, 1);
 914		if (index < 0) {
 915			pr_warn("Failed to allocate IRTE for ioapic %d\n",
 916				ioapic_id);
 917			index = -ENOMEM;
 918		}
 919	}
 920	up_read(&dmar_global_lock);
 921	if (index < 0)
 922		return index;
 923
 924	prepare_irte(&irte, vector, destination);
 925
 926	/* Set source-id of interrupt request */
 927	set_ioapic_sid(&irte, ioapic_id);
 928
 929	modify_irte(irq, &irte);
 930
 931	apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
 932		"Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
 933		"Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
 934		"Avail:%X Vector:%02X Dest:%08X "
 935		"SID:%04X SQ:%X SVT:%X)\n",
 936		attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
 937		irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
 938		irte.avail, irte.vector, irte.dest_id,
 939		irte.sid, irte.sq, irte.svt);
 940
 941	entry = (struct IR_IO_APIC_route_entry *)route_entry;
 942	memset(entry, 0, sizeof(*entry));
 943
 944	entry->index2	= (index >> 15) & 0x1;
 945	entry->zero	= 0;
 946	entry->format	= 1;
 947	entry->index	= (index & 0x7fff);
 948	/*
 949	 * IO-APIC RTE will be configured with virtual vector.
  950	 * The irq handler will do the explicit EOI to the io-apic.
 951	 */
 952	entry->vector	= attr->ioapic_pin;
 953	entry->mask	= 0;			/* enable IRQ */
 954	entry->trigger	= attr->trigger;
 955	entry->polarity	= attr->polarity;
 956
 957	/* Mask level triggered irqs.
 958	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
 959	 */
 960	if (attr->trigger)
 961		entry->mask = 1;
 962
 963	return 0;
 964}
 965
 966/*
 967 * Migrate the IO-APIC irq in the presence of intr-remapping.
 968 *
 969 * For both level and edge triggered, irq migration is a simple atomic
  970 * update (of vector and cpu destination) of the IRTE and a flush of the hardware cache.
  971 *
  972 * For level triggered, we eliminate the io-apic RTE modification (with the
  973 * updated vector information) by using a virtual vector (io-apic pin number).
  974 * The real vector used to interrupt the cpu comes from
  975 * the interrupt-remapping table entry.
  976 *
  977 * As the migration is a simple atomic update of the IRTE, the same mechanism
  978 * is used to migrate MSI irqs in the presence of interrupt-remapping.
 979 */
 980static int
 981intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 982			  bool force)
 983{
 984	struct irq_cfg *cfg = data->chip_data;
 985	unsigned int dest, irq = data->irq;
 986	struct irte irte;
 987	int err;
 988
 989	if (!config_enabled(CONFIG_SMP))
 990		return -EINVAL;
 991
 992	if (!cpumask_intersects(mask, cpu_online_mask))
 993		return -EINVAL;
 994
 995	if (get_irte(irq, &irte))
 996		return -EBUSY;
 997
 998	err = assign_irq_vector(irq, cfg, mask);
 999	if (err)
1000		return err;
1001
1002	err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest);
1003	if (err) {
1004		if (assign_irq_vector(irq, cfg, data->affinity))
1005			pr_err("Failed to recover vector for irq %d\n", irq);
1006		return err;
1007	}
1008
1009	irte.vector = cfg->vector;
1010	irte.dest_id = IRTE_DEST(dest);
1011
1012	/*
 1013	 * Atomically update the IRTE with the new destination and vector,
 1014	 * then flush the interrupt entry cache.
1015	 */
1016	modify_irte(irq, &irte);
1017
1018	/*
1019	 * After this point, all the interrupts will start arriving
1020	 * at the new destination. So, time to cleanup the previous
1021	 * vector allocation.
1022	 */
1023	if (cfg->move_in_progress)
1024		send_cleanup_vector(cfg);
1025
1026	cpumask_copy(data->affinity, mask);
1027	return 0;
1028}
1029
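/*
 * Compose a remappable MSI message: program the IRTE with the vector,
 * destination and source-id, then encode the IRTE handle (SHV + index)
 * in the address and the sub_handle in the data.
 */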
1030static void intel_compose_msi_msg(struct pci_dev *pdev,
1031				  unsigned int irq, unsigned int dest,
1032				  struct msi_msg *msg, u8 hpet_id)
1033{
1034	struct irq_cfg *cfg;
1035	struct irte irte;
1036	u16 sub_handle = 0;
1037	int ir_index;
1038
1039	cfg = irq_get_chip_data(irq);
1040
1041	ir_index = map_irq_to_irte_handle(irq, &sub_handle);
1042	BUG_ON(ir_index == -1);
1043
1044	prepare_irte(&irte, cfg->vector, dest);
1045
1046	/* Set source-id of interrupt request */
1047	if (pdev)
1048		set_msi_sid(&irte, pdev);
1049	else
1050		set_hpet_sid(&irte, hpet_id);
1051
1052	modify_irte(irq, &irte);
1053
1054	msg->address_hi = MSI_ADDR_BASE_HI;
1055	msg->data = sub_handle;
1056	msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
1057			  MSI_ADDR_IR_SHV |
1058			  MSI_ADDR_IR_INDEX1(ir_index) |
1059			  MSI_ADDR_IR_INDEX2(ir_index);
1060}
1061
1062/*
1063 * Map the PCI dev to the corresponding remapping hardware unit
1064 * and allocate 'nvec' consecutive interrupt-remapping table entries
1065 * in it.
1066 */
1067static int intel_msi_alloc_irq(struct pci_dev *dev, int irq, int nvec)
1068{
1069	struct intel_iommu *iommu;
1070	int index;
1071
1072	down_read(&dmar_global_lock);
1073	iommu = map_dev_to_ir(dev);
1074	if (!iommu) {
1075		printk(KERN_ERR
1076		       "Unable to map PCI %s to iommu\n", pci_name(dev));
1077		index = -ENOENT;
1078	} else {
1079		index = alloc_irte(iommu, irq, nvec);
1080		if (index < 0) {
1081			printk(KERN_ERR
1082			       "Unable to allocate %d IRTE for PCI %s\n",
1083			       nvec, pci_name(dev));
1084			index = -ENOSPC;
1085		}
1086	}
1087	up_read(&dmar_global_lock);
1088
1089	return index;
1090}
1091
1092static int intel_msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
1093			       int index, int sub_handle)
1094{
1095	struct intel_iommu *iommu;
1096	int ret = -ENOENT;
1097
1098	down_read(&dmar_global_lock);
1099	iommu = map_dev_to_ir(pdev);
1100	if (iommu) {
1101		/*
 1102		 * Set up the mapping between the irq and the IRTE
 1103		 * base index; the sub_handle points to the
 1104		 * appropriate interrupt remap table entry.
1105		 */
1106		set_irte_irq(irq, iommu, index, sub_handle);
1107		ret = 0;
1108	}
1109	up_read(&dmar_global_lock);
1110
1111	return ret;
1112}
1113
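/* Allocate a single IRTE for the HPET MSI @irq on the IOMMU that covers HPET block @id. */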
1114static int intel_setup_hpet_msi(unsigned int irq, unsigned int id)
1115{
1116	int ret = -1;
1117	struct intel_iommu *iommu;
1118	int index;
1119
1120	down_read(&dmar_global_lock);
1121	iommu = map_hpet_to_ir(id);
1122	if (iommu) {
1123		index = alloc_irte(iommu, irq, 1);
1124		if (index >= 0)
1125			ret = 0;
1126	}
1127	up_read(&dmar_global_lock);
1128
1129	return ret;
1130}
1131
1132struct irq_remap_ops intel_irq_remap_ops = {
1133	.supported		= intel_irq_remapping_supported,
1134	.prepare		= dmar_table_init,
1135	.enable			= intel_enable_irq_remapping,
1136	.disable		= disable_irq_remapping,
1137	.reenable		= reenable_irq_remapping,
1138	.enable_faulting	= enable_drhd_fault_handling,
1139	.setup_ioapic_entry	= intel_setup_ioapic_entry,
1140	.set_affinity		= intel_ioapic_set_affinity,
1141	.free_irq		= free_irte,
1142	.compose_msi_msg	= intel_compose_msi_msg,
1143	.msi_alloc_irq		= intel_msi_alloc_irq,
1144	.msi_setup_irq		= intel_msi_setup_irq,
1145	.setup_hpet_msi		= intel_setup_hpet_msi,
1146};