   1/*
   2 * Copyright (c) 2006, Intel Corporation.
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms and conditions of the GNU General Public License,
   6 * version 2, as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  15 * Place - Suite 330, Boston, MA 02111-1307 USA.
  16 *
  17 * Copyright (C) 2006-2008 Intel Corporation
  18 * Author: Ashok Raj <ashok.raj@intel.com>
  19 * Author: Shaohua Li <shaohua.li@intel.com>
  20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  21 * Author: Fenghua Yu <fenghua.yu@intel.com>
  22 */
  23
  24#include <linux/init.h>
  25#include <linux/bitmap.h>
  26#include <linux/debugfs.h>
 
  27#include <linux/slab.h>
  28#include <linux/irq.h>
  29#include <linux/interrupt.h>
  30#include <linux/spinlock.h>
  31#include <linux/pci.h>
  32#include <linux/dmar.h>
  33#include <linux/dma-mapping.h>
  34#include <linux/mempool.h>
 
 
  35#include <linux/timer.h>
 
  36#include <linux/iova.h>
  37#include <linux/iommu.h>
  38#include <linux/intel-iommu.h>
  39#include <linux/syscore_ops.h>
  40#include <linux/tboot.h>
  41#include <linux/dmi.h>
  42#include <linux/pci-ats.h>
  43#include <asm/cacheflush.h>
  44#include <asm/iommu.h>
  45
  46#define ROOT_SIZE		VTD_PAGE_SIZE
  47#define CONTEXT_SIZE		VTD_PAGE_SIZE
  48
  49#define IS_BRIDGE_HOST_DEVICE(pdev) \
  50			    ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
  51#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
 
  52#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
  53#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
  54
  55#define IOAPIC_RANGE_START	(0xfee00000)
  56#define IOAPIC_RANGE_END	(0xfeefffff)
  57#define IOVA_START_ADDR		(0x1000)
  58
  59#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
  60
  61#define MAX_AGAW_WIDTH 64
 
  62
  63#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
  64#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
  65
  66/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
  67   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
  68#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
  69				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
  70#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
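     /*
      * Example: with the default 48-bit guest address width,
      * __DOMAIN_MAX_PFN(48) is 2^36 - 1 and DOMAIN_MAX_ADDR(48) is just
      * under 256TiB.  On 32-bit kernels DOMAIN_MAX_PFN() clamps the result
      * to ULONG_MAX so PFNs still fit in an unsigned long.
      */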
  71
  72#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
  73#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
  74#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))
  75
  76/* page table handling */
  77#define LEVEL_STRIDE		(9)
  78#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
  79
  80static inline int agaw_to_level(int agaw)
  81{
  82	return agaw + 2;
  83}
  84
  85static inline int agaw_to_width(int agaw)
  86{
  87	return 30 + agaw * LEVEL_STRIDE;
  88}
  89
  90static inline int width_to_agaw(int width)
  91{
  92	return (width - 30) / LEVEL_STRIDE;
  93}
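     /*
      * Example: the adjusted guest address width maps directly onto the
      * page-table geometry: agaw 1 -> 39-bit width, 3-level table;
      * agaw 2 -> 48-bit width, 4-level table; agaw 3 -> 57-bit width,
      * 5-level table.  Each extra level adds LEVEL_STRIDE (9) bits.
      */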
  94
  95static inline unsigned int level_to_offset_bits(int level)
  96{
  97	return (level - 1) * LEVEL_STRIDE;
  98}
  99
 100static inline int pfn_level_offset(unsigned long pfn, int level)
 101{
 102	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
 103}
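     /*
      * Example: pfn_level_offset(pfn, 1) extracts bits 0-8 of the DMA pfn
      * (the 4KiB page-table index) and pfn_level_offset(pfn, 2) extracts
      * bits 9-17 (the 2MiB-level index); each level selects one of the
      * 512 entries in a table page.
      */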
 104
 105static inline unsigned long level_mask(int level)
 106{
 107	return -1UL << level_to_offset_bits(level);
 108}
 109
 110static inline unsigned long level_size(int level)
 111{
 112	return 1UL << level_to_offset_bits(level);
 113}
 114
 115static inline unsigned long align_to_level(unsigned long pfn, int level)
 116{
 117	return (pfn + level_size(level) - 1) & level_mask(level);
 118}
 119
 120static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
 121{
 122	return  1 << ((lvl - 1) * LEVEL_STRIDE);
 123}
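     /*
      * Example: lvl_to_nr_pages(1) == 1 (a 4KiB page),
      * lvl_to_nr_pages(2) == 512 (a 2MiB superpage) and
      * lvl_to_nr_pages(3) == 262144 (a 1GiB superpage).
      */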
 124
 125/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
 126   are never going to work. */
 127static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
 128{
 129	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
 130}
 131
 132static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
 133{
 134	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
 135}
 136static inline unsigned long page_to_dma_pfn(struct page *pg)
 137{
 138	return mm_to_dma_pfn(page_to_pfn(pg));
 139}
 140static inline unsigned long virt_to_dma_pfn(void *p)
 141{
 142	return page_to_dma_pfn(virt_to_page(p));
 143}
 144
 145/* global iommu list, set NULL for ignored DMAR units */
 146static struct intel_iommu **g_iommus;
 147
 148static void __init check_tylersburg_isoch(void);
 149static int rwbf_quirk;
 150
 151/*
 152 * set to 1 to panic kernel if can't successfully enable VT-d
 153 * (used when kernel is launched w/ TXT)
 154 */
 155static int force_on = 0;
 
 
 156
 157/*
 158 * 0: Present
 159 * 1-11: Reserved
 160 * 12-63: Context Ptr (12 - (haw-1))
 161 * 64-127: Reserved
 162 */
 163struct root_entry {
 164	u64	val;
 165	u64	rsvd1;
 166};
 167#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
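     /*
      * A root entry is 16 bytes, so a single 4KiB root table holds
      * ROOT_ENTRY_NR == 256 entries, one per PCI bus number.
      */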
 168static inline bool root_present(struct root_entry *root)
 169{
 170	return (root->val & 1);
 171}
 172static inline void set_root_present(struct root_entry *root)
 
 173{
 174	root->val |= 1;
 175}
 176static inline void set_root_value(struct root_entry *root, unsigned long value)
 
 177{
 178	root->val |= value & VTD_PAGE_MASK;
 179}
 180
 181static inline struct context_entry *
 182get_context_addr_from_root(struct root_entry *root)
 183{
 184	return (struct context_entry *)
 185		(root_present(root)?phys_to_virt(
 186		root->val & VTD_PAGE_MASK) :
 187		NULL);
 188}
 189
 190/*
 191 * low 64 bits:
 192 * 0: present
 193 * 1: fault processing disable
 194 * 2-3: translation type
 195 * 12-63: address space root
 196 * high 64 bits:
 197 * 0-2: address width
 198 * 3-6: aval
 199 * 8-23: domain id
 200 */
 201struct context_entry {
 202	u64 lo;
 203	u64 hi;
 204};
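     /*
      * A context entry is also 16 bytes, so the context table referenced
      * by a root entry holds 256 entries, one per devfn on that bus (see
      * device_to_context_entry() below, which returns &context[devfn]).
      */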
 205
 206static inline bool context_present(struct context_entry *context)
 207{
 208	return (context->lo & 1);
 209}
 210static inline void context_set_present(struct context_entry *context)
 211{
 212	context->lo |= 1;
 213}
 214
 215static inline void context_set_fault_enable(struct context_entry *context)
 216{
 217	context->lo &= (((u64)-1) << 2) | 1;
 218}
 219
 220static inline void context_set_translation_type(struct context_entry *context,
 221						unsigned long value)
 222{
 223	context->lo &= (((u64)-1) << 4) | 3;
 224	context->lo |= (value & 3) << 2;
 225}
 226
 227static inline void context_set_address_root(struct context_entry *context,
 228					    unsigned long value)
 229{
 
 230	context->lo |= value & VTD_PAGE_MASK;
 231}
 232
 233static inline void context_set_address_width(struct context_entry *context,
 234					     unsigned long value)
 235{
 236	context->hi |= value & 7;
 237}
 238
 239static inline void context_set_domain_id(struct context_entry *context,
 240					 unsigned long value)
 241{
 242	context->hi |= (value & ((1 << 16) - 1)) << 8;
 243}
 244
 245static inline void context_clear_entry(struct context_entry *context)
 246{
 247	context->lo = 0;
 248	context->hi = 0;
 249}
 250
 251/*
 252 * 0: readable
 253 * 1: writable
 254 * 2-6: reserved
 255 * 7: super page
 256 * 8-10: available
 257 * 11: snoop behavior
  258 * 12-63: Host physical address
 259 */
 260struct dma_pte {
 261	u64 val;
 262};
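     /*
      * Example: a read/write mapping of host physical page 0x12345 is the
      * PTE value (0x12345 << VTD_PAGE_SHIFT) | DMA_PTE_READ |
      * DMA_PTE_WRITE, i.e. 0x12345003.
      */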
 263
 264static inline void dma_clear_pte(struct dma_pte *pte)
 265{
 266	pte->val = 0;
 267}
 268
 269static inline void dma_set_pte_readable(struct dma_pte *pte)
 270{
 271	pte->val |= DMA_PTE_READ;
 272}
 273
 274static inline void dma_set_pte_writable(struct dma_pte *pte)
 275{
 276	pte->val |= DMA_PTE_WRITE;
 277}
 278
 279static inline void dma_set_pte_snp(struct dma_pte *pte)
 280{
 281	pte->val |= DMA_PTE_SNP;
 282}
 283
 284static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
 285{
 286	pte->val = (pte->val & ~3) | (prot & 3);
 287}
 288
 289static inline u64 dma_pte_addr(struct dma_pte *pte)
 290{
 291#ifdef CONFIG_64BIT
 292	return pte->val & VTD_PAGE_MASK;
 293#else
 294	/* Must have a full atomic 64-bit read */
 295	return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
 296#endif
 297}
 298
 299static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
 300{
 301	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
 302}
 303
 304static inline bool dma_pte_present(struct dma_pte *pte)
 305{
 306	return (pte->val & 3) != 0;
 307}
 308
 309static inline bool dma_pte_superpage(struct dma_pte *pte)
 310{
 311	return (pte->val & (1 << 7));
 312}
 313
 314static inline int first_pte_in_page(struct dma_pte *pte)
 315{
 316	return !((unsigned long)pte & ~VTD_PAGE_MASK);
 
 317}
 318
 319/*
 320 * This domain is a statically identity mapping domain.
  321 *	1. This domain creates a static 1:1 mapping to all usable memory.
  322 * 	2. It maps to each iommu if successful.
  323 *	3. Each iommu maps to this domain if successful.
 324 */
 325static struct dmar_domain *si_domain;
 326static int hw_pass_through = 1;
 327
 328/* devices under the same p2p bridge are owned in one domain */
 329#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
 330
  331/* domain represents a virtual machine; more than one device
  332 * across iommus may be owned in one domain, e.g. a kvm guest.
 333 */
 334#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)
 335
  336/* si_domain contains multiple devices */
 337#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 2)
 338
 339struct dmar_domain {
 340	int	id;			/* domain id */
 341	int	nid;			/* node id */
 342	unsigned long iommu_bmp;	/* bitmap of iommus this domain uses*/
 343
 344	struct list_head devices; 	/* all devices' list */
 345	struct iova_domain iovad;	/* iova's that belong to this domain */
 346
 347	struct dma_pte	*pgd;		/* virtual address */
 348	int		gaw;		/* max guest address width */
 349
 350	/* adjusted guest address width, 0 is level 2 30-bit */
 351	int		agaw;
 352
 353	int		flags;		/* flags to find out type of domain */
 354
 355	int		iommu_coherency;/* indicate coherency of iommu access */
 356	int		iommu_snooping; /* indicate snooping control feature*/
 357	int		iommu_count;	/* reference count of iommu */
 358	int		iommu_superpage;/* Level of superpages supported:
 359					   0 == 4KiB (no superpages), 1 == 2MiB,
 360					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
 361	spinlock_t	iommu_lock;	/* protect iommu set in domain */
 362	u64		max_addr;	/* maximum mapped address */
 363};
 364
 365/* PCI domain-device relationship */
 366struct device_domain_info {
 367	struct list_head link;	/* link to domain siblings */
 368	struct list_head global; /* link to global list */
 369	int segment;		/* PCI domain */
 370	u8 bus;			/* PCI bus number */
 371	u8 devfn;		/* PCI devfn number */
 372	struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
 373	struct intel_iommu *iommu; /* IOMMU used by this device */
 374	struct dmar_domain *domain; /* pointer to domain */
 375};
 376
 377static void flush_unmaps_timeout(unsigned long data);
 
 378
 379DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
 380
 381#define HIGH_WATER_MARK 250
 382struct deferred_flush_tables {
 383	int next;
 384	struct iova *iova[HIGH_WATER_MARK];
 385	struct dmar_domain *domain[HIGH_WATER_MARK];
 386};
 387
 388static struct deferred_flush_tables *deferred_flush;
 389
 390/* bitmap for indexing intel_iommus */
 391static int g_num_of_iommus;
 392
 393static DEFINE_SPINLOCK(async_umap_flush_lock);
 394static LIST_HEAD(unmaps_to_do);
 395
 396static int timer_on;
 397static long list_size;
 398
 399static void domain_remove_dev_info(struct dmar_domain *domain);
 400
 401#ifdef CONFIG_DMAR_DEFAULT_ON
 402int dmar_disabled = 0;
 403#else
 404int dmar_disabled = 1;
 405#endif /*CONFIG_DMAR_DEFAULT_ON*/
 406
 407static int dmar_map_gfx = 1;
 408static int dmar_forcedac;
 409static int intel_iommu_strict;
 410static int intel_iommu_superpage = 1;
 411
 412int intel_iommu_gfx_mapped;
 413EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
 414
 415#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 
 416static DEFINE_SPINLOCK(device_domain_lock);
 417static LIST_HEAD(device_domain_list);
 418
 419static struct iommu_ops intel_iommu_ops;
 420
 421static int __init intel_iommu_setup(char *str)
 422{
 423	if (!str)
 424		return -EINVAL;
 425	while (*str) {
 426		if (!strncmp(str, "on", 2)) {
 427			dmar_disabled = 0;
 428			printk(KERN_INFO "Intel-IOMMU: enabled\n");
 429		} else if (!strncmp(str, "off", 3)) {
 430			dmar_disabled = 1;
 431			printk(KERN_INFO "Intel-IOMMU: disabled\n");
 
 432		} else if (!strncmp(str, "igfx_off", 8)) {
 433			dmar_map_gfx = 0;
 434			printk(KERN_INFO
 435				"Intel-IOMMU: disable GFX device mapping\n");
 436		} else if (!strncmp(str, "forcedac", 8)) {
 437			printk(KERN_INFO
 438				"Intel-IOMMU: Forcing DAC for PCI devices\n");
 439			dmar_forcedac = 1;
 440		} else if (!strncmp(str, "strict", 6)) {
 441			printk(KERN_INFO
 442				"Intel-IOMMU: disable batched IOTLB flush\n");
 443			intel_iommu_strict = 1;
 444		} else if (!strncmp(str, "sp_off", 6)) {
 445			printk(KERN_INFO
 446				"Intel-IOMMU: disable supported super page\n");
 447			intel_iommu_superpage = 0;
 448		}
 449
 450		str += strcspn(str, ",");
 451		while (*str == ',')
 452			str++;
 453	}
 454	return 0;
 455}
 456__setup("intel_iommu=", intel_iommu_setup);
 457
 458static struct kmem_cache *iommu_domain_cache;
 459static struct kmem_cache *iommu_devinfo_cache;
 460static struct kmem_cache *iommu_iova_cache;
 461
 462static inline void *alloc_pgtable_page(int node)
 463{
 464	struct page *page;
 465	void *vaddr = NULL;
 466
 467	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
 468	if (page)
 469		vaddr = page_address(page);
 470	return vaddr;
 471}
 472
 473static inline void free_pgtable_page(void *vaddr)
 474{
 475	free_page((unsigned long)vaddr);
 476}
 477
 478static inline void *alloc_domain_mem(void)
 479{
 480	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
 481}
 482
 483static void free_domain_mem(void *vaddr)
 484{
 485	kmem_cache_free(iommu_domain_cache, vaddr);
 486}
 487
 488static inline void * alloc_devinfo_mem(void)
 489{
 490	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
 491}
 492
 493static inline void free_devinfo_mem(void *vaddr)
 494{
 495	kmem_cache_free(iommu_devinfo_cache, vaddr);
 496}
 497
 498struct iova *alloc_iova_mem(void)
 499{
 500	return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
 501}
 502
 503void free_iova_mem(struct iova *iova)
 
 504{
 505	kmem_cache_free(iommu_iova_cache, iova);
 506}
 507
 
 
 508
 509static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
 510{
 511	unsigned long sagaw;
 512	int agaw = -1;
 513
 514	sagaw = cap_sagaw(iommu->cap);
 515	for (agaw = width_to_agaw(max_gaw);
 516	     agaw >= 0; agaw--) {
 517		if (test_bit(agaw, &sagaw))
 518			break;
 519	}
 520
 521	return agaw;
 522}
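     /*
      * Example: for max_gaw == DEFAULT_DOMAIN_ADDRESS_WIDTH (48) the search
      * starts at agaw 2 (4-level tables); if the hardware SAGAW field does
      * not advertise bit 2 it falls back to agaw 1 (39-bit, 3-level), and
      * -1 is returned if no suitable width is supported at all.
      */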
 523
 524/*
 525 * Calculate max SAGAW for each iommu.
 526 */
 527int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
 528{
 529	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
 530}
 531
 532/*
 533 * calculate agaw for each iommu.
  534 * "SAGAW" may be different across iommus, so use a default agaw, and
  535 * fall back to a smaller supported agaw for iommus that don't support it.
 536 */
 537int iommu_calculate_agaw(struct intel_iommu *iommu)
 538{
 539	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
 540}
 541
  542/* This function only returns a single iommu in a domain */
 543static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 544{
 545	int iommu_id;
 546
 547	/* si_domain and vm domain should not get here. */
 548	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
 549	BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
 550
 551	iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
 552	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
 553		return NULL;
 554
 555	return g_iommus[iommu_id];
 556}
 557
 558static void domain_update_iommu_coherency(struct dmar_domain *domain)
 559{
 560	int i;
 561
 562	domain->iommu_coherency = 1;
 563
 564	for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
 
 565		if (!ecap_coherent(g_iommus[i]->ecap)) {
 566			domain->iommu_coherency = 0;
 567			break;
 568		}
 569	}
 570}
 571
 572static void domain_update_iommu_snooping(struct dmar_domain *domain)
 573{
 574	int i;
 575
 576	domain->iommu_snooping = 1;
 577
 578	for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
 579		if (!ecap_sc_support(g_iommus[i]->ecap)) {
 580			domain->iommu_snooping = 0;
 581			break;
 582		}
 583	}
 584}
 585
 586static void domain_update_iommu_superpage(struct dmar_domain *domain)
 587{
 588	struct dmar_drhd_unit *drhd;
 589	struct intel_iommu *iommu = NULL;
 590	int mask = 0xf;
 591
 592	if (!intel_iommu_superpage) {
 593		domain->iommu_superpage = 0;
 594		return;
 595	}
 596
 597	/* set iommu_superpage to the smallest common denominator */
 
 598	for_each_active_iommu(iommu, drhd) {
 599		mask &= cap_super_page_val(iommu->cap);
 600		if (!mask) {
 601			break;
 
 602		}
 603	}
 604	domain->iommu_superpage = fls(mask);
 
 
 605}
 606
 607/* Some capabilities may be different across iommus */
 608static void domain_update_iommu_cap(struct dmar_domain *domain)
 609{
 610	domain_update_iommu_coherency(domain);
 611	domain_update_iommu_snooping(domain);
 612	domain_update_iommu_superpage(domain);
 613}
 614
 615static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
 
 616{
 617	struct dmar_drhd_unit *drhd = NULL;
 618	int i;
 619
 620	for_each_drhd_unit(drhd) {
 621		if (drhd->ignored)
 622			continue;
 623		if (segment != drhd->segment)
 624			continue;
 625
 626		for (i = 0; i < drhd->devices_cnt; i++) {
 627			if (drhd->devices[i] &&
 628			    drhd->devices[i]->bus->number == bus &&
 629			    drhd->devices[i]->devfn == devfn)
 630				return drhd->iommu;
 631			if (drhd->devices[i] &&
 632			    drhd->devices[i]->subordinate &&
 633			    drhd->devices[i]->subordinate->number <= bus &&
 634			    drhd->devices[i]->subordinate->subordinate >= bus)
 635				return drhd->iommu;
 636		}
 637
 638		if (drhd->include_all)
 639			return drhd->iommu;
 640	}
 
 
 641
 642	return NULL;
 
 
 643}
 644
 645static void domain_flush_cache(struct dmar_domain *domain,
 646			       void *addr, int size)
 647{
 648	if (!domain->iommu_coherency)
 649		clflush_cache_range(addr, size);
 650}
 651
 652/* Gets context entry for a given bus and devfn */
 653static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
 654		u8 bus, u8 devfn)
 655{
 656	struct root_entry *root;
 657	struct context_entry *context;
 658	unsigned long phy_addr;
 659	unsigned long flags;
 
 
 660
 661	spin_lock_irqsave(&iommu->lock, flags);
 662	root = &iommu->root_entry[bus];
 663	context = get_context_addr_from_root(root);
 664	if (!context) {
 665		context = (struct context_entry *)
 666				alloc_pgtable_page(iommu->node);
 667		if (!context) {
 668			spin_unlock_irqrestore(&iommu->lock, flags);
 669			return NULL;
 670		}
 671		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
 672		phy_addr = virt_to_phys((void *)context);
 673		set_root_value(root, phy_addr);
 674		set_root_present(root);
 675		__iommu_flush_cache(iommu, root, sizeof(*root));
 676	}
 677	spin_unlock_irqrestore(&iommu->lock, flags);
 678	return &context[devfn];
 679}
 680
 681static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
 
 682{
 683	struct root_entry *root;
 684	struct context_entry *context;
 685	int ret;
 686	unsigned long flags;
 687
 688	spin_lock_irqsave(&iommu->lock, flags);
 689	root = &iommu->root_entry[bus];
 690	context = get_context_addr_from_root(root);
 691	if (!context) {
 692		ret = 0;
 693		goto out;
 694	}
 695	ret = context_present(&context[devfn]);
 696out:
 697	spin_unlock_irqrestore(&iommu->lock, flags);
 698	return ret;
 699}
 700
 701static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
 702{
 703	struct root_entry *root;
 704	struct context_entry *context;
 
 705	unsigned long flags;
 706
 707	spin_lock_irqsave(&iommu->lock, flags);
 708	root = &iommu->root_entry[bus];
 709	context = get_context_addr_from_root(root);
 710	if (context) {
 711		context_clear_entry(&context[devfn]);
 712		__iommu_flush_cache(iommu, &context[devfn], \
 713			sizeof(*context));
 714	}
 715	spin_unlock_irqrestore(&iommu->lock, flags);
 
 716}
 717
 718static void free_context_table(struct intel_iommu *iommu)
 719{
 720	struct root_entry *root;
 721	int i;
 722	unsigned long flags;
 723	struct context_entry *context;
 724
 725	spin_lock_irqsave(&iommu->lock, flags);
 726	if (!iommu->root_entry) {
 727		goto out;
 728	}
 729	for (i = 0; i < ROOT_ENTRY_NR; i++) {
 730		root = &iommu->root_entry[i];
 731		context = get_context_addr_from_root(root);
 732		if (context)
 733			free_pgtable_page(context);
 734	}
 735	free_pgtable_page(iommu->root_entry);
 736	iommu->root_entry = NULL;
 737out:
 738	spin_unlock_irqrestore(&iommu->lock, flags);
 739}
 740
 741static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 742				      unsigned long pfn, int target_level)
 743{
 744	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 745	struct dma_pte *parent, *pte = NULL;
 746	int level = agaw_to_level(domain->agaw);
 747	int offset;
 748
 749	BUG_ON(!domain->pgd);
 750	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
 751	parent = domain->pgd;
 752
 753	while (level > 0) {
 754		void *tmp_page;
 755
 756		offset = pfn_level_offset(pfn, level);
 757		pte = &parent[offset];
 758		if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
 759			break;
 760		if (level == target_level)
 761			break;
 762
 763		if (!dma_pte_present(pte)) {
 764			uint64_t pteval;
 765
 766			tmp_page = alloc_pgtable_page(domain->nid);
 767
 768			if (!tmp_page)
 769				return NULL;
 770
 771			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
 772			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
 773			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
 774				/* Someone else set it while we were thinking; use theirs. */
 775				free_pgtable_page(tmp_page);
 776			} else {
 777				dma_pte_addr(pte);
 778				domain_flush_cache(domain, pte, sizeof(*pte));
 779			}
 780		}
 781		parent = phys_to_virt(dma_pte_addr(pte));
 782		level--;
 783	}
 784
 785	return pte;
 786}
 787
 788
 789/* return address's pte at specific level */
 790static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
 791					 unsigned long pfn,
 792					 int level, int *large_page)
 793{
 794	struct dma_pte *parent, *pte = NULL;
 795	int total = agaw_to_level(domain->agaw);
 796	int offset;
 797
 798	parent = domain->pgd;
 799	while (level <= total) {
 800		offset = pfn_level_offset(pfn, total);
 801		pte = &parent[offset];
 802		if (level == total)
 803			return pte;
 804
 805		if (!dma_pte_present(pte)) {
 806			*large_page = total;
 807			break;
 808		}
 809
 810		if (pte->val & DMA_PTE_LARGE_PAGE) {
 811			*large_page = total;
 812			return pte;
 813		}
 814
 815		parent = phys_to_virt(dma_pte_addr(pte));
 816		total--;
 817	}
 818	return NULL;
 819}
 820
 821/* clear last level pte, a tlb flush should be followed */
 822static int dma_pte_clear_range(struct dmar_domain *domain,
 823				unsigned long start_pfn,
 824				unsigned long last_pfn)
 825{
 826	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 827	unsigned int large_page = 1;
 828	struct dma_pte *first_pte, *pte;
 829	int order;
 830
 831	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
 832	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 833	BUG_ON(start_pfn > last_pfn);
 834
 835	/* we don't need lock here; nobody else touches the iova range */
 836	do {
 837		large_page = 1;
 838		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
 839		if (!pte) {
 840			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
 841			continue;
 842		}
 843		do {
 844			dma_clear_pte(pte);
 845			start_pfn += lvl_to_nr_pages(large_page);
 846			pte++;
 847		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
 848
 849		domain_flush_cache(domain, first_pte,
 850				   (void *)pte - (void *)first_pte);
 851
 852	} while (start_pfn && start_pfn <= last_pfn);
 853
 854	order = (large_page - 1) * 9;
 855	return order;
 856}
 857
 858/* free page table pages. last level pte should already be cleared */
 859static void dma_pte_free_pagetable(struct dmar_domain *domain,
 860				   unsigned long start_pfn,
 861				   unsigned long last_pfn)
 
 862{
 863	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 864	struct dma_pte *first_pte, *pte;
 865	int total = agaw_to_level(domain->agaw);
 866	int level;
 867	unsigned long tmp;
 868	int large_page = 2;
 869
 870	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
 871	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 872	BUG_ON(start_pfn > last_pfn);
 873
 
 
 874	/* We don't need lock here; nobody else touches the iova range */
 875	level = 2;
 876	while (level <= total) {
 877		tmp = align_to_level(start_pfn, level);
 878
 879		/* If we can't even clear one PTE at this level, we're done */
 880		if (tmp + level_size(level) - 1 > last_pfn)
 881			return;
 882
 883		do {
 884			large_page = level;
 885			first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
 886			if (large_page > level)
 887				level = large_page + 1;
 888			if (!pte) {
 889				tmp = align_to_level(tmp + 1, level + 1);
 890				continue;
 891			}
 892			do {
 893				if (dma_pte_present(pte)) {
 894					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
 895					dma_clear_pte(pte);
 896				}
 897				pte++;
 898				tmp += level_size(level);
 899			} while (!first_pte_in_page(pte) &&
 900				 tmp + level_size(level) - 1 <= last_pfn);
 901
 902			domain_flush_cache(domain, first_pte,
 903					   (void *)pte - (void *)first_pte);
 904			
 905		} while (tmp && tmp + level_size(level) - 1 <= last_pfn);
 906		level++;
 907	}
 908	/* free pgd */
 909	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
 910		free_pgtable_page(domain->pgd);
 911		domain->pgd = NULL;
 912	}
 913}
 914
 915/* iommu handling */
 916static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 917{
 918	struct root_entry *root;
 919	unsigned long flags;
 920
 921	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
 922	if (!root)
 
 
 923		return -ENOMEM;
 
 924
 925	__iommu_flush_cache(iommu, root, ROOT_SIZE);
 926
 927	spin_lock_irqsave(&iommu->lock, flags);
 928	iommu->root_entry = root;
 929	spin_unlock_irqrestore(&iommu->lock, flags);
 930
 931	return 0;
 932}
 933
 934static void iommu_set_root_entry(struct intel_iommu *iommu)
 935{
 936	void *addr;
 937	u32 sts;
 938	unsigned long flag;
 939
 940	addr = iommu->root_entry;
 
 
 941
 942	spin_lock_irqsave(&iommu->register_lock, flag);
 943	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
 944
 945	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
 946
  947	/* Make sure hardware completes it */
 948	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
 949		      readl, (sts & DMA_GSTS_RTPS), sts);
 950
 951	spin_unlock_irqrestore(&iommu->register_lock, flag);
 952}
 953
 954static void iommu_flush_write_buffer(struct intel_iommu *iommu)
 955{
 956	u32 val;
 957	unsigned long flag;
 958
 959	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
 960		return;
 961
 962	spin_lock_irqsave(&iommu->register_lock, flag);
 963	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
 964
  965	/* Make sure hardware completes it */
 966	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
 967		      readl, (!(val & DMA_GSTS_WBFS)), val);
 968
 969	spin_unlock_irqrestore(&iommu->register_lock, flag);
 970}
 971
  972/* return value determines if we need a write buffer flush */
 973static void __iommu_flush_context(struct intel_iommu *iommu,
 974				  u16 did, u16 source_id, u8 function_mask,
 975				  u64 type)
 976{
 977	u64 val = 0;
 978	unsigned long flag;
 979
 980	switch (type) {
 981	case DMA_CCMD_GLOBAL_INVL:
 982		val = DMA_CCMD_GLOBAL_INVL;
 983		break;
 984	case DMA_CCMD_DOMAIN_INVL:
 985		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
 986		break;
 987	case DMA_CCMD_DEVICE_INVL:
 988		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
 989			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
 990		break;
 991	default:
 992		BUG();
 993	}
 994	val |= DMA_CCMD_ICC;
 995
 996	spin_lock_irqsave(&iommu->register_lock, flag);
 997	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
 998
  999	/* Make sure hardware completes it */
1000	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1001		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1002
1003	spin_unlock_irqrestore(&iommu->register_lock, flag);
1004}
1005
 1006/* return value determines if we need a write buffer flush */
1007static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1008				u64 addr, unsigned int size_order, u64 type)
1009{
1010	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1011	u64 val = 0, val_iva = 0;
1012	unsigned long flag;
1013
1014	switch (type) {
1015	case DMA_TLB_GLOBAL_FLUSH:
 1016		/* global flush doesn't need to set IVA_REG */
1017		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1018		break;
1019	case DMA_TLB_DSI_FLUSH:
1020		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1021		break;
1022	case DMA_TLB_PSI_FLUSH:
1023		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1024		/* Note: always flush non-leaf currently */
1025		val_iva = size_order | addr;
1026		break;
1027	default:
1028		BUG();
1029	}
1030	/* Note: set drain read/write */
1031#if 0
1032	/*
 1033	 * This is probably only here to be extra safe.  Looks like we can
 1034	 * ignore it without any impact.
1035	 */
1036	if (cap_read_drain(iommu->cap))
1037		val |= DMA_TLB_READ_DRAIN;
1038#endif
1039	if (cap_write_drain(iommu->cap))
1040		val |= DMA_TLB_WRITE_DRAIN;
1041
1042	spin_lock_irqsave(&iommu->register_lock, flag);
1043	/* Note: Only uses first TLB reg currently */
1044	if (val_iva)
1045		dmar_writeq(iommu->reg + tlb_offset, val_iva);
1046	dmar_writeq(iommu->reg + tlb_offset + 8, val);
1047
 1048	/* Make sure hardware completes it */
1049	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1050		dmar_readq, (!(val & DMA_TLB_IVT)), val);
1051
1052	spin_unlock_irqrestore(&iommu->register_lock, flag);
1053
1054	/* check IOTLB invalidation granularity */
1055	if (DMA_TLB_IAIG(val) == 0)
1056		printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1057	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1058		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1059			(unsigned long long)DMA_TLB_IIRG(type),
1060			(unsigned long long)DMA_TLB_IAIG(val));
1061}
1062
1063static struct device_domain_info *iommu_support_dev_iotlb(
1064	struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
 
1065{
1066	int found = 0;
1067	unsigned long flags;
1068	struct device_domain_info *info;
1069	struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1070
1071	if (!ecap_dev_iotlb_support(iommu->ecap))
1072		return NULL;
1073
1074	if (!iommu->qi)
1075		return NULL;
1076
1077	spin_lock_irqsave(&device_domain_lock, flags);
1078	list_for_each_entry(info, &domain->devices, link)
1079		if (info->bus == bus && info->devfn == devfn) {
1080			found = 1;
 
 
1081			break;
1082		}
1083	spin_unlock_irqrestore(&device_domain_lock, flags);
1084
1085	if (!found || !info->dev)
1086		return NULL;
1087
1088	if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1089		return NULL;
 
 
1090
1091	if (!dmar_find_matched_atsr_unit(info->dev))
1092		return NULL;
1093
1094	info->iommu = iommu;
1095
1096	return info;
1097}
1098
1099static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1100{
1101	if (!info)
1102		return;
1103
1104	pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1105}
1106
1107static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1108{
1109	if (!info->dev || !pci_ats_enabled(info->dev))
1110		return;
1111
1112	pci_disable_ats(info->dev);
1113}
1114
1115static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1116				  u64 addr, unsigned mask)
1117{
1118	u16 sid, qdep;
1119	unsigned long flags;
1120	struct device_domain_info *info;
1121
1122	spin_lock_irqsave(&device_domain_lock, flags);
1123	list_for_each_entry(info, &domain->devices, link) {
1124		if (!info->dev || !pci_ats_enabled(info->dev))
1125			continue;
1126
1127		sid = info->bus << 8 | info->devfn;
1128		qdep = pci_ats_queue_depth(info->dev);
1129		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
 
1130	}
1131	spin_unlock_irqrestore(&device_domain_lock, flags);
1132}
1133
1134static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1135				  unsigned long pfn, unsigned int pages, int map)
 
 
1136{
1137	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1138	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
 
1139
1140	BUG_ON(pages == 0);
1141
 
 
1142	/*
1143	 * Fallback to domain selective flush if no PSI support or the size is
1144	 * too big.
1145	 * PSI requires page size to be 2 ^ x, and the base address is naturally
1146	 * aligned to the size
1147	 */
1148	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1149		iommu->flush.flush_iotlb(iommu, did, 0, 0,
1150						DMA_TLB_DSI_FLUSH);
1151	else
1152		iommu->flush.flush_iotlb(iommu, did, addr, mask,
1153						DMA_TLB_PSI_FLUSH);
1154
1155	/*
1156	 * In caching mode, changes of pages from non-present to present require
1157	 * flush. However, device IOTLB doesn't need to be flushed in this case.
1158	 */
1159	if (!cap_caching_mode(iommu->cap) || !map)
1160		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1161}
1162
1163static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1164{
1165	u32 pmen;
1166	unsigned long flags;
1167
1168	spin_lock_irqsave(&iommu->register_lock, flags);
1169	pmen = readl(iommu->reg + DMAR_PMEN_REG);
1170	pmen &= ~DMA_PMEN_EPM;
1171	writel(pmen, iommu->reg + DMAR_PMEN_REG);
1172
1173	/* wait for the protected region status bit to clear */
1174	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1175		readl, !(pmen & DMA_PMEN_PRS), pmen);
1176
1177	spin_unlock_irqrestore(&iommu->register_lock, flags);
1178}
1179
1180static int iommu_enable_translation(struct intel_iommu *iommu)
1181{
1182	u32 sts;
1183	unsigned long flags;
1184
1185	spin_lock_irqsave(&iommu->register_lock, flags);
1186	iommu->gcmd |= DMA_GCMD_TE;
1187	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1188
 1189	/* Make sure hardware completes it */
1190	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1191		      readl, (sts & DMA_GSTS_TES), sts);
1192
1193	spin_unlock_irqrestore(&iommu->register_lock, flags);
1194	return 0;
1195}
1196
1197static int iommu_disable_translation(struct intel_iommu *iommu)
1198{
1199	u32 sts;
1200	unsigned long flag;
1201
1202	spin_lock_irqsave(&iommu->register_lock, flag);
1203	iommu->gcmd &= ~DMA_GCMD_TE;
1204	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1205
 1206	/* Make sure hardware completes it */
1207	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1208		      readl, (!(sts & DMA_GSTS_TES)), sts);
1209
1210	spin_unlock_irqrestore(&iommu->register_lock, flag);
1211	return 0;
1212}
1213
1214
1215static int iommu_init_domains(struct intel_iommu *iommu)
1216{
1217	unsigned long ndomains;
1218	unsigned long nlongs;
1219
1220	ndomains = cap_ndoms(iommu->cap);
 1221	pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1222			ndomains);
1223	nlongs = BITS_TO_LONGS(ndomains);
1224
1225	spin_lock_init(&iommu->lock);
1226
1227	/* TBD: there might be 64K domains,
1228	 * consider other allocation for future chip
1229	 */
1230	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1231	if (!iommu->domain_ids) {
1232		printk(KERN_ERR "Allocating domain id array failed\n");
 
1233		return -ENOMEM;
1234	}
1235	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1236			GFP_KERNEL);
1237	if (!iommu->domains) {
1238		printk(KERN_ERR "Allocating domain array failed\n");
1239		return -ENOMEM;
1240	}
1241
1242	/*
1243	 * if Caching mode is set, then invalid translations are tagged
1244	 * with domainid 0. Hence we need to pre-allocate it.
 
 
1245	 */
1246	if (cap_caching_mode(iommu->cap))
1247		set_bit(0, iommu->domain_ids);
1248	return 0;
1249}
1250
1251
1252static void domain_exit(struct dmar_domain *domain);
1253static void vm_domain_exit(struct dmar_domain *domain);
1254
1255void free_dmar_iommu(struct intel_iommu *iommu)
1256{
1257	struct dmar_domain *domain;
1258	int i;
1259	unsigned long flags;
1260
1261	if ((iommu->domains) && (iommu->domain_ids)) {
1262		for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1263			domain = iommu->domains[i];
1264			clear_bit(i, iommu->domain_ids);
1265
1266			spin_lock_irqsave(&domain->iommu_lock, flags);
1267			if (--domain->iommu_count == 0) {
1268				if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1269					vm_domain_exit(domain);
1270				else
1271					domain_exit(domain);
1272			}
1273			spin_unlock_irqrestore(&domain->iommu_lock, flags);
1274		}
1275	}
1276
1277	if (iommu->gcmd & DMA_GCMD_TE)
1278		iommu_disable_translation(iommu);
 
 
1279
1280	if (iommu->irq) {
1281		irq_set_handler_data(iommu->irq, NULL);
1282		/* This will mask the irq */
1283		free_irq(iommu->irq, iommu);
1284		destroy_irq(iommu->irq);
1285	}
 
1286
1287	kfree(iommu->domains);
1288	kfree(iommu->domain_ids);
 
1289
1290	g_iommus[iommu->seq_id] = NULL;
1291
1292	/* if all iommus are freed, free g_iommus */
1293	for (i = 0; i < g_num_of_iommus; i++) {
1294		if (g_iommus[i])
1295			break;
 
 
1296	}
1297
1298	if (i == g_num_of_iommus)
1299		kfree(g_iommus);
1300
1301	/* free context mapping */
1302	free_context_table(iommu);
1303}
1304
1305static struct dmar_domain *alloc_domain(void)
1306{
1307	struct dmar_domain *domain;
1308
1309	domain = alloc_domain_mem();
1310	if (!domain)
1311		return NULL;
1312
1313	domain->nid = -1;
1314	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1315	domain->flags = 0;
 
 
1316
1317	return domain;
1318}
1319
1320static int iommu_attach_domain(struct dmar_domain *domain,
 
1321			       struct intel_iommu *iommu)
1322{
1323	int num;
1324	unsigned long ndomains;
1325	unsigned long flags;
1326
1327	ndomains = cap_ndoms(iommu->cap);
 
1328
1329	spin_lock_irqsave(&iommu->lock, flags);
1330
1331	num = find_first_zero_bit(iommu->domain_ids, ndomains);
1332	if (num >= ndomains) {
1333		spin_unlock_irqrestore(&iommu->lock, flags);
1334		printk(KERN_ERR "IOMMU: no free domain ids\n");
1335		return -ENOMEM;
1336	}
1337
1338	domain->id = num;
1339	set_bit(num, iommu->domain_ids);
1340	set_bit(iommu->seq_id, &domain->iommu_bmp);
1341	iommu->domains[num] = domain;
1342	spin_unlock_irqrestore(&iommu->lock, flags);
1343
1344	return 0;
1345}
1346
1347static void iommu_detach_domain(struct dmar_domain *domain,
1348				struct intel_iommu *iommu)
1349{
1350	unsigned long flags;
1351	int num, ndomains;
1352	int found = 0;
1353
1354	spin_lock_irqsave(&iommu->lock, flags);
1355	ndomains = cap_ndoms(iommu->cap);
1356	for_each_set_bit(num, iommu->domain_ids, ndomains) {
1357		if (iommu->domains[num] == domain) {
1358			found = 1;
1359			break;
1360		}
1361	}
1362
1363	if (found) {
1364		clear_bit(num, iommu->domain_ids);
1365		clear_bit(iommu->seq_id, &domain->iommu_bmp);
1366		iommu->domains[num] = NULL;
 
 
1367	}
1368	spin_unlock_irqrestore(&iommu->lock, flags);
 
1369}
1370
1371static struct iova_domain reserved_iova_list;
1372static struct lock_class_key reserved_rbtree_key;
1373
1374static int dmar_init_reserved_ranges(void)
1375{
1376	struct pci_dev *pdev = NULL;
1377	struct iova *iova;
1378	int i;
1379
1380	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1381
1382	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1383		&reserved_rbtree_key);
1384
1385	/* IOAPIC ranges shouldn't be accessed by DMA */
1386	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1387		IOVA_PFN(IOAPIC_RANGE_END));
1388	if (!iova) {
1389		printk(KERN_ERR "Reserve IOAPIC range failed\n");
1390		return -ENODEV;
1391	}
1392
1393	/* Reserve all PCI MMIO to avoid peer-to-peer access */
1394	for_each_pci_dev(pdev) {
1395		struct resource *r;
1396
1397		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1398			r = &pdev->resource[i];
1399			if (!r->flags || !(r->flags & IORESOURCE_MEM))
1400				continue;
1401			iova = reserve_iova(&reserved_iova_list,
1402					    IOVA_PFN(r->start),
1403					    IOVA_PFN(r->end));
1404			if (!iova) {
1405				printk(KERN_ERR "Reserve iova failed\n");
1406				return -ENODEV;
1407			}
1408		}
1409	}
1410	return 0;
1411}
1412
1413static void domain_reserve_special_ranges(struct dmar_domain *domain)
1414{
1415	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1416}
1417
1418static inline int guestwidth_to_adjustwidth(int gaw)
1419{
1420	int agaw;
1421	int r = (gaw - 12) % 9;
1422
1423	if (r == 0)
1424		agaw = gaw;
1425	else
1426		agaw = gaw + 9 - r;
1427	if (agaw > 64)
1428		agaw = 64;
1429	return agaw;
1430}
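     /*
      * Examples: gaw 48 -> r == 0 -> agaw 48; gaw 36 -> r == 6 -> agaw 39.
      * The guest width is rounded up to 12 bits of page offset plus a whole
      * number of 9-bit table levels, then capped at 64.
      */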
1431
1432static int domain_init(struct dmar_domain *domain, int guest_width)
 
1433{
1434	struct intel_iommu *iommu;
1435	int adjust_width, agaw;
1436	unsigned long sagaw;
1437
1438	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1439	spin_lock_init(&domain->iommu_lock);
 
 
1440
1441	domain_reserve_special_ranges(domain);
1442
1443	/* calculate AGAW */
1444	iommu = domain_get_iommu(domain);
1445	if (guest_width > cap_mgaw(iommu->cap))
1446		guest_width = cap_mgaw(iommu->cap);
1447	domain->gaw = guest_width;
1448	adjust_width = guestwidth_to_adjustwidth(guest_width);
1449	agaw = width_to_agaw(adjust_width);
1450	sagaw = cap_sagaw(iommu->cap);
1451	if (!test_bit(agaw, &sagaw)) {
1452		/* hardware doesn't support it, choose a bigger one */
1453		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1454		agaw = find_next_bit(&sagaw, 5, agaw);
1455		if (agaw >= 5)
1456			return -ENODEV;
1457	}
1458	domain->agaw = agaw;
1459	INIT_LIST_HEAD(&domain->devices);
1460
1461	if (ecap_coherent(iommu->ecap))
1462		domain->iommu_coherency = 1;
1463	else
1464		domain->iommu_coherency = 0;
1465
1466	if (ecap_sc_support(iommu->ecap))
1467		domain->iommu_snooping = 1;
1468	else
1469		domain->iommu_snooping = 0;
1470
1471	domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1472	domain->iommu_count = 1;
1473	domain->nid = iommu->node;
1474
1475	/* always allocate the top pgd */
1476	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1477	if (!domain->pgd)
1478		return -ENOMEM;
1479	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1480	return 0;
1481}
1482
1483static void domain_exit(struct dmar_domain *domain)
1484{
1485	struct dmar_drhd_unit *drhd;
1486	struct intel_iommu *iommu;
1487
 1488	/* Domain 0 is reserved, so don't process it */
1489	if (!domain)
1490		return;
1491
1492	/* Flush any lazy unmaps that may reference this domain */
1493	if (!intel_iommu_strict)
1494		flush_unmaps_timeout(0);
1495
 
1496	domain_remove_dev_info(domain);
 
1497	/* destroy iovas */
1498	put_iova_domain(&domain->iovad);
1499
1500	/* clear ptes */
1501	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1502
1503	/* free page tables */
1504	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1505
1506	for_each_active_iommu(iommu, drhd)
1507		if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1508			iommu_detach_domain(domain, iommu);
1509
1510	free_domain_mem(domain);
1511}
1512
1513static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1514				 u8 bus, u8 devfn, int translation)
1515{
1516	struct context_entry *context;
1517	unsigned long flags;
1518	struct intel_iommu *iommu;
1519	struct dma_pte *pgd;
1520	unsigned long num;
1521	unsigned long ndomains;
1522	int id;
1523	int agaw;
1524	struct device_domain_info *info = NULL;
1525
1526	pr_debug("Set context mapping for %02x:%02x.%d\n",
1527		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1528
1529	BUG_ON(!domain->pgd);
1530	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1531	       translation != CONTEXT_TT_MULTI_LEVEL);
1532
1533	iommu = device_to_iommu(segment, bus, devfn);
1534	if (!iommu)
1535		return -ENODEV;
1536
1537	context = device_to_context_entry(iommu, bus, devfn);
 
1538	if (!context)
1539		return -ENOMEM;
1540	spin_lock_irqsave(&iommu->lock, flags);
1541	if (context_present(context)) {
1542		spin_unlock_irqrestore(&iommu->lock, flags);
1543		return 0;
1544	}
1545
1546	id = domain->id;
1547	pgd = domain->pgd;
 
1548
1549	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1550	    domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1551		int found = 0;
1552
1553		/* find an available domain id for this device in iommu */
1554		ndomains = cap_ndoms(iommu->cap);
1555		for_each_set_bit(num, iommu->domain_ids, ndomains) {
1556			if (iommu->domains[num] == domain) {
1557				id = num;
1558				found = 1;
1559				break;
1560			}
1561		}
 
1562
1563		if (found == 0) {
1564			num = find_first_zero_bit(iommu->domain_ids, ndomains);
1565			if (num >= ndomains) {
1566				spin_unlock_irqrestore(&iommu->lock, flags);
1567				printk(KERN_ERR "IOMMU: no free domain ids\n");
1568				return -EFAULT;
1569			}
1570
1571			set_bit(num, iommu->domain_ids);
1572			iommu->domains[num] = domain;
1573			id = num;
1574		}
1575
1576		/* Skip top levels of page tables for
 1577		 * iommus which have less agaw than the default.
1578		 * Unnecessary for PT mode.
1579		 */
1580		if (translation != CONTEXT_TT_PASS_THROUGH) {
1581			for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1582				pgd = phys_to_virt(dma_pte_addr(pgd));
1583				if (!dma_pte_present(pgd)) {
1584					spin_unlock_irqrestore(&iommu->lock, flags);
1585					return -ENOMEM;
1586				}
1587			}
1588		}
1589	}
1590
1591	context_set_domain_id(context, id);
1592
1593	if (translation != CONTEXT_TT_PASS_THROUGH) {
1594		info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1595		translation = info ? CONTEXT_TT_DEV_IOTLB :
1596				     CONTEXT_TT_MULTI_LEVEL;
1597	}
1598	/*
1599	 * In pass through mode, AW must be programmed to indicate the largest
1600	 * AGAW value supported by hardware. And ASR is ignored by hardware.
1601	 */
1602	if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1603		context_set_address_width(context, iommu->msagaw);
1604	else {
1605		context_set_address_root(context, virt_to_phys(pgd));
1606		context_set_address_width(context, iommu->agaw);
1607	}
1608
1609	context_set_translation_type(context, translation);
1610	context_set_fault_enable(context);
1611	context_set_present(context);
1612	domain_flush_cache(domain, context, sizeof(*context));
1613
1614	/*
1615	 * It's a non-present to present mapping. If hardware doesn't cache
 1616	 * non-present entries we only need to flush the write-buffer. If it
 1617	 * _does_ cache non-present entries, then it does so in the special
1618	 * domain #0, which we have to flush:
1619	 */
1620	if (cap_caching_mode(iommu->cap)) {
1621		iommu->flush.flush_context(iommu, 0,
1622					   (((u16)bus) << 8) | devfn,
1623					   DMA_CCMD_MASK_NOBIT,
1624					   DMA_CCMD_DEVICE_INVL);
1625		iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1626	} else {
1627		iommu_flush_write_buffer(iommu);
1628	}
1629	iommu_enable_dev_iotlb(info);
1630	spin_unlock_irqrestore(&iommu->lock, flags);
1631
1632	spin_lock_irqsave(&domain->iommu_lock, flags);
1633	if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1634		domain->iommu_count++;
1635		if (domain->iommu_count == 1)
1636			domain->nid = iommu->node;
1637		domain_update_iommu_cap(domain);
1638	}
1639	spin_unlock_irqrestore(&domain->iommu_lock, flags);
1640	return 0;
1641}
1642
1643static int
1644domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1645			int translation)
1646{
1647	int ret;
1648	struct pci_dev *tmp, *parent;
 
 
1649
1650	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1651					 pdev->bus->number, pdev->devfn,
1652					 translation);
1653	if (ret)
1654		return ret;
1655
1656	/* dependent device mapping */
1657	tmp = pci_find_upstream_pcie_bridge(pdev);
1658	if (!tmp)
1659		return 0;
1660	/* Secondary interface's bus number and devfn 0 */
1661	parent = pdev->bus->self;
1662	while (parent != tmp) {
1663		ret = domain_context_mapping_one(domain,
1664						 pci_domain_nr(parent->bus),
1665						 parent->bus->number,
1666						 parent->devfn, translation);
1667		if (ret)
1668			return ret;
1669		parent = parent->bus->self;
1670	}
1671	if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1672		return domain_context_mapping_one(domain,
1673					pci_domain_nr(tmp->subordinate),
1674					tmp->subordinate->number, 0,
1675					translation);
1676	else /* this is a legacy PCI bridge */
1677		return domain_context_mapping_one(domain,
1678						  pci_domain_nr(tmp->bus),
1679						  tmp->bus->number,
1680						  tmp->devfn,
1681						  translation);
1682}
1683
1684static int domain_context_mapped(struct pci_dev *pdev)
1685{
1686	int ret;
1687	struct pci_dev *tmp, *parent;
1688	struct intel_iommu *iommu;
 
1689
1690	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1691				pdev->devfn);
1692	if (!iommu)
1693		return -ENODEV;
1694
1695	ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1696	if (!ret)
1697		return ret;
1698	/* dependent device mapping */
1699	tmp = pci_find_upstream_pcie_bridge(pdev);
1700	if (!tmp)
1701		return ret;
1702	/* Secondary interface's bus number and devfn 0 */
1703	parent = pdev->bus->self;
1704	while (parent != tmp) {
1705		ret = device_context_mapped(iommu, parent->bus->number,
1706					    parent->devfn);
1707		if (!ret)
1708			return ret;
1709		parent = parent->bus->self;
1710	}
1711	if (pci_is_pcie(tmp))
1712		return device_context_mapped(iommu, tmp->subordinate->number,
1713					     0);
1714	else
1715		return device_context_mapped(iommu, tmp->bus->number,
1716					     tmp->devfn);
1717}
1718
1719/* Returns a number of VTD pages, but aligned to MM page size */
1720static inline unsigned long aligned_nrpages(unsigned long host_addr,
1721					    size_t size)
1722{
1723	host_addr &= ~PAGE_MASK;
1724	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1725}
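     /*
      * Example (with 4KiB MM pages): host_addr == 0x1234, size == 0x2000
      * keeps the sub-page offset 0x234, rounds 0x2234 up to 0x3000 and
      * returns 3 VT-d pages.
      */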
1726
1727/* Return largest possible superpage level for a given mapping */
1728static inline int hardware_largepage_caps(struct dmar_domain *domain,
1729					  unsigned long iov_pfn,
1730					  unsigned long phy_pfn,
1731					  unsigned long pages)
1732{
1733	int support, level = 1;
1734	unsigned long pfnmerge;
1735
1736	support = domain->iommu_superpage;
1737
1738	/* To use a large page, the virtual *and* physical addresses
1739	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1740	   of them will mean we have to use smaller pages. So just
1741	   merge them and check both at once. */
1742	pfnmerge = iov_pfn | phy_pfn;
1743
1744	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1745		pages >>= VTD_STRIDE_SHIFT;
1746		if (!pages)
1747			break;
1748		pfnmerge >>= VTD_STRIDE_SHIFT;
1749		level++;
1750		support--;
1751	}
1752	return level;
1753}
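     /*
      * Example: with domain->iommu_superpage >= 1, iov_pfn and phy_pfn both
      * 2MiB-aligned (low 9 bits clear) and at least 512 pages to map, this
      * returns level 2 so a single 2MiB superpage PTE can be used; any
      * misalignment or a shorter run falls back to level 1 (4KiB pages).
      */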
1754
1755static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1756			    struct scatterlist *sg, unsigned long phys_pfn,
1757			    unsigned long nr_pages, int prot)
1758{
1759	struct dma_pte *first_pte = NULL, *pte = NULL;
1760	phys_addr_t uninitialized_var(pteval);
1761	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1762	unsigned long sg_res;
1763	unsigned int largepage_lvl = 0;
1764	unsigned long lvl_pages = 0;
1765
1766	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1767
1768	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1769		return -EINVAL;
1770
1771	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1772
1773	if (sg)
1774		sg_res = 0;
1775	else {
1776		sg_res = nr_pages + 1;
1777		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1778	}
1779
1780	while (nr_pages > 0) {
1781		uint64_t tmp;
1782
1783		if (!sg_res) {
 
 
1784			sg_res = aligned_nrpages(sg->offset, sg->length);
1785			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1786			sg->dma_length = sg->length;
1787			pteval = page_to_phys(sg_page(sg)) | prot;
1788			phys_pfn = pteval >> VTD_PAGE_SHIFT;
1789		}
1790
1791		if (!pte) {
1792			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1793
1794			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1795			if (!pte)
1796				return -ENOMEM;
1797			/* It is large page*/
1798			if (largepage_lvl > 1)
 
 
1799				pteval |= DMA_PTE_LARGE_PAGE;
1800			else
1801				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
 
1802
1803		}
1804		/* We don't need lock here, nobody else
1805		 * touches the iova range
1806		 */
1807		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1808		if (tmp) {
1809			static int dumps = 5;
1810			printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1811			       iov_pfn, tmp, (unsigned long long)pteval);
1812			if (dumps) {
1813				dumps--;
1814				debug_dma_dump_mappings(NULL);
1815			}
1816			WARN_ON(1);
1817		}
1818
1819		lvl_pages = lvl_to_nr_pages(largepage_lvl);
1820
1821		BUG_ON(nr_pages < lvl_pages);
1822		BUG_ON(sg_res < lvl_pages);
1823
1824		nr_pages -= lvl_pages;
1825		iov_pfn += lvl_pages;
1826		phys_pfn += lvl_pages;
1827		pteval += lvl_pages * VTD_PAGE_SIZE;
1828		sg_res -= lvl_pages;
1829
1830		/* If the next PTE would be the first in a new page, then we
1831		   need to flush the cache on the entries we've just written.
1832		   And then we'll need to recalculate 'pte', so clear it and
1833		   let it get set again in the if (!pte) block above.
1834
1835		   If we're done (!nr_pages) we need to flush the cache too.
1836
1837		   Also if we've been setting superpages, we may need to
1838		   recalculate 'pte' and switch back to smaller pages for the
1839		   end of the mapping, if the trailing size is not enough to
1840		   use another superpage (i.e. sg_res < lvl_pages). */
1841		pte++;
1842		if (!nr_pages || first_pte_in_page(pte) ||
1843		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
1844			domain_flush_cache(domain, first_pte,
1845					   (void *)pte - (void *)first_pte);
1846			pte = NULL;
1847		}
1848
1849		if (!sg_res && nr_pages)
1850			sg = sg_next(sg);
1851	}
1852	return 0;
1853}
1854
1855static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1856				    struct scatterlist *sg, unsigned long nr_pages,
1857				    int prot)
1858{
1859	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1860}
1861
1862static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1863				     unsigned long phys_pfn, unsigned long nr_pages,
1864				     int prot)
1865{
1866	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1867}
1868
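/*
 * Clear the context-table entry for one device and globally invalidate the
 * context and IOTLB caches so the hardware stops using the old translation.
 */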
1869static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1870{
1871	if (!iommu)
1872		return;
1873
1874	clear_context_table(iommu, bus, devfn);
1875	iommu->flush.flush_context(iommu, 0, 0, 0,
1876					   DMA_CCMD_GLOBAL_INVL);
1877	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1878}
1879
1880static void domain_remove_dev_info(struct dmar_domain *domain)
1881{
1882	struct device_domain_info *info;
1883	unsigned long flags;
1884	struct intel_iommu *iommu;
1885
1886	spin_lock_irqsave(&device_domain_lock, flags);
1887	while (!list_empty(&domain->devices)) {
1888		info = list_entry(domain->devices.next,
1889			struct device_domain_info, link);
1890		list_del(&info->link);
1891		list_del(&info->global);
1892		if (info->dev)
1893			info->dev->dev.archdata.iommu = NULL;
1894		spin_unlock_irqrestore(&device_domain_lock, flags);
1895
1896		iommu_disable_dev_iotlb(info);
1897		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1898		iommu_detach_dev(iommu, info->bus, info->devfn);
1899		free_devinfo_mem(info);
1900
1901		spin_lock_irqsave(&device_domain_lock, flags);
1902	}
1903	spin_unlock_irqrestore(&device_domain_lock, flags);
1904}
1905
1906/*
1907 * find_domain
1908 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1909 */
1910static struct dmar_domain *
1911find_domain(struct pci_dev *pdev)
1912{
1913	struct device_domain_info *info;
1914
1915	/* No lock here, assumes no domain exit in normal case */
1916	info = pdev->dev.archdata.iommu;
1917	if (info)
1918		return info->domain;
1919	return NULL;
1920}
1921
1922/* find or allocate a domain for the device; the returned domain is initialized */
1923static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1924{
1925	struct dmar_domain *domain, *found = NULL;
1926	struct intel_iommu *iommu;
1927	struct dmar_drhd_unit *drhd;
1928	struct device_domain_info *info, *tmp;
1929	struct pci_dev *dev_tmp;
1930	unsigned long flags;
1931	int bus = 0, devfn = 0;
1932	int segment;
1933	int ret;
1934
1935	domain = find_domain(pdev);
1936	if (domain)
1937		return domain;
1938
1939	segment = pci_domain_nr(pdev->bus);
1940
1941	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1942	if (dev_tmp) {
1943		if (pci_is_pcie(dev_tmp)) {
1944			bus = dev_tmp->subordinate->number;
1945			devfn = 0;
1946		} else {
1947			bus = dev_tmp->bus->number;
1948			devfn = dev_tmp->devfn;
1949		}
1950		spin_lock_irqsave(&device_domain_lock, flags);
1951		list_for_each_entry(info, &device_domain_list, global) {
1952			if (info->segment == segment &&
1953			    info->bus == bus && info->devfn == devfn) {
1954				found = info->domain;
1955				break;
1956			}
1957		}
1958		spin_unlock_irqrestore(&device_domain_lock, flags);
1959		/* pcie-pci bridge already has a domain, use it */
1960		if (found) {
1961			domain = found;
1962			goto found_domain;
1963		}
1964	}
1965
1966	domain = alloc_domain();
1967	if (!domain)
1968		goto error;
1969
1970	/* Allocate new domain for the device */
1971	drhd = dmar_find_matched_drhd_unit(pdev);
1972	if (!drhd) {
1973		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1974			pci_name(pdev));
1975		return NULL;
1976	}
1977	iommu = drhd->iommu;
1978
1979	ret = iommu_attach_domain(domain, iommu);
1980	if (ret) {
1981		free_domain_mem(domain);
1982		goto error;
1983	}
1984
1985	if (domain_init(domain, gaw)) {
1986		domain_exit(domain);
1987		goto error;
1988	}
1989
1990	/* register pcie-to-pci device */
1991	if (dev_tmp) {
1992		info = alloc_devinfo_mem();
1993		if (!info) {
1994			domain_exit(domain);
1995			goto error;
1996		}
1997		info->segment = segment;
1998		info->bus = bus;
1999		info->devfn = devfn;
2000		info->dev = NULL;
2001		info->domain = domain;
2002		/* This domain is shared by devices under p2p bridge */
2003		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2004
2005		/* pcie-to-pci bridge already has a domain, use it */
2006		found = NULL;
2007		spin_lock_irqsave(&device_domain_lock, flags);
2008		list_for_each_entry(tmp, &device_domain_list, global) {
2009			if (tmp->segment == segment &&
2010			    tmp->bus == bus && tmp->devfn == devfn) {
2011				found = tmp->domain;
2012				break;
2013			}
2014		}
2015		if (found) {
2016			spin_unlock_irqrestore(&device_domain_lock, flags);
2017			free_devinfo_mem(info);
2018			domain_exit(domain);
2019			domain = found;
2020		} else {
2021			list_add(&info->link, &domain->devices);
2022			list_add(&info->global, &device_domain_list);
2023			spin_unlock_irqrestore(&device_domain_lock, flags);
2024		}
2025	}
2026
2027found_domain:
2028	info = alloc_devinfo_mem();
2029	if (!info)
2030		goto error;
2031	info->segment = segment;
2032	info->bus = pdev->bus->number;
2033	info->devfn = pdev->devfn;
2034	info->dev = pdev;
2035	info->domain = domain;
2036	spin_lock_irqsave(&device_domain_lock, flags);
2037	/* somebody else may have set it up while we dropped the lock */
2038	found = find_domain(pdev);
2039	if (found != NULL) {
2040		spin_unlock_irqrestore(&device_domain_lock, flags);
2041		if (found != domain) {
2042			domain_exit(domain);
2043			domain = found;
2044		}
2045		free_devinfo_mem(info);
2046		return domain;
2047	}
2048	list_add(&info->link, &domain->devices);
2049	list_add(&info->global, &device_domain_list);
2050	pdev->dev.archdata.iommu = info;
2051	spin_unlock_irqrestore(&device_domain_lock, flags);
2052	return domain;
2053error:
2054	/* recheck it here, maybe others set it */
2055	return find_domain(pdev);
2056}
2057
2058static int iommu_identity_mapping;
2059#define IDENTMAP_ALL		1
2060#define IDENTMAP_GFX		2
2061#define IDENTMAP_AZALIA		4
2062
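/*
 * Reserve the iova range and install a 1:1 (virtual pfn == physical pfn)
 * mapping for [start, end] in the given domain.
 */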
2063static int iommu_domain_identity_map(struct dmar_domain *domain,
2064				     unsigned long long start,
2065				     unsigned long long end)
2066{
2067	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2068	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2069
2070	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2071			  dma_to_mm_pfn(last_vpfn))) {
2072		printk(KERN_ERR "IOMMU: reserve iova failed\n");
2073		return -ENOMEM;
2074	}
2075
2076	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2077		 start, end, domain->id);
2078	/*
2079	 * RMRR range might have overlap with physical memory range,
2080	 * clear it first
2081	 */
2082	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2083
2084	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2085				  last_vpfn - first_vpfn + 1,
2086				  DMA_PTE_READ|DMA_PTE_WRITE);
2087}
2088
2089static int iommu_prepare_identity_map(struct pci_dev *pdev,
2090				      unsigned long long start,
2091				      unsigned long long end)
2092{
2093	struct dmar_domain *domain;
2094	int ret;
2095
2096	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2097	if (!domain)
2098		return -ENOMEM;
2099
2100	/* For _hardware_ passthrough, don't bother. But for software
2101	   passthrough, we do it anyway -- it may indicate a memory
2102	   range which is reserved in E820 and which therefore didn't get
2103	   set up to start with in si_domain */
2104	if (domain == si_domain && hw_pass_through) {
2105		printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2106		       pci_name(pdev), start, end);
2107		return 0;
2108	}
2109
2110	printk(KERN_INFO
2111	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2112	       pci_name(pdev), start, end);
2113	
2114	if (end < start) {
2115		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2116			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2117			dmi_get_system_info(DMI_BIOS_VENDOR),
2118			dmi_get_system_info(DMI_BIOS_VERSION),
2119		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2120		ret = -EIO;
2121		goto error;
2122	}
2123
2124	if (end >> agaw_to_width(domain->agaw)) {
2125		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2126		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2127		     agaw_to_width(domain->agaw),
2128		     dmi_get_system_info(DMI_BIOS_VENDOR),
2129		     dmi_get_system_info(DMI_BIOS_VERSION),
2130		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2131		ret = -EIO;
2132		goto error;
2133	}
2134
2135	ret = iommu_domain_identity_map(domain, start, end);
2136	if (ret)
2137		goto error;
2138
2139	/* context entry init */
2140	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2141	if (ret)
2142		goto error;
2143
2144	return 0;
2145
2146 error:
2147	domain_exit(domain);
2148	return ret;
2149}
2150
2151static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2152	struct pci_dev *pdev)
2153{
2154	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2155		return 0;
2156	return iommu_prepare_identity_map(pdev, rmrr->base_address,
2157		rmrr->end_address);
2158}
2159
2160#ifdef CONFIG_DMAR_FLOPPY_WA
2161static inline void iommu_prepare_isa(void)
2162{
2163	struct pci_dev *pdev;
2164	int ret;
2165
2166	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2167	if (!pdev)
2168		return;
2169
2170	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2171	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2172
2173	if (ret)
2174		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2175		       "floppy might not work\n");
2176
2177}
2178#else
2179static inline void iommu_prepare_isa(void)
2180{
2181	return;
2182}
2183#endif /* !CONFIG_DMAR_FLOPPY_WA */
2184
2185static int md_domain_init(struct dmar_domain *domain, int guest_width);
2186
2187static int __init si_domain_work_fn(unsigned long start_pfn,
2188				    unsigned long end_pfn, void *datax)
2189{
2190	int *ret = datax;
2191
2192	*ret = iommu_domain_identity_map(si_domain,
2193					 (uint64_t)start_pfn << PAGE_SHIFT,
2194					 (uint64_t)end_pfn << PAGE_SHIFT);
2195	return *ret;
2196
2197}
2198
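/*
 * Set up the static identity (si) domain: attach it to every active IOMMU
 * and, unless hardware pass-through is in use, 1:1-map all usable RAM.
 */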
2199static int __init si_domain_init(int hw)
2200{
2201	struct dmar_drhd_unit *drhd;
2202	struct intel_iommu *iommu;
2203	int nid, ret = 0;
2204
2205	si_domain = alloc_domain();
2206	if (!si_domain)
2207		return -EFAULT;
2208
2209	pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2210
2211	for_each_active_iommu(iommu, drhd) {
2212		ret = iommu_attach_domain(si_domain, iommu);
2213		if (ret) {
2214			domain_exit(si_domain);
2215			return -EFAULT;
2216		}
2217	}
2218
2219	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2220		domain_exit(si_domain);
2221		return -EFAULT;
2222	}
2223
2224	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2225
2226	if (hw)
2227		return 0;
2228
2229	for_each_online_node(nid) {
2230		work_with_active_regions(nid, si_domain_work_fn, &ret);
2231		if (ret)
2232			return ret;
2233	}
2234
2235	return 0;
2236}
2237
2238static void domain_remove_one_dev_info(struct dmar_domain *domain,
2239					  struct pci_dev *pdev);
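/* Return non-zero if @pdev is currently attached to the static identity domain. */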
2240static int identity_mapping(struct pci_dev *pdev)
2241{
2242	struct device_domain_info *info;
2243
2244	if (likely(!iommu_identity_mapping))
2245		return 0;
2246
2247	info = pdev->dev.archdata.iommu;
2248	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2249		return (info->domain == si_domain);
2250
2251	return 0;
2252}
2253
2254static int domain_add_dev_info(struct dmar_domain *domain,
2255			       struct pci_dev *pdev,
2256			       int translation)
2257{
2258	struct device_domain_info *info;
2259	unsigned long flags;
2260	int ret;
2261
2262	info = alloc_devinfo_mem();
2263	if (!info)
2264		return -ENOMEM;
2265
2266	ret = domain_context_mapping(domain, pdev, translation);
2267	if (ret) {
2268		free_devinfo_mem(info);
2269		return ret;
2270	}
2271
2272	info->segment = pci_domain_nr(pdev->bus);
2273	info->bus = pdev->bus->number;
2274	info->devfn = pdev->devfn;
2275	info->dev = pdev;
2276	info->domain = domain;
2277
2278	spin_lock_irqsave(&device_domain_lock, flags);
2279	list_add(&info->link, &domain->devices);
2280	list_add(&info->global, &device_domain_list);
2281	pdev->dev.archdata.iommu = info;
2282	spin_unlock_irqrestore(&device_domain_lock, flags);
2283
2284	return 0;
2285}
2286
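/*
 * Policy helper: decide whether @pdev should live in the static identity
 * domain.  At boot (@startup) DMA masks are not known yet, so 64-bit
 * capability is assumed; at run time the device's DMA mask is checked.
 */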
2287static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2288{
2289	if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2290		return 1;
2291
2292	if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2293		return 1;
2294
2295	if (!(iommu_identity_mapping & IDENTMAP_ALL))
2296		return 0;
2297
2298	/*
2299	 * We want to start off with all devices in the 1:1 domain, and
2300	 * take them out later if we find they can't access all of memory.
2301	 *
2302	 * However, we can't do this for PCI devices behind bridges,
2303	 * because all PCI devices behind the same bridge will end up
2304	 * with the same source-id on their transactions.
2305	 *
2306	 * Practically speaking, we can't change things around for these
2307	 * devices at run-time, because we can't be sure there'll be no
2308	 * DMA transactions in flight for any of their siblings.
2309	 * 
2310	 * So PCI devices (unless they're on the root bus) as well as
2311	 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2312	 * the 1:1 domain, just in _case_ one of their siblings turns out
2313	 * not to be able to map all of memory.
2314	 */
2315	if (!pci_is_pcie(pdev)) {
2316		if (!pci_is_root_bus(pdev->bus))
2317			return 0;
2318		if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2319			return 0;
2320	} else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2321		return 0;
2322
2323	/* 
2324	 * At boot time, we don't yet know if devices will be 64-bit capable.
2325	 * Assume that they will -- if they turn out not to be, then we can 
2326	 * take them out of the 1:1 domain later.
2327	 */
2328	if (!startup) {
2329		/*
2330		 * If the device's dma_mask is less than the system's memory
2331		 * size then this is not a candidate for identity mapping.
2332		 */
2333		u64 dma_mask = pdev->dma_mask;
2334
2335		if (pdev->dev.coherent_dma_mask &&
2336		    pdev->dev.coherent_dma_mask < dma_mask)
2337			dma_mask = pdev->dev.coherent_dma_mask;
2338
2339		return dma_mask >= dma_get_required_mask(&pdev->dev);
2340	}
2341
2342	return 1;
2343}
2344
2345static int __init iommu_prepare_static_identity_mapping(int hw)
2346{
2347	struct pci_dev *pdev = NULL;
2348	int ret;
2349
2350	ret = si_domain_init(hw);
2351	if (ret)
2352		return -EFAULT;
2353
2354	for_each_pci_dev(pdev) {
2355		/* Skip Host/PCI Bridge devices */
2356		if (IS_BRIDGE_HOST_DEVICE(pdev))
2357			continue;
2358		if (iommu_should_identity_map(pdev, 1)) {
2359			printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2360			       hw ? "hardware" : "software", pci_name(pdev));
2361
2362			ret = domain_add_dev_info(si_domain, pdev,
2363						     hw ? CONTEXT_TT_PASS_THROUGH :
2364						     CONTEXT_TT_MULTI_LEVEL);
2365			if (ret)
2366				return ret;
2367		}
2368	}
2369
2370	return 0;
2371}
2372
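/*
 * Core DMA-remapping bring-up: allocate per-IOMMU state and root entries,
 * select queued or register-based invalidation, set up identity, RMRR and
 * ISA mappings, then enable fault reporting and translation on each unit.
 */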
2373static int __init init_dmars(void)
2374{
2375	struct dmar_drhd_unit *drhd;
2376	struct dmar_rmrr_unit *rmrr;
2377	struct pci_dev *pdev;
2378	struct intel_iommu *iommu;
2379	int i, ret;
2380
2381	/*
2382	 * for each drhd
2383	 *    allocate root
2384	 *    initialize and program root entry to not present
2385	 * endfor
2386	 */
2387	for_each_drhd_unit(drhd) {
2388		g_num_of_iommus++;
2389		/*
2390		 * lock not needed as this is only incremented in the
2391		 * single-threaded kernel __init code path; all other
2392		 * accesses are read-only
2393		 */
2394	}
2395
2396	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2397			GFP_KERNEL);
2398	if (!g_iommus) {
2399		printk(KERN_ERR "Allocating global iommu array failed\n");
2400		ret = -ENOMEM;
2401		goto error;
2402	}
2403
2404	deferred_flush = kzalloc(g_num_of_iommus *
2405		sizeof(struct deferred_flush_tables), GFP_KERNEL);
2406	if (!deferred_flush) {
2407		ret = -ENOMEM;
2408		goto error;
2409	}
2410
2411	for_each_drhd_unit(drhd) {
2412		if (drhd->ignored)
2413			continue;
2414
2415		iommu = drhd->iommu;
2416		g_iommus[iommu->seq_id] = iommu;
2417
2418		ret = iommu_init_domains(iommu);
2419		if (ret)
2420			goto error;
2421
2422		/*
2423		 * TBD:
2424		 * we could share the same root & context tables
2425		 * among all IOMMUs; need to split it later.
2426		 */
2427		ret = iommu_alloc_root_entry(iommu);
2428		if (ret) {
2429			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2430			goto error;
2431		}
2432		if (!ecap_pass_through(iommu->ecap))
2433			hw_pass_through = 0;
2434	}
2435
2436	/*
2437	 * Start from a sane iommu hardware state.
2438	 */
2439	for_each_drhd_unit(drhd) {
2440		if (drhd->ignored)
2441			continue;
2442
2443		iommu = drhd->iommu;
2444
2445		/*
2446		 * If the queued invalidation is already initialized by us
2447		 * (for example, while enabling interrupt-remapping) then
2448		 * we got the things already rolling from a sane state.
2449		 */
2450		if (iommu->qi)
2451			continue;
2452
2453		/*
2454		 * Clear any previous faults.
2455		 */
2456		dmar_fault(-1, iommu);
2457		/*
2458		 * Disable queued invalidation if supported and already enabled
2459		 * before OS handover.
2460		 */
2461		dmar_disable_qi(iommu);
2462	}
2463
2464	for_each_drhd_unit(drhd) {
2465		if (drhd->ignored)
2466			continue;
2467
2468		iommu = drhd->iommu;
2469
2470		if (dmar_enable_qi(iommu)) {
2471			/*
2472			 * Queued Invalidate not enabled, use Register Based
2473			 * Invalidate
2474			 */
2475			iommu->flush.flush_context = __iommu_flush_context;
2476			iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2477			printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2478			       "invalidation\n",
2479				iommu->seq_id,
2480			       (unsigned long long)drhd->reg_base_addr);
2481		} else {
2482			iommu->flush.flush_context = qi_flush_context;
2483			iommu->flush.flush_iotlb = qi_flush_iotlb;
2484			printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2485			       "invalidation\n",
2486				iommu->seq_id,
2487			       (unsigned long long)drhd->reg_base_addr);
2488		}
2489	}
2490
2491	if (iommu_pass_through)
2492		iommu_identity_mapping |= IDENTMAP_ALL;
2493
2494#ifdef CONFIG_DMAR_BROKEN_GFX_WA
2495	iommu_identity_mapping |= IDENTMAP_GFX;
2496#endif
2497
2498	check_tylersburg_isoch();
2499
2500	/*
2501	 * If pass through is not set or not enabled, setup context entries for
2502	 * identity mappings for rmrr, gfx, and isa and may fall back to static
2503	 * identity mapping if iommu_identity_mapping is set.
2504	 */
2505	if (iommu_identity_mapping) {
2506		ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2507		if (ret) {
2508			printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2509			goto error;
2510		}
2511	}
2512	/*
2513	 * For each rmrr
2514	 *   for each dev attached to rmrr
2515	 *   do
2516	 *     locate drhd for dev, alloc domain for dev
2517	 *     allocate free domain
2518	 *     allocate page table entries for rmrr
2519	 *     if context not allocated for bus
2520	 *           allocate and init context
2521	 *           set present in root table for this bus
2522	 *     init context with domain, translation etc
2523	 *    endfor
2524	 * endfor
2525	 */
2526	printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2527	for_each_rmrr_units(rmrr) {
2528		for (i = 0; i < rmrr->devices_cnt; i++) {
2529			pdev = rmrr->devices[i];
2530			/*
2531			 * some BIOSes list non-existent devices in the
2532			 * DMAR table.
2533			 */
2534			if (!pdev)
2535				continue;
2536			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2537			if (ret)
2538				printk(KERN_ERR
2539				       "IOMMU: mapping reserved region failed\n");
2540		}
2541	}
2542
2543	iommu_prepare_isa();
2544
2545	/*
2546	 * for each drhd
2547	 *   enable fault log
2548	 *   global invalidate context cache
2549	 *   global invalidate iotlb
2550	 *   enable translation
2551	 */
2552	for_each_drhd_unit(drhd) {
2553		if (drhd->ignored) {
2554			/*
2555			 * we always have to disable PMRs or DMA may fail on
2556			 * this device
2557			 */
2558			if (force_on)
2559				iommu_disable_protect_mem_regions(drhd->iommu);
2560			continue;
2561		}
2562		iommu = drhd->iommu;
2563
2564		iommu_flush_write_buffer(iommu);
2565
2566		ret = dmar_set_interrupt(iommu);
2567		if (ret)
2568			goto error;
2569
2570		iommu_set_root_entry(iommu);
2571
2572		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2573		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2574
2575		ret = iommu_enable_translation(iommu);
2576		if (ret)
2577			goto error;
2578
2579		iommu_disable_protect_mem_regions(iommu);
2580	}
2581
2582	return 0;
2583error:
2584	for_each_drhd_unit(drhd) {
2585		if (drhd->ignored)
2586			continue;
2587		iommu = drhd->iommu;
2588		free_iommu(iommu);
2589	}
2590	kfree(g_iommus);
2591	return ret;
2592}
2593
2594/* This takes a number of _MM_ pages, not VTD pages */
2595static struct iova *intel_alloc_iova(struct device *dev,
2596				     struct dmar_domain *domain,
2597				     unsigned long nrpages, uint64_t dma_mask)
2598{
2599	struct pci_dev *pdev = to_pci_dev(dev);
2600	struct iova *iova = NULL;
2601
2602	/* Restrict dma_mask to the width that the iommu can handle */
2603	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2604
2605	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2606		/*
2607		 * First try to allocate an io virtual address in
2608		 * DMA_BIT_MASK(32) and if that fails then try allocating
2609		 * from higher range
2610		 */
2611		iova = alloc_iova(&domain->iovad, nrpages,
2612				  IOVA_PFN(DMA_BIT_MASK(32)), 1);
2613		if (iova)
2614			return iova;
2615	}
2616	iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2617	if (unlikely(!iova)) {
2618		printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2619		       nrpages, pci_name(pdev));
2620		return NULL;
2621	}
2622
2623	return iova;
2624}
2625
2626static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2627{
2628	struct dmar_domain *domain;
2629	int ret;
2630
2631	domain = get_domain_for_dev(pdev,
2632			DEFAULT_DOMAIN_ADDRESS_WIDTH);
2633	if (!domain) {
2634		printk(KERN_ERR
2635			"Allocating domain for %s failed", pci_name(pdev));
2636		return NULL;
2637	}
2638
2639	/* make sure context mapping is ok */
2640	if (unlikely(!domain_context_mapped(pdev))) {
2641		ret = domain_context_mapping(domain, pdev,
2642					     CONTEXT_TT_MULTI_LEVEL);
2643		if (ret) {
2644			printk(KERN_ERR
2645				"Domain context map for %s failed",
2646				pci_name(pdev));
2647			return NULL;
2648		}
2649	}
2650
2651	return domain;
2652}
2653
2654static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2655{
2656	struct device_domain_info *info;
2657
2658	/* No lock here, assumes no domain exit in normal case */
2659	info = dev->dev.archdata.iommu;
2660	if (likely(info))
2661		return info->domain;
2662
2663	return __get_valid_domain_for_dev(dev);
2664}
2665
2666static int iommu_dummy(struct pci_dev *pdev)
2667{
2668	return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2669}
2670
2671/* Check if the pdev needs to go through non-identity map and unmap process.*/
2672static int iommu_no_mapping(struct device *dev)
2673{
2674	struct pci_dev *pdev;
2675	int found;
2676
2677	if (unlikely(dev->bus != &pci_bus_type))
2678		return 1;
2679
2680	pdev = to_pci_dev(dev);
2681	if (iommu_dummy(pdev))
2682		return 1;
2683
2684	if (!iommu_identity_mapping)
2685		return 0;
2686
2687	found = identity_mapping(pdev);
2688	if (found) {
2689		if (iommu_should_identity_map(pdev, 0))
2690			return 1;
2691		else {
2692			/*
2693			 * 32-bit DMA device is removed from si_domain; fall
2694			 * back to non-identity mapping.
2695			 */
2696			domain_remove_one_dev_info(si_domain, pdev);
2697			printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2698			       pci_name(pdev));
2699			return 0;
2700		}
2701	} else {
2702		/*
2703		 * In case a 64-bit DMA device is detached from a VM, the
2704		 * device is put back into si_domain for identity mapping.
2705		 */
2706		if (iommu_should_identity_map(pdev, 0)) {
2707			int ret;
2708			ret = domain_add_dev_info(si_domain, pdev,
2709						  hw_pass_through ?
2710						  CONTEXT_TT_PASS_THROUGH :
2711						  CONTEXT_TT_MULTI_LEVEL);
2712			if (!ret) {
2713				printk(KERN_INFO "64bit %s uses identity mapping\n",
2714				       pci_name(pdev));
2715				return 1;
2716			}
2717		}
2718	}
2719
2720	return 0;
2721}
2722
2723static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2724				     size_t size, int dir, u64 dma_mask)
2725{
2726	struct pci_dev *pdev = to_pci_dev(hwdev);
2727	struct dmar_domain *domain;
2728	phys_addr_t start_paddr;
2729	struct iova *iova;
2730	int prot = 0;
2731	int ret;
2732	struct intel_iommu *iommu;
2733	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2734
2735	BUG_ON(dir == DMA_NONE);
2736
2737	if (iommu_no_mapping(hwdev))
2738		return paddr;
2739
2740	domain = get_valid_domain_for_dev(pdev);
2741	if (!domain)
2742		return 0;
2743
2744	iommu = domain_get_iommu(domain);
2745	size = aligned_nrpages(paddr, size);
2746
2747	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2748	if (!iova)
2749		goto error;
2750
2751	/*
2752	 * Check if DMAR supports zero-length reads on write only
2753	 * mappings..
2754	 */
2755	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2756			!cap_zlr(iommu->cap))
2757		prot |= DMA_PTE_READ;
2758	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2759		prot |= DMA_PTE_WRITE;
2760	/*
2761	 * paddr ~ (paddr + size) might span a partial page; we should map the
2762	 * whole page.  Note: if two parts of one page are mapped separately,
2763	 * we might have two guest addresses mapping to the same host paddr,
2764	 * but this is not a big problem
2765	 */
2766	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2767				 mm_to_dma_pfn(paddr_pfn), size, prot);
2768	if (ret)
2769		goto error;
2770
2771	/* it's a non-present to present mapping. Only flush if caching mode */
2772	if (cap_caching_mode(iommu->cap))
2773		iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2774	else
2775		iommu_flush_write_buffer(iommu);
2776
2777	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2778	start_paddr += paddr & ~PAGE_MASK;
2779	return start_paddr;
2780
2781error:
2782	if (iova)
2783		__free_iova(&domain->iovad, iova);
2784	printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2785		pci_name(pdev), size, (unsigned long long)paddr, dir);
2786	return 0;
2787}
2788
2789static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2790				 unsigned long offset, size_t size,
2791				 enum dma_data_direction dir,
2792				 struct dma_attrs *attrs)
2793{
2794	return __intel_map_single(dev, page_to_phys(page) + offset, size,
2795				  dir, to_pci_dev(dev)->dma_mask);
2796}
2797
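/*
 * Drain the deferred-unmap queues: invalidate the IOTLB for each pending
 * iova and hand the iova back to the allocator.  Called with
 * async_umap_flush_lock held.
 */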
2798static void flush_unmaps(void)
2799{
2800	int i, j;
2801
2802	timer_on = 0;
2803
2804	/* just flush them all */
2805	for (i = 0; i < g_num_of_iommus; i++) {
2806		struct intel_iommu *iommu = g_iommus[i];
2807		if (!iommu)
2808			continue;
2809
2810		if (!deferred_flush[i].next)
2811			continue;
2812
2813		/* In caching mode, global flushes make emulation expensive */
2814		if (!cap_caching_mode(iommu->cap))
2815			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2816					 DMA_TLB_GLOBAL_FLUSH);
2817		for (j = 0; j < deferred_flush[i].next; j++) {
2818			unsigned long mask;
2819			struct iova *iova = deferred_flush[i].iova[j];
2820			struct dmar_domain *domain = deferred_flush[i].domain[j];
2821
2822			/* On real hardware multiple invalidations are expensive */
2823			if (cap_caching_mode(iommu->cap))
2824				iommu_flush_iotlb_psi(iommu, domain->id,
2825				iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2826			else {
2827				mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2828				iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2829						(uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2830			}
2831			__free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2832		}
2833		deferred_flush[i].next = 0;
2834	}
2835
2836	list_size = 0;
2837}
2838
2839static void flush_unmaps_timeout(unsigned long data)
2840{
2841	unsigned long flags;
2842
2843	spin_lock_irqsave(&async_umap_flush_lock, flags);
2844	flush_unmaps();
2845	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2846}
2847
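/*
 * Queue an iova for lazy freeing.  The actual IOTLB flush happens in
 * batches from flush_unmaps(), either when the list reaches
 * HIGH_WATER_MARK or from a 10ms timer.
 */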
2848static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2849{
2850	unsigned long flags;
2851	int next, iommu_id;
2852	struct intel_iommu *iommu;
2853
2854	spin_lock_irqsave(&async_umap_flush_lock, flags);
2855	if (list_size == HIGH_WATER_MARK)
2856		flush_unmaps();
2857
2858	iommu = domain_get_iommu(dom);
2859	iommu_id = iommu->seq_id;
2860
2861	next = deferred_flush[iommu_id].next;
2862	deferred_flush[iommu_id].domain[next] = dom;
2863	deferred_flush[iommu_id].iova[next] = iova;
2864	deferred_flush[iommu_id].next++;
2865
2866	if (!timer_on) {
2867		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2868		timer_on = 1;
2869	}
2870	list_size++;
2871	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2872}
2873
2874static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2875			     size_t size, enum dma_data_direction dir,
2876			     struct dma_attrs *attrs)
2877{
2878	struct pci_dev *pdev = to_pci_dev(dev);
2879	struct dmar_domain *domain;
2880	unsigned long start_pfn, last_pfn;
2881	struct iova *iova;
2882	struct intel_iommu *iommu;
2883
2884	if (iommu_no_mapping(dev))
2885		return;
2886
2887	domain = find_domain(pdev);
2888	BUG_ON(!domain);
2889
2890	iommu = domain_get_iommu(domain);
2891
2892	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2893	if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2894		      (unsigned long long)dev_addr))
2895		return;
2896
2897	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2898	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2899
2900	pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2901		 pci_name(pdev), start_pfn, last_pfn);
2902
2903	/*  clear the whole page */
2904	dma_pte_clear_range(domain, start_pfn, last_pfn);
2905
2906	/* free page tables */
2907	dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2908
2909	if (intel_iommu_strict) {
2910		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2911				      last_pfn - start_pfn + 1, 0);
2912		/* free iova */
2913		__free_iova(&domain->iovad, iova);
2914	} else {
2915		add_unmap(domain, iova);
2916		/*
2917		 * queue up the release of the unmap to save the 1/6th of the
2918		 * cpu used up by the iotlb flush operation...
2919		 */
2920	}
2921}
2922
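/*
 * Coherent ("consistent") DMA allocation: get zeroed pages that satisfy the
 * device's coherent_dma_mask and map them through __intel_map_single().
 */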
2923static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2924				  dma_addr_t *dma_handle, gfp_t flags)
2925{
2926	void *vaddr;
2927	int order;
2928
2929	size = PAGE_ALIGN(size);
2930	order = get_order(size);
2931
2932	if (!iommu_no_mapping(hwdev))
2933		flags &= ~(GFP_DMA | GFP_DMA32);
2934	else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2935		if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2936			flags |= GFP_DMA;
2937		else
2938			flags |= GFP_DMA32;
2939	}
2940
2941	vaddr = (void *)__get_free_pages(flags, order);
2942	if (!vaddr)
2943		return NULL;
2944	memset(vaddr, 0, size);
2945
2946	*dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2947					 DMA_BIDIRECTIONAL,
2948					 hwdev->coherent_dma_mask);
2949	if (*dma_handle)
2950		return vaddr;
2951	free_pages((unsigned long)vaddr, order);
2952	return NULL;
2953}
2954
2955static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2956				dma_addr_t dma_handle)
2957{
2958	int order;
2959
2960	size = PAGE_ALIGN(size);
2961	order = get_order(size);
2962
2963	intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
2964	free_pages((unsigned long)vaddr, order);
2965}
2966
2967static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2968			   int nelems, enum dma_data_direction dir,
2969			   struct dma_attrs *attrs)
2970{
2971	struct pci_dev *pdev = to_pci_dev(hwdev);
2972	struct dmar_domain *domain;
2973	unsigned long start_pfn, last_pfn;
2974	struct iova *iova;
2975	struct intel_iommu *iommu;
2976
2977	if (iommu_no_mapping(hwdev))
2978		return;
2979
2980	domain = find_domain(pdev);
2981	BUG_ON(!domain);
2982
2983	iommu = domain_get_iommu(domain);
2984
2985	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2986	if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2987		      (unsigned long long)sglist[0].dma_address))
2988		return;
2989
2990	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2991	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2992
2993	/*  clear the whole page */
2994	dma_pte_clear_range(domain, start_pfn, last_pfn);
2995
2996	/* free page tables */
2997	dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2998
2999	if (intel_iommu_strict) {
3000		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3001				      last_pfn - start_pfn + 1, 0);
3002		/* free iova */
3003		__free_iova(&domain->iovad, iova);
3004	} else {
3005		add_unmap(domain, iova);
3006		/*
3007		 * queue up the release of the unmap to save the 1/6th of the
3008		 * cpu used up by the iotlb flush operation...
3009		 */
3010	}
3011}
3012
3013static int intel_nontranslate_map_sg(struct device *hddev,
3014	struct scatterlist *sglist, int nelems, int dir)
3015{
3016	int i;
3017	struct scatterlist *sg;
3018
3019	for_each_sg(sglist, sg, nelems, i) {
3020		BUG_ON(!sg_page(sg));
3021		sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3022		sg->dma_length = sg->length;
3023	}
3024	return nelems;
3025}
3026
3027static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3028			enum dma_data_direction dir, struct dma_attrs *attrs)
3029{
3030	int i;
3031	struct pci_dev *pdev = to_pci_dev(hwdev);
3032	struct dmar_domain *domain;
3033	size_t size = 0;
3034	int prot = 0;
3035	struct iova *iova = NULL;
3036	int ret;
3037	struct scatterlist *sg;
3038	unsigned long start_vpfn;
3039	struct intel_iommu *iommu;
3040
3041	BUG_ON(dir == DMA_NONE);
3042	if (iommu_no_mapping(hwdev))
3043		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3044
3045	domain = get_valid_domain_for_dev(pdev);
3046	if (!domain)
3047		return 0;
3048
3049	iommu = domain_get_iommu(domain);
3050
3051	for_each_sg(sglist, sg, nelems, i)
3052		size += aligned_nrpages(sg->offset, sg->length);
3053
3054	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3055				pdev->dma_mask);
3056	if (!iova) {
3057		sglist->dma_length = 0;
3058		return 0;
3059	}
3060
3061	/*
3062	 * Check if DMAR supports zero-length reads on write only
3063	 * mappings..
3064	 */
3065	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3066			!cap_zlr(iommu->cap))
3067		prot |= DMA_PTE_READ;
3068	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3069		prot |= DMA_PTE_WRITE;
3070
3071	start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3072
3073	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3074	if (unlikely(ret)) {
3075		/*  clear the page */
3076		dma_pte_clear_range(domain, start_vpfn,
3077				    start_vpfn + size - 1);
3078		/* free page tables */
3079		dma_pte_free_pagetable(domain, start_vpfn,
3080				       start_vpfn + size - 1);
3081		/* free iova */
3082		__free_iova(&domain->iovad, iova);
3083		return 0;
3084	}
3085
3086	/* it's a non-present to present mapping. Only flush if caching mode */
3087	if (cap_caching_mode(iommu->cap))
3088		iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3089	else
3090		iommu_flush_write_buffer(iommu);
3091
3092	return nelems;
3093}
3094
3095static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3096{
3097	return !dma_addr;
3098}
3099
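/*
 * These callbacks are wired into the generic DMA API; drivers never call
 * them directly.  A typical consumer path (minimal sketch, assuming a PCI
 * driver with device 'pdev' and a kmalloc'ed buffer 'buf' of 'len' bytes)
 * would look like:
 *
 *	dma_addr_t handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
 *	if (dma_mapping_error(&pdev->dev, handle))
 *		return -EIO;
 *	... start the DMA and wait for completion ...
 *	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
 *
 * which ends up in intel_map_page()/intel_unmap_page() above via dma_ops.
 */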
3100struct dma_map_ops intel_dma_ops = {
3101	.alloc_coherent = intel_alloc_coherent,
3102	.free_coherent = intel_free_coherent,
3103	.map_sg = intel_map_sg,
3104	.unmap_sg = intel_unmap_sg,
3105	.map_page = intel_map_page,
3106	.unmap_page = intel_unmap_page,
3107	.mapping_error = intel_mapping_error,
3108};
3109
3110static inline int iommu_domain_cache_init(void)
3111{
3112	int ret = 0;
3113
3114	iommu_domain_cache = kmem_cache_create("iommu_domain",
3115					 sizeof(struct dmar_domain),
3116					 0,
3117					 SLAB_HWCACHE_ALIGN,
3118
3119					 NULL);
3120	if (!iommu_domain_cache) {
3121		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3122		ret = -ENOMEM;
3123	}
3124
3125	return ret;
3126}
3127
3128static inline int iommu_devinfo_cache_init(void)
3129{
3130	int ret = 0;
3131
3132	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3133					 sizeof(struct device_domain_info),
3134					 0,
3135					 SLAB_HWCACHE_ALIGN,
3136					 NULL);
3137	if (!iommu_devinfo_cache) {
3138		printk(KERN_ERR "Couldn't create devinfo cache\n");
3139		ret = -ENOMEM;
3140	}
3141
3142	return ret;
3143}
3144
3145static inline int iommu_iova_cache_init(void)
3146{
3147	int ret = 0;
3148
3149	iommu_iova_cache = kmem_cache_create("iommu_iova",
3150					 sizeof(struct iova),
3151					 0,
3152					 SLAB_HWCACHE_ALIGN,
3153					 NULL);
3154	if (!iommu_iova_cache) {
3155		printk(KERN_ERR "Couldn't create iova cache\n");
3156		ret = -ENOMEM;
3157	}
3158
3159	return ret;
3160}
3161
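/*
 * Create the slab caches used throughout this driver: one for dmar_domain,
 * one for device_domain_info and one for iova descriptors.
 */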
3162static int __init iommu_init_mempool(void)
3163{
3164	int ret;
3165	ret = iommu_iova_cache_init();
3166	if (ret)
3167		return ret;
3168
3169	ret = iommu_domain_cache_init();
3170	if (ret)
3171		goto domain_error;
3172
3173	ret = iommu_devinfo_cache_init();
3174	if (!ret)
3175		return ret;
3176
3177	kmem_cache_destroy(iommu_domain_cache);
3178domain_error:
3179	kmem_cache_destroy(iommu_iova_cache);
3180
3181	return -ENOMEM;
3182}
3183
3184static void __init iommu_exit_mempool(void)
3185{
3186	kmem_cache_destroy(iommu_devinfo_cache);
3187	kmem_cache_destroy(iommu_domain_cache);
3188	kmem_cache_destroy(iommu_iova_cache);
3189
3190}
3191
3192static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3193{
3194	struct dmar_drhd_unit *drhd;
3195	u32 vtbar;
3196	int rc;
3197
3198	/* We know that this device on this chipset has its own IOMMU.
3199	 * If we find it under a different IOMMU, then the BIOS is lying
3200	 * to us. Hope that the IOMMU for this device is actually
3201	 * disabled, and it needs no translation...
3202	 */
3203	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3204	if (rc) {
3205		/* "can't" happen */
3206		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3207		return;
3208	}
3209	vtbar &= 0xffff0000;
3210
3211	/* we know that this iommu should be at offset 0xa000 from vtbar */
3212	drhd = dmar_find_matched_drhd_unit(pdev);
3213	if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3214			    TAINT_FIRMWARE_WORKAROUND,
3215			    "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3216		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3217}
3218DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3219
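/*
 * Mark DMAR units that cover no PCI devices -- or only graphics devices
 * when dmar_map_gfx is disabled -- as ignored, so translation is never
 * enabled on them.
 */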
3220static void __init init_no_remapping_devices(void)
3221{
3222	struct dmar_drhd_unit *drhd;
3223
3224	for_each_drhd_unit(drhd) {
3225		if (!drhd->include_all) {
3226			int i;
3227			for (i = 0; i < drhd->devices_cnt; i++)
3228				if (drhd->devices[i] != NULL)
3229					break;
3230			/* ignore DMAR unit if no pci devices exist */
3231			if (i == drhd->devices_cnt)
3232				drhd->ignored = 1;
3233		}
3234	}
3235
3236	for_each_drhd_unit(drhd) {
3237		int i;
3238		if (drhd->ignored || drhd->include_all)
3239			continue;
3240
3241		for (i = 0; i < drhd->devices_cnt; i++)
3242			if (drhd->devices[i] &&
3243			    !IS_GFX_DEVICE(drhd->devices[i]))
3244				break;
3245
3246		if (i < drhd->devices_cnt)
3247			continue;
3248
3249		/* This IOMMU has *only* gfx devices. Either bypass it or
3250		   set the gfx_mapped flag, as appropriate */
3251		if (dmar_map_gfx) {
3252			intel_iommu_gfx_mapped = 1;
3253		} else {
3254			drhd->ignored = 1;
3255			for (i = 0; i < drhd->devices_cnt; i++) {
3256				if (!drhd->devices[i])
3257					continue;
3258				drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3259			}
3260		}
3261	}
3262}
3263
3264#ifdef CONFIG_SUSPEND
3265static int init_iommu_hw(void)
3266{
3267	struct dmar_drhd_unit *drhd;
3268	struct intel_iommu *iommu = NULL;
3269
3270	for_each_active_iommu(iommu, drhd)
3271		if (iommu->qi)
3272			dmar_reenable_qi(iommu);
3273
3274	for_each_iommu(iommu, drhd) {
3275		if (drhd->ignored) {
3276			/*
3277			 * we always have to disable PMRs or DMA may fail on
3278			 * this device
3279			 */
3280			if (force_on)
3281				iommu_disable_protect_mem_regions(iommu);
3282			continue;
3283		}
3284	
3285		iommu_flush_write_buffer(iommu);
3286
3287		iommu_set_root_entry(iommu);
3288
3289		iommu->flush.flush_context(iommu, 0, 0, 0,
3290					   DMA_CCMD_GLOBAL_INVL);
3291		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3292					 DMA_TLB_GLOBAL_FLUSH);
3293		if (iommu_enable_translation(iommu))
3294			return 1;
3295		iommu_disable_protect_mem_regions(iommu);
3296	}
3297
3298	return 0;
3299}
3300
3301static void iommu_flush_all(void)
3302{
3303	struct dmar_drhd_unit *drhd;
3304	struct intel_iommu *iommu;
3305
3306	for_each_active_iommu(iommu, drhd) {
3307		iommu->flush.flush_context(iommu, 0, 0, 0,
3308					   DMA_CCMD_GLOBAL_INVL);
3309		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3310					 DMA_TLB_GLOBAL_FLUSH);
3311	}
3312}
3313
3314static int iommu_suspend(void)
3315{
3316	struct dmar_drhd_unit *drhd;
3317	struct intel_iommu *iommu = NULL;
3318	unsigned long flag;
3319
3320	for_each_active_iommu(iommu, drhd) {
3321		iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3322						 GFP_ATOMIC);
3323		if (!iommu->iommu_state)
3324			goto nomem;
3325	}
3326
3327	iommu_flush_all();
3328
3329	for_each_active_iommu(iommu, drhd) {
3330		iommu_disable_translation(iommu);
3331
3332		spin_lock_irqsave(&iommu->register_lock, flag);
3333
3334		iommu->iommu_state[SR_DMAR_FECTL_REG] =
3335			readl(iommu->reg + DMAR_FECTL_REG);
3336		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3337			readl(iommu->reg + DMAR_FEDATA_REG);
3338		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3339			readl(iommu->reg + DMAR_FEADDR_REG);
3340		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3341			readl(iommu->reg + DMAR_FEUADDR_REG);
3342
3343		spin_unlock_irqrestore(&iommu->register_lock, flag);
3344	}
3345	return 0;
3346
3347nomem:
3348	for_each_active_iommu(iommu, drhd)
3349		kfree(iommu->iommu_state);
3350
3351	return -ENOMEM;
3352}
3353
3354static void iommu_resume(void)
3355{
3356	struct dmar_drhd_unit *drhd;
3357	struct intel_iommu *iommu = NULL;
3358	unsigned long flag;
3359
3360	if (init_iommu_hw()) {
3361		if (force_on)
3362			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3363		else
3364			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3365		return;
3366	}
3367
3368	for_each_active_iommu(iommu, drhd) {
3369
3370		spin_lock_irqsave(&iommu->register_lock, flag);
3371
3372		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3373			iommu->reg + DMAR_FECTL_REG);
3374		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3375			iommu->reg + DMAR_FEDATA_REG);
3376		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3377			iommu->reg + DMAR_FEADDR_REG);
3378		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3379			iommu->reg + DMAR_FEUADDR_REG);
3380
3381		spin_unlock_irqrestore(&iommu->register_lock, flag);
3382	}
3383
3384	for_each_active_iommu(iommu, drhd)
3385		kfree(iommu->iommu_state);
3386}
3387
3388static struct syscore_ops iommu_syscore_ops = {
3389	.resume		= iommu_resume,
3390	.suspend	= iommu_suspend,
3391};
3392
3393static void __init init_iommu_pm_ops(void)
3394{
3395	register_syscore_ops(&iommu_syscore_ops);
3396}
3397
3398#else
3399static inline void init_iommu_pm_ops(void) {}
3400#endif	/* CONFIG_SUSPEND */
3401
3402/*
3403 * Here we only respond to a device being unbound from its driver.
3404 *
3405 * An added device is not attached to its DMAR domain here yet; that will
3406 * happen when the device is mapped to an iova.
3407 */
3408static int device_notifier(struct notifier_block *nb,
3409				  unsigned long action, void *data)
3410{
3411	struct device *dev = data;
3412	struct pci_dev *pdev = to_pci_dev(dev);
3413	struct dmar_domain *domain;
3414
3415	if (iommu_no_mapping(dev))
3416		return 0;
3417
3418	domain = find_domain(pdev);
3419	if (!domain)
3420		return 0;
3421
3422	if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3423		domain_remove_one_dev_info(domain, pdev);
3424
3425		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3426		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3427		    list_empty(&domain->devices))
3428			domain_exit(domain);
3429	}
3430
3431	return 0;
3432}
3433
3434static struct notifier_block device_nb = {
3435	.notifier_call = device_notifier,
3436};
3437
3438int __init intel_iommu_init(void)
3439{
3440	int ret = 0;
3441
3442	/* VT-d is required for a TXT/tboot launch, so enforce that */
3443	force_on = tboot_force_iommu();
3444
3445	if (dmar_table_init()) {
3446		if (force_on)
3447			panic("tboot: Failed to initialize DMAR table\n");
3448		return 	-ENODEV;
3449	}
3450
3451	if (dmar_dev_scope_init()) {
3452		if (force_on)
3453			panic("tboot: Failed to initialize DMAR device scope\n");
3454		return 	-ENODEV;
3455	}
3456
3457	/*
3458	 * Check the need for DMA-remapping initialization now.
3459	 * Above initialization will also be used by Interrupt-remapping.
3460	 */
3461	if (no_iommu || dmar_disabled)
3462		return -ENODEV;
3463
3464	if (iommu_init_mempool()) {
3465		if (force_on)
3466			panic("tboot: Failed to initialize iommu memory\n");
3467		return 	-ENODEV;
3468	}
3469
3470	if (dmar_init_reserved_ranges()) {
3471		if (force_on)
3472			panic("tboot: Failed to reserve iommu ranges\n");
3473		return 	-ENODEV;
3474	}
3475
3476	init_no_remapping_devices();
3477
3478	ret = init_dmars();
3479	if (ret) {
3480		if (force_on)
3481			panic("tboot: Failed to initialize DMARs\n");
3482		printk(KERN_ERR "IOMMU: dmar init failed\n");
3483		put_iova_domain(&reserved_iova_list);
3484		iommu_exit_mempool();
3485		return ret;
3486	}
3487	printk(KERN_INFO
3488	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3489
3490	init_timer(&unmap_timer);
3491#ifdef CONFIG_SWIOTLB
3492	swiotlb = 0;
3493#endif
3494	dma_ops = &intel_dma_ops;
3495
3496	init_iommu_pm_ops();
3497
3498	register_iommu(&intel_iommu_ops);
3499
3500	bus_register_notifier(&pci_bus_type, &device_nb);
3501
3502	return 0;
3503}
3504
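/*
 * If the device sits behind PCIe-to-PCI bridges, the bridges' context
 * entries must be torn down too, since requests from the device carry the
 * bridge's source-id.
 */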
3505static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3506					   struct pci_dev *pdev)
3507{
3508	struct pci_dev *tmp, *parent;
3509
3510	if (!iommu || !pdev)
3511		return;
3512
3513	/* dependent device detach */
3514	tmp = pci_find_upstream_pcie_bridge(pdev);
3515	/* Secondary interface's bus number and devfn 0 */
3516	if (tmp) {
3517		parent = pdev->bus->self;
3518		while (parent != tmp) {
3519			iommu_detach_dev(iommu, parent->bus->number,
3520					 parent->devfn);
3521			parent = parent->bus->self;
3522		}
3523		if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3524			iommu_detach_dev(iommu,
3525				tmp->subordinate->number, 0);
3526		else /* this is a legacy PCI bridge */
3527			iommu_detach_dev(iommu, tmp->bus->number,
3528					 tmp->devfn);
3529	}
3530}
3531
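/*
 * Detach one device (and any bridges it depends on) from @domain.  If it
 * was the last device on its IOMMU, that IOMMU is cleared from the
 * domain's bitmap and, unless the domain is a VM or static-identity
 * domain, its domain id is released on that IOMMU as well.
 */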
3532static void domain_remove_one_dev_info(struct dmar_domain *domain,
3533					  struct pci_dev *pdev)
3534{
3535	struct device_domain_info *info;
3536	struct intel_iommu *iommu;
3537	unsigned long flags;
3538	int found = 0;
3539	struct list_head *entry, *tmp;
3540
3541	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3542				pdev->devfn);
3543	if (!iommu)
3544		return;
3545
3546	spin_lock_irqsave(&device_domain_lock, flags);
3547	list_for_each_safe(entry, tmp, &domain->devices) {
3548		info = list_entry(entry, struct device_domain_info, link);
3549		if (info->segment == pci_domain_nr(pdev->bus) &&
3550		    info->bus == pdev->bus->number &&
3551		    info->devfn == pdev->devfn) {
3552			list_del(&info->link);
3553			list_del(&info->global);
3554			if (info->dev)
3555				info->dev->dev.archdata.iommu = NULL;
3556			spin_unlock_irqrestore(&device_domain_lock, flags);
3557
3558			iommu_disable_dev_iotlb(info);
3559			iommu_detach_dev(iommu, info->bus, info->devfn);
3560			iommu_detach_dependent_devices(iommu, pdev);
3561			free_devinfo_mem(info);
3562
3563			spin_lock_irqsave(&device_domain_lock, flags);
3564
3565			if (found)
3566				break;
3567			else
3568				continue;
3569		}
3570
3571		/* if there are no other devices under the same iommu
3572		 * owned by this domain, clear this iommu in iommu_bmp and
3573		 * update the iommu count and coherency
3574		 */
3575		if (iommu == device_to_iommu(info->segment, info->bus,
3576					    info->devfn))
3577			found = 1;
3578	}
3579
3580	spin_unlock_irqrestore(&device_domain_lock, flags);
3581
3582	if (found == 0) {
3583		unsigned long tmp_flags;
3584		spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3585		clear_bit(iommu->seq_id, &domain->iommu_bmp);
3586		domain->iommu_count--;
3587		domain_update_iommu_cap(domain);
3588		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3589
3590		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3591		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3592			spin_lock_irqsave(&iommu->lock, tmp_flags);
3593			clear_bit(domain->id, iommu->domain_ids);
3594			iommu->domains[domain->id] = NULL;
3595			spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3596		}
3597	}
3598}
3599
3600static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3601{
3602	struct device_domain_info *info;
3603	struct intel_iommu *iommu;
3604	unsigned long flags1, flags2;
3605
3606	spin_lock_irqsave(&device_domain_lock, flags1);
3607	while (!list_empty(&domain->devices)) {
3608		info = list_entry(domain->devices.next,
3609			struct device_domain_info, link);
3610		list_del(&info->link);
3611		list_del(&info->global);
3612		if (info->dev)
3613			info->dev->dev.archdata.iommu = NULL;
3614
3615		spin_unlock_irqrestore(&device_domain_lock, flags1);
3616
3617		iommu_disable_dev_iotlb(info);
3618		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3619		iommu_detach_dev(iommu, info->bus, info->devfn);
3620		iommu_detach_dependent_devices(iommu, info->dev);
3621
3622		/* clear this iommu in iommu_bmp, update iommu count
3623		 * and capabilities
3624		 */
3625		spin_lock_irqsave(&domain->iommu_lock, flags2);
3626		if (test_and_clear_bit(iommu->seq_id,
3627				       &domain->iommu_bmp)) {
3628			domain->iommu_count--;
3629			domain_update_iommu_cap(domain);
3630		}
3631		spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3632
3633		free_devinfo_mem(info);
3634		spin_lock_irqsave(&device_domain_lock, flags1);
3635	}
3636	spin_unlock_irqrestore(&device_domain_lock, flags1);
3637}
3638
3639/* domain id for virtual machine, it won't be set in context */
3640static unsigned long vm_domid;
3641
3642static struct dmar_domain *iommu_alloc_vm_domain(void)
3643{
3644	struct dmar_domain *domain;
3645
3646	domain = alloc_domain_mem();
3647	if (!domain)
3648		return NULL;
3649
3650	domain->id = vm_domid++;
3651	domain->nid = -1;
3652	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3653	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3654
3655	return domain;
3656}
3657
3658static int md_domain_init(struct dmar_domain *domain, int guest_width)
3659{
3660	int adjust_width;
3661
3662	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3663	spin_lock_init(&domain->iommu_lock);
3664
3665	domain_reserve_special_ranges(domain);
3666
3667	/* calculate AGAW */
3668	domain->gaw = guest_width;
3669	adjust_width = guestwidth_to_adjustwidth(guest_width);
3670	domain->agaw = width_to_agaw(adjust_width);
3671
3672	INIT_LIST_HEAD(&domain->devices);
3673
3674	domain->iommu_count = 0;
3675	domain->iommu_coherency = 0;
3676	domain->iommu_snooping = 0;
3677	domain->iommu_superpage = 0;
3678	domain->max_addr = 0;
3679	domain->nid = -1;
3680
3681	/* always allocate the top pgd */
3682	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3683	if (!domain->pgd)
3684		return -ENOMEM;
3685	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3686	return 0;
3687}
3688
3689static void iommu_free_vm_domain(struct dmar_domain *domain)
3690{
3691	unsigned long flags;
3692	struct dmar_drhd_unit *drhd;
3693	struct intel_iommu *iommu;
3694	unsigned long i;
3695	unsigned long ndomains;
3696
3697	for_each_drhd_unit(drhd) {
3698		if (drhd->ignored)
3699			continue;
3700		iommu = drhd->iommu;
3701
3702		ndomains = cap_ndoms(iommu->cap);
3703		for_each_set_bit(i, iommu->domain_ids, ndomains) {
3704			if (iommu->domains[i] == domain) {
3705				spin_lock_irqsave(&iommu->lock, flags);
3706				clear_bit(i, iommu->domain_ids);
3707				iommu->domains[i] = NULL;
3708				spin_unlock_irqrestore(&iommu->lock, flags);
3709				break;
3710			}
3711		}
3712	}
3713}
3714
3715static void vm_domain_exit(struct dmar_domain *domain)
3716{
3717	/* Domain 0 is reserved, so don't process it */
3718	if (!domain)
3719		return;
3720
3721	vm_domain_remove_all_dev_info(domain);
3722	/* destroy iovas */
3723	put_iova_domain(&domain->iovad);
3724
3725	/* clear ptes */
3726	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3727
3728	/* free page tables */
3729	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3730
3731	iommu_free_vm_domain(domain);
3732	free_domain_mem(domain);
3733}
3734
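/*
 * iommu_ops callbacks: everything below exposes VT-d "virtual machine"
 * domains through the generic IOMMU API registered via register_iommu()
 * in intel_iommu_init().
 */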
3735static int intel_iommu_domain_init(struct iommu_domain *domain)
3736{
3737	struct dmar_domain *dmar_domain;
3738
3739	dmar_domain = iommu_alloc_vm_domain();
3740	if (!dmar_domain) {
3741		printk(KERN_ERR
3742			"intel_iommu_domain_init: dmar_domain == NULL\n");
3743		return -ENOMEM;
3744	}
3745	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3746		printk(KERN_ERR
3747			"intel_iommu_domain_init() failed\n");
3748		vm_domain_exit(dmar_domain);
3749		return -ENOMEM;
3750	}
3751	domain_update_iommu_cap(dmar_domain);
3752	domain->priv = dmar_domain;
3753
3754	return 0;
3755}
3756
3757static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3758{
3759	struct dmar_domain *dmar_domain = domain->priv;
3760
3761	domain->priv = NULL;
3762	vm_domain_exit(dmar_domain);
3763}
3764
3765static int intel_iommu_attach_device(struct iommu_domain *domain,
3766				     struct device *dev)
3767{
3768	struct dmar_domain *dmar_domain = domain->priv;
3769	struct pci_dev *pdev = to_pci_dev(dev);
3770	struct intel_iommu *iommu;
3771	int addr_width;
3772
3773	/* normally pdev is not mapped */
3774	if (unlikely(domain_context_mapped(pdev))) {
3775		struct dmar_domain *old_domain;
3776
3777		old_domain = find_domain(pdev);
3778		if (old_domain) {
3779			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3780			    dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3781				domain_remove_one_dev_info(old_domain, pdev);
3782			else
3783				domain_remove_dev_info(old_domain);
3784		}
3785	}
3786
3787	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3788				pdev->devfn);
3789	if (!iommu)
3790		return -ENODEV;
3791
3792	/* check if this iommu agaw is sufficient for max mapped address */
3793	addr_width = agaw_to_width(iommu->agaw);
3794	if (addr_width > cap_mgaw(iommu->cap))
3795		addr_width = cap_mgaw(iommu->cap);
3796
3797	if (dmar_domain->max_addr > (1LL << addr_width)) {
3798		printk(KERN_ERR "%s: iommu width (%d) is not "
3799		       "sufficient for the mapped address (%llx)\n",
3800		       __func__, addr_width, dmar_domain->max_addr);
3801		return -EFAULT;
3802	}
3803	dmar_domain->gaw = addr_width;
3804
3805	/*
3806	 * Knock out extra levels of page tables if necessary
3807	 */
3808	while (iommu->agaw < dmar_domain->agaw) {
3809		struct dma_pte *pte;
3810
3811		pte = dmar_domain->pgd;
3812		if (dma_pte_present(pte)) {
3813			dmar_domain->pgd = (struct dma_pte *)
3814				phys_to_virt(dma_pte_addr(pte));
3815			free_pgtable_page(pte);
3816		}
3817		dmar_domain->agaw--;
3818	}
3819
3820	return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3821}
3822
3823static void intel_iommu_detach_device(struct iommu_domain *domain,
3824				      struct device *dev)
3825{
3826	struct dmar_domain *dmar_domain = domain->priv;
3827	struct pci_dev *pdev = to_pci_dev(dev);
3828
3829	domain_remove_one_dev_info(dmar_domain, pdev);
3830}
3831
3832static int intel_iommu_map(struct iommu_domain *domain,
3833			   unsigned long iova, phys_addr_t hpa,
3834			   int gfp_order, int iommu_prot)
3835{
3836	struct dmar_domain *dmar_domain = domain->priv;
3837	u64 max_addr;
3838	int prot = 0;
3839	size_t size;
3840	int ret;
3841
3842	if (iommu_prot & IOMMU_READ)
3843		prot |= DMA_PTE_READ;
3844	if (iommu_prot & IOMMU_WRITE)
3845		prot |= DMA_PTE_WRITE;
3846	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3847		prot |= DMA_PTE_SNP;
3848
3849	size     = PAGE_SIZE << gfp_order;
3850	max_addr = iova + size;
3851	if (dmar_domain->max_addr < max_addr) {
3852		u64 end;
3853
3854		/* check if minimum agaw is sufficient for mapped address */
3855		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
3856		if (end < max_addr) {
3857			printk(KERN_ERR "%s: iommu width (%d) is not "
3858			       "sufficient for the mapped address (%llx)\n",
3859			       __func__, dmar_domain->gaw, max_addr);
3860			return -EFAULT;
3861		}
3862		dmar_domain->max_addr = max_addr;
3863	}
3864	/* Round up size to next multiple of PAGE_SIZE, if it and
3865	   the low bits of hpa would take us onto the next page */
3866	size = aligned_nrpages(hpa, size);
3867	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3868				 hpa >> VTD_PAGE_SHIFT, size, prot);
3869	return ret;
3870}
3871
3872static int intel_iommu_unmap(struct iommu_domain *domain,
3873			     unsigned long iova, int gfp_order)
3874{
3875	struct dmar_domain *dmar_domain = domain->priv;
3876	size_t size = PAGE_SIZE << gfp_order;
3877	int order;
3878
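	/*
	 * dma_pte_clear_range() returns the order of the range it cleared;
	 * that order is handed back to the IOMMU core as the amount that
	 * was actually unmapped.
	 */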
3879	order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3880			    (iova + size - 1) >> VTD_PAGE_SHIFT);
3881
3882	if (dmar_domain->max_addr == iova + size)
3883		dmar_domain->max_addr = iova;
3884
3885	return order;
3886}
3887
3888static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3889					    unsigned long iova)
3890{
3891	struct dmar_domain *dmar_domain = domain->priv;
3892	struct dma_pte *pte;
3893	u64 phys = 0;
3894
3895	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
3896	if (pte)
3897		phys = dma_pte_addr(pte);
3898
3899	return phys;
3900}
3901
3902static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3903				      unsigned long cap)
3904{
3905	struct dmar_domain *dmar_domain = domain->priv;
3906
3907	if (cap == IOMMU_CAP_CACHE_COHERENCY)
3908		return dmar_domain->iommu_snooping;
3909	if (cap == IOMMU_CAP_INTR_REMAP)
3910		return intr_remapping_enabled;
3911
3912	return 0;
3913}
3914
3915static struct iommu_ops intel_iommu_ops = {
3916	.domain_init	= intel_iommu_domain_init,
3917	.domain_destroy = intel_iommu_domain_destroy,
3918	.attach_dev	= intel_iommu_attach_device,
3919	.detach_dev	= intel_iommu_detach_device,
3920	.map		= intel_iommu_map,
3921	.unmap		= intel_iommu_unmap,
3922	.iova_to_phys	= intel_iommu_iova_to_phys,
3923	.domain_has_cap = intel_iommu_domain_has_cap,
3924};
3925
3926static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3927{
3928	/*
3929	 * Mobile 4 Series Chipset neglects to set RWBF capability,
3930	 * but needs it:
3931	 */
3932	printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3933	rwbf_quirk = 1;
3934
3935	/* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
3936	if (dev->revision == 0x07) {
3937		printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
3938		dmar_map_gfx = 0;
3939	}
3940}
3941
3942DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
3943
3944#define GGC 0x52
3945#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
3946#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
3947#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
3948#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
3949#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
3950#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
3951#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
3952#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
3953
3954static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
3955{
3956	unsigned short ggc;
3957
3958	if (pci_read_config_word(dev, GGC, &ggc))
3959		return;
3960
3961	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
3962		printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
3963		dmar_map_gfx = 0;
3964	} else if (dmar_map_gfx) {
3965		/* we have to ensure the gfx device is idle before we flush */
3966		printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
3967		intel_iommu_strict = 1;
3968	}
3969}
3970DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
3971DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
3972DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
3973DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
3974
3975/* On Tylersburg chipsets, some BIOSes have been known to enable the
3976   ISOCH DMAR unit for the Azalia sound device, but not give it any
3977   TLB entries, which causes it to deadlock. Check for that.  We do
3978   this in a function called from init_dmars(), instead of in a PCI
3979   quirk, because we don't want to print the obnoxious "BIOS broken"
3980   message if VT-d is actually disabled.
3981*/
3982static void __init check_tylersburg_isoch(void)
3983{
3984	struct pci_dev *pdev;
3985	uint32_t vtisochctrl;
3986
3987	/* If there's no Azalia in the system anyway, forget it. */
3988	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3989	if (!pdev)
3990		return;
3991	pci_dev_put(pdev);
3992
3993	/* System Management Registers. Might be hidden, in which case
3994	   we can't do the sanity check. But that's OK, because the
3995	   known-broken BIOSes _don't_ actually hide it, so far. */
3996	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3997	if (!pdev)
3998		return;
3999
4000	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4001		pci_dev_put(pdev);
4002		return;
4003	}
4004
4005	pci_dev_put(pdev);
4006
4007	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4008	if (vtisochctrl & 1)
4009		return;
4010
4011	/* Drop all bits other than the number of TLB entries */
4012	vtisochctrl &= 0x1c;
4013
4014	/* If we have the recommended number of TLB entries (16), fine. */
4015	if (vtisochctrl == 0x10)
4016		return;
4017
4018	/* Zero TLB entries? You get to ride the short bus to school. */
4019	if (!vtisochctrl) {
4020		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4021		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4022		     dmi_get_system_info(DMI_BIOS_VENDOR),
4023		     dmi_get_system_info(DMI_BIOS_VERSION),
4024		     dmi_get_system_info(DMI_PRODUCT_VERSION));
4025		iommu_identity_mapping |= IDENTMAP_AZALIA;
4026		return;
4027	}
4028	
4029	printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4030	       vtisochctrl);
4031}
v5.4
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright © 2006-2014 Intel Corporation.
   4 *
   5 * Authors: David Woodhouse <dwmw2@infradead.org>,
   6 *          Ashok Raj <ashok.raj@intel.com>,
   7 *          Shaohua Li <shaohua.li@intel.com>,
   8 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
   9 *          Fenghua Yu <fenghua.yu@intel.com>
  10 *          Joerg Roedel <jroedel@suse.de>
  11 */
  12
  13#define pr_fmt(fmt)     "DMAR: " fmt
  14#define dev_fmt(fmt)    pr_fmt(fmt)
  15
  16#include <linux/init.h>
  17#include <linux/bitmap.h>
  18#include <linux/debugfs.h>
  19#include <linux/export.h>
  20#include <linux/slab.h>
  21#include <linux/irq.h>
  22#include <linux/interrupt.h>
  23#include <linux/spinlock.h>
  24#include <linux/pci.h>
  25#include <linux/dmar.h>
  26#include <linux/dma-mapping.h>
  27#include <linux/mempool.h>
  28#include <linux/memory.h>
  29#include <linux/cpu.h>
  30#include <linux/timer.h>
  31#include <linux/io.h>
  32#include <linux/iova.h>
  33#include <linux/iommu.h>
  34#include <linux/intel-iommu.h>
  35#include <linux/syscore_ops.h>
  36#include <linux/tboot.h>
  37#include <linux/dmi.h>
  38#include <linux/pci-ats.h>
  39#include <linux/memblock.h>
  40#include <linux/dma-contiguous.h>
  41#include <linux/dma-direct.h>
  42#include <linux/crash_dump.h>
  43#include <linux/numa.h>
  44#include <linux/swiotlb.h>
  45#include <asm/irq_remapping.h>
  46#include <asm/cacheflush.h>
  47#include <asm/iommu.h>
  48#include <trace/events/intel_iommu.h>
  49
  50#include "irq_remapping.h"
  51#include "intel-pasid.h"
  52
  53#define ROOT_SIZE		VTD_PAGE_SIZE
  54#define CONTEXT_SIZE		VTD_PAGE_SIZE
  55
  56#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
  57#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
  58#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
  59#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
  60
  61#define IOAPIC_RANGE_START	(0xfee00000)
  62#define IOAPIC_RANGE_END	(0xfeefffff)
  63#define IOVA_START_ADDR		(0x1000)
  64
  65#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
  66
  67#define MAX_AGAW_WIDTH 64
  68#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
  69
  70#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
  71#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
  72
  73/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
  74   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
  75#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
  76				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
  77#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
  78
  79/* IO virtual address start page frame number */
  80#define IOVA_START_PFN		(1)
  81
  82#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
  83
  84/* page table handling */
  85#define LEVEL_STRIDE		(9)
  86#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
  87
  88/*
   89 * This bitmap is used to advertise the page sizes our hardware supports
  90 * to the IOMMU core, which will then use this information to split
  91 * physically contiguous memory regions it is mapping into page sizes
  92 * that we support.
  93 *
  94 * Traditionally the IOMMU core just handed us the mappings directly,
  95 * after making sure the size is an order of a 4KiB page and that the
  96 * mapping has natural alignment.
  97 *
  98 * To retain this behavior, we currently advertise that we support
  99 * all page sizes that are an order of 4KiB.
 100 *
 101 * If at some point we'd like to utilize the IOMMU core's new behavior,
 102 * we could change this to advertise the real page sizes we support.
 103 */
 104#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
 105
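/*
 * AGAW encodes the adjusted guest address width: a page table with
 * (agaw + 2) levels covers (30 + agaw * 9) bits. For example, agaw 2
 * gives a 4-level table covering 48 bits of IOVA space.
 */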
 106static inline int agaw_to_level(int agaw)
 107{
 108	return agaw + 2;
 109}
 110
 111static inline int agaw_to_width(int agaw)
 112{
 113	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
 114}
 115
 116static inline int width_to_agaw(int width)
 117{
 118	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
 119}
 120
 121static inline unsigned int level_to_offset_bits(int level)
 122{
 123	return (level - 1) * LEVEL_STRIDE;
 124}
 125
 126static inline int pfn_level_offset(unsigned long pfn, int level)
 127{
 128	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
 129}
 130
 131static inline unsigned long level_mask(int level)
 132{
 133	return -1UL << level_to_offset_bits(level);
 134}
 135
 136static inline unsigned long level_size(int level)
 137{
 138	return 1UL << level_to_offset_bits(level);
 139}
 140
 141static inline unsigned long align_to_level(unsigned long pfn, int level)
 142{
 143	return (pfn + level_size(level) - 1) & level_mask(level);
 144}
 145
 146static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
 147{
 148	return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
 149}
 150
 151/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
 152   are never going to work. */
 153static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
 154{
 155	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
 156}
 157
 158static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
 159{
 160	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
 161}
 162static inline unsigned long page_to_dma_pfn(struct page *pg)
 163{
 164	return mm_to_dma_pfn(page_to_pfn(pg));
 165}
 166static inline unsigned long virt_to_dma_pfn(void *p)
 167{
 168	return page_to_dma_pfn(virt_to_page(p));
 169}
 170
 171/* global iommu list, set NULL for ignored DMAR units */
 172static struct intel_iommu **g_iommus;
 173
 174static void __init check_tylersburg_isoch(void);
 175static int rwbf_quirk;
 176
 177/*
  178 * set to 1 to panic the kernel if VT-d can't be successfully enabled
 179 * (used when kernel is launched w/ TXT)
 180 */
 181static int force_on = 0;
 182int intel_iommu_tboot_noforce;
 183static int no_platform_optin;
 184
 185#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
 186
 187/*
 188 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 189 * if marked present.
 190 */
 191static phys_addr_t root_entry_lctp(struct root_entry *re)
 192{
 193	if (!(re->lo & 1))
 194		return 0;
 195
 196	return re->lo & VTD_PAGE_MASK;
 197}
 198
 199/*
 200 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 201 * if marked present.
 202 */
 203static phys_addr_t root_entry_uctp(struct root_entry *re)
 204{
 205	if (!(re->hi & 1))
 206		return 0;
 207
 208	return re->hi & VTD_PAGE_MASK;
 209}
 210
 211static inline void context_clear_pasid_enable(struct context_entry *context)
 212{
 213	context->lo &= ~(1ULL << 11);
 214}
 215
 216static inline bool context_pasid_enabled(struct context_entry *context)
 217{
 218	return !!(context->lo & (1ULL << 11));
 219}
 220
 221static inline void context_set_copied(struct context_entry *context)
 222{
 223	context->hi |= (1ull << 3);
 224}
 225
 226static inline bool context_copied(struct context_entry *context)
 227{
 228	return !!(context->hi & (1ULL << 3));
 229}
 230
 231static inline bool __context_present(struct context_entry *context)
 232{
 233	return (context->lo & 1);
 234}
 235
 236bool context_present(struct context_entry *context)
 237{
 238	return context_pasid_enabled(context) ?
 239	     __context_present(context) :
 240	     __context_present(context) && !context_copied(context);
 241}
 242
 243static inline void context_set_present(struct context_entry *context)
 244{
 245	context->lo |= 1;
 246}
 247
 248static inline void context_set_fault_enable(struct context_entry *context)
 249{
 250	context->lo &= (((u64)-1) << 2) | 1;
 251}
 252
 253static inline void context_set_translation_type(struct context_entry *context,
 254						unsigned long value)
 255{
 256	context->lo &= (((u64)-1) << 4) | 3;
 257	context->lo |= (value & 3) << 2;
 258}
 259
 260static inline void context_set_address_root(struct context_entry *context,
 261					    unsigned long value)
 262{
 263	context->lo &= ~VTD_PAGE_MASK;
 264	context->lo |= value & VTD_PAGE_MASK;
 265}
 266
 267static inline void context_set_address_width(struct context_entry *context,
 268					     unsigned long value)
 269{
 270	context->hi |= value & 7;
 271}
 272
 273static inline void context_set_domain_id(struct context_entry *context,
 274					 unsigned long value)
 275{
 276	context->hi |= (value & ((1 << 16) - 1)) << 8;
 277}
 278
 279static inline int context_domain_id(struct context_entry *c)
 280{
 281	return((c->hi >> 8) & 0xffff);
 282}
 283
 284static inline void context_clear_entry(struct context_entry *context)
 285{
 286	context->lo = 0;
 287	context->hi = 0;
 288}
 289
 290/*
 291 * This domain is a statically identity mapping domain.
  292 *	1. This domain creates a static 1:1 mapping to all usable memory.
  293 *	2. It maps to each iommu if successful.
  294 *	3. Each iommu maps to this domain if successful.
 295 */
 296static struct dmar_domain *si_domain;
 297static int hw_pass_through = 1;
 298
  299/* si_domain contains multiple devices */
 300#define DOMAIN_FLAG_STATIC_IDENTITY		BIT(0)
 301
 302/*
 303 * This is a DMA domain allocated through the iommu domain allocation
 304 * interface. But one or more devices belonging to this domain have
  305 * been chosen to use a private domain. We should avoid using the
 306 * map/unmap/iova_to_phys APIs on it.
 307 */
 308#define DOMAIN_FLAG_LOSE_CHILDREN		BIT(1)
 309
 310#define for_each_domain_iommu(idx, domain)			\
 311	for (idx = 0; idx < g_num_of_iommus; idx++)		\
 312		if (domain->iommu_refcnt[idx])
 313
 314struct dmar_rmrr_unit {
 315	struct list_head list;		/* list of rmrr units	*/
 316	struct acpi_dmar_header *hdr;	/* ACPI header		*/
 317	u64	base_address;		/* reserved base address*/
 318	u64	end_address;		/* reserved end address */
 319	struct dmar_dev_scope *devices;	/* target devices */
 320	int	devices_cnt;		/* target device count */
 321};
 322
 323struct dmar_atsr_unit {
 324	struct list_head list;		/* list of ATSR units */
 325	struct acpi_dmar_header *hdr;	/* ACPI header */
 326	struct dmar_dev_scope *devices;	/* target devices */
 327	int devices_cnt;		/* target device count */
 328	u8 include_all:1;		/* include all ports */
 329};
 330
 331static LIST_HEAD(dmar_atsr_units);
 332static LIST_HEAD(dmar_rmrr_units);
 333
 334#define for_each_rmrr_units(rmrr) \
 335	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
 336
  337/* number of registered intel_iommus */
 338static int g_num_of_iommus;
 339
 340static void domain_exit(struct dmar_domain *domain);
 341static void domain_remove_dev_info(struct dmar_domain *domain);
 342static void dmar_remove_one_dev_info(struct device *dev);
 343static void __dmar_remove_one_dev_info(struct device_domain_info *info);
 344static void domain_context_clear(struct intel_iommu *iommu,
 345				 struct device *dev);
 346static int domain_detach_iommu(struct dmar_domain *domain,
 347			       struct intel_iommu *iommu);
 348static bool device_is_rmrr_locked(struct device *dev);
 349static int intel_iommu_attach_device(struct iommu_domain *domain,
 350				     struct device *dev);
 351static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 352					    dma_addr_t iova);
 353
 354#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
 355int dmar_disabled = 0;
 356#else
 357int dmar_disabled = 1;
 358#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
 359
 360int intel_iommu_sm;
 361int intel_iommu_enabled = 0;
 362EXPORT_SYMBOL_GPL(intel_iommu_enabled);
 363
 364static int dmar_map_gfx = 1;
 365static int dmar_forcedac;
 366static int intel_iommu_strict;
 367static int intel_iommu_superpage = 1;
 368static int iommu_identity_mapping;
 369static int intel_no_bounce;
 370
 371#define IDENTMAP_ALL		1
 372#define IDENTMAP_GFX		2
 373#define IDENTMAP_AZALIA		4
 374
 375int intel_iommu_gfx_mapped;
 376EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
 377
 378#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 379#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
 380static DEFINE_SPINLOCK(device_domain_lock);
 381static LIST_HEAD(device_domain_list);
 382
 383#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&	\
 384				to_pci_dev(d)->untrusted)
 385
 386/*
 387 * Iterate over elements in device_domain_list and call the specified
 388 * callback @fn against each element.
 389 */
 390int for_each_device_domain(int (*fn)(struct device_domain_info *info,
 391				     void *data), void *data)
 392{
 393	int ret = 0;
 394	unsigned long flags;
 395	struct device_domain_info *info;
 396
 397	spin_lock_irqsave(&device_domain_lock, flags);
 398	list_for_each_entry(info, &device_domain_list, global) {
 399		ret = fn(info, data);
 400		if (ret) {
 401			spin_unlock_irqrestore(&device_domain_lock, flags);
 402			return ret;
 403		}
 404	}
 405	spin_unlock_irqrestore(&device_domain_lock, flags);
 406
 407	return 0;
 408}
 409
 410const struct iommu_ops intel_iommu_ops;
 411
 412static bool translation_pre_enabled(struct intel_iommu *iommu)
 413{
 414	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
 415}
 416
 417static void clear_translation_pre_enabled(struct intel_iommu *iommu)
 418{
 419	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
 420}
 421
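/*
 * Record whether DMA translation was already enabled (DMA_GSTS_TES set)
 * when this kernel took over, e.g. left on by firmware or by a previous
 * kernel; translation_pre_enabled() reports that state later.
 */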
 422static void init_translation_status(struct intel_iommu *iommu)
 423{
 424	u32 gsts;
 425
 426	gsts = readl(iommu->reg + DMAR_GSTS_REG);
 427	if (gsts & DMA_GSTS_TES)
 428		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
 429}
 430
  431/* Convert generic 'struct iommu_domain' to private struct dmar_domain */
 432static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
 433{
 434	return container_of(dom, struct dmar_domain, domain);
 435}
 436
 437static int __init intel_iommu_setup(char *str)
 438{
 439	if (!str)
 440		return -EINVAL;
 441	while (*str) {
 442		if (!strncmp(str, "on", 2)) {
 443			dmar_disabled = 0;
 444			pr_info("IOMMU enabled\n");
 445		} else if (!strncmp(str, "off", 3)) {
 446			dmar_disabled = 1;
 447			no_platform_optin = 1;
 448			pr_info("IOMMU disabled\n");
 449		} else if (!strncmp(str, "igfx_off", 8)) {
 450			dmar_map_gfx = 0;
 451			pr_info("Disable GFX device mapping\n");
 452		} else if (!strncmp(str, "forcedac", 8)) {
 453			pr_info("Forcing DAC for PCI devices\n");
 454			dmar_forcedac = 1;
 455		} else if (!strncmp(str, "strict", 6)) {
 456			pr_info("Disable batched IOTLB flush\n");
 457			intel_iommu_strict = 1;
 458		} else if (!strncmp(str, "sp_off", 6)) {
 459			pr_info("Disable supported super page\n");
 460			intel_iommu_superpage = 0;
 461		} else if (!strncmp(str, "sm_on", 5)) {
 462			pr_info("Intel-IOMMU: scalable mode supported\n");
 463			intel_iommu_sm = 1;
 464		} else if (!strncmp(str, "tboot_noforce", 13)) {
 465			printk(KERN_INFO
 466				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
 467			intel_iommu_tboot_noforce = 1;
 468		} else if (!strncmp(str, "nobounce", 8)) {
 469			pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
 470			intel_no_bounce = 1;
 471		}
 472
 473		str += strcspn(str, ",");
 474		while (*str == ',')
 475			str++;
 476	}
 477	return 0;
 478}
 479__setup("intel_iommu=", intel_iommu_setup);
 480
 481static struct kmem_cache *iommu_domain_cache;
 482static struct kmem_cache *iommu_devinfo_cache;
 483
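/*
 * iommu->domains is a two-level array: the top level holds pointers to
 * pages of 256 dmar_domain pointers, so a domain id is split into a page
 * index (did >> 8) and an offset within that page (did & 0xff).
 */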
 484static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
 485{
 486	struct dmar_domain **domains;
 487	int idx = did >> 8;
 488
 489	domains = iommu->domains[idx];
 490	if (!domains)
 491		return NULL;
 492
 493	return domains[did & 0xff];
 494}
 495
 496static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
 497			     struct dmar_domain *domain)
 498{
 499	struct dmar_domain **domains;
 500	int idx = did >> 8;
 501
 502	if (!iommu->domains[idx]) {
 503		size_t size = 256 * sizeof(struct dmar_domain *);
 504		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
 505	}
 506
 507	domains = iommu->domains[idx];
 508	if (WARN_ON(!domains))
 509		return;
 510	else
 511		domains[did & 0xff] = domain;
 512}
 513
 514void *alloc_pgtable_page(int node)
 515{
 516	struct page *page;
 517	void *vaddr = NULL;
 518
 519	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
 520	if (page)
 521		vaddr = page_address(page);
 522	return vaddr;
 523}
 524
 525void free_pgtable_page(void *vaddr)
 526{
 527	free_page((unsigned long)vaddr);
 528}
 529
 530static inline void *alloc_domain_mem(void)
 531{
 532	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
 533}
 534
 535static void free_domain_mem(void *vaddr)
 536{
 537	kmem_cache_free(iommu_domain_cache, vaddr);
 538}
 539
  540static inline void *alloc_devinfo_mem(void)
 541{
 542	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
 543}
 544
 545static inline void free_devinfo_mem(void *vaddr)
 546{
 547	kmem_cache_free(iommu_devinfo_cache, vaddr);
 548}
 549
 550static inline int domain_type_is_si(struct dmar_domain *domain)
 551{
 552	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
 553}
 554
 555static inline int domain_pfn_supported(struct dmar_domain *domain,
 556				       unsigned long pfn)
 557{
 558	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 559
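	/*
	 * A pfn is reachable if shifting it right by the domain's address
	 * width (counted in VTD pages) leaves nothing; the BITS_PER_LONG
	 * check avoids an undefined shift when addr_width is that wide.
	 */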
 560	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
 561}
 562
 563static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
 564{
 565	unsigned long sagaw;
 566	int agaw = -1;
 567
 568	sagaw = cap_sagaw(iommu->cap);
 569	for (agaw = width_to_agaw(max_gaw);
 570	     agaw >= 0; agaw--) {
 571		if (test_bit(agaw, &sagaw))
 572			break;
 573	}
 574
 575	return agaw;
 576}
 577
 578/*
 579 * Calculate max SAGAW for each iommu.
 580 */
 581int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
 582{
 583	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
 584}
 585
 586/*
 587 * calculate agaw for each iommu.
 588 * "SAGAW" may be different across iommus, use a default agaw, and
  589 * fall back to a smaller supported agaw for iommus that don't support the default agaw.
 590 */
 591int iommu_calculate_agaw(struct intel_iommu *iommu)
 592{
 593	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
 594}
 595
  596/* This function only returns a single iommu in a domain */
 597struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 598{
 599	int iommu_id;
 600
 601	/* si_domain and vm domain should not get here. */
 602	if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
 603		return NULL;
 604
 605	for_each_domain_iommu(iommu_id, domain)
 606		break;
 607
 608	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
 609		return NULL;
 610
 611	return g_iommus[iommu_id];
 612}
 613
 614static void domain_update_iommu_coherency(struct dmar_domain *domain)
 615{
 616	struct dmar_drhd_unit *drhd;
 617	struct intel_iommu *iommu;
 618	bool found = false;
 619	int i;
 620
 621	domain->iommu_coherency = 1;
 622
 623	for_each_domain_iommu(i, domain) {
 624		found = true;
 625		if (!ecap_coherent(g_iommus[i]->ecap)) {
 626			domain->iommu_coherency = 0;
 627			break;
 628		}
 629	}
 630	if (found)
 631		return;
 632
 633	/* No hardware attached; use lowest common denominator */
 634	rcu_read_lock();
 635	for_each_active_iommu(iommu, drhd) {
 636		if (!ecap_coherent(iommu->ecap)) {
 637			domain->iommu_coherency = 0;
 638			break;
 639		}
 640	}
 641	rcu_read_unlock();
 642}
 643
 644static int domain_update_iommu_snooping(struct intel_iommu *skip)
 645{
 646	struct dmar_drhd_unit *drhd;
 647	struct intel_iommu *iommu;
 648	int ret = 1;
 649
 650	rcu_read_lock();
 651	for_each_active_iommu(iommu, drhd) {
 652		if (iommu != skip) {
 653			if (!ecap_sc_support(iommu->ecap)) {
 654				ret = 0;
 655				break;
 656			}
 657		}
 658	}
 659	rcu_read_unlock();
 660
 661	return ret;
 662}
 663
 664static int domain_update_iommu_superpage(struct intel_iommu *skip)
 665{
 666	struct dmar_drhd_unit *drhd;
 667	struct intel_iommu *iommu;
 668	int mask = 0xf;
 669
 670	if (!intel_iommu_superpage) {
 671		return 0;
 672	}
 673
 674	/* set iommu_superpage to the smallest common denominator */
 675	rcu_read_lock();
 676	for_each_active_iommu(iommu, drhd) {
 677		if (iommu != skip) {
 678			mask &= cap_super_page_val(iommu->cap);
 679			if (!mask)
 680				break;
 681		}
 682	}
 683	rcu_read_unlock();
 684
 685	return fls(mask);
 686}
 687
 688/* Some capabilities may be different across iommus */
 689static void domain_update_iommu_cap(struct dmar_domain *domain)
 690{
 691	domain_update_iommu_coherency(domain);
 692	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
 693	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
 694}
 695
 696struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
 697					 u8 devfn, int alloc)
 698{
 699	struct root_entry *root = &iommu->root_entry[bus];
 700	struct context_entry *context;
 701	u64 *entry;
 702
 703	entry = &root->lo;
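	/*
	 * In scalable mode a root entry covers only half a bus: devfn
	 * 0x00-0x7f goes through the lower context-table pointer and devfn
	 * 0x80-0xff through the upper one, and each context entry occupies
	 * two 128-bit slots, hence the devfn *= 2 below.
	 */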
 704	if (sm_supported(iommu)) {
 705		if (devfn >= 0x80) {
 706			devfn -= 0x80;
 707			entry = &root->hi;
 708		}
 709		devfn *= 2;
 710	}
 711	if (*entry & 1)
 712		context = phys_to_virt(*entry & VTD_PAGE_MASK);
 713	else {
 714		unsigned long phy_addr;
 715		if (!alloc)
 716			return NULL;
 717
 718		context = alloc_pgtable_page(iommu->node);
 719		if (!context)
 720			return NULL;
 721
 722		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
 723		phy_addr = virt_to_phys((void *)context);
 724		*entry = phy_addr | 1;
 725		__iommu_flush_cache(iommu, entry, sizeof(*entry));
 726	}
 727	return &context[devfn];
 728}
 729
 730static int iommu_dummy(struct device *dev)
 731{
 732	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
 733}
 734
 735/**
 736 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
 737 *				 sub-hierarchy of a candidate PCI-PCI bridge
 738 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
 739 * @bridge: the candidate PCI-PCI bridge
 740 *
 741 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
 742 */
 743static bool
 744is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
 745{
 746	struct pci_dev *pdev, *pbridge;
 747
 748	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
 749		return false;
 750
 751	pdev = to_pci_dev(dev);
 752	pbridge = to_pci_dev(bridge);
 753
 754	if (pbridge->subordinate &&
 755	    pbridge->subordinate->number <= pdev->bus->number &&
 756	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
 757		return true;
 758
 759	return false;
 760}
 761
 762static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 763{
 764	struct dmar_drhd_unit *drhd = NULL;
 765	struct intel_iommu *iommu;
 766	struct device *tmp;
 767	struct pci_dev *pdev = NULL;
 768	u16 segment = 0;
 769	int i;
 770
 771	if (iommu_dummy(dev))
 772		return NULL;
 773
 774	if (dev_is_pci(dev)) {
 775		struct pci_dev *pf_pdev;
 776
 777		pdev = to_pci_dev(dev);
 778
 779#ifdef CONFIG_X86
 780		/* VMD child devices currently cannot be handled individually */
 781		if (is_vmd(pdev->bus))
 782			return NULL;
 783#endif
 784
 785		/* VFs aren't listed in scope tables; we need to look up
 786		 * the PF instead to find the IOMMU. */
 787		pf_pdev = pci_physfn(pdev);
 788		dev = &pf_pdev->dev;
 789		segment = pci_domain_nr(pdev->bus);
 790	} else if (has_acpi_companion(dev))
 791		dev = &ACPI_COMPANION(dev)->dev;
 792
 793	rcu_read_lock();
 794	for_each_active_iommu(iommu, drhd) {
 795		if (pdev && segment != drhd->segment)
 796			continue;
 797
 798		for_each_active_dev_scope(drhd->devices,
 799					  drhd->devices_cnt, i, tmp) {
 800			if (tmp == dev) {
 801				/* For a VF use its original BDF# not that of the PF
 802				 * which we used for the IOMMU lookup. Strictly speaking
 803				 * we could do this for all PCI devices; we only need to
 804				 * get the BDF# from the scope table for ACPI matches. */
 805				if (pdev && pdev->is_virtfn)
 806					goto got_pdev;
 807
 808				*bus = drhd->devices[i].bus;
 809				*devfn = drhd->devices[i].devfn;
 810				goto out;
 811			}
 812
 813			if (is_downstream_to_pci_bridge(dev, tmp))
 814				goto got_pdev;
 815		}
 816
 817		if (pdev && drhd->include_all) {
 818		got_pdev:
 819			*bus = pdev->bus->number;
 820			*devfn = pdev->devfn;
 821			goto out;
 822		}
 823	}
 824	iommu = NULL;
 825 out:
 826	rcu_read_unlock();
 827
 828	return iommu;
 829}
 830
 831static void domain_flush_cache(struct dmar_domain *domain,
 832			       void *addr, int size)
 833{
 834	if (!domain->iommu_coherency)
 835		clflush_cache_range(addr, size);
 836}
 837
 838static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
 839{
 840	struct context_entry *context;
 841	int ret = 0;
 842	unsigned long flags;
 843
 844	spin_lock_irqsave(&iommu->lock, flags);
 845	context = iommu_context_addr(iommu, bus, devfn, 0);
 846	if (context)
 847		ret = context_present(context);
 848	spin_unlock_irqrestore(&iommu->lock, flags);
 849	return ret;
 850}
 851
 852static void free_context_table(struct intel_iommu *iommu)
 853{
 854	int i;
 855	unsigned long flags;
 856	struct context_entry *context;
 857
 858	spin_lock_irqsave(&iommu->lock, flags);
 859	if (!iommu->root_entry) {
 860		goto out;
 861	}
 862	for (i = 0; i < ROOT_ENTRY_NR; i++) {
 863		context = iommu_context_addr(iommu, i, 0, 0);
 864		if (context)
 865			free_pgtable_page(context);
 866
 867		if (!sm_supported(iommu))
 868			continue;
 869
 870		context = iommu_context_addr(iommu, i, 0x80, 0);
 871		if (context)
 872			free_pgtable_page(context);
 873
 874	}
 875	free_pgtable_page(iommu->root_entry);
 876	iommu->root_entry = NULL;
 877out:
 878	spin_unlock_irqrestore(&iommu->lock, flags);
 879}
 880
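/*
 * Walk (and, if needed, build) the page table down to *target_level for
 * the given pfn. A *target_level of 0 means "stop wherever the walk
 * bottoms out" (a superpage or a non-present entry); on return it is
 * updated to the level actually reached.
 */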
 881static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 882				      unsigned long pfn, int *target_level)
 883{
 884	struct dma_pte *parent, *pte;
 885	int level = agaw_to_level(domain->agaw);
 886	int offset;
 887
 888	BUG_ON(!domain->pgd);
 889
 890	if (!domain_pfn_supported(domain, pfn))
 891		/* Address beyond IOMMU's addressing capabilities. */
 892		return NULL;
 893
 894	parent = domain->pgd;
 895
 896	while (1) {
 897		void *tmp_page;
 898
 899		offset = pfn_level_offset(pfn, level);
 900		pte = &parent[offset];
 901		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
 902			break;
 903		if (level == *target_level)
 904			break;
 905
 906		if (!dma_pte_present(pte)) {
 907			uint64_t pteval;
 908
 909			tmp_page = alloc_pgtable_page(domain->nid);
 910
 911			if (!tmp_page)
 912				return NULL;
 913
 914			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
 915			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
 916			if (cmpxchg64(&pte->val, 0ULL, pteval))
 917				/* Someone else set it while we were thinking; use theirs. */
 918				free_pgtable_page(tmp_page);
 919			else
 920				domain_flush_cache(domain, pte, sizeof(*pte));
 921		}
 922		if (level == 1)
 923			break;
 924
 925		parent = phys_to_virt(dma_pte_addr(pte));
 926		level--;
 927	}
 928
 929	if (!*target_level)
 930		*target_level = level;
 931
 932	return pte;
 933}
 934
  935/* return the address's pte at a specific level */
 936static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
 937					 unsigned long pfn,
 938					 int level, int *large_page)
 939{
 940	struct dma_pte *parent, *pte;
 941	int total = agaw_to_level(domain->agaw);
 942	int offset;
 943
 944	parent = domain->pgd;
 945	while (level <= total) {
 946		offset = pfn_level_offset(pfn, total);
 947		pte = &parent[offset];
 948		if (level == total)
 949			return pte;
 950
 951		if (!dma_pte_present(pte)) {
 952			*large_page = total;
 953			break;
 954		}
 955
 956		if (dma_pte_superpage(pte)) {
 957			*large_page = total;
 958			return pte;
 959		}
 960
 961		parent = phys_to_virt(dma_pte_addr(pte));
 962		total--;
 963	}
 964	return NULL;
 965}
 966
  967/* clear last level pte; a tlb flush should follow */
 968static void dma_pte_clear_range(struct dmar_domain *domain,
 969				unsigned long start_pfn,
 970				unsigned long last_pfn)
 971{
 972	unsigned int large_page;
 973	struct dma_pte *first_pte, *pte;
 974
 975	BUG_ON(!domain_pfn_supported(domain, start_pfn));
 976	BUG_ON(!domain_pfn_supported(domain, last_pfn));
 977	BUG_ON(start_pfn > last_pfn);
 978
 979	/* we don't need lock here; nobody else touches the iova range */
 980	do {
 981		large_page = 1;
 982		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
 983		if (!pte) {
 984			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
 985			continue;
 986		}
 987		do {
 988			dma_clear_pte(pte);
 989			start_pfn += lvl_to_nr_pages(large_page);
 990			pte++;
 991		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
 992
 993		domain_flush_cache(domain, first_pte,
 994				   (void *)pte - (void *)first_pte);
 995
 996	} while (start_pfn && start_pfn <= last_pfn);
 997}
 998
 999static void dma_pte_free_level(struct dmar_domain *domain, int level,
1000			       int retain_level, struct dma_pte *pte,
1001			       unsigned long pfn, unsigned long start_pfn,
1002			       unsigned long last_pfn)
1003{
1004	pfn = max(start_pfn, pfn);
1005	pte = &pte[pfn_level_offset(pfn, level)];
1006
1007	do {
1008		unsigned long level_pfn;
1009		struct dma_pte *level_pte;
1010
1011		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1012			goto next;
1013
1014		level_pfn = pfn & level_mask(level);
1015		level_pte = phys_to_virt(dma_pte_addr(pte));
1016
1017		if (level > 2) {
1018			dma_pte_free_level(domain, level - 1, retain_level,
1019					   level_pte, level_pfn, start_pfn,
1020					   last_pfn);
1021		}
1022
1023		/*
1024		 * Free the page table if we're below the level we want to
1025		 * retain and the range covers the entire table.
1026		 */
1027		if (level < retain_level && !(start_pfn > level_pfn ||
1028		      last_pfn < level_pfn + level_size(level) - 1)) {
1029			dma_clear_pte(pte);
1030			domain_flush_cache(domain, pte, sizeof(*pte));
1031			free_pgtable_page(level_pte);
1032		}
1033next:
1034		pfn += level_size(level);
1035	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1036}
1037
1038/*
1039 * clear last level (leaf) ptes and free page table pages below the
1040 * level we wish to keep intact.
1041 */
1042static void dma_pte_free_pagetable(struct dmar_domain *domain,
1043				   unsigned long start_pfn,
1044				   unsigned long last_pfn,
1045				   int retain_level)
1046{
1047	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1048	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1049	BUG_ON(start_pfn > last_pfn);
1050
1051	dma_pte_clear_range(domain, start_pfn, last_pfn);
1052
1053	/* We don't need lock here; nobody else touches the iova range */
1054	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1055			   domain->pgd, 0, start_pfn, last_pfn);
1056
1057	/* free pgd */
1058	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1059		free_pgtable_page(domain->pgd);
1060		domain->pgd = NULL;
1061	}
1062}
1063
1064/* When a page at a given level is being unlinked from its parent, we don't
1065   need to *modify* it at all. All we need to do is make a list of all the
1066   pages which can be freed just as soon as we've flushed the IOTLB and we
1067   know the hardware page-walk will no longer touch them.
1068   The 'pte' argument is the *parent* PTE, pointing to the page that is to
1069   be freed. */
1070static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1071					    int level, struct dma_pte *pte,
1072					    struct page *freelist)
1073{
1074	struct page *pg;
1075
1076	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1077	pg->freelist = freelist;
1078	freelist = pg;
1079
1080	if (level == 1)
1081		return freelist;
1082
1083	pte = page_address(pg);
1084	do {
1085		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1086			freelist = dma_pte_list_pagetables(domain, level - 1,
1087							   pte, freelist);
1088		pte++;
1089	} while (!first_pte_in_page(pte));
1090
1091	return freelist;
1092}
1093
1094static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1095					struct dma_pte *pte, unsigned long pfn,
1096					unsigned long start_pfn,
1097					unsigned long last_pfn,
1098					struct page *freelist)
1099{
1100	struct dma_pte *first_pte = NULL, *last_pte = NULL;
1101
1102	pfn = max(start_pfn, pfn);
1103	pte = &pte[pfn_level_offset(pfn, level)];
1104
1105	do {
1106		unsigned long level_pfn;
1107
1108		if (!dma_pte_present(pte))
1109			goto next;
1110
1111		level_pfn = pfn & level_mask(level);
1112
1113		/* If range covers entire pagetable, free it */
1114		if (start_pfn <= level_pfn &&
1115		    last_pfn >= level_pfn + level_size(level) - 1) {
 1116			/* These subordinate page tables are going away entirely. Don't
1117			   bother to clear them; we're just going to *free* them. */
1118			if (level > 1 && !dma_pte_superpage(pte))
1119				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1120
1121			dma_clear_pte(pte);
1122			if (!first_pte)
1123				first_pte = pte;
1124			last_pte = pte;
1125		} else if (level > 1) {
1126			/* Recurse down into a level that isn't *entirely* obsolete */
1127			freelist = dma_pte_clear_level(domain, level - 1,
1128						       phys_to_virt(dma_pte_addr(pte)),
1129						       level_pfn, start_pfn, last_pfn,
1130						       freelist);
1131		}
1132next:
1133		pfn += level_size(level);
1134	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1135
1136	if (first_pte)
1137		domain_flush_cache(domain, first_pte,
1138				   (void *)++last_pte - (void *)first_pte);
1139
1140	return freelist;
1141}
1142
1143/* We can't just free the pages because the IOMMU may still be walking
1144   the page tables, and may have cached the intermediate levels. The
1145   pages can only be freed after the IOTLB flush has been done. */
1146static struct page *domain_unmap(struct dmar_domain *domain,
1147				 unsigned long start_pfn,
1148				 unsigned long last_pfn)
1149{
1150	struct page *freelist;
1151
1152	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1153	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1154	BUG_ON(start_pfn > last_pfn);
1155
1156	/* we don't need lock here; nobody else touches the iova range */
1157	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1158				       domain->pgd, 0, start_pfn, last_pfn, NULL);
1159
1160	/* free pgd */
1161	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1162		struct page *pgd_page = virt_to_page(domain->pgd);
1163		pgd_page->freelist = freelist;
1164		freelist = pgd_page;
1165
1166		domain->pgd = NULL;
1167	}
1168
1169	return freelist;
1170}
1171
1172static void dma_free_pagelist(struct page *freelist)
1173{
1174	struct page *pg;
1175
1176	while ((pg = freelist)) {
1177		freelist = pg->freelist;
1178		free_pgtable_page(page_address(pg));
1179	}
1180}
1181
1182static void iova_entry_free(unsigned long data)
1183{
1184	struct page *freelist = (struct page *)data;
1185
1186	dma_free_pagelist(freelist);
1187}
1188
1189/* iommu handling */
1190static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1191{
1192	struct root_entry *root;
1193	unsigned long flags;
1194
1195	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1196	if (!root) {
1197		pr_err("Allocating root entry for %s failed\n",
1198			iommu->name);
1199		return -ENOMEM;
1200	}
1201
1202	__iommu_flush_cache(iommu, root, ROOT_SIZE);
1203
1204	spin_lock_irqsave(&iommu->lock, flags);
1205	iommu->root_entry = root;
1206	spin_unlock_irqrestore(&iommu->lock, flags);
1207
1208	return 0;
1209}
1210
1211static void iommu_set_root_entry(struct intel_iommu *iommu)
1212{
1213	u64 addr;
1214	u32 sts;
1215	unsigned long flag;
1216
1217	addr = virt_to_phys(iommu->root_entry);
1218	if (sm_supported(iommu))
1219		addr |= DMA_RTADDR_SMT;
1220
1221	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1222	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1223
1224	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1225
1226	/* Make sure hardware complete it */
1227	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1228		      readl, (sts & DMA_GSTS_RTPS), sts);
1229
1230	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1231}
1232
1233void iommu_flush_write_buffer(struct intel_iommu *iommu)
1234{
1235	u32 val;
1236	unsigned long flag;
1237
1238	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1239		return;
1240
1241	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1242	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1243
1244	/* Make sure hardware complete it */
1245	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1246		      readl, (!(val & DMA_GSTS_WBFS)), val);
1247
1248	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1249}
1250
 1251/* return value determines if we need a write buffer flush */
1252static void __iommu_flush_context(struct intel_iommu *iommu,
1253				  u16 did, u16 source_id, u8 function_mask,
1254				  u64 type)
1255{
1256	u64 val = 0;
1257	unsigned long flag;
1258
1259	switch (type) {
1260	case DMA_CCMD_GLOBAL_INVL:
1261		val = DMA_CCMD_GLOBAL_INVL;
1262		break;
1263	case DMA_CCMD_DOMAIN_INVL:
1264		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1265		break;
1266	case DMA_CCMD_DEVICE_INVL:
1267		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1268			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1269		break;
1270	default:
1271		BUG();
1272	}
1273	val |= DMA_CCMD_ICC;
1274
1275	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1276	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1277
1278	/* Make sure hardware complete it */
1279	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1280		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1281
1282	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1283}
1284
 1285/* return value determines if we need a write buffer flush */
1286static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1287				u64 addr, unsigned int size_order, u64 type)
1288{
1289	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1290	u64 val = 0, val_iva = 0;
1291	unsigned long flag;
1292
1293	switch (type) {
1294	case DMA_TLB_GLOBAL_FLUSH:
 1295		/* global flush doesn't need to set IVA_REG */
1296		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1297		break;
1298	case DMA_TLB_DSI_FLUSH:
1299		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1300		break;
1301	case DMA_TLB_PSI_FLUSH:
1302		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1303		/* IH bit is passed in as part of address */
1304		val_iva = size_order | addr;
1305		break;
1306	default:
1307		BUG();
1308	}
1309	/* Note: set drain read/write */
1310#if 0
1311	/*
 1312	 * This is probably meant to be super secure. Looks like we can
1313	 * ignore it without any impact.
1314	 */
1315	if (cap_read_drain(iommu->cap))
1316		val |= DMA_TLB_READ_DRAIN;
1317#endif
1318	if (cap_write_drain(iommu->cap))
1319		val |= DMA_TLB_WRITE_DRAIN;
1320
1321	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1322	/* Note: Only uses first TLB reg currently */
1323	if (val_iva)
1324		dmar_writeq(iommu->reg + tlb_offset, val_iva);
1325	dmar_writeq(iommu->reg + tlb_offset + 8, val);
1326
1327	/* Make sure hardware complete it */
1328	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1329		dmar_readq, (!(val & DMA_TLB_IVT)), val);
1330
1331	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1332
1333	/* check IOTLB invalidation granularity */
1334	if (DMA_TLB_IAIG(val) == 0)
1335		pr_err("Flush IOTLB failed\n");
1336	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1337		pr_debug("TLB flush request %Lx, actual %Lx\n",
1338			(unsigned long long)DMA_TLB_IIRG(type),
1339			(unsigned long long)DMA_TLB_IAIG(val));
1340}
1341
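/*
 * Find the device_domain_info for (bus, devfn) on this IOMMU and return it
 * only if queued invalidation is available and the device advertises ATS,
 * i.e. only if a device-IOTLB flush can actually be issued for it.
 */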
1342static struct device_domain_info *
 1343iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1344			 u8 bus, u8 devfn)
1345{
1346	struct device_domain_info *info;
1347
1348	assert_spin_locked(&device_domain_lock);
1349
1350	if (!iommu->qi)
1351		return NULL;
1352
1353	list_for_each_entry(info, &domain->devices, link)
1354		if (info->iommu == iommu && info->bus == bus &&
1355		    info->devfn == devfn) {
1356			if (info->ats_supported && info->dev)
1357				return info;
1358			break;
1359		}
1360
1361	return NULL;
1362}
1363
1364static void domain_update_iotlb(struct dmar_domain *domain)
1365{
1366	struct device_domain_info *info;
1367	bool has_iotlb_device = false;
1368
1369	assert_spin_locked(&device_domain_lock);
1370
1371	list_for_each_entry(info, &domain->devices, link) {
1372		struct pci_dev *pdev;
1373
1374		if (!info->dev || !dev_is_pci(info->dev))
1375			continue;
1376
1377		pdev = to_pci_dev(info->dev);
1378		if (pdev->ats_enabled) {
1379			has_iotlb_device = true;
1380			break;
1381		}
1382	}
1383
1384	domain->has_iotlb_device = has_iotlb_device;
1385}
1386
1387static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1388{
1389	struct pci_dev *pdev;
1390
1391	assert_spin_locked(&device_domain_lock);
1392
1393	if (!info || !dev_is_pci(info->dev))
1394		return;
1395
1396	pdev = to_pci_dev(info->dev);
1397	/* For IOMMU that supports device IOTLB throttling (DIT), we assign
1398	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1399	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1400	 * reserved, which should be set to 0.
1401	 */
1402	if (!ecap_dit(info->iommu->ecap))
1403		info->pfsid = 0;
1404	else {
1405		struct pci_dev *pf_pdev;
1406
1407		/* pdev will be returned if device is not a vf */
1408		pf_pdev = pci_physfn(pdev);
1409		info->pfsid = pci_dev_id(pf_pdev);
1410	}
1411
1412#ifdef CONFIG_INTEL_IOMMU_SVM
1413	/* The PCIe spec, in its wisdom, declares that the behaviour of
1414	   the device if you enable PASID support after ATS support is
1415	   undefined. So always enable PASID support on devices which
1416	   have it, even if we can't yet know if we're ever going to
1417	   use it. */
1418	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1419		info->pasid_enabled = 1;
1420
1421	if (info->pri_supported &&
1422	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
1423	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1424		info->pri_enabled = 1;
1425#endif
1426	if (!pdev->untrusted && info->ats_supported &&
1427	    pci_ats_page_aligned(pdev) &&
1428	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1429		info->ats_enabled = 1;
1430		domain_update_iotlb(info->domain);
1431		info->ats_qdep = pci_ats_queue_depth(pdev);
1432	}
1433}
1434
1435static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1436{
1437	struct pci_dev *pdev;
1438
1439	assert_spin_locked(&device_domain_lock);
1440
1441	if (!dev_is_pci(info->dev))
1442		return;
1443
1444	pdev = to_pci_dev(info->dev);
1445
1446	if (info->ats_enabled) {
1447		pci_disable_ats(pdev);
1448		info->ats_enabled = 0;
1449		domain_update_iotlb(info->domain);
1450	}
1451#ifdef CONFIG_INTEL_IOMMU_SVM
1452	if (info->pri_enabled) {
1453		pci_disable_pri(pdev);
1454		info->pri_enabled = 0;
1455	}
1456	if (info->pasid_enabled) {
1457		pci_disable_pasid(pdev);
1458		info->pasid_enabled = 0;
1459	}
1460#endif
1461}
1462
1463static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1464				  u64 addr, unsigned mask)
1465{
1466	u16 sid, qdep;
1467	unsigned long flags;
1468	struct device_domain_info *info;
1469
1470	if (!domain->has_iotlb_device)
1471		return;
1472
1473	spin_lock_irqsave(&device_domain_lock, flags);
1474	list_for_each_entry(info, &domain->devices, link) {
1475		if (!info->ats_enabled)
1476			continue;
1477
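		/* Requester ID (bus:devfn) and ATS queue depth of the device */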
1478		sid = info->bus << 8 | info->devfn;
1479		qdep = info->ats_qdep;
1480		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1481				qdep, addr, mask);
1482	}
1483	spin_unlock_irqrestore(&device_domain_lock, flags);
1484}
1485
1486static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1487				  struct dmar_domain *domain,
1488				  unsigned long pfn, unsigned int pages,
1489				  int ih, int map)
1490{
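	/*
	 * mask is the invalidation size as a power of two of 4KiB pages:
	 * e.g. pages = 5 rounds up to 8, giving mask = 3, i.e. an aligned
	 * region of 8 pages is invalidated.
	 */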
1491	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1492	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1493	u16 did = domain->iommu_did[iommu->seq_id];
1494
1495	BUG_ON(pages == 0);
1496
1497	if (ih)
1498		ih = 1 << 6;
1499	/*
 1500	 * Fall back to domain selective flush if no PSI support or the size is
1501	 * too big.
1502	 * PSI requires page size to be 2 ^ x, and the base address is naturally
1503	 * aligned to the size
1504	 */
1505	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1506		iommu->flush.flush_iotlb(iommu, did, 0, 0,
1507						DMA_TLB_DSI_FLUSH);
1508	else
1509		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1510						DMA_TLB_PSI_FLUSH);
1511
1512	/*
1513	 * In caching mode, changes of pages from non-present to present require
1514	 * flush. However, device IOTLB doesn't need to be flushed in this case.
1515	 */
1516	if (!cap_caching_mode(iommu->cap) || !map)
1517		iommu_flush_dev_iotlb(domain, addr, mask);
1518}
1519
1520/* Notification for newly created mappings */
1521static inline void __mapping_notify_one(struct intel_iommu *iommu,
1522					struct dmar_domain *domain,
1523					unsigned long pfn, unsigned int pages)
1524{
1525	/* It's a non-present to present mapping. Only flush if caching mode */
1526	if (cap_caching_mode(iommu->cap))
1527		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1528	else
1529		iommu_flush_write_buffer(iommu);
1530}
1531
1532static void iommu_flush_iova(struct iova_domain *iovad)
1533{
1534	struct dmar_domain *domain;
1535	int idx;
1536
1537	domain = container_of(iovad, struct dmar_domain, iovad);
1538
1539	for_each_domain_iommu(idx, domain) {
1540		struct intel_iommu *iommu = g_iommus[idx];
1541		u16 did = domain->iommu_did[iommu->seq_id];
1542
1543		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1544
1545		if (!cap_caching_mode(iommu->cap))
1546			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1547					      0, MAX_AGAW_PFN_WIDTH);
1548	}
1549}
1550
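/*
 * Disable the protected low/high memory regions by clearing the Enable
 * Protected Memory bit in the PMEN register, then wait for the status
 * bit to clear.  Nothing to do if the IOMMU advertises neither PLMR nor
 * PHMR support.
 */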
1551static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1552{
1553	u32 pmen;
1554	unsigned long flags;
1555
1556	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1557		return;
1558
1559	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1560	pmen = readl(iommu->reg + DMAR_PMEN_REG);
1561	pmen &= ~DMA_PMEN_EPM;
1562	writel(pmen, iommu->reg + DMAR_PMEN_REG);
1563
1564	/* wait for the protected region status bit to clear */
1565	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1566		readl, !(pmen & DMA_PMEN_PRS), pmen);
1567
1568	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1569}
1570
1571static void iommu_enable_translation(struct intel_iommu *iommu)
1572{
1573	u32 sts;
1574	unsigned long flags;
1575
1576	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1577	iommu->gcmd |= DMA_GCMD_TE;
1578	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1579
1580	/* Make sure the hardware completes it */
1581	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1582		      readl, (sts & DMA_GSTS_TES), sts);
1583
1584	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1585}
1586
1587static void iommu_disable_translation(struct intel_iommu *iommu)
1588{
1589	u32 sts;
1590	unsigned long flag;
1591
1592	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1593	iommu->gcmd &= ~DMA_GCMD_TE;
1594	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1595
1596	/* Make sure the hardware completes it */
1597	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1598		      readl, (!(sts & DMA_GSTS_TES)), sts);
1599
1600	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1601}
1602
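/*
 * Allocate the domain-id bitmap and the two-level domain pointer array
 * for this IOMMU.  The domain ids that must not be handed out (0, and
 * FLPT_DEFAULT_DID in scalable mode) are reserved here as well.
 */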
1603static int iommu_init_domains(struct intel_iommu *iommu)
1604{
1605	u32 ndomains, nlongs;
1606	size_t size;
1607
1608	ndomains = cap_ndoms(iommu->cap);
1609	pr_debug("%s: Number of Domains supported <%d>\n",
1610		 iommu->name, ndomains);
1611	nlongs = BITS_TO_LONGS(ndomains);
1612
1613	spin_lock_init(&iommu->lock);
1614
1615	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1616	if (!iommu->domain_ids) {
1617		pr_err("%s: Allocating domain id array failed\n",
1618		       iommu->name);
1619		return -ENOMEM;
1620	}
1621
1622	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1623	iommu->domains = kzalloc(size, GFP_KERNEL);
1624
1625	if (iommu->domains) {
1626		size = 256 * sizeof(struct dmar_domain *);
1627		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1628	}
1629
1630	if (!iommu->domains || !iommu->domains[0]) {
1631		pr_err("%s: Allocating domain array failed\n",
1632		       iommu->name);
1633		kfree(iommu->domain_ids);
1634		kfree(iommu->domains);
1635		iommu->domain_ids = NULL;
1636		iommu->domains    = NULL;
1637		return -ENOMEM;
1638	}
1639
1640	/*
1641	 * If Caching mode is set, then invalid translations are tagged
1642	 * with domain-id 0, hence we need to pre-allocate it. We also
1643	 * use domain-id 0 as a marker for non-allocated domain-id, so
1644	 * make sure it is not used for a real domain.
1645	 */
1646	set_bit(0, iommu->domain_ids);
1647
1648	/*
1649	 * VT-d spec rev 3.0 (section 6.2.3.1) requires that each PASID
1650	 * entry for first-level or pass-through translation modes be
1651	 * programmed with a domain id different from those used for
1652	 * second-level or nested translation. We reserve a domain id for
1653	 * this purpose.
1654	 */
1655	if (sm_supported(iommu))
1656		set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1657
1658	return 0;
1659}
1660
1661static void disable_dmar_iommu(struct intel_iommu *iommu)
1662{
1663	struct device_domain_info *info, *tmp;
1664	unsigned long flags;
1665
1666	if (!iommu->domains || !iommu->domain_ids)
1667		return;
1668
1669	spin_lock_irqsave(&device_domain_lock, flags);
1670	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1671		if (info->iommu != iommu)
1672			continue;
1673
1674		if (!info->dev || !info->domain)
1675			continue;
1676
1677		__dmar_remove_one_dev_info(info);
1678	}
1679	spin_unlock_irqrestore(&device_domain_lock, flags);
1680
1681	if (iommu->gcmd & DMA_GCMD_TE)
1682		iommu_disable_translation(iommu);
1683}
1684
1685static void free_dmar_iommu(struct intel_iommu *iommu)
1686{
1687	if ((iommu->domains) && (iommu->domain_ids)) {
1688		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1689		int i;
1690
1691		for (i = 0; i < elems; i++)
1692			kfree(iommu->domains[i]);
1693		kfree(iommu->domains);
1694		kfree(iommu->domain_ids);
1695		iommu->domains = NULL;
1696		iommu->domain_ids = NULL;
1697	}
1698
1699	g_iommus[iommu->seq_id] = NULL;
1700
1701	/* free context mapping */
1702	free_context_table(iommu);
1703
1704#ifdef CONFIG_INTEL_IOMMU_SVM
1705	if (pasid_supported(iommu)) {
1706		if (ecap_prs(iommu->ecap))
1707			intel_svm_finish_prq(iommu);
1708	}
1709#endif
1710}
1711
1712static struct dmar_domain *alloc_domain(int flags)
1713{
1714	struct dmar_domain *domain;
1715
1716	domain = alloc_domain_mem();
1717	if (!domain)
1718		return NULL;
1719
1720	memset(domain, 0, sizeof(*domain));
1721	domain->nid = NUMA_NO_NODE;
1722	domain->flags = flags;
1723	domain->has_iotlb_device = false;
1724	INIT_LIST_HEAD(&domain->devices);
1725
1726	return domain;
1727}
1728
1729/* Must be called with iommu->lock */
1730static int domain_attach_iommu(struct dmar_domain *domain,
1731			       struct intel_iommu *iommu)
1732{
1733	unsigned long ndomains;
1734	int num;
1735
1736	assert_spin_locked(&device_domain_lock);
1737	assert_spin_locked(&iommu->lock);
1738
1739	domain->iommu_refcnt[iommu->seq_id] += 1;
1740	domain->iommu_count += 1;
1741	if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1742		ndomains = cap_ndoms(iommu->cap);
1743		num      = find_first_zero_bit(iommu->domain_ids, ndomains);
1744
1745		if (num >= ndomains) {
1746			pr_err("%s: No free domain ids\n", iommu->name);
1747			domain->iommu_refcnt[iommu->seq_id] -= 1;
1748			domain->iommu_count -= 1;
1749			return -ENOSPC;
1750		}
1751
1752		set_bit(num, iommu->domain_ids);
1753		set_iommu_domain(iommu, num, domain);
1754
1755		domain->iommu_did[iommu->seq_id] = num;
1756		domain->nid			 = iommu->node;
1757
1758		domain_update_iommu_cap(domain);
1759	}
1760
1761	return 0;
1762}
1763
1764static int domain_detach_iommu(struct dmar_domain *domain,
1765			       struct intel_iommu *iommu)
1766{
1767	int num, count;
1768
1769	assert_spin_locked(&device_domain_lock);
1770	assert_spin_locked(&iommu->lock);
1771
1772	domain->iommu_refcnt[iommu->seq_id] -= 1;
1773	count = --domain->iommu_count;
1774	if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1775		num = domain->iommu_did[iommu->seq_id];
1776		clear_bit(num, iommu->domain_ids);
1777		set_iommu_domain(iommu, num, NULL);
1778
1779		domain_update_iommu_cap(domain);
1780		domain->iommu_did[iommu->seq_id] = 0;
1781	}
1782
1783	return count;
1784}
1785
1786static struct iova_domain reserved_iova_list;
1787static struct lock_class_key reserved_rbtree_key;
1788
1789static int dmar_init_reserved_ranges(void)
1790{
1791	struct pci_dev *pdev = NULL;
1792	struct iova *iova;
1793	int i;
1794
1795	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
1796
1797	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1798		&reserved_rbtree_key);
1799
1800	/* IOAPIC ranges shouldn't be accessed by DMA */
1801	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1802		IOVA_PFN(IOAPIC_RANGE_END));
1803	if (!iova) {
1804		pr_err("Reserve IOAPIC range failed\n");
1805		return -ENODEV;
1806	}
1807
1808	/* Reserve all PCI MMIO to avoid peer-to-peer access */
1809	for_each_pci_dev(pdev) {
1810		struct resource *r;
1811
1812		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1813			r = &pdev->resource[i];
1814			if (!r->flags || !(r->flags & IORESOURCE_MEM))
1815				continue;
1816			iova = reserve_iova(&reserved_iova_list,
1817					    IOVA_PFN(r->start),
1818					    IOVA_PFN(r->end));
1819			if (!iova) {
1820				pci_err(pdev, "Reserve iova for %pR failed\n", r);
1821				return -ENODEV;
1822			}
1823		}
1824	}
1825	return 0;
1826}
1827
1828static void domain_reserve_special_ranges(struct dmar_domain *domain)
1829{
1830	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1831}
1832
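/*
 * Round a guest address width up to the next width that whole page-table
 * levels can cover (12 + 9 * n bits), capped at 64.  For example, a
 * 32-bit guest width is adjusted to 39, while 39 and 48 pass through
 * unchanged.
 */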
1833static inline int guestwidth_to_adjustwidth(int gaw)
1834{
1835	int agaw;
1836	int r = (gaw - 12) % 9;
1837
1838	if (r == 0)
1839		agaw = gaw;
1840	else
1841		agaw = gaw + 9 - r;
1842	if (agaw > 64)
1843		agaw = 64;
1844	return agaw;
1845}
1846
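/*
 * Initialize a newly allocated domain for use behind @iommu: set up its
 * IOVA allocator and flush queue, reserve the special ranges, pick an
 * address width/AGAW the hardware supports, record the coherency,
 * snooping and superpage capabilities, and allocate the top-level page
 * directory.
 */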
1847static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1848		       int guest_width)
1849{
1850	int adjust_width, agaw;
1851	unsigned long sagaw;
1852	int err;
1853
1854	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1855
1856	err = init_iova_flush_queue(&domain->iovad,
1857				    iommu_flush_iova, iova_entry_free);
1858	if (err)
1859		return err;
1860
1861	domain_reserve_special_ranges(domain);
1862
1863	/* calculate AGAW */
1864	if (guest_width > cap_mgaw(iommu->cap))
1865		guest_width = cap_mgaw(iommu->cap);
1866	domain->gaw = guest_width;
1867	adjust_width = guestwidth_to_adjustwidth(guest_width);
1868	agaw = width_to_agaw(adjust_width);
1869	sagaw = cap_sagaw(iommu->cap);
1870	if (!test_bit(agaw, &sagaw)) {
1871		/* hardware doesn't support it, choose a bigger one */
1872		pr_debug("Hardware doesn't support agaw %d\n", agaw);
1873		agaw = find_next_bit(&sagaw, 5, agaw);
1874		if (agaw >= 5)
1875			return -ENODEV;
1876	}
1877	domain->agaw = agaw;
1878
1879	if (ecap_coherent(iommu->ecap))
1880		domain->iommu_coherency = 1;
1881	else
1882		domain->iommu_coherency = 0;
1883
1884	if (ecap_sc_support(iommu->ecap))
1885		domain->iommu_snooping = 1;
1886	else
1887		domain->iommu_snooping = 0;
1888
1889	if (intel_iommu_superpage)
1890		domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1891	else
1892		domain->iommu_superpage = 0;
1893
1894	domain->nid = iommu->node;
1895
1896	/* always allocate the top pgd */
1897	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1898	if (!domain->pgd)
1899		return -ENOMEM;
1900	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1901	return 0;
1902}
1903
1904static void domain_exit(struct dmar_domain *domain)
1905{
1906
1907	/* Remove associated devices and clear attached or cached domains */
1908	domain_remove_dev_info(domain);
1909
1910	/* destroy iovas */
1911	put_iova_domain(&domain->iovad);
1912
1913	if (domain->pgd) {
1914		struct page *freelist;
1915
1916		freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1917		dma_free_pagelist(freelist);
1918	}
1919
1920	free_domain_mem(domain);
1921}
1922
1923/*
1924 * Get the PASID directory size for scalable mode context entry.
1925 * Value of X in the PDTS field of a scalable mode context entry
1926 * indicates PASID directory with 2^(X + 7) entries.
1927 */
1928static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1929{
1930	int pds, max_pde;
1931
1932	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1933	pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1934	if (pds < 7)
1935		return 0;
1936
1937	return pds - 7;
1938}
1939
1940/*
1941 * Set the RID_PASID field of a scalable mode context entry. The
1942 * IOMMU hardware will use the PASID value set in this field for
1943 * DMA translations of DMA requests without PASID.
1944 */
1945static inline void
1946context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1947{
1948	context->hi |= pasid & ((1 << 20) - 1);
1949	context->hi |= (1 << 20);
1950}
1951
1952/*
1953 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1954 * entry.
1955 */
1956static inline void context_set_sm_dte(struct context_entry *context)
1957{
1958	context->lo |= (1 << 2);
1959}
1960
1961/*
1962 * Set the PRE(Page Request Enable) field of a scalable mode context
1963 * entry.
1964 */
1965static inline void context_set_sm_pre(struct context_entry *context)
1966{
1967	context->lo |= (1 << 4);
1968}
1969
1970/* Convert value to context PASID directory size field coding. */
1971#define context_pdts(pds)	(((pds) & 0x7) << 9)
1972
1973static int domain_context_mapping_one(struct dmar_domain *domain,
1974				      struct intel_iommu *iommu,
1975				      struct pasid_table *table,
1976				      u8 bus, u8 devfn)
1977{
1978	u16 did = domain->iommu_did[iommu->seq_id];
1979	int translation = CONTEXT_TT_MULTI_LEVEL;
1980	struct device_domain_info *info = NULL;
1981	struct context_entry *context;
1982	unsigned long flags;
1983	int ret;
1984
1985	WARN_ON(did == 0);
1986
1987	if (hw_pass_through && domain_type_is_si(domain))
1988		translation = CONTEXT_TT_PASS_THROUGH;
1989
1990	pr_debug("Set context mapping for %02x:%02x.%d\n",
1991		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1992
1993	BUG_ON(!domain->pgd);
1994
1995	spin_lock_irqsave(&device_domain_lock, flags);
1996	spin_lock(&iommu->lock);
1997
1998	ret = -ENOMEM;
1999	context = iommu_context_addr(iommu, bus, devfn, 1);
2000	if (!context)
2001		goto out_unlock;
2002
2003	ret = 0;
2004	if (context_present(context))
2005		goto out_unlock;
2006
2007	/*
2008	 * For kdump cases, old valid entries may be cached due to the
2009	 * in-flight DMA and copied pgtable, but there is no unmapping
2010	 * behaviour for them, so we need an explicit cache flush for
2011	 * the newly-mapped device. For kdump, at this point, the device
2012	 * is supposed to have finished reset at its driver probe stage,
2013	 * so no in-flight DMA will exist, and we don't need to worry
2014	 * about it hereafter.
2015	 */
2016	if (context_copied(context)) {
2017		u16 did_old = context_domain_id(context);
2018
2019		if (did_old < cap_ndoms(iommu->cap)) {
2020			iommu->flush.flush_context(iommu, did_old,
2021						   (((u16)bus) << 8) | devfn,
2022						   DMA_CCMD_MASK_NOBIT,
2023						   DMA_CCMD_DEVICE_INVL);
2024			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2025						 DMA_TLB_DSI_FLUSH);
2026		}
2027	}
2028
2029	context_clear_entry(context);
2030
2031	if (sm_supported(iommu)) {
2032		unsigned long pds;
2033
2034		WARN_ON(!table);
2035
2036		/* Setup the PASID DIR pointer: */
2037		pds = context_get_sm_pds(table);
2038		context->lo = (u64)virt_to_phys(table->table) |
2039				context_pdts(pds);
2040
2041		/* Setup the RID_PASID field: */
2042		context_set_sm_rid2pasid(context, PASID_RID2PASID);
2043
2044		/*
2045		 * Setup the Device-TLB enable bit and Page request
2046		 * Enable bit:
2047		 */
2048		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2049		if (info && info->ats_supported)
2050			context_set_sm_dte(context);
2051		if (info && info->pri_supported)
2052			context_set_sm_pre(context);
2053	} else {
2054		struct dma_pte *pgd = domain->pgd;
2055		int agaw;
2056
2057		context_set_domain_id(context, did);
2058
2059		if (translation != CONTEXT_TT_PASS_THROUGH) {
2060			/*
2061			 * Skip top levels of page tables for iommu which has
2062			 * less agaw than default. Unnecessary for PT mode.
2063			 */
2064			for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2065				ret = -ENOMEM;
2066				pgd = phys_to_virt(dma_pte_addr(pgd));
2067				if (!dma_pte_present(pgd))
2068					goto out_unlock;
2069			}
2070
2071			info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2072			if (info && info->ats_supported)
2073				translation = CONTEXT_TT_DEV_IOTLB;
2074			else
2075				translation = CONTEXT_TT_MULTI_LEVEL;
2076
2077			context_set_address_root(context, virt_to_phys(pgd));
2078			context_set_address_width(context, agaw);
2079		} else {
2080			/*
2081			 * In pass-through mode, AW must be programmed to
2082			 * indicate the largest AGAW value supported by the
2083			 * hardware, and ASR is ignored by the hardware.
2084			 */
2085			context_set_address_width(context, iommu->msagaw);
2086		}
2087
2088		context_set_translation_type(context, translation);
2089	}
2090
2091	context_set_fault_enable(context);
2092	context_set_present(context);
2093	domain_flush_cache(domain, context, sizeof(*context));
2094
2095	/*
2096	 * It's a non-present to present mapping. If the hardware doesn't cache
2097	 * non-present entries we only need to flush the write-buffer. If it
2098	 * _does_ cache non-present entries, then it does so in the special
2099	 * domain #0, which we have to flush:
2100	 */
2101	if (cap_caching_mode(iommu->cap)) {
2102		iommu->flush.flush_context(iommu, 0,
2103					   (((u16)bus) << 8) | devfn,
2104					   DMA_CCMD_MASK_NOBIT,
2105					   DMA_CCMD_DEVICE_INVL);
2106		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2107	} else {
2108		iommu_flush_write_buffer(iommu);
2109	}
2110	iommu_enable_dev_iotlb(info);
2111
2112	ret = 0;
2113
2114out_unlock:
2115	spin_unlock(&iommu->lock);
2116	spin_unlock_irqrestore(&device_domain_lock, flags);
2117
2118	return ret;
2119}
2120
2121struct domain_context_mapping_data {
2122	struct dmar_domain *domain;
2123	struct intel_iommu *iommu;
2124	struct pasid_table *table;
2125};
2126
2127static int domain_context_mapping_cb(struct pci_dev *pdev,
2128				     u16 alias, void *opaque)
2129{
2130	struct domain_context_mapping_data *data = opaque;
2131
2132	return domain_context_mapping_one(data->domain, data->iommu,
2133					  data->table, PCI_BUS_NUM(alias),
2134					  alias & 0xff);
2135}
2136
2137static int
2138domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2139{
2140	struct domain_context_mapping_data data;
2141	struct pasid_table *table;
2142	struct intel_iommu *iommu;
2143	u8 bus, devfn;
2144
2145	iommu = device_to_iommu(dev, &bus, &devfn);
2146	if (!iommu)
2147		return -ENODEV;
2148
2149	table = intel_pasid_get_table(dev);
2150
2151	if (!dev_is_pci(dev))
2152		return domain_context_mapping_one(domain, iommu, table,
2153						  bus, devfn);
2154
2155	data.domain = domain;
2156	data.iommu = iommu;
2157	data.table = table;
2158
2159	return pci_for_each_dma_alias(to_pci_dev(dev),
2160				      &domain_context_mapping_cb, &data);
2161}
2162
2163static int domain_context_mapped_cb(struct pci_dev *pdev,
2164				    u16 alias, void *opaque)
2165{
2166	struct intel_iommu *iommu = opaque;
2167
2168	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2169}
2170
2171static int domain_context_mapped(struct device *dev)
2172{
2173	struct intel_iommu *iommu;
2174	u8 bus, devfn;
2175
2176	iommu = device_to_iommu(dev, &bus, &devfn);
2177	if (!iommu)
2178		return -ENODEV;
2179
2180	if (!dev_is_pci(dev))
2181		return device_context_mapped(iommu, bus, devfn);
2182
2183	return !pci_for_each_dma_alias(to_pci_dev(dev),
2184				       domain_context_mapped_cb, iommu);
2185}
2186
2187/* Returns a number of VTD pages, but aligned to MM page size */
2188static inline unsigned long aligned_nrpages(unsigned long host_addr,
2189					    size_t size)
2190{
2191	host_addr &= ~PAGE_MASK;
2192	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2193}
2194
2195/* Return largest possible superpage level for a given mapping */
2196static inline int hardware_largepage_caps(struct dmar_domain *domain,
2197					  unsigned long iov_pfn,
2198					  unsigned long phy_pfn,
2199					  unsigned long pages)
2200{
2201	int support, level = 1;
2202	unsigned long pfnmerge;
2203
2204	support = domain->iommu_superpage;
2205
2206	/* To use a large page, the virtual *and* physical addresses
2207	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2208	   of them will mean we have to use smaller pages. So just
2209	   merge them and check both at once. */
2210	pfnmerge = iov_pfn | phy_pfn;
2211
2212	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2213		pages >>= VTD_STRIDE_SHIFT;
2214		if (!pages)
2215			break;
2216		pfnmerge >>= VTD_STRIDE_SHIFT;
2217		level++;
2218		support--;
2219	}
2220	return level;
2221}
2222
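/*
 * Install page-table entries for @nr_pages VT-d pages starting at
 * @iov_pfn.  The physical addresses come either from @sg (when it is
 * non-NULL) or from the contiguous range starting at @phys_pfn.
 * Superpage PTEs are used where alignment, remaining length and
 * hardware support allow, and the newly written PTEs are flushed (when
 * required) as each page of them is completed.
 */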
2223static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2224			    struct scatterlist *sg, unsigned long phys_pfn,
2225			    unsigned long nr_pages, int prot)
2226{
2227	struct dma_pte *first_pte = NULL, *pte = NULL;
2228	phys_addr_t uninitialized_var(pteval);
2229	unsigned long sg_res = 0;
2230	unsigned int largepage_lvl = 0;
2231	unsigned long lvl_pages = 0;
2232
2233	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2234
2235	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2236		return -EINVAL;
2237
2238	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2239
2240	if (!sg) {
2241		sg_res = nr_pages;
2242		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2243	}
2244
2245	while (nr_pages > 0) {
2246		uint64_t tmp;
2247
2248		if (!sg_res) {
2249			unsigned int pgoff = sg->offset & ~PAGE_MASK;
2250
2251			sg_res = aligned_nrpages(sg->offset, sg->length);
2252			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2253			sg->dma_length = sg->length;
2254			pteval = (sg_phys(sg) - pgoff) | prot;
2255			phys_pfn = pteval >> VTD_PAGE_SHIFT;
2256		}
2257
2258		if (!pte) {
2259			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2260
2261			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2262			if (!pte)
2263				return -ENOMEM;
2264			/* It is a large page */
2265			if (largepage_lvl > 1) {
2266				unsigned long nr_superpages, end_pfn;
2267
2268				pteval |= DMA_PTE_LARGE_PAGE;
2269				lvl_pages = lvl_to_nr_pages(largepage_lvl);
2270
2271				nr_superpages = sg_res / lvl_pages;
2272				end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2273
2274				/*
2275				 * Ensure that old small page tables are
2276				 * removed to make room for superpage(s).
2277				 * We're adding new large pages, so make sure
2278				 * we don't remove their parent tables.
2279				 */
2280				dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2281						       largepage_lvl + 1);
2282			} else {
2283				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2284			}
2285
2286		}
2287		/* We don't need a lock here; nobody else
2288		 * touches the iova range.
2289		 */
2290		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2291		if (tmp) {
2292			static int dumps = 5;
2293			pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2294				iov_pfn, tmp, (unsigned long long)pteval);
2295			if (dumps) {
2296				dumps--;
2297				debug_dma_dump_mappings(NULL);
2298			}
2299			WARN_ON(1);
2300		}
2301
2302		lvl_pages = lvl_to_nr_pages(largepage_lvl);
2303
2304		BUG_ON(nr_pages < lvl_pages);
2305		BUG_ON(sg_res < lvl_pages);
2306
2307		nr_pages -= lvl_pages;
2308		iov_pfn += lvl_pages;
2309		phys_pfn += lvl_pages;
2310		pteval += lvl_pages * VTD_PAGE_SIZE;
2311		sg_res -= lvl_pages;
2312
2313		/* If the next PTE would be the first in a new page, then we
2314		   need to flush the cache on the entries we've just written.
2315		   And then we'll need to recalculate 'pte', so clear it and
2316		   let it get set again in the if (!pte) block above.
2317
2318		   If we're done (!nr_pages) we need to flush the cache too.
2319
2320		   Also if we've been setting superpages, we may need to
2321		   recalculate 'pte' and switch back to smaller pages for the
2322		   end of the mapping, if the trailing size is not enough to
2323		   use another superpage (i.e. sg_res < lvl_pages). */
2324		pte++;
2325		if (!nr_pages || first_pte_in_page(pte) ||
2326		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
2327			domain_flush_cache(domain, first_pte,
2328					   (void *)pte - (void *)first_pte);
2329			pte = NULL;
2330		}
2331
2332		if (!sg_res && nr_pages)
2333			sg = sg_next(sg);
2334	}
2335	return 0;
2336}
2337
2338static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2339			  struct scatterlist *sg, unsigned long phys_pfn,
2340			  unsigned long nr_pages, int prot)
2341{
2342	int iommu_id, ret;
2343	struct intel_iommu *iommu;
2344
2345	/* Do the real mapping first */
2346	ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2347	if (ret)
2348		return ret;
2349
2350	for_each_domain_iommu(iommu_id, domain) {
2351		iommu = g_iommus[iommu_id];
2352		__mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2353	}
2354
2355	return 0;
2356}
2357
2358static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2359				    struct scatterlist *sg, unsigned long nr_pages,
2360				    int prot)
2361{
2362	return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2363}
2364
2365static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2366				     unsigned long phys_pfn, unsigned long nr_pages,
2367				     int prot)
2368{
2369	return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2370}
2371
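/*
 * Clear the context entry for @bus/@devfn on @iommu and invalidate the
 * context-cache and IOTLB entries tagged with the old domain id.
 */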
2372static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2373{
2374	unsigned long flags;
2375	struct context_entry *context;
2376	u16 did_old;
2377
2378	if (!iommu)
2379		return;
2380
2381	spin_lock_irqsave(&iommu->lock, flags);
2382	context = iommu_context_addr(iommu, bus, devfn, 0);
2383	if (!context) {
2384		spin_unlock_irqrestore(&iommu->lock, flags);
2385		return;
2386	}
2387	did_old = context_domain_id(context);
2388	context_clear_entry(context);
2389	__iommu_flush_cache(iommu, context, sizeof(*context));
2390	spin_unlock_irqrestore(&iommu->lock, flags);
2391	iommu->flush.flush_context(iommu,
2392				   did_old,
2393				   (((u16)bus) << 8) | devfn,
2394				   DMA_CCMD_MASK_NOBIT,
2395				   DMA_CCMD_DEVICE_INVL);
2396	iommu->flush.flush_iotlb(iommu,
2397				 did_old,
2398				 0,
2399				 0,
2400				 DMA_TLB_DSI_FLUSH);
2401}
2402
2403static inline void unlink_domain_info(struct device_domain_info *info)
2404{
2405	assert_spin_locked(&device_domain_lock);
2406	list_del(&info->link);
2407	list_del(&info->global);
2408	if (info->dev)
2409		info->dev->archdata.iommu = NULL;
2410}
2411
2412static void domain_remove_dev_info(struct dmar_domain *domain)
2413{
2414	struct device_domain_info *info, *tmp;
2415	unsigned long flags;
2416
2417	spin_lock_irqsave(&device_domain_lock, flags);
2418	list_for_each_entry_safe(info, tmp, &domain->devices, link)
2419		__dmar_remove_one_dev_info(info);
2420	spin_unlock_irqrestore(&device_domain_lock, flags);
2421}
2422
2423/*
2424 * find_domain
2425 * Note: we use struct device->archdata.iommu to store the info
2426 */
2427static struct dmar_domain *find_domain(struct device *dev)
2428{
2429	struct device_domain_info *info;
2430
2431	if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
2432		struct iommu_domain *domain;
2433
2434		dev->archdata.iommu = NULL;
2435		domain = iommu_get_domain_for_dev(dev);
2436		if (domain)
2437			intel_iommu_attach_device(domain, dev);
2438	}
2439
2440	/* No lock here, assumes no domain exit in normal case */
2441	info = dev->archdata.iommu;
2442
2443	if (likely(info))
2444		return info->domain;
2445	return NULL;
2446}
2447
2448static inline struct device_domain_info *
2449dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2450{
2451	struct device_domain_info *info;
2452
2453	list_for_each_entry(info, &device_domain_list, global)
2454		if (info->iommu->segment == segment && info->bus == bus &&
2455		    info->devfn == devfn)
2456			return info;
2457
2458	return NULL;
2459}
2460
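/*
 * Bind a device (identified by @bus/@devfn) to @domain on @iommu:
 * allocate and fill its device_domain_info, attach the domain to the
 * IOMMU, set up the PASID table and RID2PASID entry in scalable mode,
 * and install the context mapping.  If the device or its DMA alias is
 * already bound, the existing domain is returned and the caller must
 * free the one it passed in.
 */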
2461static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2462						    int bus, int devfn,
2463						    struct device *dev,
2464						    struct dmar_domain *domain)
2465{
2466	struct dmar_domain *found = NULL;
2467	struct device_domain_info *info;
2468	unsigned long flags;
2469	int ret;
2470
2471	info = alloc_devinfo_mem();
2472	if (!info)
2473		return NULL;
2474
2475	info->bus = bus;
2476	info->devfn = devfn;
2477	info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2478	info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2479	info->ats_qdep = 0;
2480	info->dev = dev;
2481	info->domain = domain;
2482	info->iommu = iommu;
2483	info->pasid_table = NULL;
2484	info->auxd_enabled = 0;
2485	INIT_LIST_HEAD(&info->auxiliary_domains);
2486
2487	if (dev && dev_is_pci(dev)) {
2488		struct pci_dev *pdev = to_pci_dev(info->dev);
2489
2490		if (!pdev->untrusted &&
2491		    !pci_ats_disabled() &&
2492		    ecap_dev_iotlb_support(iommu->ecap) &&
2493		    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2494		    dmar_find_matched_atsr_unit(pdev))
2495			info->ats_supported = 1;
2496
2497		if (sm_supported(iommu)) {
2498			if (pasid_supported(iommu)) {
2499				int features = pci_pasid_features(pdev);
2500				if (features >= 0)
2501					info->pasid_supported = features | 1;
2502			}
2503
2504			if (info->ats_supported && ecap_prs(iommu->ecap) &&
2505			    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2506				info->pri_supported = 1;
2507		}
2508	}
2509
2510	spin_lock_irqsave(&device_domain_lock, flags);
2511	if (dev)
2512		found = find_domain(dev);
2513
2514	if (!found) {
2515		struct device_domain_info *info2;
2516		info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2517		if (info2) {
2518			found      = info2->domain;
2519			info2->dev = dev;
2520		}
2521	}
2522
2523	if (found) {
2524		spin_unlock_irqrestore(&device_domain_lock, flags);
2525		free_devinfo_mem(info);
2526		/* Caller must free the original domain */
2527		return found;
2528	}
2529
2530	spin_lock(&iommu->lock);
2531	ret = domain_attach_iommu(domain, iommu);
2532	spin_unlock(&iommu->lock);
2533
2534	if (ret) {
2535		spin_unlock_irqrestore(&device_domain_lock, flags);
2536		free_devinfo_mem(info);
2537		return NULL;
2538	}
2539
2540	list_add(&info->link, &domain->devices);
2541	list_add(&info->global, &device_domain_list);
2542	if (dev)
2543		dev->archdata.iommu = info;
2544	spin_unlock_irqrestore(&device_domain_lock, flags);
2545
2546	/* PASID table is mandatory for a PCI device in scalable mode. */
2547	if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2548		ret = intel_pasid_alloc_table(dev);
2549		if (ret) {
2550			dev_err(dev, "PASID table allocation failed\n");
2551			dmar_remove_one_dev_info(dev);
2552			return NULL;
2553		}
2554
2555		/* Setup the PASID entry for requests without PASID: */
2556		spin_lock(&iommu->lock);
2557		if (hw_pass_through && domain_type_is_si(domain))
2558			ret = intel_pasid_setup_pass_through(iommu, domain,
2559					dev, PASID_RID2PASID);
2560		else
2561			ret = intel_pasid_setup_second_level(iommu, domain,
2562					dev, PASID_RID2PASID);
2563		spin_unlock(&iommu->lock);
2564		if (ret) {
2565			dev_err(dev, "Setup RID2PASID failed\n");
2566			dmar_remove_one_dev_info(dev);
2567			return NULL;
2568		}
2569	}
2570
2571	if (dev && domain_context_mapping(domain, dev)) {
2572		dev_err(dev, "Domain context map failed\n");
2573		dmar_remove_one_dev_info(dev);
2574		return NULL;
2575	}
2576
2577	return domain;
2578}
2579
2580static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2581{
2582	*(u16 *)opaque = alias;
2583	return 0;
2584}
2585
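/*
 * Return the domain already used by @dev's DMA alias if there is one;
 * otherwise allocate a new domain and initialize it with the requested
 * guest address width.
 */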
2586static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
2587{
2588	struct device_domain_info *info;
2589	struct dmar_domain *domain = NULL;
2590	struct intel_iommu *iommu;
2591	u16 dma_alias;
2592	unsigned long flags;
2593	u8 bus, devfn;
2594
2595	iommu = device_to_iommu(dev, &bus, &devfn);
2596	if (!iommu)
2597		return NULL;
2598
2599	if (dev_is_pci(dev)) {
2600		struct pci_dev *pdev = to_pci_dev(dev);
2601
2602		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2603
2604		spin_lock_irqsave(&device_domain_lock, flags);
2605		info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2606						      PCI_BUS_NUM(dma_alias),
2607						      dma_alias & 0xff);
2608		if (info) {
2609			iommu = info->iommu;
2610			domain = info->domain;
2611		}
2612		spin_unlock_irqrestore(&device_domain_lock, flags);
2613
2614		/* DMA alias already has a domain, use it */
2615		if (info)
2616			goto out;
2617	}
2618
2619	/* Allocate and initialize new domain for the device */
2620	domain = alloc_domain(0);
2621	if (!domain)
2622		return NULL;
2623	if (domain_init(domain, iommu, gaw)) {
2624		domain_exit(domain);
2625		return NULL;
2626	}
2627
2628out:
2629	return domain;
2630}
2631
2632static struct dmar_domain *set_domain_for_dev(struct device *dev,
2633					      struct dmar_domain *domain)
2634{
2635	struct intel_iommu *iommu;
2636	struct dmar_domain *tmp;
2637	u16 req_id, dma_alias;
2638	u8 bus, devfn;
2639
2640	iommu = device_to_iommu(dev, &bus, &devfn);
2641	if (!iommu)
2642		return NULL;
2643
2644	req_id = ((u16)bus << 8) | devfn;
2645
2646	if (dev_is_pci(dev)) {
2647		struct pci_dev *pdev = to_pci_dev(dev);
2648
2649		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2650
2651		/* register PCI DMA alias device */
2652		if (req_id != dma_alias) {
2653			tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2654					dma_alias & 0xff, NULL, domain);
2655
2656			if (!tmp || tmp != domain)
2657				return tmp;
2658		}
2659	}
2660
2661	tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2662	if (!tmp || tmp != domain)
2663		return tmp;
2664
2665	return domain;
2666}
2667
2668static int iommu_domain_identity_map(struct dmar_domain *domain,
2669				     unsigned long long start,
2670				     unsigned long long end)
2671{
2672	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2673	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2674
2675	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2676			  dma_to_mm_pfn(last_vpfn))) {
2677		pr_err("Reserving iova failed\n");
2678		return -ENOMEM;
2679	}
2680
2681	pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2682	/*
2683	 * RMRR range might have overlap with physical memory range,
2684	 * clear it first
2685	 */
2686	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2687
2688	return __domain_mapping(domain, first_vpfn, NULL,
2689				first_vpfn, last_vpfn - first_vpfn + 1,
2690				DMA_PTE_READ|DMA_PTE_WRITE);
2691}
2692
2693static int domain_prepare_identity_map(struct device *dev,
2694				       struct dmar_domain *domain,
2695				       unsigned long long start,
2696				       unsigned long long end)
2697{
2698	/* For _hardware_ passthrough, don't bother. But for software
2699	   passthrough, we do it anyway -- it may indicate a memory
2700	   range which is reserved in E820, and so didn't get set
2701	   up to start with in si_domain */
2702	if (domain == si_domain && hw_pass_through) {
2703		dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2704			 start, end);
2705		return 0;
2706	}
2707
2708	dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
2709
2710	if (end < start) {
2711		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2712			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2713			dmi_get_system_info(DMI_BIOS_VENDOR),
2714			dmi_get_system_info(DMI_BIOS_VERSION),
2715		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2716		return -EIO;
2717	}
2718
2719	if (end >> agaw_to_width(domain->agaw)) {
2720		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2721		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2722		     agaw_to_width(domain->agaw),
2723		     dmi_get_system_info(DMI_BIOS_VENDOR),
2724		     dmi_get_system_info(DMI_BIOS_VERSION),
2725		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2726		return -EIO;
2727	}
2728
2729	return iommu_domain_identity_map(domain, start, end);
2730}
2731
2732static int md_domain_init(struct dmar_domain *domain, int guest_width);
2733
2734static int __init si_domain_init(int hw)
2735{
2736	struct dmar_rmrr_unit *rmrr;
2737	struct device *dev;
2738	int i, nid, ret;
2739
2740	si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2741	if (!si_domain)
2742		return -EFAULT;
2743
2744	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2745		domain_exit(si_domain);
2746		return -EFAULT;
2747	}
2748
2749	if (hw)
2750		return 0;
2751
2752	for_each_online_node(nid) {
2753		unsigned long start_pfn, end_pfn;
2754		int i;
2755
2756		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2757			ret = iommu_domain_identity_map(si_domain,
2758					PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2759			if (ret)
2760				return ret;
2761		}
2762	}
2763
2764	/*
2765	 * Normally we use DMA domains for devices which have RMRRs. But we
2766	 * relax this requirement for graphics and USB devices. Identity-map
2767	 * the RMRRs for graphics and USB devices so that they can use the
2768	 * si_domain.
2769	 */
2770	for_each_rmrr_units(rmrr) {
2771		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2772					  i, dev) {
2773			unsigned long long start = rmrr->base_address;
2774			unsigned long long end = rmrr->end_address;
2775
2776			if (device_is_rmrr_locked(dev))
2777				continue;
2778
2779			if (WARN_ON(end < start ||
2780				    end >> agaw_to_width(si_domain->agaw)))
2781				continue;
2782
2783			ret = iommu_domain_identity_map(si_domain, start, end);
2784			if (ret)
2785				return ret;
2786		}
2787	}
2788
2789	return 0;
2790}
2791
2792static int identity_mapping(struct device *dev)
2793{
2794	struct device_domain_info *info;
2795
2796	info = dev->archdata.iommu;
2797	if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO)
2798		return (info->domain == si_domain);
2799
2800	return 0;
2801}
2802
2803static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2804{
2805	struct dmar_domain *ndomain;
2806	struct intel_iommu *iommu;
2807	u8 bus, devfn;
2808
2809	iommu = device_to_iommu(dev, &bus, &devfn);
2810	if (!iommu)
2811		return -ENODEV;
2812
2813	ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2814	if (ndomain != domain)
2815		return -EBUSY;
2816
2817	return 0;
2818}
2819
2820static bool device_has_rmrr(struct device *dev)
2821{
2822	struct dmar_rmrr_unit *rmrr;
2823	struct device *tmp;
2824	int i;
2825
2826	rcu_read_lock();
2827	for_each_rmrr_units(rmrr) {
2828		/*
2829		 * Return TRUE if this RMRR contains the device that
2830		 * is passed in.
2831		 */
2832		for_each_active_dev_scope(rmrr->devices,
2833					  rmrr->devices_cnt, i, tmp)
2834			if (tmp == dev ||
2835			    is_downstream_to_pci_bridge(dev, tmp)) {
2836				rcu_read_unlock();
2837				return true;
2838			}
2839	}
2840	rcu_read_unlock();
2841	return false;
2842}
2843
2844/**
2845 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2846 * is relaxable (ie. is allowed to be not enforced under some conditions)
2847 * @dev: device handle
2848 *
2849 * We assume that PCI USB devices with RMRRs have them largely
2850 * for historical reasons and that the RMRR space is not actively used post
2851 * boot.  This exclusion may change if vendors begin to abuse it.
2852 *
2853 * The same exception is made for graphics devices, with the requirement that
2854 * any use of the RMRR regions will be torn down before assigning the device
2855 * to a guest.
2856 *
2857 * Return: true if the RMRR is relaxable, false otherwise
2858 */
2859static bool device_rmrr_is_relaxable(struct device *dev)
2860{
2861	struct pci_dev *pdev;
2862
2863	if (!dev_is_pci(dev))
2864		return false;
2865
2866	pdev = to_pci_dev(dev);
2867	if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2868		return true;
2869	else
2870		return false;
2871}
2872
2873/*
2874 * There are a couple cases where we need to restrict the functionality of
2875 * devices associated with RMRRs.  The first is when evaluating a device for
2876 * identity mapping because problems exist when devices are moved in and out
2877 * of domains and their respective RMRR information is lost.  This means that
2878 * a device with associated RMRRs will never be in a "passthrough" domain.
2879 * The second is use of the device through the IOMMU API.  This interface
2880 * expects to have full control of the IOVA space for the device.  We cannot
2881 * satisfy both the requirement that RMRR access is maintained and have an
2882 * unencumbered IOVA space.  We also have no ability to quiesce the device's
2883 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2884 * We therefore prevent devices associated with an RMRR from participating in
2885 * the IOMMU API, which eliminates them from device assignment.
2886 *
2887 * In both cases, devices which have relaxable RMRRs are not concerned by this
2888 * restriction. See device_rmrr_is_relaxable comment.
2889 */
2890static bool device_is_rmrr_locked(struct device *dev)
2891{
2892	if (!device_has_rmrr(dev))
2893		return false;
2894
2895	if (device_rmrr_is_relaxable(dev))
2896		return false;
2897
2898	return true;
2899}
2900
2901/*
2902 * Return the required default domain type for a specific device.
2903 *
2904 * @dev: the device in query
2906 *
2907 * Returns:
2908 *  - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2909 *  - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
2910 *  - 0: both identity and dynamic domains work for this device
2911 */
2912static int device_def_domain_type(struct device *dev)
2913{
2914	if (dev_is_pci(dev)) {
2915		struct pci_dev *pdev = to_pci_dev(dev);
2916
2917		if (device_is_rmrr_locked(dev))
2918			return IOMMU_DOMAIN_DMA;
2919
2920		/*
2921		 * Prevent any device marked as untrusted from getting
2922		 * placed into the static identity mapping domain.
2923		 */
2924		if (pdev->untrusted)
2925			return IOMMU_DOMAIN_DMA;
2926
2927		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2928			return IOMMU_DOMAIN_IDENTITY;
2929
2930		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2931			return IOMMU_DOMAIN_IDENTITY;
2932
2933		/*
2934		 * We want to start off with all devices in the 1:1 domain, and
2935		 * take them out later if we find they can't access all of memory.
2936		 *
2937		 * However, we can't do this for PCI devices behind bridges,
2938		 * because all PCI devices behind the same bridge will end up
2939		 * with the same source-id on their transactions.
2940		 *
2941		 * Practically speaking, we can't change things around for these
2942		 * devices at run-time, because we can't be sure there'll be no
2943		 * DMA transactions in flight for any of their siblings.
2944		 *
2945		 * So PCI devices (unless they're on the root bus) as well as
2946		 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2947		 * the 1:1 domain, just in _case_ one of their siblings turns out
2948		 * not to be able to map all of memory.
2949		 */
2950		if (!pci_is_pcie(pdev)) {
2951			if (!pci_is_root_bus(pdev->bus))
2952				return IOMMU_DOMAIN_DMA;
2953			if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2954				return IOMMU_DOMAIN_DMA;
2955		} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2956			return IOMMU_DOMAIN_DMA;
2957	} else {
2958		if (device_has_rmrr(dev))
2959			return IOMMU_DOMAIN_DMA;
2960	}
2961
2962	return (iommu_identity_mapping & IDENTMAP_ALL) ?
2963			IOMMU_DOMAIN_IDENTITY : 0;
2964}
2965
2966static void intel_iommu_init_qi(struct intel_iommu *iommu)
2967{
2968	/*
2969	 * Start from a sane iommu hardware state.
2970	 * If queued invalidation has already been initialized by us
2971	 * (for example, while enabling interrupt remapping) then
2972	 * things are already rolling from a sane state.
2973	 */
2974	if (!iommu->qi) {
2975		/*
2976		 * Clear any previous faults.
2977		 */
2978		dmar_fault(-1, iommu);
2979		/*
2980		 * Disable queued invalidation if supported and already enabled
2981		 * before OS handover.
2982		 */
2983		dmar_disable_qi(iommu);
2984	}
2985
2986	if (dmar_enable_qi(iommu)) {
2987		/*
2988		 * Queued invalidation is not enabled; use register-based invalidation.
2989		 */
2990		iommu->flush.flush_context = __iommu_flush_context;
2991		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2992		pr_info("%s: Using Register based invalidation\n",
2993			iommu->name);
2994	} else {
2995		iommu->flush.flush_context = qi_flush_context;
2996		iommu->flush.flush_iotlb = qi_flush_iotlb;
2997		pr_info("%s: Using Queued invalidation\n", iommu->name);
2998	}
2999}
3000
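/*
 * Copy one bus worth of context entries from the old kernel's table at
 * @old_re into newly allocated tables hung off @tbl.  Domain ids found
 * in the old entries are reserved in @iommu->domain_ids, and every
 * copied entry gets its PASID-enable bit cleared and the "copied"
 * marker set.
 */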
3001static int copy_context_table(struct intel_iommu *iommu,
3002			      struct root_entry *old_re,
3003			      struct context_entry **tbl,
3004			      int bus, bool ext)
3005{
3006	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
3007	struct context_entry *new_ce = NULL, ce;
3008	struct context_entry *old_ce = NULL;
3009	struct root_entry re;
3010	phys_addr_t old_ce_phys;
3011
3012	tbl_idx = ext ? bus * 2 : bus;
3013	memcpy(&re, old_re, sizeof(re));
3014
3015	for (devfn = 0; devfn < 256; devfn++) {
3016		/* First calculate the correct index */
3017		idx = (ext ? devfn * 2 : devfn) % 256;
3018
3019		if (idx == 0) {
3020			/* First save what we may have and clean up */
3021			if (new_ce) {
3022				tbl[tbl_idx] = new_ce;
3023				__iommu_flush_cache(iommu, new_ce,
3024						    VTD_PAGE_SIZE);
3025				pos = 1;
3026			}
3027
3028			if (old_ce)
3029				memunmap(old_ce);
3030
3031			ret = 0;
3032			if (devfn < 0x80)
3033				old_ce_phys = root_entry_lctp(&re);
3034			else
3035				old_ce_phys = root_entry_uctp(&re);
3036
3037			if (!old_ce_phys) {
3038				if (ext && devfn == 0) {
3039					/* No LCTP, try UCTP */
3040					devfn = 0x7f;
3041					continue;
3042				} else {
3043					goto out;
3044				}
3045			}
3046
3047			ret = -ENOMEM;
3048			old_ce = memremap(old_ce_phys, PAGE_SIZE,
3049					MEMREMAP_WB);
3050			if (!old_ce)
3051				goto out;
3052
3053			new_ce = alloc_pgtable_page(iommu->node);
3054			if (!new_ce)
3055				goto out_unmap;
3056
3057			ret = 0;
3058		}
3059
3060		/* Now copy the context entry */
3061		memcpy(&ce, old_ce + idx, sizeof(ce));
3062
3063		if (!__context_present(&ce))
3064			continue;
3065
3066		did = context_domain_id(&ce);
3067		if (did >= 0 && did < cap_ndoms(iommu->cap))
3068			set_bit(did, iommu->domain_ids);
3069
3070		/*
3071		 * We need a marker for copied context entries. This
3072		 * marker needs to work for the old format as well as
3073		 * for extended context entries.
3074		 *
3075		 * Bit 67 of the context entry is used. In the old
3076		 * format this bit is available to software, in the
3077		 * extended format it is the PGE bit, but PGE is ignored
3078		 * by HW if PASIDs are disabled (and thus still
3079		 * available).
3080		 *
3081		 * So disable PASIDs first and then mark the entry
3082		 * copied. This means that we don't copy PASID
3083		 * translations from the old kernel, but this is fine as
3084		 * faults there are not fatal.
3085		 */
3086		context_clear_pasid_enable(&ce);
3087		context_set_copied(&ce);
3088
3089		new_ce[idx] = ce;
3090	}
3091
3092	tbl[tbl_idx + pos] = new_ce;
3093
3094	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3095
3096out_unmap:
3097	memunmap(old_ce);
3098
3099out:
3100	return ret;
3101}
3102
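/*
 * Translation was left enabled by the previous kernel (the kdump case):
 * remap its root table, copy all context tables into memory owned by
 * this kernel, and hook the copies into our root entries so the old
 * mappings remain usable while we take over.
 */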
3103static int copy_translation_tables(struct intel_iommu *iommu)
3104{
3105	struct context_entry **ctxt_tbls;
3106	struct root_entry *old_rt;
3107	phys_addr_t old_rt_phys;
3108	int ctxt_table_entries;
3109	unsigned long flags;
3110	u64 rtaddr_reg;
3111	int bus, ret;
3112	bool new_ext, ext;
3113
3114	rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3115	ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
3116	new_ext    = !!ecap_ecs(iommu->ecap);
3117
3118	/*
3119	 * The RTT bit can only be changed when translation is disabled,
3120	 * but disabling translation would open a window for data
3121	 * corruption. So bail out and don't copy anything if we would
3122	 * have to change the bit.
3123	 */
3124	if (new_ext != ext)
3125		return -EINVAL;
3126
3127	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3128	if (!old_rt_phys)
3129		return -EINVAL;
3130
3131	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3132	if (!old_rt)
3133		return -ENOMEM;
3134
3135	/* This is too big for the stack - allocate it from slab */
3136	ctxt_table_entries = ext ? 512 : 256;
3137	ret = -ENOMEM;
3138	ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3139	if (!ctxt_tbls)
3140		goto out_unmap;
3141
3142	for (bus = 0; bus < 256; bus++) {
3143		ret = copy_context_table(iommu, &old_rt[bus],
3144					 ctxt_tbls, bus, ext);
3145		if (ret) {
3146			pr_err("%s: Failed to copy context table for bus %d\n",
3147				iommu->name, bus);
3148			continue;
3149		}
3150	}
3151
3152	spin_lock_irqsave(&iommu->lock, flags);
3153
3154	/* Context tables are copied, now write them to the root_entry table */
3155	for (bus = 0; bus < 256; bus++) {
3156		int idx = ext ? bus * 2 : bus;
3157		u64 val;
3158
3159		if (ctxt_tbls[idx]) {
3160			val = virt_to_phys(ctxt_tbls[idx]) | 1;
3161			iommu->root_entry[bus].lo = val;
3162		}
3163
3164		if (!ext || !ctxt_tbls[idx + 1])
3165			continue;
3166
3167		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3168		iommu->root_entry[bus].hi = val;
3169	}
3170
3171	spin_unlock_irqrestore(&iommu->lock, flags);
3172
3173	kfree(ctxt_tbls);
3174
3175	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3176
3177	ret = 0;
3178
3179out_unmap:
3180	memunmap(old_rt);
3181
3182	return ret;
3183}
3184
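/*
 * One-time initialization of all DMAR units: allocate the global iommu
 * array; for each IOMMU set up invalidation, domain bookkeeping and a
 * root entry (copying the tables of a pre-enabled previous kernel when
 * possible); program the root entries and issue global flushes; build
 * the static identity domain; and finally enable the page request and
 * fault interrupts.
 */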
3185static int __init init_dmars(void)
3186{
3187	struct dmar_drhd_unit *drhd;
3188	struct intel_iommu *iommu;
3189	int ret;
3190
3191	/*
3192	 * for each drhd
3193	 *    allocate root
3194	 *    initialize and program root entry to not present
3195	 * endfor
3196	 */
3197	for_each_drhd_unit(drhd) {
3198		/*
3199		 * No lock needed: this is only incremented in the single-
3200		 * threaded kernel __init code path; all other accesses are
3201		 * read-only.
3202		 */
3203		if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3204			g_num_of_iommus++;
3205			continue;
3206		}
3207		pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3208	}
3209
3210	/* Preallocate enough resources for IOMMU hot-addition */
3211	if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3212		g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3213
3214	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3215			GFP_KERNEL);
3216	if (!g_iommus) {
3217		pr_err("Allocating global iommu array failed\n");
3218		ret = -ENOMEM;
3219		goto error;
3220	}
3221
3222	for_each_iommu(iommu, drhd) {
3223		if (drhd->ignored) {
3224			iommu_disable_translation(iommu);
3225			continue;
3226		}
3227
3228		/*
3229		 * Find the max pasid size of all IOMMUs in the system.
3230		 * We need to ensure the system pasid table is no bigger
3231		 * than the smallest supported.
3232		 */
3233		if (pasid_supported(iommu)) {
3234			u32 temp = 2 << ecap_pss(iommu->ecap);
3235
3236			intel_pasid_max_id = min_t(u32, temp,
3237						   intel_pasid_max_id);
3238		}
3239
3240		g_iommus[iommu->seq_id] = iommu;
3241
3242		intel_iommu_init_qi(iommu);
3243
3244		ret = iommu_init_domains(iommu);
3245		if (ret)
3246			goto free_iommu;
3247
3248		init_translation_status(iommu);
3249
3250		if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3251			iommu_disable_translation(iommu);
3252			clear_translation_pre_enabled(iommu);
3253			pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3254				iommu->name);
3255		}
3256
3257		/*
3258		 * TBD:
3259		 * we could share the same root & context tables
3260		 * among all IOMMUs. Need to split it later.
3261		 */
3262		ret = iommu_alloc_root_entry(iommu);
3263		if (ret)
3264			goto free_iommu;
3265
3266		if (translation_pre_enabled(iommu)) {
3267			pr_info("Translation already enabled - trying to copy translation structures\n");
3268
3269			ret = copy_translation_tables(iommu);
3270			if (ret) {
3271				/*
3272				 * We found the IOMMU with translation
3273				 * enabled - but failed to copy over the
3274				 * old root-entry table. Try to proceed
3275				 * by disabling translation now and
3276				 * allocating a clean root-entry table.
3277				 * This might cause DMAR faults, but
3278				 * probably the dump will still succeed.
3279				 */
3280				pr_err("Failed to copy translation tables from previous kernel for %s\n",
3281				       iommu->name);
3282				iommu_disable_translation(iommu);
3283				clear_translation_pre_enabled(iommu);
3284			} else {
3285				pr_info("Copied translation tables from previous kernel for %s\n",
3286					iommu->name);
3287			}
3288		}
3289
3290		if (!ecap_pass_through(iommu->ecap))
3291			hw_pass_through = 0;
3292#ifdef CONFIG_INTEL_IOMMU_SVM
3293		if (pasid_supported(iommu))
3294			intel_svm_init(iommu);
3295#endif
3296	}
3297
3298	/*
3299	 * Now that qi is enabled on all iommus, set the root entry and flush
3300	 * caches. This is required on some Intel X58 chipsets, otherwise the
3301	 * flush_context function will loop forever and the boot hangs.
3302	 */
3303	for_each_active_iommu(iommu, drhd) {
3304		iommu_flush_write_buffer(iommu);
3305		iommu_set_root_entry(iommu);
3306		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3307		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3308	}
3309
3310	if (iommu_default_passthrough())
3311		iommu_identity_mapping |= IDENTMAP_ALL;
3312
3313#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3314	dmar_map_gfx = 0;
3315#endif
3316
3317	if (!dmar_map_gfx)
3318		iommu_identity_mapping |= IDENTMAP_GFX;
3319
3320	check_tylersburg_isoch();
3321
3322	ret = si_domain_init(hw_pass_through);
3323	if (ret)
3324		goto free_iommu;
3325
3326	/*
3327	 * for each drhd
3328	 *   enable fault log
3329	 *   global invalidate context cache
3330	 *   global invalidate iotlb
3331	 *   enable translation
3332	 */
3333	for_each_iommu(iommu, drhd) {
3334		if (drhd->ignored) {
3335			/*
3336			 * we always have to disable PMRs or DMA may fail on
3337			 * this device
3338			 */
3339			if (force_on)
3340				iommu_disable_protect_mem_regions(iommu);
3341			continue;
3342		}
3343
3344		iommu_flush_write_buffer(iommu);
3345
3346#ifdef CONFIG_INTEL_IOMMU_SVM
3347		if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3348			/*
3349			 * Calling dmar_alloc_hwirq() with dmar_global_lock held
3350			 * could cause a lock race condition.
3351			 */
3352			up_write(&dmar_global_lock);
3353			ret = intel_svm_enable_prq(iommu);
3354			down_write(&dmar_global_lock);
3355			if (ret)
3356				goto free_iommu;
3357		}
3358#endif
3359		ret = dmar_set_interrupt(iommu);
3360		if (ret)
3361			goto free_iommu;
3362	}
3363
3364	return 0;
3365
3366free_iommu:
3367	for_each_active_iommu(iommu, drhd) {
3368		disable_dmar_iommu(iommu);
3369		free_dmar_iommu(iommu);
3370	}
3371
3372	kfree(g_iommus);
3373
3374error:
3375	return ret;
3376}
3377
3378/* This takes a number of _MM_ pages, not VTD pages */
3379static unsigned long intel_alloc_iova(struct device *dev,
3380				     struct dmar_domain *domain,
3381				     unsigned long nrpages, uint64_t dma_mask)
3382{
3383	unsigned long iova_pfn;
3384
3385	/* Restrict dma_mask to the width that the iommu can handle */
3386	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3387	/* Ensure we reserve the whole size-aligned region */
3388	nrpages = __roundup_pow_of_two(nrpages);
3389
3390	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3391		/*
3392		 * First try to allocate an I/O virtual address in
3393		 * DMA_BIT_MASK(32), and if that fails then try allocating
3394		 * from the higher range.
3395		 */
3396		iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3397					   IOVA_PFN(DMA_BIT_MASK(32)), false);
3398		if (iova_pfn)
3399			return iova_pfn;
3400	}
3401	iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3402				   IOVA_PFN(dma_mask), true);
3403	if (unlikely(!iova_pfn)) {
3404		dev_err(dev, "Allocating %ld-page iova failed", nrpages);
3405		return 0;
3406	}
3407
3408	return iova_pfn;
3409}
3410
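/*
 * Give @dev a private DMA domain: find or allocate a domain for it,
 * identity-map any RMRR ranges that target the device, and attach the
 * device (and its DMA aliases) to the result.
 */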
3411static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
3412{
3413	struct dmar_domain *domain, *tmp;
3414	struct dmar_rmrr_unit *rmrr;
3415	struct device *i_dev;
3416	int i, ret;
3417
3418	/* The device shouldn't be attached to any domain yet. */
3419	domain = find_domain(dev);
3420	if (domain)
3421		return NULL;
3422
3423	domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3424	if (!domain)
3425		goto out;
3426
3427	/* We have a new domain - setup possible RMRRs for the device */
3428	rcu_read_lock();
3429	for_each_rmrr_units(rmrr) {
3430		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3431					  i, i_dev) {
3432			if (i_dev != dev)
3433				continue;
3434
3435			ret = domain_prepare_identity_map(dev, domain,
3436							  rmrr->base_address,
3437							  rmrr->end_address);
3438			if (ret)
3439				dev_err(dev, "Mapping reserved region failed\n");
3440		}
3441	}
3442	rcu_read_unlock();
3443
3444	tmp = set_domain_for_dev(dev, domain);
3445	if (!tmp || domain != tmp) {
3446		domain_exit(domain);
3447		domain = tmp;
3448	}
3449
3450out:
3451	if (!domain)
3452		dev_err(dev, "Allocating domain failed\n");
3453	else
3454		domain->domain.type = IOMMU_DOMAIN_DMA;
3455
3456	return domain;
3457}
3458
3459/* Check if the dev needs to go through non-identity map and unmap process.*/
3460static bool iommu_need_mapping(struct device *dev)
3461{
3462	int ret;
3463
3464	if (iommu_dummy(dev))
3465		return false;
3466
3467	ret = identity_mapping(dev);
3468	if (ret) {
3469		u64 dma_mask = *dev->dma_mask;
3470
3471		if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
3472			dma_mask = dev->coherent_dma_mask;
3473
3474		if (dma_mask >= dma_direct_get_required_mask(dev))
3475			return false;
3476
3477		/*
3478		 * 32 bit DMA is removed from si_domain and falls back to
3479		 * a non-identity mapping.
3480		 */
3481		dmar_remove_one_dev_info(dev);
3482		ret = iommu_request_dma_domain_for_dev(dev);
3483		if (ret) {
3484			struct iommu_domain *domain;
3485			struct dmar_domain *dmar_domain;
3486
3487			domain = iommu_get_domain_for_dev(dev);
3488			if (domain) {
3489				dmar_domain = to_dmar_domain(domain);
3490				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
3491			}
3492			dmar_remove_one_dev_info(dev);
3493			get_private_domain_for_dev(dev);
3494		}
3495
3496		dev_info(dev, "32bit DMA uses non-identity mapping\n");
3497	}
3498
3499	return true;
3500}
3501
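/*
 * Map a physically contiguous buffer for DMA: allocate an IOVA range
 * below @dma_mask, install page-table entries with the permissions
 * implied by @dir, and return the resulting bus address.
 */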
3502static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3503				     size_t size, int dir, u64 dma_mask)
3504{
3505	struct dmar_domain *domain;
3506	phys_addr_t start_paddr;
3507	unsigned long iova_pfn;
3508	int prot = 0;
3509	int ret;
3510	struct intel_iommu *iommu;
3511	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3512
3513	BUG_ON(dir == DMA_NONE);
3514
3515	domain = find_domain(dev);
3516	if (!domain)
3517		return DMA_MAPPING_ERROR;
3518
3519	iommu = domain_get_iommu(domain);
3520	size = aligned_nrpages(paddr, size);
3521
3522	iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3523	if (!iova_pfn)
3524		goto error;
3525
3526	/*
3527	 * Check if DMAR supports zero-length reads on write only
3528	 * mappings.
3529	 */
3530	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3531			!cap_zlr(iommu->cap))
3532		prot |= DMA_PTE_READ;
3533	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3534		prot |= DMA_PTE_WRITE;
3535	/*
3536	 * paddr to (paddr + size) might span a partial page, so map the
3537	 * whole page.  Note: if two parts of one page are mapped separately,
3538	 * two guest addresses may end up mapping to the same host paddr,
3539	 * but this is not a big problem.
3540	 */
3541	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3542				 mm_to_dma_pfn(paddr_pfn), size, prot);
3543	if (ret)
3544		goto error;
3545
3546	start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
3547	start_paddr += paddr & ~PAGE_MASK;
3548
3549	trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
3550
3551	return start_paddr;
3552
3553error:
3554	if (iova_pfn)
3555		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3556	dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3557		size, (unsigned long long)paddr, dir);
3558	return DMA_MAPPING_ERROR;
3559}
3560
3561static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3562				 unsigned long offset, size_t size,
3563				 enum dma_data_direction dir,
3564				 unsigned long attrs)
3565{
3566	if (iommu_need_mapping(dev))
3567		return __intel_map_single(dev, page_to_phys(page) + offset,
3568				size, dir, *dev->dma_mask);
3569	return dma_direct_map_page(dev, page, offset, size, dir, attrs);
3570}
3571
3572static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3573				     size_t size, enum dma_data_direction dir,
3574				     unsigned long attrs)
3575{
3576	if (iommu_need_mapping(dev))
3577		return __intel_map_single(dev, phys_addr, size, dir,
3578				*dev->dma_mask);
3579	return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
3580}
3581
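/*
 * Tear down the IOVA range backing a previous mapping.  In strict mode,
 * for untrusted devices, or when no flush queue is available, the IOTLB
 * is flushed synchronously; otherwise the range is queued for deferred
 * invalidation.
 */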
3582static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
3583{
3584	struct dmar_domain *domain;
3585	unsigned long start_pfn, last_pfn;
3586	unsigned long nrpages;
3587	unsigned long iova_pfn;
3588	struct intel_iommu *iommu;
3589	struct page *freelist;
3590	struct pci_dev *pdev = NULL;
3591
3592	domain = find_domain(dev);
3593	BUG_ON(!domain);
3594
3595	iommu = domain_get_iommu(domain);
3596
3597	iova_pfn = IOVA_PFN(dev_addr);
3598
3599	nrpages = aligned_nrpages(dev_addr, size);
3600	start_pfn = mm_to_dma_pfn(iova_pfn);
3601	last_pfn = start_pfn + nrpages - 1;
3602
3603	if (dev_is_pci(dev))
3604		pdev = to_pci_dev(dev);
3605
3606	freelist = domain_unmap(domain, start_pfn, last_pfn);
3607	if (intel_iommu_strict || (pdev && pdev->untrusted) ||
3608			!has_iova_flush_queue(&domain->iovad)) {
3609		iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3610				      nrpages, !freelist, 0);
3611		/* free iova */
3612		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3613		dma_free_pagelist(freelist);
3614	} else {
3615		queue_iova(&domain->iovad, iova_pfn, nrpages,
3616			   (unsigned long)freelist);
3617		/*
3618		 * Queue up the release of the unmap, saving roughly 1/6th of
3619		 * the cpu time otherwise spent on the iotlb flush operation.
3620		 */
3621	}
3622
3623	trace_unmap_single(dev, dev_addr, size);
3624}
3625
3626static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3627			     size_t size, enum dma_data_direction dir,
3628			     unsigned long attrs)
3629{
3630	if (iommu_need_mapping(dev))
3631		intel_unmap(dev, dev_addr, size);
3632	else
3633		dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3634}
3635
3636static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3637		size_t size, enum dma_data_direction dir, unsigned long attrs)
3638{
3639	if (iommu_need_mapping(dev))
3640		intel_unmap(dev, dev_addr, size);
3641}
3642
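/*
 * Allocate a coherent buffer.  Devices in the identity map fall back to
 * dma_direct; otherwise allocate (preferably contiguous) pages, zero
 * them and map them bidirectionally through the IOMMU.
 */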
3643static void *intel_alloc_coherent(struct device *dev, size_t size,
3644				  dma_addr_t *dma_handle, gfp_t flags,
3645				  unsigned long attrs)
3646{
3647	struct page *page = NULL;
3648	int order;
3649
3650	if (!iommu_need_mapping(dev))
3651		return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3652
3653	size = PAGE_ALIGN(size);
3654	order = get_order(size);
3655
3656	if (gfpflags_allow_blocking(flags)) {
3657		unsigned int count = size >> PAGE_SHIFT;
3658
3659		page = dma_alloc_from_contiguous(dev, count, order,
3660						 flags & __GFP_NOWARN);
3661	}
3662
3663	if (!page)
3664		page = alloc_pages(flags, order);
3665	if (!page)
3666		return NULL;
3667	memset(page_address(page), 0, size);
3668
3669	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3670					 DMA_BIDIRECTIONAL,
3671					 dev->coherent_dma_mask);
3672	if (*dma_handle != DMA_MAPPING_ERROR)
3673		return page_address(page);
3674	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3675		__free_pages(page, order);
3676
3677	return NULL;
3678}
3679
3680static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3681				dma_addr_t dma_handle, unsigned long attrs)
3682{
3683	int order;
3684	struct page *page = virt_to_page(vaddr);
3685
3686	if (!iommu_need_mapping(dev))
3687		return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3688
3689	size = PAGE_ALIGN(size);
3690	order = get_order(size);
3691
3692	intel_unmap(dev, dma_handle, size);
3693	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3694		__free_pages(page, order);
3695}
3696
3697static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3698			   int nelems, enum dma_data_direction dir,
3699			   unsigned long attrs)
3700{
3701	dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3702	unsigned long nrpages = 0;
3703	struct scatterlist *sg;
3704	int i;
3705
3706	if (!iommu_need_mapping(dev))
3707		return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3708
3709	for_each_sg(sglist, sg, nelems, i) {
3710		nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3711	}
3712
3713	intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3714
3715	trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3716}
3717
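/*
 * Map a scatterlist using a single IOVA allocation sized to cover all
 * segments.  On mapping failure the partially built page tables and the
 * IOVA range are released again.
 */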
3718static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3719			enum dma_data_direction dir, unsigned long attrs)
3720{
3721	int i;
3722	struct dmar_domain *domain;
3723	size_t size = 0;
3724	int prot = 0;
3725	unsigned long iova_pfn;
3726	int ret;
3727	struct scatterlist *sg;
3728	unsigned long start_vpfn;
3729	struct intel_iommu *iommu;
3730
3731	BUG_ON(dir == DMA_NONE);
3732	if (!iommu_need_mapping(dev))
3733		return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
3734
3735	domain = find_domain(dev);
3736	if (!domain)
3737		return 0;
3738
3739	iommu = domain_get_iommu(domain);
3740
3741	for_each_sg(sglist, sg, nelems, i)
3742		size += aligned_nrpages(sg->offset, sg->length);
3743
3744	iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3745				*dev->dma_mask);
3746	if (!iova_pfn) {
3747		sglist->dma_length = 0;
3748		return 0;
3749	}
3750
3751	/*
3752	 * Check if DMAR supports zero-length reads on write only
3753	 * mappings.
3754	 */
3755	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3756			!cap_zlr(iommu->cap))
3757		prot |= DMA_PTE_READ;
3758	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3759		prot |= DMA_PTE_WRITE;
3760
3761	start_vpfn = mm_to_dma_pfn(iova_pfn);
3762
3763	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3764	if (unlikely(ret)) {
3765		dma_pte_free_pagetable(domain, start_vpfn,
3766				       start_vpfn + size - 1,
3767				       agaw_to_level(domain->agaw) + 1);
3768		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3769		return 0;
3770	}
3771
3772	trace_map_sg(dev, iova_pfn << PAGE_SHIFT,
3773		     sg_phys(sglist), size << VTD_PAGE_SHIFT);
3774
3775	return nelems;
3776}
3777
3778static u64 intel_get_required_mask(struct device *dev)
3779{
3780	if (!iommu_need_mapping(dev))
3781		return dma_direct_get_required_mask(dev);
3782	return DMA_BIT_MASK(32);
3783}
3784
3785static const struct dma_map_ops intel_dma_ops = {
3786	.alloc = intel_alloc_coherent,
3787	.free = intel_free_coherent,
3788	.map_sg = intel_map_sg,
3789	.unmap_sg = intel_unmap_sg,
3790	.map_page = intel_map_page,
3791	.unmap_page = intel_unmap_page,
3792	.map_resource = intel_map_resource,
3793	.unmap_resource = intel_unmap_resource,
3794	.dma_supported = dma_direct_supported,
3795	.mmap = dma_common_mmap,
3796	.get_sgtable = dma_common_get_sgtable,
3797	.get_required_mask = intel_get_required_mask,
3798};
3799
3800static void
3801bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size,
3802		   enum dma_data_direction dir, enum dma_sync_target target)
3803{
3804	struct dmar_domain *domain;
3805	phys_addr_t tlb_addr;
3806
3807	domain = find_domain(dev);
3808	if (WARN_ON(!domain))
3809		return;
3810
3811	tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr);
3812	if (is_swiotlb_buffer(tlb_addr))
3813		swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
3814}
3815
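/*
 * Map one buffer for a device using the bounce ops: allocate an IOVA
 * and, if the buffer is not VTD_PAGE_SIZE aligned, bounce it through
 * swiotlb so the IOMMU mapping never exposes memory outside the
 * requested region.
 */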
3816static dma_addr_t
3817bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
3818		  enum dma_data_direction dir, unsigned long attrs,
3819		  u64 dma_mask)
3820{
3821	size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3822	struct dmar_domain *domain;
3823	struct intel_iommu *iommu;
3824	unsigned long iova_pfn;
3825	unsigned long nrpages;
3826	phys_addr_t tlb_addr;
3827	int prot = 0;
3828	int ret;
3829
3830	domain = find_domain(dev);
3831	if (WARN_ON(dir == DMA_NONE || !domain))
3832		return DMA_MAPPING_ERROR;
3833
3834	iommu = domain_get_iommu(domain);
3835	if (WARN_ON(!iommu))
3836		return DMA_MAPPING_ERROR;
3837
3838	nrpages = aligned_nrpages(0, size);
3839	iova_pfn = intel_alloc_iova(dev, domain,
3840				    dma_to_mm_pfn(nrpages), dma_mask);
3841	if (!iova_pfn)
3842		return DMA_MAPPING_ERROR;
3843
3844	/*
3845	 * Check if DMAR supports zero-length reads on write only
3846	 * mappings.
3847	 */
3848	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3849			!cap_zlr(iommu->cap))
3850		prot |= DMA_PTE_READ;
3851	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3852		prot |= DMA_PTE_WRITE;
3853
3854	/*
3855	 * If both the physical buffer start address and size are
3856	 * page aligned, we don't need to use a bounce page.
3857	 */
3858	if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
3859		tlb_addr = swiotlb_tbl_map_single(dev,
3860				__phys_to_dma(dev, io_tlb_start),
3861				paddr, size, aligned_size, dir, attrs);
3862		if (tlb_addr == DMA_MAPPING_ERROR) {
3863			goto swiotlb_error;
3864		} else {
3865			/* Cleanup the padding area. */
3866			void *padding_start = phys_to_virt(tlb_addr);
3867			size_t padding_size = aligned_size;
3868
3869			if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
3870			    (dir == DMA_TO_DEVICE ||
3871			     dir == DMA_BIDIRECTIONAL)) {
3872				padding_start += size;
3873				padding_size -= size;
3874			}
3875
3876			memset(padding_start, 0, padding_size);
3877		}
3878	} else {
3879		tlb_addr = paddr;
3880	}
3881
3882	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3883				 tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot);
3884	if (ret)
3885		goto mapping_error;
3886
3887	trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size);
3888
3889	return (phys_addr_t)iova_pfn << PAGE_SHIFT;
3890
3891mapping_error:
3892	if (is_swiotlb_buffer(tlb_addr))
3893		swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3894					 aligned_size, dir, attrs);
3895swiotlb_error:
3896	free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3897	dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n",
3898		size, (unsigned long long)paddr, dir);
3899
3900	return DMA_MAPPING_ERROR;
3901}
3902
3903static void
3904bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
3905		    enum dma_data_direction dir, unsigned long attrs)
3906{
3907	size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3908	struct dmar_domain *domain;
3909	phys_addr_t tlb_addr;
3910
3911	domain = find_domain(dev);
3912	if (WARN_ON(!domain))
3913		return;
3914
3915	tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr);
3916	if (WARN_ON(!tlb_addr))
3917		return;
3918
3919	intel_unmap(dev, dev_addr, size);
3920	if (is_swiotlb_buffer(tlb_addr))
3921		swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3922					 aligned_size, dir, attrs);
3923
3924	trace_bounce_unmap_single(dev, dev_addr, size);
3925}
3926
3927static dma_addr_t
3928bounce_map_page(struct device *dev, struct page *page, unsigned long offset,
3929		size_t size, enum dma_data_direction dir, unsigned long attrs)
3930{
3931	return bounce_map_single(dev, page_to_phys(page) + offset,
3932				 size, dir, attrs, *dev->dma_mask);
3933}
3934
3935static dma_addr_t
3936bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
3937		    enum dma_data_direction dir, unsigned long attrs)
3938{
3939	return bounce_map_single(dev, phys_addr, size,
3940				 dir, attrs, *dev->dma_mask);
3941}
3942
3943static void
3944bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size,
3945		  enum dma_data_direction dir, unsigned long attrs)
3946{
3947	bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3948}
3949
3950static void
3951bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size,
3952		      enum dma_data_direction dir, unsigned long attrs)
3953{
3954	bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3955}
3956
3957static void
3958bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3959		enum dma_data_direction dir, unsigned long attrs)
3960{
3961	struct scatterlist *sg;
3962	int i;
3963
3964	for_each_sg(sglist, sg, nelems, i)
3965		bounce_unmap_page(dev, sg->dma_address,
3966				  sg_dma_len(sg), dir, attrs);
3967}
3968
3969static int
3970bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3971	      enum dma_data_direction dir, unsigned long attrs)
3972{
3973	int i;
3974	struct scatterlist *sg;
3975
3976	for_each_sg(sglist, sg, nelems, i) {
3977		sg->dma_address = bounce_map_page(dev, sg_page(sg),
3978						  sg->offset, sg->length,
3979						  dir, attrs);
3980		if (sg->dma_address == DMA_MAPPING_ERROR)
3981			goto out_unmap;
3982		sg_dma_len(sg) = sg->length;
3983	}
3984
3985	return nelems;
3986
3987out_unmap:
3988	bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
3989	return 0;
3990}
3991
3992static void
3993bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
3994			   size_t size, enum dma_data_direction dir)
3995{
3996	bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU);
3997}
3998
3999static void
4000bounce_sync_single_for_device(struct device *dev, dma_addr_t addr,
4001			      size_t size, enum dma_data_direction dir)
4002{
4003	bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE);
4004}
4005
4006static void
4007bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
4008		       int nelems, enum dma_data_direction dir)
4009{
4010	struct scatterlist *sg;
4011	int i;
4012
4013	for_each_sg(sglist, sg, nelems, i)
4014		bounce_sync_single(dev, sg_dma_address(sg),
4015				   sg_dma_len(sg), dir, SYNC_FOR_CPU);
4016}
4017
4018static void
4019bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
4020			  int nelems, enum dma_data_direction dir)
4021{
4022	struct scatterlist *sg;
4023	int i;
4024
4025	for_each_sg(sglist, sg, nelems, i)
4026		bounce_sync_single(dev, sg_dma_address(sg),
4027				   sg_dma_len(sg), dir, SYNC_FOR_DEVICE);
4028}
4029
4030static const struct dma_map_ops bounce_dma_ops = {
4031	.alloc			= intel_alloc_coherent,
4032	.free			= intel_free_coherent,
4033	.map_sg			= bounce_map_sg,
4034	.unmap_sg		= bounce_unmap_sg,
4035	.map_page		= bounce_map_page,
4036	.unmap_page		= bounce_unmap_page,
4037	.sync_single_for_cpu	= bounce_sync_single_for_cpu,
4038	.sync_single_for_device	= bounce_sync_single_for_device,
4039	.sync_sg_for_cpu	= bounce_sync_sg_for_cpu,
4040	.sync_sg_for_device	= bounce_sync_sg_for_device,
4041	.map_resource		= bounce_map_resource,
4042	.unmap_resource		= bounce_unmap_resource,
4043	.dma_supported		= dma_direct_supported,
4044};
4045
4046static inline int iommu_domain_cache_init(void)
4047{
4048	int ret = 0;
4049
4050	iommu_domain_cache = kmem_cache_create("iommu_domain",
4051					 sizeof(struct dmar_domain),
4052					 0,
4053					 SLAB_HWCACHE_ALIGN,
4055					 NULL);
4056	if (!iommu_domain_cache) {
4057		pr_err("Couldn't create iommu_domain cache\n");
4058		ret = -ENOMEM;
4059	}
4060
4061	return ret;
4062}
4063
4064static inline int iommu_devinfo_cache_init(void)
4065{
4066	int ret = 0;
4067
4068	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
4069					 sizeof(struct device_domain_info),
4070					 0,
4071					 SLAB_HWCACHE_ALIGN,
4072					 NULL);
4073	if (!iommu_devinfo_cache) {
4074		pr_err("Couldn't create devinfo cache\n");
4075		ret = -ENOMEM;
4076	}
4077
4078	return ret;
4079}
4080
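/*
 * Set up the allocator caches used by this driver: the shared iova
 * cache plus slab caches for dmar_domain and device_domain_info
 * objects.
 */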
4081static int __init iommu_init_mempool(void)
4082{
4083	int ret;
4084	ret = iova_cache_get();
4085	if (ret)
4086		return ret;
4087
4088	ret = iommu_domain_cache_init();
4089	if (ret)
4090		goto domain_error;
4091
4092	ret = iommu_devinfo_cache_init();
4093	if (!ret)
4094		return ret;
4095
4096	kmem_cache_destroy(iommu_domain_cache);
4097domain_error:
4098	iova_cache_put();
4099
4100	return -ENOMEM;
4101}
4102
4103static void __init iommu_exit_mempool(void)
4104{
4105	kmem_cache_destroy(iommu_devinfo_cache);
4106	kmem_cache_destroy(iommu_domain_cache);
4107	iova_cache_put();
4108}
4109
4110static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
4111{
4112	struct dmar_drhd_unit *drhd;
4113	u32 vtbar;
4114	int rc;
4115
4116	/* We know that this device on this chipset has its own IOMMU.
4117	 * If we find it under a different IOMMU, then the BIOS is lying
4118	 * to us. Hope that the IOMMU for this device is actually
4119	 * disabled, and it needs no translation...
4120	 */
4121	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4122	if (rc) {
4123		/* "can't" happen */
4124		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4125		return;
4126	}
4127	vtbar &= 0xffff0000;
4128
4129	/* we know that this iommu should be at offset 0xa000 from vtbar */
4130	drhd = dmar_find_matched_drhd_unit(pdev);
4131	if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4132			    TAINT_FIRMWARE_WORKAROUND,
4133			    "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4134		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4135}
4136DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4137
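/*
 * Mark DMAR units that need no remapping: units with an empty device
 * scope and, when gfx mapping is disabled, units that cover nothing but
 * graphics devices.
 */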
4138static void __init init_no_remapping_devices(void)
4139{
4140	struct dmar_drhd_unit *drhd;
4141	struct device *dev;
4142	int i;
4143
4144	for_each_drhd_unit(drhd) {
4145		if (!drhd->include_all) {
4146			for_each_active_dev_scope(drhd->devices,
4147						  drhd->devices_cnt, i, dev)
4148				break;
4149			/* ignore DMAR unit if no devices exist */
4150			if (i == drhd->devices_cnt)
4151				drhd->ignored = 1;
4152		}
4153	}
4154
4155	for_each_active_drhd_unit(drhd) {
4156		if (drhd->include_all)
4157			continue;
4158
4159		for_each_active_dev_scope(drhd->devices,
4160					  drhd->devices_cnt, i, dev)
4161			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
4162				break;
4163		if (i < drhd->devices_cnt)
4164			continue;
4165
4166		/* This IOMMU has *only* gfx devices. Either bypass it or
4167		   set the gfx_mapped flag, as appropriate */
4168		if (!dmar_map_gfx) {
4169			drhd->ignored = 1;
4170			for_each_active_dev_scope(drhd->devices,
4171						  drhd->devices_cnt, i, dev)
4172				dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4173		}
4174	}
4175}
4176
4177#ifdef CONFIG_SUSPEND
4178static int init_iommu_hw(void)
4179{
4180	struct dmar_drhd_unit *drhd;
4181	struct intel_iommu *iommu = NULL;
4182
4183	for_each_active_iommu(iommu, drhd)
4184		if (iommu->qi)
4185			dmar_reenable_qi(iommu);
4186
4187	for_each_iommu(iommu, drhd) {
4188		if (drhd->ignored) {
4189			/*
4190			 * we always have to disable PMRs or DMA may fail on
4191			 * this device
4192			 */
4193			if (force_on)
4194				iommu_disable_protect_mem_regions(iommu);
4195			continue;
4196		}
4197
4198		iommu_flush_write_buffer(iommu);
4199
4200		iommu_set_root_entry(iommu);
4201
4202		iommu->flush.flush_context(iommu, 0, 0, 0,
4203					   DMA_CCMD_GLOBAL_INVL);
4204		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4205		iommu_enable_translation(iommu);
4206		iommu_disable_protect_mem_regions(iommu);
4207	}
4208
4209	return 0;
4210}
4211
4212static void iommu_flush_all(void)
4213{
4214	struct dmar_drhd_unit *drhd;
4215	struct intel_iommu *iommu;
4216
4217	for_each_active_iommu(iommu, drhd) {
4218		iommu->flush.flush_context(iommu, 0, 0, 0,
4219					   DMA_CCMD_GLOBAL_INVL);
4220		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
4221					 DMA_TLB_GLOBAL_FLUSH);
4222	}
4223}
4224
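/*
 * Flush all caches, disable translation and save the fault-event
 * registers of every active IOMMU before the system enters a sleep
 * state.
 */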
4225static int iommu_suspend(void)
4226{
4227	struct dmar_drhd_unit *drhd;
4228	struct intel_iommu *iommu = NULL;
4229	unsigned long flag;
4230
4231	for_each_active_iommu(iommu, drhd) {
4232		iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
4233						 GFP_ATOMIC);
4234		if (!iommu->iommu_state)
4235			goto nomem;
4236	}
4237
4238	iommu_flush_all();
4239
4240	for_each_active_iommu(iommu, drhd) {
4241		iommu_disable_translation(iommu);
4242
4243		raw_spin_lock_irqsave(&iommu->register_lock, flag);
4244
4245		iommu->iommu_state[SR_DMAR_FECTL_REG] =
4246			readl(iommu->reg + DMAR_FECTL_REG);
4247		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4248			readl(iommu->reg + DMAR_FEDATA_REG);
4249		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4250			readl(iommu->reg + DMAR_FEADDR_REG);
4251		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4252			readl(iommu->reg + DMAR_FEUADDR_REG);
4253
4254		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4255	}
4256	return 0;
4257
4258nomem:
4259	for_each_active_iommu(iommu, drhd)
4260		kfree(iommu->iommu_state);
4261
4262	return -ENOMEM;
4263}
4264
4265static void iommu_resume(void)
4266{
4267	struct dmar_drhd_unit *drhd;
4268	struct intel_iommu *iommu = NULL;
4269	unsigned long flag;
4270
4271	if (init_iommu_hw()) {
4272		if (force_on)
4273			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4274		else
4275			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4276		return;
4277	}
4278
4279	for_each_active_iommu(iommu, drhd) {
4280
4281		raw_spin_lock_irqsave(&iommu->register_lock, flag);
4282
4283		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4284			iommu->reg + DMAR_FECTL_REG);
4285		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4286			iommu->reg + DMAR_FEDATA_REG);
4287		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4288			iommu->reg + DMAR_FEADDR_REG);
4289		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4290			iommu->reg + DMAR_FEUADDR_REG);
4291
4292		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4293	}
4294
4295	for_each_active_iommu(iommu, drhd)
4296		kfree(iommu->iommu_state);
4297}
4298
4299static struct syscore_ops iommu_syscore_ops = {
4300	.resume		= iommu_resume,
4301	.suspend	= iommu_suspend,
4302};
4303
4304static void __init init_iommu_pm_ops(void)
4305{
4306	register_syscore_ops(&iommu_syscore_ops);
4307}
4308
4309#else
4310static inline void init_iommu_pm_ops(void) {}
4311	#endif	/* CONFIG_SUSPEND */
4312
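/*
 * Parse one ACPI RMRR entry into a dmar_rmrr_unit, including its device
 * scope, and add it to the global dmar_rmrr_units list.
 */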
4313int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4314{
4315	struct acpi_dmar_reserved_memory *rmrr;
4316	struct dmar_rmrr_unit *rmrru;
4317
4318	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4319	if (!rmrru)
4320		goto out;
4321
4322	rmrru->hdr = header;
4323	rmrr = (struct acpi_dmar_reserved_memory *)header;
4324	rmrru->base_address = rmrr->base_address;
4325	rmrru->end_address = rmrr->end_address;
4326
4327	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4328				((void *)rmrr) + rmrr->header.length,
4329				&rmrru->devices_cnt);
4330	if (rmrru->devices_cnt && rmrru->devices == NULL)
4331		goto free_rmrru;
4332
4333	list_add(&rmrru->list, &dmar_rmrr_units);
4334
4335	return 0;
4336free_rmrru:
4337	kfree(rmrru);
4338out:
4339	return -ENOMEM;
4340}
4341
4342static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4343{
4344	struct dmar_atsr_unit *atsru;
4345	struct acpi_dmar_atsr *tmp;
4346
4347	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4348		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4349		if (atsr->segment != tmp->segment)
4350			continue;
4351		if (atsr->header.length != tmp->header.length)
4352			continue;
4353		if (memcmp(atsr, tmp, atsr->header.length) == 0)
4354			return atsru;
4355	}
4356
4357	return NULL;
4358}
4359
4360int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4361{
4362	struct acpi_dmar_atsr *atsr;
4363	struct dmar_atsr_unit *atsru;
4364
4365	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
4366		return 0;
4367
4368	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4369	atsru = dmar_find_atsr(atsr);
4370	if (atsru)
4371		return 0;
4372
4373	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4374	if (!atsru)
4375		return -ENOMEM;
4376
4377	/*
4378	 * If memory is allocated from slab by ACPI _DSM method, we need to
4379	 * copy the memory content because the memory buffer will be freed
4380	 * on return.
4381	 */
4382	atsru->hdr = (void *)(atsru + 1);
4383	memcpy(atsru->hdr, hdr, hdr->length);
4384	atsru->include_all = atsr->flags & 0x1;
4385	if (!atsru->include_all) {
4386		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4387				(void *)atsr + atsr->header.length,
4388				&atsru->devices_cnt);
4389		if (atsru->devices_cnt && atsru->devices == NULL) {
4390			kfree(atsru);
4391			return -ENOMEM;
4392		}
4393	}
4394
4395	list_add_rcu(&atsru->list, &dmar_atsr_units);
4396
4397	return 0;
4398}
4399
4400static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4401{
4402	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4403	kfree(atsru);
4404}
4405
4406int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4407{
4408	struct acpi_dmar_atsr *atsr;
4409	struct dmar_atsr_unit *atsru;
4410
4411	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4412	atsru = dmar_find_atsr(atsr);
4413	if (atsru) {
4414		list_del_rcu(&atsru->list);
4415		synchronize_rcu();
4416		intel_iommu_free_atsr(atsru);
4417	}
4418
4419	return 0;
4420}
4421
4422int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4423{
4424	int i;
4425	struct device *dev;
4426	struct acpi_dmar_atsr *atsr;
4427	struct dmar_atsr_unit *atsru;
4428
4429	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4430	atsru = dmar_find_atsr(atsr);
4431	if (!atsru)
4432		return 0;
4433
4434	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4435		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4436					  i, dev)
4437			return -EBUSY;
4438	}
4439
4440	return 0;
4441}
4442
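/*
 * Bring a hot-added DMAR unit online: check that it supports the
 * features already in use (pass-through, snooping, superpages),
 * allocate its domain and root-entry structures, then enable queued
 * invalidation, interrupts and translation.
 */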
4443static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4444{
4445	int sp, ret;
4446	struct intel_iommu *iommu = dmaru->iommu;
4447
4448	if (g_iommus[iommu->seq_id])
4449		return 0;
4450
4451	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4452		pr_warn("%s: Doesn't support hardware pass through.\n",
4453			iommu->name);
4454		return -ENXIO;
4455	}
4456	if (!ecap_sc_support(iommu->ecap) &&
4457	    domain_update_iommu_snooping(iommu)) {
4458		pr_warn("%s: Doesn't support snooping.\n",
4459			iommu->name);
4460		return -ENXIO;
4461	}
4462	sp = domain_update_iommu_superpage(iommu) - 1;
4463	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4464		pr_warn("%s: Doesn't support large page.\n",
4465			iommu->name);
4466		return -ENXIO;
4467	}
4468
4469	/*
4470	 * Disable translation if already enabled prior to OS handover.
4471	 */
4472	if (iommu->gcmd & DMA_GCMD_TE)
4473		iommu_disable_translation(iommu);
4474
4475	g_iommus[iommu->seq_id] = iommu;
4476	ret = iommu_init_domains(iommu);
4477	if (ret == 0)
4478		ret = iommu_alloc_root_entry(iommu);
4479	if (ret)
4480		goto out;
4481
4482#ifdef CONFIG_INTEL_IOMMU_SVM
4483	if (pasid_supported(iommu))
4484		intel_svm_init(iommu);
4485#endif
4486
4487	if (dmaru->ignored) {
4488		/*
4489		 * we always have to disable PMRs or DMA may fail on this device
4490		 */
4491		if (force_on)
4492			iommu_disable_protect_mem_regions(iommu);
4493		return 0;
4494	}
4495
4496	intel_iommu_init_qi(iommu);
4497	iommu_flush_write_buffer(iommu);
4498
4499#ifdef CONFIG_INTEL_IOMMU_SVM
4500	if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
4501		ret = intel_svm_enable_prq(iommu);
4502		if (ret)
4503			goto disable_iommu;
4504	}
4505#endif
4506	ret = dmar_set_interrupt(iommu);
4507	if (ret)
4508		goto disable_iommu;
4509
4510	iommu_set_root_entry(iommu);
4511	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4512	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4513	iommu_enable_translation(iommu);
4514
4515	iommu_disable_protect_mem_regions(iommu);
4516	return 0;
4517
4518disable_iommu:
4519	disable_dmar_iommu(iommu);
4520out:
4521	free_dmar_iommu(iommu);
4522	return ret;
4523}
4524
4525int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4526{
4527	int ret = 0;
4528	struct intel_iommu *iommu = dmaru->iommu;
4529
4530	if (!intel_iommu_enabled)
4531		return 0;
4532	if (iommu == NULL)
4533		return -EINVAL;
4534
4535	if (insert) {
4536		ret = intel_iommu_add(dmaru);
4537	} else {
4538		disable_dmar_iommu(iommu);
4539		free_dmar_iommu(iommu);
4540	}
4541
4542	return ret;
4543}
4544
4545static void intel_iommu_free_dmars(void)
4546{
4547	struct dmar_rmrr_unit *rmrru, *rmrr_n;
4548	struct dmar_atsr_unit *atsru, *atsr_n;
4549
4550	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4551		list_del(&rmrru->list);
4552		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4553		kfree(rmrru);
4554	}
4555
4556	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4557		list_del(&atsru->list);
4558		intel_iommu_free_atsr(atsru);
4559	}
4560}
4561
4562int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4563{
4564	int i, ret = 1;
4565	struct pci_bus *bus;
4566	struct pci_dev *bridge = NULL;
4567	struct device *tmp;
4568	struct acpi_dmar_atsr *atsr;
4569	struct dmar_atsr_unit *atsru;
4570
4571	dev = pci_physfn(dev);
4572	for (bus = dev->bus; bus; bus = bus->parent) {
4573		bridge = bus->self;
4574		/* If it's an integrated device, allow ATS */
4575		if (!bridge)
4576			return 1;
4577		/* Connected via non-PCIe: no ATS */
4578		if (!pci_is_pcie(bridge) ||
4579		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4580			return 0;
4581		/* If we found the root port, look it up in the ATSR */
4582		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4583			break;
4584	}
4585
4586	rcu_read_lock();
4587	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4588		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4589		if (atsr->segment != pci_domain_nr(dev->bus))
4590			continue;
4591
4592		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4593			if (tmp == &bridge->dev)
4594				goto out;
4595
4596		if (atsru->include_all)
4597			goto out;
4598	}
4599	ret = 0;
4600out:
4601	rcu_read_unlock();
4602
4603	return ret;
4604}
4605
4606int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4607{
4608	int ret;
4609	struct dmar_rmrr_unit *rmrru;
4610	struct dmar_atsr_unit *atsru;
4611	struct acpi_dmar_atsr *atsr;
4612	struct acpi_dmar_reserved_memory *rmrr;
4613
4614	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4615		return 0;
4616
4617	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4618		rmrr = container_of(rmrru->hdr,
4619				    struct acpi_dmar_reserved_memory, header);
4620		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4621			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4622				((void *)rmrr) + rmrr->header.length,
4623				rmrr->segment, rmrru->devices,
4624				rmrru->devices_cnt);
4625			if (ret < 0)
4626				return ret;
4627		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4628			dmar_remove_dev_scope(info, rmrr->segment,
4629				rmrru->devices, rmrru->devices_cnt);
4630		}
4631	}
4632
4633	list_for_each_entry(atsru, &dmar_atsr_units, list) {
4634		if (atsru->include_all)
4635			continue;
4636
4637		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4638		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4639			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4640					(void *)atsr + atsr->header.length,
4641					atsr->segment, atsru->devices,
4642					atsru->devices_cnt);
4643			if (ret > 0)
4644				break;
4645			else if (ret < 0)
4646				return ret;
4647		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4648			if (dmar_remove_dev_scope(info, atsr->segment,
4649					atsru->devices, atsru->devices_cnt))
4650				break;
4651		}
4652	}
4653
4654	return 0;
4655}
4656
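/*
 * Memory hotplug notifier for the static identity domain: extend the
 * identity map when memory goes online, and unmap and flush the
 * corresponding IOVA range when it is taken offline.
 */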
4657static int intel_iommu_memory_notifier(struct notifier_block *nb,
4658				       unsigned long val, void *v)
4659{
4660	struct memory_notify *mhp = v;
4661	unsigned long long start, end;
4662	unsigned long start_vpfn, last_vpfn;
4663
4664	switch (val) {
4665	case MEM_GOING_ONLINE:
4666		start = mhp->start_pfn << PAGE_SHIFT;
4667		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4668		if (iommu_domain_identity_map(si_domain, start, end)) {
4669			pr_warn("Failed to build identity map for [%llx-%llx]\n",
4670				start, end);
4671			return NOTIFY_BAD;
4672		}
4673		break;
4674
4675	case MEM_OFFLINE:
4676	case MEM_CANCEL_ONLINE:
4677		start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4678		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4679		while (start_vpfn <= last_vpfn) {
4680			struct iova *iova;
4681			struct dmar_drhd_unit *drhd;
4682			struct intel_iommu *iommu;
4683			struct page *freelist;
4684
4685			iova = find_iova(&si_domain->iovad, start_vpfn);
4686			if (iova == NULL) {
4687				pr_debug("Failed get IOVA for PFN %lx\n",
4688					 start_vpfn);
4689				break;
4690			}
4691
4692			iova = split_and_remove_iova(&si_domain->iovad, iova,
4693						     start_vpfn, last_vpfn);
4694			if (iova == NULL) {
4695				pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4696					start_vpfn, last_vpfn);
4697				return NOTIFY_BAD;
4698			}
4699
4700			freelist = domain_unmap(si_domain, iova->pfn_lo,
4701					       iova->pfn_hi);
4702
4703			rcu_read_lock();
4704			for_each_active_iommu(iommu, drhd)
4705				iommu_flush_iotlb_psi(iommu, si_domain,
4706					iova->pfn_lo, iova_size(iova),
4707					!freelist, 0);
4708			rcu_read_unlock();
4709			dma_free_pagelist(freelist);
4710
4711			start_vpfn = iova->pfn_hi + 1;
4712			free_iova_mem(iova);
4713		}
4714		break;
4715	}
4716
4717	return NOTIFY_OK;
4718}
4719
4720static struct notifier_block intel_iommu_memory_nb = {
4721	.notifier_call = intel_iommu_memory_notifier,
4722	.priority = 0
4723};
4724
4725static void free_all_cpu_cached_iovas(unsigned int cpu)
4726{
4727	int i;
4728
4729	for (i = 0; i < g_num_of_iommus; i++) {
4730		struct intel_iommu *iommu = g_iommus[i];
4731		struct dmar_domain *domain;
4732		int did;
4733
4734		if (!iommu)
4735			continue;
4736
4737		for (did = 0; did < cap_ndoms(iommu->cap); did++) {
4738			domain = get_iommu_domain(iommu, (u16)did);
4739
4740			if (!domain)
4741				continue;
4742			free_cpu_cached_iovas(cpu, &domain->iovad);
4743		}
4744	}
4745}
4746
4747static int intel_iommu_cpu_dead(unsigned int cpu)
4748{
4749	free_all_cpu_cached_iovas(cpu);
4750	return 0;
4751}
4752
4753static void intel_disable_iommus(void)
4754{
4755	struct intel_iommu *iommu = NULL;
4756	struct dmar_drhd_unit *drhd;
4757
4758	for_each_iommu(iommu, drhd)
4759		iommu_disable_translation(iommu);
4760}
4761
4762static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4763{
4764	struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4765
4766	return container_of(iommu_dev, struct intel_iommu, iommu);
4767}
4768
4769static ssize_t intel_iommu_show_version(struct device *dev,
4770					struct device_attribute *attr,
4771					char *buf)
4772{
4773	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4774	u32 ver = readl(iommu->reg + DMAR_VER_REG);
4775	return sprintf(buf, "%d:%d\n",
4776		       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4777}
4778static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4779
4780static ssize_t intel_iommu_show_address(struct device *dev,
4781					struct device_attribute *attr,
4782					char *buf)
4783{
4784	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4785	return sprintf(buf, "%llx\n", iommu->reg_phys);
4786}
4787static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4788
4789static ssize_t intel_iommu_show_cap(struct device *dev,
4790				    struct device_attribute *attr,
4791				    char *buf)
4792{
4793	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4794	return sprintf(buf, "%llx\n", iommu->cap);
4795}
4796static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4797
4798static ssize_t intel_iommu_show_ecap(struct device *dev,
4799				    struct device_attribute *attr,
4800				    char *buf)
4801{
4802	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4803	return sprintf(buf, "%llx\n", iommu->ecap);
4804}
4805static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4806
4807static ssize_t intel_iommu_show_ndoms(struct device *dev,
4808				      struct device_attribute *attr,
4809				      char *buf)
4810{
4811	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4812	return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4813}
4814static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4815
4816static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4817					   struct device_attribute *attr,
4818					   char *buf)
4819{
4820	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4821	return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4822						  cap_ndoms(iommu->cap)));
4823}
4824static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4825
4826static struct attribute *intel_iommu_attrs[] = {
4827	&dev_attr_version.attr,
4828	&dev_attr_address.attr,
4829	&dev_attr_cap.attr,
4830	&dev_attr_ecap.attr,
4831	&dev_attr_domains_supported.attr,
4832	&dev_attr_domains_used.attr,
4833	NULL,
4834};
4835
4836static struct attribute_group intel_iommu_group = {
4837	.name = "intel-iommu",
4838	.attrs = intel_iommu_attrs,
4839};
4840
4841const struct attribute_group *intel_iommu_groups[] = {
4842	&intel_iommu_group,
4843	NULL,
4844};
4845
4846static inline bool has_untrusted_dev(void)
4847{
4848	struct pci_dev *pdev = NULL;
4849
4850	for_each_pci_dev(pdev)
4851		if (pdev->untrusted)
4852			return true;
4853
4854	return false;
4855}
4856
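/*
 * Honour the DMAR platform opt-in flag: if untrusted devices are
 * present and the user has not explicitly opted out, force the IOMMU on
 * even if it was disabled on the command line.
 */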
4857static int __init platform_optin_force_iommu(void)
4858{
4859	if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
4860		return 0;
4861
4862	if (no_iommu || dmar_disabled)
4863		pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4864
4865	/*
4866	 * If Intel-IOMMU is disabled by default, we will apply identity
4867	 * map for all devices except those marked as being untrusted.
4868	 */
4869	if (dmar_disabled)
4870		iommu_identity_mapping |= IDENTMAP_ALL;
4871
4872	dmar_disabled = 0;
4873	no_iommu = 0;
4874
4875	return 1;
4876}
4877
4878static int __init probe_acpi_namespace_devices(void)
4879{
4880	struct dmar_drhd_unit *drhd;
4881	/* To avoid a -Wunused-but-set-variable warning. */
4882	struct intel_iommu *iommu __maybe_unused;
4883	struct device *dev;
4884	int i, ret = 0;
4885
4886	for_each_active_iommu(iommu, drhd) {
4887		for_each_active_dev_scope(drhd->devices,
4888					  drhd->devices_cnt, i, dev) {
4889			struct acpi_device_physical_node *pn;
4890			struct iommu_group *group;
4891			struct acpi_device *adev;
4892
4893			if (dev->bus != &acpi_bus_type)
4894				continue;
4895
4896			adev = to_acpi_device(dev);
4897			mutex_lock(&adev->physical_node_lock);
4898			list_for_each_entry(pn,
4899					    &adev->physical_node_list, node) {
4900				group = iommu_group_get(pn->dev);
4901				if (group) {
4902					iommu_group_put(group);
4903					continue;
4904				}
4905
4906				pn->dev->bus->iommu_ops = &intel_iommu_ops;
4907				ret = iommu_probe_device(pn->dev);
4908				if (ret)
4909					break;
4910			}
4911			mutex_unlock(&adev->physical_node_lock);
4912
4913			if (ret)
4914				return ret;
4915		}
4916	}
4917
4918	return 0;
4919}
4920
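/*
 * Main initialization entry point: parse the DMAR tables, initialize
 * every IOMMU unit, install the DMA and IOMMU ops, and finally enable
 * translation on all units that are not ignored.
 */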
4921int __init intel_iommu_init(void)
4922{
4923	int ret = -ENODEV;
4924	struct dmar_drhd_unit *drhd;
4925	struct intel_iommu *iommu;
4926
4927	/*
4928	 * Intel IOMMU is required for a TXT/tboot launch or platform
4929	 * opt in, so enforce that.
4930	 */
4931	force_on = tboot_force_iommu() || platform_optin_force_iommu();
4932
4933	if (iommu_init_mempool()) {
4934		if (force_on)
4935			panic("tboot: Failed to initialize iommu memory\n");
4936		return -ENOMEM;
4937	}
4938
4939	down_write(&dmar_global_lock);
4940	if (dmar_table_init()) {
4941		if (force_on)
4942			panic("tboot: Failed to initialize DMAR table\n");
4943		goto out_free_dmar;
4944	}
4945
4946	if (dmar_dev_scope_init() < 0) {
4947		if (force_on)
4948			panic("tboot: Failed to initialize DMAR device scope\n");
4949		goto out_free_dmar;
4950	}
4951
4952	up_write(&dmar_global_lock);
4953
4954	/*
4955	 * The bus notifier takes the dmar_global_lock, so lockdep will
4956	 * complain later when we register it under the lock.
4957	 */
4958	dmar_register_bus_notifier();
4959
4960	down_write(&dmar_global_lock);
4961
4962	if (no_iommu || dmar_disabled) {
4963		/*
4964		 * We exit the function here to ensure IOMMU's remapping and
4965		 * mempool aren't setup, which means that the IOMMU's PMRs
4966		 * won't be disabled via the call to init_dmars(). So disable
4967		 * it explicitly here. The PMRs were setup by tboot prior to
4968		 * calling SENTER, but the kernel is expected to reset/tear
4969		 * down the PMRs.
4970		 */
4971		if (intel_iommu_tboot_noforce) {
4972			for_each_iommu(iommu, drhd)
4973				iommu_disable_protect_mem_regions(iommu);
4974		}
4975
4976		/*
4977		 * Make sure the IOMMUs are switched off, even when we
4978		 * boot into a kexec kernel and the previous kernel left
4979		 * them enabled
4980		 */
4981		intel_disable_iommus();
4982		goto out_free_dmar;
4983	}
4984
4985	if (list_empty(&dmar_rmrr_units))
4986		pr_info("No RMRR found\n");
4987
4988	if (list_empty(&dmar_atsr_units))
4989		pr_info("No ATSR found\n");
4990
4991	if (dmar_init_reserved_ranges()) {
4992		if (force_on)
4993			panic("tboot: Failed to reserve iommu ranges\n");
4994		goto out_free_reserved_range;
4995	}
4996
4997	if (dmar_map_gfx)
4998		intel_iommu_gfx_mapped = 1;
4999
5000	init_no_remapping_devices();
5001
5002	ret = init_dmars();
5003	if (ret) {
5004		if (force_on)
5005			panic("tboot: Failed to initialize DMARs\n");
5006		pr_err("Initialization failed\n");
5007		goto out_free_reserved_range;
5008	}
5009	up_write(&dmar_global_lock);
5010
5011#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
5012	/*
5013	 * If the system has no untrusted device or the user has decided
5014	 * to disable the bounce page mechanisms, we don't need swiotlb.
5015	 * Mark this so that the pre-allocated bounce pages can be released
5016	 * later.
5017	 */
5018	if (!has_untrusted_dev() || intel_no_bounce)
5019		swiotlb = 0;
5020#endif
5021	dma_ops = &intel_dma_ops;
5022
5023	init_iommu_pm_ops();
5024
5025	for_each_active_iommu(iommu, drhd) {
5026		iommu_device_sysfs_add(&iommu->iommu, NULL,
5027				       intel_iommu_groups,
5028				       "%s", iommu->name);
5029		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
5030		iommu_device_register(&iommu->iommu);
5031	}
5032
5033	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
5034	if (si_domain && !hw_pass_through)
5035		register_memory_notifier(&intel_iommu_memory_nb);
5036	cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
5037			  intel_iommu_cpu_dead);
5038
5039	down_read(&dmar_global_lock);
5040	if (probe_acpi_namespace_devices())
5041		pr_warn("ACPI name space devices didn't probe correctly\n");
5042	up_read(&dmar_global_lock);
5043
5044	/* Finally, we enable the DMA remapping hardware. */
5045	for_each_iommu(iommu, drhd) {
5046		if (!drhd->ignored && !translation_pre_enabled(iommu))
5047			iommu_enable_translation(iommu);
5048
5049		iommu_disable_protect_mem_regions(iommu);
5050	}
5051	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
5052
5053	intel_iommu_enabled = 1;
5054	intel_iommu_debugfs_init();
5055
5056	return 0;
5057
5058out_free_reserved_range:
5059	put_iova_domain(&reserved_iova_list);
5060out_free_dmar:
5061	intel_iommu_free_dmars();
5062	up_write(&dmar_global_lock);
5063	iommu_exit_mempool();
5064	return ret;
5065}
5066
5067static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
5068{
5069	struct intel_iommu *iommu = opaque;
5070
5071	domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
5072	return 0;
5073}
5074
5075/*
5076 * NB - intel-iommu lacks any sort of reference counting for the users of
5077 * dependent devices.  If multiple endpoints have intersecting dependent
5078 * devices, unbinding the driver from any one of them will possibly leave
5079 * the others unable to operate.
5080 */
5081static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
5082{
5083	if (!iommu || !dev || !dev_is_pci(dev))
5084		return;
5085
5086	pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
5087}
5088
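/*
 * Detach a device from its domain with device_domain_lock held: tear
 * down its PASID and context entries, unlink the device_domain_info,
 * and release a private domain once its last device is gone.
 */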
5089static void __dmar_remove_one_dev_info(struct device_domain_info *info)
5090{
5091	struct dmar_domain *domain;
5092	struct intel_iommu *iommu;
5093	unsigned long flags;
5094
5095	assert_spin_locked(&device_domain_lock);
5096
5097	if (WARN_ON(!info))
5098		return;
5099
5100	iommu = info->iommu;
5101	domain = info->domain;
5102
5103	if (info->dev) {
5104		if (dev_is_pci(info->dev) && sm_supported(iommu))
5105			intel_pasid_tear_down_entry(iommu, info->dev,
5106					PASID_RID2PASID);
5107
5108		iommu_disable_dev_iotlb(info);
5109		domain_context_clear(iommu, info->dev);
5110		intel_pasid_free_table(info->dev);
5111	}
5112
5113	unlink_domain_info(info);
5114
5115	spin_lock_irqsave(&iommu->lock, flags);
5116	domain_detach_iommu(domain, iommu);
5117	spin_unlock_irqrestore(&iommu->lock, flags);
5118
5119	/* free the private domain */
5120	if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
5121	    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
5122	    list_empty(&domain->devices))
5123		domain_exit(info->domain);
5124
5125	free_devinfo_mem(info);
5126}
5127
5128static void dmar_remove_one_dev_info(struct device *dev)
5129{
5130	struct device_domain_info *info;
5131	unsigned long flags;
5132
5133	spin_lock_irqsave(&device_domain_lock, flags);
5134	info = dev->archdata.iommu;
5135	if (info)
5136		__dmar_remove_one_dev_info(info);
5137	spin_unlock_irqrestore(&device_domain_lock, flags);
5138}
5139
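/*
 * Minimal setup for domains created through the generic IOMMU API:
 * initialize the iova allocator, reserve the special ranges, derive the
 * AGAW from the requested guest width and allocate the top-level page
 * table.
 */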
5140static int md_domain_init(struct dmar_domain *domain, int guest_width)
5141{
5142	int adjust_width;
5143
5144	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5145	domain_reserve_special_ranges(domain);
5146
5147	/* calculate AGAW */
5148	domain->gaw = guest_width;
5149	adjust_width = guestwidth_to_adjustwidth(guest_width);
5150	domain->agaw = width_to_agaw(adjust_width);
5151
5152	domain->iommu_coherency = 0;
5153	domain->iommu_snooping = 0;
5154	domain->iommu_superpage = 0;
5155	domain->max_addr = 0;
5156
5157	/* always allocate the top pgd */
5158	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5159	if (!domain->pgd)
5160		return -ENOMEM;
5161	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5162	return 0;
5163}
5164
5165static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
5166{
5167	struct dmar_domain *dmar_domain;
5168	struct iommu_domain *domain;
5169
5170	switch (type) {
5171	case IOMMU_DOMAIN_DMA:
5172	/* fallthrough */
5173	case IOMMU_DOMAIN_UNMANAGED:
5174		dmar_domain = alloc_domain(0);
5175		if (!dmar_domain) {
5176			pr_err("Can't allocate dmar_domain\n");
5177			return NULL;
5178		}
5179		if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
5180			pr_err("Domain initialization failed\n");
5181			domain_exit(dmar_domain);
5182			return NULL;
5183		}
5184
5185		if (type == IOMMU_DOMAIN_DMA &&
5186		    init_iova_flush_queue(&dmar_domain->iovad,
5187					  iommu_flush_iova, iova_entry_free)) {
5188			pr_warn("iova flush queue initialization failed\n");
5189			intel_iommu_strict = 1;
5190		}
5191
5192		domain_update_iommu_cap(dmar_domain);
5193
5194		domain = &dmar_domain->domain;
5195		domain->geometry.aperture_start = 0;
5196		domain->geometry.aperture_end   =
5197				__DOMAIN_MAX_ADDR(dmar_domain->gaw);
5198		domain->geometry.force_aperture = true;
5199
5200		return domain;
5201	case IOMMU_DOMAIN_IDENTITY:
5202		return &si_domain->domain;
5203	default:
5204		return NULL;
5205	}
5206
5207	return NULL;
5208}
5209
5210static void intel_iommu_domain_free(struct iommu_domain *domain)
5211{
5212	if (domain != &si_domain->domain)
5213		domain_exit(to_dmar_domain(domain));
5214}
5215
5216/*
5217 * Check whether a @domain could be attached to the @dev through the
5218 * aux-domain attach/detach APIs.
5219 */
5220static inline bool
5221is_aux_domain(struct device *dev, struct iommu_domain *domain)
5222{
5223	struct device_domain_info *info = dev->archdata.iommu;
5224
5225	return info && info->auxd_enabled &&
5226			domain->type == IOMMU_DOMAIN_UNMANAGED;
5227}
5228
5229static void auxiliary_link_device(struct dmar_domain *domain,
5230				  struct device *dev)
5231{
5232	struct device_domain_info *info = dev->archdata.iommu;
5233
5234	assert_spin_locked(&device_domain_lock);
5235	if (WARN_ON(!info))
5236		return;
5237
5238	domain->auxd_refcnt++;
5239	list_add(&domain->auxd, &info->auxiliary_domains);
5240}
5241
5242static void auxiliary_unlink_device(struct dmar_domain *domain,
5243				    struct device *dev)
5244{
5245	struct device_domain_info *info = dev->archdata.iommu;
5246
5247	assert_spin_locked(&device_domain_lock);
5248	if (WARN_ON(!info))
5249		return;
5250
5251	list_del(&domain->auxd);
5252	domain->auxd_refcnt--;
5253
5254	if (!domain->auxd_refcnt && domain->default_pasid > 0)
5255		intel_pasid_free_id(domain->default_pasid);
5256}
5257
5258static int aux_domain_add_dev(struct dmar_domain *domain,
5259			      struct device *dev)
5260{
5261	int ret;
5262	u8 bus, devfn;
5263	unsigned long flags;
5264	struct intel_iommu *iommu;
5265
5266	iommu = device_to_iommu(dev, &bus, &devfn);
5267	if (!iommu)
5268		return -ENODEV;
5269
5270	if (domain->default_pasid <= 0) {
5271		int pasid;
5272
5273		pasid = intel_pasid_alloc_id(domain, PASID_MIN,
5274					     pci_max_pasids(to_pci_dev(dev)),
5275					     GFP_KERNEL);
5276		if (pasid <= 0) {
5277			pr_err("Can't allocate default pasid\n");
5278			return -ENODEV;
5279		}
5280		domain->default_pasid = pasid;
5281	}
5282
5283	spin_lock_irqsave(&device_domain_lock, flags);
5284	/*
5285	 * iommu->lock must be held to attach domain to iommu and setup the
5286	 * pasid entry for second level translation.
5287	 */
5288	spin_lock(&iommu->lock);
5289	ret = domain_attach_iommu(domain, iommu);
5290	if (ret)
5291		goto attach_failed;
5292
5293	/* Setup the PASID entry for mediated devices: */
5294	ret = intel_pasid_setup_second_level(iommu, domain, dev,
5295					     domain->default_pasid);
5296	if (ret)
5297		goto table_failed;
5298	spin_unlock(&iommu->lock);
5299
5300	auxiliary_link_device(domain, dev);
5301
5302	spin_unlock_irqrestore(&device_domain_lock, flags);
5303
5304	return 0;
5305
5306table_failed:
5307	domain_detach_iommu(domain, iommu);
5308attach_failed:
5309	spin_unlock(&iommu->lock);
5310	spin_unlock_irqrestore(&device_domain_lock, flags);
5311	if (!domain->auxd_refcnt && domain->default_pasid > 0)
5312		intel_pasid_free_id(domain->default_pasid);
5313
5314	return ret;
5315}
5316
5317static void aux_domain_remove_dev(struct dmar_domain *domain,
5318				  struct device *dev)
5319{
5320	struct device_domain_info *info;
5321	struct intel_iommu *iommu;
5322	unsigned long flags;
5323
5324	if (!is_aux_domain(dev, &domain->domain))
5325		return;
5326
5327	spin_lock_irqsave(&device_domain_lock, flags);
5328	info = dev->archdata.iommu;
5329	iommu = info->iommu;
5330
5331	auxiliary_unlink_device(domain, dev);
5332
5333	spin_lock(&iommu->lock);
5334	intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5335	domain_detach_iommu(domain, iommu);
5336	spin_unlock(&iommu->lock);
5337
5338	spin_unlock_irqrestore(&device_domain_lock, flags);
5339}
5340
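/*
 * Before attaching, clamp the domain's address width to what this IOMMU
 * can handle and drop any extra page-table levels that the hardware
 * AGAW cannot use.
 */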
5341static int prepare_domain_attach_device(struct iommu_domain *domain,
5342					struct device *dev)
5343{
5344	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5345	struct intel_iommu *iommu;
5346	int addr_width;
5347	u8 bus, devfn;
5348
5349	iommu = device_to_iommu(dev, &bus, &devfn);
5350	if (!iommu)
5351		return -ENODEV;
5352
5353	/* check if this iommu agaw is sufficient for max mapped address */
5354	addr_width = agaw_to_width(iommu->agaw);
5355	if (addr_width > cap_mgaw(iommu->cap))
5356		addr_width = cap_mgaw(iommu->cap);
5357
5358	if (dmar_domain->max_addr > (1LL << addr_width)) {
5359		dev_err(dev, "%s: iommu width (%d) is not "
5360		        "sufficient for the mapped address (%llx)\n",
5361		        __func__, addr_width, dmar_domain->max_addr);
5362		return -EFAULT;
5363	}
5364	dmar_domain->gaw = addr_width;
5365
5366	/*
5367	 * Knock out extra levels of page tables if necessary
5368	 */
5369	while (iommu->agaw < dmar_domain->agaw) {
5370		struct dma_pte *pte;
5371
5372		pte = dmar_domain->pgd;
5373		if (dma_pte_present(pte)) {
5374			dmar_domain->pgd = (struct dma_pte *)
5375				phys_to_virt(dma_pte_addr(pte));
5376			free_pgtable_page(pte);
5377		}
5378		dmar_domain->agaw--;
5379	}
5380
5381	return 0;
5382}
5383
5384static int intel_iommu_attach_device(struct iommu_domain *domain,
5385				     struct device *dev)
5386{
5387	int ret;
5388
5389	if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
5390	    device_is_rmrr_locked(dev)) {
5391		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
5392		return -EPERM;
5393	}
5394
5395	if (is_aux_domain(dev, domain))
5396		return -EPERM;
5397
5398	/* normally dev is not mapped */
5399	if (unlikely(domain_context_mapped(dev))) {
5400		struct dmar_domain *old_domain;
5401
5402		old_domain = find_domain(dev);
5403		if (old_domain)
5404			dmar_remove_one_dev_info(dev);
5405	}
5406
5407	ret = prepare_domain_attach_device(domain, dev);
5408	if (ret)
5409		return ret;
5410
5411	return domain_add_dev_info(to_dmar_domain(domain), dev);
5412}
5413
5414static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5415					 struct device *dev)
5416{
5417	int ret;
5418
5419	if (!is_aux_domain(dev, domain))
5420		return -EPERM;
5421
5422	ret = prepare_domain_attach_device(domain, dev);
5423	if (ret)
5424		return ret;
5425
5426	return aux_domain_add_dev(to_dmar_domain(domain), dev);
5427}
5428
5429static void intel_iommu_detach_device(struct iommu_domain *domain,
5430				      struct device *dev)
5431{
5432	dmar_remove_one_dev_info(dev);
5433}
5434
5435static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5436					  struct device *dev)
5437{
5438	aux_domain_remove_dev(to_dmar_domain(domain), dev);
5439}
5440
5441static int intel_iommu_map(struct iommu_domain *domain,
5442			   unsigned long iova, phys_addr_t hpa,
5443			   size_t size, int iommu_prot)
5444{
5445	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5446	u64 max_addr;
5447	int prot = 0;
5448	int ret;
5449
5450	if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5451		return -EINVAL;
5452
5453	if (iommu_prot & IOMMU_READ)
5454		prot |= DMA_PTE_READ;
5455	if (iommu_prot & IOMMU_WRITE)
5456		prot |= DMA_PTE_WRITE;
5457	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5458		prot |= DMA_PTE_SNP;
5459
5460	max_addr = iova + size;
5461	if (dmar_domain->max_addr < max_addr) {
5462		u64 end;
5463
5464		/* check if minimum agaw is sufficient for mapped address */
5465		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5466		if (end < max_addr) {
5467			pr_err("%s: iommu width (%d) is not sufficient for the mapped address (%llx)\n",
5468			       __func__, dmar_domain->gaw,
5469			       max_addr);
5470			return -EFAULT;
5471		}
5472		dmar_domain->max_addr = max_addr;
5473	}
5474	/* Round up size to next multiple of PAGE_SIZE, if it and
5475	   the low bits of hpa would take us onto the next page */
5476	size = aligned_nrpages(hpa, size);
5477	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5478				 hpa >> VTD_PAGE_SHIFT, size, prot);
5479	return ret;
5480}
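
/*
 * Illustrative sketch, not driver code: the rounding that aligned_nrpages()
 * performs for intel_iommu_map() above, assuming 4KiB pages (VTD_PAGE_SHIFT
 * of 12) and a host PAGE_SIZE equal to VTD_PAGE_SIZE.  Mapping hpa = 0x1fff
 * with size = 0x2 crosses a page boundary, so two PTEs are needed even
 * though the requested size is far smaller than one page.
 */
#if 0	/* example only, never compiled */
static unsigned long example_nrpages(u64 hpa, size_t size)
{
	u64 offset = hpa & (VTD_PAGE_SIZE - 1);	/* low bits of hpa */

	/* round offset + size up to a page boundary, then count pages */
	return ALIGN(offset + size, VTD_PAGE_SIZE) >> VTD_PAGE_SHIFT;
}
#endif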
5481
5482static size_t intel_iommu_unmap(struct iommu_domain *domain,
5483				unsigned long iova, size_t size,
5484				struct iommu_iotlb_gather *gather)
5485{
5486	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5487	struct page *freelist = NULL;
5488	unsigned long start_pfn, last_pfn;
5489	unsigned int npages;
5490	int iommu_id, level = 0;
5491
5492	/* Cope with horrid API which requires us to unmap more than the
5493	   size argument if it happens to be a large-page mapping. */
5494	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5495	if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5496		return 0;
5497
5498	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5499		size = VTD_PAGE_SIZE << level_to_offset_bits(level);
5500
5501	start_pfn = iova >> VTD_PAGE_SHIFT;
5502	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5503
5504	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5505
5506	npages = last_pfn - start_pfn + 1;
5507
5508	for_each_domain_iommu(iommu_id, dmar_domain)
5509		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5510				      start_pfn, npages, !freelist, 0);
5511
5512	dma_free_pagelist(freelist);
5513
5514	if (dmar_domain->max_addr == iova + size)
5515		dmar_domain->max_addr = iova;
5516
5517	return size;
5518}
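
/*
 * Illustrative sketch, not driver code: the superpage widening performed by
 * intel_iommu_unmap() above.  Assuming level_to_offset_bits(level) expands
 * to (level - 1) * LEVEL_STRIDE as defined earlier in this file, an unmap
 * request that lands inside a level-2 (2MiB) or level-3 (1GiB) superpage is
 * widened to the whole superpage, and the widened size is what the caller
 * gets back.
 */
#if 0	/* example only, never compiled */
static size_t example_unmap_granule(int level)
{
	/* level 1: 4KiB, level 2: 2MiB, level 3: 1GiB, ... */
	return (size_t)VTD_PAGE_SIZE << ((level - 1) * LEVEL_STRIDE);
}
#endif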
5519
5520static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5521					    dma_addr_t iova)
5522{
5523	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5524	struct dma_pte *pte;
5525	int level = 0;
5526	u64 phys = 0;
5527
5528	if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5529		return 0;
5530
5531	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5532	if (pte)
5533		phys = dma_pte_addr(pte);
5534
5535	return phys;
5536}
5537
5538static inline bool scalable_mode_support(void)
5539{
5540	struct dmar_drhd_unit *drhd;
5541	struct intel_iommu *iommu;
5542	bool ret = true;
5543
5544	rcu_read_lock();
5545	for_each_active_iommu(iommu, drhd) {
5546		if (!sm_supported(iommu)) {
5547			ret = false;
5548			break;
5549		}
5550	}
5551	rcu_read_unlock();
5552
5553	return ret;
5554}
5555
5556static inline bool iommu_pasid_support(void)
5557{
5558	struct dmar_drhd_unit *drhd;
5559	struct intel_iommu *iommu;
5560	bool ret = true;
5561
5562	rcu_read_lock();
5563	for_each_active_iommu(iommu, drhd) {
5564		if (!pasid_supported(iommu)) {
5565			ret = false;
5566			break;
5567		}
5568	}
5569	rcu_read_unlock();
5570
5571	return ret;
5572}
5573
5574static bool intel_iommu_capable(enum iommu_cap cap)
5575{
5576	if (cap == IOMMU_CAP_CACHE_COHERENCY)
5577		return domain_update_iommu_snooping(NULL) == 1;
5578	if (cap == IOMMU_CAP_INTR_REMAP)
5579		return irq_remapping_enabled == 1;
5580
5581	return false;
5582}
5583
5584static int intel_iommu_add_device(struct device *dev)
5585{
5586	struct dmar_domain *dmar_domain;
5587	struct iommu_domain *domain;
5588	struct intel_iommu *iommu;
5589	struct iommu_group *group;
5590	u8 bus, devfn;
5591	int ret;
5592
5593	iommu = device_to_iommu(dev, &bus, &devfn);
5594	if (!iommu)
5595		return -ENODEV;
5596
5597	iommu_device_link(&iommu->iommu, dev);
5598
5599	if (translation_pre_enabled(iommu))
5600		dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
5601
5602	group = iommu_group_get_for_dev(dev);
5603
5604	if (IS_ERR(group))
5605		return PTR_ERR(group);
5606
5607	iommu_group_put(group);
5608
5609	domain = iommu_get_domain_for_dev(dev);
5610	dmar_domain = to_dmar_domain(domain);
5611	if (domain->type == IOMMU_DOMAIN_DMA) {
5612		if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
5613			ret = iommu_request_dm_for_dev(dev);
5614			if (ret) {
5615				dmar_remove_one_dev_info(dev);
5616				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5617				domain_add_dev_info(si_domain, dev);
5618				dev_info(dev,
5619					 "Device uses a private identity domain.\n");
5620			}
5621		}
5622	} else {
5623		if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
5624			ret = iommu_request_dma_domain_for_dev(dev);
5625			if (ret) {
5626				dmar_remove_one_dev_info(dev);
5627				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5628				if (!get_private_domain_for_dev(dev)) {
5629					dev_warn(dev,
5630						 "Failed to get a private domain.\n");
5631					return -ENOMEM;
5632				}
5633
5634				dev_info(dev,
5635					 "Device uses a private dma domain.\n");
5636			}
5637		}
5638	}
5639
5640	if (device_needs_bounce(dev)) {
5641		dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
5642		set_dma_ops(dev, &bounce_dma_ops);
5643	}
5644
5645	return 0;
5646}
5647
5648static void intel_iommu_remove_device(struct device *dev)
5649{
5650	struct intel_iommu *iommu;
5651	u8 bus, devfn;
5652
5653	iommu = device_to_iommu(dev, &bus, &devfn);
5654	if (!iommu)
5655		return;
5656
5657	dmar_remove_one_dev_info(dev);
5658
5659	iommu_group_remove_device(dev);
5660
5661	iommu_device_unlink(&iommu->iommu, dev);
5662
5663	if (device_needs_bounce(dev))
5664		set_dma_ops(dev, NULL);
5665}
5666
5667static void intel_iommu_get_resv_regions(struct device *device,
5668					 struct list_head *head)
5669{
5670	int prot = DMA_PTE_READ | DMA_PTE_WRITE;
5671	struct iommu_resv_region *reg;
5672	struct dmar_rmrr_unit *rmrr;
5673	struct device *i_dev;
5674	int i;
5675
5676	down_read(&dmar_global_lock);
5677	for_each_rmrr_units(rmrr) {
5678		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5679					  i, i_dev) {
5680			struct iommu_resv_region *resv;
5681			enum iommu_resv_type type;
5682			size_t length;
5683
5684			if (i_dev != device &&
5685			    !is_downstream_to_pci_bridge(device, i_dev))
5686				continue;
5687
5688			length = rmrr->end_address - rmrr->base_address + 1;
5689
5690			type = device_rmrr_is_relaxable(device) ?
5691				IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5692
5693			resv = iommu_alloc_resv_region(rmrr->base_address,
5694						       length, prot, type);
5695			if (!resv)
5696				break;
5697
5698			list_add_tail(&resv->list, head);
5699		}
5700	}
5701	up_read(&dmar_global_lock);
5702
5703#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5704	if (dev_is_pci(device)) {
5705		struct pci_dev *pdev = to_pci_dev(device);
5706
5707		if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5708			reg = iommu_alloc_resv_region(0, 1UL << 24, 0,
5709						      IOMMU_RESV_DIRECT);
5710			if (reg)
5711				list_add_tail(&reg->list, head);
5712		}
5713	}
5714#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5715
5716	reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5717				      IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5718				      0, IOMMU_RESV_MSI);
5719	if (!reg)
5720		return;
5721	list_add_tail(&reg->list, head);
5722}
5723
5724static void intel_iommu_put_resv_regions(struct device *dev,
5725					 struct list_head *head)
5726{
5727	struct iommu_resv_region *entry, *next;
5728
5729	list_for_each_entry_safe(entry, next, head, list)
5730		kfree(entry);
5731}
5732
5733int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
5734{
5735	struct device_domain_info *info;
5736	struct context_entry *context;
5737	struct dmar_domain *domain;
5738	unsigned long flags;
5739	u64 ctx_lo;
5740	int ret;
5741
5742	domain = find_domain(dev);
5743	if (!domain)
5744		return -EINVAL;
5745
5746	spin_lock_irqsave(&device_domain_lock, flags);
5747	spin_lock(&iommu->lock);
5748
5749	ret = -EINVAL;
5750	info = dev->archdata.iommu;
5751	if (!info || !info->pasid_supported)
5752		goto out;
5753
5754	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5755	if (WARN_ON(!context))
5756		goto out;
5757
5758	ctx_lo = context[0].lo;
5759
5760	if (!(ctx_lo & CONTEXT_PASIDE)) {
5761		ctx_lo |= CONTEXT_PASIDE;
5762		context[0].lo = ctx_lo;
5763		wmb();
5764		iommu->flush.flush_context(iommu,
5765					   domain->iommu_did[iommu->seq_id],
5766					   PCI_DEVID(info->bus, info->devfn),
5767					   DMA_CCMD_MASK_NOBIT,
5768					   DMA_CCMD_DEVICE_INVL);
5769	}
5770
5771	/* Enable PASID support in the device, if it wasn't already */
5772	if (!info->pasid_enabled)
5773		iommu_enable_dev_iotlb(info);
5774
5775	ret = 0;
5776
5777 out:
5778	spin_unlock(&iommu->lock);
5779	spin_unlock_irqrestore(&device_domain_lock, flags);
5780
5781	return ret;
5782}
5783
5784static void intel_iommu_apply_resv_region(struct device *dev,
5785					  struct iommu_domain *domain,
5786					  struct iommu_resv_region *region)
5787{
5788	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5789	unsigned long start, end;
5790
5791	start = IOVA_PFN(region->start);
5792	end   = IOVA_PFN(region->start + region->length - 1);
5793
5794	WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5795}
5796
5797#ifdef CONFIG_INTEL_IOMMU_SVM
5798struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5799{
5800	struct intel_iommu *iommu;
5801	u8 bus, devfn;
5802
5803	if (iommu_dummy(dev)) {
5804		dev_warn(dev,
5805			 "No IOMMU translation for device; cannot enable SVM\n");
5806		return NULL;
5807	}
5808
5809	iommu = device_to_iommu(dev, &bus, &devfn);
5810	if (!iommu) {
5811		dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5812		return NULL;
5813	}
5814
5815	return iommu;
5816}
5817#endif /* CONFIG_INTEL_IOMMU_SVM */
5818
5819static int intel_iommu_enable_auxd(struct device *dev)
5820{
5821	struct device_domain_info *info;
5822	struct intel_iommu *iommu;
5823	unsigned long flags;
5824	u8 bus, devfn;
5825	int ret;
5826
5827	iommu = device_to_iommu(dev, &bus, &devfn);
5828	if (!iommu || dmar_disabled)
5829		return -EINVAL;
5830
5831	if (!sm_supported(iommu) || !pasid_supported(iommu))
5832		return -EINVAL;
5833
5834	ret = intel_iommu_enable_pasid(iommu, dev);
5835	if (ret)
5836		return -ENODEV;
5837
5838	spin_lock_irqsave(&device_domain_lock, flags);
5839	info = dev->archdata.iommu;
5840	info->auxd_enabled = 1;
5841	spin_unlock_irqrestore(&device_domain_lock, flags);
5842
5843	return 0;
5844}
5845
5846static int intel_iommu_disable_auxd(struct device *dev)
5847{
5848	struct device_domain_info *info;
5849	unsigned long flags;
5850
5851	spin_lock_irqsave(&device_domain_lock, flags);
5852	info = dev->archdata.iommu;
5853	if (!WARN_ON(!info))
5854		info->auxd_enabled = 0;
5855	spin_unlock_irqrestore(&device_domain_lock, flags);
5856
5857	return 0;
5858}
5859
5860/*
5861 * A PCI Express Designated Vendor-Specific Extended Capability (DVSEC)
5862 * is defined in section 3.7 of the Intel Scalable I/O Virtualization spec
5863 * so that system software and tools can detect endpoint devices supporting
5864 * Intel Scalable I/O Virtualization without any host driver dependency.
5865 *
5866 * Returns the address of the matching extended capability structure within
5867 * the device's PCI configuration space or 0 if the device does not support
5868 * it.
5869 */
5870static int siov_find_pci_dvsec(struct pci_dev *pdev)
5871{
5872	int pos;
5873	u16 vendor, id;
5874
5875	pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5876	while (pos) {
5877		pci_read_config_word(pdev, pos + 4, &vendor);
5878		pci_read_config_word(pdev, pos + 8, &id);
5879		if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5880			return pos;
5881
5882		pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5883	}
5884
5885	return 0;
5886}
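
/*
 * Illustrative sketch, not driver code: the layout siov_find_pci_dvsec()
 * walks.  A DVSEC starts with the usual extended capability header
 * (capability ID 0x23); the 16-bit DVSEC vendor ID sits at offset 4 and the
 * 16-bit DVSEC ID at offset 8, which is why the function above reads the
 * words at pos + 4 and pos + 8 and matches the Intel vendor ID and ID 5.
 */
#if 0	/* example only, never compiled */
struct example_siov_dvsec {
	u32 ext_cap_hdr;	/* cap ID 0x23, version, next-capability offset */
	u16 vendor_id;		/* offset 4: PCI_VENDOR_ID_INTEL for SIOV */
	u16 rev_len;		/* offset 6: DVSEC revision and length fields */
	u16 dvsec_id;		/* offset 8: 5 identifies Scalable IOV */
};
#endif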
5887
5888static bool
5889intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5890{
5891	if (feat == IOMMU_DEV_FEAT_AUX) {
5892		int ret;
5893
5894		if (!dev_is_pci(dev) || dmar_disabled ||
5895		    !scalable_mode_support() || !iommu_pasid_support())
5896			return false;
5897
5898		ret = pci_pasid_features(to_pci_dev(dev));
5899		if (ret < 0)
5900			return false;
5901
5902		return !!siov_find_pci_dvsec(to_pci_dev(dev));
5903	}
5904
5905	return false;
5906}
5907
5908static int
5909intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5910{
5911	if (feat == IOMMU_DEV_FEAT_AUX)
5912		return intel_iommu_enable_auxd(dev);
5913
5914	return -ENODEV;
5915}
5916
5917static int
5918intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5919{
5920	if (feat == IOMMU_DEV_FEAT_AUX)
5921		return intel_iommu_disable_auxd(dev);
5922
5923	return -ENODEV;
5924}
5925
5926static bool
5927intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5928{
5929	struct device_domain_info *info = dev->archdata.iommu;
5930
5931	if (feat == IOMMU_DEV_FEAT_AUX)
5932		return scalable_mode_support() && info && info->auxd_enabled;
5933
5934	return false;
5935}
5936
5937static int
5938intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5939{
5940	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5941
5942	return dmar_domain->default_pasid > 0 ?
5943			dmar_domain->default_pasid : -EINVAL;
5944}
5945
5946static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5947					   struct device *dev)
5948{
5949	return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
5950}
5951
5952const struct iommu_ops intel_iommu_ops = {
5953	.capable		= intel_iommu_capable,
5954	.domain_alloc		= intel_iommu_domain_alloc,
5955	.domain_free		= intel_iommu_domain_free,
5956	.attach_dev		= intel_iommu_attach_device,
5957	.detach_dev		= intel_iommu_detach_device,
5958	.aux_attach_dev		= intel_iommu_aux_attach_device,
5959	.aux_detach_dev		= intel_iommu_aux_detach_device,
5960	.aux_get_pasid		= intel_iommu_aux_get_pasid,
5961	.map			= intel_iommu_map,
5962	.unmap			= intel_iommu_unmap,
5963	.iova_to_phys		= intel_iommu_iova_to_phys,
5964	.add_device		= intel_iommu_add_device,
5965	.remove_device		= intel_iommu_remove_device,
5966	.get_resv_regions	= intel_iommu_get_resv_regions,
5967	.put_resv_regions	= intel_iommu_put_resv_regions,
5968	.apply_resv_region	= intel_iommu_apply_resv_region,
5969	.device_group		= pci_device_group,
5970	.dev_has_feat		= intel_iommu_dev_has_feat,
5971	.dev_feat_enabled	= intel_iommu_dev_feat_enabled,
5972	.dev_enable_feat	= intel_iommu_dev_enable_feat,
5973	.dev_disable_feat	= intel_iommu_dev_disable_feat,
5974	.is_attach_deferred	= intel_iommu_is_attach_deferred,
5975	.pgsize_bitmap		= INTEL_IOMMU_PGSIZES,
5976};
5977
5978static void quirk_iommu_igfx(struct pci_dev *dev)
5979{
5980	pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
5981	dmar_map_gfx = 0;
5982}
5983
5984/* G4x/GM45 integrated gfx dmar support is totally busted. */
5985DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
5986DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
5987DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
5988DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
5989DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
5990DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
5991DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
5992
5993/* Broadwell igfx malfunctions with dmar */
5994DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
5995DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
5996DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
5997DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
5998DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
5999DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
6000DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
6001DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
6002DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
6003DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
6004DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
6005DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
6006DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
6007DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
6008DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
6009DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
6010DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
6011DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
6012DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
6013DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
6014DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
6015DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
6016DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
6017DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
6018
6019static void quirk_iommu_rwbf(struct pci_dev *dev)
6020{
6021	/*
6022	 * Mobile 4 Series Chipset neglects to set RWBF capability,
6023	 * but needs it. Same seems to hold for the desktop versions.
6024	 */
6025	pci_info(dev, "Forcing write-buffer flush capability\n");
6026	rwbf_quirk = 1;
6027}
6028
6029DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
6030DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
6031DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
6032DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
6033DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
6034DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
6035DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
6036
6037#define GGC 0x52
6038#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
6039#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
6040#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
6041#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
6042#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
6043#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
6044#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
6045#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
6046
6047static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
6048{
6049	unsigned short ggc;
6050
6051	if (pci_read_config_word(dev, GGC, &ggc))
6052		return;
6053
6054	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
6055		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
6056		dmar_map_gfx = 0;
6057	} else if (dmar_map_gfx) {
6058		/* we have to ensure the gfx device is idle before we flush */
6059		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6060		intel_iommu_strict = 1;
6061	}
6062}
6063DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
6064DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
6065DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
6066DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
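
/*
 * Illustrative sketch, not driver code: how the GGC bits above decode.  A
 * GGC value of GGC_MEMORY_SIZE_4M_VT (0xb << 8) has GGC_MEMORY_VT_ENABLED
 * set, meaning the BIOS reserved shadow GTT space for VT-d, while
 * GGC_MEMORY_SIZE_1M (0x1 << 8) does not; the latter is the case the quirk
 * above reacts to by disabling the graphics IOMMU.
 */
#if 0	/* example only, never compiled */
static bool example_ggc_vt_enabled(unsigned short ggc)
{
	return (ggc & GGC_MEMORY_VT_ENABLED) != 0;
}
#endif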
6067
6068/* On Tylersburg chipsets, some BIOSes have been known to enable the
6069   ISOCH DMAR unit for the Azalia sound device, but not give it any
6070   TLB entries, which causes it to deadlock. Check for that.  We do
6071   this in a function called from init_dmars(), instead of in a PCI
6072   quirk, because we don't want to print the obnoxious "BIOS broken"
6073   message if VT-d is actually disabled.
6074*/
6075static void __init check_tylersburg_isoch(void)
6076{
6077	struct pci_dev *pdev;
6078	uint32_t vtisochctrl;
6079
6080	/* If there's no Azalia in the system anyway, forget it. */
6081	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
6082	if (!pdev)
6083		return;
6084	pci_dev_put(pdev);
6085
6086	/* System Management Registers. Might be hidden, in which case
6087	   we can't do the sanity check. But that's OK, because the
6088	   known-broken BIOSes _don't_ actually hide it, so far. */
6089	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
6090	if (!pdev)
6091		return;
6092
6093	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
6094		pci_dev_put(pdev);
6095		return;
6096	}
6097
6098	pci_dev_put(pdev);
6099
6100	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
6101	if (vtisochctrl & 1)
6102		return;
6103
6104	/* Drop all bits other than the number of TLB entries */
6105	vtisochctrl &= 0x1c;
6106
6107	/* If we have the recommended number of TLB entries (16), fine. */
6108	if (vtisochctrl == 0x10)
6109		return;
6110
6111	/* Zero TLB entries? You get to ride the short bus to school. */
6112	if (!vtisochctrl) {
6113		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
6114		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
6115		     dmi_get_system_info(DMI_BIOS_VENDOR),
6116		     dmi_get_system_info(DMI_BIOS_VERSION),
6117		     dmi_get_system_info(DMI_PRODUCT_VERSION));
6118		iommu_identity_mapping |= IDENTMAP_AZALIA;
6119		return;
6120	}
6121
6122	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
6123	       vtisochctrl);
6124}