   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
   4 * Author: Joerg Roedel <jroedel@suse.de>
   5 *         Leo Duran <leo.duran@amd.com>
   6 */
   7
   8#define pr_fmt(fmt)     "AMD-Vi: " fmt
   9#define dev_fmt(fmt)    pr_fmt(fmt)
  10
  11#include <linux/pci.h>
  12#include <linux/acpi.h>
  13#include <linux/list.h>
  14#include <linux/bitmap.h>
  15#include <linux/slab.h>
  16#include <linux/syscore_ops.h>
  17#include <linux/interrupt.h>
  18#include <linux/msi.h>
  19#include <linux/irq.h>
  20#include <linux/amd-iommu.h>
  21#include <linux/export.h>
  22#include <linux/kmemleak.h>
  23#include <linux/cc_platform.h>
  24#include <linux/iopoll.h>
  25#include <asm/pci-direct.h>
  26#include <asm/iommu.h>
  27#include <asm/apic.h>
  28#include <asm/gart.h>
  29#include <asm/x86_init.h>
  30#include <asm/io_apic.h>
  31#include <asm/irq_remapping.h>
  32#include <asm/set_memory.h>
  33#include <asm/sev.h>
  34
  35#include <linux/crash_dump.h>
  36
  37#include "amd_iommu.h"
  38#include "../irq_remapping.h"
  39#include "../iommu-pages.h"
  40
  41/*
  42 * definitions for the ACPI scanning code
  43 */
  44#define IVRS_HEADER_LENGTH 48
  45
  46#define ACPI_IVHD_TYPE_MAX_SUPPORTED	0x40
  47#define ACPI_IVMD_TYPE_ALL              0x20
  48#define ACPI_IVMD_TYPE                  0x21
  49#define ACPI_IVMD_TYPE_RANGE            0x22
  50
  51#define IVHD_DEV_ALL                    0x01
  52#define IVHD_DEV_SELECT                 0x02
  53#define IVHD_DEV_SELECT_RANGE_START     0x03
  54#define IVHD_DEV_RANGE_END              0x04
  55#define IVHD_DEV_ALIAS                  0x42
  56#define IVHD_DEV_ALIAS_RANGE            0x43
  57#define IVHD_DEV_EXT_SELECT             0x46
  58#define IVHD_DEV_EXT_SELECT_RANGE       0x47
  59#define IVHD_DEV_SPECIAL		0x48
  60#define IVHD_DEV_ACPI_HID		0xf0
  61
  62#define UID_NOT_PRESENT                 0
  63#define UID_IS_INTEGER                  1
  64#define UID_IS_CHARACTER                2
  65
  66#define IVHD_SPECIAL_IOAPIC		1
  67#define IVHD_SPECIAL_HPET		2
  68
  69#define IVHD_FLAG_HT_TUN_EN_MASK        0x01
  70#define IVHD_FLAG_PASSPW_EN_MASK        0x02
  71#define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
  72#define IVHD_FLAG_ISOC_EN_MASK          0x08
  73
  74#define IVMD_FLAG_EXCL_RANGE            0x08
  75#define IVMD_FLAG_IW                    0x04
  76#define IVMD_FLAG_IR                    0x02
  77#define IVMD_FLAG_UNITY_MAP             0x01
  78
  79#define ACPI_DEVFLAG_INITPASS           0x01
  80#define ACPI_DEVFLAG_EXTINT             0x02
  81#define ACPI_DEVFLAG_NMI                0x04
  82#define ACPI_DEVFLAG_SYSMGT1            0x10
  83#define ACPI_DEVFLAG_SYSMGT2            0x20
  84#define ACPI_DEVFLAG_LINT0              0x40
  85#define ACPI_DEVFLAG_LINT1              0x80
  86#define ACPI_DEVFLAG_ATSDIS             0x10000000
  87
   87
  88#define IVRS_GET_SBDF_ID(seg, bus, dev, fn)	(((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
  89						 | ((dev & 0x1f) << 3) | (fn & 0x7))
  90
  91/*
  92 * ACPI table definitions
  93 *
  94 * These data structures are laid over the table to parse the important values
  95 * out of it.
  96 */
  97
  98/*
   99 * Structure describing one IOMMU in the ACPI table. Typically followed by one
  100 * or more ivhd_entry structures.
 101 */
 102struct ivhd_header {
 103	u8 type;
 104	u8 flags;
 105	u16 length;
 106	u16 devid;
 107	u16 cap_ptr;
 108	u64 mmio_phys;
 109	u16 pci_seg;
 110	u16 info;
 111	u32 efr_attr;
 112
 113	/* Following only valid on IVHD type 11h and 40h */
 114	u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
 115	u64 efr_reg2;
 116} __attribute__((packed));
 117
 118/*
 119 * A device entry describing which devices a specific IOMMU translates and
 120 * which requestor ids they use.
 121 */
 122struct ivhd_entry {
 123	u8 type;
 124	u16 devid;
 125	u8 flags;
 126	struct_group(ext_hid,
 127		u32 ext;
 128		u32 hidh;
 129	);
 130	u64 cid;
 131	u8 uidf;
 132	u8 uidl;
 133	u8 uid;
 134} __attribute__((packed));
 135
 136/*
 137 * An AMD IOMMU memory definition structure. It defines things like exclusion
 138 * ranges for devices and regions that should be unity mapped.
 139 */
 140struct ivmd_header {
 141	u8 type;
 142	u8 flags;
 143	u16 length;
 144	u16 devid;
 145	u16 aux;
 146	u16 pci_seg;
 147	u8  resv[6];
 148	u64 range_start;
 149	u64 range_length;
 150} __attribute__((packed));
 151
 152bool amd_iommu_dump;
 153bool amd_iommu_irq_remap __read_mostly;
 154
 155enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1;
 156/* Guest page table level */
 157int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;
 158
 159int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
 160static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
 161
 162static bool amd_iommu_detected;
 163static bool amd_iommu_disabled __initdata;
 164static bool amd_iommu_force_enable __initdata;
 165static bool amd_iommu_irtcachedis;
 166static int amd_iommu_target_ivhd_type;
 167
 168/* Global EFR and EFR2 registers */
 169u64 amd_iommu_efr;
 170u64 amd_iommu_efr2;
 171
 172/* SNP is enabled on the system? */
 173bool amd_iommu_snp_en;
 174EXPORT_SYMBOL(amd_iommu_snp_en);
 175
 176LIST_HEAD(amd_iommu_pci_seg_list);	/* list of all PCI segments */
 177LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
 178					   system */
 179
 180/* Number of IOMMUs present in the system */
 181static int amd_iommus_present;
 182
 183/* IOMMUs have a non-present cache? */
 184bool amd_iommu_np_cache __read_mostly;
 185bool amd_iommu_iotlb_sup __read_mostly = true;
 186
 187static bool amd_iommu_pc_present __read_mostly;
 188bool amdr_ivrs_remap_support __read_mostly;
 189
 190bool amd_iommu_force_isolation __read_mostly;
 191
 192unsigned long amd_iommu_pgsize_bitmap __ro_after_init = AMD_IOMMU_PGSIZES;
 193
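/*
 * Initialization state machine; init_state is advanced through these states
 * via iommu_go_to_state(). The last three entries are terminal states (no
 * IOMMU found, initialization error, disabled on the command line).
 */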
 194enum iommu_init_state {
 195	IOMMU_START_STATE,
 196	IOMMU_IVRS_DETECTED,
 197	IOMMU_ACPI_FINISHED,
 198	IOMMU_ENABLED,
 199	IOMMU_PCI_INIT,
 200	IOMMU_INTERRUPTS_EN,
 201	IOMMU_INITIALIZED,
 202	IOMMU_NOT_FOUND,
 203	IOMMU_INIT_ERROR,
 204	IOMMU_CMDLINE_DISABLED,
 205};
 206
 207/* Early ioapic and hpet maps from kernel command line */
 208#define EARLY_MAP_SIZE		4
 209static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
 210static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
 211static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
 212
 213static int __initdata early_ioapic_map_size;
 214static int __initdata early_hpet_map_size;
 215static int __initdata early_acpihid_map_size;
 216
 217static bool __initdata cmdline_maps;
 218
 219static enum iommu_init_state init_state = IOMMU_START_STATE;
 220
 221static int amd_iommu_enable_interrupts(void);
 222static int __init iommu_go_to_state(enum iommu_init_state state);
 223static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);
 224
 225static bool amd_iommu_pre_enabled = true;
 226
 227static u32 amd_iommu_ivinfo __initdata;
 228
 229bool translation_pre_enabled(struct amd_iommu *iommu)
 230{
 231	return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
 232}
 233
 234static void clear_translation_pre_enabled(struct amd_iommu *iommu)
 235{
 236	iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
 237}
 238
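/*
 * Record whether translation was already enabled when we took over, e.g. by
 * the firmware or by a previous kernel (kdump case).
 */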
 239static void init_translation_status(struct amd_iommu *iommu)
 240{
 241	u64 ctrl;
 242
 243	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 244	if (ctrl & (1<<CONTROL_IOMMU_EN))
 245		iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
 246}
 247
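/*
 * Size in bytes of a per-segment table covering all device IDs up to
 * last_bdf, rounded up to a whole power-of-two number of pages.
 */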
 248static inline unsigned long tbl_size(int entry_size, int last_bdf)
 249{
 250	unsigned shift = PAGE_SHIFT +
 251			 get_order((last_bdf + 1) * entry_size);
 252
 253	return 1UL << shift;
 254}
 255
 256int amd_iommu_get_num_iommus(void)
 257{
 258	return amd_iommus_present;
 259}
 260
 261/*
  262 * Iterate through all the IOMMUs to compute the common EFR
  263 * masks among them and warn if an inconsistency is found.
 264 */
 265static __init void get_global_efr(void)
 266{
 267	struct amd_iommu *iommu;
 268
 269	for_each_iommu(iommu) {
 270		u64 tmp = iommu->features;
 271		u64 tmp2 = iommu->features2;
 272
 273		if (list_is_first(&iommu->list, &amd_iommu_list)) {
 274			amd_iommu_efr = tmp;
 275			amd_iommu_efr2 = tmp2;
 276			continue;
 277		}
 278
 279		if (amd_iommu_efr == tmp &&
 280		    amd_iommu_efr2 == tmp2)
 281			continue;
 282
 283		pr_err(FW_BUG
 284		       "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
 285		       tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
 286		       iommu->index, iommu->pci_seg->id,
 287		       PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
 288		       PCI_FUNC(iommu->devid));
 289
 290		amd_iommu_efr &= tmp;
 291		amd_iommu_efr2 &= tmp2;
 292	}
 293
 294	pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
 295}
 296
 297/*
 298 * For IVHD type 0x11/0x40, EFR is also available via IVHD.
 299 * Default to IVHD EFR since it is available sooner
 300 * (i.e. before PCI init).
 301 */
 302static void __init early_iommu_features_init(struct amd_iommu *iommu,
 303					     struct ivhd_header *h)
 304{
 305	if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
 306		iommu->features = h->efr_reg;
 307		iommu->features2 = h->efr_reg2;
 308	}
 309	if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
 310		amdr_ivrs_remap_support = true;
 311}
 312
 313/* Access to l1 and l2 indexed register spaces */
 314
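/*
 * Registers 0xf8/0xfc (L1) and 0xf0/0xf4 (L2) in the IOMMU's PCI config space
 * form index/data pairs; the write-enable bit in the index register (bit 31
 * for L1, bit 8 for L2) must be set for the data write to take effect.
 */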
 315static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
 316{
 317	u32 val;
 318
 319	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
 320	pci_read_config_dword(iommu->dev, 0xfc, &val);
 321	return val;
 322}
 323
 324static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
 325{
 326	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
 327	pci_write_config_dword(iommu->dev, 0xfc, val);
 328	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
 329}
 330
 331static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
 332{
 333	u32 val;
 334
 335	pci_write_config_dword(iommu->dev, 0xf0, address);
 336	pci_read_config_dword(iommu->dev, 0xf4, &val);
 337	return val;
 338}
 339
 340static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
 341{
 342	pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
 343	pci_write_config_dword(iommu->dev, 0xf4, val);
 344}
 345
 346/****************************************************************************
 347 *
 348 * AMD IOMMU MMIO register space handling functions
 349 *
 350 * These functions are used to program the IOMMU device registers in
 351 * MMIO space required for that driver.
 352 *
 353 ****************************************************************************/
 354
 355/*
  356 * This function sets the exclusion range in the IOMMU. DMA accesses to the
  357 * exclusion range are passed through untranslated.
 358 */
 359static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 360{
 361	u64 start = iommu->exclusion_start & PAGE_MASK;
 362	u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
 363	u64 entry;
 364
 365	if (!iommu->exclusion_start)
 366		return;
 367
 368	entry = start | MMIO_EXCL_ENABLE_MASK;
 369	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
 370			&entry, sizeof(entry));
 371
 372	entry = limit;
 373	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
 374			&entry, sizeof(entry));
 375}
 376
 377static void iommu_set_cwwb_range(struct amd_iommu *iommu)
 378{
 379	u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
 380	u64 entry = start & PM_ADDR_MASK;
 381
 382	if (!check_feature(FEATURE_SNP))
 383		return;
 384
 385	/* Note:
 386	 * Re-purpose Exclusion base/limit registers for Completion wait
 387	 * write-back base/limit.
 388	 */
 389	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
 390		    &entry, sizeof(entry));
 391
 392	/* Note:
 393	 * Default to 4 Kbytes, which can be specified by setting base
 394	 * address equal to the limit address.
 395	 */
 396	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
 397		    &entry, sizeof(entry));
 398}
 399
 400/* Programs the physical address of the device table into the IOMMU hardware */
 401static void iommu_set_device_table(struct amd_iommu *iommu)
 402{
 403	u64 entry;
 404	u32 dev_table_size = iommu->pci_seg->dev_table_size;
 405	void *dev_table = (void *)get_dev_table(iommu);
 406
 407	BUG_ON(iommu->mmio_base == NULL);
 408
 409	entry = iommu_virt_to_phys(dev_table);
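	/* The low bits of the base address encode the table size in 4K pages, minus one */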
 410	entry |= (dev_table_size >> 12) - 1;
 411	memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
 412			&entry, sizeof(entry));
 413}
 414
 415/* Generic functions to enable/disable certain features of the IOMMU. */
 416void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
 417{
 418	u64 ctrl;
 419
 420	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
 421	ctrl |= (1ULL << bit);
 422	writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
 423}
 424
 425static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 426{
 427	u64 ctrl;
 428
 429	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 430	ctrl &= ~(1ULL << bit);
 431	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 432}
 433
 434static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
 435{
 436	u64 ctrl;
 437
 438	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 439	ctrl &= ~CTRL_INV_TO_MASK;
 440	ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
 441	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 442}
 443
 444/* Function to enable the hardware */
 445static void iommu_enable(struct amd_iommu *iommu)
 446{
 447	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 448}
 449
 450static void iommu_disable(struct amd_iommu *iommu)
 451{
 452	if (!iommu->mmio_base)
 453		return;
 454
 455	/* Disable command buffer */
 456	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 457
 458	/* Disable event logging and event interrupts */
 459	iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
 460	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
 461
 462	/* Disable IOMMU GA_LOG */
 463	iommu_feature_disable(iommu, CONTROL_GALOG_EN);
 464	iommu_feature_disable(iommu, CONTROL_GAINT_EN);
 465
 466	/* Disable IOMMU PPR logging */
 467	iommu_feature_disable(iommu, CONTROL_PPRLOG_EN);
 468	iommu_feature_disable(iommu, CONTROL_PPRINT_EN);
 469
 470	/* Disable IOMMU hardware itself */
 471	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
 472
 473	/* Clear IRTE cache disabling bit */
 474	iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
 475}
 476
 477/*
  478 * Mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
  479 * the system has its own MMIO region.
 480 */
 481static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
 482{
 483	if (!request_mem_region(address, end, "amd_iommu")) {
 484		pr_err("Can not reserve memory region %llx-%llx for mmio\n",
 485			address, end);
 486		pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
 487		return NULL;
 488	}
 489
 490	return (u8 __iomem *)ioremap(address, end);
 491}
 492
 493static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
 494{
 495	if (iommu->mmio_base)
 496		iounmap(iommu->mmio_base);
 497	release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
 498}
 499
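/*
 * Fixed header length of an IVHD block: 24 bytes for type 10h, 40 bytes for
 * types 11h and 40h; 0 is returned for unsupported types.
 */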
 500static inline u32 get_ivhd_header_size(struct ivhd_header *h)
 501{
 502	u32 size = 0;
 503
 504	switch (h->type) {
 505	case 0x10:
 506		size = 24;
 507		break;
 508	case 0x11:
 509	case 0x40:
 510		size = 40;
 511		break;
 512	}
 513	return size;
 514}
 515
 516/****************************************************************************
 517 *
 518 * The functions below belong to the first pass of AMD IOMMU ACPI table
 519 * parsing. In this pass we try to find out the highest device id this
  520 * code has to handle. Based on this information, the size of the shared data
  521 * structures is determined later.
 522 *
 523 ****************************************************************************/
 524
 525/*
 526 * This function calculates the length of a given IVHD entry
 527 */
 528static inline int ivhd_entry_length(u8 *ivhd)
 529{
 530	u32 type = ((struct ivhd_entry *)ivhd)->type;
 531
 532	if (type < 0x80) {
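		/* Fixed-length entry: the two MSBs of the type field encode 4, 8, 16 or 32 bytes */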
 533		return 0x04 << (*ivhd >> 6);
 534	} else if (type == IVHD_DEV_ACPI_HID) {
 535		/* For ACPI_HID, offset 21 is uid len */
 536		return *((u8 *)ivhd + 21) + 22;
 537	}
 538	return 0;
 539}
 540
 541/*
  542 * After reading the highest device id from the IOMMU PCI capability header,
  543 * this function checks whether a higher device id is defined in the ACPI table.
 544 */
 545static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
 546{
 547	u8 *p = (void *)h, *end = (void *)h;
 548	struct ivhd_entry *dev;
 549	int last_devid = -EINVAL;
 550
 551	u32 ivhd_size = get_ivhd_header_size(h);
 552
 553	if (!ivhd_size) {
 554		pr_err("Unsupported IVHD type %#x\n", h->type);
 555		return -EINVAL;
 556	}
 557
 558	p += ivhd_size;
 559	end += h->length;
 560
 561	while (p < end) {
 562		dev = (struct ivhd_entry *)p;
 563		switch (dev->type) {
 564		case IVHD_DEV_ALL:
 565			/* Use maximum BDF value for DEV_ALL */
 566			return 0xffff;
 567		case IVHD_DEV_SELECT:
 568		case IVHD_DEV_RANGE_END:
 569		case IVHD_DEV_ALIAS:
 570		case IVHD_DEV_EXT_SELECT:
 571			/* all the above subfield types refer to device ids */
 572			if (dev->devid > last_devid)
 573				last_devid = dev->devid;
 574			break;
 575		default:
 576			break;
 577		}
 578		p += ivhd_entry_length(p);
 579	}
 580
 581	WARN_ON(p != end);
 582
 583	return last_devid;
 584}
 585
 586static int __init check_ivrs_checksum(struct acpi_table_header *table)
 587{
 588	int i;
 589	u8 checksum = 0, *p = (u8 *)table;
 590
 591	for (i = 0; i < table->length; ++i)
 592		checksum += p[i];
 593	if (checksum != 0) {
 594		/* ACPI table corrupt */
 595		pr_err(FW_BUG "IVRS invalid checksum\n");
 596		return -ENODEV;
 597	}
 598
 599	return 0;
 600}
 601
 602/*
 603 * Iterate over all IVHD entries in the ACPI table and find the highest device
 604 * id which we need to handle. This is the first of three functions which parse
 605 * the ACPI table. So we check the checksum here.
 606 */
 607static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg)
 608{
 609	u8 *p = (u8 *)table, *end = (u8 *)table;
 610	struct ivhd_header *h;
 611	int last_devid, last_bdf = 0;
 612
 613	p += IVRS_HEADER_LENGTH;
 614
 615	end += table->length;
 616	while (p < end) {
 617		h = (struct ivhd_header *)p;
 618		if (h->pci_seg == pci_seg &&
 619		    h->type == amd_iommu_target_ivhd_type) {
 620			last_devid = find_last_devid_from_ivhd(h);
 621
 622			if (last_devid < 0)
 623				return -EINVAL;
 624			if (last_devid > last_bdf)
 625				last_bdf = last_devid;
 626		}
 627		p += h->length;
 628	}
 629	WARN_ON(p != end);
 630
 631	return last_bdf;
 632}
 633
 634/****************************************************************************
 635 *
 636 * The following functions belong to the code path which parses the ACPI table
  637 * the second time. In this ACPI parsing iteration we allocate IOMMU-specific
  638 * data structures, initialize the per-PCI-segment device/alias/rlookup tables
  639 * and do the basic hardware initialization.
 640 *
 641 ****************************************************************************/
 642
 643/* Allocate per PCI segment device table */
 644static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
 645{
 646	pci_seg->dev_table = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
 647					       get_order(pci_seg->dev_table_size));
 648	if (!pci_seg->dev_table)
 649		return -ENOMEM;
 650
 651	return 0;
 652}
 653
 654static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
 655{
 656	iommu_free_pages(pci_seg->dev_table,
 657			 get_order(pci_seg->dev_table_size));
 658	pci_seg->dev_table = NULL;
 659}
 660
 661/* Allocate per PCI segment IOMMU rlookup table. */
 662static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
 663{
 664	pci_seg->rlookup_table = iommu_alloc_pages(GFP_KERNEL,
 665						   get_order(pci_seg->rlookup_table_size));
 666	if (pci_seg->rlookup_table == NULL)
 667		return -ENOMEM;
 668
 669	return 0;
 670}
 671
 672static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
 673{
 674	iommu_free_pages(pci_seg->rlookup_table,
 675			 get_order(pci_seg->rlookup_table_size));
 676	pci_seg->rlookup_table = NULL;
 677}
 678
 679static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
 680{
 681	pci_seg->irq_lookup_table = iommu_alloc_pages(GFP_KERNEL,
 682						      get_order(pci_seg->rlookup_table_size));
 683	kmemleak_alloc(pci_seg->irq_lookup_table,
 684		       pci_seg->rlookup_table_size, 1, GFP_KERNEL);
 685	if (pci_seg->irq_lookup_table == NULL)
 686		return -ENOMEM;
 687
 688	return 0;
 689}
 690
 691static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
 692{
 693	kmemleak_free(pci_seg->irq_lookup_table);
 694	iommu_free_pages(pci_seg->irq_lookup_table,
 695			 get_order(pci_seg->rlookup_table_size));
 696	pci_seg->irq_lookup_table = NULL;
 697}
 698
 699static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
 700{
 701	int i;
 702
 703	pci_seg->alias_table = iommu_alloc_pages(GFP_KERNEL,
 704						 get_order(pci_seg->alias_table_size));
 705	if (!pci_seg->alias_table)
 706		return -ENOMEM;
 707
 708	/*
  709	 * Initially, let every alias entry point to itself
 710	 */
 711	for (i = 0; i <= pci_seg->last_bdf; ++i)
 712		pci_seg->alias_table[i] = i;
 713
 714	return 0;
 715}
 716
 717static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
 718{
 719	iommu_free_pages(pci_seg->alias_table,
 720			 get_order(pci_seg->alias_table_size));
 721	pci_seg->alias_table = NULL;
 722}
 723
 724/*
 725 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
 726 * write commands to that buffer later and the IOMMU will execute them
 727 * asynchronously
 728 */
 729static int __init alloc_command_buffer(struct amd_iommu *iommu)
 730{
 731	iommu->cmd_buf = iommu_alloc_pages(GFP_KERNEL,
 732					   get_order(CMD_BUFFER_SIZE));
 733
 734	return iommu->cmd_buf ? 0 : -ENOMEM;
 735}
 736
 737/*
  738 * The interrupt handler has processed all pending events and adjusted the head
  739 * and tail pointers. Clear the overflow status and restart logging.
 740 */
 741void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type,
 742			   u8 cntrl_intr, u8 cntrl_log,
 743			   u32 status_run_mask, u32 status_overflow_mask)
 744{
 745	u32 status;
 746
 747	status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 748	if (status & status_run_mask)
 749		return;
 750
 751	pr_info_ratelimited("IOMMU %s log restarting\n", evt_type);
 752
 753	iommu_feature_disable(iommu, cntrl_log);
 754	iommu_feature_disable(iommu, cntrl_intr);
 755
 756	writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET);
 757
 758	iommu_feature_enable(iommu, cntrl_intr);
 759	iommu_feature_enable(iommu, cntrl_log);
 760}
 761
 762/*
 763 * This function restarts event logging in case the IOMMU experienced
 764 * an event log buffer overflow.
 765 */
 766void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
 767{
 768	amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN,
 769			      CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK,
 770			      MMIO_STATUS_EVT_OVERFLOW_MASK);
 771}
 772
 773/*
  774 * This function restarts GA logging in case the IOMMU experienced
  775 * a GA log overflow.
 776 */
 777void amd_iommu_restart_ga_log(struct amd_iommu *iommu)
 778{
 779	amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN,
 780			      CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK,
 781			      MMIO_STATUS_GALOG_OVERFLOW_MASK);
 782}
 783
 784/*
 785 * This function resets the command buffer if the IOMMU stopped fetching
 786 * commands from it.
 787 */
 788static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
 789{
 790	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 791
 792	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
 793	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 794	iommu->cmd_buf_head = 0;
 795	iommu->cmd_buf_tail = 0;
 796
 797	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
 798}
 799
 800/*
 801 * This function writes the command buffer address to the hardware and
 802 * enables it.
 803 */
 804static void iommu_enable_command_buffer(struct amd_iommu *iommu)
 805{
 806	u64 entry;
 807
 808	BUG_ON(iommu->cmd_buf == NULL);
 809
 810	entry = iommu_virt_to_phys(iommu->cmd_buf);
 811	entry |= MMIO_CMD_SIZE_512;
 812
 813	memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
 814		    &entry, sizeof(entry));
 815
 816	amd_iommu_reset_cmd_buffer(iommu);
 817}
 818
 819/*
 820 * This function disables the command buffer
 821 */
 822static void iommu_disable_command_buffer(struct amd_iommu *iommu)
 823{
 824	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 825}
 826
 827static void __init free_command_buffer(struct amd_iommu *iommu)
 828{
 829	iommu_free_pages(iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
 830}
 831
 832void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp,
 833				  size_t size)
 834{
 835	int order = get_order(size);
 836	void *buf = iommu_alloc_pages(gfp, order);
 837
 838	if (buf &&
 839	    check_feature(FEATURE_SNP) &&
 840	    set_memory_4k((unsigned long)buf, (1 << order))) {
 841		iommu_free_pages(buf, order);
 842		buf = NULL;
 843	}
 844
 845	return buf;
 846}
 847
  848/* Allocates the memory the IOMMU will log its events to */
 849static int __init alloc_event_buffer(struct amd_iommu *iommu)
 850{
 851	iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL,
 852					      EVT_BUFFER_SIZE);
 853
 854	return iommu->evt_buf ? 0 : -ENOMEM;
 855}
 856
 857static void iommu_enable_event_buffer(struct amd_iommu *iommu)
 858{
 859	u64 entry;
 860
 861	BUG_ON(iommu->evt_buf == NULL);
 862
 863	entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
 864
 865	memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
 866		    &entry, sizeof(entry));
 867
 868	/* set head and tail to zero manually */
 869	writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 870	writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
 871
 872	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 873}
 874
 875/*
 876 * This function disables the event log buffer
 877 */
 878static void iommu_disable_event_buffer(struct amd_iommu *iommu)
 879{
 880	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
 881}
 882
 883static void __init free_event_buffer(struct amd_iommu *iommu)
 884{
  885	iommu_free_pages(iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
 886}
 887
 888static void free_ga_log(struct amd_iommu *iommu)
 889{
 890#ifdef CONFIG_IRQ_REMAP
 891	iommu_free_pages(iommu->ga_log, get_order(GA_LOG_SIZE));
 892	iommu_free_pages(iommu->ga_log_tail, get_order(8));
 893#endif
 894}
 895
 896#ifdef CONFIG_IRQ_REMAP
 897static int iommu_ga_log_enable(struct amd_iommu *iommu)
 898{
 899	u32 status, i;
 900	u64 entry;
 901
 902	if (!iommu->ga_log)
 903		return -EINVAL;
 904
 905	entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
 906	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
 907		    &entry, sizeof(entry));
 908	entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
 909		 (BIT_ULL(52)-1)) & ~7ULL;
 910	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
 911		    &entry, sizeof(entry));
 912	writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
 913	writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
 914
 915
 916	iommu_feature_enable(iommu, CONTROL_GAINT_EN);
 917	iommu_feature_enable(iommu, CONTROL_GALOG_EN);
 918
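	/* Wait for the hardware to report the GA log as running */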
 919	for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
 920		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 921		if (status & (MMIO_STATUS_GALOG_RUN_MASK))
 922			break;
 923		udelay(10);
 924	}
 925
 926	if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
 927		return -EINVAL;
 928
 929	return 0;
 930}
 931
 932static int iommu_init_ga_log(struct amd_iommu *iommu)
 933{
 934	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
 935		return 0;
 936
 937	iommu->ga_log = iommu_alloc_pages(GFP_KERNEL, get_order(GA_LOG_SIZE));
 938	if (!iommu->ga_log)
 939		goto err_out;
 940
 941	iommu->ga_log_tail = iommu_alloc_pages(GFP_KERNEL, get_order(8));
 942	if (!iommu->ga_log_tail)
 943		goto err_out;
 944
 945	return 0;
 946err_out:
 947	free_ga_log(iommu);
 948	return -EINVAL;
 949}
 950#endif /* CONFIG_IRQ_REMAP */
 951
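/* Allocate the page used as the completion-wait write-back (CWWB) semaphore */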
 952static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
 953{
 954	iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1);
 955
 956	return iommu->cmd_sem ? 0 : -ENOMEM;
 957}
 958
 959static void __init free_cwwb_sem(struct amd_iommu *iommu)
 960{
 961	if (iommu->cmd_sem)
 962		iommu_free_page((void *)iommu->cmd_sem);
 963}
 964
 965static void iommu_enable_xt(struct amd_iommu *iommu)
 966{
 967#ifdef CONFIG_IRQ_REMAP
 968	/*
 969	 * XT mode (32-bit APIC destination ID) requires
 970	 * GA mode (128-bit IRTE support) as a prerequisite.
 971	 */
 972	if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
 973	    amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
 974		iommu_feature_enable(iommu, CONTROL_XT_EN);
 975#endif /* CONFIG_IRQ_REMAP */
 976}
 977
 978static void iommu_enable_gt(struct amd_iommu *iommu)
 979{
 980	if (!check_feature(FEATURE_GT))
 981		return;
 982
 983	iommu_feature_enable(iommu, CONTROL_GT_EN);
 984}
 985
 986/* sets a specific bit in the device table entry. */
 987static void __set_dev_entry_bit(struct dev_table_entry *dev_table,
 988				u16 devid, u8 bit)
 989{
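	/* A DTE is 256 bits wide: select the 64-bit word (bit / 64) and the bit within it (bit % 64) */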
 990	int i = (bit >> 6) & 0x03;
 991	int _bit = bit & 0x3f;
 992
 993	dev_table[devid].data[i] |= (1UL << _bit);
 994}
 995
 996static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
 997{
 998	struct dev_table_entry *dev_table = get_dev_table(iommu);
 999
1000	return __set_dev_entry_bit(dev_table, devid, bit);
1001}
1002
1003static int __get_dev_entry_bit(struct dev_table_entry *dev_table,
1004			       u16 devid, u8 bit)
1005{
1006	int i = (bit >> 6) & 0x03;
1007	int _bit = bit & 0x3f;
1008
1009	return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
1010}
1011
1012static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
1013{
1014	struct dev_table_entry *dev_table = get_dev_table(iommu);
1015
1016	return __get_dev_entry_bit(dev_table, devid, bit);
1017}
1018
1019static bool __copy_device_table(struct amd_iommu *iommu)
1020{
1021	u64 int_ctl, int_tab_len, entry = 0;
1022	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1023	struct dev_table_entry *old_devtb = NULL;
1024	u32 lo, hi, devid, old_devtb_size;
1025	phys_addr_t old_devtb_phys;
1026	u16 dom_id, dte_v, irq_v;
1027	u64 tmp;
1028
 1029	/* Each IOMMU uses a separate device table of the same size */
1030	lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
1031	hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
1032	entry = (((u64) hi) << 32) + lo;
1033
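	/* The low bits of the base register give the old table size in 4K pages, minus one */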
1034	old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
1035	if (old_devtb_size != pci_seg->dev_table_size) {
1036		pr_err("The device table size of IOMMU:%d is not expected!\n",
1037			iommu->index);
1038		return false;
1039	}
1040
1041	/*
1042	 * When SME is enabled in the first kernel, the entry includes the
 1043	 * memory encryption mask (sme_me_mask); we must remove the memory
 1044	 * encryption mask to obtain the true physical address in the kdump kernel.
1045	 */
1046	old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
1047
1048	if (old_devtb_phys >= 0x100000000ULL) {
1049		pr_err("The address of old device table is above 4G, not trustworthy!\n");
1050		return false;
1051	}
1052	old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel())
1053		    ? (__force void *)ioremap_encrypted(old_devtb_phys,
1054							pci_seg->dev_table_size)
1055		    : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB);
1056
1057	if (!old_devtb)
1058		return false;
1059
1060	pci_seg->old_dev_tbl_cpy = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
1061						     get_order(pci_seg->dev_table_size));
1062	if (pci_seg->old_dev_tbl_cpy == NULL) {
1063		pr_err("Failed to allocate memory for copying old device table!\n");
1064		memunmap(old_devtb);
1065		return false;
1066	}
1067
1068	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
1069		pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid];
1070		dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
1071		dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
1072
1073		if (dte_v && dom_id) {
1074			pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
1075			pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
1076			/* Reserve the Domain IDs used by previous kernel */
1077			if (ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC) != dom_id) {
1078				pr_err("Failed to reserve domain ID 0x%x\n", dom_id);
1079				memunmap(old_devtb);
1080				return false;
1081			}
1082			/* If gcr3 table existed, mask it out */
1083			if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
1084				tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
1085				tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
1086				pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
1087				tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
1088				tmp |= DTE_FLAG_GV;
1089				pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
1090			}
1091		}
1092
1093		irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
1094		int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
1095		int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
1096		if (irq_v && (int_ctl || int_tab_len)) {
1097			if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
1098			    (int_tab_len != DTE_INTTABLEN)) {
1099				pr_err("Wrong old irq remapping flag: %#x\n", devid);
1100				memunmap(old_devtb);
1101				return false;
1102			}
1103
1104			pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
1105		}
1106	}
1107	memunmap(old_devtb);
1108
1109	return true;
1110}
1111
1112static bool copy_device_table(void)
1113{
1114	struct amd_iommu *iommu;
1115	struct amd_iommu_pci_seg *pci_seg;
1116
1117	if (!amd_iommu_pre_enabled)
1118		return false;
1119
1120	pr_warn("Translation is already enabled - trying to copy translation structures\n");
1121
1122	/*
 1123	 * All IOMMUs within a PCI segment share a common device table.
 1124	 * Hence copy the device table only once per PCI segment.
1125	 */
1126	for_each_pci_segment(pci_seg) {
1127		for_each_iommu(iommu) {
1128			if (pci_seg->id != iommu->pci_seg->id)
1129				continue;
1130			if (!__copy_device_table(iommu))
1131				return false;
1132			break;
1133		}
1134	}
1135
1136	return true;
1137}
1138
1139void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid)
1140{
1141	int sysmgt;
1142
1143	sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) |
1144		 (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1);
1145
1146	if (sysmgt == 0x01)
1147		set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW);
1148}
1149
1150/*
1151 * This function takes the device specific flags read from the ACPI
1152 * table and sets up the device table entry with that information
1153 */
1154static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
1155					   u16 devid, u32 flags, u32 ext_flags)
1156{
1157	if (flags & ACPI_DEVFLAG_INITPASS)
1158		set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
1159	if (flags & ACPI_DEVFLAG_EXTINT)
1160		set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
1161	if (flags & ACPI_DEVFLAG_NMI)
1162		set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
1163	if (flags & ACPI_DEVFLAG_SYSMGT1)
1164		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
1165	if (flags & ACPI_DEVFLAG_SYSMGT2)
1166		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
1167	if (flags & ACPI_DEVFLAG_LINT0)
1168		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
1169	if (flags & ACPI_DEVFLAG_LINT1)
1170		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
1171
1172	amd_iommu_apply_erratum_63(iommu, devid);
1173
1174	amd_iommu_set_rlookup_table(iommu, devid);
1175}
1176
1177int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
1178{
1179	struct devid_map *entry;
1180	struct list_head *list;
1181
1182	if (type == IVHD_SPECIAL_IOAPIC)
1183		list = &ioapic_map;
1184	else if (type == IVHD_SPECIAL_HPET)
1185		list = &hpet_map;
1186	else
1187		return -EINVAL;
1188
1189	list_for_each_entry(entry, list, list) {
1190		if (!(entry->id == id && entry->cmd_line))
1191			continue;
1192
1193		pr_info("Command-line override present for %s id %d - ignoring\n",
1194			type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1195
1196		*devid = entry->devid;
1197
1198		return 0;
1199	}
1200
1201	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1202	if (!entry)
1203		return -ENOMEM;
1204
1205	entry->id	= id;
1206	entry->devid	= *devid;
1207	entry->cmd_line	= cmd_line;
1208
1209	list_add_tail(&entry->list, list);
1210
1211	return 0;
1212}
1213
1214static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
1215				      bool cmd_line)
1216{
1217	struct acpihid_map_entry *entry;
1218	struct list_head *list = &acpihid_map;
1219
1220	list_for_each_entry(entry, list, list) {
1221		if (strcmp(entry->hid, hid) ||
1222		    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1223		    !entry->cmd_line)
1224			continue;
1225
1226		pr_info("Command-line override for hid:%s uid:%s\n",
1227			hid, uid);
1228		*devid = entry->devid;
1229		return 0;
1230	}
1231
1232	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1233	if (!entry)
1234		return -ENOMEM;
1235
1236	memcpy(entry->uid, uid, strlen(uid));
1237	memcpy(entry->hid, hid, strlen(hid));
1238	entry->devid = *devid;
1239	entry->cmd_line	= cmd_line;
1240	entry->root_devid = (entry->devid & (~0x7));
1241
1242	pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
1243		entry->cmd_line ? "cmd" : "ivrs",
1244		entry->hid, entry->uid, entry->root_devid);
1245
1246	list_add_tail(&entry->list, list);
1247	return 0;
1248}
1249
1250static int __init add_early_maps(void)
1251{
1252	int i, ret;
1253
1254	for (i = 0; i < early_ioapic_map_size; ++i) {
1255		ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1256					 early_ioapic_map[i].id,
1257					 &early_ioapic_map[i].devid,
1258					 early_ioapic_map[i].cmd_line);
1259		if (ret)
1260			return ret;
1261	}
1262
1263	for (i = 0; i < early_hpet_map_size; ++i) {
1264		ret = add_special_device(IVHD_SPECIAL_HPET,
1265					 early_hpet_map[i].id,
1266					 &early_hpet_map[i].devid,
1267					 early_hpet_map[i].cmd_line);
1268		if (ret)
1269			return ret;
1270	}
1271
1272	for (i = 0; i < early_acpihid_map_size; ++i) {
1273		ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1274					  early_acpihid_map[i].uid,
1275					  &early_acpihid_map[i].devid,
1276					  early_acpihid_map[i].cmd_line);
1277		if (ret)
1278			return ret;
1279	}
1280
1281	return 0;
1282}
1283
1284/*
1285 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1286 * initializes the hardware and our data structures with it.
1287 */
1288static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1289					struct ivhd_header *h)
1290{
1291	u8 *p = (u8 *)h;
1292	u8 *end = p, flags = 0;
1293	u16 devid = 0, devid_start = 0, devid_to = 0, seg_id;
1294	u32 dev_i, ext_flags = 0;
1295	bool alias = false;
1296	struct ivhd_entry *e;
1297	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1298	u32 ivhd_size;
1299	int ret;
1300
1301
1302	ret = add_early_maps();
1303	if (ret)
1304		return ret;
1305
1306	amd_iommu_apply_ivrs_quirks();
1307
1308	/*
1309	 * First save the recommended feature enable bits from ACPI
1310	 */
1311	iommu->acpi_flags = h->flags;
1312
1313	/*
1314	 * Done. Now parse the device entries
1315	 */
1316	ivhd_size = get_ivhd_header_size(h);
1317	if (!ivhd_size) {
1318		pr_err("Unsupported IVHD type %#x\n", h->type);
1319		return -EINVAL;
1320	}
1321
1322	p += ivhd_size;
1323
1324	end += h->length;
1325
1326
1327	while (p < end) {
1328		e = (struct ivhd_entry *)p;
1329		seg_id = pci_seg->id;
1330
1331		switch (e->type) {
1332		case IVHD_DEV_ALL:
1333
1334			DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
1335
1336			for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i)
1337				set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
1338			break;
1339		case IVHD_DEV_SELECT:
1340
1341			DUMP_printk("  DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x "
1342				    "flags: %02x\n",
1343				    seg_id, PCI_BUS_NUM(e->devid),
1344				    PCI_SLOT(e->devid),
1345				    PCI_FUNC(e->devid),
1346				    e->flags);
1347
1348			devid = e->devid;
1349			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1350			break;
1351		case IVHD_DEV_SELECT_RANGE_START:
1352
1353			DUMP_printk("  DEV_SELECT_RANGE_START\t "
1354				    "devid: %04x:%02x:%02x.%x flags: %02x\n",
1355				    seg_id, PCI_BUS_NUM(e->devid),
1356				    PCI_SLOT(e->devid),
1357				    PCI_FUNC(e->devid),
1358				    e->flags);
1359
1360			devid_start = e->devid;
1361			flags = e->flags;
1362			ext_flags = 0;
1363			alias = false;
1364			break;
1365		case IVHD_DEV_ALIAS:
1366
1367			DUMP_printk("  DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x "
1368				    "flags: %02x devid_to: %02x:%02x.%x\n",
1369				    seg_id, PCI_BUS_NUM(e->devid),
1370				    PCI_SLOT(e->devid),
1371				    PCI_FUNC(e->devid),
1372				    e->flags,
1373				    PCI_BUS_NUM(e->ext >> 8),
1374				    PCI_SLOT(e->ext >> 8),
1375				    PCI_FUNC(e->ext >> 8));
1376
1377			devid = e->devid;
1378			devid_to = e->ext >> 8;
1379			set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
1380			set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1381			pci_seg->alias_table[devid] = devid_to;
1382			break;
1383		case IVHD_DEV_ALIAS_RANGE:
1384
1385			DUMP_printk("  DEV_ALIAS_RANGE\t\t "
1386				    "devid: %04x:%02x:%02x.%x flags: %02x "
1387				    "devid_to: %04x:%02x:%02x.%x\n",
1388				    seg_id, PCI_BUS_NUM(e->devid),
1389				    PCI_SLOT(e->devid),
1390				    PCI_FUNC(e->devid),
1391				    e->flags,
1392				    seg_id, PCI_BUS_NUM(e->ext >> 8),
1393				    PCI_SLOT(e->ext >> 8),
1394				    PCI_FUNC(e->ext >> 8));
1395
1396			devid_start = e->devid;
1397			flags = e->flags;
1398			devid_to = e->ext >> 8;
1399			ext_flags = 0;
1400			alias = true;
1401			break;
1402		case IVHD_DEV_EXT_SELECT:
1403
1404			DUMP_printk("  DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x "
1405				    "flags: %02x ext: %08x\n",
1406				    seg_id, PCI_BUS_NUM(e->devid),
1407				    PCI_SLOT(e->devid),
1408				    PCI_FUNC(e->devid),
1409				    e->flags, e->ext);
1410
1411			devid = e->devid;
1412			set_dev_entry_from_acpi(iommu, devid, e->flags,
1413						e->ext);
1414			break;
1415		case IVHD_DEV_EXT_SELECT_RANGE:
1416
1417			DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
1418				    "%04x:%02x:%02x.%x flags: %02x ext: %08x\n",
1419				    seg_id, PCI_BUS_NUM(e->devid),
1420				    PCI_SLOT(e->devid),
1421				    PCI_FUNC(e->devid),
1422				    e->flags, e->ext);
1423
1424			devid_start = e->devid;
1425			flags = e->flags;
1426			ext_flags = e->ext;
1427			alias = false;
1428			break;
1429		case IVHD_DEV_RANGE_END:
1430
1431			DUMP_printk("  DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n",
1432				    seg_id, PCI_BUS_NUM(e->devid),
1433				    PCI_SLOT(e->devid),
1434				    PCI_FUNC(e->devid));
1435
1436			devid = e->devid;
1437			for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
1438				if (alias) {
1439					pci_seg->alias_table[dev_i] = devid_to;
1440					set_dev_entry_from_acpi(iommu,
1441						devid_to, flags, ext_flags);
1442				}
1443				set_dev_entry_from_acpi(iommu, dev_i,
1444							flags, ext_flags);
1445			}
1446			break;
1447		case IVHD_DEV_SPECIAL: {
1448			u8 handle, type;
1449			const char *var;
1450			u32 devid;
1451			int ret;
1452
1453			handle = e->ext & 0xff;
1454			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8));
1455			type   = (e->ext >> 24) & 0xff;
1456
1457			if (type == IVHD_SPECIAL_IOAPIC)
1458				var = "IOAPIC";
1459			else if (type == IVHD_SPECIAL_HPET)
1460				var = "HPET";
1461			else
1462				var = "UNKNOWN";
1463
1464			DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n",
1465				    var, (int)handle,
1466				    seg_id, PCI_BUS_NUM(devid),
1467				    PCI_SLOT(devid),
1468				    PCI_FUNC(devid));
1469
1470			ret = add_special_device(type, handle, &devid, false);
1471			if (ret)
1472				return ret;
1473
1474			/*
1475			 * add_special_device might update the devid in case a
1476			 * command-line override is present. So call
1477			 * set_dev_entry_from_acpi after add_special_device.
1478			 */
1479			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1480
1481			break;
1482		}
1483		case IVHD_DEV_ACPI_HID: {
1484			u32 devid;
1485			u8 hid[ACPIHID_HID_LEN];
1486			u8 uid[ACPIHID_UID_LEN];
1487			int ret;
1488
1489			if (h->type != 0x40) {
1490				pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1491				       e->type);
1492				break;
1493			}
1494
1495			BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1);
1496			memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1);
1497			hid[ACPIHID_HID_LEN - 1] = '\0';
1498
1499			if (!(*hid)) {
1500				pr_err(FW_BUG "Invalid HID.\n");
1501				break;
1502			}
1503
1504			uid[0] = '\0';
1505			switch (e->uidf) {
1506			case UID_NOT_PRESENT:
1507
1508				if (e->uidl != 0)
1509					pr_warn(FW_BUG "Invalid UID length.\n");
1510
1511				break;
1512			case UID_IS_INTEGER:
1513
1514				sprintf(uid, "%d", e->uid);
1515
1516				break;
1517			case UID_IS_CHARACTER:
1518
1519				memcpy(uid, &e->uid, e->uidl);
1520				uid[e->uidl] = '\0';
1521
1522				break;
1523			default:
1524				break;
1525			}
1526
1527			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
1528			DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n",
1529				    hid, uid, seg_id,
1530				    PCI_BUS_NUM(devid),
1531				    PCI_SLOT(devid),
1532				    PCI_FUNC(devid));
1533
1534			flags = e->flags;
1535
1536			ret = add_acpi_hid_device(hid, uid, &devid, false);
1537			if (ret)
1538				return ret;
1539
1540			/*
 1541			 * add_acpi_hid_device might update the devid in case a
 1542			 * command-line override is present. So call
 1543			 * set_dev_entry_from_acpi after add_acpi_hid_device.
1544			 */
1545			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1546
1547			break;
1548		}
1549		default:
1550			break;
1551		}
1552
1553		p += ivhd_entry_length(p);
1554	}
1555
1556	return 0;
1557}
1558
1559/* Allocate PCI segment data structure */
1560static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
1561					  struct acpi_table_header *ivrs_base)
1562{
1563	struct amd_iommu_pci_seg *pci_seg;
1564	int last_bdf;
1565
1566	/*
1567	 * First parse ACPI tables to find the largest Bus/Dev/Func we need to
 1568	 * handle in this PCI segment. Based on this information, the shared data
1569	 * structures for the PCI segments in the system will be allocated.
1570	 */
1571	last_bdf = find_last_devid_acpi(ivrs_base, id);
1572	if (last_bdf < 0)
1573		return NULL;
1574
1575	pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL);
1576	if (pci_seg == NULL)
1577		return NULL;
1578
1579	pci_seg->last_bdf = last_bdf;
1580	DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
1581	pci_seg->dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf);
1582	pci_seg->alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf);
1583	pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf);
1584
1585	pci_seg->id = id;
1586	init_llist_head(&pci_seg->dev_data_list);
1587	INIT_LIST_HEAD(&pci_seg->unity_map);
1588	list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list);
1589
1590	if (alloc_dev_table(pci_seg))
1591		return NULL;
1592	if (alloc_alias_table(pci_seg))
1593		return NULL;
1594	if (alloc_rlookup_table(pci_seg))
1595		return NULL;
1596
1597	return pci_seg;
1598}
1599
1600static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id,
1601					struct acpi_table_header *ivrs_base)
1602{
1603	struct amd_iommu_pci_seg *pci_seg;
1604
1605	for_each_pci_segment(pci_seg) {
1606		if (pci_seg->id == id)
1607			return pci_seg;
1608	}
1609
1610	return alloc_pci_segment(id, ivrs_base);
1611}
1612
1613static void __init free_pci_segments(void)
1614{
1615	struct amd_iommu_pci_seg *pci_seg, *next;
1616
1617	for_each_pci_segment_safe(pci_seg, next) {
1618		list_del(&pci_seg->list);
1619		free_irq_lookup_table(pci_seg);
1620		free_rlookup_table(pci_seg);
1621		free_alias_table(pci_seg);
1622		free_dev_table(pci_seg);
1623		kfree(pci_seg);
1624	}
1625}
1626
1627static void __init free_sysfs(struct amd_iommu *iommu)
1628{
1629	if (iommu->iommu.dev) {
1630		iommu_device_unregister(&iommu->iommu);
1631		iommu_device_sysfs_remove(&iommu->iommu);
1632	}
1633}
1634
1635static void __init free_iommu_one(struct amd_iommu *iommu)
1636{
1637	free_sysfs(iommu);
1638	free_cwwb_sem(iommu);
1639	free_command_buffer(iommu);
1640	free_event_buffer(iommu);
1641	amd_iommu_free_ppr_log(iommu);
1642	free_ga_log(iommu);
1643	iommu_unmap_mmio_space(iommu);
1644	amd_iommu_iopf_uninit(iommu);
1645}
1646
1647static void __init free_iommu_all(void)
1648{
1649	struct amd_iommu *iommu, *next;
1650
1651	for_each_iommu_safe(iommu, next) {
1652		list_del(&iommu->list);
1653		free_iommu_one(iommu);
1654		kfree(iommu);
1655	}
1656}
1657
1658/*
1659 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1660 * Workaround:
 1661 *     BIOS should disable L2B miscellaneous clock gating by setting
1662 *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1663 */
1664static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1665{
1666	u32 value;
1667
1668	if ((boot_cpu_data.x86 != 0x15) ||
1669	    (boot_cpu_data.x86_model < 0x10) ||
1670	    (boot_cpu_data.x86_model > 0x1f))
1671		return;
1672
1673	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1674	pci_read_config_dword(iommu->dev, 0xf4, &value);
1675
1676	if (value & BIT(2))
1677		return;
1678
1679	/* Select NB indirect register 0x90 and enable writing */
1680	pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1681
1682	pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1683	pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1684
1685	/* Clear the enable writing bit */
1686	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1687}
1688
1689/*
1690 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1691 * Workaround:
1692 *     BIOS should enable ATS write permission check by setting
1693 *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1694 */
1695static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1696{
1697	u32 value;
1698
1699	if ((boot_cpu_data.x86 != 0x15) ||
1700	    (boot_cpu_data.x86_model < 0x30) ||
1701	    (boot_cpu_data.x86_model > 0x3f))
1702		return;
1703
1704	/* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1705	value = iommu_read_l2(iommu, 0x47);
1706
1707	if (value & BIT(0))
1708		return;
1709
1710	/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1711	iommu_write_l2(iommu, 0x47, value | BIT(0));
1712
1713	pci_info(iommu->dev, "Applying ATS write check workaround\n");
1714}
1715
1716/*
 1717 * This function glues the initialization functions for one IOMMU
 1718 * together and also allocates the command buffer and programs the
 1719 * hardware. It does NOT enable the IOMMU; that is done afterwards.
1720 */
1721static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
1722				 struct acpi_table_header *ivrs_base)
1723{
1724	struct amd_iommu_pci_seg *pci_seg;
1725
1726	pci_seg = get_pci_segment(h->pci_seg, ivrs_base);
1727	if (pci_seg == NULL)
1728		return -ENOMEM;
1729	iommu->pci_seg = pci_seg;
1730
1731	raw_spin_lock_init(&iommu->lock);
1732	atomic64_set(&iommu->cmd_sem_val, 0);
1733
1734	/* Add IOMMU to internal data structures */
1735	list_add_tail(&iommu->list, &amd_iommu_list);
1736	iommu->index = amd_iommus_present++;
1737
1738	if (unlikely(iommu->index >= MAX_IOMMUS)) {
1739		WARN(1, "System has more IOMMUs than supported by this driver\n");
1740		return -ENOSYS;
1741	}
1742
1743	/*
1744	 * Copy data from ACPI table entry to the iommu struct
1745	 */
1746	iommu->devid   = h->devid;
1747	iommu->cap_ptr = h->cap_ptr;
1748	iommu->mmio_phys = h->mmio_phys;
1749
1750	switch (h->type) {
1751	case 0x10:
1752		/* Check if IVHD EFR contains proper max banks/counters */
1753		if ((h->efr_attr != 0) &&
1754		    ((h->efr_attr & (0xF << 13)) != 0) &&
1755		    ((h->efr_attr & (0x3F << 17)) != 0))
1756			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1757		else
1758			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1759
1760		/*
 1761		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b support.
1762		 * GAM also requires GA mode. Therefore, we need to
1763		 * check cmpxchg16b support before enabling it.
1764		 */
1765		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1766		    ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
1767			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1768		break;
1769	case 0x11:
1770	case 0x40:
1771		if (h->efr_reg & (1 << 9))
1772			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1773		else
1774			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1775
1776		/*
 1777		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b support.
1778		 * XT, GAM also requires GA mode. Therefore, we need to
1779		 * check cmpxchg16b support before enabling them.
1780		 */
1781		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1782		    ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) {
1783			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1784			break;
1785		}
1786
1787		if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
1788			amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
1789
1790		early_iommu_features_init(iommu, h);
1791
1792		break;
1793	default:
1794		return -EINVAL;
1795	}
1796
1797	iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1798						iommu->mmio_phys_end);
1799	if (!iommu->mmio_base)
1800		return -ENOMEM;
1801
1802	return init_iommu_from_acpi(iommu, h);
1803}
1804
1805static int __init init_iommu_one_late(struct amd_iommu *iommu)
1806{
1807	int ret;
1808
1809	if (alloc_cwwb_sem(iommu))
1810		return -ENOMEM;
1811
1812	if (alloc_command_buffer(iommu))
1813		return -ENOMEM;
1814
1815	if (alloc_event_buffer(iommu))
1816		return -ENOMEM;
1817
1818	iommu->int_enabled = false;
1819
1820	init_translation_status(iommu);
1821	if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1822		iommu_disable(iommu);
1823		clear_translation_pre_enabled(iommu);
1824		pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1825			iommu->index);
1826	}
1827	if (amd_iommu_pre_enabled)
1828		amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1829
1830	if (amd_iommu_irq_remap) {
1831		ret = amd_iommu_create_irq_domain(iommu);
1832		if (ret)
1833			return ret;
1834	}
1835
1836	/*
1837	 * Make sure IOMMU is not considered to translate itself. The IVRS
1838	 * table tells us so, but this is a lie!
1839	 */
1840	iommu->pci_seg->rlookup_table[iommu->devid] = NULL;
1841
1842	return 0;
1843}
1844
1845/**
1846 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1847 * @ivrs: Pointer to the IVRS header
1848 *
 1849 * This function searches through all IVHD blocks and returns the highest supported IVHD type.
1850 */
1851static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1852{
1853	u8 *base = (u8 *)ivrs;
1854	struct ivhd_header *ivhd = (struct ivhd_header *)
1855					(base + IVRS_HEADER_LENGTH);
1856	u8 last_type = ivhd->type;
1857	u16 devid = ivhd->devid;
1858
1859	while (((u8 *)ivhd - base < ivrs->length) &&
1860	       (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1861		u8 *p = (u8 *) ivhd;
1862
1863		if (ivhd->devid == devid)
1864			last_type = ivhd->type;
1865		ivhd = (struct ivhd_header *)(p + ivhd->length);
1866	}
1867
1868	return last_type;
1869}
1870
1871/*
1872 * Iterates over all IOMMU entries in the ACPI table, allocates the
1873 * IOMMU structure and initializes it with init_iommu_one()
1874 */
1875static int __init init_iommu_all(struct acpi_table_header *table)
1876{
1877	u8 *p = (u8 *)table, *end = (u8 *)table;
1878	struct ivhd_header *h;
1879	struct amd_iommu *iommu;
1880	int ret;
1881
1882	end += table->length;
1883	p += IVRS_HEADER_LENGTH;
1884
1885	/* Phase 1: Process all IVHD blocks */
1886	while (p < end) {
1887		h = (struct ivhd_header *)p;
1888		if (*p == amd_iommu_target_ivhd_type) {
1889
1890			DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x "
1891				    "flags: %01x info %04x\n",
1892				    h->pci_seg, PCI_BUS_NUM(h->devid),
1893				    PCI_SLOT(h->devid), PCI_FUNC(h->devid),
1894				    h->cap_ptr, h->flags, h->info);
1895			DUMP_printk("       mmio-addr: %016llx\n",
1896				    h->mmio_phys);
1897
1898			iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1899			if (iommu == NULL)
1900				return -ENOMEM;
1901
1902			ret = init_iommu_one(iommu, h, table);
1903			if (ret)
1904				return ret;
1905		}
1906		p += h->length;
1907
1908	}
1909	WARN_ON(p != end);
1910
1911	/* Phase 2 : Early feature support check */
1912	get_global_efr();
1913
1914	/* Phase 3 : Enabling IOMMU features */
1915	for_each_iommu(iommu) {
1916		ret = init_iommu_one_late(iommu);
1917		if (ret)
1918			return ret;
1919	}
1920
1921	return 0;
1922}
1923
1924static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1925{
1926	u64 val;
1927	struct pci_dev *pdev = iommu->dev;
1928
1929	if (!check_feature(FEATURE_PC))
1930		return;
1931
1932	amd_iommu_pc_present = true;
1933
1934	pci_info(pdev, "IOMMU performance counters supported\n");
1935
1936	val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1937	iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1938	iommu->max_counters = (u8) ((val >> 7) & 0xf);
1939
1940	return;
1941}
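/*
 * The counter-bank geometry above comes from the MMIO counter configuration
 * register: bits [17:12] give the number of banks and bits [10:7] the
 * counters per bank. For example, a value with (val >> 12) & 0x3f == 2 and
 * (val >> 7) & 0xf == 4 advertises 2 banks of 4 counters each.
 */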
1942
1943static ssize_t amd_iommu_show_cap(struct device *dev,
1944				  struct device_attribute *attr,
1945				  char *buf)
1946{
1947	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1948	return sysfs_emit(buf, "%x\n", iommu->cap);
1949}
1950static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1951
1952static ssize_t amd_iommu_show_features(struct device *dev,
1953				       struct device_attribute *attr,
1954				       char *buf)
1955{
1956	return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2);
1957}
1958static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1959
1960static struct attribute *amd_iommu_attrs[] = {
1961	&dev_attr_cap.attr,
1962	&dev_attr_features.attr,
1963	NULL,
1964};
1965
1966static struct attribute_group amd_iommu_group = {
1967	.name = "amd-iommu",
1968	.attrs = amd_iommu_attrs,
1969};
1970
1971static const struct attribute_group *amd_iommu_groups[] = {
1972	&amd_iommu_group,
1973	NULL,
1974};
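/*
 * With the group name "amd-iommu" and the "ivhd%d" device name passed to
 * iommu_device_sysfs_add() below, these attributes are expected to appear
 * under /sys/class/iommu/ivhd<N>/amd-iommu/ as "cap" and "features".
 */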
1975
1976/*
1977 * Note: IVHD types 0x11 and 0x40 also contain an exact copy
1978 * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
1979 * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
1980 */
1981static void __init late_iommu_features_init(struct amd_iommu *iommu)
1982{
1983	u64 features, features2;
1984
1985	if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
1986		return;
1987
1988	/* read extended feature bits */
1989	features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
1990	features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
1991
1992	if (!amd_iommu_efr) {
1993		amd_iommu_efr = features;
1994		amd_iommu_efr2 = features2;
1995		return;
1996	}
1997
1998	/*
1999	 * Sanity check and warn if EFR values from
2000	 * IVHD and MMIO conflict.
2001	 */
2002	if (features != amd_iommu_efr ||
2003	    features2 != amd_iommu_efr2) {
2004		pr_warn(FW_WARN
2005			"EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
2006			features, amd_iommu_efr,
2007			features2, amd_iommu_efr2);
2008	}
2009}
2010
2011static int __init iommu_init_pci(struct amd_iommu *iommu)
2012{
2013	int cap_ptr = iommu->cap_ptr;
2014	int ret;
2015
2016	iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2017						 PCI_BUS_NUM(iommu->devid),
2018						 iommu->devid & 0xff);
2019	if (!iommu->dev)
2020		return -ENODEV;
2021
2022	/* Prevent binding other PCI device drivers to IOMMU devices */
2023	iommu->dev->match_driver = false;
2024
2025	/* ACPI _PRT won't have an IRQ for IOMMU */
2026	iommu->dev->irq_managed = 1;
2027
2028	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
2029			      &iommu->cap);
2030
2031	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
2032		amd_iommu_iotlb_sup = false;
2033
2034	late_iommu_features_init(iommu);
2035
2036	if (check_feature(FEATURE_GT)) {
2037		int glxval;
2038		u64 pasmax;
2039
2040		pasmax = FIELD_GET(FEATURE_PASMAX, amd_iommu_efr);
2041		iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1;
2042
2043		BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK);
2044
2045		glxval = FIELD_GET(FEATURE_GLX, amd_iommu_efr);
2046
2047		if (amd_iommu_max_glx_val == -1)
2048			amd_iommu_max_glx_val = glxval;
2049		else
2050			amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
2051
2052		iommu_enable_gt(iommu);
2053	}
2054
2055	if (check_feature(FEATURE_PPR) && amd_iommu_alloc_ppr_log(iommu))
2056		return -ENOMEM;
2057
2058	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
2059		pr_info("Using strict mode due to virtualization\n");
2060		iommu_set_dma_strict();
2061		amd_iommu_np_cache = true;
2062	}
2063
2064	init_iommu_perf_ctr(iommu);
2065
2066	if (is_rd890_iommu(iommu->dev)) {
2067		int i, j;
2068
2069		iommu->root_pdev =
2070			pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2071						    iommu->dev->bus->number,
2072						    PCI_DEVFN(0, 0));
2073
2074		/*
2075		 * Some rd890 systems may not be fully reconfigured by the
2076		 * BIOS, so it's necessary for us to store this information so
2077		 * it can be reprogrammed on resume
2078		 */
2079		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
2080				&iommu->stored_addr_lo);
2081		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
2082				&iommu->stored_addr_hi);
2083
2084		/* Low bit locks writes to configuration space */
2085		iommu->stored_addr_lo &= ~1;
2086
2087		for (i = 0; i < 6; i++)
2088			for (j = 0; j < 0x12; j++)
2089				iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
2090
2091		for (i = 0; i < 0x83; i++)
2092			iommu->stored_l2[i] = iommu_read_l2(iommu, i);
2093	}
2094
2095	amd_iommu_erratum_746_workaround(iommu);
2096	amd_iommu_ats_write_check_workaround(iommu);
2097
2098	ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
2099			       amd_iommu_groups, "ivhd%d", iommu->index);
2100	if (ret)
2101		return ret;
2102
2103	/*
2104	 * Allocate per IOMMU IOPF queue here so that in attach device path,
2105	 * PRI capable device can be added to IOPF queue
2106	 */
2107	if (amd_iommu_gt_ppr_supported()) {
2108		ret = amd_iommu_iopf_init(iommu);
2109		if (ret)
2110			return ret;
2111	}
2112
2113	iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
2114
2115	return pci_enable_device(iommu->dev);
2116}
2117
2118static void print_iommu_info(void)
2119{
2120	int i;
2121	static const char * const feat_str[] = {
2122		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
2123		"IA", "GA", "HE", "PC"
2124	};
2125
2126	if (amd_iommu_efr) {
2127		pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2);
2128
2129		for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
2130			if (check_feature(1ULL << i))
2131				pr_cont(" %s", feat_str[i]);
2132		}
2133
2134		if (check_feature(FEATURE_GAM_VAPIC))
2135			pr_cont(" GA_vAPIC");
2136
2137		if (check_feature(FEATURE_SNP))
2138			pr_cont(" SNP");
2139
2140		pr_cont("\n");
2141	}
2142
2143	if (irq_remapping_enabled) {
2144		pr_info("Interrupt remapping enabled\n");
2145		if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2146			pr_info("X2APIC enabled\n");
2147	}
2148	if (amd_iommu_pgtable == PD_MODE_V2) {
2149		pr_info("V2 page table enabled (Paging mode : %d level)\n",
2150			amd_iommu_gpt_level);
2151	}
2152}
2153
2154static int __init amd_iommu_init_pci(void)
2155{
2156	struct amd_iommu *iommu;
2157	struct amd_iommu_pci_seg *pci_seg;
2158	int ret;
2159
2160	/* Init global identity domain before registering IOMMU */
2161	amd_iommu_init_identity_domain();
2162
2163	for_each_iommu(iommu) {
2164		ret = iommu_init_pci(iommu);
2165		if (ret) {
2166			pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n",
2167			       iommu->index, ret);
2168			goto out;
2169		}
2170		/* Need to setup range after PCI init */
2171		iommu_set_cwwb_range(iommu);
2172	}
2173
2174	/*
2175	 * Order is important here to make sure any unity map requirements are
2176	 * fulfilled. The unity mappings are created and written to the device
2177	 * table during the iommu_init_pci() call.
2178	 *
2179	 * After that we call init_device_table_dma() to make sure any
2180	 * uninitialized DTE will block DMA, and in the end we flush the caches
2181	 * of all IOMMUs to make sure the changes to the device table are
2182	 * active.
2183	 */
2184	for_each_pci_segment(pci_seg)
2185		init_device_table_dma(pci_seg);
2186
2187	for_each_iommu(iommu)
2188		amd_iommu_flush_all_caches(iommu);
2189
2190	print_iommu_info();
2191
2192out:
2193	return ret;
2194}
2195
2196/****************************************************************************
2197 *
2198 * The following functions initialize the MSI interrupts for all IOMMUs
2199 * in the system. It's a bit challenging because there could be multiple
2200 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
2201 * pci_dev.
2202 *
2203 ****************************************************************************/
2204
2205static int iommu_setup_msi(struct amd_iommu *iommu)
2206{
2207	int r;
2208
2209	r = pci_enable_msi(iommu->dev);
2210	if (r)
2211		return r;
2212
2213	r = request_threaded_irq(iommu->dev->irq,
2214				 amd_iommu_int_handler,
2215				 amd_iommu_int_thread,
2216				 0, "AMD-Vi",
2217				 iommu);
2218
2219	if (r) {
2220		pci_disable_msi(iommu->dev);
2221		return r;
2222	}
2223
2224	return 0;
2225}
2226
2227union intcapxt {
2228	u64	capxt;
2229	struct {
2230		u64	reserved_0		:  2,
2231			dest_mode_logical	:  1,
2232			reserved_1		:  5,
2233			destid_0_23		: 24,
2234			vector			:  8,
2235			reserved_2		: 16,
2236			destid_24_31		:  8;
2237	};
2238} __attribute__ ((packed));
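/*
 * Layout illustration: intcapxt_unmask_irq() below splits the x2APIC
 * destination ID across the two destid fields. For a destination APIC ID of
 * 0x01020304, destid_0_23 holds 0x020304 (the low 24 bits) and destid_24_31
 * holds 0x01 (the top 8 bits); the vector and destination mode occupy their
 * own fields of the 64-bit register image.
 */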
2239
2240
2241static struct irq_chip intcapxt_controller;
2242
2243static int intcapxt_irqdomain_activate(struct irq_domain *domain,
2244				       struct irq_data *irqd, bool reserve)
2245{
2246	return 0;
2247}
2248
2249static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
2250					  struct irq_data *irqd)
2251{
2252}
2253
2254
2255static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
2256				    unsigned int nr_irqs, void *arg)
2257{
2258	struct irq_alloc_info *info = arg;
2259	int i, ret;
2260
2261	if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
2262		return -EINVAL;
2263
2264	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
2265	if (ret < 0)
2266		return ret;
2267
2268	for (i = virq; i < virq + nr_irqs; i++) {
2269		struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
2270
2271		irqd->chip = &intcapxt_controller;
2272		irqd->hwirq = info->hwirq;
2273		irqd->chip_data = info->data;
2274		__irq_set_handler(i, handle_edge_irq, 0, "edge");
2275	}
2276
2277	return ret;
2278}
2279
2280static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq,
2281				    unsigned int nr_irqs)
2282{
2283	irq_domain_free_irqs_top(domain, virq, nr_irqs);
2284}
2285
2286
2287static void intcapxt_unmask_irq(struct irq_data *irqd)
2288{
2289	struct amd_iommu *iommu = irqd->chip_data;
2290	struct irq_cfg *cfg = irqd_cfg(irqd);
2291	union intcapxt xt;
2292
2293	xt.capxt = 0ULL;
2294	xt.dest_mode_logical = apic->dest_mode_logical;
2295	xt.vector = cfg->vector;
2296	xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
2297	xt.destid_24_31 = cfg->dest_apicid >> 24;
2298
2299	writeq(xt.capxt, iommu->mmio_base + irqd->hwirq);
2300}
2301
2302static void intcapxt_mask_irq(struct irq_data *irqd)
2303{
2304	struct amd_iommu *iommu = irqd->chip_data;
2305
2306	writeq(0, iommu->mmio_base + irqd->hwirq);
2307}
2308
2309
2310static int intcapxt_set_affinity(struct irq_data *irqd,
2311				 const struct cpumask *mask, bool force)
2312{
2313	struct irq_data *parent = irqd->parent_data;
2314	int ret;
2315
2316	ret = parent->chip->irq_set_affinity(parent, mask, force);
2317	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
2318		return ret;
2319	return 0;
2320}
2321
2322static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on)
2323{
2324	return on ? -EOPNOTSUPP : 0;
2325}
2326
2327static struct irq_chip intcapxt_controller = {
2328	.name			= "IOMMU-MSI",
2329	.irq_unmask		= intcapxt_unmask_irq,
2330	.irq_mask		= intcapxt_mask_irq,
2331	.irq_ack		= irq_chip_ack_parent,
2332	.irq_retrigger		= irq_chip_retrigger_hierarchy,
2333	.irq_set_affinity       = intcapxt_set_affinity,
2334	.irq_set_wake		= intcapxt_set_wake,
2335	.flags			= IRQCHIP_MASK_ON_SUSPEND,
2336};
2337
2338static const struct irq_domain_ops intcapxt_domain_ops = {
2339	.alloc			= intcapxt_irqdomain_alloc,
2340	.free			= intcapxt_irqdomain_free,
2341	.activate		= intcapxt_irqdomain_activate,
2342	.deactivate		= intcapxt_irqdomain_deactivate,
2343};
2344
2345
2346static struct irq_domain *iommu_irqdomain;
2347
2348static struct irq_domain *iommu_get_irqdomain(void)
2349{
2350	struct fwnode_handle *fn;
2351
2352	/* No need for locking here (yet) as the init is single-threaded */
2353	if (iommu_irqdomain)
2354		return iommu_irqdomain;
2355
2356	fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
2357	if (!fn)
2358		return NULL;
2359
2360	iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
2361						      fn, &intcapxt_domain_ops,
2362						      NULL);
2363	if (!iommu_irqdomain)
2364		irq_domain_free_fwnode(fn);
2365
2366	return iommu_irqdomain;
2367}
2368
2369static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname,
2370				  int hwirq, irq_handler_t thread_fn)
2371{
2372	struct irq_domain *domain;
2373	struct irq_alloc_info info;
2374	int irq, ret;
2375	int node = dev_to_node(&iommu->dev->dev);
2376
2377	domain = iommu_get_irqdomain();
2378	if (!domain)
2379		return -ENXIO;
2380
2381	init_irq_alloc_info(&info, NULL);
2382	info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
2383	info.data = iommu;
2384	info.hwirq = hwirq;
2385
2386	irq = irq_domain_alloc_irqs(domain, 1, node, &info);
2387	if (irq < 0) {
2388		irq_domain_remove(domain);
2389		return irq;
2390	}
2391
2392	ret = request_threaded_irq(irq, amd_iommu_int_handler,
2393				   thread_fn, 0, devname, iommu);
2394	if (ret) {
2395		irq_domain_free_irqs(irq, 1);
2396		irq_domain_remove(domain);
2397		return ret;
2398	}
2399
2400	return 0;
2401}
2402
2403static int iommu_setup_intcapxt(struct amd_iommu *iommu)
2404{
2405	int ret;
2406
2407	snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name),
2408		 "AMD-Vi%d-Evt", iommu->index);
2409	ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name,
2410				     MMIO_INTCAPXT_EVT_OFFSET,
2411				     amd_iommu_int_thread_evtlog);
2412	if (ret)
2413		return ret;
2414
2415	snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name),
2416		 "AMD-Vi%d-PPR", iommu->index);
2417	ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name,
2418				     MMIO_INTCAPXT_PPR_OFFSET,
2419				     amd_iommu_int_thread_pprlog);
2420	if (ret)
2421		return ret;
2422
2423#ifdef CONFIG_IRQ_REMAP
2424	snprintf(iommu->ga_irq_name, sizeof(iommu->ga_irq_name),
2425		 "AMD-Vi%d-GA", iommu->index);
2426	ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name,
2427				     MMIO_INTCAPXT_GALOG_OFFSET,
2428				     amd_iommu_int_thread_galog);
2429#endif
2430
2431	return ret;
2432}
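/*
 * Each interrupt source set up above (event log, PPR log and, with
 * CONFIG_IRQ_REMAP, the GA log) gets its own XT interrupt: a separate
 * MMIO_INTCAPXT_*_OFFSET register is used as the hwirq and a dedicated
 * threaded handler is requested for it.
 */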
2433
2434static int iommu_init_irq(struct amd_iommu *iommu)
2435{
2436	int ret;
2437
2438	if (iommu->int_enabled)
2439		goto enable_faults;
2440
2441	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2442		ret = iommu_setup_intcapxt(iommu);
2443	else if (iommu->dev->msi_cap)
2444		ret = iommu_setup_msi(iommu);
2445	else
2446		ret = -ENODEV;
2447
2448	if (ret)
2449		return ret;
2450
2451	iommu->int_enabled = true;
2452enable_faults:
2453
2454	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2455		iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2456
2457	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2458
2459	return 0;
2460}
2461
2462/****************************************************************************
2463 *
2464 * The next functions belong to the third pass of parsing the ACPI
2465 * table. In this last pass the memory mapping requirements are
2466 * gathered (like exclusion and unity mapping ranges).
2467 *
2468 ****************************************************************************/
2469
2470static void __init free_unity_maps(void)
2471{
2472	struct unity_map_entry *entry, *next;
2473	struct amd_iommu_pci_seg *p, *pci_seg;
2474
2475	for_each_pci_segment_safe(pci_seg, p) {
2476		list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) {
2477			list_del(&entry->list);
2478			kfree(entry);
2479		}
2480	}
2481}
2482
2483/* called for unity map ACPI definition */
2484static int __init init_unity_map_range(struct ivmd_header *m,
2485				       struct acpi_table_header *ivrs_base)
2486{
2487	struct unity_map_entry *e = NULL;
2488	struct amd_iommu_pci_seg *pci_seg;
2489	char *s;
2490
2491	pci_seg = get_pci_segment(m->pci_seg, ivrs_base);
2492	if (pci_seg == NULL)
2493		return -ENOMEM;
2494
2495	e = kzalloc(sizeof(*e), GFP_KERNEL);
2496	if (e == NULL)
2497		return -ENOMEM;
2498
2499	switch (m->type) {
2500	default:
2501		kfree(e);
2502		return 0;
2503	case ACPI_IVMD_TYPE:
2504		s = "IVMD_TYPE\t\t\t";
2505		e->devid_start = e->devid_end = m->devid;
2506		break;
2507	case ACPI_IVMD_TYPE_ALL:
2508		s = "IVMD_TYPE_ALL\t\t";
2509		e->devid_start = 0;
2510		e->devid_end = pci_seg->last_bdf;
2511		break;
2512	case ACPI_IVMD_TYPE_RANGE:
2513		s = "IVMD_TYPE_RANGE\t\t";
2514		e->devid_start = m->devid;
2515		e->devid_end = m->aux;
2516		break;
2517	}
2518	e->address_start = PAGE_ALIGN(m->range_start);
2519	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
2520	e->prot = m->flags >> 1;
2521
2522	/*
2523	 * Treat per-device exclusion ranges as r/w unity-mapped regions
2524	 * since some buggy BIOSes might cause the exclusion range
2525	 * (exclusion_start and exclusion_length members) to be overwritten.
2526	 * This happens when there are multiple exclusion ranges (IVMD
2527	 * entries) defined in the ACPI table.
2528	 */
2529	if (m->flags & IVMD_FLAG_EXCL_RANGE)
2530		e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
2531
2532	DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: "
2533		    "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx"
2534		    " flags: %x\n", s, m->pci_seg,
2535		    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2536		    PCI_FUNC(e->devid_start), m->pci_seg,
2537		    PCI_BUS_NUM(e->devid_end),
2538		    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2539		    e->address_start, e->address_end, m->flags);
2540
2541	list_add_tail(&e->list, &pci_seg->unity_map);
2542
2543	return 0;
2544}
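/*
 * The protection value stored above is simply the IVMD flags shifted right
 * by one, so IVMD_FLAG_IR | IVMD_FLAG_IW (0x02 | 0x04 = 0x06) becomes
 * prot == 0x3, i.e. the region is unity mapped readable and writable;
 * exclusion ranges are forced to exactly that value.
 */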
2545
2546/* iterates over all memory definitions we find in the ACPI table */
2547static int __init init_memory_definitions(struct acpi_table_header *table)
2548{
2549	u8 *p = (u8 *)table, *end = (u8 *)table;
2550	struct ivmd_header *m;
2551
2552	end += table->length;
2553	p += IVRS_HEADER_LENGTH;
2554
2555	while (p < end) {
2556		m = (struct ivmd_header *)p;
2557		if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2558			init_unity_map_range(m, table);
2559
2560		p += m->length;
2561	}
2562
2563	return 0;
2564}
2565
2566/*
2567 * Init the device table to not allow DMA access for devices
2568 */
2569static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2570{
2571	u32 devid;
2572	struct dev_table_entry *dev_table = pci_seg->dev_table;
2573
2574	if (dev_table == NULL)
2575		return;
2576
2577	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2578		__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID);
2579		if (!amd_iommu_snp_en)
2580			__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION);
2581	}
2582}
2583
2584static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2585{
2586	u32 devid;
2587	struct dev_table_entry *dev_table = pci_seg->dev_table;
2588
2589	if (dev_table == NULL)
2590		return;
2591
2592	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2593		dev_table[devid].data[0] = 0ULL;
2594		dev_table[devid].data[1] = 0ULL;
2595	}
2596}
2597
2598static void init_device_table(void)
2599{
2600	struct amd_iommu_pci_seg *pci_seg;
2601	u32 devid;
2602
2603	if (!amd_iommu_irq_remap)
2604		return;
2605
2606	for_each_pci_segment(pci_seg) {
2607		for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
2608			__set_dev_entry_bit(pci_seg->dev_table,
2609					    devid, DEV_ENTRY_IRQ_TBL_EN);
2610	}
2611}
2612
2613static void iommu_init_flags(struct amd_iommu *iommu)
2614{
2615	iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2616		iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2617		iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2618
2619	iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2620		iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2621		iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2622
2623	iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2624		iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2625		iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2626
2627	iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2628		iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2629		iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2630
2631	/*
2632	 * make IOMMU memory accesses cache coherent
2633	 */
2634	iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2635
2636	/* Set IOTLB invalidation timeout to 1s */
2637	iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2638
2639	/* Enable Enhanced Peripheral Page Request Handling */
2640	if (check_feature(FEATURE_EPHSUP))
2641		iommu_feature_enable(iommu, CONTROL_EPH_EN);
2642}
2643
2644static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2645{
2646	int i, j;
2647	u32 ioc_feature_control;
2648	struct pci_dev *pdev = iommu->root_pdev;
2649
2650	/* RD890 BIOSes may not have completely reconfigured the iommu */
2651	if (!is_rd890_iommu(iommu->dev) || !pdev)
2652		return;
2653
2654	/*
2655	 * First, we need to ensure that the iommu is enabled. This is
2656	 * controlled by a register in the northbridge
2657	 */
2658
2659	/* Select Northbridge indirect register 0x75 and enable writing */
2660	pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2661	pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2662
2663	/* Enable the iommu */
2664	if (!(ioc_feature_control & 0x1))
2665		pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2666
2667	/* Restore the iommu BAR */
2668	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2669			       iommu->stored_addr_lo);
2670	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2671			       iommu->stored_addr_hi);
2672
2673	/* Restore the l1 indirect regs for each of the 6 l1s */
2674	for (i = 0; i < 6; i++)
2675		for (j = 0; j < 0x12; j++)
2676			iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2677
2678	/* Restore the l2 indirect regs */
2679	for (i = 0; i < 0x83; i++)
2680		iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2681
2682	/* Lock PCI setup registers */
2683	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2684			       iommu->stored_addr_lo | 1);
2685}
2686
2687static void iommu_enable_ga(struct amd_iommu *iommu)
2688{
2689#ifdef CONFIG_IRQ_REMAP
2690	switch (amd_iommu_guest_ir) {
2691	case AMD_IOMMU_GUEST_IR_VAPIC:
2692	case AMD_IOMMU_GUEST_IR_LEGACY_GA:
2693		iommu_feature_enable(iommu, CONTROL_GA_EN);
2694		iommu->irte_ops = &irte_128_ops;
2695		break;
2696	default:
2697		iommu->irte_ops = &irte_32_ops;
2698		break;
2699	}
2700#endif
2701}
2702
2703static void iommu_disable_irtcachedis(struct amd_iommu *iommu)
2704{
2705	iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
2706}
2707
2708static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
2709{
2710	u64 ctrl;
2711
2712	if (!amd_iommu_irtcachedis)
2713		return;
2714
2715	/*
2716	 * Note:
2717	 * Support for the IRTCacheDis feature is determined by
2718	 * checking if the bit is writable.
2719	 */
2720	iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS);
2721	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
2722	ctrl &= (1ULL << CONTROL_IRTCACHEDIS);
2723	if (ctrl)
2724		iommu->irtcachedis_enabled = true;
2725	pr_info("iommu%d (%#06x) : IRT cache is %s\n",
2726		iommu->index, iommu->devid,
2727		iommu->irtcachedis_enabled ? "disabled" : "enabled");
2728}
2729
2730static void early_enable_iommu(struct amd_iommu *iommu)
2731{
2732	iommu_disable(iommu);
2733	iommu_init_flags(iommu);
2734	iommu_set_device_table(iommu);
2735	iommu_enable_command_buffer(iommu);
2736	iommu_enable_event_buffer(iommu);
2737	iommu_set_exclusion_range(iommu);
2738	iommu_enable_gt(iommu);
2739	iommu_enable_ga(iommu);
2740	iommu_enable_xt(iommu);
2741	iommu_enable_irtcachedis(iommu);
2742	iommu_enable(iommu);
2743	amd_iommu_flush_all_caches(iommu);
2744}
2745
2746/*
2747 * This function finally enables all IOMMUs found in the system after
2748 * they have been initialized.
2749 *
2750 * In a kdump kernel with all IOMMUs pre-enabled, try to copy the old
2751 * contents of the device table entries. If that is not the case, or the
2752 * copy fails, just continue as a normal kernel would.
2753 */
2754static void early_enable_iommus(void)
2755{
2756	struct amd_iommu *iommu;
2757	struct amd_iommu_pci_seg *pci_seg;
2758
2759	if (!copy_device_table()) {
2760		/*
2761		 * If we get here because copying the device table from the old
2762		 * kernel (with all IOMMUs enabled) failed, print an error message
2763		 * and free the allocated old_dev_tbl_cpy.
2764		 */
2765		if (amd_iommu_pre_enabled)
2766			pr_err("Failed to copy DEV table from previous kernel.\n");
2767
2768		for_each_pci_segment(pci_seg) {
2769			if (pci_seg->old_dev_tbl_cpy != NULL) {
2770				iommu_free_pages(pci_seg->old_dev_tbl_cpy,
2771						 get_order(pci_seg->dev_table_size));
2772				pci_seg->old_dev_tbl_cpy = NULL;
2773			}
2774		}
2775
2776		for_each_iommu(iommu) {
2777			clear_translation_pre_enabled(iommu);
2778			early_enable_iommu(iommu);
2779		}
2780	} else {
2781		pr_info("Copied DEV table from previous kernel.\n");
2782
2783		for_each_pci_segment(pci_seg) {
2784			iommu_free_pages(pci_seg->dev_table,
2785					 get_order(pci_seg->dev_table_size));
2786			pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
2787		}
2788
2789		for_each_iommu(iommu) {
2790			iommu_disable_command_buffer(iommu);
2791			iommu_disable_event_buffer(iommu);
2792			iommu_disable_irtcachedis(iommu);
2793			iommu_enable_command_buffer(iommu);
2794			iommu_enable_event_buffer(iommu);
2795			iommu_enable_ga(iommu);
2796			iommu_enable_xt(iommu);
2797			iommu_enable_irtcachedis(iommu);
2798			iommu_set_device_table(iommu);
2799			amd_iommu_flush_all_caches(iommu);
2800		}
2801	}
2802}
2803
2804static void enable_iommus_ppr(void)
2805{
2806	struct amd_iommu *iommu;
2807
2808	if (!amd_iommu_gt_ppr_supported())
2809		return;
2810
2811	for_each_iommu(iommu)
2812		amd_iommu_enable_ppr_log(iommu);
2813}
2814
2815static void enable_iommus_vapic(void)
2816{
2817#ifdef CONFIG_IRQ_REMAP
2818	u32 status, i;
2819	struct amd_iommu *iommu;
2820
2821	for_each_iommu(iommu) {
2822		/*
2823		 * Disable GALog if already running. It could have been enabled
2824		 * in the previous boot before kdump.
2825		 */
2826		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
2827		if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
2828			continue;
2829
2830		iommu_feature_disable(iommu, CONTROL_GALOG_EN);
2831		iommu_feature_disable(iommu, CONTROL_GAINT_EN);
2832
2833		/*
2834		 * Need to poll for the GALogRun bit to read as zero before we
2835		 * can safely set or modify the GA Log registers.
2836		 */
2837		for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
2838			status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
2839			if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
2840				break;
2841			udelay(10);
2842		}
2843
2844		if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
2845			return;
2846	}
2847
2848	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
2849	    !check_feature(FEATURE_GAM_VAPIC)) {
2850		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
2851		return;
2852	}
2853
2854	if (amd_iommu_snp_en &&
2855	    !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) {
2856		pr_warn("Force to disable Virtual APIC due to SNP\n");
2857		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
2858		return;
2859	}
2860
2861	/* Enabling GAM and SNPAVIC support */
2862	for_each_iommu(iommu) {
2863		if (iommu_init_ga_log(iommu) ||
2864		    iommu_ga_log_enable(iommu))
2865			return;
2866
2867		iommu_feature_enable(iommu, CONTROL_GAM_EN);
2868		if (amd_iommu_snp_en)
2869			iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN);
2870	}
2871
2872	amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
2873	pr_info("Virtual APIC enabled\n");
2874#endif
2875}
2876
2877static void disable_iommus(void)
2878{
2879	struct amd_iommu *iommu;
2880
2881	for_each_iommu(iommu)
2882		iommu_disable(iommu);
2883
2884#ifdef CONFIG_IRQ_REMAP
2885	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2886		amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
2887#endif
2888}
2889
2890/*
2891 * Suspend/Resume support
2892 * disable suspend until real resume implemented
2893 */
2894
2895static void amd_iommu_resume(void)
2896{
2897	struct amd_iommu *iommu;
2898
2899	for_each_iommu(iommu)
2900		iommu_apply_resume_quirks(iommu);
2901
2902	/* re-load the hardware */
2903	for_each_iommu(iommu)
2904		early_enable_iommu(iommu);
2905
2906	amd_iommu_enable_interrupts();
2907}
2908
2909static int amd_iommu_suspend(void)
2910{
2911	/* disable IOMMUs to go out of the way for BIOS */
2912	disable_iommus();
2913
2914	return 0;
2915}
2916
2917static struct syscore_ops amd_iommu_syscore_ops = {
2918	.suspend = amd_iommu_suspend,
2919	.resume = amd_iommu_resume,
2920};
2921
2922static void __init free_iommu_resources(void)
2923{
2924	kmem_cache_destroy(amd_iommu_irq_cache);
2925	amd_iommu_irq_cache = NULL;
2926
2927	free_iommu_all();
2928	free_pci_segments();
2929}
2930
2931/* SB IOAPIC is always on this device in AMD systems */
2932#define IOAPIC_SB_DEVID		((0x00 << 8) | PCI_DEVFN(0x14, 0))
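/*
 * Decoded, IOAPIC_SB_DEVID is (0x00 << 8) | PCI_DEVFN(0x14, 0)
 * = (0x14 << 3) | 0 = 0xa0, i.e. the IOAPIC behind PCI device 00:14.0.
 */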
2933
2934static bool __init check_ioapic_information(void)
2935{
2936	const char *fw_bug = FW_BUG;
2937	bool ret, has_sb_ioapic;
2938	int idx;
2939
2940	has_sb_ioapic = false;
2941	ret           = false;
2942
2943	/*
2944	 * If we have map overrides on the kernel command line the
2945	 * messages in this function might not describe firmware bugs
2946	 * anymore - so be careful
2947	 */
2948	if (cmdline_maps)
2949		fw_bug = "";
2950
2951	for (idx = 0; idx < nr_ioapics; idx++) {
2952		int devid, id = mpc_ioapic_id(idx);
2953
2954		devid = get_ioapic_devid(id);
2955		if (devid < 0) {
2956			pr_err("%s: IOAPIC[%d] not in IVRS table\n",
2957				fw_bug, id);
2958			ret = false;
2959		} else if (devid == IOAPIC_SB_DEVID) {
2960			has_sb_ioapic = true;
2961			ret           = true;
2962		}
2963	}
2964
2965	if (!has_sb_ioapic) {
2966		/*
2967		 * We expect the SB IOAPIC to be listed in the IVRS
2968		 * table. The system timer is connected to the SB IOAPIC
2969		 * and if we don't have it in the list the system will
2970		 * panic at boot time.  This situation usually happens
2971		 * when the BIOS is buggy and provides us the wrong
2972		 * device id for the IOAPIC in the system.
2973		 */
2974		pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
2975	}
2976
2977	if (!ret)
2978		pr_err("Disabling interrupt remapping\n");
2979
2980	return ret;
2981}
2982
2983static void __init free_dma_resources(void)
2984{
2985	ida_destroy(&pdom_ids);
2986
2987	free_unity_maps();
2988}
2989
2990static void __init ivinfo_init(void *ivrs)
2991{
2992	amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
2993}
2994
2995/*
2996 * This is the hardware init function for AMD IOMMU in the system.
2997 * This function is called either from amd_iommu_init or from the interrupt
2998 * remapping setup code.
2999 *
3000 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
3001 * four times:
3002 *
3003 *	1 pass) Discover the most comprehensive IVHD type to use.
3004 *
3005 *	2 pass) Find the highest PCI device id the driver has to handle.
3006 *		Upon this information the size of the data structures is
3007 *		determined that needs to be allocated.
3008 *
3009 *	3 pass) Initialize the data structures just allocated with the
3010 *		information in the ACPI table about available AMD IOMMUs
3011 *		in the system. It also maps the PCI devices in the
3012 *		system to specific IOMMUs
3013 *
3014 *	4 pass) After the basic data structures are allocated and
3015 *		initialized we update them with information about memory
3016 *		remapping requirements parsed out of the ACPI table in
3017 *		this last pass.
3018 *
3019 * After everything is set up the IOMMUs are enabled and the necessary
3020 * hotplug and suspend notifiers are registered.
3021 */
3022static int __init early_amd_iommu_init(void)
3023{
3024	struct acpi_table_header *ivrs_base;
3025	int remap_cache_sz, ret;
3026	acpi_status status;
3027
3028	if (!amd_iommu_detected)
3029		return -ENODEV;
3030
3031	status = acpi_get_table("IVRS", 0, &ivrs_base);
3032	if (status == AE_NOT_FOUND)
3033		return -ENODEV;
3034	else if (ACPI_FAILURE(status)) {
3035		const char *err = acpi_format_exception(status);
3036		pr_err("IVRS table error: %s\n", err);
3037		return -EINVAL;
3038	}
3039
3040	/*
3041	 * Validate checksum here so we don't need to do it when
3042	 * we actually parse the table
3043	 */
3044	ret = check_ivrs_checksum(ivrs_base);
3045	if (ret)
3046		goto out;
3047
3048	ivinfo_init(ivrs_base);
3049
3050	amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
3051	DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
3052
3053	/*
3054	 * now the data structures are allocated and basically initialized
3055	 * start the real acpi table scan
3056	 */
3057	ret = init_iommu_all(ivrs_base);
3058	if (ret)
3059		goto out;
3060
3061	/* 5 level guest page table */
3062	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3063	    FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL)
3064		amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
3065
3066	if (amd_iommu_pgtable == PD_MODE_V2) {
3067		if (!amd_iommu_v2_pgtbl_supported()) {
3068			pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
3069			amd_iommu_pgtable = PD_MODE_V1;
3070		}
3071	}
3072
3073	/* Disable any previously enabled IOMMUs */
3074	if (!is_kdump_kernel() || amd_iommu_disabled)
3075		disable_iommus();
3076
3077	if (amd_iommu_irq_remap)
3078		amd_iommu_irq_remap = check_ioapic_information();
3079
3080	if (amd_iommu_irq_remap) {
3081		struct amd_iommu_pci_seg *pci_seg;
3082		/*
3083		 * Interrupt remapping enabled, create kmem_cache for the
3084		 * remapping tables.
3085		 */
3086		ret = -ENOMEM;
3087		if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
3088			remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
3089		else
3090			remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
3091		amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
3092							remap_cache_sz,
3093							DTE_INTTAB_ALIGNMENT,
3094							0, NULL);
3095		if (!amd_iommu_irq_cache)
3096			goto out;
3097
3098		for_each_pci_segment(pci_seg) {
3099			if (alloc_irq_lookup_table(pci_seg))
3100				goto out;
3101		}
3102	}
3103
3104	ret = init_memory_definitions(ivrs_base);
3105	if (ret)
3106		goto out;
3107
3108	/* init the device table */
3109	init_device_table();
3110
3111out:
3112	/* Don't leak any ACPI memory */
3113	acpi_put_table(ivrs_base);
3114
3115	return ret;
3116}
3117
3118static int amd_iommu_enable_interrupts(void)
3119{
3120	struct amd_iommu *iommu;
3121	int ret = 0;
3122
3123	for_each_iommu(iommu) {
3124		ret = iommu_init_irq(iommu);
3125		if (ret)
3126			goto out;
3127	}
3128
3129	/*
3130	 * Interrupt handler is ready to process interrupts. Enable
3131	 * PPR and GA log interrupt for all IOMMUs.
3132	 */
3133	enable_iommus_vapic();
3134	enable_iommus_ppr();
3135
3136out:
3137	return ret;
3138}
3139
3140static bool __init detect_ivrs(void)
3141{
3142	struct acpi_table_header *ivrs_base;
3143	acpi_status status;
3144	int i;
3145
3146	status = acpi_get_table("IVRS", 0, &ivrs_base);
3147	if (status == AE_NOT_FOUND)
3148		return false;
3149	else if (ACPI_FAILURE(status)) {
3150		const char *err = acpi_format_exception(status);
3151		pr_err("IVRS table error: %s\n", err);
3152		return false;
3153	}
3154
3155	acpi_put_table(ivrs_base);
3156
3157	if (amd_iommu_force_enable)
3158		goto out;
3159
3160	/* Don't use IOMMU if there is Stoney Ridge graphics */
3161	for (i = 0; i < 32; i++) {
3162		u32 pci_id;
3163
3164		pci_id = read_pci_config(0, i, 0, 0);
3165		if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
3166			pr_info("Disable IOMMU on Stoney Ridge\n");
3167			return false;
3168		}
3169	}
3170
3171out:
3172	/* Make sure ACS will be enabled during PCI probe */
3173	pci_request_acs();
3174
3175	return true;
3176}
3177
3178static void iommu_snp_enable(void)
3179{
3180#ifdef CONFIG_KVM_AMD_SEV
3181	if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
3182		return;
3183	/*
3184	 * The SNP support requires that IOMMU must be enabled, and is
3185	 * configured with V1 page table (DTE[Mode] = 0 is not supported).
3186	 */
3187	if (no_iommu || iommu_default_passthrough()) {
3188		pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
3189		goto disable_snp;
3190	}
3191
3192	if (amd_iommu_pgtable != PD_MODE_V1) {
3193		pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
3194		goto disable_snp;
3195	}
3196
3197	amd_iommu_snp_en = check_feature(FEATURE_SNP);
3198	if (!amd_iommu_snp_en) {
3199		pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
3200		goto disable_snp;
3201	}
3202
3203	pr_info("IOMMU SNP support enabled.\n");
3204	return;
3205
3206disable_snp:
3207	cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
3208#endif
3209}
3210
3211/****************************************************************************
3212 *
3213 * AMD IOMMU Initialization State Machine
3214 *
3215 ****************************************************************************/
3216
3217static int __init state_next(void)
3218{
3219	int ret = 0;
3220
3221	switch (init_state) {
3222	case IOMMU_START_STATE:
3223		if (!detect_ivrs()) {
3224			init_state	= IOMMU_NOT_FOUND;
3225			ret		= -ENODEV;
3226		} else {
3227			init_state	= IOMMU_IVRS_DETECTED;
3228		}
3229		break;
3230	case IOMMU_IVRS_DETECTED:
3231		if (amd_iommu_disabled) {
3232			init_state = IOMMU_CMDLINE_DISABLED;
3233			ret = -EINVAL;
3234		} else {
3235			ret = early_amd_iommu_init();
3236			init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
3237		}
3238		break;
3239	case IOMMU_ACPI_FINISHED:
3240		early_enable_iommus();
3241		x86_platform.iommu_shutdown = disable_iommus;
3242		init_state = IOMMU_ENABLED;
3243		break;
3244	case IOMMU_ENABLED:
3245		register_syscore_ops(&amd_iommu_syscore_ops);
3246		iommu_snp_enable();
3247		ret = amd_iommu_init_pci();
3248		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
3249		break;
3250	case IOMMU_PCI_INIT:
3251		ret = amd_iommu_enable_interrupts();
3252		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
3253		break;
3254	case IOMMU_INTERRUPTS_EN:
3255		init_state = IOMMU_INITIALIZED;
3256		break;
3257	case IOMMU_INITIALIZED:
3258		/* Nothing to do */
3259		break;
3260	case IOMMU_NOT_FOUND:
3261	case IOMMU_INIT_ERROR:
3262	case IOMMU_CMDLINE_DISABLED:
3263		/* Error states => do nothing */
3264		ret = -EINVAL;
3265		break;
3266	default:
3267		/* Unknown state */
3268		BUG();
3269	}
3270
3271	if (ret) {
3272		free_dma_resources();
3273		if (!irq_remapping_enabled) {
3274			disable_iommus();
3275			free_iommu_resources();
3276		} else {
3277			struct amd_iommu *iommu;
3278			struct amd_iommu_pci_seg *pci_seg;
3279
3280			for_each_pci_segment(pci_seg)
3281				uninit_device_table_dma(pci_seg);
3282
3283			for_each_iommu(iommu)
3284				amd_iommu_flush_all_caches(iommu);
3285		}
3286	}
3287	return ret;
3288}
3289
3290static int __init iommu_go_to_state(enum iommu_init_state state)
3291{
3292	int ret = -EINVAL;
3293
3294	while (init_state != state) {
3295		if (init_state == IOMMU_NOT_FOUND         ||
3296		    init_state == IOMMU_INIT_ERROR        ||
3297		    init_state == IOMMU_CMDLINE_DISABLED)
3298			break;
3299		ret = state_next();
3300	}
3301
3302	return ret;
3303}
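/*
 * The state machine above normally advances through
 * IOMMU_START_STATE -> IOMMU_IVRS_DETECTED -> IOMMU_ACPI_FINISHED ->
 * IOMMU_ENABLED -> IOMMU_PCI_INIT -> IOMMU_INTERRUPTS_EN ->
 * IOMMU_INITIALIZED, while IOMMU_NOT_FOUND, IOMMU_INIT_ERROR and
 * IOMMU_CMDLINE_DISABLED are terminal error states that stop the walk.
 */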
3304
3305#ifdef CONFIG_IRQ_REMAP
3306int __init amd_iommu_prepare(void)
3307{
3308	int ret;
3309
3310	amd_iommu_irq_remap = true;
3311
3312	ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
3313	if (ret) {
3314		amd_iommu_irq_remap = false;
3315		return ret;
3316	}
3317
3318	return amd_iommu_irq_remap ? 0 : -ENODEV;
3319}
3320
3321int __init amd_iommu_enable(void)
3322{
3323	int ret;
3324
3325	ret = iommu_go_to_state(IOMMU_ENABLED);
3326	if (ret)
3327		return ret;
3328
3329	irq_remapping_enabled = 1;
3330	return amd_iommu_xt_mode;
3331}
3332
3333void amd_iommu_disable(void)
3334{
3335	amd_iommu_suspend();
3336}
3337
3338int amd_iommu_reenable(int mode)
3339{
3340	amd_iommu_resume();
3341
3342	return 0;
3343}
3344
3345int amd_iommu_enable_faulting(unsigned int cpu)
3346{
3347	/* We enable MSI later when PCI is initialized */
3348	return 0;
3349}
3350#endif
3351
3352/*
3353 * This is the core init function for AMD IOMMU hardware in the system.
3354 * This function is called from the generic x86 DMA layer initialization
3355 * code.
3356 */
3357static int __init amd_iommu_init(void)
3358{
3359	struct amd_iommu *iommu;
3360	int ret;
3361
3362	ret = iommu_go_to_state(IOMMU_INITIALIZED);
3363#ifdef CONFIG_GART_IOMMU
3364	if (ret && list_empty(&amd_iommu_list)) {
3365		/*
3366		 * We failed to initialize the AMD IOMMU - try fallback
3367		 * to GART if possible.
3368		 */
3369		gart_iommu_init();
3370	}
3371#endif
3372
3373	for_each_iommu(iommu)
3374		amd_iommu_debugfs_setup(iommu);
3375
3376	return ret;
3377}
3378
3379static bool amd_iommu_sme_check(void)
3380{
3381	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) ||
3382	    (boot_cpu_data.x86 != 0x17))
3383		return true;
3384
3385	/* For Fam17h, a specific level of support is required */
3386	if (boot_cpu_data.microcode >= 0x08001205)
3387		return true;
3388
3389	if ((boot_cpu_data.microcode >= 0x08001126) &&
3390	    (boot_cpu_data.microcode <= 0x080011ff))
3391		return true;
3392
3393	pr_notice("IOMMU not currently supported when SME is active\n");
3394
3395	return false;
3396}
3397
3398/****************************************************************************
3399 *
3400 * Early detect code. This code runs at IOMMU detection time in the DMA
3401 * layer. It just looks if there is an IVRS ACPI table to detect AMD
3402 * IOMMUs
3403 *
3404 ****************************************************************************/
3405int __init amd_iommu_detect(void)
3406{
3407	int ret;
3408
3409	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
3410		return -ENODEV;
3411
3412	if (!amd_iommu_sme_check())
3413		return -ENODEV;
3414
3415	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
3416	if (ret)
3417		return ret;
3418
3419	amd_iommu_detected = true;
3420	iommu_detected = 1;
3421	x86_init.iommu.iommu_init = amd_iommu_init;
3422
3423	return 1;
3424}
3425
3426/****************************************************************************
3427 *
3428 * Parsing functions for the AMD IOMMU specific kernel command line
3429 * options.
3430 *
3431 ****************************************************************************/
3432
3433static int __init parse_amd_iommu_dump(char *str)
3434{
3435	amd_iommu_dump = true;
3436
3437	return 1;
3438}
3439
3440static int __init parse_amd_iommu_intr(char *str)
3441{
3442	for (; *str; ++str) {
3443		if (strncmp(str, "legacy", 6) == 0) {
3444			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3445			break;
3446		}
3447		if (strncmp(str, "vapic", 5) == 0) {
3448			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
3449			break;
3450		}
3451	}
3452	return 1;
3453}
3454
3455static int __init parse_amd_iommu_options(char *str)
3456{
3457	if (!str)
3458		return -EINVAL;
3459
3460	while (*str) {
3461		if (strncmp(str, "fullflush", 9) == 0) {
3462			pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
3463			iommu_set_dma_strict();
3464		} else if (strncmp(str, "force_enable", 12) == 0) {
3465			amd_iommu_force_enable = true;
3466		} else if (strncmp(str, "off", 3) == 0) {
3467			amd_iommu_disabled = true;
3468		} else if (strncmp(str, "force_isolation", 15) == 0) {
3469			amd_iommu_force_isolation = true;
3470		} else if (strncmp(str, "pgtbl_v1", 8) == 0) {
3471			amd_iommu_pgtable = PD_MODE_V1;
3472		} else if (strncmp(str, "pgtbl_v2", 8) == 0) {
3473			amd_iommu_pgtable = PD_MODE_V2;
3474		} else if (strncmp(str, "irtcachedis", 11) == 0) {
3475			amd_iommu_irtcachedis = true;
3476		} else if (strncmp(str, "nohugepages", 11) == 0) {
3477			pr_info("Restricting V1 page-sizes to 4KiB\n");
3478			amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_4K;
3479		} else if (strncmp(str, "v2_pgsizes_only", 15) == 0) {
3480			pr_info("Restricting V1 page-sizes to 4KiB/2MiB/1GiB\n");
3481			amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
3482		} else {
3483			pr_notice("Unknown option - '%s'\n", str);
3484		}
3485
3486		str += strcspn(str, ",");
3487		while (*str == ',')
3488			str++;
3489	}
3490
3491	return 1;
3492}
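/*
 * Command line example (illustrative only): booting with
 *   amd_iommu=force_enable,pgtbl_v2
 * takes the force_enable and pgtbl_v2 branches above, while
 *   amd_iommu=off
 * sets amd_iommu_disabled so that the init state machine later stops in
 * IOMMU_CMDLINE_DISABLED.
 */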
3493
3494static int __init parse_ivrs_ioapic(char *str)
3495{
3496	u32 seg = 0, bus, dev, fn;
3497	int id, i;
3498	u32 devid;
3499
3500	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3501	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3502		goto found;
3503
3504	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3505	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3506		pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n",
3507			str, id, seg, bus, dev, fn);
3508		goto found;
3509	}
3510
3511	pr_err("Invalid command line: ivrs_ioapic%s\n", str);
3512	return 1;
3513
3514found:
3515	if (early_ioapic_map_size == EARLY_MAP_SIZE) {
3516		pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
3517			str);
3518		return 1;
3519	}
3520
3521	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3522
3523	cmdline_maps			= true;
3524	i				= early_ioapic_map_size++;
3525	early_ioapic_map[i].id		= id;
3526	early_ioapic_map[i].devid	= devid;
3527	early_ioapic_map[i].cmd_line	= true;
3528
3529	return 1;
3530}
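/*
 * Example of the current option format parsed above (example IOAPIC id):
 *   ivrs_ioapic=32@0000:00:14.0
 * maps IOAPIC id 32 to segment 0000, bus 00, device 14, function 0; the
 * older ivrs_ioapic[32]=00:14.0 form is still accepted but warns.
 */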
3531
3532static int __init parse_ivrs_hpet(char *str)
3533{
3534	u32 seg = 0, bus, dev, fn;
3535	int id, i;
3536	u32 devid;
3537
3538	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3539	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3540		goto found;
3541
3542	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3543	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3544		pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n",
3545			str, id, seg, bus, dev, fn);
3546		goto found;
3547	}
3548
3549	pr_err("Invalid command line: ivrs_hpet%s\n", str);
3550	return 1;
3551
3552found:
3553	if (early_hpet_map_size == EARLY_MAP_SIZE) {
3554		pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
3555			str);
3556		return 1;
3557	}
3558
3559	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3560
3561	cmdline_maps			= true;
3562	i				= early_hpet_map_size++;
3563	early_hpet_map[i].id		= id;
3564	early_hpet_map[i].devid		= devid;
3565	early_hpet_map[i].cmd_line	= true;
3566
3567	return 1;
3568}
3569
3570#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
3571
3572static int __init parse_ivrs_acpihid(char *str)
3573{
3574	u32 seg = 0, bus, dev, fn;
3575	char *hid, *uid, *p, *addr;
3576	char acpiid[ACPIID_LEN] = {0};
3577	int i;
3578
3579	addr = strchr(str, '@');
3580	if (!addr) {
3581		addr = strchr(str, '=');
3582		if (!addr)
3583			goto not_found;
3584
3585		++addr;
3586
3587		if (strlen(addr) > ACPIID_LEN)
3588			goto not_found;
3589
3590		if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
3591		    sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
3592			pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
3593				str, acpiid, seg, bus, dev, fn);
3594			goto found;
3595		}
3596		goto not_found;
3597	}
3598
3599	/* We have the '@', make it the terminator to get just the acpiid */
3600	*addr++ = 0;
3601
3602	if (strlen(str) > ACPIID_LEN + 1)
3603		goto not_found;
3604
3605	if (sscanf(str, "=%s", acpiid) != 1)
3606		goto not_found;
3607
3608	if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 ||
3609	    sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4)
3610		goto found;
3611
3612not_found:
3613	pr_err("Invalid command line: ivrs_acpihid%s\n", str);
3614	return 1;
3615
3616found:
3617	p = acpiid;
3618	hid = strsep(&p, ":");
3619	uid = p;
3620
3621	if (!hid || !(*hid) || !uid) {
3622		pr_err("Invalid command line: hid or uid\n");
3623		return 1;
3624	}
3625
3626	/*
3627	 * Ignore leading zeroes after ':', so e.g., AMDI0095:00
3628	 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match
3629	 */
3630	while (*uid == '0' && *(uid + 1))
3631		uid++;
3632
3633	i = early_acpihid_map_size++;
3634	memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3635	memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3636	early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3637	early_acpihid_map[i].cmd_line	= true;
3638
3639	return 1;
3640}
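/*
 * Example of the current option format parsed above (HID:UID@SBDF, with an
 * example HID):
 *   ivrs_acpihid=AMDI0020:0@0000:00:13.1
 * where "AMDI0020" is the ACPI HID, "0" the UID (leading zeroes after ':'
 * are ignored), and 0000:00:13.1 the segment/bus/device/function the entry
 * is mapped to.
 */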
3641
3642__setup("amd_iommu_dump",	parse_amd_iommu_dump);
3643__setup("amd_iommu=",		parse_amd_iommu_options);
3644__setup("amd_iommu_intr=",	parse_amd_iommu_intr);
3645__setup("ivrs_ioapic",		parse_ivrs_ioapic);
3646__setup("ivrs_hpet",		parse_ivrs_hpet);
3647__setup("ivrs_acpihid",		parse_ivrs_acpihid);
3648
3649bool amd_iommu_pasid_supported(void)
3650{
3651	/* CPU page table size should match IOMMU guest page table size */
3652	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3653	    amd_iommu_gpt_level != PAGE_MODE_5_LEVEL)
3654		return false;
3655
3656	/*
3657	 * Since DTE[Mode]=0 is prohibited on SNP-enabled system
3658	 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
3659	 * setting up IOMMUv1 page table.
3660	 */
3661	return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en;
3662}
3663
3664struct amd_iommu *get_amd_iommu(unsigned int idx)
3665{
3666	unsigned int i = 0;
3667	struct amd_iommu *iommu;
3668
3669	for_each_iommu(iommu)
3670		if (i++ == idx)
3671			return iommu;
3672	return NULL;
3673}
3674
3675/****************************************************************************
3676 *
3677 * IOMMU EFR Performance Counter support functionality. This code allows
3678 * access to the IOMMU PC functionality.
3679 *
3680 ****************************************************************************/
3681
3682u8 amd_iommu_pc_get_max_banks(unsigned int idx)
3683{
3684	struct amd_iommu *iommu = get_amd_iommu(idx);
3685
3686	if (iommu)
3687		return iommu->max_banks;
3688
3689	return 0;
3690}
3691
3692bool amd_iommu_pc_supported(void)
3693{
3694	return amd_iommu_pc_present;
3695}
3696
3697u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3698{
3699	struct amd_iommu *iommu = get_amd_iommu(idx);
3700
3701	if (iommu)
3702		return iommu->max_counters;
3703
3704	return 0;
3705}
3706
3707static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3708				u8 fxn, u64 *value, bool is_write)
3709{
3710	u32 offset;
3711	u32 max_offset_lim;
3712
3713	/* Make sure the IOMMU PC resource is available */
3714	if (!amd_iommu_pc_present)
3715		return -ENODEV;
3716
3717	/* Check for valid iommu and pc register indexing */
3718	if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3719		return -ENODEV;
3720
3721	offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3722
3723	/* Limit the offset to the hw defined mmio region aperture */
3724	max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3725				(iommu->max_counters << 8) | 0x28);
3726	if ((offset < MMIO_CNTR_REG_OFFSET) ||
3727	    (offset > max_offset_lim))
3728		return -EINVAL;
3729
3730	if (is_write) {
3731		u64 val = *value & GENMASK_ULL(47, 0);
3732
3733		writel((u32)val, iommu->mmio_base + offset);
3734		writel((val >> 32), iommu->mmio_base + offset + 4);
3735	} else {
3736		*value = readl(iommu->mmio_base + offset + 4);
3737		*value <<= 32;
3738		*value |= readl(iommu->mmio_base + offset);
3739		*value &= GENMASK_ULL(47, 0);
3740	}
3741
3742	return 0;
3743}
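/*
 * Offset arithmetic example for the helper above: bank 0, counter 1,
 * function 0 yields offset = ((0x40 | 0) << 12) | (1 << 8) | 0 = 0x40100.
 * The fxn index must be 8-byte aligned and at most 0x28, and counter values
 * are confined to 48 bits by the GENMASK_ULL(47, 0) masking on both the
 * read and write paths.
 */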
3744
3745int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3746{
3747	if (!iommu)
3748		return -EINVAL;
3749
3750	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
3751}
3752
3753int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3754{
3755	if (!iommu)
3756		return -EINVAL;
3757
3758	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
3759}
3760
3761#ifdef CONFIG_KVM_AMD_SEV
3762static int iommu_page_make_shared(void *page)
3763{
3764	unsigned long paddr, pfn;
3765
3766	paddr = iommu_virt_to_phys(page);
3767	/* Cbit maybe set in the paddr */
3768	pfn = __sme_clr(paddr) >> PAGE_SHIFT;
3769
3770	if (!(pfn % PTRS_PER_PMD)) {
3771		int ret, level;
3772		bool assigned;
3773
3774		ret = snp_lookup_rmpentry(pfn, &assigned, &level);
3775		if (ret) {
3776			pr_warn("IOMMU PFN %lx RMP lookup failed, ret %d\n", pfn, ret);
3777			return ret;
3778		}
3779
3780		if (!assigned) {
3781			pr_warn("IOMMU PFN %lx not assigned in RMP table\n", pfn);
3782			return -EINVAL;
3783		}
3784
3785		if (level > PG_LEVEL_4K) {
3786			ret = psmash(pfn);
3787			if (!ret)
3788				goto done;
3789
3790			pr_warn("PSMASH failed for IOMMU PFN %lx huge RMP entry, ret: %d, level: %d\n",
3791				pfn, ret, level);
3792			return ret;
3793		}
3794	}
3795
3796done:
3797	return rmp_make_shared(pfn, PG_LEVEL_4K);
3798}
3799
3800static int iommu_make_shared(void *va, size_t size)
3801{
3802	void *page;
3803	int ret;
3804
3805	if (!va)
3806		return 0;
3807
3808	for (page = va; page < (va + size); page += PAGE_SIZE) {
3809		ret = iommu_page_make_shared(page);
3810		if (ret)
3811			return ret;
3812	}
3813
3814	return 0;
3815}
3816
3817int amd_iommu_snp_disable(void)
3818{
3819	struct amd_iommu *iommu;
3820	int ret;
3821
3822	if (!amd_iommu_snp_en)
3823		return 0;
3824
3825	for_each_iommu(iommu) {
3826		ret = iommu_make_shared(iommu->evt_buf, EVT_BUFFER_SIZE);
3827		if (ret)
3828			return ret;
3829
3830		ret = iommu_make_shared(iommu->ppr_log, PPR_LOG_SIZE);
3831		if (ret)
3832			return ret;
3833
3834		ret = iommu_make_shared((void *)iommu->cmd_sem, PAGE_SIZE);
3835		if (ret)
3836			return ret;
3837	}
3838
3839	return 0;
3840}
3841EXPORT_SYMBOL_GPL(amd_iommu_snp_disable);
3842#endif
v6.2
 110	u16 info;
 111	u32 efr_attr;
 112
 113	/* Following only valid on IVHD type 11h and 40h */
 114	u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
 115	u64 efr_reg2;
 116} __attribute__((packed));
 117
 118/*
 119 * A device entry describing which devices a specific IOMMU translates and
 120 * which requestor ids they use.
 121 */
 122struct ivhd_entry {
 123	u8 type;
 124	u16 devid;
 125	u8 flags;
 126	struct_group(ext_hid,
 127		u32 ext;
 128		u32 hidh;
 129	);
 130	u64 cid;
 131	u8 uidf;
 132	u8 uidl;
 133	u8 uid;
 134} __attribute__((packed));
 135
 136/*
 137 * An AMD IOMMU memory definition structure. It defines things like exclusion
 138 * ranges for devices and regions that should be unity mapped.
 139 */
 140struct ivmd_header {
 141	u8 type;
 142	u8 flags;
 143	u16 length;
 144	u16 devid;
 145	u16 aux;
 146	u16 pci_seg;
 147	u8  resv[6];
 148	u64 range_start;
 149	u64 range_length;
 150} __attribute__((packed));
 151
 152bool amd_iommu_dump;
 153bool amd_iommu_irq_remap __read_mostly;
 154
 155enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
 
 
 156
 157int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
 158static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
 159
 160static bool amd_iommu_detected;
 161static bool amd_iommu_disabled __initdata;
 162static bool amd_iommu_force_enable __initdata;
 
 163static int amd_iommu_target_ivhd_type;
 164
 165/* Global EFR and EFR2 registers */
 166u64 amd_iommu_efr;
 167u64 amd_iommu_efr2;
 168
 169/* SNP is enabled on the system? */
 170bool amd_iommu_snp_en;
 171EXPORT_SYMBOL(amd_iommu_snp_en);
 172
 173LIST_HEAD(amd_iommu_pci_seg_list);	/* list of all PCI segments */
 174LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
 175					   system */
 176
 177/* Array to assign indices to IOMMUs*/
 178struct amd_iommu *amd_iommus[MAX_IOMMUS];
 179
 180/* Number of IOMMUs present in the system */
 181static int amd_iommus_present;
 182
 183/* IOMMUs have a non-present cache? */
 184bool amd_iommu_np_cache __read_mostly;
 185bool amd_iommu_iotlb_sup __read_mostly = true;
 186
 187u32 amd_iommu_max_pasid __read_mostly = ~0;
 188
 189bool amd_iommu_v2_present __read_mostly;
 190static bool amd_iommu_pc_present __read_mostly;
 191bool amdr_ivrs_remap_support __read_mostly;
 192
 193bool amd_iommu_force_isolation __read_mostly;
 194
 195/*
 196 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
 197 * to know which ones are already in use.
 198 */
 199unsigned long *amd_iommu_pd_alloc_bitmap;
 200
 201enum iommu_init_state {
 202	IOMMU_START_STATE,
 203	IOMMU_IVRS_DETECTED,
 204	IOMMU_ACPI_FINISHED,
 205	IOMMU_ENABLED,
 206	IOMMU_PCI_INIT,
 207	IOMMU_INTERRUPTS_EN,
 208	IOMMU_INITIALIZED,
 209	IOMMU_NOT_FOUND,
 210	IOMMU_INIT_ERROR,
 211	IOMMU_CMDLINE_DISABLED,
 212};
 213
 214/* Early ioapic and hpet maps from kernel command line */
 215#define EARLY_MAP_SIZE		4
 216static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
 217static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
 218static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
 219
 220static int __initdata early_ioapic_map_size;
 221static int __initdata early_hpet_map_size;
 222static int __initdata early_acpihid_map_size;
 223
 224static bool __initdata cmdline_maps;
 225
 226static enum iommu_init_state init_state = IOMMU_START_STATE;
 227
 228static int amd_iommu_enable_interrupts(void);
 229static int __init iommu_go_to_state(enum iommu_init_state state);
 230static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);
 231
 232static bool amd_iommu_pre_enabled = true;
 233
 234static u32 amd_iommu_ivinfo __initdata;
 235
 236bool translation_pre_enabled(struct amd_iommu *iommu)
 237{
 238	return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
 239}
 240
 241static void clear_translation_pre_enabled(struct amd_iommu *iommu)
 242{
 243	iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
 244}
 245
 246static void init_translation_status(struct amd_iommu *iommu)
 247{
 248	u64 ctrl;
 249
 250	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 251	if (ctrl & (1<<CONTROL_IOMMU_EN))
 252		iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
 253}
 254
 255static inline unsigned long tbl_size(int entry_size, int last_bdf)
 256{
 257	unsigned shift = PAGE_SHIFT +
 258			 get_order((last_bdf + 1) * entry_size);
 259
 260	return 1UL << shift;
 261}
 262
 263int amd_iommu_get_num_iommus(void)
 264{
 265	return amd_iommus_present;
 266}
 267
 268/*
 269 * Iterate through all the IOMMUs to get common EFR
 270 * masks among all IOMMUs and warn if found inconsistency.
 271 */
 272static void get_global_efr(void)
 273{
 274	struct amd_iommu *iommu;
 275
 276	for_each_iommu(iommu) {
 277		u64 tmp = iommu->features;
 278		u64 tmp2 = iommu->features2;
 279
 280		if (list_is_first(&iommu->list, &amd_iommu_list)) {
 281			amd_iommu_efr = tmp;
 282			amd_iommu_efr2 = tmp2;
 283			continue;
 284		}
 285
 286		if (amd_iommu_efr == tmp &&
 287		    amd_iommu_efr2 == tmp2)
 288			continue;
 289
 290		pr_err(FW_BUG
 291		       "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
 292		       tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
 293		       iommu->index, iommu->pci_seg->id,
 294		       PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
 295		       PCI_FUNC(iommu->devid));
 296
 297		amd_iommu_efr &= tmp;
 298		amd_iommu_efr2 &= tmp2;
 299	}
 300
 301	pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
 302}
 303
 304static bool check_feature_on_all_iommus(u64 mask)
 305{
 306	return !!(amd_iommu_efr & mask);
 307}
 308
 309/*
 310 * For IVHD type 0x11/0x40, EFR is also available via IVHD.
 311 * Default to IVHD EFR since it is available sooner
 312 * (i.e. before PCI init).
 313 */
 314static void __init early_iommu_features_init(struct amd_iommu *iommu,
 315					     struct ivhd_header *h)
 316{
 317	if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
 318		iommu->features = h->efr_reg;
 319		iommu->features2 = h->efr_reg2;
 320	}
 321	if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
 322		amdr_ivrs_remap_support = true;
 323}
 324
 325/* Access to l1 and l2 indexed register spaces */
 326
 327static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
 328{
 329	u32 val;
 330
 331	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
 332	pci_read_config_dword(iommu->dev, 0xfc, &val);
 333	return val;
 334}
 335
 336static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
 337{
 338	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
 339	pci_write_config_dword(iommu->dev, 0xfc, val);
 340	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
 341}
 342
 343static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
 344{
 345	u32 val;
 346
 347	pci_write_config_dword(iommu->dev, 0xf0, address);
 348	pci_read_config_dword(iommu->dev, 0xf4, &val);
 349	return val;
 350}
 351
 352static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
 353{
 354	pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
 355	pci_write_config_dword(iommu->dev, 0xf4, val);
 356}
 357
 358/****************************************************************************
 359 *
 360 * AMD IOMMU MMIO register space handling functions
 361 *
 362 * These functions are used to program the IOMMU device registers in
 363 * MMIO space required for that driver.
 364 *
 365 ****************************************************************************/
 366
 367/*
 368 * This function set the exclusion range in the IOMMU. DMA accesses to the
 369 * exclusion range are passed through untranslated
 370 */
 371static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 372{
 373	u64 start = iommu->exclusion_start & PAGE_MASK;
 374	u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
 375	u64 entry;
 376
 377	if (!iommu->exclusion_start)
 378		return;
 379
 380	entry = start | MMIO_EXCL_ENABLE_MASK;
 381	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
 382			&entry, sizeof(entry));
 383
 384	entry = limit;
 385	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
 386			&entry, sizeof(entry));
 387}
 388
 389static void iommu_set_cwwb_range(struct amd_iommu *iommu)
 390{
 391	u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
 392	u64 entry = start & PM_ADDR_MASK;
 393
 394	if (!check_feature_on_all_iommus(FEATURE_SNP))
 395		return;
 396
 397	/* Note:
 398	 * Re-purpose Exclusion base/limit registers for Completion wait
 399	 * write-back base/limit.
 400	 */
 401	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
 402		    &entry, sizeof(entry));
 403
 404	/* Note:
 405	 * Default to 4 Kbytes, which can be specified by setting base
 406	 * address equal to the limit address.
 407	 */
 408	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
 409		    &entry, sizeof(entry));
 410}
 411
 412/* Programs the physical address of the device table into the IOMMU hardware */
 413static void iommu_set_device_table(struct amd_iommu *iommu)
 414{
 415	u64 entry;
 416	u32 dev_table_size = iommu->pci_seg->dev_table_size;
 417	void *dev_table = (void *)get_dev_table(iommu);
 418
 419	BUG_ON(iommu->mmio_base == NULL);
 420
 421	entry = iommu_virt_to_phys(dev_table);
 422	entry |= (dev_table_size >> 12) - 1;
 423	memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
 424			&entry, sizeof(entry));
 425}
 426
 427/* Generic functions to enable/disable certain features of the IOMMU. */
 428static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
 429{
 430	u64 ctrl;
 431
 432	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
 433	ctrl |= (1ULL << bit);
 434	writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
 435}
 436
 437static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 438{
 439	u64 ctrl;
 440
 441	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 442	ctrl &= ~(1ULL << bit);
 443	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 444}
 445
 446static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
 447{
 448	u64 ctrl;
 449
 450	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 451	ctrl &= ~CTRL_INV_TO_MASK;
 452	ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
 453	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 454}
 455
 456/* Function to enable the hardware */
 457static void iommu_enable(struct amd_iommu *iommu)
 458{
 459	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 460}
 461
 462static void iommu_disable(struct amd_iommu *iommu)
 463{
 464	if (!iommu->mmio_base)
 465		return;
 466
 467	/* Disable command buffer */
 468	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 469
 470	/* Disable event logging and event interrupts */
 471	iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
 472	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
 473
 474	/* Disable IOMMU GA_LOG */
 475	iommu_feature_disable(iommu, CONTROL_GALOG_EN);
 476	iommu_feature_disable(iommu, CONTROL_GAINT_EN);
 477
 
 
 
 
 478	/* Disable IOMMU hardware itself */
 479	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
 
 
 
 480}
 481
 482/*
 483 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
 484 * the system has one.
 485 */
 486static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
 487{
 488	if (!request_mem_region(address, end, "amd_iommu")) {
 489		pr_err("Can not reserve memory region %llx-%llx for mmio\n",
 490			address, end);
 491		pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
 492		return NULL;
 493	}
 494
 495	return (u8 __iomem *)ioremap(address, end);
 496}
 497
 498static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
 499{
 500	if (iommu->mmio_base)
 501		iounmap(iommu->mmio_base);
 502	release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
 503}
 504
 505static inline u32 get_ivhd_header_size(struct ivhd_header *h)
 506{
 507	u32 size = 0;
 508
 509	switch (h->type) {
 510	case 0x10:
 511		size = 24;
 512		break;
 513	case 0x11:
 514	case 0x40:
 515		size = 40;
 516		break;
 517	}
 518	return size;
 519}
 520
 521/****************************************************************************
 522 *
 523 * The functions below belong to the first pass of AMD IOMMU ACPI table
 524 * parsing. In this pass we try to find out the highest device id this
 525 * code has to handle. Upon this information the size of the shared data
 526 * structures is determined later.
 527 *
 528 ****************************************************************************/
 529
 530/*
 531 * This function calculates the length of a given IVHD entry
 532 */
 533static inline int ivhd_entry_length(u8 *ivhd)
 534{
 535	u32 type = ((struct ivhd_entry *)ivhd)->type;
 536
 537	if (type < 0x80) {
 538		return 0x04 << (*ivhd >> 6);
 539	} else if (type == IVHD_DEV_ACPI_HID) {
 540		/* For ACPI_HID, offset 21 is uid len */
 541		return *((u8 *)ivhd + 21) + 22;
 542	}
 543	return 0;
 544}
 545
 546/*
 547 * After reading the highest device id from the IOMMU PCI capability header
 548 * this function looks if there is a higher device id defined in the ACPI table
 549 */
 550static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
 551{
 552	u8 *p = (void *)h, *end = (void *)h;
 553	struct ivhd_entry *dev;
 554	int last_devid = -EINVAL;
 555
 556	u32 ivhd_size = get_ivhd_header_size(h);
 557
 558	if (!ivhd_size) {
 559		pr_err("Unsupported IVHD type %#x\n", h->type);
 560		return -EINVAL;
 561	}
 562
 563	p += ivhd_size;
 564	end += h->length;
 565
 566	while (p < end) {
 567		dev = (struct ivhd_entry *)p;
 568		switch (dev->type) {
 569		case IVHD_DEV_ALL:
 570			/* Use maximum BDF value for DEV_ALL */
 571			return 0xffff;
 572		case IVHD_DEV_SELECT:
 573		case IVHD_DEV_RANGE_END:
 574		case IVHD_DEV_ALIAS:
 575		case IVHD_DEV_EXT_SELECT:
 576			/* all the above subfield types refer to device ids */
 577			if (dev->devid > last_devid)
 578				last_devid = dev->devid;
 579			break;
 580		default:
 581			break;
 582		}
 583		p += ivhd_entry_length(p);
 584	}
 585
 586	WARN_ON(p != end);
 587
 588	return last_devid;
 589}
 590
 591static int __init check_ivrs_checksum(struct acpi_table_header *table)
 592{
 593	int i;
 594	u8 checksum = 0, *p = (u8 *)table;
 595
 596	for (i = 0; i < table->length; ++i)
 597		checksum += p[i];
 598	if (checksum != 0) {
 599		/* ACPI table corrupt */
 600		pr_err(FW_BUG "IVRS invalid checksum\n");
 601		return -ENODEV;
 602	}
 603
 604	return 0;
 605}
 606
 607/*
 608 * Iterate over all IVHD entries in the ACPI table and find the highest device
 609 * id which we need to handle. This is the first of three functions which parse
 610 * the ACPI table. So we check the checksum here.
 611 */
 612static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg)
 613{
 614	u8 *p = (u8 *)table, *end = (u8 *)table;
 615	struct ivhd_header *h;
 616	int last_devid, last_bdf = 0;
 617
 618	p += IVRS_HEADER_LENGTH;
 619
 620	end += table->length;
 621	while (p < end) {
 622		h = (struct ivhd_header *)p;
 623		if (h->pci_seg == pci_seg &&
 624		    h->type == amd_iommu_target_ivhd_type) {
 625			last_devid = find_last_devid_from_ivhd(h);
 626
 627			if (last_devid < 0)
 628				return -EINVAL;
 629			if (last_devid > last_bdf)
 630				last_bdf = last_devid;
 631		}
 632		p += h->length;
 633	}
 634	WARN_ON(p != end);
 635
 636	return last_bdf;
 637}
 638
 639/****************************************************************************
 640 *
 641 * The following functions belong to the code path which parses the ACPI table
 642 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
 643 * data structures, initialize the per PCI segment device/alias/rlookup table
 644 * and also basically initialize the hardware.
 645 *
 646 ****************************************************************************/
 647
 648/* Allocate per PCI segment device table */
 649static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
 650{
 651	pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
 652						      get_order(pci_seg->dev_table_size));
 653	if (!pci_seg->dev_table)
 654		return -ENOMEM;
 655
 656	return 0;
 657}
 658
 659static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
 660{
 661	free_pages((unsigned long)pci_seg->dev_table,
 662		    get_order(pci_seg->dev_table_size));
 663	pci_seg->dev_table = NULL;
 664}
 665
 666/* Allocate per PCI segment IOMMU rlookup table. */
 667static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
 668{
 669	pci_seg->rlookup_table = (void *)__get_free_pages(
 670						GFP_KERNEL | __GFP_ZERO,
 671						get_order(pci_seg->rlookup_table_size));
 672	if (pci_seg->rlookup_table == NULL)
 673		return -ENOMEM;
 674
 675	return 0;
 676}
 677
 678static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
 679{
 680	free_pages((unsigned long)pci_seg->rlookup_table,
 681		   get_order(pci_seg->rlookup_table_size));
 682	pci_seg->rlookup_table = NULL;
 683}
 684
 685static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
 686{
 687	pci_seg->irq_lookup_table = (void *)__get_free_pages(
 688					     GFP_KERNEL | __GFP_ZERO,
 689					     get_order(pci_seg->rlookup_table_size));
 690	kmemleak_alloc(pci_seg->irq_lookup_table,
 691		       pci_seg->rlookup_table_size, 1, GFP_KERNEL);
 692	if (pci_seg->irq_lookup_table == NULL)
 693		return -ENOMEM;
 694
 695	return 0;
 696}
 697
 698static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
 699{
 700	kmemleak_free(pci_seg->irq_lookup_table);
 701	free_pages((unsigned long)pci_seg->irq_lookup_table,
 702		   get_order(pci_seg->rlookup_table_size));
 703	pci_seg->irq_lookup_table = NULL;
 704}
 705
 706static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
 707{
 708	int i;
 709
 710	pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL,
 711					get_order(pci_seg->alias_table_size));
 712	if (!pci_seg->alias_table)
 713		return -ENOMEM;
 714
 715	/*
 716	 * let all alias entries point to itself
 717	 */
 718	for (i = 0; i <= pci_seg->last_bdf; ++i)
 719		pci_seg->alias_table[i] = i;
 720
 721	return 0;
 722}
 723
 724static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
 725{
 726	free_pages((unsigned long)pci_seg->alias_table,
 727		   get_order(pci_seg->alias_table_size));
 728	pci_seg->alias_table = NULL;
 729}
 730
 731/*
 732 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
 733 * write commands to that buffer later and the IOMMU will execute them
 734 * asynchronously
 735 */
 736static int __init alloc_command_buffer(struct amd_iommu *iommu)
 737{
 738	iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 739						  get_order(CMD_BUFFER_SIZE));
 740
 741	return iommu->cmd_buf ? 0 : -ENOMEM;
 742}
 743
 744/*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 745 * This function restarts event logging in case the IOMMU experienced
 746 * an event log buffer overflow.
 747 */
 748void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
 749{
 750	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
 751	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 
 
 
 
 
 
 
 
 
 
 
 
 752}
 753
 754/*
 755 * This function resets the command buffer if the IOMMU stopped fetching
 756 * commands from it.
 757 */
 758static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
 759{
 760	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 761
 762	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
 763	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 764	iommu->cmd_buf_head = 0;
 765	iommu->cmd_buf_tail = 0;
 766
 767	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
 768}
 769
 770/*
 771 * This function writes the command buffer address to the hardware and
 772 * enables it.
 773 */
 774static void iommu_enable_command_buffer(struct amd_iommu *iommu)
 775{
 776	u64 entry;
 777
 778	BUG_ON(iommu->cmd_buf == NULL);
 779
 780	entry = iommu_virt_to_phys(iommu->cmd_buf);
 781	entry |= MMIO_CMD_SIZE_512;
 782
 783	memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
 784		    &entry, sizeof(entry));
 785
 786	amd_iommu_reset_cmd_buffer(iommu);
 787}
 788
 789/*
 790 * This function disables the command buffer
 791 */
 792static void iommu_disable_command_buffer(struct amd_iommu *iommu)
 793{
 794	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 795}
 796
 797static void __init free_command_buffer(struct amd_iommu *iommu)
 798{
 799	free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
 800}
 801
 802static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
 803					 gfp_t gfp, size_t size)
 804{
 805	int order = get_order(size);
 806	void *buf = (void *)__get_free_pages(gfp, order);
 807
 808	if (buf &&
 809	    check_feature_on_all_iommus(FEATURE_SNP) &&
 810	    set_memory_4k((unsigned long)buf, (1 << order))) {
 811		free_pages((unsigned long)buf, order);
 812		buf = NULL;
 813	}
 814
 815	return buf;
 816}
 817
 818/* allocates the memory where the IOMMU will log its events to */
 819static int __init alloc_event_buffer(struct amd_iommu *iommu)
 820{
 821	iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
 822					      EVT_BUFFER_SIZE);
 823
 824	return iommu->evt_buf ? 0 : -ENOMEM;
 825}
 826
 827static void iommu_enable_event_buffer(struct amd_iommu *iommu)
 828{
 829	u64 entry;
 830
 831	BUG_ON(iommu->evt_buf == NULL);
 832
 833	entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
 834
 835	memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
 836		    &entry, sizeof(entry));
 837
 838	/* set head and tail to zero manually */
 839	writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 840	writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
 841
 842	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 843}
 844
 845/*
 846 * This function disables the event log buffer
 847 */
 848static void iommu_disable_event_buffer(struct amd_iommu *iommu)
 849{
 850	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
 851}
 852
 853static void __init free_event_buffer(struct amd_iommu *iommu)
 854{
 855	free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
 856}
 857
 858/* allocates the memory where the IOMMU will log its events to */
 859static int __init alloc_ppr_log(struct amd_iommu *iommu)
 860{
 861	iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
 862					      PPR_LOG_SIZE);
 863
 864	return iommu->ppr_log ? 0 : -ENOMEM;
 865}
 866
 867static void iommu_enable_ppr_log(struct amd_iommu *iommu)
 868{
 869	u64 entry;
 870
 871	if (iommu->ppr_log == NULL)
 872		return;
 873
 874	entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
 875
 876	memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
 877		    &entry, sizeof(entry));
 878
 879	/* set head and tail to zero manually */
 880	writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 881	writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
 882
 883	iommu_feature_enable(iommu, CONTROL_PPRLOG_EN);
 884	iommu_feature_enable(iommu, CONTROL_PPR_EN);
 885}
 886
 887static void __init free_ppr_log(struct amd_iommu *iommu)
 888{
 889	free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
 890}
 891
 892static void free_ga_log(struct amd_iommu *iommu)
 893{
 894#ifdef CONFIG_IRQ_REMAP
 895	free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE));
 896	free_pages((unsigned long)iommu->ga_log_tail, get_order(8));
 897#endif
 898}
 899
 900#ifdef CONFIG_IRQ_REMAP
 901static int iommu_ga_log_enable(struct amd_iommu *iommu)
 902{
 903	u32 status, i;
 904	u64 entry;
 905
 906	if (!iommu->ga_log)
 907		return -EINVAL;
 908
 909	entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
 910	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
 911		    &entry, sizeof(entry));
 912	entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
 913		 (BIT_ULL(52)-1)) & ~7ULL;
 914	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
 915		    &entry, sizeof(entry));
 916	writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
 917	writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
 918
 919
 920	iommu_feature_enable(iommu, CONTROL_GAINT_EN);
 921	iommu_feature_enable(iommu, CONTROL_GALOG_EN);
 922
 923	for (i = 0; i < LOOP_TIMEOUT; ++i) {
 924		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 925		if (status & (MMIO_STATUS_GALOG_RUN_MASK))
 926			break;
 927		udelay(10);
 928	}
 929
 930	if (WARN_ON(i >= LOOP_TIMEOUT))
 931		return -EINVAL;
 932
 933	return 0;
 934}
 935
 936static int iommu_init_ga_log(struct amd_iommu *iommu)
 937{
 938	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
 939		return 0;
 940
 941	iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 942					get_order(GA_LOG_SIZE));
 943	if (!iommu->ga_log)
 944		goto err_out;
 945
 946	iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 947					get_order(8));
 948	if (!iommu->ga_log_tail)
 949		goto err_out;
 950
 951	return 0;
 952err_out:
 953	free_ga_log(iommu);
 954	return -EINVAL;
 955}
 956#endif /* CONFIG_IRQ_REMAP */
 957
 958static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
 959{
 960	iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1);
 961
 962	return iommu->cmd_sem ? 0 : -ENOMEM;
 963}
 964
 965static void __init free_cwwb_sem(struct amd_iommu *iommu)
 966{
 967	if (iommu->cmd_sem)
 968		free_page((unsigned long)iommu->cmd_sem);
 969}
 970
 971static void iommu_enable_xt(struct amd_iommu *iommu)
 972{
 973#ifdef CONFIG_IRQ_REMAP
 974	/*
 975	 * XT mode (32-bit APIC destination ID) requires
 976	 * GA mode (128-bit IRTE support) as a prerequisite.
 977	 */
 978	if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
 979	    amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
 980		iommu_feature_enable(iommu, CONTROL_XT_EN);
 981#endif /* CONFIG_IRQ_REMAP */
 982}
 983
 984static void iommu_enable_gt(struct amd_iommu *iommu)
 985{
 986	if (!iommu_feature(iommu, FEATURE_GT))
 987		return;
 988
 989	iommu_feature_enable(iommu, CONTROL_GT_EN);
 990}
 991
 992/* sets a specific bit in the device table entry. */
 993static void __set_dev_entry_bit(struct dev_table_entry *dev_table,
 994				u16 devid, u8 bit)
 995{
 996	int i = (bit >> 6) & 0x03;
 997	int _bit = bit & 0x3f;
 998
 999	dev_table[devid].data[i] |= (1UL << _bit);
1000}
1001
1002static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
1003{
1004	struct dev_table_entry *dev_table = get_dev_table(iommu);
1005
1006	return __set_dev_entry_bit(dev_table, devid, bit);
1007}
1008
1009static int __get_dev_entry_bit(struct dev_table_entry *dev_table,
1010			       u16 devid, u8 bit)
1011{
1012	int i = (bit >> 6) & 0x03;
1013	int _bit = bit & 0x3f;
1014
1015	return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
1016}
1017
1018static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
1019{
1020	struct dev_table_entry *dev_table = get_dev_table(iommu);
1021
1022	return __get_dev_entry_bit(dev_table, devid, bit);
1023}
1024
1025static bool __copy_device_table(struct amd_iommu *iommu)
1026{
1027	u64 int_ctl, int_tab_len, entry = 0;
1028	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1029	struct dev_table_entry *old_devtb = NULL;
1030	u32 lo, hi, devid, old_devtb_size;
1031	phys_addr_t old_devtb_phys;
1032	u16 dom_id, dte_v, irq_v;
1033	gfp_t gfp_flag;
1034	u64 tmp;
1035
1036	/* Each IOMMU use separate device table with the same size */
1037	lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
1038	hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
1039	entry = (((u64) hi) << 32) + lo;
1040
1041	old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
1042	if (old_devtb_size != pci_seg->dev_table_size) {
1043		pr_err("The device table size of IOMMU:%d is not expected!\n",
1044			iommu->index);
1045		return false;
1046	}
1047
1048	/*
1049	 * When SME is enabled in the first kernel, the entry includes the
1050	 * memory encryption mask(sme_me_mask), we must remove the memory
1051	 * encryption mask to obtain the true physical address in kdump kernel.
1052	 */
1053	old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
1054
1055	if (old_devtb_phys >= 0x100000000ULL) {
1056		pr_err("The address of old device table is above 4G, not trustworthy!\n");
1057		return false;
1058	}
1059	old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel())
1060		    ? (__force void *)ioremap_encrypted(old_devtb_phys,
1061							pci_seg->dev_table_size)
1062		    : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB);
1063
1064	if (!old_devtb)
1065		return false;
1066
1067	gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
1068	pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
1069						    get_order(pci_seg->dev_table_size));
1070	if (pci_seg->old_dev_tbl_cpy == NULL) {
1071		pr_err("Failed to allocate memory for copying old device table!\n");
1072		memunmap(old_devtb);
1073		return false;
1074	}
1075
1076	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
1077		pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid];
1078		dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
1079		dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
1080
1081		if (dte_v && dom_id) {
1082			pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
1083			pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
1084			__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
 
 
 
 
 
1085			/* If gcr3 table existed, mask it out */
1086			if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
1087				tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
1088				tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
1089				pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
1090				tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
1091				tmp |= DTE_FLAG_GV;
1092				pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
1093			}
1094		}
1095
1096		irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
1097		int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
1098		int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
1099		if (irq_v && (int_ctl || int_tab_len)) {
1100			if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
1101			    (int_tab_len != DTE_INTTABLEN)) {
1102				pr_err("Wrong old irq remapping flag: %#x\n", devid);
1103				memunmap(old_devtb);
1104				return false;
1105			}
1106
1107			pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
1108		}
1109	}
1110	memunmap(old_devtb);
1111
1112	return true;
1113}
1114
1115static bool copy_device_table(void)
1116{
1117	struct amd_iommu *iommu;
1118	struct amd_iommu_pci_seg *pci_seg;
1119
1120	if (!amd_iommu_pre_enabled)
1121		return false;
1122
1123	pr_warn("Translation is already enabled - trying to copy translation structures\n");
1124
1125	/*
1126	 * All IOMMUs within PCI segment shares common device table.
1127	 * Hence copy device table only once per PCI segment.
1128	 */
1129	for_each_pci_segment(pci_seg) {
1130		for_each_iommu(iommu) {
1131			if (pci_seg->id != iommu->pci_seg->id)
1132				continue;
1133			if (!__copy_device_table(iommu))
1134				return false;
1135			break;
1136		}
1137	}
1138
1139	return true;
1140}
1141
1142void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid)
1143{
1144	int sysmgt;
1145
1146	sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) |
1147		 (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1);
1148
1149	if (sysmgt == 0x01)
1150		set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW);
1151}
1152
1153/*
1154 * This function takes the device specific flags read from the ACPI
1155 * table and sets up the device table entry with that information
1156 */
1157static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
1158					   u16 devid, u32 flags, u32 ext_flags)
1159{
1160	if (flags & ACPI_DEVFLAG_INITPASS)
1161		set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
1162	if (flags & ACPI_DEVFLAG_EXTINT)
1163		set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
1164	if (flags & ACPI_DEVFLAG_NMI)
1165		set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
1166	if (flags & ACPI_DEVFLAG_SYSMGT1)
1167		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
1168	if (flags & ACPI_DEVFLAG_SYSMGT2)
1169		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
1170	if (flags & ACPI_DEVFLAG_LINT0)
1171		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
1172	if (flags & ACPI_DEVFLAG_LINT1)
1173		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
1174
1175	amd_iommu_apply_erratum_63(iommu, devid);
1176
1177	amd_iommu_set_rlookup_table(iommu, devid);
1178}
1179
1180int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
1181{
1182	struct devid_map *entry;
1183	struct list_head *list;
1184
1185	if (type == IVHD_SPECIAL_IOAPIC)
1186		list = &ioapic_map;
1187	else if (type == IVHD_SPECIAL_HPET)
1188		list = &hpet_map;
1189	else
1190		return -EINVAL;
1191
1192	list_for_each_entry(entry, list, list) {
1193		if (!(entry->id == id && entry->cmd_line))
1194			continue;
1195
1196		pr_info("Command-line override present for %s id %d - ignoring\n",
1197			type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1198
1199		*devid = entry->devid;
1200
1201		return 0;
1202	}
1203
1204	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1205	if (!entry)
1206		return -ENOMEM;
1207
1208	entry->id	= id;
1209	entry->devid	= *devid;
1210	entry->cmd_line	= cmd_line;
1211
1212	list_add_tail(&entry->list, list);
1213
1214	return 0;
1215}
1216
1217static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
1218				      bool cmd_line)
1219{
1220	struct acpihid_map_entry *entry;
1221	struct list_head *list = &acpihid_map;
1222
1223	list_for_each_entry(entry, list, list) {
1224		if (strcmp(entry->hid, hid) ||
1225		    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1226		    !entry->cmd_line)
1227			continue;
1228
1229		pr_info("Command-line override for hid:%s uid:%s\n",
1230			hid, uid);
1231		*devid = entry->devid;
1232		return 0;
1233	}
1234
1235	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1236	if (!entry)
1237		return -ENOMEM;
1238
1239	memcpy(entry->uid, uid, strlen(uid));
1240	memcpy(entry->hid, hid, strlen(hid));
1241	entry->devid = *devid;
1242	entry->cmd_line	= cmd_line;
1243	entry->root_devid = (entry->devid & (~0x7));
1244
1245	pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
1246		entry->cmd_line ? "cmd" : "ivrs",
1247		entry->hid, entry->uid, entry->root_devid);
1248
1249	list_add_tail(&entry->list, list);
1250	return 0;
1251}
1252
1253static int __init add_early_maps(void)
1254{
1255	int i, ret;
1256
1257	for (i = 0; i < early_ioapic_map_size; ++i) {
1258		ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1259					 early_ioapic_map[i].id,
1260					 &early_ioapic_map[i].devid,
1261					 early_ioapic_map[i].cmd_line);
1262		if (ret)
1263			return ret;
1264	}
1265
1266	for (i = 0; i < early_hpet_map_size; ++i) {
1267		ret = add_special_device(IVHD_SPECIAL_HPET,
1268					 early_hpet_map[i].id,
1269					 &early_hpet_map[i].devid,
1270					 early_hpet_map[i].cmd_line);
1271		if (ret)
1272			return ret;
1273	}
1274
1275	for (i = 0; i < early_acpihid_map_size; ++i) {
1276		ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1277					  early_acpihid_map[i].uid,
1278					  &early_acpihid_map[i].devid,
1279					  early_acpihid_map[i].cmd_line);
1280		if (ret)
1281			return ret;
1282	}
1283
1284	return 0;
1285}
1286
1287/*
1288 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1289 * initializes the hardware and our data structures with it.
1290 */
1291static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1292					struct ivhd_header *h)
1293{
1294	u8 *p = (u8 *)h;
1295	u8 *end = p, flags = 0;
1296	u16 devid = 0, devid_start = 0, devid_to = 0, seg_id;
1297	u32 dev_i, ext_flags = 0;
1298	bool alias = false;
1299	struct ivhd_entry *e;
1300	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1301	u32 ivhd_size;
1302	int ret;
1303
1304
1305	ret = add_early_maps();
1306	if (ret)
1307		return ret;
1308
1309	amd_iommu_apply_ivrs_quirks();
1310
1311	/*
1312	 * First save the recommended feature enable bits from ACPI
1313	 */
1314	iommu->acpi_flags = h->flags;
1315
1316	/*
1317	 * Done. Now parse the device entries
1318	 */
1319	ivhd_size = get_ivhd_header_size(h);
1320	if (!ivhd_size) {
1321		pr_err("Unsupported IVHD type %#x\n", h->type);
1322		return -EINVAL;
1323	}
1324
1325	p += ivhd_size;
1326
1327	end += h->length;
1328
1329
1330	while (p < end) {
1331		e = (struct ivhd_entry *)p;
1332		seg_id = pci_seg->id;
1333
1334		switch (e->type) {
1335		case IVHD_DEV_ALL:
1336
1337			DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
1338
1339			for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i)
1340				set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
1341			break;
1342		case IVHD_DEV_SELECT:
1343
1344			DUMP_printk("  DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x "
1345				    "flags: %02x\n",
1346				    seg_id, PCI_BUS_NUM(e->devid),
1347				    PCI_SLOT(e->devid),
1348				    PCI_FUNC(e->devid),
1349				    e->flags);
1350
1351			devid = e->devid;
1352			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1353			break;
1354		case IVHD_DEV_SELECT_RANGE_START:
1355
1356			DUMP_printk("  DEV_SELECT_RANGE_START\t "
1357				    "devid: %04x:%02x:%02x.%x flags: %02x\n",
1358				    seg_id, PCI_BUS_NUM(e->devid),
1359				    PCI_SLOT(e->devid),
1360				    PCI_FUNC(e->devid),
1361				    e->flags);
1362
1363			devid_start = e->devid;
1364			flags = e->flags;
1365			ext_flags = 0;
1366			alias = false;
1367			break;
1368		case IVHD_DEV_ALIAS:
1369
1370			DUMP_printk("  DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x "
1371				    "flags: %02x devid_to: %02x:%02x.%x\n",
1372				    seg_id, PCI_BUS_NUM(e->devid),
1373				    PCI_SLOT(e->devid),
1374				    PCI_FUNC(e->devid),
1375				    e->flags,
1376				    PCI_BUS_NUM(e->ext >> 8),
1377				    PCI_SLOT(e->ext >> 8),
1378				    PCI_FUNC(e->ext >> 8));
1379
1380			devid = e->devid;
1381			devid_to = e->ext >> 8;
1382			set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
1383			set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1384			pci_seg->alias_table[devid] = devid_to;
1385			break;
1386		case IVHD_DEV_ALIAS_RANGE:
1387
1388			DUMP_printk("  DEV_ALIAS_RANGE\t\t "
1389				    "devid: %04x:%02x:%02x.%x flags: %02x "
1390				    "devid_to: %04x:%02x:%02x.%x\n",
1391				    seg_id, PCI_BUS_NUM(e->devid),
1392				    PCI_SLOT(e->devid),
1393				    PCI_FUNC(e->devid),
1394				    e->flags,
1395				    seg_id, PCI_BUS_NUM(e->ext >> 8),
1396				    PCI_SLOT(e->ext >> 8),
1397				    PCI_FUNC(e->ext >> 8));
1398
1399			devid_start = e->devid;
1400			flags = e->flags;
1401			devid_to = e->ext >> 8;
1402			ext_flags = 0;
1403			alias = true;
1404			break;
1405		case IVHD_DEV_EXT_SELECT:
1406
1407			DUMP_printk("  DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x "
1408				    "flags: %02x ext: %08x\n",
1409				    seg_id, PCI_BUS_NUM(e->devid),
1410				    PCI_SLOT(e->devid),
1411				    PCI_FUNC(e->devid),
1412				    e->flags, e->ext);
1413
1414			devid = e->devid;
1415			set_dev_entry_from_acpi(iommu, devid, e->flags,
1416						e->ext);
1417			break;
1418		case IVHD_DEV_EXT_SELECT_RANGE:
1419
1420			DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
1421				    "%04x:%02x:%02x.%x flags: %02x ext: %08x\n",
1422				    seg_id, PCI_BUS_NUM(e->devid),
1423				    PCI_SLOT(e->devid),
1424				    PCI_FUNC(e->devid),
1425				    e->flags, e->ext);
1426
1427			devid_start = e->devid;
1428			flags = e->flags;
1429			ext_flags = e->ext;
1430			alias = false;
1431			break;
1432		case IVHD_DEV_RANGE_END:
1433
1434			DUMP_printk("  DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n",
1435				    seg_id, PCI_BUS_NUM(e->devid),
1436				    PCI_SLOT(e->devid),
1437				    PCI_FUNC(e->devid));
1438
1439			devid = e->devid;
1440			for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
1441				if (alias) {
1442					pci_seg->alias_table[dev_i] = devid_to;
1443					set_dev_entry_from_acpi(iommu,
1444						devid_to, flags, ext_flags);
1445				}
1446				set_dev_entry_from_acpi(iommu, dev_i,
1447							flags, ext_flags);
1448			}
1449			break;
1450		case IVHD_DEV_SPECIAL: {
1451			u8 handle, type;
1452			const char *var;
1453			u32 devid;
1454			int ret;
1455
1456			handle = e->ext & 0xff;
1457			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8));
1458			type   = (e->ext >> 24) & 0xff;
1459
1460			if (type == IVHD_SPECIAL_IOAPIC)
1461				var = "IOAPIC";
1462			else if (type == IVHD_SPECIAL_HPET)
1463				var = "HPET";
1464			else
1465				var = "UNKNOWN";
1466
1467			DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n",
1468				    var, (int)handle,
1469				    seg_id, PCI_BUS_NUM(devid),
1470				    PCI_SLOT(devid),
1471				    PCI_FUNC(devid));
1472
1473			ret = add_special_device(type, handle, &devid, false);
1474			if (ret)
1475				return ret;
1476
1477			/*
1478			 * add_special_device might update the devid in case a
1479			 * command-line override is present. So call
1480			 * set_dev_entry_from_acpi after add_special_device.
1481			 */
1482			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1483
1484			break;
1485		}
1486		case IVHD_DEV_ACPI_HID: {
1487			u32 devid;
1488			u8 hid[ACPIHID_HID_LEN];
1489			u8 uid[ACPIHID_UID_LEN];
1490			int ret;
1491
1492			if (h->type != 0x40) {
1493				pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1494				       e->type);
1495				break;
1496			}
1497
1498			BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1);
1499			memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1);
1500			hid[ACPIHID_HID_LEN - 1] = '\0';
1501
1502			if (!(*hid)) {
1503				pr_err(FW_BUG "Invalid HID.\n");
1504				break;
1505			}
1506
1507			uid[0] = '\0';
1508			switch (e->uidf) {
1509			case UID_NOT_PRESENT:
1510
1511				if (e->uidl != 0)
1512					pr_warn(FW_BUG "Invalid UID length.\n");
1513
1514				break;
1515			case UID_IS_INTEGER:
1516
1517				sprintf(uid, "%d", e->uid);
1518
1519				break;
1520			case UID_IS_CHARACTER:
1521
1522				memcpy(uid, &e->uid, e->uidl);
1523				uid[e->uidl] = '\0';
1524
1525				break;
1526			default:
1527				break;
1528			}
1529
1530			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
1531			DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n",
1532				    hid, uid, seg_id,
1533				    PCI_BUS_NUM(devid),
1534				    PCI_SLOT(devid),
1535				    PCI_FUNC(devid));
1536
1537			flags = e->flags;
1538
1539			ret = add_acpi_hid_device(hid, uid, &devid, false);
1540			if (ret)
1541				return ret;
1542
1543			/*
1544			 * add_special_device might update the devid in case a
1545			 * command-line override is present. So call
1546			 * set_dev_entry_from_acpi after add_special_device.
1547			 */
1548			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1549
1550			break;
1551		}
1552		default:
1553			break;
1554		}
1555
1556		p += ivhd_entry_length(p);
1557	}
1558
1559	return 0;
1560}
1561
1562/* Allocate PCI segment data structure */
1563static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
1564					  struct acpi_table_header *ivrs_base)
1565{
1566	struct amd_iommu_pci_seg *pci_seg;
1567	int last_bdf;
1568
1569	/*
1570	 * First parse ACPI tables to find the largest Bus/Dev/Func we need to
1571	 * handle in this PCI segment. Upon this information the shared data
1572	 * structures for the PCI segments in the system will be allocated.
1573	 */
1574	last_bdf = find_last_devid_acpi(ivrs_base, id);
1575	if (last_bdf < 0)
1576		return NULL;
1577
1578	pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL);
1579	if (pci_seg == NULL)
1580		return NULL;
1581
1582	pci_seg->last_bdf = last_bdf;
1583	DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
1584	pci_seg->dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf);
1585	pci_seg->alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf);
1586	pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf);
1587
1588	pci_seg->id = id;
1589	init_llist_head(&pci_seg->dev_data_list);
1590	INIT_LIST_HEAD(&pci_seg->unity_map);
1591	list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list);
1592
1593	if (alloc_dev_table(pci_seg))
1594		return NULL;
1595	if (alloc_alias_table(pci_seg))
1596		return NULL;
1597	if (alloc_rlookup_table(pci_seg))
1598		return NULL;
1599
1600	return pci_seg;
1601}
1602
1603static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id,
1604					struct acpi_table_header *ivrs_base)
1605{
1606	struct amd_iommu_pci_seg *pci_seg;
1607
1608	for_each_pci_segment(pci_seg) {
1609		if (pci_seg->id == id)
1610			return pci_seg;
1611	}
1612
1613	return alloc_pci_segment(id, ivrs_base);
1614}
1615
1616static void __init free_pci_segments(void)
1617{
1618	struct amd_iommu_pci_seg *pci_seg, *next;
1619
1620	for_each_pci_segment_safe(pci_seg, next) {
1621		list_del(&pci_seg->list);
1622		free_irq_lookup_table(pci_seg);
1623		free_rlookup_table(pci_seg);
1624		free_alias_table(pci_seg);
1625		free_dev_table(pci_seg);
1626		kfree(pci_seg);
1627	}
1628}
1629
 
 
 
 
 
 
 
 
1630static void __init free_iommu_one(struct amd_iommu *iommu)
1631{
 
1632	free_cwwb_sem(iommu);
1633	free_command_buffer(iommu);
1634	free_event_buffer(iommu);
1635	free_ppr_log(iommu);
1636	free_ga_log(iommu);
1637	iommu_unmap_mmio_space(iommu);
 
1638}
1639
1640static void __init free_iommu_all(void)
1641{
1642	struct amd_iommu *iommu, *next;
1643
1644	for_each_iommu_safe(iommu, next) {
1645		list_del(&iommu->list);
1646		free_iommu_one(iommu);
1647		kfree(iommu);
1648	}
1649}
1650
1651/*
1652 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1653 * Workaround:
1654 *     BIOS should disable L2B micellaneous clock gating by setting
1655 *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1656 */
1657static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1658{
1659	u32 value;
1660
1661	if ((boot_cpu_data.x86 != 0x15) ||
1662	    (boot_cpu_data.x86_model < 0x10) ||
1663	    (boot_cpu_data.x86_model > 0x1f))
1664		return;
1665
1666	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1667	pci_read_config_dword(iommu->dev, 0xf4, &value);
1668
1669	if (value & BIT(2))
1670		return;
1671
1672	/* Select NB indirect register 0x90 and enable writing */
1673	pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1674
1675	pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1676	pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1677
1678	/* Clear the enable writing bit */
1679	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1680}
1681
1682/*
1683 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1684 * Workaround:
1685 *     BIOS should enable ATS write permission check by setting
1686 *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1687 */
1688static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1689{
1690	u32 value;
1691
1692	if ((boot_cpu_data.x86 != 0x15) ||
1693	    (boot_cpu_data.x86_model < 0x30) ||
1694	    (boot_cpu_data.x86_model > 0x3f))
1695		return;
1696
1697	/* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1698	value = iommu_read_l2(iommu, 0x47);
1699
1700	if (value & BIT(0))
1701		return;
1702
1703	/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1704	iommu_write_l2(iommu, 0x47, value | BIT(0));
1705
1706	pci_info(iommu->dev, "Applying ATS write check workaround\n");
1707}
1708
1709/*
1710 * This function glues the initialization function for one IOMMU
1711 * together and also allocates the command buffer and programs the
1712 * hardware. It does NOT enable the IOMMU. This is done afterwards.
1713 */
1714static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
1715				 struct acpi_table_header *ivrs_base)
1716{
1717	struct amd_iommu_pci_seg *pci_seg;
1718
1719	pci_seg = get_pci_segment(h->pci_seg, ivrs_base);
1720	if (pci_seg == NULL)
1721		return -ENOMEM;
1722	iommu->pci_seg = pci_seg;
1723
1724	raw_spin_lock_init(&iommu->lock);
1725	iommu->cmd_sem_val = 0;
1726
1727	/* Add IOMMU to internal data structures */
1728	list_add_tail(&iommu->list, &amd_iommu_list);
1729	iommu->index = amd_iommus_present++;
1730
1731	if (unlikely(iommu->index >= MAX_IOMMUS)) {
1732		WARN(1, "System has more IOMMUs than supported by this driver\n");
1733		return -ENOSYS;
1734	}
1735
1736	/* Index is fine - add IOMMU to the array */
1737	amd_iommus[iommu->index] = iommu;
1738
1739	/*
1740	 * Copy data from ACPI table entry to the iommu struct
1741	 */
1742	iommu->devid   = h->devid;
1743	iommu->cap_ptr = h->cap_ptr;
1744	iommu->mmio_phys = h->mmio_phys;
1745
1746	switch (h->type) {
1747	case 0x10:
1748		/* Check if IVHD EFR contains proper max banks/counters */
1749		if ((h->efr_attr != 0) &&
1750		    ((h->efr_attr & (0xF << 13)) != 0) &&
1751		    ((h->efr_attr & (0x3F << 17)) != 0))
1752			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1753		else
1754			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1755
1756		/*
1757		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
1758		 * GAM also requires GA mode. Therefore, we need to
1759		 * check cmpxchg16b support before enabling it.
1760		 */
1761		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1762		    ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
1763			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1764		break;
1765	case 0x11:
1766	case 0x40:
1767		if (h->efr_reg & (1 << 9))
1768			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1769		else
1770			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1771
1772		/*
1773		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
1774		 * XT, GAM also requires GA mode. Therefore, we need to
1775		 * check cmpxchg16b support before enabling them.
1776		 */
1777		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1778		    ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) {
1779			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1780			break;
1781		}
1782
1783		if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
1784			amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
1785
1786		early_iommu_features_init(iommu, h);
1787
1788		break;
1789	default:
1790		return -EINVAL;
1791	}
1792
1793	iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1794						iommu->mmio_phys_end);
1795	if (!iommu->mmio_base)
1796		return -ENOMEM;
1797
1798	return init_iommu_from_acpi(iommu, h);
1799}
1800
1801static int __init init_iommu_one_late(struct amd_iommu *iommu)
1802{
1803	int ret;
1804
1805	if (alloc_cwwb_sem(iommu))
1806		return -ENOMEM;
1807
1808	if (alloc_command_buffer(iommu))
1809		return -ENOMEM;
1810
1811	if (alloc_event_buffer(iommu))
1812		return -ENOMEM;
1813
1814	iommu->int_enabled = false;
1815
1816	init_translation_status(iommu);
1817	if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1818		iommu_disable(iommu);
1819		clear_translation_pre_enabled(iommu);
1820		pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1821			iommu->index);
1822	}
1823	if (amd_iommu_pre_enabled)
1824		amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1825
1826	if (amd_iommu_irq_remap) {
1827		ret = amd_iommu_create_irq_domain(iommu);
1828		if (ret)
1829			return ret;
1830	}
1831
1832	/*
1833	 * Make sure IOMMU is not considered to translate itself. The IVRS
1834	 * table tells us so, but this is a lie!
1835	 */
1836	iommu->pci_seg->rlookup_table[iommu->devid] = NULL;
1837
1838	return 0;
1839}
1840
1841/**
1842 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1843 * @ivrs: Pointer to the IVRS header
1844 *
1845 * This function search through all IVDB of the maximum supported IVHD
1846 */
1847static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1848{
1849	u8 *base = (u8 *)ivrs;
1850	struct ivhd_header *ivhd = (struct ivhd_header *)
1851					(base + IVRS_HEADER_LENGTH);
1852	u8 last_type = ivhd->type;
1853	u16 devid = ivhd->devid;
1854
1855	while (((u8 *)ivhd - base < ivrs->length) &&
1856	       (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1857		u8 *p = (u8 *) ivhd;
1858
1859		if (ivhd->devid == devid)
1860			last_type = ivhd->type;
1861		ivhd = (struct ivhd_header *)(p + ivhd->length);
1862	}
1863
1864	return last_type;
1865}
1866
1867/*
1868 * Iterates over all IOMMU entries in the ACPI table, allocates the
1869 * IOMMU structure and initializes it with init_iommu_one()
1870 */
1871static int __init init_iommu_all(struct acpi_table_header *table)
1872{
1873	u8 *p = (u8 *)table, *end = (u8 *)table;
1874	struct ivhd_header *h;
1875	struct amd_iommu *iommu;
1876	int ret;
1877
1878	end += table->length;
1879	p += IVRS_HEADER_LENGTH;
1880
1881	/* Phase 1: Process all IVHD blocks */
1882	while (p < end) {
1883		h = (struct ivhd_header *)p;
1884		if (*p == amd_iommu_target_ivhd_type) {
1885
1886			DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x "
1887				    "flags: %01x info %04x\n",
1888				    h->pci_seg, PCI_BUS_NUM(h->devid),
1889				    PCI_SLOT(h->devid), PCI_FUNC(h->devid),
1890				    h->cap_ptr, h->flags, h->info);
1891			DUMP_printk("       mmio-addr: %016llx\n",
1892				    h->mmio_phys);
1893
1894			iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1895			if (iommu == NULL)
1896				return -ENOMEM;
1897
1898			ret = init_iommu_one(iommu, h, table);
1899			if (ret)
1900				return ret;
1901		}
1902		p += h->length;
1903
1904	}
1905	WARN_ON(p != end);
1906
1907	/* Phase 2 : Early feature support check */
1908	get_global_efr();
1909
1910	/* Phase 3 : Enabling IOMMU features */
1911	for_each_iommu(iommu) {
1912		ret = init_iommu_one_late(iommu);
1913		if (ret)
1914			return ret;
1915	}
1916
1917	return 0;
1918}
1919
1920static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1921{
1922	u64 val;
1923	struct pci_dev *pdev = iommu->dev;
1924
1925	if (!iommu_feature(iommu, FEATURE_PC))
1926		return;
1927
1928	amd_iommu_pc_present = true;
1929
1930	pci_info(pdev, "IOMMU performance counters supported\n");
1931
1932	val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1933	iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1934	iommu->max_counters = (u8) ((val >> 7) & 0xf);
1935
1936	return;
1937}
1938
1939static ssize_t amd_iommu_show_cap(struct device *dev,
1940				  struct device_attribute *attr,
1941				  char *buf)
1942{
1943	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1944	return sprintf(buf, "%x\n", iommu->cap);
1945}
1946static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1947
1948static ssize_t amd_iommu_show_features(struct device *dev,
1949				       struct device_attribute *attr,
1950				       char *buf)
1951{
1952	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1953	return sprintf(buf, "%llx:%llx\n", iommu->features2, iommu->features);
1954}
1955static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1956
1957static struct attribute *amd_iommu_attrs[] = {
1958	&dev_attr_cap.attr,
1959	&dev_attr_features.attr,
1960	NULL,
1961};
1962
1963static struct attribute_group amd_iommu_group = {
1964	.name = "amd-iommu",
1965	.attrs = amd_iommu_attrs,
1966};
1967
1968static const struct attribute_group *amd_iommu_groups[] = {
1969	&amd_iommu_group,
1970	NULL,
1971};
1972
1973/*
1974 * Note: IVHD 0x11 and 0x40 also contains exact copy
1975 * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
1976 * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
1977 */
1978static void __init late_iommu_features_init(struct amd_iommu *iommu)
1979{
1980	u64 features, features2;
1981
1982	if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
1983		return;
1984
1985	/* read extended feature bits */
1986	features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
1987	features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
1988
1989	if (!iommu->features) {
1990		iommu->features = features;
1991		iommu->features2 = features2;
1992		return;
1993	}
1994
1995	/*
1996	 * Sanity check and warn if EFR values from
1997	 * IVHD and MMIO conflict.
1998	 */
1999	if (features != iommu->features ||
2000	    features2 != iommu->features2) {
2001		pr_warn(FW_WARN
2002			"EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
2003			features, iommu->features,
2004			features2, iommu->features2);
2005	}
2006}
2007
2008static int __init iommu_init_pci(struct amd_iommu *iommu)
2009{
2010	int cap_ptr = iommu->cap_ptr;
2011	int ret;
2012
2013	iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2014						 PCI_BUS_NUM(iommu->devid),
2015						 iommu->devid & 0xff);
2016	if (!iommu->dev)
2017		return -ENODEV;
2018
2019	/* Prevent binding other PCI device drivers to IOMMU devices */
2020	iommu->dev->match_driver = false;
2021
2022	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
2023			      &iommu->cap);
2024
2025	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
2026		amd_iommu_iotlb_sup = false;
2027
2028	late_iommu_features_init(iommu);
2029
2030	if (iommu_feature(iommu, FEATURE_GT)) {
2031		int glxval;
2032		u32 max_pasid;
2033		u64 pasmax;
2034
2035		pasmax = iommu->features & FEATURE_PASID_MASK;
2036		pasmax >>= FEATURE_PASID_SHIFT;
2037		max_pasid  = (1 << (pasmax + 1)) - 1;
2038
2039		amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
2040
2041		BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
2042
2043		glxval   = iommu->features & FEATURE_GLXVAL_MASK;
2044		glxval >>= FEATURE_GLXVAL_SHIFT;
2045
2046		if (amd_iommu_max_glx_val == -1)
2047			amd_iommu_max_glx_val = glxval;
2048		else
2049			amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
2050	}
2051
2052	if (iommu_feature(iommu, FEATURE_GT) &&
2053	    iommu_feature(iommu, FEATURE_PPR)) {
2054		iommu->is_iommu_v2   = true;
2055		amd_iommu_v2_present = true;
2056	}
2057
2058	if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
2059		return -ENOMEM;
2060
2061	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
2062		pr_info("Using strict mode due to virtualization\n");
2063		iommu_set_dma_strict();
2064		amd_iommu_np_cache = true;
2065	}
2066
2067	init_iommu_perf_ctr(iommu);
2068
2069	if (amd_iommu_pgtable == AMD_IOMMU_V2) {
2070		if (!iommu_feature(iommu, FEATURE_GIOSUP) ||
2071		    !iommu_feature(iommu, FEATURE_GT)) {
2072			pr_warn("Cannot enable v2 page table for DMA-API. Falling back to v1.\n");
2073			amd_iommu_pgtable = AMD_IOMMU_V1;
2074		} else if (iommu_default_passthrough()) {
2075			pr_warn("V2 page table doesn't support passthrough mode. Falling back to v1.\n");
2076			amd_iommu_pgtable = AMD_IOMMU_V1;
2077		}
2078	}
2079
2080	if (is_rd890_iommu(iommu->dev)) {
2081		int i, j;
2082
2083		iommu->root_pdev =
2084			pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2085						    iommu->dev->bus->number,
2086						    PCI_DEVFN(0, 0));
2087
2088		/*
2089		 * Some rd890 systems may not be fully reconfigured by the
2090		 * BIOS, so it's necessary for us to store this information so
2091		 * it can be reprogrammed on resume
2092		 */
2093		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
2094				&iommu->stored_addr_lo);
2095		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
2096				&iommu->stored_addr_hi);
2097
2098		/* Low bit locks writes to configuration space */
2099		iommu->stored_addr_lo &= ~1;
2100
2101		for (i = 0; i < 6; i++)
2102			for (j = 0; j < 0x12; j++)
2103				iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
2104
2105		for (i = 0; i < 0x83; i++)
2106			iommu->stored_l2[i] = iommu_read_l2(iommu, i);
2107	}
2108
2109	amd_iommu_erratum_746_workaround(iommu);
2110	amd_iommu_ats_write_check_workaround(iommu);
2111
2112	ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
2113			       amd_iommu_groups, "ivhd%d", iommu->index);
2114	if (ret)
2115		return ret;
2116
2117	iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
2118
2119	return pci_enable_device(iommu->dev);
2120}
2121
2122static void print_iommu_info(void)
2123{
2124	static const char * const feat_str[] = {
2125		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
2126		"IA", "GA", "HE", "PC"
2127	};
2128	struct amd_iommu *iommu;
2129
2130	for_each_iommu(iommu) {
2131		struct pci_dev *pdev = iommu->dev;
2132		int i;
2133
2134		pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
2135
2136		if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
2137			pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2);
2138
2139			for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
2140				if (iommu_feature(iommu, (1ULL << i)))
2141					pr_cont(" %s", feat_str[i]);
2142			}
2143
2144			if (iommu->features & FEATURE_GAM_VAPIC)
2145				pr_cont(" GA_vAPIC");
2146
2147			if (iommu->features & FEATURE_SNP)
2148				pr_cont(" SNP");
2149
2150			pr_cont("\n");
2151		}
2152	}
2153	if (irq_remapping_enabled) {
2154		pr_info("Interrupt remapping enabled\n");
2155		if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2156			pr_info("X2APIC enabled\n");
2157	}
2158	if (amd_iommu_pgtable == AMD_IOMMU_V2)
2159		pr_info("V2 page table enabled\n");
2160}
2161
2162static int __init amd_iommu_init_pci(void)
2163{
2164	struct amd_iommu *iommu;
2165	struct amd_iommu_pci_seg *pci_seg;
2166	int ret;
2167
2168	for_each_iommu(iommu) {
2169		ret = iommu_init_pci(iommu);
2170		if (ret) {
2171			pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n",
2172			       iommu->index, ret);
2173			goto out;
2174		}
2175		/* Need to setup range after PCI init */
2176		iommu_set_cwwb_range(iommu);
2177	}
2178
2179	/*
2180	 * Order is important here to make sure any unity map requirements are
2181	 * fulfilled. The unity mappings are created and written to the device
2182	 * table during the iommu_init_pci() call.
2183	 *
2184	 * After that we call init_device_table_dma() to make sure any
2185	 * uninitialized DTE will block DMA, and in the end we flush the caches
2186	 * of all IOMMUs to make sure the changes to the device table are
2187	 * active.
2188	 */
2189	for_each_pci_segment(pci_seg)
2190		init_device_table_dma(pci_seg);
2191
2192	for_each_iommu(iommu)
2193		iommu_flush_all_caches(iommu);
2194
2195	print_iommu_info();
2196
2197out:
2198	return ret;
2199}
2200
2201/****************************************************************************
2202 *
2203 * The following functions initialize the MSI interrupts for all IOMMUs
2204 * in the system. It's a bit challenging because there could be multiple
2205 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
2206 * pci_dev.
2207 *
2208 ****************************************************************************/
2209
2210static int iommu_setup_msi(struct amd_iommu *iommu)
2211{
2212	int r;
2213
2214	r = pci_enable_msi(iommu->dev);
2215	if (r)
2216		return r;
2217
2218	r = request_threaded_irq(iommu->dev->irq,
2219				 amd_iommu_int_handler,
2220				 amd_iommu_int_thread,
2221				 0, "AMD-Vi",
2222				 iommu);
2223
2224	if (r) {
2225		pci_disable_msi(iommu->dev);
2226		return r;
2227	}
2228
2229	return 0;
2230}
2231
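/*
 * Layout of the IOMMU interrupt-capability (XT) MMIO registers
 * (MMIO_INTCAPXT_{EVT,PPR,GALOG}_OFFSET) programmed below when x2APIC
 * interrupt mode is used. The 32-bit APIC destination ID is split across
 * destid_0_23 and destid_24_31; see intcapxt_unmask_irq().
 */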
2232union intcapxt {
2233	u64	capxt;
2234	struct {
2235		u64	reserved_0		:  2,
2236			dest_mode_logical	:  1,
2237			reserved_1		:  5,
2238			destid_0_23		: 24,
2239			vector			:  8,
2240			reserved_2		: 16,
2241			destid_24_31		:  8;
2242	};
2243} __attribute__ ((packed));
2244
2245
2246static struct irq_chip intcapxt_controller;
2247
2248static int intcapxt_irqdomain_activate(struct irq_domain *domain,
2249				       struct irq_data *irqd, bool reserve)
2250{
2251	return 0;
2252}
2253
2254static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
2255					  struct irq_data *irqd)
2256{
2257}
2258
2259
2260static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
2261				    unsigned int nr_irqs, void *arg)
2262{
2263	struct irq_alloc_info *info = arg;
2264	int i, ret;
2265
2266	if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
2267		return -EINVAL;
2268
2269	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
2270	if (ret < 0)
2271		return ret;
2272
2273	for (i = virq; i < virq + nr_irqs; i++) {
2274		struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
2275
2276		irqd->chip = &intcapxt_controller;
2277		irqd->chip_data = info->data;
2278		__irq_set_handler(i, handle_edge_irq, 0, "edge");
2279	}
2280
2281	return ret;
2282}
2283
2284static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq,
2285				    unsigned int nr_irqs)
2286{
2287	irq_domain_free_irqs_top(domain, virq, nr_irqs);
2288}
2289
2290
2291static void intcapxt_unmask_irq(struct irq_data *irqd)
2292{
2293	struct amd_iommu *iommu = irqd->chip_data;
2294	struct irq_cfg *cfg = irqd_cfg(irqd);
2295	union intcapxt xt;
2296
2297	xt.capxt = 0ULL;
2298	xt.dest_mode_logical = apic->dest_mode_logical;
2299	xt.vector = cfg->vector;
2300	xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
2301	xt.destid_24_31 = cfg->dest_apicid >> 24;
2302
2303	/**
2304	 * Current IOMMU implementation uses the same IRQ for all
2305	 * 3 IOMMU interrupts.
2306	 */
2307	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
2308	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
2309	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
2310}
2311
2312static void intcapxt_mask_irq(struct irq_data *irqd)
2313{
2314	struct amd_iommu *iommu = irqd->chip_data;
2315
2316	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
2317	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
2318	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
2319}
2320
2321
2322static int intcapxt_set_affinity(struct irq_data *irqd,
2323				 const struct cpumask *mask, bool force)
2324{
2325	struct irq_data *parent = irqd->parent_data;
2326	int ret;
2327
2328	ret = parent->chip->irq_set_affinity(parent, mask, force);
2329	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
2330		return ret;
2331	return 0;
2332}
2333
2334static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on)
2335{
2336	return on ? -EOPNOTSUPP : 0;
2337}
2338
2339static struct irq_chip intcapxt_controller = {
2340	.name			= "IOMMU-MSI",
2341	.irq_unmask		= intcapxt_unmask_irq,
2342	.irq_mask		= intcapxt_mask_irq,
2343	.irq_ack		= irq_chip_ack_parent,
2344	.irq_retrigger		= irq_chip_retrigger_hierarchy,
2345	.irq_set_affinity       = intcapxt_set_affinity,
2346	.irq_set_wake		= intcapxt_set_wake,
2347	.flags			= IRQCHIP_MASK_ON_SUSPEND,
2348};
2349
2350static const struct irq_domain_ops intcapxt_domain_ops = {
2351	.alloc			= intcapxt_irqdomain_alloc,
2352	.free			= intcapxt_irqdomain_free,
2353	.activate		= intcapxt_irqdomain_activate,
2354	.deactivate		= intcapxt_irqdomain_deactivate,
2355};
2356
2357
2358static struct irq_domain *iommu_irqdomain;
2359
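/*
 * Lazily create a single irq domain, shared by all IOMMUs, stacked on top
 * of the x86 vector domain. The domain is cached in iommu_irqdomain and
 * reused on subsequent calls.
 */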
2360static struct irq_domain *iommu_get_irqdomain(void)
2361{
2362	struct fwnode_handle *fn;
2363
2364	/* No need for locking here (yet) as the init is single-threaded */
2365	if (iommu_irqdomain)
2366		return iommu_irqdomain;
2367
2368	fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
2369	if (!fn)
2370		return NULL;
2371
2372	iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
2373						      fn, &intcapxt_domain_ops,
2374						      NULL);
2375	if (!iommu_irqdomain)
2376		irq_domain_free_fwnode(fn);
2377
2378	return iommu_irqdomain;
2379}
2380
2381static int iommu_setup_intcapxt(struct amd_iommu *iommu)
2382{
2383	struct irq_domain *domain;
2384	struct irq_alloc_info info;
2385	int irq, ret;
2386
2387	domain = iommu_get_irqdomain();
2388	if (!domain)
2389		return -ENXIO;
2390
2391	init_irq_alloc_info(&info, NULL);
2392	info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
2393	info.data = iommu;
2394
2395	irq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info);
2396	if (irq < 0) {
2397		irq_domain_remove(domain);
2398		return irq;
2399	}
2400
2401	ret = request_threaded_irq(irq, amd_iommu_int_handler,
2402				   amd_iommu_int_thread, 0, "AMD-Vi", iommu);
2403	if (ret) {
2404		irq_domain_free_irqs(irq, 1);
2405		irq_domain_remove(domain);
2406		return ret;
2407	}
2408
2409	return 0;
2410}
2411
2412static int iommu_init_irq(struct amd_iommu *iommu)
2413{
2414	int ret;
2415
2416	if (iommu->int_enabled)
2417		goto enable_faults;
2418
2419	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2420		ret = iommu_setup_intcapxt(iommu);
2421	else if (iommu->dev->msi_cap)
2422		ret = iommu_setup_msi(iommu);
2423	else
2424		ret = -ENODEV;
2425
2426	if (ret)
2427		return ret;
2428
2429	iommu->int_enabled = true;
2430enable_faults:
2431
2432	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2433		iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2434
2435	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2436
2437	if (iommu->ppr_log != NULL)
2438		iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
2439	return 0;
2440}
2441
2442/****************************************************************************
2443 *
2444 * The next functions belong to the third pass of parsing the ACPI
2445 * table. In this last pass the memory mapping requirements are
2446 * gathered (like exclusion and unity mapping ranges).
2447 *
2448 ****************************************************************************/
2449
2450static void __init free_unity_maps(void)
2451{
2452	struct unity_map_entry *entry, *next;
2453	struct amd_iommu_pci_seg *p, *pci_seg;
2454
2455	for_each_pci_segment_safe(pci_seg, p) {
2456		list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) {
2457			list_del(&entry->list);
2458			kfree(entry);
2459		}
2460	}
2461}
2462
2463/* called for unity map ACPI definition */
2464static int __init init_unity_map_range(struct ivmd_header *m,
2465				       struct acpi_table_header *ivrs_base)
2466{
2467	struct unity_map_entry *e = NULL;
2468	struct amd_iommu_pci_seg *pci_seg;
2469	char *s;
2470
2471	pci_seg = get_pci_segment(m->pci_seg, ivrs_base);
2472	if (pci_seg == NULL)
2473		return -ENOMEM;
2474
2475	e = kzalloc(sizeof(*e), GFP_KERNEL);
2476	if (e == NULL)
2477		return -ENOMEM;
2478
2479	switch (m->type) {
2480	default:
2481		kfree(e);
2482		return 0;
2483	case ACPI_IVMD_TYPE:
2484		s = "IVMD_TYPE\t\t\t";
2485		e->devid_start = e->devid_end = m->devid;
2486		break;
2487	case ACPI_IVMD_TYPE_ALL:
2488		s = "IVMD_TYPE_ALL\t\t";
2489		e->devid_start = 0;
2490		e->devid_end = pci_seg->last_bdf;
2491		break;
2492	case ACPI_IVMD_TYPE_RANGE:
2493		s = "IVMD_TYPE_RANGE\t\t";
2494		e->devid_start = m->devid;
2495		e->devid_end = m->aux;
2496		break;
2497	}
2498	e->address_start = PAGE_ALIGN(m->range_start);
2499	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
2500	e->prot = m->flags >> 1;
2501
2502	/*
2503	 * Treat per-device exclusion ranges as r/w unity-mapped regions
2504	 * since some buggy BIOSes may otherwise overwrite the exclusion
2505	 * range (the exclusion_start and exclusion_length members). This
2506	 * happens when multiple exclusion ranges (IVMD entries) are
2507	 * defined in the ACPI table.
2508	 */
2509	if (m->flags & IVMD_FLAG_EXCL_RANGE)
2510		e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
2511
2512	DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: "
2513		    "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx"
2514		    " flags: %x\n", s, m->pci_seg,
2515		    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2516		    PCI_FUNC(e->devid_start), m->pci_seg,
2517		    PCI_BUS_NUM(e->devid_end),
2518		    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2519		    e->address_start, e->address_end, m->flags);
2520
2521	list_add_tail(&e->list, &pci_seg->unity_map);
2522
2523	return 0;
2524}
2525
2526/* iterates over all memory definitions we find in the ACPI table */
2527static int __init init_memory_definitions(struct acpi_table_header *table)
2528{
2529	u8 *p = (u8 *)table, *end = (u8 *)table;
2530	struct ivmd_header *m;
2531
2532	end += table->length;
2533	p += IVRS_HEADER_LENGTH;
2534
2535	while (p < end) {
2536		m = (struct ivmd_header *)p;
2537		if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2538			init_unity_map_range(m, table);
2539
2540		p += m->length;
2541	}
2542
2543	return 0;
2544}
2545
2546/*
2547 * Init the device table to not allow DMA access for devices
2548 */
2549static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2550{
2551	u32 devid;
2552	struct dev_table_entry *dev_table = pci_seg->dev_table;
2553
2554	if (dev_table == NULL)
2555		return;
2556
2557	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2558		__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID);
2559		if (!amd_iommu_snp_en)
2560			__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION);
2561	}
2562}
2563
2564static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2565{
2566	u32 devid;
2567	struct dev_table_entry *dev_table = pci_seg->dev_table;
2568
2569	if (dev_table == NULL)
2570		return;
2571
2572	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2573		dev_table[devid].data[0] = 0ULL;
2574		dev_table[devid].data[1] = 0ULL;
2575	}
2576}
2577
2578static void init_device_table(void)
2579{
2580	struct amd_iommu_pci_seg *pci_seg;
2581	u32 devid;
2582
2583	if (!amd_iommu_irq_remap)
2584		return;
2585
2586	for_each_pci_segment(pci_seg) {
2587		for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
2588			__set_dev_entry_bit(pci_seg->dev_table,
2589					    devid, DEV_ENTRY_IRQ_TBL_EN);
2590	}
2591}
2592
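/*
 * Enable or disable the control bits that correspond to the IVHD ACPI
 * flags (HT_TUN, PASSPW, RESPASSPW, ISOC) reported for this IOMMU.
 */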
2593static void iommu_init_flags(struct amd_iommu *iommu)
2594{
2595	iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2596		iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2597		iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2598
2599	iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2600		iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2601		iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2602
2603	iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2604		iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2605		iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2606
2607	iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2608		iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2609		iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2610
2611	/*
2612	 * make IOMMU memory accesses cache coherent
2613	 */
2614	iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2615
2616	/* Set IOTLB invalidation timeout to 1s */
2617	iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2618}
2619
2620static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2621{
2622	int i, j;
2623	u32 ioc_feature_control;
2624	struct pci_dev *pdev = iommu->root_pdev;
2625
2626	/* RD890 BIOSes may not have completely reconfigured the iommu */
2627	if (!is_rd890_iommu(iommu->dev) || !pdev)
2628		return;
2629
2630	/*
2631	 * First, we need to ensure that the iommu is enabled. This is
2632	 * controlled by a register in the northbridge
2633	 */
2634
2635	/* Select Northbridge indirect register 0x75 and enable writing */
2636	pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2637	pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2638
2639	/* Enable the iommu */
2640	if (!(ioc_feature_control & 0x1))
2641		pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2642
2643	/* Restore the iommu BAR */
2644	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2645			       iommu->stored_addr_lo);
2646	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2647			       iommu->stored_addr_hi);
2648
2649	/* Restore the l1 indirect regs for each of the 6 l1s */
2650	for (i = 0; i < 6; i++)
2651		for (j = 0; j < 0x12; j++)
2652			iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2653
2654	/* Restore the l2 indirect regs */
2655	for (i = 0; i < 0x83; i++)
2656		iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2657
2658	/* Lock PCI setup registers */
2659	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2660			       iommu->stored_addr_lo | 1);
2661}
2662
2663static void iommu_enable_ga(struct amd_iommu *iommu)
2664{
2665#ifdef CONFIG_IRQ_REMAP
2666	switch (amd_iommu_guest_ir) {
2667	case AMD_IOMMU_GUEST_IR_VAPIC:
2668	case AMD_IOMMU_GUEST_IR_LEGACY_GA:
2669		iommu_feature_enable(iommu, CONTROL_GA_EN);
2670		iommu->irte_ops = &irte_128_ops;
2671		break;
2672	default:
2673		iommu->irte_ops = &irte_32_ops;
2674		break;
2675	}
2676#endif
2677}
2678
2678
2679static void early_enable_iommu(struct amd_iommu *iommu)
2680{
2681	iommu_disable(iommu);
2682	iommu_init_flags(iommu);
2683	iommu_set_device_table(iommu);
2684	iommu_enable_command_buffer(iommu);
2685	iommu_enable_event_buffer(iommu);
2686	iommu_set_exclusion_range(iommu);
2687	iommu_enable_ga(iommu);
2688	iommu_enable_xt(iommu);
2689	iommu_enable(iommu);
2690	iommu_flush_all_caches(iommu);
2691}
2692
2693/*
2694 * This function finally enables all IOMMUs found in the system after
2695 * they have been initialized.
2696 *
2697 * Or, if running in a kdump kernel with all IOMMUs pre-enabled, try to
2698 * copy the old content of the device table entries. If that is not the
2699 * case, or the copy failed, just continue as a normal kernel would.
2700 */
2701static void early_enable_iommus(void)
2702{
2703	struct amd_iommu *iommu;
2704	struct amd_iommu_pci_seg *pci_seg;
2705
2706	if (!copy_device_table()) {
2707		/*
2708		 * If we get here because copying the device table from the old
2709		 * kernel failed with all IOMMUs pre-enabled, print an error
2710		 * message and free the allocated old_dev_tbl_cpy.
2711		 */
2712		if (amd_iommu_pre_enabled)
2713			pr_err("Failed to copy DEV table from previous kernel.\n");
2714
2715		for_each_pci_segment(pci_seg) {
2716			if (pci_seg->old_dev_tbl_cpy != NULL) {
2717				free_pages((unsigned long)pci_seg->old_dev_tbl_cpy,
2718						get_order(pci_seg->dev_table_size));
2719				pci_seg->old_dev_tbl_cpy = NULL;
2720			}
2721		}
2722
2723		for_each_iommu(iommu) {
2724			clear_translation_pre_enabled(iommu);
2725			early_enable_iommu(iommu);
2726		}
2727	} else {
2728		pr_info("Copied DEV table from previous kernel.\n");
2729
2730		for_each_pci_segment(pci_seg) {
2731			free_pages((unsigned long)pci_seg->dev_table,
2732				   get_order(pci_seg->dev_table_size));
2733			pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
2734		}
2735
2736		for_each_iommu(iommu) {
2737			iommu_disable_command_buffer(iommu);
2738			iommu_disable_event_buffer(iommu);
2739			iommu_enable_command_buffer(iommu);
2740			iommu_enable_event_buffer(iommu);
2741			iommu_enable_ga(iommu);
2742			iommu_enable_xt(iommu);
2743			iommu_set_device_table(iommu);
2744			iommu_flush_all_caches(iommu);
2745		}
2746	}
2747}
2748
2749static void enable_iommus_v2(void)
2750{
2751	struct amd_iommu *iommu;
2752
2753	for_each_iommu(iommu) {
2754		iommu_enable_ppr_log(iommu);
2755		iommu_enable_gt(iommu);
2756	}
2757}
2758
2759static void enable_iommus_vapic(void)
2760{
2761#ifdef CONFIG_IRQ_REMAP
2762	u32 status, i;
2763	struct amd_iommu *iommu;
2764
2765	for_each_iommu(iommu) {
2766		/*
2767		 * Disable GALog if already running. It could have been enabled
2768		 * in the previous boot before kdump.
2769		 */
2770		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
2771		if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
2772			continue;
2773
2774		iommu_feature_disable(iommu, CONTROL_GALOG_EN);
2775		iommu_feature_disable(iommu, CONTROL_GAINT_EN);
2776
2777		/*
2778		 * Poll until the GALogRun bit reads back as zero before the
2779		 * GA Log registers can be modified safely.
2780		 */
2781		for (i = 0; i < LOOP_TIMEOUT; ++i) {
2782			status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
2783			if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
2784				break;
2785			udelay(10);
2786		}
2787
2788		if (WARN_ON(i >= LOOP_TIMEOUT))
2789			return;
2790	}
2791
2792	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
2793	    !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) {
2794		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
2795		return;
2796	}
2797
2798	if (amd_iommu_snp_en &&
2799	    !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) {
2800		pr_warn("Disabling Virtual APIC due to SNP\n");
2801		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
2802		return;
2803	}
2804
2805	/* Enabling GAM and SNPAVIC support */
2806	for_each_iommu(iommu) {
2807		if (iommu_init_ga_log(iommu) ||
2808		    iommu_ga_log_enable(iommu))
2809			return;
2810
2811		iommu_feature_enable(iommu, CONTROL_GAM_EN);
2812		if (amd_iommu_snp_en)
2813			iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN);
2814	}
2815
2816	amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
2817	pr_info("Virtual APIC enabled\n");
2818#endif
2819}
2820
2821static void enable_iommus(void)
2822{
2823	early_enable_iommus();
2824	enable_iommus_vapic();
2825	enable_iommus_v2();
2826}
2827
2828static void disable_iommus(void)
2829{
2830	struct amd_iommu *iommu;
2831
2832	for_each_iommu(iommu)
2833		iommu_disable(iommu);
2834
2835#ifdef CONFIG_IRQ_REMAP
2836	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2837		amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
2838#endif
2839}
2840
2841/*
2842 * Suspend/Resume support
2843 * disable suspend until real resume implemented
2844 */
2845
2846static void amd_iommu_resume(void)
2847{
2848	struct amd_iommu *iommu;
2849
2850	for_each_iommu(iommu)
2851		iommu_apply_resume_quirks(iommu);
2852
2853	/* re-load the hardware */
2854	enable_iommus();
2855
2856	amd_iommu_enable_interrupts();
2857}
2858
2859static int amd_iommu_suspend(void)
2860{
2861	/* disable IOMMUs to go out of the way for BIOS */
2862	disable_iommus();
2863
2864	return 0;
2865}
2866
2867static struct syscore_ops amd_iommu_syscore_ops = {
2868	.suspend = amd_iommu_suspend,
2869	.resume = amd_iommu_resume,
2870};
2871
2872static void __init free_iommu_resources(void)
2873{
2874	kmem_cache_destroy(amd_iommu_irq_cache);
2875	amd_iommu_irq_cache = NULL;
2876
2877	free_iommu_all();
2878	free_pci_segments();
2879}
2880
2881/* SB IOAPIC is always on this device in AMD systems */
2882#define IOAPIC_SB_DEVID		((0x00 << 8) | PCI_DEVFN(0x14, 0))
2883
2884static bool __init check_ioapic_information(void)
2885{
2886	const char *fw_bug = FW_BUG;
2887	bool ret, has_sb_ioapic;
2888	int idx;
2889
2890	has_sb_ioapic = false;
2891	ret           = false;
2892
2893	/*
2894	 * If we have map overrides on the kernel command line the
2895	 * messages in this function might not describe firmware bugs
2896	 * anymore - so be careful
2897	 */
2898	if (cmdline_maps)
2899		fw_bug = "";
2900
2901	for (idx = 0; idx < nr_ioapics; idx++) {
2902		int devid, id = mpc_ioapic_id(idx);
2903
2904		devid = get_ioapic_devid(id);
2905		if (devid < 0) {
2906			pr_err("%s: IOAPIC[%d] not in IVRS table\n",
2907				fw_bug, id);
2908			ret = false;
2909		} else if (devid == IOAPIC_SB_DEVID) {
2910			has_sb_ioapic = true;
2911			ret           = true;
2912		}
2913	}
2914
2915	if (!has_sb_ioapic) {
2916		/*
2917		 * We expect the SB IOAPIC to be listed in the IVRS
2918		 * table. The system timer is connected to the SB IOAPIC
2919		 * and if we don't have it in the list the system will
2920		 * panic at boot time.  This situation usually happens
2921		 * when the BIOS is buggy and provides us the wrong
2922		 * device id for the IOAPIC in the system.
2923		 */
2924		pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
2925	}
2926
2927	if (!ret)
2928		pr_err("Disabling interrupt remapping\n");
2929
2930	return ret;
2931}
2932
2933static void __init free_dma_resources(void)
2934{
2935	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
2936		   get_order(MAX_DOMAIN_ID/8));
2937	amd_iommu_pd_alloc_bitmap = NULL;
2938
2939	free_unity_maps();
2940}
2941
2942static void __init ivinfo_init(void *ivrs)
2943{
2944	amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
2945}
2946
2947/*
2948 * This is the hardware init function for AMD IOMMU in the system.
2949 * This function is called either from amd_iommu_init or from the interrupt
2950 * remapping setup code.
2951 *
2952 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
2953 * four times:
2954 *
2955 *	Pass 1) Discover the most comprehensive IVHD type to use.
2956 *
2957 *	Pass 2) Find the highest PCI device id the driver has to handle.
2958 *		Based on this information the sizes of the data
2959 *		structures that need to be allocated are determined.
2960 *
2961 *	Pass 3) Initialize the data structures just allocated with the
2962 *		information in the ACPI table about the available AMD
2963 *		IOMMUs in the system. It also maps the PCI devices in
2964 *		the system to specific IOMMUs.
2965 *
2966 *	Pass 4) After the basic data structures are allocated and
2967 *		initialized, update them with information about the
2968 *		memory remapping requirements parsed out of the ACPI
2969 *		table in this last pass.
2970 *
2971 * After everything is set up the IOMMUs are enabled and the necessary
2972 * hotplug and suspend notifiers are registered.
2973 */
2974static int __init early_amd_iommu_init(void)
2975{
2976	struct acpi_table_header *ivrs_base;
2977	int remap_cache_sz, ret;
2978	acpi_status status;
2979
2980	if (!amd_iommu_detected)
2981		return -ENODEV;
2982
2983	status = acpi_get_table("IVRS", 0, &ivrs_base);
2984	if (status == AE_NOT_FOUND)
2985		return -ENODEV;
2986	else if (ACPI_FAILURE(status)) {
2987		const char *err = acpi_format_exception(status);
2988		pr_err("IVRS table error: %s\n", err);
2989		return -EINVAL;
2990	}
2991
2992	/*
2993	 * Validate checksum here so we don't need to do it when
2994	 * we actually parse the table
2995	 */
2996	ret = check_ivrs_checksum(ivrs_base);
2997	if (ret)
2998		goto out;
2999
3000	ivinfo_init(ivrs_base);
3001
3002	amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
3003	DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
3004
3005	/* Protection domain ID allocation bitmap - shared by all IOMMUs */
3006	ret = -ENOMEM;
3007
3008	amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
3009					    GFP_KERNEL | __GFP_ZERO,
3010					    get_order(MAX_DOMAIN_ID/8));
3011	if (amd_iommu_pd_alloc_bitmap == NULL)
3012		goto out;
3013
3014	/*
3015	 * Never allocate domain 0 because it's used as the non-allocated and
3016	 * error value placeholder.
3017	 */
3018	__set_bit(0, amd_iommu_pd_alloc_bitmap);
3019
3020	/*
3021	 * Now that the data structures are allocated and basically initialized,
3022	 * start the real ACPI table scan.
3023	 */
3024	ret = init_iommu_all(ivrs_base);
3025	if (ret)
3026		goto out;
3027
3028	/* Disable any previously enabled IOMMUs */
3029	if (!is_kdump_kernel() || amd_iommu_disabled)
3030		disable_iommus();
3031
3032	if (amd_iommu_irq_remap)
3033		amd_iommu_irq_remap = check_ioapic_information();
3034
3035	if (amd_iommu_irq_remap) {
3036		struct amd_iommu_pci_seg *pci_seg;
3037		/*
3038		 * Interrupt remapping enabled, create kmem_cache for the
3039		 * remapping tables.
3040		 */
3041		ret = -ENOMEM;
3042		if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
3043			remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
3044		else
3045			remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
3046		amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
3047							remap_cache_sz,
3048							DTE_INTTAB_ALIGNMENT,
3049							0, NULL);
3050		if (!amd_iommu_irq_cache)
3051			goto out;
3052
3053		for_each_pci_segment(pci_seg) {
3054			if (alloc_irq_lookup_table(pci_seg))
3055				goto out;
3056		}
3057	}
3058
3059	ret = init_memory_definitions(ivrs_base);
3060	if (ret)
3061		goto out;
3062
3063	/* init the device table */
3064	init_device_table();
3065
3066out:
3067	/* Don't leak any ACPI memory */
3068	acpi_put_table(ivrs_base);
3069
3070	return ret;
3071}
3072
3073static int amd_iommu_enable_interrupts(void)
3074{
3075	struct amd_iommu *iommu;
3076	int ret = 0;
3077
3078	for_each_iommu(iommu) {
3079		ret = iommu_init_irq(iommu);
3080		if (ret)
3081			goto out;
3082	}
3083
3084out:
3085	return ret;
3086}
3087
3088static bool __init detect_ivrs(void)
3089{
3090	struct acpi_table_header *ivrs_base;
3091	acpi_status status;
3092	int i;
3093
3094	status = acpi_get_table("IVRS", 0, &ivrs_base);
3095	if (status == AE_NOT_FOUND)
3096		return false;
3097	else if (ACPI_FAILURE(status)) {
3098		const char *err = acpi_format_exception(status);
3099		pr_err("IVRS table error: %s\n", err);
3100		return false;
3101	}
3102
3103	acpi_put_table(ivrs_base);
3104
3105	if (amd_iommu_force_enable)
3106		goto out;
3107
3108	/* Don't use IOMMU if there is Stoney Ridge graphics */
3109	for (i = 0; i < 32; i++) {
3110		u32 pci_id;
3111
3112		pci_id = read_pci_config(0, i, 0, 0);
3113		if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
3114			pr_info("Disable IOMMU on Stoney Ridge\n");
3115			return false;
3116		}
3117	}
3118
3119out:
3120	/* Make sure ACS will be enabled during PCI probe */
3121	pci_request_acs();
3122
3123	return true;
3124}
3125
3126/****************************************************************************
3127 *
3128 * AMD IOMMU Initialization State Machine
3129 *
3130 ****************************************************************************/
3131
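/*
 * state_next() advances init_state by one step. The normal flow is
 * IOMMU_START_STATE -> IOMMU_IVRS_DETECTED -> IOMMU_ACPI_FINISHED ->
 * IOMMU_ENABLED -> IOMMU_PCI_INIT -> IOMMU_INTERRUPTS_EN ->
 * IOMMU_INITIALIZED. IOMMU_NOT_FOUND, IOMMU_INIT_ERROR and
 * IOMMU_CMDLINE_DISABLED are terminal error states.
 */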
3132static int __init state_next(void)
3133{
3134	int ret = 0;
3135
3136	switch (init_state) {
3137	case IOMMU_START_STATE:
3138		if (!detect_ivrs()) {
3139			init_state	= IOMMU_NOT_FOUND;
3140			ret		= -ENODEV;
3141		} else {
3142			init_state	= IOMMU_IVRS_DETECTED;
3143		}
3144		break;
3145	case IOMMU_IVRS_DETECTED:
3146		if (amd_iommu_disabled) {
3147			init_state = IOMMU_CMDLINE_DISABLED;
3148			ret = -EINVAL;
3149		} else {
3150			ret = early_amd_iommu_init();
3151			init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
3152		}
3153		break;
3154	case IOMMU_ACPI_FINISHED:
3155		early_enable_iommus();
3156		x86_platform.iommu_shutdown = disable_iommus;
3157		init_state = IOMMU_ENABLED;
3158		break;
3159	case IOMMU_ENABLED:
3160		register_syscore_ops(&amd_iommu_syscore_ops);
3161		ret = amd_iommu_init_pci();
3162		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
3163		enable_iommus_vapic();
3164		enable_iommus_v2();
3165		break;
3166	case IOMMU_PCI_INIT:
3167		ret = amd_iommu_enable_interrupts();
3168		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
3169		break;
3170	case IOMMU_INTERRUPTS_EN:
3171		init_state = IOMMU_INITIALIZED;
3172		break;
3173	case IOMMU_INITIALIZED:
3174		/* Nothing to do */
3175		break;
3176	case IOMMU_NOT_FOUND:
3177	case IOMMU_INIT_ERROR:
3178	case IOMMU_CMDLINE_DISABLED:
3179		/* Error states => do nothing */
3180		ret = -EINVAL;
3181		break;
3182	default:
3183		/* Unknown state */
3184		BUG();
3185	}
3186
3187	if (ret) {
3188		free_dma_resources();
3189		if (!irq_remapping_enabled) {
3190			disable_iommus();
3191			free_iommu_resources();
3192		} else {
3193			struct amd_iommu *iommu;
3194			struct amd_iommu_pci_seg *pci_seg;
3195
3196			for_each_pci_segment(pci_seg)
3197				uninit_device_table_dma(pci_seg);
3198
3199			for_each_iommu(iommu)
3200				iommu_flush_all_caches(iommu);
3201		}
3202	}
3203	return ret;
3204}
3205
3206static int __init iommu_go_to_state(enum iommu_init_state state)
3207{
3208	int ret = -EINVAL;
3209
3210	while (init_state != state) {
3211		if (init_state == IOMMU_NOT_FOUND         ||
3212		    init_state == IOMMU_INIT_ERROR        ||
3213		    init_state == IOMMU_CMDLINE_DISABLED)
3214			break;
3215		ret = state_next();
3216	}
3217
3218	return ret;
3219}
3220
3221#ifdef CONFIG_IRQ_REMAP
3222int __init amd_iommu_prepare(void)
3223{
3224	int ret;
3225
3226	amd_iommu_irq_remap = true;
3227
3228	ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
3229	if (ret) {
3230		amd_iommu_irq_remap = false;
3231		return ret;
3232	}
3233
3234	return amd_iommu_irq_remap ? 0 : -ENODEV;
3235}
3236
3237int __init amd_iommu_enable(void)
3238{
3239	int ret;
3240
3241	ret = iommu_go_to_state(IOMMU_ENABLED);
3242	if (ret)
3243		return ret;
3244
3245	irq_remapping_enabled = 1;
3246	return amd_iommu_xt_mode;
3247}
3248
3249void amd_iommu_disable(void)
3250{
3251	amd_iommu_suspend();
3252}
3253
3254int amd_iommu_reenable(int mode)
3255{
3256	amd_iommu_resume();
3257
3258	return 0;
3259}
3260
3261int __init amd_iommu_enable_faulting(void)
3262{
3263	/* We enable MSI later when PCI is initialized */
3264	return 0;
3265}
3266#endif
3267
3268/*
3269 * This is the core init function for AMD IOMMU hardware in the system.
3270 * This function is called from the generic x86 DMA layer initialization
3271 * code.
3272 */
3273static int __init amd_iommu_init(void)
3274{
3275	struct amd_iommu *iommu;
3276	int ret;
3277
3278	ret = iommu_go_to_state(IOMMU_INITIALIZED);
3279#ifdef CONFIG_GART_IOMMU
3280	if (ret && list_empty(&amd_iommu_list)) {
3281		/*
3282		 * We failed to initialize the AMD IOMMU - try fallback
3283		 * to GART if possible.
3284		 */
3285		gart_iommu_init();
3286	}
3287#endif
3288
3289	for_each_iommu(iommu)
3290		amd_iommu_debugfs_setup(iommu);
3291
3292	return ret;
3293}
3294
3295static bool amd_iommu_sme_check(void)
3296{
3297	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) ||
3298	    (boot_cpu_data.x86 != 0x17))
3299		return true;
3300
3301	/* For Fam17h, a specific level of support is required */
3302	if (boot_cpu_data.microcode >= 0x08001205)
3303		return true;
3304
3305	if ((boot_cpu_data.microcode >= 0x08001126) &&
3306	    (boot_cpu_data.microcode <= 0x080011ff))
3307		return true;
3308
3309	pr_notice("IOMMU not currently supported when SME is active\n");
3310
3311	return false;
3312}
3313
3314/****************************************************************************
3315 *
3316 * Early detect code. This code runs at IOMMU detection time in the DMA
3317 * layer. It just looks if there is an IVRS ACPI table to detect AMD
3318 * IOMMUs
3319 *
3320 ****************************************************************************/
3321int __init amd_iommu_detect(void)
3322{
3323	int ret;
3324
3325	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
3326		return -ENODEV;
3327
3328	if (!amd_iommu_sme_check())
3329		return -ENODEV;
3330
3331	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
3332	if (ret)
3333		return ret;
3334
3335	amd_iommu_detected = true;
3336	iommu_detected = 1;
3337	x86_init.iommu.iommu_init = amd_iommu_init;
3338
3339	return 1;
3340}
3341
3342/****************************************************************************
3343 *
3344 * Parsing functions for the AMD IOMMU specific kernel command line
3345 * options.
3346 *
3347 ****************************************************************************/
3348
3349static int __init parse_amd_iommu_dump(char *str)
3350{
3351	amd_iommu_dump = true;
3352
3353	return 1;
3354}
3355
3356static int __init parse_amd_iommu_intr(char *str)
3357{
3358	for (; *str; ++str) {
3359		if (strncmp(str, "legacy", 6) == 0) {
3360			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3361			break;
3362		}
3363		if (strncmp(str, "vapic", 5) == 0) {
3364			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
3365			break;
3366		}
3367	}
3368	return 1;
3369}
3370
3371static int __init parse_amd_iommu_options(char *str)
3372{
3373	if (!str)
3374		return -EINVAL;
3375
3376	while (*str) {
3377		if (strncmp(str, "fullflush", 9) == 0) {
3378			pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
3379			iommu_set_dma_strict();
3380		} else if (strncmp(str, "force_enable", 12) == 0) {
3381			amd_iommu_force_enable = true;
3382		} else if (strncmp(str, "off", 3) == 0) {
3383			amd_iommu_disabled = true;
3384		} else if (strncmp(str, "force_isolation", 15) == 0) {
3385			amd_iommu_force_isolation = true;
3386		} else if (strncmp(str, "pgtbl_v1", 8) == 0) {
3387			amd_iommu_pgtable = AMD_IOMMU_V1;
3388		} else if (strncmp(str, "pgtbl_v2", 8) == 0) {
3389			amd_iommu_pgtable = AMD_IOMMU_V2;
3390		} else {
3391			pr_notice("Unknown option - '%s'\n", str);
3392		}
3393
3394		str += strcspn(str, ",");
3395		while (*str == ',')
3396			str++;
3397	}
3398
3399	return 1;
3400}
3401
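/*
 * Command line override for the IOAPIC id -> device id mapping, for
 * example ivrs_ioapic=10@0001:00:14.0. The bracketed form
 * ivrs_ioapic[10]=00:14.0 is still accepted but reported as deprecated.
 */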
3402static int __init parse_ivrs_ioapic(char *str)
3403{
3404	u32 seg = 0, bus, dev, fn;
3405	int id, i;
3406	u32 devid;
3407
3408	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3409	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3410		goto found;
3411
3412	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3413	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3414		pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n",
3415			str, id, seg, bus, dev, fn);
3416		goto found;
3417	}
3418
3419	pr_err("Invalid command line: ivrs_ioapic%s\n", str);
3420	return 1;
3421
3422found:
3423	if (early_ioapic_map_size == EARLY_MAP_SIZE) {
3424		pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
3425			str);
3426		return 1;
3427	}
3428
3429	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3430
3431	cmdline_maps			= true;
3432	i				= early_ioapic_map_size++;
3433	early_ioapic_map[i].id		= id;
3434	early_ioapic_map[i].devid	= devid;
3435	early_ioapic_map[i].cmd_line	= true;
3436
3437	return 1;
3438}
3439
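/*
 * Command line override for the HPET id -> device id mapping; same
 * format as ivrs_ioapic above, for example ivrs_hpet=0@0000:00:14.0.
 */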
3440static int __init parse_ivrs_hpet(char *str)
3441{
3442	u32 seg = 0, bus, dev, fn;
3443	int id, i;
3444	u32 devid;
3445
3446	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3447	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3448		goto found;
3449
3450	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3451	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3452		pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n",
3453			str, id, seg, bus, dev, fn);
3454		goto found;
3455	}
3456
3457	pr_err("Invalid command line: ivrs_hpet%s\n", str);
3458	return 1;
3459
3460found:
3461	if (early_hpet_map_size == EARLY_MAP_SIZE) {
3462		pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
3463			str);
3464		return 1;
3465	}
3466
3467	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3468
3469	cmdline_maps			= true;
3470	i				= early_hpet_map_size++;
3471	early_hpet_map[i].id		= id;
3472	early_hpet_map[i].devid		= devid;
3473	early_hpet_map[i].cmd_line	= true;
3474
3475	return 1;
3476}
3477
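/*
 * Command line override for the ACPI HID/UID -> device id mapping, for
 * example ivrs_acpihid=AMDI0095:00@0000:00:14.5. The bracketed form
 * ivrs_acpihid[00:14.5]=AMDI0095:00 is still accepted but reported as
 * deprecated.
 */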
3478static int __init parse_ivrs_acpihid(char *str)
3479{
3480	u32 seg = 0, bus, dev, fn;
3481	char *hid, *uid, *p, *addr;
3482	char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
3483	int i;
3484
3485	addr = strchr(str, '@');
3486	if (!addr) {
3487		if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
3488		    sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
3489			pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
3490				str, acpiid, seg, bus, dev, fn);
3491			goto found;
3492		}
3493		goto not_found;
3494	}
3495
3496	/* We have the '@', make it the terminator to get just the acpiid */
3497	*addr++ = 0;
3498
3499	if (sscanf(str, "=%s", acpiid) != 1)
3500		goto not_found;
3501
3502	if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 ||
3503	    sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4)
3504		goto found;
3505
3506not_found:
3507	pr_err("Invalid command line: ivrs_acpihid%s\n", str);
3508	return 1;
3509
3510found:
3511	p = acpiid;
3512	hid = strsep(&p, ":");
3513	uid = p;
3514
3515	if (!hid || !(*hid) || !uid) {
3516		pr_err("Invalid command line: hid or uid\n");
3517		return 1;
3518	}
3519
3520	/*
3521	 * Ignore leading zeroes after ':', so e.g., AMDI0095:00
3522	 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match
3523	 */
3524	while (*uid == '0' && *(uid + 1))
3525		uid++;
3526
3527	i = early_acpihid_map_size++;
3528	memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3529	memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3530	early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3531	early_acpihid_map[i].cmd_line	= true;
3532
3533	return 1;
3534}
3535
3536__setup("amd_iommu_dump",	parse_amd_iommu_dump);
3537__setup("amd_iommu=",		parse_amd_iommu_options);
3538__setup("amd_iommu_intr=",	parse_amd_iommu_intr);
3539__setup("ivrs_ioapic",		parse_ivrs_ioapic);
3540__setup("ivrs_hpet",		parse_ivrs_hpet);
3541__setup("ivrs_acpihid",		parse_ivrs_acpihid);
3542
3543bool amd_iommu_v2_supported(void)
3544{
3545	/*
3546	 * Since DTE[Mode]=0 is prohibited on an SNP-enabled system
3547	 * (i.e. EFR[SNPSup]=1), the IOMMUv2 page table cannot be used
3548	 * without also setting up the IOMMUv1 page table.
3549	 */
3550	return amd_iommu_v2_present && !amd_iommu_snp_en;
3551}
3552EXPORT_SYMBOL(amd_iommu_v2_supported);
3553
3554struct amd_iommu *get_amd_iommu(unsigned int idx)
3555{
3556	unsigned int i = 0;
3557	struct amd_iommu *iommu;
3558
3559	for_each_iommu(iommu)
3560		if (i++ == idx)
3561			return iommu;
3562	return NULL;
3563}
3564
3565/****************************************************************************
3566 *
3567 * IOMMU EFR Performance Counter support functionality. This code allows
3568 * access to the IOMMU PC functionality.
3569 *
3570 ****************************************************************************/
3571
3572u8 amd_iommu_pc_get_max_banks(unsigned int idx)
3573{
3574	struct amd_iommu *iommu = get_amd_iommu(idx);
3575
3576	if (iommu)
3577		return iommu->max_banks;
3578
3579	return 0;
3580}
3581EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
3582
3583bool amd_iommu_pc_supported(void)
3584{
3585	return amd_iommu_pc_present;
3586}
3587EXPORT_SYMBOL(amd_iommu_pc_supported);
3588
3589u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3590{
3591	struct amd_iommu *iommu = get_amd_iommu(idx);
3592
3593	if (iommu)
3594		return iommu->max_counters;
3595
3596	return 0;
3597}
3598EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
3599
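/*
 * Read or write a single IOMMU performance counter register. The MMIO
 * offset encodes the bank, counter and function number; counter values
 * are limited to 48 bits (GENMASK_ULL(47, 0)).
 */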
3600static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3601				u8 fxn, u64 *value, bool is_write)
3602{
3603	u32 offset;
3604	u32 max_offset_lim;
3605
3606	/* Make sure the IOMMU PC resource is available */
3607	if (!amd_iommu_pc_present)
3608		return -ENODEV;
3609
3610	/* Check for valid iommu and pc register indexing */
3611	if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3612		return -ENODEV;
3613
3614	offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3615
3616	/* Limit the offset to the hw defined mmio region aperture */
3617	max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3618				(iommu->max_counters << 8) | 0x28);
3619	if ((offset < MMIO_CNTR_REG_OFFSET) ||
3620	    (offset > max_offset_lim))
3621		return -EINVAL;
3622
3623	if (is_write) {
3624		u64 val = *value & GENMASK_ULL(47, 0);
3625
3626		writel((u32)val, iommu->mmio_base + offset);
3627		writel((val >> 32), iommu->mmio_base + offset + 4);
3628	} else {
3629		*value = readl(iommu->mmio_base + offset + 4);
3630		*value <<= 32;
3631		*value |= readl(iommu->mmio_base + offset);
3632		*value &= GENMASK_ULL(47, 0);
3633	}
3634
3635	return 0;
3636}
3637
3638int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3639{
3640	if (!iommu)
3641		return -EINVAL;
3642
3643	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
3644}
3645
3646int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3647{
3648	if (!iommu)
3649		return -EINVAL;
3650
3651	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
3652}
3653
3654#ifdef CONFIG_AMD_MEM_ENCRYPT
3655int amd_iommu_snp_enable(void)
3656{
3657	/*
3658	 * SNP support requires that the IOMMU is enabled and not
3659	 * configured in passthrough mode.
3660	 */
3661	if (no_iommu || iommu_default_passthrough()) {
3662		pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported\n");
3663		return -EINVAL;
3664	}
3665
3666	/*
3667	 * Prevent enabling SNP after the IOMMU_ENABLED state because doing
3668	 * so affects how the IOMMU driver sets up data structures and
3669	 * configures the IOMMU hardware.
3670	 */
3671	if (init_state > IOMMU_ENABLED) {
3672		pr_err("SNP: Too late to enable SNP for IOMMU.\n");
3673		return -EINVAL;
3674	}
3675
3676	amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
3677	if (!amd_iommu_snp_en)
3678		return -EINVAL;
3679
3680	pr_info("SNP enabled\n");
3681
3682	/* Enforce IOMMU v1 pagetable when SNP is enabled. */
3683	if (amd_iommu_pgtable != AMD_IOMMU_V1) {
3684		pr_warn("Forcing AMD IOMMU v1 page table due to SNP\n");
3685		amd_iommu_pgtable = AMD_IOMMU_V1;
3686	}
3687
3688	return 0;
3689}
3690#endif