   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
   4 * Author: Joerg Roedel <jroedel@suse.de>
   5 *         Leo Duran <leo.duran@amd.com>
   6 */
   7
   8#define pr_fmt(fmt)     "AMD-Vi: " fmt
   9#define dev_fmt(fmt)    pr_fmt(fmt)
  10
  11#include <linux/ratelimit.h>
  12#include <linux/pci.h>
  13#include <linux/acpi.h>
  14#include <linux/pci-ats.h>
  15#include <linux/bitmap.h>
  16#include <linux/slab.h>
  17#include <linux/debugfs.h>
  18#include <linux/scatterlist.h>
  19#include <linux/dma-map-ops.h>
  20#include <linux/dma-direct.h>
  21#include <linux/idr.h>
  22#include <linux/iommu-helper.h>
  23#include <linux/delay.h>
  24#include <linux/amd-iommu.h>
  25#include <linux/notifier.h>
  26#include <linux/export.h>
  27#include <linux/irq.h>
  28#include <linux/msi.h>
  29#include <linux/irqdomain.h>
  30#include <linux/percpu.h>
  31#include <linux/io-pgtable.h>
  32#include <linux/cc_platform.h>
  33#include <asm/irq_remapping.h>
  34#include <asm/io_apic.h>
  35#include <asm/apic.h>
  36#include <asm/hw_irq.h>
  37#include <asm/proto.h>
  38#include <asm/iommu.h>
  39#include <asm/gart.h>
  40#include <asm/dma.h>
  41#include <uapi/linux/iommufd.h>
  42
  43#include "amd_iommu.h"
  44#include "../dma-iommu.h"
  45#include "../irq_remapping.h"
  46#include "../iommu-pages.h"
  47
  48#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
  49
  50/* Reserved IOVA ranges */
  51#define MSI_RANGE_START		(0xfee00000)
  52#define MSI_RANGE_END		(0xfeefffff)
  53#define HT_RANGE_START		(0xfd00000000ULL)
  54#define HT_RANGE_END		(0xffffffffffULL)
  55
  56LIST_HEAD(ioapic_map);
  57LIST_HEAD(hpet_map);
  58LIST_HEAD(acpihid_map);
  59
  60const struct iommu_ops amd_iommu_ops;
  61static const struct iommu_dirty_ops amd_dirty_ops;
  62
  63int amd_iommu_max_glx_val = -1;
  64
  65/*
  66 * general struct to manage commands sent to an IOMMU
  67 */
  68struct iommu_cmd {
  69	u32 data[4];
  70};
  71
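/*
 * Editor's illustration (not part of the upstream file): commands are built
 * by filling the four data words above and stamping the 4-bit opcode into
 * bits 31:28 of data[1] via CMD_SET_TYPE(). Assuming CMD_COMPL_WAIT is
 * opcode 0x01 (its value in amd_iommu_types.h), a minimal sketch:
 */
#if 0	/* illustrative sketch only, never compiled */
static void cmd_set_type_example(void)
{
	struct iommu_cmd cmd = {};

	CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);	/* cmd.data[1] |= 0x1 << 28 */
	/* cmd.data[1] is now 0x10000000 */
}
#endif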
  72/*
  73 * AMD IOMMU allows up to 2^16 different protection domains. Domain IDs
  74 * are allocated from the IDA below to track which ones are already in use.
  75 */
  76DEFINE_IDA(pdom_ids);
  77
  78struct kmem_cache *amd_iommu_irq_cache;
  79
  80static int amd_iommu_attach_device(struct iommu_domain *dom,
  81				   struct device *dev);
  82
  83static void set_dte_entry(struct amd_iommu *iommu,
  84			  struct iommu_dev_data *dev_data);
  85
  86/****************************************************************************
  87 *
  88 * Helper functions
  89 *
  90 ****************************************************************************/
  91
  92static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom)
  93{
  94	return (pdom && (pdom->pd_mode == PD_MODE_V2));
  95}
  96
  97static inline bool pdom_is_in_pt_mode(struct protection_domain *pdom)
  98{
  99	return (pdom->domain.type == IOMMU_DOMAIN_IDENTITY);
 100}
 101
 102/*
 103 * We cannot support PASID w/ existing v1 page table in the same domain
 104 * since it will be nested. However, existing domain w/ v2 page table
 105 * or passthrough mode can be used for PASID.
 106 */
 107static inline bool pdom_is_sva_capable(struct protection_domain *pdom)
 108{
 109	return pdom_is_v2_pgtbl_mode(pdom) || pdom_is_in_pt_mode(pdom);
 110}
 111
 112static inline int get_acpihid_device_id(struct device *dev,
 113					struct acpihid_map_entry **entry)
 114{
 115	struct acpi_device *adev = ACPI_COMPANION(dev);
 116	struct acpihid_map_entry *p;
 117
 118	if (!adev)
 119		return -ENODEV;
 120
 121	list_for_each_entry(p, &acpihid_map, list) {
 122		if (acpi_dev_hid_uid_match(adev, p->hid,
 123					   p->uid[0] ? p->uid : NULL)) {
 124			if (entry)
 125				*entry = p;
 126			return p->devid;
 127		}
 128	}
 129	return -EINVAL;
 130}
 131
 132static inline int get_device_sbdf_id(struct device *dev)
 133{
 134	int sbdf;
 135
 136	if (dev_is_pci(dev))
 137		sbdf = get_pci_sbdf_id(to_pci_dev(dev));
 138	else
 139		sbdf = get_acpihid_device_id(dev, NULL);
 140
 141	return sbdf;
 142}
 143
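/*
 * Editor's illustration (not part of the upstream file): the "sbdf" value
 * used throughout this driver packs the PCI segment into bits 31:16 and the
 * classic 16-bit bus/device/function ID into bits 15:0, which is what
 * PCI_SBDF_TO_SEGID() and PCI_SBDF_TO_DEVID() pull back apart. A sketch for
 * a hypothetical device 0001:02:03.4:
 */
#if 0	/* illustrative sketch only, never compiled */
static void sbdf_layout_example(void)
{
	u16 devid = PCI_DEVID(0x02, PCI_DEVFN(0x03, 0x4));	/* 0x021c */
	int sbdf  = (0x0001 << 16) | devid;			/* 0x0001021c */

	/* PCI_SBDF_TO_SEGID(sbdf) == 0x0001, PCI_SBDF_TO_DEVID(sbdf) == 0x021c */
}
#endif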
 144struct dev_table_entry *get_dev_table(struct amd_iommu *iommu)
 145{
 146	struct dev_table_entry *dev_table;
 147	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
 148
 149	BUG_ON(pci_seg == NULL);
 150	dev_table = pci_seg->dev_table;
 151	BUG_ON(dev_table == NULL);
 152
 153	return dev_table;
 154}
 155
 156static inline u16 get_device_segment(struct device *dev)
 157{
 158	u16 seg;
 159
 160	if (dev_is_pci(dev)) {
 161		struct pci_dev *pdev = to_pci_dev(dev);
 162
 163		seg = pci_domain_nr(pdev->bus);
 164	} else {
 165		u32 devid = get_acpihid_device_id(dev, NULL);
 166
 167		seg = PCI_SBDF_TO_SEGID(devid);
 168	}
 169
 170	return seg;
 171}
 172
 173/* Writes the specific IOMMU for a device into the PCI segment rlookup table */
 174void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid)
 175{
 176	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
 177
 178	pci_seg->rlookup_table[devid] = iommu;
 179}
 180
 181static struct amd_iommu *__rlookup_amd_iommu(u16 seg, u16 devid)
 182{
 183	struct amd_iommu_pci_seg *pci_seg;
 184
 185	for_each_pci_segment(pci_seg) {
 186		if (pci_seg->id == seg)
 187			return pci_seg->rlookup_table[devid];
 188	}
 189	return NULL;
 190}
 191
 192static struct amd_iommu *rlookup_amd_iommu(struct device *dev)
 193{
 194	u16 seg = get_device_segment(dev);
 195	int devid = get_device_sbdf_id(dev);
 196
 197	if (devid < 0)
 198		return NULL;
 199	return __rlookup_amd_iommu(seg, PCI_SBDF_TO_DEVID(devid));
 200}
 201
 202static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid)
 203{
 204	struct iommu_dev_data *dev_data;
 205	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
 206
 207	dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
 208	if (!dev_data)
 209		return NULL;
 210
 211	mutex_init(&dev_data->mutex);
 212	dev_data->devid = devid;
 213	ratelimit_default_init(&dev_data->rs);
 214
 215	llist_add(&dev_data->dev_data_list, &pci_seg->dev_data_list);
 216	return dev_data;
 217}
 218
 219static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid)
 220{
 221	struct iommu_dev_data *dev_data;
 222	struct llist_node *node;
 223	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
 224
 225	if (llist_empty(&pci_seg->dev_data_list))
 226		return NULL;
 227
 228	node = pci_seg->dev_data_list.first;
 229	llist_for_each_entry(dev_data, node, dev_data_list) {
 230		if (dev_data->devid == devid)
 231			return dev_data;
 232	}
 233
 234	return NULL;
 235}
 236
 237static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
 238{
 239	struct amd_iommu *iommu;
 240	struct dev_table_entry *dev_table;
 241	u16 devid = pci_dev_id(pdev);
 242
 243	if (devid == alias)
 244		return 0;
 245
 246	iommu = rlookup_amd_iommu(&pdev->dev);
 247	if (!iommu)
 248		return 0;
 249
 250	amd_iommu_set_rlookup_table(iommu, alias);
 251	dev_table = get_dev_table(iommu);
 252	memcpy(dev_table[alias].data,
 253	       dev_table[devid].data,
 254	       sizeof(dev_table[alias].data));
 255
 256	return 0;
 257}
 258
 259static void clone_aliases(struct amd_iommu *iommu, struct device *dev)
 260{
 261	struct pci_dev *pdev;
 262
 263	if (!dev_is_pci(dev))
 264		return;
 265	pdev = to_pci_dev(dev);
 266
 267	/*
 268	 * The IVRS alias stored in the alias table may not be
 269	 * part of the PCI DMA aliases if its bus differs
 270	 * from the original device.
 271	 */
 272	clone_alias(pdev, iommu->pci_seg->alias_table[pci_dev_id(pdev)], NULL);
 273
 274	pci_for_each_dma_alias(pdev, clone_alias, NULL);
 275}
 276
 277static void setup_aliases(struct amd_iommu *iommu, struct device *dev)
 278{
 279	struct pci_dev *pdev = to_pci_dev(dev);
 280	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
 281	u16 ivrs_alias;
 282
 283	/* For ACPI HID devices, there are no aliases */
 284	if (!dev_is_pci(dev))
 285		return;
 286
 287	/*
 288	 * Add the IVRS alias to the pci aliases if it is on the same
 289	 * bus. The IVRS table may know about a quirk that we don't.
 290	 */
 291	ivrs_alias = pci_seg->alias_table[pci_dev_id(pdev)];
 292	if (ivrs_alias != pci_dev_id(pdev) &&
 293	    PCI_BUS_NUM(ivrs_alias) == pdev->bus->number)
 294		pci_add_dma_alias(pdev, ivrs_alias & 0xff, 1);
 295
 296	clone_aliases(iommu, dev);
 297}
 298
 299static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid)
 300{
 301	struct iommu_dev_data *dev_data;
 302
 303	dev_data = search_dev_data(iommu, devid);
 304
 305	if (dev_data == NULL) {
 306		dev_data = alloc_dev_data(iommu, devid);
 307		if (!dev_data)
 308			return NULL;
 309
 310		if (translation_pre_enabled(iommu))
 311			dev_data->defer_attach = true;
 312	}
 313
 314	return dev_data;
 315}
 316
 317/*
 318* Find or create an IOMMU group for an acpihid device.
 319*/
 320static struct iommu_group *acpihid_device_group(struct device *dev)
 321{
 322	struct acpihid_map_entry *p, *entry = NULL;
 323	int devid;
 324
 325	devid = get_acpihid_device_id(dev, &entry);
 326	if (devid < 0)
 327		return ERR_PTR(devid);
 328
 329	list_for_each_entry(p, &acpihid_map, list) {
 330		if ((devid == p->devid) && p->group)
 331			entry->group = p->group;
 332	}
 333
 334	if (!entry->group)
 335		entry->group = generic_device_group(dev);
 336	else
 337		iommu_group_ref_get(entry->group);
 338
 339	return entry->group;
 340}
 341
 342static inline bool pdev_pasid_supported(struct iommu_dev_data *dev_data)
 343{
 344	return (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP);
 345}
 346
 347static u32 pdev_get_caps(struct pci_dev *pdev)
 348{
 349	int features;
 350	u32 flags = 0;
 351
 352	if (pci_ats_supported(pdev))
 353		flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
 354
 355	if (pci_pri_supported(pdev))
 356		flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
 357
 358	features = pci_pasid_features(pdev);
 359	if (features >= 0) {
 360		flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
 361
 362		if (features & PCI_PASID_CAP_EXEC)
 363			flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
 364
 365		if (features & PCI_PASID_CAP_PRIV)
 366			flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
 367	}
 368
 369	return flags;
 370}
 371
 372static inline int pdev_enable_cap_ats(struct pci_dev *pdev)
 373{
 374	struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
 375	int ret = -EINVAL;
 376
 377	if (dev_data->ats_enabled)
 378		return 0;
 379
 380	if (amd_iommu_iotlb_sup &&
 381	    (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP)) {
 382		ret = pci_enable_ats(pdev, PAGE_SHIFT);
 383		if (!ret) {
 384			dev_data->ats_enabled = 1;
 385			dev_data->ats_qdep    = pci_ats_queue_depth(pdev);
 386		}
 387	}
 388
 389	return ret;
 390}
 391
 392static inline void pdev_disable_cap_ats(struct pci_dev *pdev)
 393{
 394	struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
 395
 396	if (dev_data->ats_enabled) {
 397		pci_disable_ats(pdev);
 398		dev_data->ats_enabled = 0;
 399	}
 400}
 401
 402static inline int pdev_enable_cap_pri(struct pci_dev *pdev)
 403{
 404	struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
 405	int ret = -EINVAL;
 406
 407	if (dev_data->pri_enabled)
 408		return 0;
 409
 410	if (!dev_data->ats_enabled)
 411		return 0;
 412
 413	if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) {
 414		/*
 415		 * First reset the PRI state of the device.
 416		 * FIXME: Hardcode number of outstanding requests for now
 417		 */
 418		if (!pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) {
 419			dev_data->pri_enabled = 1;
 420			dev_data->pri_tlp     = pci_prg_resp_pasid_required(pdev);
 421
 422			ret = 0;
 423		}
 424	}
 425
 426	return ret;
 427}
 428
 429static inline void pdev_disable_cap_pri(struct pci_dev *pdev)
 430{
 431	struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
 432
 433	if (dev_data->pri_enabled) {
 434		pci_disable_pri(pdev);
 435		dev_data->pri_enabled = 0;
 436	}
 437}
 438
 439static inline int pdev_enable_cap_pasid(struct pci_dev *pdev)
 440{
 441	struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
 442	int ret = -EINVAL;
 443
 444	if (dev_data->pasid_enabled)
 445		return 0;
 446
 447	if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) {
 448		/* Only allow access to user-accessible pages */
 449		ret = pci_enable_pasid(pdev, 0);
 450		if (!ret)
 451			dev_data->pasid_enabled = 1;
 452	}
 453
 454	return ret;
 455}
 456
 457static inline void pdev_disable_cap_pasid(struct pci_dev *pdev)
 458{
 459	struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
 460
 461	if (dev_data->pasid_enabled) {
 462		pci_disable_pasid(pdev);
 463		dev_data->pasid_enabled = 0;
 464	}
 465}
 466
 467static void pdev_enable_caps(struct pci_dev *pdev)
 468{
 469	pdev_enable_cap_ats(pdev);
 470	pdev_enable_cap_pasid(pdev);
 471	pdev_enable_cap_pri(pdev);
 472}
 473
 474static void pdev_disable_caps(struct pci_dev *pdev)
 475{
 476	pdev_disable_cap_ats(pdev);
 477	pdev_disable_cap_pasid(pdev);
 478	pdev_disable_cap_pri(pdev);
 479}
 480
 481/*
 482 * This function checks if the driver got a valid device from the caller to
 483 * avoid dereferencing invalid pointers.
 484 */
 485static bool check_device(struct device *dev)
 486{
 487	struct amd_iommu_pci_seg *pci_seg;
 488	struct amd_iommu *iommu;
 489	int devid, sbdf;
 490
 491	if (!dev)
 492		return false;
 493
 494	sbdf = get_device_sbdf_id(dev);
 495	if (sbdf < 0)
 496		return false;
 497	devid = PCI_SBDF_TO_DEVID(sbdf);
 498
 499	iommu = rlookup_amd_iommu(dev);
 500	if (!iommu)
 501		return false;
 502
 503	/* Out of our scope? */
 504	pci_seg = iommu->pci_seg;
 505	if (devid > pci_seg->last_bdf)
 506		return false;
 507
 508	return true;
 509}
 510
 511static int iommu_init_device(struct amd_iommu *iommu, struct device *dev)
 512{
 513	struct iommu_dev_data *dev_data;
 514	int devid, sbdf;
 515
 516	if (dev_iommu_priv_get(dev))
 517		return 0;
 518
 519	sbdf = get_device_sbdf_id(dev);
 520	if (sbdf < 0)
 521		return sbdf;
 522
 523	devid = PCI_SBDF_TO_DEVID(sbdf);
 524	dev_data = find_dev_data(iommu, devid);
 525	if (!dev_data)
 526		return -ENOMEM;
 527
 528	dev_data->dev = dev;
 529	setup_aliases(iommu, dev);
 530
 531	/*
 532	 * By default we use passthrough mode for IOMMUv2-capable devices.
 533	 * But if amd_iommu=force_isolation is set (e.g. to debug DMA to
 534	 * invalid address), we ignore the capability for the device so
 535	 * it'll be forced to go into translation mode.
 536	 */
 537	if ((iommu_default_passthrough() || !amd_iommu_force_isolation) &&
 538	    dev_is_pci(dev) && amd_iommu_gt_ppr_supported()) {
 539		dev_data->flags = pdev_get_caps(to_pci_dev(dev));
 540	}
 541
 542	dev_iommu_priv_set(dev, dev_data);
 543
 544	return 0;
 545}
 546
 547static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev)
 548{
 549	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
 550	struct dev_table_entry *dev_table = get_dev_table(iommu);
 551	int devid, sbdf;
 552
 553	sbdf = get_device_sbdf_id(dev);
 554	if (sbdf < 0)
 555		return;
 556
 557	devid = PCI_SBDF_TO_DEVID(sbdf);
 558	pci_seg->rlookup_table[devid] = NULL;
 559	memset(&dev_table[devid], 0, sizeof(struct dev_table_entry));
 560
 561	setup_aliases(iommu, dev);
 562}
 563
 564
 565/****************************************************************************
 566 *
 567 * Interrupt handling functions
 568 *
 569 ****************************************************************************/
 570
 571static void dump_dte_entry(struct amd_iommu *iommu, u16 devid)
 572{
 573	int i;
 574	struct dev_table_entry *dev_table = get_dev_table(iommu);
 575
 576	for (i = 0; i < 4; ++i)
 577		pr_err("DTE[%d]: %016llx\n", i, dev_table[devid].data[i]);
 578}
 579
 580static void dump_command(unsigned long phys_addr)
 581{
 582	struct iommu_cmd *cmd = iommu_phys_to_virt(phys_addr);
 583	int i;
 584
 585	for (i = 0; i < 4; ++i)
 586		pr_err("CMD[%d]: %08x\n", i, cmd->data[i]);
 587}
 588
 589static void amd_iommu_report_rmp_hw_error(struct amd_iommu *iommu, volatile u32 *event)
 590{
 591	struct iommu_dev_data *dev_data = NULL;
 592	int devid, vmg_tag, flags;
 593	struct pci_dev *pdev;
 594	u64 spa;
 595
 596	devid   = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
 597	vmg_tag = (event[1]) & 0xFFFF;
 598	flags   = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
 599	spa     = ((u64)event[3] << 32) | (event[2] & 0xFFFFFFF8);
 600
 601	pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid),
 602					   devid & 0xff);
 603	if (pdev)
 604		dev_data = dev_iommu_priv_get(&pdev->dev);
 605
 606	if (dev_data) {
 607		if (__ratelimit(&dev_data->rs)) {
 608			pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
 609				vmg_tag, spa, flags);
 610		}
 611	} else {
 612		pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%04x:%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
 613			iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 614			vmg_tag, spa, flags);
 615	}
 616
 617	if (pdev)
 618		pci_dev_put(pdev);
 619}
 620
 621static void amd_iommu_report_rmp_fault(struct amd_iommu *iommu, volatile u32 *event)
 622{
 623	struct iommu_dev_data *dev_data = NULL;
 624	int devid, flags_rmp, vmg_tag, flags;
 625	struct pci_dev *pdev;
 626	u64 gpa;
 627
 628	devid     = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
 629	flags_rmp = (event[0] >> EVENT_FLAGS_SHIFT) & 0xFF;
 630	vmg_tag   = (event[1]) & 0xFFFF;
 631	flags     = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
 632	gpa       = ((u64)event[3] << 32) | event[2];
 633
 634	pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid),
 635					   devid & 0xff);
 636	if (pdev)
 637		dev_data = dev_iommu_priv_get(&pdev->dev);
 638
 639	if (dev_data) {
 640		if (__ratelimit(&dev_data->rs)) {
 641			pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
 642				vmg_tag, gpa, flags_rmp, flags);
 643		}
 644	} else {
 645		pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%04x:%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
 646			iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 647			vmg_tag, gpa, flags_rmp, flags);
 648	}
 649
 650	if (pdev)
 651		pci_dev_put(pdev);
 652}
 653
 654#define IS_IOMMU_MEM_TRANSACTION(flags)		\
 655	(((flags) & EVENT_FLAG_I) == 0)
 656
 657#define IS_WRITE_REQUEST(flags)			\
 658	((flags) & EVENT_FLAG_RW)
 659
 660static void amd_iommu_report_page_fault(struct amd_iommu *iommu,
 661					u16 devid, u16 domain_id,
 662					u64 address, int flags)
 663{
 664	struct iommu_dev_data *dev_data = NULL;
 665	struct pci_dev *pdev;
 666
 667	pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid),
 668					   devid & 0xff);
 669	if (pdev)
 670		dev_data = dev_iommu_priv_get(&pdev->dev);
 671
 672	if (dev_data) {
 673		/*
 674		 * If this is a DMA fault (for which the I(nterrupt)
 675		 * bit will be unset), allow report_iommu_fault() to
 676		 * prevent logging it.
 677		 */
 678		if (IS_IOMMU_MEM_TRANSACTION(flags)) {
 679			/* Device not attached to domain properly */
 680			if (dev_data->domain == NULL) {
 681				pr_err_ratelimited("Event logged [Device not attached to domain properly]\n");
 682				pr_err_ratelimited("  device=%04x:%02x:%02x.%x domain=0x%04x\n",
 683						   iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
 684						   PCI_FUNC(devid), domain_id);
 685				goto out;
 686			}
 687
 688			if (!report_iommu_fault(&dev_data->domain->domain,
 689						&pdev->dev, address,
 690						IS_WRITE_REQUEST(flags) ?
 691							IOMMU_FAULT_WRITE :
 692							IOMMU_FAULT_READ))
 693				goto out;
 694		}
 695
 696		if (__ratelimit(&dev_data->rs)) {
 697			pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
 698				domain_id, address, flags);
 699		}
 700	} else {
 701		pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%04x:%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
 702			iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 703			domain_id, address, flags);
 704	}
 705
 706out:
 707	if (pdev)
 708		pci_dev_put(pdev);
 709}
 710
 711static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 712{
 713	struct device *dev = iommu->iommu.dev;
 714	int type, devid, flags, tag;
 715	volatile u32 *event = __evt;
 716	int count = 0;
 717	u64 address;
 718	u32 pasid;
 719
 720retry:
 721	type    = (event[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK;
 722	devid   = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
 723	pasid   = (event[0] & EVENT_DOMID_MASK_HI) |
 724		  (event[1] & EVENT_DOMID_MASK_LO);
 725	flags   = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
 726	address = (u64)(((u64)event[3]) << 32) | event[2];
 727
 728	if (type == 0) {
 729		/* Did we hit the erratum? */
 730		if (++count == LOOP_TIMEOUT) {
 731			pr_err("No event written to event log\n");
 732			return;
 733		}
 734		udelay(1);
 735		goto retry;
 736	}
 737
 738	if (type == EVENT_TYPE_IO_FAULT) {
 739		amd_iommu_report_page_fault(iommu, devid, pasid, address, flags);
 740		return;
 741	}
 742
 743	switch (type) {
 744	case EVENT_TYPE_ILL_DEV:
 745		dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
 746			iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 747			pasid, address, flags);
 748		dump_dte_entry(iommu, devid);
 749		break;
 750	case EVENT_TYPE_DEV_TAB_ERR:
 751		dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%04x:%02x:%02x.%x "
 752			"address=0x%llx flags=0x%04x]\n",
 753			iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 754			address, flags);
 755		break;
 756	case EVENT_TYPE_PAGE_TAB_ERR:
 757		dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%04x:%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n",
 758			iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 759			pasid, address, flags);
 760		break;
 761	case EVENT_TYPE_ILL_CMD:
 762		dev_err(dev, "Event logged [ILLEGAL_COMMAND_ERROR address=0x%llx]\n", address);
 763		dump_command(address);
 764		break;
 765	case EVENT_TYPE_CMD_HARD_ERR:
 766		dev_err(dev, "Event logged [COMMAND_HARDWARE_ERROR address=0x%llx flags=0x%04x]\n",
 767			address, flags);
 768		break;
 769	case EVENT_TYPE_IOTLB_INV_TO:
 770		dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%04x:%02x:%02x.%x address=0x%llx]\n",
 771			iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 772			address);
 773		break;
 774	case EVENT_TYPE_INV_DEV_REQ:
 775		dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
 776			iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 777			pasid, address, flags);
 778		break;
 779	case EVENT_TYPE_RMP_FAULT:
 780		amd_iommu_report_rmp_fault(iommu, event);
 781		break;
 782	case EVENT_TYPE_RMP_HW_ERR:
 783		amd_iommu_report_rmp_hw_error(iommu, event);
 784		break;
 785	case EVENT_TYPE_INV_PPR_REQ:
 786		pasid = PPR_PASID(*((u64 *)__evt));
 787		tag = event[1] & 0x03FF;
 788		dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n",
 789			iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 790			pasid, address, flags, tag);
 791		break;
 792	default:
 793		dev_err(dev, "Event logged [UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n",
 794			event[0], event[1], event[2], event[3]);
 795	}
 796
 797	/*
 798	 * To detect hardware erratum 732 we need to clear the
 799	 * entry back to zero. This issue does not exist on an SNP-
 800	 * enabled system. Also, this buffer is not writeable on an
 801	 * SNP-enabled system.
 802	 */
 803	if (!amd_iommu_snp_en)
 804		memset(__evt, 0, 4 * sizeof(u32));
 805}
 806
 807static void iommu_poll_events(struct amd_iommu *iommu)
 808{
 809	u32 head, tail;
 810
 811	head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 812	tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
 813
 814	while (head != tail) {
 815		iommu_print_event(iommu, iommu->evt_buf + head);
 816
 817		/* Update head pointer of hardware ring-buffer */
 818		head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE;
 819		writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 820	}
 821
 822}
 823
 824#ifdef CONFIG_IRQ_REMAP
 825static int (*iommu_ga_log_notifier)(u32);
 826
 827int amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
 828{
 829	iommu_ga_log_notifier = notifier;
 830
 831	return 0;
 832}
 833EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier);
 834
 835static void iommu_poll_ga_log(struct amd_iommu *iommu)
 836{
 837	u32 head, tail;
 838
 839	if (iommu->ga_log == NULL)
 840		return;
 841
 842	head = readl(iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
 843	tail = readl(iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
 844
 845	while (head != tail) {
 846		volatile u64 *raw;
 847		u64 log_entry;
 848
 849		raw = (u64 *)(iommu->ga_log + head);
 850
 851		/* Avoid memcpy function-call overhead */
 852		log_entry = *raw;
 853
 854		/* Update head pointer of hardware ring-buffer */
 855		head = (head + GA_ENTRY_SIZE) % GA_LOG_SIZE;
 856		writel(head, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
 857
 858		/* Handle GA entry */
 859		switch (GA_REQ_TYPE(log_entry)) {
 860		case GA_GUEST_NR:
 861			if (!iommu_ga_log_notifier)
 862				break;
 863
 864			pr_debug("%s: devid=%#x, ga_tag=%#x\n",
 865				 __func__, GA_DEVID(log_entry),
 866				 GA_TAG(log_entry));
 867
 868			if (iommu_ga_log_notifier(GA_TAG(log_entry)) != 0)
 869				pr_err("GA log notifier failed.\n");
 870			break;
 871		default:
 872			break;
 873		}
 874	}
 875}
 876
 877static void
 878amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu)
 879{
 880	if (!irq_remapping_enabled || !dev_is_pci(dev) ||
 881	    !pci_dev_has_default_msi_parent_domain(to_pci_dev(dev)))
 882		return;
 883
 884	dev_set_msi_domain(dev, iommu->ir_domain);
 885}
 886
 887#else /* CONFIG_IRQ_REMAP */
 888static inline void
 889amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) { }
 890#endif /* !CONFIG_IRQ_REMAP */
 891
 892static void amd_iommu_handle_irq(void *data, const char *evt_type,
 893				 u32 int_mask, u32 overflow_mask,
 894				 void (*int_handler)(struct amd_iommu *),
 895				 void (*overflow_handler)(struct amd_iommu *))
 896{
 897	struct amd_iommu *iommu = (struct amd_iommu *) data;
 898	u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 899	u32 mask = int_mask | overflow_mask;
 900
 901	while (status & mask) {
 902		/* Enable interrupt sources again */
 903		writel(mask, iommu->mmio_base + MMIO_STATUS_OFFSET);
 904
 905		if (int_handler) {
 906			pr_devel("Processing IOMMU (ivhd%d) %s Log\n",
 907				 iommu->index, evt_type);
 908			int_handler(iommu);
 909		}
 910
 911		if ((status & overflow_mask) && overflow_handler)
 912			overflow_handler(iommu);
 913
 914		/*
 915		 * Hardware bug: ERBT1312
 916		 * When re-enabling the interrupt (by writing 1
 917		 * to clear the bit), the hardware might also try to set
 918		 * the interrupt bit in the event status register.
 919		 * In this scenario, the bit will be set and will disable
 920		 * subsequent interrupts.
 921		 *
 922		 * Workaround: The IOMMU driver should read back the
 923		 * status register and check if the interrupt bits are cleared.
 924		 * If not, the driver needs to go through the interrupt handler
 925		 * again and re-clear the bits.
 926		 */
 927		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 928	}
 929}
 930
 931irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data)
 932{
 933	amd_iommu_handle_irq(data, "Evt", MMIO_STATUS_EVT_INT_MASK,
 934			     MMIO_STATUS_EVT_OVERFLOW_MASK,
 935			     iommu_poll_events, amd_iommu_restart_event_logging);
 936
 937	return IRQ_HANDLED;
 938}
 939
 940irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data)
 941{
 942	amd_iommu_handle_irq(data, "PPR", MMIO_STATUS_PPR_INT_MASK,
 943			     MMIO_STATUS_PPR_OVERFLOW_MASK,
 944			     amd_iommu_poll_ppr_log, amd_iommu_restart_ppr_log);
 945
 946	return IRQ_HANDLED;
 947}
 948
 949irqreturn_t amd_iommu_int_thread_galog(int irq, void *data)
 950{
 951#ifdef CONFIG_IRQ_REMAP
 952	amd_iommu_handle_irq(data, "GA", MMIO_STATUS_GALOG_INT_MASK,
 953			     MMIO_STATUS_GALOG_OVERFLOW_MASK,
 954			     iommu_poll_ga_log, amd_iommu_restart_ga_log);
 955#endif
 956
 957	return IRQ_HANDLED;
 958}
 959
 960irqreturn_t amd_iommu_int_thread(int irq, void *data)
 961{
 962	amd_iommu_int_thread_evtlog(irq, data);
 963	amd_iommu_int_thread_pprlog(irq, data);
 964	amd_iommu_int_thread_galog(irq, data);
 965
 966	return IRQ_HANDLED;
 967}
 968
 969irqreturn_t amd_iommu_int_handler(int irq, void *data)
 970{
 971	return IRQ_WAKE_THREAD;
 972}
 973
 974/****************************************************************************
 975 *
 976 * IOMMU command queuing functions
 977 *
 978 ****************************************************************************/
 979
 980static int wait_on_sem(struct amd_iommu *iommu, u64 data)
 981{
 982	int i = 0;
 983
 984	while (*iommu->cmd_sem != data && i < LOOP_TIMEOUT) {
 985		udelay(1);
 986		i += 1;
 987	}
 988
 989	if (i == LOOP_TIMEOUT) {
 990		pr_alert("Completion-Wait loop timed out\n");
 991		return -EIO;
 992	}
 993
 994	return 0;
 995}
 996
 997static void copy_cmd_to_buffer(struct amd_iommu *iommu,
 998			       struct iommu_cmd *cmd)
 999{
1000	u8 *target;
1001	u32 tail;
1002
1003	/* Copy command to buffer */
1004	tail = iommu->cmd_buf_tail;
1005	target = iommu->cmd_buf + tail;
1006	memcpy(target, cmd, sizeof(*cmd));
1007
1008	tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
1009	iommu->cmd_buf_tail = tail;
1010
1011	/* Tell the IOMMU about it */
1012	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
1013}
1014
1015static void build_completion_wait(struct iommu_cmd *cmd,
1016				  struct amd_iommu *iommu,
1017				  u64 data)
1018{
1019	u64 paddr = iommu_virt_to_phys((void *)iommu->cmd_sem);
1020
1021	memset(cmd, 0, sizeof(*cmd));
1022	cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK;
1023	cmd->data[1] = upper_32_bits(paddr);
1024	cmd->data[2] = lower_32_bits(data);
1025	cmd->data[3] = upper_32_bits(data);
1026	CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
1027}
1028
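/*
 * Editor's illustration (not part of the upstream file): with the Store bit
 * set, the IOMMU writes the 64-bit @data value to the physical address held
 * in data[0]/data[1] once every previously queued command has completed;
 * wait_on_sem() above then simply polls iommu->cmd_sem for that value.
 * Assuming CMD_COMPL_WAIT_STORE_MASK is bit 0 and CMD_COMPL_WAIT is opcode
 * 0x01, a semaphore at physical address 0x123456780 with data 0x2a encodes
 * as:
 *
 *   data[0] = 0x23456781   (paddr bits 31:0, S=1 in bit 0)
 *   data[1] = 0x10000001   (paddr bits 63:32 | CMD_COMPL_WAIT << 28)
 *   data[2] = 0x0000002a   (lower half of @data)
 *   data[3] = 0x00000000   (upper half of @data)
 */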
1029static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
1030{
1031	memset(cmd, 0, sizeof(*cmd));
1032	cmd->data[0] = devid;
1033	CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
1034}
1035
1036/*
1037 * Builds an invalidation address which is suitable for one page or multiple
1038 * pages. Sets the size bit (S) if more than one page is flushed.
1039 */
1040static inline u64 build_inv_address(u64 address, size_t size)
1041{
1042	u64 pages, end, msb_diff;
1043
1044	pages = iommu_num_pages(address, size, PAGE_SIZE);
1045
1046	if (pages == 1)
1047		return address & PAGE_MASK;
1048
1049	end = address + size - 1;
1050
1051	/*
1052	 * msb_diff would hold the index of the most significant bit that
1053	 * flipped between the start and end.
1054	 */
1055	msb_diff = fls64(end ^ address) - 1;
1056
1057	/*
1058	 * Bits 63:52 are sign extended. If for some reason bit 51 is different
1059	 * between the start and the end, invalidate everything.
1060	 */
1061	if (unlikely(msb_diff > 51)) {
1062		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
1063	} else {
1064		/*
1065		 * The msb-bit must be clear on the address. Just set all the
1066		 * lower bits.
1067		 */
1068		address |= (1ull << msb_diff) - 1;
1069	}
1070
1071	/* Clear bits 11:0 */
1072	address &= PAGE_MASK;
1073
1074	/* Set the size bit - we flush more than one 4kb page */
1075	return address | CMD_INV_IOMMU_PAGES_SIZE_MASK;
1076}
1077
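/*
 * Editor's illustration (not part of the upstream file), assuming the size
 * bit CMD_INV_IOMMU_PAGES_SIZE_MASK is bit 0: flushing address 0x10000 with
 * size 0x3000 (three pages) works out as
 *
 *   end      = 0x10000 + 0x3000 - 1         = 0x12fff
 *   msb_diff = fls64(0x10000 ^ 0x12fff) - 1 = 13
 *   address |= (1 << 13) - 1                -> 0x11fff
 *   address &= PAGE_MASK                    -> 0x11000
 *   return     address | S                  -> 0x11001
 *
 * Per the AMD IOMMU spec, the lowest clear address bit above bit 11 (here
 * bit 13) encodes the range, so the hardware invalidates the naturally
 * aligned 16 KiB region 0x10000-0x13fff, which covers the requested 12 KiB.
 */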
1078static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
1079				  size_t size, u16 domid,
1080				  ioasid_t pasid, bool gn)
1081{
1082	u64 inv_address = build_inv_address(address, size);
1083
1084	memset(cmd, 0, sizeof(*cmd));
1085
1086	cmd->data[1] |= domid;
1087	cmd->data[2]  = lower_32_bits(inv_address);
1088	cmd->data[3]  = upper_32_bits(inv_address);
1089	/* PDE bit - we want to flush everything, not only the PTEs */
1090	cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
1091	if (gn) {
1092		cmd->data[0] |= pasid;
1093		cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
1094	}
1095	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
1096}
1097
1098static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
1099				  u64 address, size_t size,
1100				  ioasid_t pasid, bool gn)
1101{
1102	u64 inv_address = build_inv_address(address, size);
1103
1104	memset(cmd, 0, sizeof(*cmd));
1105
1106	cmd->data[0]  = devid;
1107	cmd->data[0] |= (qdep & 0xff) << 24;
1108	cmd->data[1]  = devid;
1109	cmd->data[2]  = lower_32_bits(inv_address);
1110	cmd->data[3]  = upper_32_bits(inv_address);
1111	if (gn) {
1112		cmd->data[0] |= ((pasid >> 8) & 0xff) << 16;
1113		cmd->data[1] |= (pasid & 0xff) << 16;
1114		cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
1115	}
1116
1117	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
1118}
1119
1120static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, u32 pasid,
1121			       int status, int tag, u8 gn)
1122{
1123	memset(cmd, 0, sizeof(*cmd));
1124
1125	cmd->data[0]  = devid;
1126	if (gn) {
1127		cmd->data[1]  = pasid;
1128		cmd->data[2]  = CMD_INV_IOMMU_PAGES_GN_MASK;
1129	}
1130	cmd->data[3]  = tag & 0x1ff;
1131	cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT;
1132
1133	CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR);
1134}
1135
1136static void build_inv_all(struct iommu_cmd *cmd)
1137{
1138	memset(cmd, 0, sizeof(*cmd));
1139	CMD_SET_TYPE(cmd, CMD_INV_ALL);
1140}
1141
1142static void build_inv_irt(struct iommu_cmd *cmd, u16 devid)
1143{
1144	memset(cmd, 0, sizeof(*cmd));
1145	cmd->data[0] = devid;
1146	CMD_SET_TYPE(cmd, CMD_INV_IRT);
1147}
1148
1149/*
1150 * Writes the command to the IOMMU's command buffer and informs the
1151 * hardware about the new command.
1152 */
1153static int __iommu_queue_command_sync(struct amd_iommu *iommu,
1154				      struct iommu_cmd *cmd,
1155				      bool sync)
1156{
1157	unsigned int count = 0;
1158	u32 left, next_tail;
1159
1160	next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
1161again:
1162	left      = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE;
1163
1164	if (left <= 0x20) {
1165		/* Skip udelay() the first time around */
1166		if (count++) {
1167			if (count == LOOP_TIMEOUT) {
1168				pr_err("Command buffer timeout\n");
1169				return -EIO;
1170			}
1171
1172			udelay(1);
1173		}
1174
1175		/* Update head and recheck remaining space */
1176		iommu->cmd_buf_head = readl(iommu->mmio_base +
1177					    MMIO_CMD_HEAD_OFFSET);
1178
1179		goto again;
1180	}
1181
1182	copy_cmd_to_buffer(iommu, cmd);
1183
1184	/* Do we need to make sure all commands are processed? */
1185	iommu->need_sync = sync;
1186
1187	return 0;
1188}
1189
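/*
 * Editor's illustration (not part of the upstream file): the free space in
 * the command ring is the wrap-around distance from the software tail to the
 * hardware head, and the 0x20 threshold keeps the tail from catching up with
 * the head. Assuming CMD_BUFFER_SIZE is 8192 (a power of two, so the
 * unsigned modulo yields the wrapped distance):
 */
#if 0	/* illustrative sketch only, never compiled */
static void cmd_ring_space_example(void)
{
	u32 head      = 0x0100;				/* hardware read pointer */
	u32 next_tail = (0x1f00 + 16) % 8192;		/* 0x1f10 */
	u32 left      = (head - next_tail) % 8192;	/* 0x01f0 -> 31 free slots */

	/* left > 0x20, so the command is copied and the new tail written back */
}
#endif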
1190static int iommu_queue_command_sync(struct amd_iommu *iommu,
1191				    struct iommu_cmd *cmd,
1192				    bool sync)
1193{
1194	unsigned long flags;
1195	int ret;
1196
1197	raw_spin_lock_irqsave(&iommu->lock, flags);
1198	ret = __iommu_queue_command_sync(iommu, cmd, sync);
1199	raw_spin_unlock_irqrestore(&iommu->lock, flags);
1200
1201	return ret;
1202}
1203
1204static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
1205{
1206	return iommu_queue_command_sync(iommu, cmd, true);
1207}
1208
1209/*
1210 * This function queues a completion wait command into the command
1211 * buffer of an IOMMU
1212 */
1213static int iommu_completion_wait(struct amd_iommu *iommu)
1214{
1215	struct iommu_cmd cmd;
1216	unsigned long flags;
1217	int ret;
1218	u64 data;
1219
1220	if (!iommu->need_sync)
1221		return 0;
1222
1223	data = atomic64_inc_return(&iommu->cmd_sem_val);
1224	build_completion_wait(&cmd, iommu, data);
1225
1226	raw_spin_lock_irqsave(&iommu->lock, flags);
1227
1228	ret = __iommu_queue_command_sync(iommu, &cmd, false);
1229	if (ret)
1230		goto out_unlock;
1231
1232	ret = wait_on_sem(iommu, data);
1233
1234out_unlock:
1235	raw_spin_unlock_irqrestore(&iommu->lock, flags);
1236
1237	return ret;
1238}
1239
1240static void domain_flush_complete(struct protection_domain *domain)
1241{
1242	struct pdom_iommu_info *pdom_iommu_info;
1243	unsigned long i;
1244
1245	lockdep_assert_held(&domain->lock);
1246
1247	/*
1248	 * Devices of this domain are behind this IOMMU;
1249	 * we need to wait for completion of all commands.
1250	 */
1251	xa_for_each(&domain->iommu_array, i, pdom_iommu_info)
1252		iommu_completion_wait(pdom_iommu_info->iommu);
1253}
1254
1255static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
1256{
1257	struct iommu_cmd cmd;
1258
1259	build_inv_dte(&cmd, devid);
1260
1261	return iommu_queue_command(iommu, &cmd);
1262}
1263
1264static void amd_iommu_flush_dte_all(struct amd_iommu *iommu)
1265{
1266	u32 devid;
1267	u16 last_bdf = iommu->pci_seg->last_bdf;
1268
1269	for (devid = 0; devid <= last_bdf; ++devid)
1270		iommu_flush_dte(iommu, devid);
1271
1272	iommu_completion_wait(iommu);
1273}
1274
1275/*
1276 * This function uses heavy locking and may disable irqs for some time. But
1277 * this is no issue because it is only called during resume.
1278 */
1279static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu)
1280{
1281	u32 dom_id;
1282	u16 last_bdf = iommu->pci_seg->last_bdf;
1283
1284	for (dom_id = 0; dom_id <= last_bdf; ++dom_id) {
1285		struct iommu_cmd cmd;
1286		build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
1287				      dom_id, IOMMU_NO_PASID, false);
1288		iommu_queue_command(iommu, &cmd);
1289	}
1290
1291	iommu_completion_wait(iommu);
1292}
1293
1294static void amd_iommu_flush_tlb_domid(struct amd_iommu *iommu, u32 dom_id)
1295{
1296	struct iommu_cmd cmd;
1297
1298	build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
1299			      dom_id, IOMMU_NO_PASID, false);
1300	iommu_queue_command(iommu, &cmd);
1301
1302	iommu_completion_wait(iommu);
1303}
1304
1305static void amd_iommu_flush_all(struct amd_iommu *iommu)
1306{
1307	struct iommu_cmd cmd;
1308
1309	build_inv_all(&cmd);
1310
1311	iommu_queue_command(iommu, &cmd);
1312	iommu_completion_wait(iommu);
1313}
1314
1315static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid)
1316{
1317	struct iommu_cmd cmd;
1318
1319	build_inv_irt(&cmd, devid);
1320
1321	iommu_queue_command(iommu, &cmd);
1322}
1323
1324static void amd_iommu_flush_irt_all(struct amd_iommu *iommu)
1325{
1326	u32 devid;
1327	u16 last_bdf = iommu->pci_seg->last_bdf;
1328
1329	if (iommu->irtcachedis_enabled)
1330		return;
1331
1332	for (devid = 0; devid <= last_bdf; devid++)
1333		iommu_flush_irt(iommu, devid);
1334
1335	iommu_completion_wait(iommu);
1336}
1337
1338void amd_iommu_flush_all_caches(struct amd_iommu *iommu)
1339{
1340	if (check_feature(FEATURE_IA)) {
1341		amd_iommu_flush_all(iommu);
1342	} else {
1343		amd_iommu_flush_dte_all(iommu);
1344		amd_iommu_flush_irt_all(iommu);
1345		amd_iommu_flush_tlb_all(iommu);
1346	}
1347}
1348
1349/*
1350 * Command send function for flushing on-device TLB
1351 */
1352static int device_flush_iotlb(struct iommu_dev_data *dev_data, u64 address,
1353			      size_t size, ioasid_t pasid, bool gn)
1354{
1355	struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
1356	struct iommu_cmd cmd;
1357	int qdep = dev_data->ats_qdep;
1358
1359	build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address,
1360			      size, pasid, gn);
1361
1362	return iommu_queue_command(iommu, &cmd);
1363}
1364
1365static int device_flush_dte_alias(struct pci_dev *pdev, u16 alias, void *data)
1366{
1367	struct amd_iommu *iommu = data;
1368
1369	return iommu_flush_dte(iommu, alias);
1370}
1371
1372/*
1373 * Command send function for invalidating a device table entry
1374 */
1375static int device_flush_dte(struct iommu_dev_data *dev_data)
1376{
1377	struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
1378	struct pci_dev *pdev = NULL;
1379	struct amd_iommu_pci_seg *pci_seg;
1380	u16 alias;
1381	int ret;
1382
1383	if (dev_is_pci(dev_data->dev))
1384		pdev = to_pci_dev(dev_data->dev);
1385
1386	if (pdev)
1387		ret = pci_for_each_dma_alias(pdev,
1388					     device_flush_dte_alias, iommu);
1389	else
1390		ret = iommu_flush_dte(iommu, dev_data->devid);
1391	if (ret)
1392		return ret;
1393
1394	pci_seg = iommu->pci_seg;
1395	alias = pci_seg->alias_table[dev_data->devid];
1396	if (alias != dev_data->devid) {
1397		ret = iommu_flush_dte(iommu, alias);
1398		if (ret)
1399			return ret;
1400	}
1401
1402	if (dev_data->ats_enabled) {
1403		/* Invalidate the entire contents of an IOTLB */
1404		ret = device_flush_iotlb(dev_data, 0, ~0UL,
1405					 IOMMU_NO_PASID, false);
1406	}
1407
1408	return ret;
1409}
1410
1411static int domain_flush_pages_v2(struct protection_domain *pdom,
1412				 u64 address, size_t size)
1413{
1414	struct iommu_dev_data *dev_data;
1415	struct iommu_cmd cmd;
1416	int ret = 0;
1417
1418	lockdep_assert_held(&pdom->lock);
1419	list_for_each_entry(dev_data, &pdom->dev_list, list) {
1420		struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev);
1421		u16 domid = dev_data->gcr3_info.domid;
1422
1423		build_inv_iommu_pages(&cmd, address, size,
1424				      domid, IOMMU_NO_PASID, true);
1425
1426		ret |= iommu_queue_command(iommu, &cmd);
1427	}
1428
1429	return ret;
1430}
1431
1432static int domain_flush_pages_v1(struct protection_domain *pdom,
1433				 u64 address, size_t size)
1434{
1435	struct pdom_iommu_info *pdom_iommu_info;
1436	struct iommu_cmd cmd;
1437	int ret = 0;
1438	unsigned long i;
1439
1440	lockdep_assert_held(&pdom->lock);
1441
1442	build_inv_iommu_pages(&cmd, address, size,
1443			      pdom->id, IOMMU_NO_PASID, false);
1444
1445	xa_for_each(&pdom->iommu_array, i, pdom_iommu_info) {
1446		/*
1447		 * Devices of this domain are behind this IOMMU;
1448		 * we need a TLB flush.
1449		 */
1450		ret |= iommu_queue_command(pdom_iommu_info->iommu, &cmd);
1451	}
1452
1453	return ret;
1454}
1455
1456/*
1457 * TLB invalidation function which is called from the mapping functions.
1458 * It flushes range of PTEs of the domain.
1459 */
1460static void __domain_flush_pages(struct protection_domain *domain,
1461				 u64 address, size_t size)
1462{
1463	struct iommu_dev_data *dev_data;
1464	int ret = 0;
1465	ioasid_t pasid = IOMMU_NO_PASID;
1466	bool gn = false;
1467
1468	lockdep_assert_held(&domain->lock);
1469
1470	if (pdom_is_v2_pgtbl_mode(domain)) {
1471		gn = true;
1472		ret = domain_flush_pages_v2(domain, address, size);
1473	} else {
1474		ret = domain_flush_pages_v1(domain, address, size);
1475	}
1476
1477	list_for_each_entry(dev_data, &domain->dev_list, list) {
1478
1479		if (!dev_data->ats_enabled)
1480			continue;
1481
1482		ret |= device_flush_iotlb(dev_data, address, size, pasid, gn);
1483	}
1484
1485	WARN_ON(ret);
1486}
1487
1488void amd_iommu_domain_flush_pages(struct protection_domain *domain,
1489				  u64 address, size_t size)
1490{
1491	lockdep_assert_held(&domain->lock);
1492
1493	if (likely(!amd_iommu_np_cache)) {
1494		__domain_flush_pages(domain, address, size);
1495
1496		/* Wait until IOMMU TLB and all device IOTLB flushes are complete */
1497		domain_flush_complete(domain);
1498
1499		return;
1500	}
1501
1502	/*
1503	 * When NpCache is on, we infer that we run in a VM and use a vIOMMU.
1504	 * In such setups it is best to avoid flushes of ranges which are not
1505	 * naturally aligned, since it would lead to flushes of unmodified
1506	 * PTEs. Such flushes would require the hypervisor to do more work than
1507	 * necessary. Therefore, perform repeated flushes of aligned ranges
1508	 * until you cover the range. Each iteration flushes the smaller
1509	 * between the natural alignment of the address that we flush and the
1510	 * greatest naturally aligned region that fits in the range.
1511	 */
1512	while (size != 0) {
1513		int addr_alignment = __ffs(address);
1514		int size_alignment = __fls(size);
1515		int min_alignment;
1516		size_t flush_size;
1517
1518		/*
1519		 * size is always non-zero, but address might be zero, causing
1520		 * addr_alignment to be negative. As the casting of the
1521		 * argument in __ffs(address) to long might trim the high bits
1522		 * of the address on x86-32, cast to long when doing the check.
1523		 */
1524		if (likely((unsigned long)address != 0))
1525			min_alignment = min(addr_alignment, size_alignment);
1526		else
1527			min_alignment = size_alignment;
1528
1529		flush_size = 1ul << min_alignment;
1530
1531		__domain_flush_pages(domain, address, flush_size);
1532		address += flush_size;
1533		size -= flush_size;
1534	}
1535
1536	/* Wait until IOMMU TLB and all device IOTLB flushes are complete */
1537	domain_flush_complete(domain);
1538}
1539
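/*
 * Editor's illustration (not part of the upstream file): flushing iova
 * 0x3000 with size 0x5000 under NpCache runs the loop above twice:
 *
 *   pass 1: __ffs(0x3000) = 12, __fls(0x5000) = 14, min = 12
 *           -> flush 0x1000 bytes at 0x3000; address = 0x4000, size = 0x4000
 *   pass 2: __ffs(0x4000) = 14, __fls(0x4000) = 14, min = 14
 *           -> flush 0x4000 bytes at 0x4000; size = 0, loop ends
 *
 * i.e. two naturally aligned flushes instead of one unaligned 20 KiB flush.
 */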
1540/* Flush the whole IO/TLB for a given protection domain - including PDE */
1541static void amd_iommu_domain_flush_all(struct protection_domain *domain)
1542{
1543	amd_iommu_domain_flush_pages(domain, 0,
1544				     CMD_INV_IOMMU_ALL_PAGES_ADDRESS);
1545}
1546
1547void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
1548				     ioasid_t pasid, u64 address, size_t size)
1549{
1550	struct iommu_cmd cmd;
1551	struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev);
1552
1553	build_inv_iommu_pages(&cmd, address, size,
1554			      dev_data->gcr3_info.domid, pasid, true);
1555	iommu_queue_command(iommu, &cmd);
1556
1557	if (dev_data->ats_enabled)
1558		device_flush_iotlb(dev_data, address, size, pasid, true);
1559
1560	iommu_completion_wait(iommu);
1561}
1562
1563static void dev_flush_pasid_all(struct iommu_dev_data *dev_data,
1564				ioasid_t pasid)
1565{
1566	amd_iommu_dev_flush_pasid_pages(dev_data, pasid, 0,
1567					CMD_INV_IOMMU_ALL_PAGES_ADDRESS);
1568}
1569
1570/* Flush the not present cache if it exists */
1571static void domain_flush_np_cache(struct protection_domain *domain,
1572		dma_addr_t iova, size_t size)
1573{
1574	if (unlikely(amd_iommu_np_cache)) {
1575		unsigned long flags;
1576
1577		spin_lock_irqsave(&domain->lock, flags);
1578		amd_iommu_domain_flush_pages(domain, iova, size);
1579		spin_unlock_irqrestore(&domain->lock, flags);
1580	}
1581}
1582
1583
1584/*
1585 * This function flushes the DTEs for all devices in the domain.
1586 */
1587void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
1588{
1589	struct iommu_dev_data *dev_data;
1590
1591	lockdep_assert_held(&domain->lock);
1592
1593	list_for_each_entry(dev_data, &domain->dev_list, list) {
1594		struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev);
1595
1596		set_dte_entry(iommu, dev_data);
1597		clone_aliases(iommu, dev_data->dev);
1598	}
1599
1600	list_for_each_entry(dev_data, &domain->dev_list, list)
1601		device_flush_dte(dev_data);
1602
1603	domain_flush_complete(domain);
1604}
1605
1606int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag)
1607{
1608	struct iommu_dev_data *dev_data;
1609	struct amd_iommu *iommu;
1610	struct iommu_cmd cmd;
1611
1612	dev_data = dev_iommu_priv_get(dev);
1613	iommu    = get_amd_iommu_from_dev(dev);
1614
1615	build_complete_ppr(&cmd, dev_data->devid, pasid, status,
1616			   tag, dev_data->pri_tlp);
1617
1618	return iommu_queue_command(iommu, &cmd);
1619}
1620
1621/****************************************************************************
1622 *
1623 * The next functions belong to the domain allocation. A domain is
1624 * allocated for every IOMMU as the default domain. If device isolation
1625 * is enabled, every device get its own domain. The most important thing
1626 * about domains is the page table mapping the DMA address space they
1627 * contain.
1628 *
1629 ****************************************************************************/
1630
1631static int pdom_id_alloc(void)
1632{
1633	return ida_alloc_range(&pdom_ids, 1, MAX_DOMAIN_ID - 1, GFP_ATOMIC);
1634}
1635
1636static void pdom_id_free(int id)
1637{
1638	ida_free(&pdom_ids, id);
1639}
1640
1641static void free_gcr3_tbl_level1(u64 *tbl)
1642{
1643	u64 *ptr;
1644	int i;
1645
1646	for (i = 0; i < 512; ++i) {
1647		if (!(tbl[i] & GCR3_VALID))
1648			continue;
1649
1650		ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK);
1651
1652		iommu_free_page(ptr);
1653	}
1654}
1655
1656static void free_gcr3_tbl_level2(u64 *tbl)
1657{
1658	u64 *ptr;
1659	int i;
1660
1661	for (i = 0; i < 512; ++i) {
1662		if (!(tbl[i] & GCR3_VALID))
1663			continue;
1664
1665		ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK);
1666
1667		free_gcr3_tbl_level1(ptr);
1668	}
1669}
1670
1671static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info)
1672{
1673	if (gcr3_info->glx == 2)
1674		free_gcr3_tbl_level2(gcr3_info->gcr3_tbl);
1675	else if (gcr3_info->glx == 1)
1676		free_gcr3_tbl_level1(gcr3_info->gcr3_tbl);
1677	else
1678		WARN_ON_ONCE(gcr3_info->glx != 0);
1679
1680	gcr3_info->glx = 0;
1681
1682	/* Free per device domain ID */
1683	pdom_id_free(gcr3_info->domid);
1684
1685	iommu_free_page(gcr3_info->gcr3_tbl);
1686	gcr3_info->gcr3_tbl = NULL;
1687}
1688
1689/*
1690 * Number of GCR3 table levels required. Each level is a 4-Kbyte
1691 * page and can contain up to 512 entries.
1692 */
1693static int get_gcr3_levels(int pasids)
1694{
1695	int levels;
1696
1697	if (pasids == -1)
1698		return amd_iommu_max_glx_val;
1699
1700	levels = get_count_order(pasids);
1701
1702	return levels ? (DIV_ROUND_UP(levels, 9) - 1) : levels;
1703}
1704
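/*
 * Editor's illustration (not part of the upstream file): each GCR3 table
 * level resolves 9 bits of PASID, so
 *
 *   pasids = 256     -> get_count_order(256)     = 8,  levels = 0 (1 level)
 *   pasids = 0x10000 -> get_count_order(0x10000) = 16, levels = 1 (2 levels)
 *
 * which is the glx value stored by setup_gcr3_table() below.
 */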
1705static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info,
1706			    struct amd_iommu *iommu, int pasids)
1707{
1708	int levels = get_gcr3_levels(pasids);
1709	int nid = iommu ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
1710	int domid;
1711
1712	if (levels > amd_iommu_max_glx_val)
1713		return -EINVAL;
1714
1715	if (gcr3_info->gcr3_tbl)
1716		return -EBUSY;
1717
1718	/* Allocate per device domain ID */
1719	domid = pdom_id_alloc();
1720	if (domid <= 0)
1721		return -ENOSPC;
1722	gcr3_info->domid = domid;
1723
1724	gcr3_info->gcr3_tbl = iommu_alloc_page_node(nid, GFP_ATOMIC);
1725	if (gcr3_info->gcr3_tbl == NULL) {
1726		pdom_id_free(domid);
1727		return -ENOMEM;
1728	}
1729
1730	gcr3_info->glx = levels;
1731
1732	return 0;
1733}
1734
1735static u64 *__get_gcr3_pte(struct gcr3_tbl_info *gcr3_info,
1736			   ioasid_t pasid, bool alloc)
1737{
1738	int index;
1739	u64 *pte;
1740	u64 *root = gcr3_info->gcr3_tbl;
1741	int level = gcr3_info->glx;
1742
1743	while (true) {
1744
1745		index = (pasid >> (9 * level)) & 0x1ff;
1746		pte   = &root[index];
1747
1748		if (level == 0)
1749			break;
1750
1751		if (!(*pte & GCR3_VALID)) {
1752			if (!alloc)
1753				return NULL;
1754
1755			root = (void *)get_zeroed_page(GFP_ATOMIC);
1756			if (root == NULL)
1757				return NULL;
1758
1759			*pte = iommu_virt_to_phys(root) | GCR3_VALID;
1760		}
1761
1762		root = iommu_phys_to_virt(*pte & PAGE_MASK);
1763
1764		level -= 1;
1765	}
1766
1767	return pte;
1768}
1769
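/*
 * Editor's illustration (not part of the upstream file): with a two-level
 * table (glx = 1), the walk above splits the PASID into 9-bit indexes from
 * the top down. For pasid 0x12345:
 *
 *   level 1 index = (0x12345 >> 9) & 0x1ff = 0x091
 *   level 0 index =  0x12345       & 0x1ff = 0x145
 *
 * so the returned pointer is entry 0x145 of the level-0 table reached
 * through entry 0x091 of the root table.
 */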
1770static int update_gcr3(struct iommu_dev_data *dev_data,
1771		       ioasid_t pasid, unsigned long gcr3, bool set)
1772{
1773	struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
1774	u64 *pte;
1775
1776	pte = __get_gcr3_pte(gcr3_info, pasid, true);
1777	if (pte == NULL)
1778		return -ENOMEM;
1779
1780	if (set)
1781		*pte = (gcr3 & PAGE_MASK) | GCR3_VALID;
1782	else
1783		*pte = 0;
1784
1785	dev_flush_pasid_all(dev_data, pasid);
1786	return 0;
1787}
1788
1789int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid,
1790		       unsigned long gcr3)
1791{
1792	struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
1793	int ret;
1794
1795	iommu_group_mutex_assert(dev_data->dev);
1796
1797	ret = update_gcr3(dev_data, pasid, gcr3, true);
1798	if (ret)
1799		return ret;
1800
1801	gcr3_info->pasid_cnt++;
1802	return ret;
1803}
1804
1805int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid)
1806{
1807	struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
1808	int ret;
1809
1810	iommu_group_mutex_assert(dev_data->dev);
1811
1812	ret = update_gcr3(dev_data, pasid, 0, false);
1813	if (ret)
1814		return ret;
1815
1816	gcr3_info->pasid_cnt--;
1817	return ret;
1818}
1819
1820static void set_dte_entry(struct amd_iommu *iommu,
1821			  struct iommu_dev_data *dev_data)
1822{
1823	u64 pte_root = 0;
1824	u64 flags = 0;
1825	u32 old_domid;
1826	u16 devid = dev_data->devid;
1827	u16 domid;
1828	struct protection_domain *domain = dev_data->domain;
1829	struct dev_table_entry *dev_table = get_dev_table(iommu);
1830	struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
1831
1832	if (gcr3_info && gcr3_info->gcr3_tbl)
1833		domid = dev_data->gcr3_info.domid;
1834	else
1835		domid = domain->id;
1836
1837	if (domain->iop.mode != PAGE_MODE_NONE)
1838		pte_root = iommu_virt_to_phys(domain->iop.root);
1839
1840	pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
1841		    << DEV_ENTRY_MODE_SHIFT;
1842
1843	pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V;
1844
1845	/*
1846	 * When SNP is enabled, only set the TV bit when IOMMU
1847	 * page translation is in use.
1848	 */
1849	if (!amd_iommu_snp_en || (domid != 0))
1850		pte_root |= DTE_FLAG_TV;
1851
1852	flags = dev_table[devid].data[1];
1853
1854	if (dev_data->ats_enabled)
1855		flags |= DTE_FLAG_IOTLB;
1856
1857	if (dev_data->ppr)
1858		pte_root |= 1ULL << DEV_ENTRY_PPR;
1859
1860	if (domain->dirty_tracking)
1861		pte_root |= DTE_FLAG_HAD;
1862
1863	if (gcr3_info && gcr3_info->gcr3_tbl) {
1864		u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
1865		u64 glx  = gcr3_info->glx;
1866		u64 tmp;
1867
1868		pte_root |= DTE_FLAG_GV;
1869		pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
1870
1871		/* First mask out possible old values for GCR3 table */
1872		tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
1873		flags    &= ~tmp;
1874
1875		tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
1876		flags    &= ~tmp;
1877
1878		/* Encode GCR3 table into DTE */
1879		tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
1880		pte_root |= tmp;
1881
1882		tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
1883		flags    |= tmp;
1884
1885		tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
1886		flags    |= tmp;
1887
1888		if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) {
1889			dev_table[devid].data[2] |=
1890				((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT);
1891		}
1892
1893		/* GIOV is supported with V2 page table mode only */
1894		if (pdom_is_v2_pgtbl_mode(domain))
1895			pte_root |= DTE_FLAG_GIOV;
1896	}
1897
1898	flags &= ~DEV_DOMID_MASK;
1899	flags |= domid;
1900
1901	old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK;
1902	dev_table[devid].data[1]  = flags;
1903	dev_table[devid].data[0]  = pte_root;
1904
1905	/*
1906	 * A kdump kernel might be replacing a domain ID that was copied from
1907	 * the previous kernel--if so, it needs to flush the translation cache
1908	 * entries for the old domain ID that is being overwritten
1909	 */
1910	if (old_domid) {
1911		amd_iommu_flush_tlb_domid(iommu, old_domid);
1912	}
1913}
1914
1915static void clear_dte_entry(struct amd_iommu *iommu, u16 devid)
1916{
1917	struct dev_table_entry *dev_table = get_dev_table(iommu);
1918
1919	/* remove entry from the device table seen by the hardware */
1920	dev_table[devid].data[0]  = DTE_FLAG_V;
1921
1922	if (!amd_iommu_snp_en)
1923		dev_table[devid].data[0] |= DTE_FLAG_TV;
1924
1925	dev_table[devid].data[1] &= DTE_FLAG_MASK;
1926
1927	amd_iommu_apply_erratum_63(iommu, devid);
1928}
1929
1930/* Update and flush DTE for the given device */
1931static void dev_update_dte(struct iommu_dev_data *dev_data, bool set)
1932{
1933	struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev);
1934
1935	if (set)
1936		set_dte_entry(iommu, dev_data);
1937	else
1938		clear_dte_entry(iommu, dev_data->devid);
1939
1940	clone_aliases(iommu, dev_data->dev);
1941	device_flush_dte(dev_data);
1942	iommu_completion_wait(iommu);
1943}
1944
1945/*
1946 * If domain is SVA capable then initialize GCR3 table. Also if domain is
1947 * in v2 page table mode then update GCR3[0].
1948 */
1949static int init_gcr3_table(struct iommu_dev_data *dev_data,
1950			   struct protection_domain *pdom)
1951{
1952	struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
1953	int max_pasids = dev_data->max_pasids;
1954	int ret = 0;
1955
1956	/*
1957	 * If the domain is in pt mode, set up the GCR3 table only if the
1958	 * device is PASID capable.
1959	 */
1960	if (pdom_is_in_pt_mode(pdom) && !pdev_pasid_supported(dev_data))
1961		return ret;
1962
1963	/*
1964	 * By default, setup GCR3 table to support MAX PASIDs
1965	 * supported by the device/IOMMU.
1966	 */
1967	ret = setup_gcr3_table(&dev_data->gcr3_info, iommu,
1968			       max_pasids > 0 ?  max_pasids : 1);
1969	if (ret)
1970		return ret;
1971
1972	/* Setup GCR3[0] only if domain is setup with v2 page table mode */
1973	if (!pdom_is_v2_pgtbl_mode(pdom))
1974		return ret;
1975
1976	ret = update_gcr3(dev_data, 0, iommu_virt_to_phys(pdom->iop.pgd), true);
1977	if (ret)
1978		free_gcr3_table(&dev_data->gcr3_info);
1979
1980	return ret;
1981}
1982
1983static void destroy_gcr3_table(struct iommu_dev_data *dev_data,
1984			       struct protection_domain *pdom)
1985{
1986	struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
1987
1988	if (pdom_is_v2_pgtbl_mode(pdom))
1989		update_gcr3(dev_data, 0, 0, false);
1990
1991	if (gcr3_info->gcr3_tbl == NULL)
1992		return;
1993
1994	free_gcr3_table(gcr3_info);
1995}
1996
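/*
 * Per-domain bookkeeping of attached IOMMUs: take a reference on @iommu in
 * the domain's iommu_array (allocating a tracking entry on first use) and
 * inherit the IOMMU's NUMA node for page table allocations if the domain
 * does not have one yet.
 */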
1997static int pdom_attach_iommu(struct amd_iommu *iommu,
1998			     struct protection_domain *pdom)
1999{
2000	struct pdom_iommu_info *pdom_iommu_info, *curr;
2001	struct io_pgtable_cfg *cfg = &pdom->iop.pgtbl.cfg;
2002	unsigned long flags;
2003	int ret = 0;
2004
2005	spin_lock_irqsave(&pdom->lock, flags);
2006
2007	pdom_iommu_info = xa_load(&pdom->iommu_array, iommu->index);
2008	if (pdom_iommu_info) {
2009		pdom_iommu_info->refcnt++;
2010		goto out_unlock;
2011	}
2012
2013	pdom_iommu_info = kzalloc(sizeof(*pdom_iommu_info), GFP_ATOMIC);
2014	if (!pdom_iommu_info) {
2015		ret = -ENOMEM;
2016		goto out_unlock;
2017	}
2018
2019	pdom_iommu_info->iommu = iommu;
2020	pdom_iommu_info->refcnt = 1;
2021
2022	curr = xa_cmpxchg(&pdom->iommu_array, iommu->index,
2023			  NULL, pdom_iommu_info, GFP_ATOMIC);
2024	if (curr) {
2025		kfree(pdom_iommu_info);
2026		ret = -ENOSPC;
2027		goto out_unlock;
2028	}
2029
2030	/* Update NUMA Node ID */
2031	if (cfg->amd.nid == NUMA_NO_NODE)
2032		cfg->amd.nid = dev_to_node(&iommu->dev->dev);
2033
2034out_unlock:
2035	spin_unlock_irqrestore(&pdom->lock, flags);
2036	return ret;
2037}
2038
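/*
 * Drop the domain's reference on @iommu and free the tracking entry once
 * the last device behind this IOMMU has been detached.
 */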
2039static void pdom_detach_iommu(struct amd_iommu *iommu,
2040			      struct protection_domain *pdom)
2041{
2042	struct pdom_iommu_info *pdom_iommu_info;
2043	unsigned long flags;
2044
2045	spin_lock_irqsave(&pdom->lock, flags);
2046
2047	pdom_iommu_info = xa_load(&pdom->iommu_array, iommu->index);
2048	if (!pdom_iommu_info) {
2049		spin_unlock_irqrestore(&pdom->lock, flags);
2050		return;
2051	}
2052
2053	pdom_iommu_info->refcnt--;
2054	if (pdom_iommu_info->refcnt == 0) {
2055		xa_erase(&pdom->iommu_array, iommu->index);
2056		kfree(pdom_iommu_info);
2057	}
2058
2059	spin_unlock_irqrestore(&pdom->lock, flags);
2060}
2061
2062/*
2063 * If a device is not yet associated with a domain, this function makes the
2064 * device visible in the domain
2065 */
2066static int attach_device(struct device *dev,
2067			 struct protection_domain *domain)
2068{
2069	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
2070	struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
2071	struct pci_dev *pdev;
2072	unsigned long flags;
2073	int ret = 0;
2074
2075	mutex_lock(&dev_data->mutex);
2076
2077	if (dev_data->domain != NULL) {
2078		ret = -EBUSY;
2079		goto out;
2080	}
2081
2082	/* Do reference counting */
2083	ret = pdom_attach_iommu(iommu, domain);
2084	if (ret)
2085		goto out;
2086
2087	/* Setup GCR3 table */
2088	if (pdom_is_sva_capable(domain)) {
2089		ret = init_gcr3_table(dev_data, domain);
2090		if (ret) {
2091			pdom_detach_iommu(iommu, domain);
2092			goto out;
2093		}
2094	}
2095
2096	pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL;
2097	if (pdev && pdom_is_sva_capable(domain)) {
2098		pdev_enable_caps(pdev);
2099
2100		/*
2101		 * Device can continue to function even if IOPF
2102		 * enablement failed. Hence in error path just
2103		 * disable device PRI support.
2104		 */
2105		if (amd_iommu_iopf_add_device(iommu, dev_data))
2106			pdev_disable_cap_pri(pdev);
2107	} else if (pdev) {
2108		pdev_enable_cap_ats(pdev);
2109	}
2110
2111	/* Update data structures */
2112	dev_data->domain = domain;
2113	spin_lock_irqsave(&domain->lock, flags);
2114	list_add(&dev_data->list, &domain->dev_list);
2115	spin_unlock_irqrestore(&domain->lock, flags);
2116
2117	/* Update device table */
2118	dev_update_dte(dev_data, true);
2119
2120out:
2121	mutex_unlock(&dev_data->mutex);
2122
2123	return ret;
2124}
2125
2126/*
2127 * Removes a device from a protection domain (with devtable_lock held)
2128 */
2129static void detach_device(struct device *dev)
2130{
2131	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
2132	struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
2133	struct protection_domain *domain = dev_data->domain;
2134	unsigned long flags;
2135
2136	mutex_lock(&dev_data->mutex);
2137
2138	/*
2139	 * First check if the device is still attached. It might already
2140	 * be detached from its domain because the generic
2141	 * iommu_detach_group code detached it and we try again here in
2142	 * our alias handling.
2143	 */
2144	if (WARN_ON(!dev_data->domain))
2145		goto out;
2146
2147	/* Remove IOPF handler */
2148	if (dev_data->ppr) {
2149		iopf_queue_flush_dev(dev);
2150		amd_iommu_iopf_remove_device(iommu, dev_data);
2151	}
2152
2153	if (dev_is_pci(dev))
2154		pdev_disable_caps(to_pci_dev(dev));
2155
2156	/* Clear DTE and flush the entry */
2157	dev_update_dte(dev_data, false);
2158
2159	/* Flush IOTLB and wait for the flushes to finish */
2160	spin_lock_irqsave(&domain->lock, flags);
2161	amd_iommu_domain_flush_all(domain);
2162	list_del(&dev_data->list);
2163	spin_unlock_irqrestore(&domain->lock, flags);
2164
2165	/* Clear GCR3 table */
2166	if (pdom_is_sva_capable(domain))
2167		destroy_gcr3_table(dev_data, domain);
2168
2169	/* Update data structures */
2170	dev_data->domain = NULL;
2171
2172	/* decrease reference counters - needs to happen after the flushes */
2173	pdom_detach_iommu(iommu, domain);
2174
2175out:
2176	mutex_unlock(&dev_data->mutex);
2177}
2178
2179static struct iommu_device *amd_iommu_probe_device(struct device *dev)
2180{
2181	struct iommu_device *iommu_dev;
2182	struct amd_iommu *iommu;
2183	struct iommu_dev_data *dev_data;
2184	int ret;
2185
2186	if (!check_device(dev))
2187		return ERR_PTR(-ENODEV);
2188
2189	iommu = rlookup_amd_iommu(dev);
2190	if (!iommu)
2191		return ERR_PTR(-ENODEV);
2192
2193	/* Not registered yet? */
2194	if (!iommu->iommu.ops)
2195		return ERR_PTR(-ENODEV);
2196
2197	if (dev_iommu_priv_get(dev))
2198		return &iommu->iommu;
2199
2200	ret = iommu_init_device(iommu, dev);
2201	if (ret) {
2202		dev_err(dev, "Failed to initialize - trying to proceed anyway\n");
2203		iommu_dev = ERR_PTR(ret);
2204		iommu_ignore_device(iommu, dev);
2205		goto out_err;
2206	}
2207
2208	amd_iommu_set_pci_msi_domain(dev, iommu);
2209	iommu_dev = &iommu->iommu;
2210
2211	/*
2212	 * If both the IOMMU and the device support PASID, this holds the
2213	 * maximum number of PASIDs supported; otherwise it stays zero.
2214	 */
2215	dev_data = dev_iommu_priv_get(dev);
2216	if (amd_iommu_pasid_supported() && dev_is_pci(dev) &&
2217	    pdev_pasid_supported(dev_data)) {
2218		dev_data->max_pasids = min_t(u32, iommu->iommu.max_pasids,
2219					     pci_max_pasids(to_pci_dev(dev)));
2220	}
2221
2222out_err:
2223	iommu_completion_wait(iommu);
2224
2225	if (dev_is_pci(dev))
2226		pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT);
2227
2228	return iommu_dev;
2229}
2230
2231static void amd_iommu_release_device(struct device *dev)
2232{
2233	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
2234
2235	WARN_ON(dev_data->domain);
2236
2237	/*
2238	 * We keep dev_data around for unplugged devices and reuse it when the
2239	 * device is re-plugged - not doing so would introduce a ton of races.
2240	 */
2241}
2242
2243static struct iommu_group *amd_iommu_device_group(struct device *dev)
2244{
2245	if (dev_is_pci(dev))
2246		return pci_device_group(dev);
2247
2248	return acpihid_device_group(dev);
2249}
2250
2251/*****************************************************************************
2252 *
2253 * The following functions belong to the exported interface of AMD IOMMU
2254 *
2255 * This interface allows access to lower level functions of the IOMMU
2256 * like protection domain handling and assignment of devices to domains
2257 * which is not possible with the dma_ops interface.
2258 *
2259 *****************************************************************************/
2260
2261void protection_domain_free(struct protection_domain *domain)
2262{
2263	WARN_ON(!list_empty(&domain->dev_list));
2264	if (domain->domain.type & __IOMMU_DOMAIN_PAGING)
2265		free_io_pgtable_ops(&domain->iop.pgtbl.ops);
2266	pdom_id_free(domain->id);
2267	kfree(domain);
2268}
2269
2270static void protection_domain_init(struct protection_domain *domain, int nid)
2271{
2272	spin_lock_init(&domain->lock);
2273	INIT_LIST_HEAD(&domain->dev_list);
2274	INIT_LIST_HEAD(&domain->dev_data_list);
2275	xa_init(&domain->iommu_array);
2276	domain->iop.pgtbl.cfg.amd.nid = nid;
2277}
2278
2279struct protection_domain *protection_domain_alloc(int nid)
2280{
2281	struct protection_domain *domain;
2282	int domid;
2283
2284	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
2285	if (!domain)
2286		return NULL;
2287
2288	domid = pdom_id_alloc();
2289	if (domid <= 0) {
2290		kfree(domain);
2291		return NULL;
2292	}
2293	domain->id = domid;
2294
2295	protection_domain_init(domain, nid);
2296
2297	return domain;
2298}
2299
2300static int pdom_setup_pgtable(struct protection_domain *domain)
2301{
2302	struct io_pgtable_ops *pgtbl_ops;
2303	enum io_pgtable_fmt fmt;
2304
2305	switch (domain->pd_mode) {
2306	case PD_MODE_V1:
2307		fmt = AMD_IOMMU_V1;
2308		break;
2309	case PD_MODE_V2:
2310		fmt = AMD_IOMMU_V2;
2311		break;
2312	}
2313
2314	pgtbl_ops = alloc_io_pgtable_ops(fmt, &domain->iop.pgtbl.cfg, domain);
2315	if (!pgtbl_ops)
2316		return -ENOMEM;
2317
2318	return 0;
2319}
2320
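/*
 * Highest IOVA the domain can translate: the v1 page table covers the full
 * 64-bit space, while the v2 page table is limited by the configured guest
 * page table level (4 or 5 levels).
 */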
2321static inline u64 dma_max_address(enum protection_domain_mode pgtable)
2322{
2323	if (pgtable == PD_MODE_V1)
2324		return ~0ULL;
2325
2326	/* V2 with 4/5 level page table */
2327	return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1);
2328}
2329
2330static bool amd_iommu_hd_support(struct amd_iommu *iommu)
2331{
2332	return iommu && (iommu->features & FEATURE_HDSUP);
2333}
2334
2335static struct iommu_domain *
2336do_iommu_domain_alloc(struct device *dev, u32 flags,
2337		      enum protection_domain_mode pgtable)
2338{
2339	bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
2340	struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
2341	struct protection_domain *domain;
2342	int ret;
2343
2344	domain = protection_domain_alloc(dev_to_node(dev));
2345	if (!domain)
2346		return ERR_PTR(-ENOMEM);
2347
2348	domain->pd_mode = pgtable;
2349	ret = pdom_setup_pgtable(domain);
2350	if (ret) {
2351		pdom_id_free(domain->id);
2352		kfree(domain);
2353		return ERR_PTR(ret);
2354	}
2355
2356	domain->domain.geometry.aperture_start = 0;
2357	domain->domain.geometry.aperture_end   = dma_max_address(pgtable);
2358	domain->domain.geometry.force_aperture = true;
2359	domain->domain.pgsize_bitmap = domain->iop.pgtbl.cfg.pgsize_bitmap;
2360
2361	domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
2362	domain->domain.ops = iommu->iommu.ops->default_domain_ops;
2363
2364	if (dirty_tracking)
2365		domain->domain.dirty_ops = &amd_dirty_ops;
2366
2367	return &domain->domain;
2368}
2369
2370static struct iommu_domain *
2371amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
2372				    const struct iommu_user_data *user_data)
2373
2374{
2375	struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
2376	const u32 supported_flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
2377						IOMMU_HWPT_ALLOC_PASID;
2378
2379	if ((flags & ~supported_flags) || user_data)
2380		return ERR_PTR(-EOPNOTSUPP);
2381
2382	switch (flags & supported_flags) {
2383	case IOMMU_HWPT_ALLOC_DIRTY_TRACKING:
2384		/* Allocate domain with v1 page table for dirty tracking */
2385		if (!amd_iommu_hd_support(iommu))
2386			break;
2387		return do_iommu_domain_alloc(dev, flags, PD_MODE_V1);
2388	case IOMMU_HWPT_ALLOC_PASID:
2389		/* Allocate domain with v2 page table if IOMMU supports PASID. */
2390		if (!amd_iommu_pasid_supported())
2391			break;
2392		return do_iommu_domain_alloc(dev, flags, PD_MODE_V2);
2393	case 0:
2394		/* If nothing specific is required, use the kernel command-line default */
2395		return do_iommu_domain_alloc(dev, 0, amd_iommu_pgtable);
2396	default:
2397		break;
2398	}
2399	return ERR_PTR(-EOPNOTSUPP);
2400}
2401
2402void amd_iommu_domain_free(struct iommu_domain *dom)
2403{
2404	struct protection_domain *domain = to_pdomain(dom);
2405
2406	protection_domain_free(domain);
2407}
2408
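/*
 * Attaching the blocked domain detaches the device from its current domain
 * (if any) and then clears and flushes its device table entry.
 */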
2409static int blocked_domain_attach_device(struct iommu_domain *domain,
2410					struct device *dev)
2411{
2412	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
2413
2414	if (dev_data->domain)
2415		detach_device(dev);
2416
2417	/* Clear DTE and flush the entry */
2418	mutex_lock(&dev_data->mutex);
2419	dev_update_dte(dev_data, false);
2420	mutex_unlock(&dev_data->mutex);
2421
2422	return 0;
2423}
2424
2425static struct iommu_domain blocked_domain = {
2426	.type = IOMMU_DOMAIN_BLOCKED,
2427	.ops = &(const struct iommu_domain_ops) {
2428		.attach_dev     = blocked_domain_attach_device,
2429	}
2430};
2431
2432static struct protection_domain identity_domain;
2433
2434static const struct iommu_domain_ops identity_domain_ops = {
2435	.attach_dev = amd_iommu_attach_device,
2436};
2437
2438void amd_iommu_init_identity_domain(void)
2439{
2440	struct iommu_domain *domain = &identity_domain.domain;
2441
2442	domain->type = IOMMU_DOMAIN_IDENTITY;
2443	domain->ops = &identity_domain_ops;
2444	domain->owner = &amd_iommu_ops;
2445
2446	identity_domain.id = pdom_id_alloc();
2447
2448	protection_domain_init(&identity_domain, NUMA_NO_NODE);
2449}
2450
2451/* Same as blocked domain except it supports only ops->attach_dev() */
2452static struct iommu_domain release_domain = {
2453	.type = IOMMU_DOMAIN_BLOCKED,
2454	.ops = &(const struct iommu_domain_ops) {
2455		.attach_dev     = blocked_domain_attach_device,
2456	}
2457};
2458
2459static int amd_iommu_attach_device(struct iommu_domain *dom,
2460				   struct device *dev)
2461{
2462	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
2463	struct protection_domain *domain = to_pdomain(dom);
2464	struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
2465	int ret;
2466
2467	/*
2468	 * Skip attaching the device to the domain if the new domain is the
2469	 * same as the device's current domain.
2470	 */
2471	if (dev_data->domain == domain)
2472		return 0;
2473
2474	dev_data->defer_attach = false;
2475
2476	/*
2477	 * Restrict to devices with compatible IOMMU hardware support
2478	 * when enforcement of dirty tracking is enabled.
2479	 */
2480	if (dom->dirty_ops && !amd_iommu_hd_support(iommu))
2481		return -EINVAL;
2482
2483	if (dev_data->domain)
2484		detach_device(dev);
2485
2486	ret = attach_device(dev, domain);
2487
2488#ifdef CONFIG_IRQ_REMAP
2489	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
2490		if (dom->type == IOMMU_DOMAIN_UNMANAGED)
2491			dev_data->use_vapic = 1;
2492		else
2493			dev_data->use_vapic = 0;
2494	}
2495#endif
2496
2497	return ret;
2498}
2499
2500static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
2501				    unsigned long iova, size_t size)
2502{
2503	struct protection_domain *domain = to_pdomain(dom);
2504	struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
2505
2506	if (ops->map_pages)
2507		domain_flush_np_cache(domain, iova, size);
2508	return 0;
2509}
2510
2511static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova,
2512			       phys_addr_t paddr, size_t pgsize, size_t pgcount,
2513			       int iommu_prot, gfp_t gfp, size_t *mapped)
2514{
2515	struct protection_domain *domain = to_pdomain(dom);
2516	struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
2517	int prot = 0;
2518	int ret = -EINVAL;
2519
2520	if ((domain->pd_mode == PD_MODE_V1) &&
2521	    (domain->iop.mode == PAGE_MODE_NONE))
2522		return -EINVAL;
2523
2524	if (iommu_prot & IOMMU_READ)
2525		prot |= IOMMU_PROT_IR;
2526	if (iommu_prot & IOMMU_WRITE)
2527		prot |= IOMMU_PROT_IW;
2528
2529	if (ops->map_pages) {
2530		ret = ops->map_pages(ops, iova, paddr, pgsize,
2531				     pgcount, prot, gfp, mapped);
2532	}
2533
2534	return ret;
2535}
2536
2537static void amd_iommu_iotlb_gather_add_page(struct iommu_domain *domain,
2538					    struct iommu_iotlb_gather *gather,
2539					    unsigned long iova, size_t size)
2540{
2541	/*
2542	 * AMD's IOMMU can flush as many pages as necessary in a single flush.
2543	 * Unless we run in a virtual machine, which can be inferred according
2544	 * to whether "non-present cache" is on, it is probably best to prefer
2545	 * (potentially) too extensive TLB flushing (i.e., more misses) over
2546	 * multiple TLB flushes (i.e., more flushes). For virtual machines the
2547	 * hypervisor needs to synchronize the host IOMMU PTEs with those of
2548	 * the guest, and the trade-off is different: unnecessary TLB flushes
2549	 * should be avoided.
2550	 */
2551	if (amd_iommu_np_cache &&
2552	    iommu_iotlb_gather_is_disjoint(gather, iova, size))
2553		iommu_iotlb_sync(domain, gather);
2554
2555	iommu_iotlb_gather_add_range(gather, iova, size);
2556}
2557
2558static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova,
2559				    size_t pgsize, size_t pgcount,
2560				    struct iommu_iotlb_gather *gather)
2561{
2562	struct protection_domain *domain = to_pdomain(dom);
2563	struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
2564	size_t r;
2565
2566	if ((domain->pd_mode == PD_MODE_V1) &&
2567	    (domain->iop.mode == PAGE_MODE_NONE))
2568		return 0;
2569
2570	r = (ops->unmap_pages) ? ops->unmap_pages(ops, iova, pgsize, pgcount, NULL) : 0;
2571
2572	if (r)
2573		amd_iommu_iotlb_gather_add_page(dom, gather, iova, r);
2574
2575	return r;
2576}
2577
2578static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
2579					  dma_addr_t iova)
2580{
2581	struct protection_domain *domain = to_pdomain(dom);
2582	struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
2583
2584	return ops->iova_to_phys(ops, iova);
2585}
2586
2587static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
2588{
2589	switch (cap) {
2590	case IOMMU_CAP_CACHE_COHERENCY:
2591		return true;
2592	case IOMMU_CAP_NOEXEC:
2593		return false;
2594	case IOMMU_CAP_PRE_BOOT_PROTECTION:
2595		return amdr_ivrs_remap_support;
2596	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
2597		return true;
2598	case IOMMU_CAP_DEFERRED_FLUSH:
2599		return true;
2600	case IOMMU_CAP_DIRTY_TRACKING: {
2601		struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
2602
2603		return amd_iommu_hd_support(iommu);
2604	}
2605	default:
2606		break;
2607	}
2608
2609	return false;
2610}
2611
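/*
 * Toggle hardware access/dirty (HAD) tracking in the DTE of every device in
 * the domain, flush the affected DTEs and then the domain's IOTLB so that
 * subsequent translations start (or stop) marking IOPTEs dirty.
 */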
2612static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
2613					bool enable)
2614{
2615	struct protection_domain *pdomain = to_pdomain(domain);
2616	struct dev_table_entry *dev_table;
2617	struct iommu_dev_data *dev_data;
2618	bool domain_flush = false;
2619	struct amd_iommu *iommu;
2620	unsigned long flags;
2621	u64 pte_root;
2622
2623	spin_lock_irqsave(&pdomain->lock, flags);
2624	if (!(pdomain->dirty_tracking ^ enable)) {
2625		spin_unlock_irqrestore(&pdomain->lock, flags);
2626		return 0;
2627	}
2628
2629	list_for_each_entry(dev_data, &pdomain->dev_list, list) {
2630		iommu = get_amd_iommu_from_dev_data(dev_data);
2631
2632		dev_table = get_dev_table(iommu);
2633		pte_root = dev_table[dev_data->devid].data[0];
2634
2635		pte_root = (enable ? pte_root | DTE_FLAG_HAD :
2636				     pte_root & ~DTE_FLAG_HAD);
2637
2638		/* Flush device DTE */
2639		dev_table[dev_data->devid].data[0] = pte_root;
2640		device_flush_dte(dev_data);
2641		domain_flush = true;
2642	}
2643
2644	/* Flush IOTLB to mark IOPTE dirty on the next translation(s) */
2645	if (domain_flush)
2646		amd_iommu_domain_flush_all(pdomain);
2647
2648	pdomain->dirty_tracking = enable;
2649	spin_unlock_irqrestore(&pdomain->lock, flags);
2650
2651	return 0;
2652}
2653
2654static int amd_iommu_read_and_clear_dirty(struct iommu_domain *domain,
2655					  unsigned long iova, size_t size,
2656					  unsigned long flags,
2657					  struct iommu_dirty_bitmap *dirty)
2658{
2659	struct protection_domain *pdomain = to_pdomain(domain);
2660	struct io_pgtable_ops *ops = &pdomain->iop.pgtbl.ops;
2661	unsigned long lflags;
2662
2663	if (!ops || !ops->read_and_clear_dirty)
2664		return -EOPNOTSUPP;
2665
2666	spin_lock_irqsave(&pdomain->lock, lflags);
2667	if (!pdomain->dirty_tracking && dirty->bitmap) {
2668		spin_unlock_irqrestore(&pdomain->lock, lflags);
2669		return -EINVAL;
2670	}
2671	spin_unlock_irqrestore(&pdomain->lock, lflags);
2672
2673	return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
2674}
2675
2676static void amd_iommu_get_resv_regions(struct device *dev,
2677				       struct list_head *head)
2678{
2679	struct iommu_resv_region *region;
2680	struct unity_map_entry *entry;
2681	struct amd_iommu *iommu;
2682	struct amd_iommu_pci_seg *pci_seg;
2683	int devid, sbdf;
2684
2685	sbdf = get_device_sbdf_id(dev);
2686	if (sbdf < 0)
2687		return;
2688
2689	devid = PCI_SBDF_TO_DEVID(sbdf);
2690	iommu = get_amd_iommu_from_dev(dev);
2691	pci_seg = iommu->pci_seg;
2692
2693	list_for_each_entry(entry, &pci_seg->unity_map, list) {
2694		int type, prot = 0;
2695		size_t length;
2696
2697		if (devid < entry->devid_start || devid > entry->devid_end)
2698			continue;
2699
2700		type   = IOMMU_RESV_DIRECT;
2701		length = entry->address_end - entry->address_start;
2702		if (entry->prot & IOMMU_PROT_IR)
2703			prot |= IOMMU_READ;
2704		if (entry->prot & IOMMU_PROT_IW)
2705			prot |= IOMMU_WRITE;
2706		if (entry->prot & IOMMU_UNITY_MAP_FLAG_EXCL_RANGE)
2707			/* Exclusion range */
2708			type = IOMMU_RESV_RESERVED;
2709
2710		region = iommu_alloc_resv_region(entry->address_start,
2711						 length, prot, type,
2712						 GFP_KERNEL);
2713		if (!region) {
2714			dev_err(dev, "Out of memory allocating dm-regions\n");
2715			return;
2716		}
2717		list_add_tail(&region->list, head);
2718	}
2719
2720	region = iommu_alloc_resv_region(MSI_RANGE_START,
2721					 MSI_RANGE_END - MSI_RANGE_START + 1,
2722					 0, IOMMU_RESV_MSI, GFP_KERNEL);
2723	if (!region)
2724		return;
2725	list_add_tail(&region->list, head);
2726
2727	region = iommu_alloc_resv_region(HT_RANGE_START,
2728					 HT_RANGE_END - HT_RANGE_START + 1,
2729					 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
2730	if (!region)
2731		return;
2732	list_add_tail(&region->list, head);
2733}
2734
2735static bool amd_iommu_is_attach_deferred(struct device *dev)
2736{
2737	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
2738
2739	return dev_data->defer_attach;
2740}
2741
2742static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
2743{
2744	struct protection_domain *dom = to_pdomain(domain);
2745	unsigned long flags;
2746
2747	spin_lock_irqsave(&dom->lock, flags);
2748	amd_iommu_domain_flush_all(dom);
2749	spin_unlock_irqrestore(&dom->lock, flags);
2750}
2751
2752static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
2753				 struct iommu_iotlb_gather *gather)
2754{
2755	struct protection_domain *dom = to_pdomain(domain);
2756	unsigned long flags;
2757
2758	spin_lock_irqsave(&dom->lock, flags);
2759	amd_iommu_domain_flush_pages(dom, gather->start,
2760				     gather->end - gather->start + 1);
2761	spin_unlock_irqrestore(&dom->lock, flags);
2762}
2763
2764static int amd_iommu_def_domain_type(struct device *dev)
2765{
2766	struct iommu_dev_data *dev_data;
2767
2768	dev_data = dev_iommu_priv_get(dev);
2769	if (!dev_data)
2770		return 0;
2771
2772	/* Always use DMA domain for untrusted device */
2773	if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
2774		return IOMMU_DOMAIN_DMA;
2775
2776	/*
2777	 * Do not identity map IOMMUv2 capable devices when:
2778	 *  - memory encryption is active, because some of those devices
2779	 *    (AMD GPUs) don't have the encryption bit in their DMA-mask
2780	 *    and require remapping.
2781	 *  - SNP is enabled, because it prohibits DTE[Mode]=0.
2782	 */
2783	if (pdev_pasid_supported(dev_data) &&
2784	    !cc_platform_has(CC_ATTR_MEM_ENCRYPT) &&
2785	    !amd_iommu_snp_en) {
2786		return IOMMU_DOMAIN_IDENTITY;
2787	}
2788
2789	return 0;
2790}
2791
2792static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain)
2793{
2794	/* IOMMU_PTE_FC is always set */
2795	return true;
2796}
2797
2798static const struct iommu_dirty_ops amd_dirty_ops = {
2799	.set_dirty_tracking = amd_iommu_set_dirty_tracking,
2800	.read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
2801};
2802
2803static int amd_iommu_dev_enable_feature(struct device *dev,
2804					enum iommu_dev_features feat)
2805{
2806	int ret = 0;
2807
2808	switch (feat) {
2809	case IOMMU_DEV_FEAT_IOPF:
2810	case IOMMU_DEV_FEAT_SVA:
2811		break;
2812	default:
2813		ret = -EINVAL;
2814		break;
2815	}
2816	return ret;
2817}
2818
2819static int amd_iommu_dev_disable_feature(struct device *dev,
2820					 enum iommu_dev_features feat)
2821{
2822	int ret = 0;
2823
2824	switch (feat) {
2825	case IOMMU_DEV_FEAT_IOPF:
2826	case IOMMU_DEV_FEAT_SVA:
2827		break;
2828	default:
2829		ret = -EINVAL;
2830		break;
2831	}
2832	return ret;
2833}
2834
2835const struct iommu_ops amd_iommu_ops = {
2836	.capable = amd_iommu_capable,
2837	.blocked_domain = &blocked_domain,
2838	.release_domain = &release_domain,
2839	.identity_domain = &identity_domain.domain,
2840	.domain_alloc_paging_flags = amd_iommu_domain_alloc_paging_flags,
2841	.domain_alloc_sva = amd_iommu_domain_alloc_sva,
2842	.probe_device = amd_iommu_probe_device,
2843	.release_device = amd_iommu_release_device,
2844	.device_group = amd_iommu_device_group,
2845	.get_resv_regions = amd_iommu_get_resv_regions,
2846	.is_attach_deferred = amd_iommu_is_attach_deferred,
2847	.def_domain_type = amd_iommu_def_domain_type,
2848	.dev_enable_feat = amd_iommu_dev_enable_feature,
2849	.dev_disable_feat = amd_iommu_dev_disable_feature,
2850	.remove_dev_pasid = amd_iommu_remove_dev_pasid,
2851	.page_response = amd_iommu_page_response,
2852	.default_domain_ops = &(const struct iommu_domain_ops) {
2853		.attach_dev	= amd_iommu_attach_device,
2854		.map_pages	= amd_iommu_map_pages,
2855		.unmap_pages	= amd_iommu_unmap_pages,
2856		.iotlb_sync_map	= amd_iommu_iotlb_sync_map,
2857		.iova_to_phys	= amd_iommu_iova_to_phys,
2858		.flush_iotlb_all = amd_iommu_flush_iotlb_all,
2859		.iotlb_sync	= amd_iommu_iotlb_sync,
2860		.free		= amd_iommu_domain_free,
2861		.enforce_cache_coherency = amd_iommu_enforce_cache_coherency,
2862	}
2863};
2864
2865#ifdef CONFIG_IRQ_REMAP
2866
2867/*****************************************************************************
2868 *
2869 * Interrupt Remapping Implementation
2870 *
2871 *****************************************************************************/
2872
2873static struct irq_chip amd_ir_chip;
2874static DEFINE_SPINLOCK(iommu_table_lock);
2875
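/*
 * Invalidate the interrupt remapping table cache for @devid and wait for the
 * invalidation to complete. This is a no-op when IRT caching is disabled in
 * hardware (irtcachedis), since there is nothing to flush.
 */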
2876static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
2877{
2878	int ret;
2879	u64 data;
2880	unsigned long flags;
2881	struct iommu_cmd cmd, cmd2;
2882
2883	if (iommu->irtcachedis_enabled)
2884		return;
2885
2886	build_inv_irt(&cmd, devid);
2887	data = atomic64_inc_return(&iommu->cmd_sem_val);
2888	build_completion_wait(&cmd2, iommu, data);
2889
2890	raw_spin_lock_irqsave(&iommu->lock, flags);
2891	ret = __iommu_queue_command_sync(iommu, &cmd, true);
2892	if (ret)
2893		goto out;
2894	ret = __iommu_queue_command_sync(iommu, &cmd2, false);
2895	if (ret)
2896		goto out;
2897	wait_on_sem(iommu, data);
2898out:
2899	raw_spin_unlock_irqrestore(&iommu->lock, flags);
2900}
2901
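/*
 * Point the interrupt remapping fields of the device table entry at the
 * given remapping table and enable interrupt remapping for the device.
 */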
2902static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
2903			      struct irq_remap_table *table)
2904{
2905	u64 dte;
2906	struct dev_table_entry *dev_table = get_dev_table(iommu);
2907
2908	dte	= dev_table[devid].data[2];
2909	dte	&= ~DTE_IRQ_PHYS_ADDR_MASK;
2910	dte	|= iommu_virt_to_phys(table->table);
2911	dte	|= DTE_IRQ_REMAP_INTCTL;
2912	dte	|= DTE_INTTABLEN;
2913	dte	|= DTE_IRQ_REMAP_ENABLE;
2914
2915	dev_table[devid].data[2] = dte;
2916}
2917
2918static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid)
2919{
2920	struct irq_remap_table *table;
2921	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
2922
2923	if (WARN_ONCE(!pci_seg->rlookup_table[devid],
2924		      "%s: no iommu for devid %x:%x\n",
2925		      __func__, pci_seg->id, devid))
2926		return NULL;
2927
2928	table = pci_seg->irq_lookup_table[devid];
2929	if (WARN_ONCE(!table, "%s: no table for devid %x:%x\n",
2930		      __func__, pci_seg->id, devid))
2931		return NULL;
2932
2933	return table;
2934}
2935
2936static struct irq_remap_table *__alloc_irq_table(void)
2937{
2938	struct irq_remap_table *table;
2939
2940	table = kzalloc(sizeof(*table), GFP_KERNEL);
2941	if (!table)
2942		return NULL;
2943
2944	table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL);
2945	if (!table->table) {
2946		kfree(table);
2947		return NULL;
2948	}
2949	raw_spin_lock_init(&table->lock);
2950
2951	if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
2952		memset(table->table, 0,
2953		       MAX_IRQS_PER_TABLE * sizeof(u32));
2954	else
2955		memset(table->table, 0,
2956		       (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
2957	return table;
2958}
2959
2960static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid,
2961				  struct irq_remap_table *table)
2962{
2963	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
2964
2965	pci_seg->irq_lookup_table[devid] = table;
2966	set_dte_irq_entry(iommu, devid, table);
2967	iommu_flush_dte(iommu, devid);
2968}
2969
2970static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias,
2971				       void *data)
2972{
2973	struct irq_remap_table *table = data;
2974	struct amd_iommu_pci_seg *pci_seg;
2975	struct amd_iommu *iommu = rlookup_amd_iommu(&pdev->dev);
2976
2977	if (!iommu)
2978		return -EINVAL;
2979
2980	pci_seg = iommu->pci_seg;
2981	pci_seg->irq_lookup_table[alias] = table;
2982	set_dte_irq_entry(iommu, alias, table);
2983	iommu_flush_dte(pci_seg->rlookup_table[alias], alias);
2984
2985	return 0;
2986}
2987
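/*
 * Return the interrupt remapping table for @devid, sharing an existing table
 * with the device's PCI alias where possible and allocating a new one if
 * neither has a table yet.
 */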
2988static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
2989					       u16 devid, struct pci_dev *pdev)
2990{
2991	struct irq_remap_table *table = NULL;
2992	struct irq_remap_table *new_table = NULL;
2993	struct amd_iommu_pci_seg *pci_seg;
2994	unsigned long flags;
2995	u16 alias;
2996
2997	spin_lock_irqsave(&iommu_table_lock, flags);
2998
2999	pci_seg = iommu->pci_seg;
3000	table = pci_seg->irq_lookup_table[devid];
3001	if (table)
3002		goto out_unlock;
3003
3004	alias = pci_seg->alias_table[devid];
3005	table = pci_seg->irq_lookup_table[alias];
3006	if (table) {
3007		set_remap_table_entry(iommu, devid, table);
3008		goto out_wait;
3009	}
3010	spin_unlock_irqrestore(&iommu_table_lock, flags);
3011
3012	/* Nothing there yet, allocate new irq remapping table */
3013	new_table = __alloc_irq_table();
3014	if (!new_table)
3015		return NULL;
3016
3017	spin_lock_irqsave(&iommu_table_lock, flags);
3018
3019	table = pci_seg->irq_lookup_table[devid];
3020	if (table)
3021		goto out_unlock;
3022
3023	table = pci_seg->irq_lookup_table[alias];
3024	if (table) {
3025		set_remap_table_entry(iommu, devid, table);
3026		goto out_wait;
3027	}
3028
3029	table = new_table;
3030	new_table = NULL;
3031
3032	if (pdev)
3033		pci_for_each_dma_alias(pdev, set_remap_table_entry_alias,
3034				       table);
3035	else
3036		set_remap_table_entry(iommu, devid, table);
3037
3038	if (devid != alias)
3039		set_remap_table_entry(iommu, alias, table);
3040
3041out_wait:
3042	iommu_completion_wait(iommu);
3043
3044out_unlock:
3045	spin_unlock_irqrestore(&iommu_table_lock, flags);
3046
3047	if (new_table) {
3048		kmem_cache_free(amd_iommu_irq_cache, new_table->table);
3049		kfree(new_table);
3050	}
3051	return table;
3052}
3053
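/*
 * Reserve @count consecutive free slots in the device's remapping table,
 * optionally aligned to a power of two (as multi-MSI requires), and return
 * the index of the first slot or a negative errno on failure.
 */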
3054static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count,
3055			   bool align, struct pci_dev *pdev)
3056{
3057	struct irq_remap_table *table;
3058	int index, c, alignment = 1;
3059	unsigned long flags;
3060
3061	table = alloc_irq_table(iommu, devid, pdev);
3062	if (!table)
3063		return -ENODEV;
3064
3065	if (align)
3066		alignment = roundup_pow_of_two(count);
3067
3068	raw_spin_lock_irqsave(&table->lock, flags);
3069
3070	/* Scan table for free entries */
3071	for (index = ALIGN(table->min_index, alignment), c = 0;
3072	     index < MAX_IRQS_PER_TABLE;) {
3073		if (!iommu->irte_ops->is_allocated(table, index)) {
3074			c += 1;
3075		} else {
3076			c     = 0;
3077			index = ALIGN(index + 1, alignment);
3078			continue;
3079		}
3080
3081		if (c == count)	{
3082			for (; c != 0; --c)
3083				iommu->irte_ops->set_allocated(table, index - c + 1);
3084
3085			index -= count - 1;
3086			goto out;
3087		}
3088
3089		index++;
3090	}
3091
3092	index = -ENOSPC;
3093
3094out:
3095	raw_spin_unlock_irqrestore(&table->lock, flags);
3096
3097	return index;
3098}
3099
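/*
 * Write a 128-bit (guest virtual APIC capable) IRTE in place. No interrupt
 * table cache flush is performed here; modify_irte_ga() below does the
 * update plus the flush.
 */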
3100static int __modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
3101			    struct irte_ga *irte)
3102{
3103	struct irq_remap_table *table;
3104	struct irte_ga *entry;
3105	unsigned long flags;
3106	u128 old;
3107
3108	table = get_irq_table(iommu, devid);
3109	if (!table)
3110		return -ENOMEM;
3111
3112	raw_spin_lock_irqsave(&table->lock, flags);
3113
3114	entry = (struct irte_ga *)table->table;
3115	entry = &entry[index];
3116
3117	/*
3118	 * We use cmpxchg128 to atomically update the 128-bit IRTE,
3119	 * and it cannot be updated by the hardware or other processors
3120	 * behind us, so the return value of cmpxchg128 should be the
3121	 * same as the old value.
3122	 */
3123	old = entry->irte;
3124	WARN_ON(!try_cmpxchg128(&entry->irte, &old, irte->irte));
3125
3126	raw_spin_unlock_irqrestore(&table->lock, flags);
3127
3128	return 0;
3129}
3130
3131static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
3132			  struct irte_ga *irte)
3133{
3134	bool ret;
3135
3136	ret = __modify_irte_ga(iommu, devid, index, irte);
3137	if (ret)
3138		return ret;
3139
3140	iommu_flush_irt_and_complete(iommu, devid);
3141
3142	return 0;
3143}
3144
3145static int modify_irte(struct amd_iommu *iommu,
3146		       u16 devid, int index, union irte *irte)
3147{
3148	struct irq_remap_table *table;
3149	unsigned long flags;
3150
3151	table = get_irq_table(iommu, devid);
3152	if (!table)
3153		return -ENOMEM;
3154
3155	raw_spin_lock_irqsave(&table->lock, flags);
3156	table->table[index] = irte->val;
3157	raw_spin_unlock_irqrestore(&table->lock, flags);
3158
3159	iommu_flush_irt_and_complete(iommu, devid);
3160
3161	return 0;
3162}
3163
3164static void free_irte(struct amd_iommu *iommu, u16 devid, int index)
3165{
3166	struct irq_remap_table *table;
3167	unsigned long flags;
3168
3169	table = get_irq_table(iommu, devid);
3170	if (!table)
3171		return;
3172
3173	raw_spin_lock_irqsave(&table->lock, flags);
3174	iommu->irte_ops->clear_allocated(table, index);
3175	raw_spin_unlock_irqrestore(&table->lock, flags);
3176
3177	iommu_flush_irt_and_complete(iommu, devid);
3178}
3179
3180static void irte_prepare(void *entry,
3181			 u32 delivery_mode, bool dest_mode,
3182			 u8 vector, u32 dest_apicid, int devid)
3183{
3184	union irte *irte = (union irte *) entry;
3185
3186	irte->val                = 0;
3187	irte->fields.vector      = vector;
3188	irte->fields.int_type    = delivery_mode;
3189	irte->fields.destination = dest_apicid;
3190	irte->fields.dm          = dest_mode;
3191	irte->fields.valid       = 1;
3192}
3193
3194static void irte_ga_prepare(void *entry,
3195			    u32 delivery_mode, bool dest_mode,
3196			    u8 vector, u32 dest_apicid, int devid)
3197{
3198	struct irte_ga *irte = (struct irte_ga *) entry;
3199
3200	irte->lo.val                      = 0;
3201	irte->hi.val                      = 0;
3202	irte->lo.fields_remap.int_type    = delivery_mode;
3203	irte->lo.fields_remap.dm          = dest_mode;
3204	irte->hi.fields.vector            = vector;
3205	irte->lo.fields_remap.destination = APICID_TO_IRTE_DEST_LO(dest_apicid);
3206	irte->hi.fields.destination       = APICID_TO_IRTE_DEST_HI(dest_apicid);
3207	irte->lo.fields_remap.valid       = 1;
3208}
3209
3210static void irte_activate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index)
3211{
3212	union irte *irte = (union irte *) entry;
3213
3214	irte->fields.valid = 1;
3215	modify_irte(iommu, devid, index, irte);
3216}
3217
3218static void irte_ga_activate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index)
3219{
3220	struct irte_ga *irte = (struct irte_ga *) entry;
3221
3222	irte->lo.fields_remap.valid = 1;
3223	modify_irte_ga(iommu, devid, index, irte);
3224}
3225
3226static void irte_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index)
3227{
3228	union irte *irte = (union irte *) entry;
3229
3230	irte->fields.valid = 0;
3231	modify_irte(iommu, devid, index, irte);
3232}
3233
3234static void irte_ga_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index)
3235{
3236	struct irte_ga *irte = (struct irte_ga *) entry;
3237
3238	irte->lo.fields_remap.valid = 0;
3239	modify_irte_ga(iommu, devid, index, irte);
3240}
3241
3242static void irte_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index,
3243			      u8 vector, u32 dest_apicid)
3244{
3245	union irte *irte = (union irte *) entry;
3246
3247	irte->fields.vector = vector;
3248	irte->fields.destination = dest_apicid;
3249	modify_irte(iommu, devid, index, irte);
3250}
3251
3252static void irte_ga_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index,
3253				 u8 vector, u32 dest_apicid)
3254{
3255	struct irte_ga *irte = (struct irte_ga *) entry;
3256
3257	if (!irte->lo.fields_remap.guest_mode) {
3258		irte->hi.fields.vector = vector;
3259		irte->lo.fields_remap.destination =
3260					APICID_TO_IRTE_DEST_LO(dest_apicid);
3261		irte->hi.fields.destination =
3262					APICID_TO_IRTE_DEST_HI(dest_apicid);
3263		modify_irte_ga(iommu, devid, index, irte);
3264	}
3265}
3266
3267#define IRTE_ALLOCATED (~1U)
3268static void irte_set_allocated(struct irq_remap_table *table, int index)
3269{
3270	table->table[index] = IRTE_ALLOCATED;
3271}
3272
3273static void irte_ga_set_allocated(struct irq_remap_table *table, int index)
3274{
3275	struct irte_ga *ptr = (struct irte_ga *)table->table;
3276	struct irte_ga *irte = &ptr[index];
3277
3278	memset(&irte->lo.val, 0, sizeof(u64));
3279	memset(&irte->hi.val, 0, sizeof(u64));
3280	irte->hi.fields.vector = 0xff;
3281}
3282
3283static bool irte_is_allocated(struct irq_remap_table *table, int index)
3284{
3285	union irte *ptr = (union irte *)table->table;
3286	union irte *irte = &ptr[index];
3287
3288	return irte->val != 0;
3289}
3290
3291static bool irte_ga_is_allocated(struct irq_remap_table *table, int index)
3292{
3293	struct irte_ga *ptr = (struct irte_ga *)table->table;
3294	struct irte_ga *irte = &ptr[index];
3295
3296	return irte->hi.fields.vector != 0;
3297}
3298
3299static void irte_clear_allocated(struct irq_remap_table *table, int index)
3300{
3301	table->table[index] = 0;
3302}
3303
3304static void irte_ga_clear_allocated(struct irq_remap_table *table, int index)
3305{
3306	struct irte_ga *ptr = (struct irte_ga *)table->table;
3307	struct irte_ga *irte = &ptr[index];
3308
3309	memset(&irte->lo.val, 0, sizeof(u64));
3310	memset(&irte->hi.val, 0, sizeof(u64));
3311}
3312
3313static int get_devid(struct irq_alloc_info *info)
3314{
3315	switch (info->type) {
3316	case X86_IRQ_ALLOC_TYPE_IOAPIC:
3317		return get_ioapic_devid(info->devid);
3318	case X86_IRQ_ALLOC_TYPE_HPET:
3319		return get_hpet_devid(info->devid);
3320	case X86_IRQ_ALLOC_TYPE_PCI_MSI:
3321	case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
3322		return get_device_sbdf_id(msi_desc_to_dev(info->desc));
3323	default:
3324		WARN_ON_ONCE(1);
3325		return -1;
3326	}
3327}
3328
3329struct irq_remap_ops amd_iommu_irq_ops = {
3330	.prepare		= amd_iommu_prepare,
3331	.enable			= amd_iommu_enable,
3332	.disable		= amd_iommu_disable,
3333	.reenable		= amd_iommu_reenable,
3334	.enable_faulting	= amd_iommu_enable_faulting,
3335};
3336
3337static void fill_msi_msg(struct msi_msg *msg, u32 index)
3338{
3339	msg->data = index;
3340	msg->address_lo = 0;
3341	msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
3342	msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
3343}
3344
3345static void irq_remapping_prepare_irte(struct amd_ir_data *data,
3346				       struct irq_cfg *irq_cfg,
3347				       struct irq_alloc_info *info,
3348				       int devid, int index, int sub_handle)
3349{
3350	struct irq_2_irte *irte_info = &data->irq_2_irte;
3351	struct amd_iommu *iommu = data->iommu;
3352
3353	if (!iommu)
3354		return;
3355
3356	data->irq_2_irte.devid = devid;
3357	data->irq_2_irte.index = index + sub_handle;
3358	iommu->irte_ops->prepare(data->entry, APIC_DELIVERY_MODE_FIXED,
3359				 apic->dest_mode_logical, irq_cfg->vector,
3360				 irq_cfg->dest_apicid, devid);
3361
3362	switch (info->type) {
3363	case X86_IRQ_ALLOC_TYPE_IOAPIC:
3364	case X86_IRQ_ALLOC_TYPE_HPET:
3365	case X86_IRQ_ALLOC_TYPE_PCI_MSI:
3366	case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
3367		fill_msi_msg(&data->msi_entry, irte_info->index);
3368		break;
3369
3370	default:
3371		BUG_ON(1);
3372		break;
3373	}
3374}
3375
3376struct amd_irte_ops irte_32_ops = {
3377	.prepare = irte_prepare,
3378	.activate = irte_activate,
3379	.deactivate = irte_deactivate,
3380	.set_affinity = irte_set_affinity,
3381	.set_allocated = irte_set_allocated,
3382	.is_allocated = irte_is_allocated,
3383	.clear_allocated = irte_clear_allocated,
3384};
3385
3386struct amd_irte_ops irte_128_ops = {
3387	.prepare = irte_ga_prepare,
3388	.activate = irte_ga_activate,
3389	.deactivate = irte_ga_deactivate,
3390	.set_affinity = irte_ga_set_affinity,
3391	.set_allocated = irte_ga_set_allocated,
3392	.is_allocated = irte_ga_is_allocated,
3393	.clear_allocated = irte_ga_clear_allocated,
3394};
3395
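/*
 * Allocate remapping table entries and per-IRQ amd_ir_data for a block of
 * Linux IRQs. IOAPIC pins map directly onto the first 32 table entries,
 * which are reserved for that purpose; MSI/MSI-X interrupts get a freshly
 * allocated index range instead.
 */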
3396static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
3397			       unsigned int nr_irqs, void *arg)
3398{
3399	struct irq_alloc_info *info = arg;
3400	struct irq_data *irq_data;
3401	struct amd_ir_data *data = NULL;
3402	struct amd_iommu *iommu;
3403	struct irq_cfg *cfg;
3404	int i, ret, devid, seg, sbdf;
3405	int index;
3406
3407	if (!info)
3408		return -EINVAL;
3409	if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI)
3410		return -EINVAL;
3411
3412	sbdf = get_devid(info);
3413	if (sbdf < 0)
3414		return -EINVAL;
3415
3416	seg = PCI_SBDF_TO_SEGID(sbdf);
3417	devid = PCI_SBDF_TO_DEVID(sbdf);
3418	iommu = __rlookup_amd_iommu(seg, devid);
3419	if (!iommu)
3420		return -EINVAL;
3421
3422	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
3423	if (ret < 0)
3424		return ret;
3425
3426	if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
3427		struct irq_remap_table *table;
3428
3429		table = alloc_irq_table(iommu, devid, NULL);
3430		if (table) {
3431			if (!table->min_index) {
3432				/*
3433				 * Keep the first 32 indexes free for IOAPIC
3434				 * interrupts.
3435				 */
3436				table->min_index = 32;
3437				for (i = 0; i < 32; ++i)
3438					iommu->irte_ops->set_allocated(table, i);
3439			}
3440			WARN_ON(table->min_index != 32);
3441			index = info->ioapic.pin;
3442		} else {
3443			index = -ENOMEM;
3444		}
3445	} else if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI ||
3446		   info->type == X86_IRQ_ALLOC_TYPE_PCI_MSIX) {
3447		bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI);
3448
3449		index = alloc_irq_index(iommu, devid, nr_irqs, align,
3450					msi_desc_to_pci_dev(info->desc));
3451	} else {
3452		index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL);
3453	}
3454
3455	if (index < 0) {
3456		pr_warn("Failed to allocate IRTE\n");
3457		ret = index;
3458		goto out_free_parent;
3459	}
3460
3461	for (i = 0; i < nr_irqs; i++) {
3462		irq_data = irq_domain_get_irq_data(domain, virq + i);
3463		cfg = irq_data ? irqd_cfg(irq_data) : NULL;
3464		if (!cfg) {
3465			ret = -EINVAL;
3466			goto out_free_data;
3467		}
3468
3469		ret = -ENOMEM;
3470		data = kzalloc(sizeof(*data), GFP_KERNEL);
3471		if (!data)
3472			goto out_free_data;
3473
3474		if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
3475			data->entry = kzalloc(sizeof(union irte), GFP_KERNEL);
3476		else
3477			data->entry = kzalloc(sizeof(struct irte_ga),
3478						     GFP_KERNEL);
3479		if (!data->entry) {
3480			kfree(data);
3481			goto out_free_data;
3482		}
3483
3484		data->iommu = iommu;
3485		irq_data->hwirq = (devid << 16) + i;
3486		irq_data->chip_data = data;
3487		irq_data->chip = &amd_ir_chip;
3488		irq_remapping_prepare_irte(data, cfg, info, devid, index, i);
3489		irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
3490	}
3491
3492	return 0;
3493
3494out_free_data:
3495	for (i--; i >= 0; i--) {
3496		irq_data = irq_domain_get_irq_data(domain, virq + i);
3497		if (irq_data)
3498			kfree(irq_data->chip_data);
3499	}
3500	for (i = 0; i < nr_irqs; i++)
3501		free_irte(iommu, devid, index + i);
3502out_free_parent:
3503	irq_domain_free_irqs_common(domain, virq, nr_irqs);
3504	return ret;
3505}
3506
3507static void irq_remapping_free(struct irq_domain *domain, unsigned int virq,
3508			       unsigned int nr_irqs)
3509{
3510	struct irq_2_irte *irte_info;
3511	struct irq_data *irq_data;
3512	struct amd_ir_data *data;
3513	int i;
3514
3515	for (i = 0; i < nr_irqs; i++) {
3516		irq_data = irq_domain_get_irq_data(domain, virq  + i);
3517		if (irq_data && irq_data->chip_data) {
3518			data = irq_data->chip_data;
3519			irte_info = &data->irq_2_irte;
3520			free_irte(data->iommu, irte_info->devid, irte_info->index);
3521			kfree(data->entry);
3522			kfree(data);
3523		}
3524	}
3525	irq_domain_free_irqs_common(domain, virq, nr_irqs);
3526}
3527
3528static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
3529			       struct amd_ir_data *ir_data,
3530			       struct irq_2_irte *irte_info,
3531			       struct irq_cfg *cfg);
3532
3533static int irq_remapping_activate(struct irq_domain *domain,
3534				  struct irq_data *irq_data, bool reserve)
3535{
3536	struct amd_ir_data *data = irq_data->chip_data;
3537	struct irq_2_irte *irte_info = &data->irq_2_irte;
3538	struct amd_iommu *iommu = data->iommu;
3539	struct irq_cfg *cfg = irqd_cfg(irq_data);
3540
3541	if (!iommu)
3542		return 0;
3543
3544	iommu->irte_ops->activate(iommu, data->entry, irte_info->devid,
3545				  irte_info->index);
3546	amd_ir_update_irte(irq_data, iommu, data, irte_info, cfg);
3547	return 0;
3548}
3549
3550static void irq_remapping_deactivate(struct irq_domain *domain,
3551				     struct irq_data *irq_data)
3552{
3553	struct amd_ir_data *data = irq_data->chip_data;
3554	struct irq_2_irte *irte_info = &data->irq_2_irte;
3555	struct amd_iommu *iommu = data->iommu;
3556
3557	if (iommu)
3558		iommu->irte_ops->deactivate(iommu, data->entry, irte_info->devid,
3559					    irte_info->index);
3560}
3561
3562static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec,
3563				enum irq_domain_bus_token bus_token)
3564{
3565	struct amd_iommu *iommu;
3566	int devid = -1;
3567
3568	if (!amd_iommu_irq_remap)
3569		return 0;
3570
3571	if (x86_fwspec_is_ioapic(fwspec))
3572		devid = get_ioapic_devid(fwspec->param[0]);
3573	else if (x86_fwspec_is_hpet(fwspec))
3574		devid = get_hpet_devid(fwspec->param[0]);
3575
3576	if (devid < 0)
3577		return 0;
3578	iommu = __rlookup_amd_iommu((devid >> 16), (devid & 0xffff));
3579
3580	return iommu && iommu->ir_domain == d;
3581}
3582
3583static const struct irq_domain_ops amd_ir_domain_ops = {
3584	.select = irq_remapping_select,
3585	.alloc = irq_remapping_alloc,
3586	.free = irq_remapping_free,
3587	.activate = irq_remapping_activate,
3588	.deactivate = irq_remapping_deactivate,
3589};
3590
3591int amd_iommu_activate_guest_mode(void *data)
3592{
3593	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
3594	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
3595	u64 valid;
3596
3597	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry)
3598		return 0;
3599
3600	valid = entry->lo.fields_vapic.valid;
3601
3602	entry->lo.val = 0;
3603	entry->hi.val = 0;
3604
3605	entry->lo.fields_vapic.valid       = valid;
3606	entry->lo.fields_vapic.guest_mode  = 1;
3607	entry->lo.fields_vapic.ga_log_intr = 1;
3608	entry->hi.fields.ga_root_ptr       = ir_data->ga_root_ptr;
3609	entry->hi.fields.vector            = ir_data->ga_vector;
3610	entry->lo.fields_vapic.ga_tag      = ir_data->ga_tag;
3611
3612	return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
3613			      ir_data->irq_2_irte.index, entry);
3614}
3615EXPORT_SYMBOL(amd_iommu_activate_guest_mode);
3616
3617int amd_iommu_deactivate_guest_mode(void *data)
3618{
3619	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
3620	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
3621	struct irq_cfg *cfg = ir_data->cfg;
3622	u64 valid;
3623
3624	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
3625	    !entry || !entry->lo.fields_vapic.guest_mode)
3626		return 0;
3627
3628	valid = entry->lo.fields_remap.valid;
3629
3630	entry->lo.val = 0;
3631	entry->hi.val = 0;
3632
3633	entry->lo.fields_remap.valid       = valid;
3634	entry->lo.fields_remap.dm          = apic->dest_mode_logical;
3635	entry->lo.fields_remap.int_type    = APIC_DELIVERY_MODE_FIXED;
3636	entry->hi.fields.vector            = cfg->vector;
3637	entry->lo.fields_remap.destination =
3638				APICID_TO_IRTE_DEST_LO(cfg->dest_apicid);
3639	entry->hi.fields.destination =
3640				APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
3641
3642	return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
3643			      ir_data->irq_2_irte.index, entry);
3644}
3645EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
3646
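/*
 * irq_set_vcpu_affinity() callback, used e.g. by KVM's AVIC code: switch the
 * IRTE between guest (vAPIC) mode and legacy remapping mode and remember the
 * GA tag so the caller can clean up later.
 */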
3647static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
3648{
3649	int ret;
3650	struct amd_iommu_pi_data *pi_data = vcpu_info;
3651	struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data;
3652	struct amd_ir_data *ir_data = data->chip_data;
3653	struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
3654	struct iommu_dev_data *dev_data;
3655
3656	if (ir_data->iommu == NULL)
3657		return -EINVAL;
3658
3659	dev_data = search_dev_data(ir_data->iommu, irte_info->devid);
3660
3661	/* Note:
3662	 * This device has never been set up for guest mode,
3663	 * so we should not modify the IRTE.
3664	 */
3665	if (!dev_data || !dev_data->use_vapic)
3666		return 0;
3667
3668	ir_data->cfg = irqd_cfg(data);
3669	pi_data->ir_data = ir_data;
3670
3671	/* Note:
3672	 * SVM tries to set up for VAPIC mode, but we are in
3673	 * legacy mode. So, we force legacy mode instead.
3674	 */
3675	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
3676		pr_debug("%s: Fall back to using intr legacy remap\n",
3677			 __func__);
3678		pi_data->is_guest_mode = false;
3679	}
3680
3681	pi_data->prev_ga_tag = ir_data->cached_ga_tag;
3682	if (pi_data->is_guest_mode) {
3683		ir_data->ga_root_ptr = (pi_data->base >> 12);
3684		ir_data->ga_vector = vcpu_pi_info->vector;
3685		ir_data->ga_tag = pi_data->ga_tag;
3686		ret = amd_iommu_activate_guest_mode(ir_data);
3687		if (!ret)
3688			ir_data->cached_ga_tag = pi_data->ga_tag;
3689	} else {
3690		ret = amd_iommu_deactivate_guest_mode(ir_data);
3691
3692		/*
3693		 * This communicates the ga_tag back to the caller
3694		 * so that it can do all the necessary clean up.
3695		 */
3696		if (!ret)
3697			ir_data->cached_ga_tag = 0;
3698	}
3699
3700	return ret;
3701}
3702
3703
3704static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
3705			       struct amd_ir_data *ir_data,
3706			       struct irq_2_irte *irte_info,
3707			       struct irq_cfg *cfg)
3708{
3709
3710	/*
3711	 * Atomically update the IRTE with the new destination and vector,
3712	 * then flush the interrupt entry cache.
3713	 */
3714	iommu->irte_ops->set_affinity(iommu, ir_data->entry, irte_info->devid,
3715				      irte_info->index, cfg->vector,
3716				      cfg->dest_apicid);
3717}
3718
3719static int amd_ir_set_affinity(struct irq_data *data,
3720			       const struct cpumask *mask, bool force)
3721{
3722	struct amd_ir_data *ir_data = data->chip_data;
3723	struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
3724	struct irq_cfg *cfg = irqd_cfg(data);
3725	struct irq_data *parent = data->parent_data;
3726	struct amd_iommu *iommu = ir_data->iommu;
3727	int ret;
3728
3729	if (!iommu)
3730		return -ENODEV;
3731
3732	ret = parent->chip->irq_set_affinity(parent, mask, force);
3733	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
3734		return ret;
3735
3736	amd_ir_update_irte(data, iommu, ir_data, irte_info, cfg);
3737	/*
3738	 * After this point, all the interrupts will start arriving
3739	 * at the new destination. So, time to cleanup the previous
3740	 * vector allocation.
3741	 */
3742	vector_schedule_cleanup(cfg);
3743
3744	return IRQ_SET_MASK_OK_DONE;
3745}
3746
3747static void ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
3748{
3749	struct amd_ir_data *ir_data = irq_data->chip_data;
3750
3751	*msg = ir_data->msi_entry;
3752}
3753
3754static struct irq_chip amd_ir_chip = {
3755	.name			= "AMD-IR",
3756	.irq_ack		= apic_ack_irq,
3757	.irq_set_affinity	= amd_ir_set_affinity,
3758	.irq_set_vcpu_affinity	= amd_ir_set_vcpu_affinity,
3759	.irq_compose_msi_msg	= ir_compose_msi_msg,
3760};
3761
3762static const struct msi_parent_ops amdvi_msi_parent_ops = {
3763	.supported_flags	= X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI,
3764	.prefix			= "IR-",
3765	.init_dev_msi_info	= msi_parent_init_dev_msi_info,
3766};
3767
3768int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
3769{
3770	struct fwnode_handle *fn;
3771
3772	fn = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index);
3773	if (!fn)
3774		return -ENOMEM;
3775	iommu->ir_domain = irq_domain_create_hierarchy(arch_get_ir_parent_domain(), 0, 0,
3776						       fn, &amd_ir_domain_ops, iommu);
3777	if (!iommu->ir_domain) {
3778		irq_domain_free_fwnode(fn);
3779		return -ENOMEM;
3780	}
3781
3782	irq_domain_update_bus_token(iommu->ir_domain,  DOMAIN_BUS_AMDVI);
3783	iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
3784				   IRQ_DOMAIN_FLAG_ISOLATED_MSI;
3785	iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops;
3786
3787	return 0;
3788}
3789
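/*
 * Update the destination CPU and is_run state of a guest-mode IRTE, e.g.
 * when a vCPU is loaded on or unloaded from a CPU. Uses __modify_irte_ga(),
 * i.e. no interrupt table cache flush is performed.
 */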
3790int amd_iommu_update_ga(int cpu, bool is_run, void *data)
3791{
3792	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
3793	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
3794
3795	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
3796	    !entry || !entry->lo.fields_vapic.guest_mode)
3797		return 0;
3798
3799	if (!ir_data->iommu)
 
3800		return -ENODEV;
3801
3802	if (cpu >= 0) {
3803		entry->lo.fields_vapic.destination =
3804					APICID_TO_IRTE_DEST_LO(cpu);
3805		entry->hi.fields.destination =
3806					APICID_TO_IRTE_DEST_HI(cpu);
3807	}
3808	entry->lo.fields_vapic.is_run = is_run;
3809
3810	return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
3811				ir_data->irq_2_irte.index, entry);
 
 
 
3812}
3813EXPORT_SYMBOL(amd_iommu_update_ga);
3814#endif
v5.9
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
   4 * Author: Joerg Roedel <jroedel@suse.de>
   5 *         Leo Duran <leo.duran@amd.com>
   6 */
   7
   8#define pr_fmt(fmt)     "AMD-Vi: " fmt
   9#define dev_fmt(fmt)    pr_fmt(fmt)
  10
  11#include <linux/ratelimit.h>
  12#include <linux/pci.h>
  13#include <linux/acpi.h>
  14#include <linux/amba/bus.h>
  15#include <linux/platform_device.h>
  16#include <linux/pci-ats.h>
  17#include <linux/bitmap.h>
  18#include <linux/slab.h>
  19#include <linux/debugfs.h>
  20#include <linux/scatterlist.h>
  21#include <linux/dma-mapping.h>
  22#include <linux/dma-direct.h>
  23#include <linux/dma-iommu.h>
  24#include <linux/iommu-helper.h>
  25#include <linux/delay.h>
  26#include <linux/amd-iommu.h>
  27#include <linux/notifier.h>
  28#include <linux/export.h>
  29#include <linux/irq.h>
  30#include <linux/msi.h>
  31#include <linux/dma-contiguous.h>
  32#include <linux/irqdomain.h>
  33#include <linux/percpu.h>
  34#include <linux/iova.h>
 
  35#include <asm/irq_remapping.h>
  36#include <asm/io_apic.h>
  37#include <asm/apic.h>
  38#include <asm/hw_irq.h>
  39#include <asm/msidef.h>
  40#include <asm/proto.h>
  41#include <asm/iommu.h>
  42#include <asm/gart.h>
  43#include <asm/dma.h>
 
  44
  45#include "amd_iommu.h"
 
  46#include "../irq_remapping.h"
 
  47
  48#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
  49
  50#define LOOP_TIMEOUT	100000
  51
  52/* IO virtual address start page frame number */
  53#define IOVA_START_PFN		(1)
  54#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
  55
  56/* Reserved IOVA ranges */
  57#define MSI_RANGE_START		(0xfee00000)
  58#define MSI_RANGE_END		(0xfeefffff)
  59#define HT_RANGE_START		(0xfd00000000ULL)
  60#define HT_RANGE_END		(0xffffffffffULL)
  61
  62/*
   63 * This bitmap is used to advertise the page sizes our hardware supports
  64 * to the IOMMU core, which will then use this information to split
  65 * physically contiguous memory regions it is mapping into page sizes
  66 * that we support.
  67 *
  68 * 512GB Pages are not supported due to a hardware bug
  69 */
  70#define AMD_IOMMU_PGSIZES	((~0xFFFUL) & ~(2ULL << 38))
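/*
 * Decoded: ~0xFFFUL sets every bit from bit 12 upwards, i.e. every
 * power-of-two page size from 4KiB up; masking out (2ULL << 38) clears
 * bit 39 and thereby drops the broken 512GB page size from the set.
 */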
  71
  72#define DEFAULT_PGTABLE_LEVEL	PAGE_MODE_3_LEVEL
  73
  74static DEFINE_SPINLOCK(pd_bitmap_lock);
  75
  76/* List of all available dev_data structures */
  77static LLIST_HEAD(dev_data_list);
  78
  79LIST_HEAD(ioapic_map);
  80LIST_HEAD(hpet_map);
  81LIST_HEAD(acpihid_map);
  82
  83/*
  84 * Domain for untranslated devices - only allocated
   85 * if iommu=pt is passed on the kernel cmd line.
  86 */
  87const struct iommu_ops amd_iommu_ops;
 
  88
  89static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
  90int amd_iommu_max_glx_val = -1;
  91
  92/*
  93 * general struct to manage commands send to an IOMMU
  94 */
  95struct iommu_cmd {
  96	u32 data[4];
  97};
  98
  99struct kmem_cache *amd_iommu_irq_cache;
 100
 101static void update_domain(struct protection_domain *domain);
 102static void detach_device(struct device *dev);
 103static void update_and_flush_device_table(struct protection_domain *domain,
 104					  struct domain_pgtable *pgtable);
 
 105
 106/****************************************************************************
 107 *
 108 * Helper functions
 109 *
 110 ****************************************************************************/
 111
 112static inline u16 get_pci_device_id(struct device *dev)
 113{
 114	struct pci_dev *pdev = to_pci_dev(dev);
 115
 116	return pci_dev_id(pdev);
 117}
 118
 119static inline int get_acpihid_device_id(struct device *dev,
 120					struct acpihid_map_entry **entry)
 121{
 122	struct acpi_device *adev = ACPI_COMPANION(dev);
 123	struct acpihid_map_entry *p;
 124
 125	if (!adev)
 126		return -ENODEV;
 127
 128	list_for_each_entry(p, &acpihid_map, list) {
 129		if (acpi_dev_hid_uid_match(adev, p->hid,
 130					   p->uid[0] ? p->uid : NULL)) {
 131			if (entry)
 132				*entry = p;
 133			return p->devid;
 134		}
 135	}
 136	return -EINVAL;
 137}
 138
 139static inline int get_device_id(struct device *dev)
 140{
 141	int devid;
 142
 143	if (dev_is_pci(dev))
 144		devid = get_pci_device_id(dev);
 145	else
 146		devid = get_acpihid_device_id(dev, NULL);
 147
 148	return devid;
 149}
 150
 151static struct protection_domain *to_pdomain(struct iommu_domain *dom)
 152{
 153	return container_of(dom, struct protection_domain, domain);
 154}
 155
 156static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
 157					 struct domain_pgtable *pgtable)
 158{
 159	u64 pt_root = atomic64_read(&domain->pt_root);
 
 
 
 160
 161	pgtable->root = (u64 *)(pt_root & PAGE_MASK);
 162	pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
 163}
 164
 165static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
 
 166{
 167	atomic64_set(&domain->pt_root, root);
 
 
 168}
 169
 170static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
 171{
 172	amd_iommu_domain_set_pt_root(domain, 0);
 173}
 174
 175static void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 176					 u64 *root, int mode)
 177{
 178	u64 pt_root;
 
 179
 180	/* lowest 3 bits encode pgtable mode */
 181	pt_root = mode & 7;
 182	pt_root |= (u64)root;
 183
 184	amd_iommu_domain_set_pt_root(domain, pt_root);
 185}
 186
 187static struct iommu_dev_data *alloc_dev_data(u16 devid)
 188{
 189	struct iommu_dev_data *dev_data;
 
 190
 191	dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
 192	if (!dev_data)
 193		return NULL;
 194
 195	spin_lock_init(&dev_data->lock);
 196	dev_data->devid = devid;
 197	ratelimit_default_init(&dev_data->rs);
 198
 199	llist_add(&dev_data->dev_data_list, &dev_data_list);
 200	return dev_data;
 201}
 202
 203static struct iommu_dev_data *search_dev_data(u16 devid)
 204{
 205	struct iommu_dev_data *dev_data;
 206	struct llist_node *node;
 
 207
 208	if (llist_empty(&dev_data_list))
 209		return NULL;
 210
 211	node = dev_data_list.first;
 212	llist_for_each_entry(dev_data, node, dev_data_list) {
 213		if (dev_data->devid == devid)
 214			return dev_data;
 215	}
 216
 217	return NULL;
 218}
 219
 220static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
 221{
 
 
 222	u16 devid = pci_dev_id(pdev);
 223
 224	if (devid == alias)
 225		return 0;
 226
 227	amd_iommu_rlookup_table[alias] =
 228		amd_iommu_rlookup_table[devid];
 229	memcpy(amd_iommu_dev_table[alias].data,
 230	       amd_iommu_dev_table[devid].data,
 231	       sizeof(amd_iommu_dev_table[alias].data));
 
 
 
 
 232
 233	return 0;
 234}
 235
 236static void clone_aliases(struct pci_dev *pdev)
 237{
 238	if (!pdev)
 
 
 239		return;
 
 240
 241	/*
 242	 * The IVRS alias stored in the alias table may not be
  243	 * part of the PCI DMA aliases if its bus differs
 244	 * from the original device.
 245	 */
 246	clone_alias(pdev, amd_iommu_alias_table[pci_dev_id(pdev)], NULL);
 247
 248	pci_for_each_dma_alias(pdev, clone_alias, NULL);
 249}
 250
 251static struct pci_dev *setup_aliases(struct device *dev)
 252{
 253	struct pci_dev *pdev = to_pci_dev(dev);
 
 254	u16 ivrs_alias;
 255
 256	/* For ACPI HID devices, there are no aliases */
 257	if (!dev_is_pci(dev))
 258		return NULL;
 259
 260	/*
 261	 * Add the IVRS alias to the pci aliases if it is on the same
 262	 * bus. The IVRS table may know about a quirk that we don't.
 263	 */
 264	ivrs_alias = amd_iommu_alias_table[pci_dev_id(pdev)];
 265	if (ivrs_alias != pci_dev_id(pdev) &&
 266	    PCI_BUS_NUM(ivrs_alias) == pdev->bus->number)
 267		pci_add_dma_alias(pdev, ivrs_alias & 0xff, 1);
 268
 269	clone_aliases(pdev);
 270
 271	return pdev;
 272}
 273
 274static struct iommu_dev_data *find_dev_data(u16 devid)
 275{
 276	struct iommu_dev_data *dev_data;
 277	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
 278
 279	dev_data = search_dev_data(devid);
 280
 281	if (dev_data == NULL) {
 282		dev_data = alloc_dev_data(devid);
 283		if (!dev_data)
 284			return NULL;
 285
 286		if (translation_pre_enabled(iommu))
 287			dev_data->defer_attach = true;
 288	}
 289
 290	return dev_data;
 291}
 292
 293/*
  294* Find or create an IOMMU group for an acpihid device.
 295*/
 296static struct iommu_group *acpihid_device_group(struct device *dev)
 297{
 298	struct acpihid_map_entry *p, *entry = NULL;
 299	int devid;
 300
 301	devid = get_acpihid_device_id(dev, &entry);
 302	if (devid < 0)
 303		return ERR_PTR(devid);
 304
 305	list_for_each_entry(p, &acpihid_map, list) {
 306		if ((devid == p->devid) && p->group)
 307			entry->group = p->group;
 308	}
 309
 310	if (!entry->group)
 311		entry->group = generic_device_group(dev);
 312	else
 313		iommu_group_ref_get(entry->group);
 314
 315	return entry->group;
 316}
 317
 318static bool pci_iommuv2_capable(struct pci_dev *pdev)
 319{
 320	static const int caps[] = {
 321		PCI_EXT_CAP_ID_PRI,
 322		PCI_EXT_CAP_ID_PASID,
 323	};
 324	int i, pos;
 325
 326	if (!pci_ats_supported(pdev))
 327		return false;
 328
 329	for (i = 0; i < 2; ++i) {
 330		pos = pci_find_ext_capability(pdev, caps[i]);
 331		if (pos == 0)
 332			return false;
 
 333	}
 334
 335	return true;
 336}
 337
 338static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
 339{
 340	struct iommu_dev_data *dev_data;
 341
 342	dev_data = dev_iommu_priv_get(&pdev->dev);
 343
 344	return dev_data->errata & (1 << erratum) ? true : false;
 
 
 
 
 345}
 346
 347/*
 348 * This function checks if the driver got a valid device from the caller to
 349 * avoid dereferencing invalid pointers.
 350 */
 351static bool check_device(struct device *dev)
 352{
 353	int devid;
 
 
 354
 355	if (!dev)
 356		return false;
 357
 358	devid = get_device_id(dev);
 359	if (devid < 0)
 360		return false;
 
 361
 362	/* Out of our scope? */
 363	if (devid > amd_iommu_last_bdf)
 364		return false;
 365
 366	if (amd_iommu_rlookup_table[devid] == NULL)
 
 
 367		return false;
 368
 369	return true;
 370}
 371
 372static int iommu_init_device(struct device *dev)
 373{
 374	struct iommu_dev_data *dev_data;
 375	int devid;
 376
 377	if (dev_iommu_priv_get(dev))
 378		return 0;
 379
 380	devid = get_device_id(dev);
 381	if (devid < 0)
 382		return devid;
 383
 384	dev_data = find_dev_data(devid);
 
 385	if (!dev_data)
 386		return -ENOMEM;
 387
 388	dev_data->pdev = setup_aliases(dev);
 
 389
 390	/*
  391	 * By default we use passthrough mode for IOMMUv2 capable devices.
 392	 * But if amd_iommu=force_isolation is set (e.g. to debug DMA to
 393	 * invalid address), we ignore the capability for the device so
 394	 * it'll be forced to go into translation mode.
 395	 */
 396	if ((iommu_default_passthrough() || !amd_iommu_force_isolation) &&
 397	    dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
 398		struct amd_iommu *iommu;
 399
 400		iommu = amd_iommu_rlookup_table[dev_data->devid];
 401		dev_data->iommu_v2 = iommu->is_iommu_v2;
 402	}
 403
 404	dev_iommu_priv_set(dev, dev_data);
 405
 406	return 0;
 407}
 408
 409static void iommu_ignore_device(struct device *dev)
 410{
 411	int devid;
 
 
 412
 413	devid = get_device_id(dev);
 414	if (devid < 0)
 415		return;
 416
 417	amd_iommu_rlookup_table[devid] = NULL;
 418	memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
 
 419
 420	setup_aliases(dev);
 421}
 422
 423static void amd_iommu_uninit_device(struct device *dev)
 424{
 425	struct iommu_dev_data *dev_data;
 426
 427	dev_data = dev_iommu_priv_get(dev);
 428	if (!dev_data)
 429		return;
 430
 431	if (dev_data->domain)
 432		detach_device(dev);
 433
 434	dev_iommu_priv_set(dev, NULL);
 435
 436	/*
 437	 * We keep dev_data around for unplugged devices and reuse it when the
 438	 * device is re-plugged - not doing so would introduce a ton of races.
 439	 */
 440}
 441
 442/*
 443 * Helper function to get the first pte of a large mapping
 444 */
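/*
 * A level-7 (arbitrary size) mapping replicates the same PTE across all
 * default-size slots it covers, so the pointer is masked down to the
 * first replicated entry; *page_size and *count report the mapping size
 * and the number of replicated PTEs.
 */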
 445static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
 446			 unsigned long *count)
 447{
 448	unsigned long pte_mask, pg_size, cnt;
 449	u64 *fpte;
 450
 451	pg_size  = PTE_PAGE_SIZE(*pte);
 452	cnt      = PAGE_SIZE_PTE_COUNT(pg_size);
 453	pte_mask = ~((cnt << 3) - 1);
 454	fpte     = (u64 *)(((unsigned long)pte) & pte_mask);
 455
 456	if (page_size)
 457		*page_size = pg_size;
 458
 459	if (count)
 460		*count = cnt;
 461
 462	return fpte;
 463}
 464
 465/****************************************************************************
 466 *
 467 * Interrupt handling functions
 468 *
 469 ****************************************************************************/
 470
 471static void dump_dte_entry(u16 devid)
 472{
 473	int i;
 
 474
 475	for (i = 0; i < 4; ++i)
 476		pr_err("DTE[%d]: %016llx\n", i,
 477			amd_iommu_dev_table[devid].data[i]);
 478}
 479
 480static void dump_command(unsigned long phys_addr)
 481{
 482	struct iommu_cmd *cmd = iommu_phys_to_virt(phys_addr);
 483	int i;
 484
 485	for (i = 0; i < 4; ++i)
 486		pr_err("CMD[%d]: %08x\n", i, cmd->data[i]);
 487}
 488
 489static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
 490					u64 address, int flags)
 491{
 492	struct iommu_dev_data *dev_data = NULL;
 493	struct pci_dev *pdev;
 494
 495	pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
 496					   devid & 0xff);
 497	if (pdev)
 498		dev_data = dev_iommu_priv_get(&pdev->dev);
 499
 500	if (dev_data && __ratelimit(&dev_data->rs)) {
 501		pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
 502			domain_id, address, flags);
 503	} else if (printk_ratelimit()) {
 504		pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
 505			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 506			domain_id, address, flags);
 507	}
 508
 
 509	if (pdev)
 510		pci_dev_put(pdev);
 511}
 512
 513static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 514{
 515	struct device *dev = iommu->iommu.dev;
 516	int type, devid, pasid, flags, tag;
 517	volatile u32 *event = __evt;
 518	int count = 0;
 519	u64 address;
 
 520
 521retry:
 522	type    = (event[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK;
 523	devid   = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
 524	pasid   = (event[0] & EVENT_DOMID_MASK_HI) |
 525		  (event[1] & EVENT_DOMID_MASK_LO);
 526	flags   = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
 527	address = (u64)(((u64)event[3]) << 32) | event[2];
 528
 529	if (type == 0) {
 530		/* Did we hit the erratum? */
 531		if (++count == LOOP_TIMEOUT) {
 532			pr_err("No event written to event log\n");
 533			return;
 534		}
 535		udelay(1);
 536		goto retry;
 537	}
 538
 539	if (type == EVENT_TYPE_IO_FAULT) {
 540		amd_iommu_report_page_fault(devid, pasid, address, flags);
 541		return;
 542	}
 543
 544	switch (type) {
 545	case EVENT_TYPE_ILL_DEV:
 546		dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
 547			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 548			pasid, address, flags);
 549		dump_dte_entry(devid);
 550		break;
 551	case EVENT_TYPE_DEV_TAB_ERR:
 552		dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
 553			"address=0x%llx flags=0x%04x]\n",
 554			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 555			address, flags);
 556		break;
 557	case EVENT_TYPE_PAGE_TAB_ERR:
 558		dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n",
 559			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 560			pasid, address, flags);
 561		break;
 562	case EVENT_TYPE_ILL_CMD:
 563		dev_err(dev, "Event logged [ILLEGAL_COMMAND_ERROR address=0x%llx]\n", address);
 564		dump_command(address);
 565		break;
 566	case EVENT_TYPE_CMD_HARD_ERR:
 567		dev_err(dev, "Event logged [COMMAND_HARDWARE_ERROR address=0x%llx flags=0x%04x]\n",
 568			address, flags);
 569		break;
 570	case EVENT_TYPE_IOTLB_INV_TO:
 571		dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%llx]\n",
 572			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 573			address);
 574		break;
 575	case EVENT_TYPE_INV_DEV_REQ:
 576		dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
 577			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 578			pasid, address, flags);
 579		break;
 580	case EVENT_TYPE_INV_PPR_REQ:
 581		pasid = PPR_PASID(*((u64 *)__evt));
 582		tag = event[1] & 0x03FF;
 583		dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n",
 584			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 585			pasid, address, flags, tag);
 586		break;
 587	default:
 588		dev_err(dev, "Event logged [UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n",
 589			event[0], event[1], event[2], event[3]);
 590	}
 591
 592	memset(__evt, 0, 4 * sizeof(u32));
 593}
 594
 595static void iommu_poll_events(struct amd_iommu *iommu)
 596{
 597	u32 head, tail;
 598
 599	head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 600	tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
 601
 602	while (head != tail) {
 603		iommu_print_event(iommu, iommu->evt_buf + head);
 
 
 604		head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE;
 
 605	}
 606
 607	writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 608}
 609
 610static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
 611{
 612	struct amd_iommu_fault fault;
 613
 614	if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
 615		pr_err_ratelimited("Unknown PPR request received\n");
 616		return;
 617	}
 618
 619	fault.address   = raw[1];
 620	fault.pasid     = PPR_PASID(raw[0]);
 621	fault.device_id = PPR_DEVID(raw[0]);
 622	fault.tag       = PPR_TAG(raw[0]);
 623	fault.flags     = PPR_FLAGS(raw[0]);
 624
 625	atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
 626}
 627
 628static void iommu_poll_ppr_log(struct amd_iommu *iommu)
 629{
 630	u32 head, tail;
 631
 632	if (iommu->ppr_log == NULL)
 633		return;
 634
 635	head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 636	tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
 637
 638	while (head != tail) {
 639		volatile u64 *raw;
 640		u64 entry[2];
 641		int i;
 642
 643		raw = (u64 *)(iommu->ppr_log + head);
 644
 645		/*
 646		 * Hardware bug: Interrupt may arrive before the entry is
  647		 * written to memory. If this happens, we need to wait for the
 648		 * entry to arrive.
 649		 */
 650		for (i = 0; i < LOOP_TIMEOUT; ++i) {
 651			if (PPR_REQ_TYPE(raw[0]) != 0)
 652				break;
 653			udelay(1);
 654		}
 655
 656		/* Avoid memcpy function-call overhead */
 657		entry[0] = raw[0];
 658		entry[1] = raw[1];
 659
 660		/*
 661		 * To detect the hardware bug we need to clear the entry
 662		 * back to zero.
 663		 */
 664		raw[0] = raw[1] = 0UL;
 665
 666		/* Update head pointer of hardware ring-buffer */
 667		head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
 668		writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 669
 670		/* Handle PPR entry */
 671		iommu_handle_ppr_entry(iommu, entry);
 672
 673		/* Refresh ring-buffer information */
 674		head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 675		tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
 676	}
 677}
 678
 679#ifdef CONFIG_IRQ_REMAP
 680static int (*iommu_ga_log_notifier)(u32);
 681
 682int amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
 683{
 684	iommu_ga_log_notifier = notifier;
 685
 686	return 0;
 687}
 688EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier);
 689
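/*
 * GA log entries are written by the IOMMU when a guest ("vAPIC mode")
 * interrupt targets a vCPU that is not currently running.  The notifier
 * registered above (by KVM's AVIC support) is then responsible for
 * kicking the vCPU identified by the GA tag.
 */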
 690static void iommu_poll_ga_log(struct amd_iommu *iommu)
 691{
 692	u32 head, tail, cnt = 0;
 693
 694	if (iommu->ga_log == NULL)
 695		return;
 696
 697	head = readl(iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
 698	tail = readl(iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
 699
 700	while (head != tail) {
 701		volatile u64 *raw;
 702		u64 log_entry;
 703
 704		raw = (u64 *)(iommu->ga_log + head);
 705		cnt++;
 706
 707		/* Avoid memcpy function-call overhead */
 708		log_entry = *raw;
 709
 710		/* Update head pointer of hardware ring-buffer */
 711		head = (head + GA_ENTRY_SIZE) % GA_LOG_SIZE;
 712		writel(head, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
 713
 714		/* Handle GA entry */
 715		switch (GA_REQ_TYPE(log_entry)) {
 716		case GA_GUEST_NR:
 717			if (!iommu_ga_log_notifier)
 718				break;
 719
 720			pr_debug("%s: devid=%#x, ga_tag=%#x\n",
 721				 __func__, GA_DEVID(log_entry),
 722				 GA_TAG(log_entry));
 723
 724			if (iommu_ga_log_notifier(GA_TAG(log_entry)) != 0)
 725				pr_err("GA log notifier failed.\n");
 726			break;
 727		default:
 728			break;
 729		}
 730	}
 731}
 732#endif /* CONFIG_IRQ_REMAP */
 733
 734#define AMD_IOMMU_INT_MASK	\
 735	(MMIO_STATUS_EVT_INT_MASK | \
 736	 MMIO_STATUS_PPR_INT_MASK | \
 737	 MMIO_STATUS_GALOG_INT_MASK)
 738
 739irqreturn_t amd_iommu_int_thread(int irq, void *data)
 
 
 
 740{
 741	struct amd_iommu *iommu = (struct amd_iommu *) data;
 742	u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 
 743
 744	while (status & AMD_IOMMU_INT_MASK) {
 745		/* Enable EVT and PPR and GA interrupts again */
 746		writel(AMD_IOMMU_INT_MASK,
 747			iommu->mmio_base + MMIO_STATUS_OFFSET);
 748
 749		if (status & MMIO_STATUS_EVT_INT_MASK) {
 750			pr_devel("Processing IOMMU Event Log\n");
 751			iommu_poll_events(iommu);
 752		}
 753
 754		if (status & MMIO_STATUS_PPR_INT_MASK) {
 755			pr_devel("Processing IOMMU PPR Log\n");
 756			iommu_poll_ppr_log(iommu);
 
 757		}
 758
 759#ifdef CONFIG_IRQ_REMAP
 760		if (status & MMIO_STATUS_GALOG_INT_MASK) {
 761			pr_devel("Processing IOMMU GA Log\n");
 762			iommu_poll_ga_log(iommu);
 763		}
 764#endif
 765
 766		/*
 767		 * Hardware bug: ERBT1312
  768		 * When re-enabling the interrupt (by writing 1
  769		 * to clear the bit), the hardware might also try to set
  770		 * the interrupt bit in the event status register.
  771		 * In this scenario, the bit will be set and will disable
  772		 * subsequent interrupts.
  773		 *
  774		 * Workaround: The IOMMU driver should read back the
  775		 * status register and check if the interrupt bits are cleared.
  776		 * If not, the driver will need to go through the interrupt
  777		 * handler again and re-clear the bits.
 778		 */
 779		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 780	}
 781	return IRQ_HANDLED;
 782}
 783
 784irqreturn_t amd_iommu_int_handler(int irq, void *data)
 785{
 786	return IRQ_WAKE_THREAD;
 787}
 788
 789/****************************************************************************
 790 *
 791 * IOMMU command queuing functions
 792 *
 793 ****************************************************************************/
 794
 795static int wait_on_sem(volatile u64 *sem)
 796{
 797	int i = 0;
 798
 799	while (*sem == 0 && i < LOOP_TIMEOUT) {
 800		udelay(1);
 801		i += 1;
 802	}
 803
 804	if (i == LOOP_TIMEOUT) {
 805		pr_alert("Completion-Wait loop timed out\n");
 806		return -EIO;
 807	}
 808
 809	return 0;
 810}
 811
 812static void copy_cmd_to_buffer(struct amd_iommu *iommu,
 813			       struct iommu_cmd *cmd)
 814{
 815	u8 *target;
 816	u32 tail;
 817
 818	/* Copy command to buffer */
 819	tail = iommu->cmd_buf_tail;
 820	target = iommu->cmd_buf + tail;
 821	memcpy(target, cmd, sizeof(*cmd));
 822
 823	tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
 824	iommu->cmd_buf_tail = tail;
 825
 826	/* Tell the IOMMU about it */
 827	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 828}
 829
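/*
 * COMPLETION_WAIT with the store bit set makes the IOMMU write the
 * 64-bit value in data[2]/data[3] (here: 1) to the physical address in
 * data[0]/data[1] once every previously queued command has finished;
 * iommu_completion_wait() zeroes cmd_sem first and then polls it via
 * wait_on_sem().
 */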
 830static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
 
 
 831{
 832	u64 paddr = iommu_virt_to_phys((void *)address);
 833
 834	WARN_ON(address & 0x7ULL);
 835
 836	memset(cmd, 0, sizeof(*cmd));
 837	cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK;
 838	cmd->data[1] = upper_32_bits(paddr);
 839	cmd->data[2] = 1;
 
 840	CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
 841}
 842
 843static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
 844{
 845	memset(cmd, 0, sizeof(*cmd));
 846	cmd->data[0] = devid;
 847	CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
 848}
 849
 850static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
 851				  size_t size, u16 domid, int pde)
 
 
 
 852{
 853	u64 pages;
 854	bool s;
 855
 856	pages = iommu_num_pages(address, size, PAGE_SIZE);
 857	s     = false;
 858
 859	if (pages > 1) {
 860		/*
 861		 * If we have to flush more than one page, flush all
 862		 * TLB entries for this domain
 863		 */
 864		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
 865		s = true;
 866	}
 867
 868	address &= PAGE_MASK;
 869
 870	memset(cmd, 0, sizeof(*cmd));
 871	cmd->data[1] |= domid;
 872	cmd->data[2]  = lower_32_bits(address);
 873	cmd->data[3]  = upper_32_bits(address);
 874	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
 875	if (s) /* size bit - we flush more than one 4kb page */
 876		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
 877	if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
 878		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
 879}
 880
 881static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
 882				  u64 address, size_t size)
 883{
 884	u64 pages;
 885	bool s;
 886
 887	pages = iommu_num_pages(address, size, PAGE_SIZE);
 888	s     = false;
 889
 890	if (pages > 1) {
 891		/*
 892		 * If we have to flush more than one page, flush all
 893		 * TLB entries for this domain
 894		 */
 895		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
 896		s = true;
 897	}
 898
 
 899	address &= PAGE_MASK;
 900
 901	memset(cmd, 0, sizeof(*cmd));
 902	cmd->data[0]  = devid;
 903	cmd->data[0] |= (qdep & 0xff) << 24;
 904	cmd->data[1]  = devid;
 905	cmd->data[2]  = lower_32_bits(address);
 906	cmd->data[3]  = upper_32_bits(address);
 907	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
 908	if (s)
 909		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
 910}
 911
 912static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
 913				  u64 address, bool size)
 
 914{
 
 
 915	memset(cmd, 0, sizeof(*cmd));
 916
 917	address &= ~(0xfffULL);
 918
 919	cmd->data[0]  = pasid;
 920	cmd->data[1]  = domid;
 921	cmd->data[2]  = lower_32_bits(address);
 922	cmd->data[3]  = upper_32_bits(address);
 923	cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
 924	cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
 925	if (size)
 926		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
 
 927	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
 928}
 929
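/*
 * Note how the PASID is split below: bits 15:8 go into bits 23:16 of
 * data[0] and bits 7:0 into bits 23:16 of data[1], since the remaining
 * bits of both words already carry the device id and the queue depth.
 */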
 930static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
 931				  int qdep, u64 address, bool size)
 
 932{
 
 
 933	memset(cmd, 0, sizeof(*cmd));
 934
 935	address &= ~(0xfffULL);
 936
 937	cmd->data[0]  = devid;
 938	cmd->data[0] |= ((pasid >> 8) & 0xff) << 16;
 939	cmd->data[0] |= (qdep  & 0xff) << 24;
 940	cmd->data[1]  = devid;
 941	cmd->data[1] |= (pasid & 0xff) << 16;
 942	cmd->data[2]  = lower_32_bits(address);
 943	cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
 944	cmd->data[3]  = upper_32_bits(address);
 945	if (size)
 946		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
 
 
 947	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
 948}
 949
 950static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
 951			       int status, int tag, bool gn)
 952{
 953	memset(cmd, 0, sizeof(*cmd));
 954
 955	cmd->data[0]  = devid;
 956	if (gn) {
 957		cmd->data[1]  = pasid;
 958		cmd->data[2]  = CMD_INV_IOMMU_PAGES_GN_MASK;
 959	}
 960	cmd->data[3]  = tag & 0x1ff;
 961	cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT;
 962
 963	CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR);
 964}
 965
 966static void build_inv_all(struct iommu_cmd *cmd)
 967{
 968	memset(cmd, 0, sizeof(*cmd));
 969	CMD_SET_TYPE(cmd, CMD_INV_ALL);
 970}
 971
 972static void build_inv_irt(struct iommu_cmd *cmd, u16 devid)
 973{
 974	memset(cmd, 0, sizeof(*cmd));
 975	cmd->data[0] = devid;
 976	CMD_SET_TYPE(cmd, CMD_INV_IRT);
 977}
 978
 979/*
  980 * Writes the command to the IOMMU's command buffer and informs the
 981 * hardware about the new command.
 982 */
 983static int __iommu_queue_command_sync(struct amd_iommu *iommu,
 984				      struct iommu_cmd *cmd,
 985				      bool sync)
 986{
 987	unsigned int count = 0;
 988	u32 left, next_tail;
 989
 990	next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
 991again:
 992	left      = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE;
 993
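	/*
	 * The subtraction is done modulo the buffer size, so "left" is the
	 * free space in the ring.  If fewer than 0x20 bytes remain, re-read
	 * the head pointer from the MMIO register until the IOMMU has
	 * consumed enough entries (or LOOP_TIMEOUT expires).
	 */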
 994	if (left <= 0x20) {
 995		/* Skip udelay() the first time around */
 996		if (count++) {
 997			if (count == LOOP_TIMEOUT) {
 998				pr_err("Command buffer timeout\n");
 999				return -EIO;
1000			}
1001
1002			udelay(1);
1003		}
1004
1005		/* Update head and recheck remaining space */
1006		iommu->cmd_buf_head = readl(iommu->mmio_base +
1007					    MMIO_CMD_HEAD_OFFSET);
1008
1009		goto again;
1010	}
1011
1012	copy_cmd_to_buffer(iommu, cmd);
1013
1014	/* Do we need to make sure all commands are processed? */
1015	iommu->need_sync = sync;
1016
1017	return 0;
1018}
1019
1020static int iommu_queue_command_sync(struct amd_iommu *iommu,
1021				    struct iommu_cmd *cmd,
1022				    bool sync)
1023{
1024	unsigned long flags;
1025	int ret;
1026
1027	raw_spin_lock_irqsave(&iommu->lock, flags);
1028	ret = __iommu_queue_command_sync(iommu, cmd, sync);
1029	raw_spin_unlock_irqrestore(&iommu->lock, flags);
1030
1031	return ret;
1032}
1033
1034static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
1035{
1036	return iommu_queue_command_sync(iommu, cmd, true);
1037}
1038
1039/*
1040 * This function queues a completion wait command into the command
1041 * buffer of an IOMMU
1042 */
1043static int iommu_completion_wait(struct amd_iommu *iommu)
1044{
1045	struct iommu_cmd cmd;
1046	unsigned long flags;
1047	int ret;
 
1048
1049	if (!iommu->need_sync)
1050		return 0;
1051
1052
1053	build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
1054
1055	raw_spin_lock_irqsave(&iommu->lock, flags);
1056
1057	iommu->cmd_sem = 0;
1058
1059	ret = __iommu_queue_command_sync(iommu, &cmd, false);
1060	if (ret)
1061		goto out_unlock;
1062
1063	ret = wait_on_sem(&iommu->cmd_sem);
1064
1065out_unlock:
1066	raw_spin_unlock_irqrestore(&iommu->lock, flags);
1067
1068	return ret;
1069}
1070
1071static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
1072{
1073	struct iommu_cmd cmd;
1074
1075	build_inv_dte(&cmd, devid);
1076
1077	return iommu_queue_command(iommu, &cmd);
1078}
1079
1080static void amd_iommu_flush_dte_all(struct amd_iommu *iommu)
1081{
1082	u32 devid;
 
1083
1084	for (devid = 0; devid <= 0xffff; ++devid)
1085		iommu_flush_dte(iommu, devid);
1086
1087	iommu_completion_wait(iommu);
1088}
1089
1090/*
1091 * This function uses heavy locking and may disable irqs for some time. But
1092 * this is no issue because it is only called during resume.
1093 */
1094static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu)
1095{
1096	u32 dom_id;
 
1097
1098	for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
1099		struct iommu_cmd cmd;
1100		build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
1101				      dom_id, 1);
1102		iommu_queue_command(iommu, &cmd);
1103	}
1104
1105	iommu_completion_wait(iommu);
1106}
1107
1108static void amd_iommu_flush_tlb_domid(struct amd_iommu *iommu, u32 dom_id)
1109{
1110	struct iommu_cmd cmd;
1111
1112	build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
1113			      dom_id, 1);
1114	iommu_queue_command(iommu, &cmd);
1115
1116	iommu_completion_wait(iommu);
1117}
1118
1119static void amd_iommu_flush_all(struct amd_iommu *iommu)
1120{
1121	struct iommu_cmd cmd;
1122
1123	build_inv_all(&cmd);
1124
1125	iommu_queue_command(iommu, &cmd);
1126	iommu_completion_wait(iommu);
1127}
1128
1129static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid)
1130{
1131	struct iommu_cmd cmd;
1132
1133	build_inv_irt(&cmd, devid);
1134
1135	iommu_queue_command(iommu, &cmd);
1136}
1137
1138static void amd_iommu_flush_irt_all(struct amd_iommu *iommu)
1139{
1140	u32 devid;
 
1141
1142	for (devid = 0; devid <= MAX_DEV_TABLE_ENTRIES; devid++)
 
 
 
1143		iommu_flush_irt(iommu, devid);
1144
1145	iommu_completion_wait(iommu);
1146}
1147
1148void iommu_flush_all_caches(struct amd_iommu *iommu)
1149{
1150	if (iommu_feature(iommu, FEATURE_IA)) {
1151		amd_iommu_flush_all(iommu);
1152	} else {
1153		amd_iommu_flush_dte_all(iommu);
1154		amd_iommu_flush_irt_all(iommu);
1155		amd_iommu_flush_tlb_all(iommu);
1156	}
1157}
1158
1159/*
1160 * Command send function for flushing on-device TLB
1161 */
1162static int device_flush_iotlb(struct iommu_dev_data *dev_data,
1163			      u64 address, size_t size)
1164{
1165	struct amd_iommu *iommu;
1166	struct iommu_cmd cmd;
1167	int qdep;
1168
1169	qdep     = dev_data->ats.qdep;
1170	iommu    = amd_iommu_rlookup_table[dev_data->devid];
1171
1172	build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size);
 
1173
1174	return iommu_queue_command(iommu, &cmd);
1175}
1176
1177static int device_flush_dte_alias(struct pci_dev *pdev, u16 alias, void *data)
1178{
1179	struct amd_iommu *iommu = data;
1180
1181	return iommu_flush_dte(iommu, alias);
1182}
1183
1184/*
1185 * Command send function for invalidating a device table entry
1186 */
1187static int device_flush_dte(struct iommu_dev_data *dev_data)
1188{
1189	struct amd_iommu *iommu;
 
 
1190	u16 alias;
1191	int ret;
1192
1193	iommu = amd_iommu_rlookup_table[dev_data->devid];
 
1194
1195	if (dev_data->pdev)
1196		ret = pci_for_each_dma_alias(dev_data->pdev,
1197					     device_flush_dte_alias, iommu);
1198	else
1199		ret = iommu_flush_dte(iommu, dev_data->devid);
1200	if (ret)
1201		return ret;
1202
1203	alias = amd_iommu_alias_table[dev_data->devid];
 
1204	if (alias != dev_data->devid) {
1205		ret = iommu_flush_dte(iommu, alias);
1206		if (ret)
1207			return ret;
1208	}
1209
1210	if (dev_data->ats.enabled)
1211		ret = device_flush_iotlb(dev_data, 0, ~0UL);
 
 
 
1212
1213	return ret;
1214}
1215
1216/*
1217 * TLB invalidation function which is called from the mapping functions.
1218 * It invalidates a single PTE if the range to flush is within a single
1219 * page. Otherwise it flushes the whole TLB of the IOMMU.
1220 */
1221static void __domain_flush_pages(struct protection_domain *domain,
1222				 u64 address, size_t size, int pde)
1223{
1224	struct iommu_dev_data *dev_data;
1225	struct iommu_cmd cmd;
1226	int ret = 0, i;
1227
1228	build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
 
1229
1230	for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
1231		if (!domain->dev_iommu[i])
1232			continue;
1233
 
1234		/*
1235		 * Devices of this domain are behind this IOMMU
1236		 * We need a TLB flush
1237		 */
1238		ret |= iommu_queue_command(amd_iommus[i], &cmd);
1239	}
1240
1241	list_for_each_entry(dev_data, &domain->dev_list, list) {
1242
1243		if (!dev_data->ats.enabled)
1244			continue;
1245
1246		ret |= device_flush_iotlb(dev_data, address, size);
1247	}
1248
1249	WARN_ON(ret);
1250}
1251
1252static void domain_flush_pages(struct protection_domain *domain,
1253			       u64 address, size_t size)
1254{
1255	__domain_flush_pages(domain, address, size, 0);
1256}
1257
1258/* Flush the whole IO/TLB for a given protection domain - including PDE */
1259static void domain_flush_tlb_pde(struct protection_domain *domain)
1260{
1261	__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
 
1262}
1263
1264static void domain_flush_complete(struct protection_domain *domain)
 
1265{
1266	int i;
1267
1268	for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
1269		if (domain && !domain->dev_iommu[i])
1270			continue;
1271
1272		/*
1273		 * Devices of this domain are behind this IOMMU
1274		 * We need to wait for completion of all commands.
1275		 */
1276		iommu_completion_wait(amd_iommus[i]);
1277	}
1278}
1279
1280/* Flush the not present cache if it exists */
1281static void domain_flush_np_cache(struct protection_domain *domain,
1282		dma_addr_t iova, size_t size)
1283{
1284	if (unlikely(amd_iommu_np_cache)) {
1285		unsigned long flags;
1286
1287		spin_lock_irqsave(&domain->lock, flags);
1288		domain_flush_pages(domain, iova, size);
1289		domain_flush_complete(domain);
1290		spin_unlock_irqrestore(&domain->lock, flags);
1291	}
1292}
1293
1294
1295/*
1296 * This function flushes the DTEs for all devices in domain
1297 */
1298static void domain_flush_devices(struct protection_domain *domain)
1299{
1300	struct iommu_dev_data *dev_data;
1301
1302	list_for_each_entry(dev_data, &domain->dev_list, list)
1303		device_flush_dte(dev_data);
1304}
1305
1306/****************************************************************************
1307 *
 1308 * The functions below are used to create the page table mappings for
1309 * unity mapped regions.
1310 *
1311 ****************************************************************************/
1312
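/*
 * Page-table pages are not freed immediately: they are chained through
 * page->freelist and only released by free_page_list() once the IOTLB
 * flush has completed, so no IOMMU can still be walking them.
 */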
1313static void free_page_list(struct page *freelist)
1314{
1315	while (freelist != NULL) {
1316		unsigned long p = (unsigned long)page_address(freelist);
1317		freelist = freelist->freelist;
1318		free_page(p);
1319	}
1320}
1321
1322static struct page *free_pt_page(unsigned long pt, struct page *freelist)
1323{
1324	struct page *p = virt_to_page((void *)pt);
1325
1326	p->freelist = freelist;
1327
1328	return p;
1329}
1330
1331#define DEFINE_FREE_PT_FN(LVL, FN)						\
1332static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist)	\
1333{										\
1334	unsigned long p;							\
1335	u64 *pt;								\
1336	int i;									\
1337										\
1338	pt = (u64 *)__pt;							\
1339										\
1340	for (i = 0; i < 512; ++i) {						\
1341		/* PTE present? */						\
1342		if (!IOMMU_PTE_PRESENT(pt[i]))					\
1343			continue;						\
1344										\
1345		/* Large PTE? */						\
1346		if (PM_PTE_LEVEL(pt[i]) == 0 ||					\
1347		    PM_PTE_LEVEL(pt[i]) == 7)					\
1348			continue;						\
1349										\
1350		p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);			\
1351		freelist = FN(p, freelist);					\
1352	}									\
1353										\
1354	return free_pt_page((unsigned long)pt, freelist);			\
1355}
1356
1357DEFINE_FREE_PT_FN(l2, free_pt_page)
1358DEFINE_FREE_PT_FN(l3, free_pt_l2)
1359DEFINE_FREE_PT_FN(l4, free_pt_l3)
1360DEFINE_FREE_PT_FN(l5, free_pt_l4)
1361DEFINE_FREE_PT_FN(l6, free_pt_l5)
1362
1363static struct page *free_sub_pt(unsigned long root, int mode,
1364				struct page *freelist)
1365{
1366	switch (mode) {
1367	case PAGE_MODE_NONE:
1368	case PAGE_MODE_7_LEVEL:
1369		break;
1370	case PAGE_MODE_1_LEVEL:
1371		freelist = free_pt_page(root, freelist);
1372		break;
1373	case PAGE_MODE_2_LEVEL:
1374		freelist = free_pt_l2(root, freelist);
1375		break;
1376	case PAGE_MODE_3_LEVEL:
1377		freelist = free_pt_l3(root, freelist);
1378		break;
1379	case PAGE_MODE_4_LEVEL:
1380		freelist = free_pt_l4(root, freelist);
1381		break;
1382	case PAGE_MODE_5_LEVEL:
1383		freelist = free_pt_l5(root, freelist);
1384		break;
1385	case PAGE_MODE_6_LEVEL:
1386		freelist = free_pt_l6(root, freelist);
1387		break;
1388	default:
1389		BUG();
1390	}
1391
1392	return freelist;
1393}
1394
1395static void free_pagetable(struct domain_pgtable *pgtable)
1396{
1397	struct page *freelist = NULL;
1398	unsigned long root;
 
1399
1400	if (pgtable->mode == PAGE_MODE_NONE)
1401		return;
 
 
 
1402
1403	BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
1404	       pgtable->mode > PAGE_MODE_6_LEVEL);
1405
1406	root = (unsigned long)pgtable->root;
1407	freelist = free_sub_pt(root, pgtable->mode, freelist);
1408
1409	free_page_list(freelist);
 
 
1410}
1411
1412/*
1413 * This function is used to add another level to an IO page table. Adding
 1414 * another level increases the size of the address space by 9 bits, up to a
 1415 * maximum of 64 bits.
1416 */
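/*
 * Worked example: each level translates 9 address bits on top of the
 * 12-bit page offset, so mode 3 covers 39 bits (512GB), mode 4 covers
 * 48 bits, mode 5 covers 57 bits and mode 6 already exceeds 64 bits.
 */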
1417static bool increase_address_space(struct protection_domain *domain,
1418				   unsigned long address,
1419				   gfp_t gfp)
1420{
1421	struct domain_pgtable pgtable;
1422	unsigned long flags;
1423	bool ret = true;
1424	u64 *pte;
1425
1426	spin_lock_irqsave(&domain->lock, flags);
1427
1428	amd_iommu_domain_get_pgtable(domain, &pgtable);
1429
1430	if (address <= PM_LEVEL_SIZE(pgtable.mode))
1431		goto out;
1432
1433	ret = false;
1434	if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
1435		goto out;
1436
1437	pte = (void *)get_zeroed_page(gfp);
1438	if (!pte)
1439		goto out;
1440
1441	*pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
1442
1443	pgtable.root  = pte;
1444	pgtable.mode += 1;
1445	update_and_flush_device_table(domain, &pgtable);
1446	domain_flush_complete(domain);
1447
1448	/*
1449	 * Device Table needs to be updated and flushed before the new root can
1450	 * be published.
1451	 */
1452	amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode);
1453
1454	ret = true;
1455
1456out:
1457	spin_unlock_irqrestore(&domain->lock, flags);
1458
1459	return ret;
1460}
1461
1462static u64 *alloc_pte(struct protection_domain *domain,
1463		      unsigned long address,
1464		      unsigned long page_size,
1465		      u64 **pte_page,
1466		      gfp_t gfp,
1467		      bool *updated)
1468{
1469	struct domain_pgtable pgtable;
1470	int level, end_lvl;
1471	u64 *pte, *page;
1472
1473	BUG_ON(!is_power_of_2(page_size));
1474
1475	amd_iommu_domain_get_pgtable(domain, &pgtable);
1476
1477	while (address > PM_LEVEL_SIZE(pgtable.mode)) {
1478		/*
1479		 * Return an error if there is no memory to update the
1480		 * page-table.
1481		 */
1482		if (!increase_address_space(domain, address, gfp))
1483			return NULL;
1484
1485		/* Read new values to check if update was successful */
1486		amd_iommu_domain_get_pgtable(domain, &pgtable);
1487	}
1488
1489
1490	level   = pgtable.mode - 1;
1491	pte     = &pgtable.root[PM_LEVEL_INDEX(level, address)];
1492	address = PAGE_SIZE_ALIGN(address, page_size);
1493	end_lvl = PAGE_SIZE_LEVEL(page_size);
1494
1495	while (level > end_lvl) {
1496		u64 __pte, __npte;
1497		int pte_level;
1498
1499		__pte     = *pte;
1500		pte_level = PM_PTE_LEVEL(__pte);
1501
1502		/*
1503		 * If we replace a series of large PTEs, we need
1504		 * to tear down all of them.
1505		 */
1506		if (IOMMU_PTE_PRESENT(__pte) &&
1507		    pte_level == PAGE_MODE_7_LEVEL) {
1508			unsigned long count, i;
1509			u64 *lpte;
1510
1511			lpte = first_pte_l7(pte, NULL, &count);
1512
1513			/*
1514			 * Unmap the replicated PTEs that still match the
1515			 * original large mapping
1516			 */
1517			for (i = 0; i < count; ++i)
1518				cmpxchg64(&lpte[i], __pte, 0ULL);
1519
1520			*updated = true;
1521			continue;
1522		}
1523
1524		if (!IOMMU_PTE_PRESENT(__pte) ||
1525		    pte_level == PAGE_MODE_NONE) {
1526			page = (u64 *)get_zeroed_page(gfp);
1527
1528			if (!page)
1529				return NULL;
1530
1531			__npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
1532
1533			/* pte could have been changed somewhere. */
1534			if (cmpxchg64(pte, __pte, __npte) != __pte)
1535				free_page((unsigned long)page);
1536			else if (IOMMU_PTE_PRESENT(__pte))
1537				*updated = true;
1538
1539			continue;
1540		}
1541
1542		/* No level skipping support yet */
1543		if (pte_level != level)
1544			return NULL;
1545
1546		level -= 1;
1547
1548		pte = IOMMU_PTE_PAGE(__pte);
1549
1550		if (pte_page && level == end_lvl)
1551			*pte_page = pte;
1552
1553		pte = &pte[PM_LEVEL_INDEX(level, address)];
1554	}
1555
1556	return pte;
1557}
1558
1559/*
1560 * This function checks if there is a PTE for a given dma address. If
1561 * there is one, it returns the pointer to it.
1562 */
1563static u64 *fetch_pte(struct protection_domain *domain,
1564		      unsigned long address,
1565		      unsigned long *page_size)
1566{
1567	struct domain_pgtable pgtable;
1568	int level;
1569	u64 *pte;
1570
1571	*page_size = 0;
 
 
1572
1573	amd_iommu_domain_get_pgtable(domain, &pgtable);
1574
1575	if (address > PM_LEVEL_SIZE(pgtable.mode))
1576		return NULL;
1577
1578	level	   =  pgtable.mode - 1;
1579	pte	   = &pgtable.root[PM_LEVEL_INDEX(level, address)];
1580	*page_size =  PTE_LEVEL_PAGE_SIZE(level);
1581
1582	while (level > 0) {
1583
1584		/* Not Present */
1585		if (!IOMMU_PTE_PRESENT(*pte))
1586			return NULL;
1587
1588		/* Large PTE */
1589		if (PM_PTE_LEVEL(*pte) == 7 ||
1590		    PM_PTE_LEVEL(*pte) == 0)
1591			break;
1592
1593		/* No level skipping support yet */
1594		if (PM_PTE_LEVEL(*pte) != level)
1595			return NULL;
1596
1597		level -= 1;
1598
1599		/* Walk to the next level */
1600		pte	   = IOMMU_PTE_PAGE(*pte);
1601		pte	   = &pte[PM_LEVEL_INDEX(level, address)];
1602		*page_size = PTE_LEVEL_PAGE_SIZE(level);
1603	}
1604
1605	/*
1606	 * If we have a series of large PTEs, make
1607	 * sure to return a pointer to the first one.
1608	 */
1609	if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
1610		pte = first_pte_l7(pte, page_size, NULL);
1611
1612	return pte;
1613}
1614
1615static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
1616{
1617	unsigned long pt;
1618	int mode;
 
 
 
 
1619
1620	while (cmpxchg64(pte, pteval, 0) != pteval) {
1621		pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");
1622		pteval = *pte;
1623	}
1624
1625	if (!IOMMU_PTE_PRESENT(pteval))
1626		return freelist;
1627
1628	pt   = (unsigned long)IOMMU_PTE_PAGE(pteval);
1629	mode = IOMMU_PTE_MODE(pteval);
1630
1631	return free_sub_pt(pt, mode, freelist);
1632}
1633
1634/*
 1635 * Generic mapping function. It maps a physical address into a DMA
1636 * address space. It allocates the page table pages if necessary.
1637 * In the future it can be extended to a generic mapping function
1638 * supporting all features of AMD IOMMU page tables like level skipping
1639 * and full 64 bit address spaces.
1640 */
1641static int iommu_map_page(struct protection_domain *dom,
1642			  unsigned long bus_addr,
1643			  unsigned long phys_addr,
1644			  unsigned long page_size,
1645			  int prot,
1646			  gfp_t gfp)
1647{
1648	struct page *freelist = NULL;
1649	bool updated = false;
1650	u64 __pte, *pte;
1651	int ret, i, count;
1652
1653	BUG_ON(!IS_ALIGNED(bus_addr, page_size));
1654	BUG_ON(!IS_ALIGNED(phys_addr, page_size));
1655
1656	ret = -EINVAL;
1657	if (!(prot & IOMMU_PROT_MASK))
1658		goto out;
1659
1660	count = PAGE_SIZE_PTE_COUNT(page_size);
1661	pte   = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
1662
1663	ret = -ENOMEM;
1664	if (!pte)
1665		goto out;
 
 
 
1666
1667	for (i = 0; i < count; ++i)
1668		freelist = free_clear_pte(&pte[i], pte[i], freelist);
1669
1670	if (freelist != NULL)
1671		updated = true;
1672
1673	if (count > 1) {
1674		__pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
1675		__pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
1676	} else
1677		__pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
1678
1679	if (prot & IOMMU_PROT_IR)
1680		__pte |= IOMMU_PTE_IR;
1681	if (prot & IOMMU_PROT_IW)
1682		__pte |= IOMMU_PTE_IW;
1683
1684	for (i = 0; i < count; ++i)
1685		pte[i] = __pte;
1686
1687	ret = 0;
1688
1689out:
1690	if (updated) {
1691		unsigned long flags;
1692
1693		spin_lock_irqsave(&dom->lock, flags);
1694		/*
1695		 * Flush domain TLB(s) and wait for completion. Any Device-Table
1696		 * Updates and flushing already happened in
1697		 * increase_address_space().
1698		 */
1699		domain_flush_tlb_pde(dom);
1700		domain_flush_complete(dom);
1701		spin_unlock_irqrestore(&dom->lock, flags);
1702	}
1703
1704	/* Everything flushed out, free pages now */
1705	free_page_list(freelist);
1706
1707	return ret;
1708}
1709
1710static unsigned long iommu_unmap_page(struct protection_domain *dom,
1711				      unsigned long bus_addr,
1712				      unsigned long page_size)
1713{
1714	unsigned long long unmapped;
1715	unsigned long unmap_size;
1716	u64 *pte;
 
 
1717
1718	BUG_ON(!is_power_of_2(page_size));
1719
1720	unmapped = 0;
 
1721
1722	while (unmapped < page_size) {
 
1723
1724		pte = fetch_pte(dom, bus_addr, &unmap_size);
 
 
1725
1726		if (pte) {
1727			int i, count;
 
1728
1729			count = PAGE_SIZE_PTE_COUNT(unmap_size);
1730			for (i = 0; i < count; i++)
1731				pte[i] = 0ULL;
1732		}
1733
1734		bus_addr  = (bus_addr & ~(unmap_size - 1)) + unmap_size;
1735		unmapped += unmap_size;
 
1736	}
1737
1738	BUG_ON(unmapped && !is_power_of_2(unmapped));
1739
1740	return unmapped;
1741}
1742
1743/****************************************************************************
1744 *
1745 * The next functions belong to the domain allocation. A domain is
1746 * allocated for every IOMMU as the default domain. If device isolation
 1747 * is enabled, every device gets its own domain. The most important thing
1748 * about domains is the page table mapping the DMA address space they
1749 * contain.
1750 *
1751 ****************************************************************************/
1752
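/*
 * Protection domain IDs tag IOMMU TLB entries, so every domain needs a
 * unique non-zero ID.  ID 0 is reserved during driver init and never
 * handed out, which is why finding it free is treated as a bug below.
 */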
1753static u16 domain_id_alloc(void)
1754{
1755	int id;
1756
1757	spin_lock(&pd_bitmap_lock);
1758	id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
1759	BUG_ON(id == 0);
1760	if (id > 0 && id < MAX_DOMAIN_ID)
1761		__set_bit(id, amd_iommu_pd_alloc_bitmap);
1762	else
1763		id = 0;
1764	spin_unlock(&pd_bitmap_lock);
1765
1766	return id;
 
1767}
1768
1769static void domain_id_free(int id)
 
1770{
1771	spin_lock(&pd_bitmap_lock);
1772	if (id > 0 && id < MAX_DOMAIN_ID)
1773		__clear_bit(id, amd_iommu_pd_alloc_bitmap);
1774	spin_unlock(&pd_bitmap_lock);
1775}
1776
1777static void free_gcr3_tbl_level1(u64 *tbl)
1778{
1779	u64 *ptr;
1780	int i;
1781
1782	for (i = 0; i < 512; ++i) {
1783		if (!(tbl[i] & GCR3_VALID))
1784			continue;
1785
1786		ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK);
1787
1788		free_page((unsigned long)ptr);
1789	}
1790}
1791
1792static void free_gcr3_tbl_level2(u64 *tbl)
1793{
1794	u64 *ptr;
1795	int i;
1796
1797	for (i = 0; i < 512; ++i) {
1798		if (!(tbl[i] & GCR3_VALID))
1799			continue;
1800
1801		ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK);
 
 
1802
1803		free_gcr3_tbl_level1(ptr);
1804	}
1805}
1806
1807static void free_gcr3_table(struct protection_domain *domain)
1808{
1809	if (domain->glx == 2)
1810		free_gcr3_tbl_level2(domain->gcr3_tbl);
1811	else if (domain->glx == 1)
1812		free_gcr3_tbl_level1(domain->gcr3_tbl);
1813	else
1814		BUG_ON(domain->glx != 0);
1815
1816	free_page((unsigned long)domain->gcr3_tbl);
1817}
1818
1819static void set_dte_entry(u16 devid, struct protection_domain *domain,
1820			  struct domain_pgtable *pgtable,
1821			  bool ats, bool ppr)
1822{
1823	u64 pte_root = 0;
1824	u64 flags = 0;
1825	u32 old_domid;
1826
1827	if (pgtable->mode != PAGE_MODE_NONE)
1828		pte_root = iommu_virt_to_phys(pgtable->root);
1829
1830	pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK)
1831		    << DEV_ENTRY_MODE_SHIFT;
1832	pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
1833
1834	flags = amd_iommu_dev_table[devid].data[1];
1835
1836	if (ats)
1837		flags |= DTE_FLAG_IOTLB;
1838
1839	if (ppr) {
1840		struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
1841
1842		if (iommu_feature(iommu, FEATURE_EPHSUP))
1843			pte_root |= 1ULL << DEV_ENTRY_PPR;
1844	}
1845
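	/*
	 * For IOMMUv2 (PASID) domains the GCR3 table root is scattered
	 * across three DTE fields (A in data[0], B and C in data[1]),
	 * which is why the old B/C bits are masked out of "flags" before
	 * the new values are ORed in below.
	 */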
1846	if (domain->flags & PD_IOMMUV2_MASK) {
1847		u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl);
1848		u64 glx  = domain->glx;
1849		u64 tmp;
1850
1851		pte_root |= DTE_FLAG_GV;
1852		pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
1853
1854		/* First mask out possible old values for GCR3 table */
1855		tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
1856		flags    &= ~tmp;
1857
1858		tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
1859		flags    &= ~tmp;
1860
1861		/* Encode GCR3 table into DTE */
1862		tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
1863		pte_root |= tmp;
1864
1865		tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
1866		flags    |= tmp;
1867
1868		tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
1869		flags    |= tmp;
1870	}
1871
1872	flags &= ~DEV_DOMID_MASK;
1873	flags |= domain->id;
1874
1875	old_domid = amd_iommu_dev_table[devid].data[1] & DEV_DOMID_MASK;
1876	amd_iommu_dev_table[devid].data[1]  = flags;
1877	amd_iommu_dev_table[devid].data[0]  = pte_root;
1878
1879	/*
1880	 * A kdump kernel might be replacing a domain ID that was copied from
1881	 * the previous kernel--if so, it needs to flush the translation cache
1882	 * entries for the old domain ID that is being overwritten
 1883	 * entries for the old domain ID that is being overwritten.
1884	if (old_domid) {
1885		struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
1886
1887		amd_iommu_flush_tlb_domid(iommu, old_domid);
1888	}
1889}
1890
1891static void clear_dte_entry(u16 devid)
1892{
 
 
1893	/* remove entry from the device table seen by the hardware */
1894	amd_iommu_dev_table[devid].data[0]  = DTE_FLAG_V | DTE_FLAG_TV;
1895	amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK;
 
 
1896
1897	amd_iommu_apply_erratum_63(devid);
 
 
1898}
1899
1900static void do_attach(struct iommu_dev_data *dev_data,
1901		      struct protection_domain *domain)
1902{
1903	struct domain_pgtable pgtable;
1904	struct amd_iommu *iommu;
1905	bool ats;
1906
1907	iommu = amd_iommu_rlookup_table[dev_data->devid];
1908	ats   = dev_data->ats.enabled;
1909
1910	/* Update data structures */
1911	dev_data->domain = domain;
1912	list_add(&dev_data->list, &domain->dev_list);
1913
1914	/* Do reference counting */
1915	domain->dev_iommu[iommu->index] += 1;
1916	domain->dev_cnt                 += 1;
1917
1918	/* Update device table */
1919	amd_iommu_domain_get_pgtable(domain, &pgtable);
1920	set_dte_entry(dev_data->devid, domain, &pgtable,
1921		      ats, dev_data->iommu_v2);
1922	clone_aliases(dev_data->pdev);
1923
 
1924	device_flush_dte(dev_data);
 
1925}
1926
1927static void do_detach(struct iommu_dev_data *dev_data)
1928{
1929	struct protection_domain *domain = dev_data->domain;
1930	struct amd_iommu *iommu;
1931
1932	iommu = amd_iommu_rlookup_table[dev_data->devid];
1933
1934	/* Update data structures */
1935	dev_data->domain = NULL;
1936	list_del(&dev_data->list);
1937	clear_dte_entry(dev_data->devid);
1938	clone_aliases(dev_data->pdev);
1939
1940	/* Flush the DTE entry */
1941	device_flush_dte(dev_data);
 
1942
1943	/* Flush IOTLB */
1944	domain_flush_tlb_pde(domain);
1945
1946	/* Wait for the flushes to finish */
1947	domain_flush_complete(domain);
1948
1949	/* decrease reference counters - needs to happen after the flushes */
1950	domain->dev_iommu[iommu->index] -= 1;
1951	domain->dev_cnt                 -= 1;
1952}
1953
1954static void pdev_iommuv2_disable(struct pci_dev *pdev)
 
1955{
1956	pci_disable_ats(pdev);
1957	pci_disable_pri(pdev);
1958	pci_disable_pasid(pdev);
1959}
1960
1961/* FIXME: Change generic reset-function to do the same */
1962static int pri_reset_while_enabled(struct pci_dev *pdev)
1963{
1964	u16 control;
1965	int pos;
1966
1967	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
1968	if (!pos)
1969		return -EINVAL;
1970
1971	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
1972	control |= PCI_PRI_CTRL_RESET;
1973	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
1974
1975	return 0;
1976}
1977
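/*
 * Enable order used below: PASID first, then a PRI reset and PRI enable
 * (with the number of outstanding page requests capped for devices with
 * the LIMIT_REQ_ONE erratum), and finally ATS; any failure unwinds the
 * capabilities that were already enabled.
 */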
1978static int pdev_iommuv2_enable(struct pci_dev *pdev)
 
1979{
1980	bool reset_enable;
1981	int reqs, ret;
 
 
1982
1983	/* FIXME: Hardcode number of outstanding requests for now */
1984	reqs = 32;
1985	if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE))
1986		reqs = 1;
1987	reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET);
1988
1989	/* Only allow access to user-accessible pages */
1990	ret = pci_enable_pasid(pdev, 0);
1991	if (ret)
1992		goto out_err;
 
1993
1994	/* First reset the PRI state of the device */
1995	ret = pci_reset_pri(pdev);
1996	if (ret)
1997		goto out_err;
 
1998
1999	/* Enable PRI */
2000	ret = pci_enable_pri(pdev, reqs);
2001	if (ret)
2002		goto out_err;
2003
2004	if (reset_enable) {
2005		ret = pri_reset_while_enabled(pdev);
2006		if (ret)
2007			goto out_err;
2008	}
2009
2010	ret = pci_enable_ats(pdev, PAGE_SHIFT);
2011	if (ret)
2012		goto out_err;
2013
2014	return 0;
2015
2016out_err:
2017	pci_disable_pri(pdev);
2018	pci_disable_pasid(pdev);
2019
2020	return ret;
2021}
2022
2023/*
2024 * If a device is not yet associated with a domain, this function makes the
2025 * device visible in the domain
2026 */
2027static int attach_device(struct device *dev,
2028			 struct protection_domain *domain)
2029{
2030	struct iommu_dev_data *dev_data;
2031	struct pci_dev *pdev;
2032	unsigned long flags;
2033	int ret;
2034
2035	spin_lock_irqsave(&domain->lock, flags);
2036
2037	dev_data = dev_iommu_priv_get(dev);
2038
2039	spin_lock(&dev_data->lock);
2040
2041	ret = -EBUSY;
2042	if (dev_data->domain != NULL)
2043		goto out;
2044
2045	if (!dev_is_pci(dev))
2046		goto skip_ats_check;
2047
2048	pdev = to_pci_dev(dev);
2049	if (domain->flags & PD_IOMMUV2_MASK) {
2050		struct iommu_domain *def_domain = iommu_get_dma_domain(dev);
2051
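		/*
		 * An IOMMUv2 (PASID-capable) domain is only usable when the
		 * device's default DMA-API domain is identity-mapped;
		 * presumably so that regular, non-PASID DMA is passed
		 * through instead of being translated by a second page table.
		 */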
2052		ret = -EINVAL;
2053		if (def_domain->type != IOMMU_DOMAIN_IDENTITY)
2054			goto out;
2055
2056		if (dev_data->iommu_v2) {
2057			if (pdev_iommuv2_enable(pdev) != 0)
2058				goto out;
2059
2060			dev_data->ats.enabled = true;
2061			dev_data->ats.qdep    = pci_ats_queue_depth(pdev);
2062			dev_data->pri_tlp     = pci_prg_resp_pasid_required(pdev);
2063		}
2064	} else if (amd_iommu_iotlb_sup &&
2065		   pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
2066		dev_data->ats.enabled = true;
2067		dev_data->ats.qdep    = pci_ats_queue_depth(pdev);
2068	}
2069
2070skip_ats_check:
2071	ret = 0;
2072
2073	do_attach(dev_data, domain);
2074
2075	/*
2076	 * We might boot into a crash-kernel here. The crashed kernel
2077	 * may have left stale entries in the IOMMU caches, so we have to
2078	 * flush here to evict them.
2079	 */
2080	domain_flush_tlb_pde(domain);
2081
2082	domain_flush_complete(domain);
2083
2084out:
2085	spin_unlock(&dev_data->lock);
2086
2087	spin_unlock_irqrestore(&domain->lock, flags);
2088
2089	return ret;
2090}
2091
2092/*
2093 * Removes a device from a protection domain (with devtable_lock held)
2094 */
2095static void detach_device(struct device *dev)
2096{
2097	struct protection_domain *domain;
2098	struct iommu_dev_data *dev_data;
2099	unsigned long flags;
2100
2101	dev_data = dev_iommu_priv_get(dev);
2102	domain   = dev_data->domain;
2103
2104	spin_lock_irqsave(&domain->lock, flags);
2105
2106	spin_lock(&dev_data->lock);
2107
2108	/*
2109	 * First check if the device is still attached. It might already
2110	 * be detached from its domain because the generic
2111	 * iommu_detach_group code detached it and we try again here in
2112	 * our alias handling.
2113	 */
2114	if (WARN_ON(!dev_data->domain))
2115		goto out;
2116
2117	do_detach(dev_data);
2118
2119	if (!dev_is_pci(dev))
2120		goto out;
2121
2122	if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2)
2123		pdev_iommuv2_disable(to_pci_dev(dev));
2124	else if (dev_data->ats.enabled)
2125		pci_disable_ats(to_pci_dev(dev));
2126
2127	dev_data->ats.enabled = false;
2128
2129out:
2130	spin_unlock(&dev_data->lock);
2131
2132	spin_unlock_irqrestore(&domain->lock, flags);
2133}
2134
2135static struct iommu_device *amd_iommu_probe_device(struct device *dev)
2136{
2137	struct iommu_device *iommu_dev;
2138	struct amd_iommu *iommu;
2139	int ret, devid;
2140
2141	if (!check_device(dev))
2142		return ERR_PTR(-ENODEV);
2143
2144	devid = get_device_id(dev);
2145	if (devid < 0)
2146		return ERR_PTR(devid);
2147
2148	iommu = amd_iommu_rlookup_table[devid];
2149
2150	if (dev_iommu_priv_get(dev))
2151		return &iommu->iommu;
2152
2153	ret = iommu_init_device(dev);
2154	if (ret) {
2155		if (ret != -ENOTSUPP)
2156			dev_err(dev, "Failed to initialize - trying to proceed anyway\n");
2157		iommu_dev = ERR_PTR(ret);
2158		iommu_ignore_device(dev);
2159	} else {
2160		iommu_dev = &iommu->iommu;
2161	}
2162
2163	iommu_completion_wait(iommu);
2164
2165	return iommu_dev;
2166}
2167
2168static void amd_iommu_probe_finalize(struct device *dev)
2169{
2170	struct iommu_domain *domain;
2171
2172	/* Domains are initialized for this device - have a look at what we ended up with */
2173	domain = iommu_get_domain_for_dev(dev);
2174	if (domain->type == IOMMU_DOMAIN_DMA)
2175		iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0);
2176}
2177
2178static void amd_iommu_release_device(struct device *dev)
2179{
2180	int devid = get_device_id(dev);
2181	struct amd_iommu *iommu;
2182
2183	if (!check_device(dev))
2184		return;
2185
2186	iommu = amd_iommu_rlookup_table[devid];
2187
2188	amd_iommu_uninit_device(dev);
2189	iommu_completion_wait(iommu);
2190}
2191
2192static struct iommu_group *amd_iommu_device_group(struct device *dev)
2193{
2194	if (dev_is_pci(dev))
2195		return pci_device_group(dev);
2196
2197	return acpihid_device_group(dev);
2198}
2199
2200static int amd_iommu_domain_get_attr(struct iommu_domain *domain,
2201		enum iommu_attr attr, void *data)
2202{
2203	switch (domain->type) {
2204	case IOMMU_DOMAIN_UNMANAGED:
2205		return -ENODEV;
2206	case IOMMU_DOMAIN_DMA:
2207		switch (attr) {
2208		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2209			*(int *)data = !amd_iommu_unmap_flush;
2210			return 0;
2211		default:
2212			return -ENODEV;
2213		}
2214		break;
2215	default:
2216		return -EINVAL;
2217	}
2218}
2219
2220/*****************************************************************************
2221 *
2222 * The next functions belong to the dma_ops mapping/unmapping code.
2223 *
2224 *****************************************************************************/
2225
2226static void update_device_table(struct protection_domain *domain,
2227				struct domain_pgtable *pgtable)
2228{
2229	struct iommu_dev_data *dev_data;
2230
2231	list_for_each_entry(dev_data, &domain->dev_list, list) {
2232		set_dte_entry(dev_data->devid, domain, pgtable,
2233			      dev_data->ats.enabled, dev_data->iommu_v2);
2234		clone_aliases(dev_data->pdev);
2235	}
2236}
2237
2238static void update_and_flush_device_table(struct protection_domain *domain,
2239					  struct domain_pgtable *pgtable)
2240{
2241	update_device_table(domain, pgtable);
2242	domain_flush_devices(domain);
2243}
2244
2245static void update_domain(struct protection_domain *domain)
2246{
2247	struct domain_pgtable pgtable;
2248
2249	/* Update device table */
2250	amd_iommu_domain_get_pgtable(domain, &pgtable);
2251	update_and_flush_device_table(domain, &pgtable);
2252
2253	/* Flush domain TLB(s) and wait for completion */
2254	domain_flush_tlb_pde(domain);
2255	domain_flush_complete(domain);
2256}
2257
2258int __init amd_iommu_init_api(void)
2259{
2260	int ret, err = 0;
2261
2262	ret = iova_cache_get();
2263	if (ret)
2264		return ret;
2265
2266	err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
2267	if (err)
2268		return err;
2269#ifdef CONFIG_ARM_AMBA
2270	err = bus_set_iommu(&amba_bustype, &amd_iommu_ops);
2271	if (err)
2272		return err;
2273#endif
2274	err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops);
2275	if (err)
2276		return err;
2277
2278	return 0;
2279}
2280
2281int __init amd_iommu_init_dma_ops(void)
2282{
2283	swiotlb        = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
2284
2285	if (amd_iommu_unmap_flush)
2286		pr_info("IO/TLB flush on unmap enabled\n");
2287	else
2288		pr_info("Lazy IO/TLB flushing enabled\n");
2289
2290	return 0;
2291
2292}
2293
2294/*****************************************************************************
2295 *
2296 * The following functions belong to the exported interface of AMD IOMMU
2297 *
2298 * This interface allows access to lower level functions of the IOMMU
2299 * like protection domain handling and assignment of devices to domains
2300 * which is not possible with the dma_ops interface.
2301 *
2302 *****************************************************************************/
2303
2304static void cleanup_domain(struct protection_domain *domain)
2305{
2306	struct iommu_dev_data *entry;
2307	unsigned long flags;
2308
2309	spin_lock_irqsave(&domain->lock, flags);
2310
2311	while (!list_empty(&domain->dev_list)) {
2312		entry = list_first_entry(&domain->dev_list,
2313					 struct iommu_dev_data, list);
2314		BUG_ON(!entry->domain);
2315		do_detach(entry);
2316	}
2317
2318	spin_unlock_irqrestore(&domain->lock, flags);
2319}
2320
2321static void protection_domain_free(struct protection_domain *domain)
2322{
2323	struct domain_pgtable pgtable;
2324
2325	if (!domain)
2326		return;
2327
2328	if (domain->id)
2329		domain_id_free(domain->id);
2330
2331	amd_iommu_domain_get_pgtable(domain, &pgtable);
2332	amd_iommu_domain_clr_pt_root(domain);
2333	free_pagetable(&pgtable);
2334
2335	kfree(domain);
2336}
2337
2338static int protection_domain_init(struct protection_domain *domain, int mode)
2339{
2340	u64 *pt_root = NULL;
2341
2342	BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL);
2343
2344	spin_lock_init(&domain->lock);
2345	domain->id = domain_id_alloc();
2346	if (!domain->id)
2347		return -ENOMEM;
2348	INIT_LIST_HEAD(&domain->dev_list);
2349
2350	if (mode != PAGE_MODE_NONE) {
2351		pt_root = (void *)get_zeroed_page(GFP_KERNEL);
2352		if (!pt_root)
2353			return -ENOMEM;
2354	}
2355
2356	amd_iommu_domain_set_pgtable(domain, pt_root, mode);
2357
2358	return 0;
2359}
2360
2361static struct protection_domain *protection_domain_alloc(int mode)
2362{
2363	struct protection_domain *domain;
2364
2365	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
2366	if (!domain)
2367		return NULL;
2368
2369	if (protection_domain_init(domain, mode))
2370		goto out_err;
2371
2372	return domain;
2373
2374out_err:
2375	kfree(domain);
2376
2377	return NULL;
2378}
2379
2380static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
2381{
2382	struct protection_domain *domain;
2383	int mode = DEFAULT_PGTABLE_LEVEL;
2384
2385	if (type == IOMMU_DOMAIN_IDENTITY)
2386		mode = PAGE_MODE_NONE;
2387
2388	domain = protection_domain_alloc(mode);
2389	if (!domain)
2390		return NULL;
2391
2392	domain->domain.geometry.aperture_start = 0;
2393	domain->domain.geometry.aperture_end   = ~0ULL;
2394	domain->domain.geometry.force_aperture = true;
2395
2396	if (type == IOMMU_DOMAIN_DMA &&
2397	    iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
2398		goto free_domain;
2399
2400	return &domain->domain;
2401
2402free_domain:
2403	protection_domain_free(domain);
2404
2405	return NULL;
2406}
2407
2408static void amd_iommu_domain_free(struct iommu_domain *dom)
2409{
2410	struct protection_domain *domain;
2411
2412	domain = to_pdomain(dom);
2413
2414	if (domain->dev_cnt > 0)
2415		cleanup_domain(domain);
2416
2417	BUG_ON(domain->dev_cnt != 0);
2418
2419	if (!dom)
2420		return;
2421
2422	if (dom->type == IOMMU_DOMAIN_DMA)
2423		iommu_put_dma_cookie(&domain->domain);
2424
2425	if (domain->flags & PD_IOMMUV2_MASK)
2426		free_gcr3_table(domain);
2427
2428	protection_domain_free(domain);
2429}
2430
2431static void amd_iommu_detach_device(struct iommu_domain *dom,
2432				    struct device *dev)
2433{
2434	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
2435	struct amd_iommu *iommu;
2436	int devid;
2437
2438	if (!check_device(dev))
2439		return;
2440
2441	devid = get_device_id(dev);
2442	if (devid < 0)
2443		return;
2444
2445	if (dev_data->domain != NULL)
2446		detach_device(dev);
2447
2448	iommu = amd_iommu_rlookup_table[devid];
2449	if (!iommu)
2450		return;
2451
2452#ifdef CONFIG_IRQ_REMAP
2453	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
2454	    (dom->type == IOMMU_DOMAIN_UNMANAGED))
2455		dev_data->use_vapic = 0;
2456#endif
2457
2458	iommu_completion_wait(iommu);
2459}
2460
2461static int amd_iommu_attach_device(struct iommu_domain *dom,
2462				   struct device *dev)
2463{
2464	struct protection_domain *domain = to_pdomain(dom);
2465	struct iommu_dev_data *dev_data;
2466	struct amd_iommu *iommu;
2467	int ret;
2468
2469	if (!check_device(dev))
2470		return -EINVAL;
2471
2472	dev_data = dev_iommu_priv_get(dev);
2473	dev_data->defer_attach = false;
2474
2475	iommu = amd_iommu_rlookup_table[dev_data->devid];
2476	if (!iommu)
2477		return -EINVAL;
2478
2479	if (dev_data->domain)
2480		detach_device(dev);
2481
2482	ret = attach_device(dev, domain);
2483
2484#ifdef CONFIG_IRQ_REMAP
2485	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
2486		if (dom->type == IOMMU_DOMAIN_UNMANAGED)
2487			dev_data->use_vapic = 1;
2488		else
2489			dev_data->use_vapic = 0;
2490	}
2491#endif
2492
2493	iommu_completion_wait(iommu);
2494
2495	return ret;
2496}
2497
2498static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
2499			 phys_addr_t paddr, size_t page_size, int iommu_prot,
2500			 gfp_t gfp)
2501{
2502	struct protection_domain *domain = to_pdomain(dom);
2503	struct domain_pgtable pgtable;
2504	int prot = 0;
2505	int ret;
2506
2507	amd_iommu_domain_get_pgtable(domain, &pgtable);
2508	if (pgtable.mode == PAGE_MODE_NONE)
2509		return -EINVAL;
2510
2511	if (iommu_prot & IOMMU_READ)
2512		prot |= IOMMU_PROT_IR;
2513	if (iommu_prot & IOMMU_WRITE)
2514		prot |= IOMMU_PROT_IW;
2515
2516	ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp);
2517
2518	domain_flush_np_cache(domain, iova, page_size);
2519
2520	return ret;
2521}
2522
2523static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
2524			      size_t page_size,
2525			      struct iommu_iotlb_gather *gather)
2526{
2527	struct protection_domain *domain = to_pdomain(dom);
2528	struct domain_pgtable pgtable;
2529
2530	amd_iommu_domain_get_pgtable(domain, &pgtable);
2531	if (pgtable.mode == PAGE_MODE_NONE)
2532		return 0;
2533
2534	return iommu_unmap_page(domain, iova, page_size);
2535}
2536
2537static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
2538					  dma_addr_t iova)
2539{
2540	struct protection_domain *domain = to_pdomain(dom);
2541	unsigned long offset_mask, pte_pgsize;
2542	struct domain_pgtable pgtable;
2543	u64 *pte, __pte;
2544
2545	amd_iommu_domain_get_pgtable(domain, &pgtable);
2546	if (pgtable.mode == PAGE_MODE_NONE)
2547		return iova;
2548
2549	pte = fetch_pte(domain, iova, &pte_pgsize);
2550
2551	if (!pte || !IOMMU_PTE_PRESENT(*pte))
2552		return 0;
2553
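	/*
	 * fetch_pte() reports the page size the PTE actually maps, which may
	 * be a large page. For a 2 MiB mapping, pte_pgsize is 0x200000 and
	 * offset_mask is 0x1fffff, so the low 21 bits of the result come
	 * from the iova and the rest from the PTE.
	 */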
2554	offset_mask = pte_pgsize - 1;
2555	__pte	    = __sme_clr(*pte & PM_ADDR_MASK);
2556
2557	return (__pte & ~offset_mask) | (iova & offset_mask);
2558}
2559
2560static bool amd_iommu_capable(enum iommu_cap cap)
2561{
2562	switch (cap) {
2563	case IOMMU_CAP_CACHE_COHERENCY:
2564		return true;
2565	case IOMMU_CAP_INTR_REMAP:
2566		return (irq_remapping_enabled == 1);
2567	case IOMMU_CAP_NOEXEC:
2568		return false;
2569	default:
2570		break;
2571	}
2572
2573	return false;
2574}
2575
2576static void amd_iommu_get_resv_regions(struct device *dev,
2577				       struct list_head *head)
2578{
2579	struct iommu_resv_region *region;
2580	struct unity_map_entry *entry;
2581	int devid;
2582
2583	devid = get_device_id(dev);
2584	if (devid < 0)
2585		return;
2586
2587	list_for_each_entry(entry, &amd_iommu_unity_map, list) {
2588		int type, prot = 0;
2589		size_t length;
2590
2591		if (devid < entry->devid_start || devid > entry->devid_end)
2592			continue;
2593
2594		type   = IOMMU_RESV_DIRECT;
2595		length = entry->address_end - entry->address_start;
2596		if (entry->prot & IOMMU_PROT_IR)
2597			prot |= IOMMU_READ;
2598		if (entry->prot & IOMMU_PROT_IW)
2599			prot |= IOMMU_WRITE;
2600		if (entry->prot & IOMMU_UNITY_MAP_FLAG_EXCL_RANGE)
2601			/* Exclusion range */
2602			type = IOMMU_RESV_RESERVED;
2603
2604		region = iommu_alloc_resv_region(entry->address_start,
2605						 length, prot, type);
2606		if (!region) {
2607			dev_err(dev, "Out of memory allocating dm-regions\n");
2608			return;
2609		}
2610		list_add_tail(&region->list, head);
2611	}
2612
2613	region = iommu_alloc_resv_region(MSI_RANGE_START,
2614					 MSI_RANGE_END - MSI_RANGE_START + 1,
2615					 0, IOMMU_RESV_MSI);
2616	if (!region)
2617		return;
2618	list_add_tail(&region->list, head);
2619
2620	region = iommu_alloc_resv_region(HT_RANGE_START,
2621					 HT_RANGE_END - HT_RANGE_START + 1,
2622					 0, IOMMU_RESV_RESERVED);
2623	if (!region)
2624		return;
2625	list_add_tail(&region->list, head);
2626}
2627
2628bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
2629				  struct device *dev)
2630{
2631	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
2632
2633	return dev_data->defer_attach;
2634}
2635EXPORT_SYMBOL_GPL(amd_iommu_is_attach_deferred);
2636
2637static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
2638{
2639	struct protection_domain *dom = to_pdomain(domain);
2640	unsigned long flags;
2641
2642	spin_lock_irqsave(&dom->lock, flags);
2643	domain_flush_tlb_pde(dom);
2644	domain_flush_complete(dom);
2645	spin_unlock_irqrestore(&dom->lock, flags);
2646}
2647
2648static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
2649				 struct iommu_iotlb_gather *gather)
2650{
2651	amd_iommu_flush_iotlb_all(domain);
2652}
2653
2654static int amd_iommu_def_domain_type(struct device *dev)
2655{
2656	struct iommu_dev_data *dev_data;
2657
2658	dev_data = dev_iommu_priv_get(dev);
2659	if (!dev_data)
2660		return 0;
2661
2662	/*
2663	 * Do not identity map IOMMUv2 capable devices when memory encryption is
2664	 * active, because some of those devices (AMD GPUs) don't have the
2665	 * encryption bit in their DMA-mask and require remapping.
2666	 */
2667	if (!mem_encrypt_active() && dev_data->iommu_v2)
2668		return IOMMU_DOMAIN_IDENTITY;
2669
2670	return 0;
2671}
2672
2673const struct iommu_ops amd_iommu_ops = {
2674	.capable = amd_iommu_capable,
2675	.domain_alloc = amd_iommu_domain_alloc,
2676	.domain_free  = amd_iommu_domain_free,
2677	.attach_dev = amd_iommu_attach_device,
2678	.detach_dev = amd_iommu_detach_device,
2679	.map = amd_iommu_map,
2680	.unmap = amd_iommu_unmap,
2681	.iova_to_phys = amd_iommu_iova_to_phys,
2682	.probe_device = amd_iommu_probe_device,
2683	.release_device = amd_iommu_release_device,
2684	.probe_finalize = amd_iommu_probe_finalize,
2685	.device_group = amd_iommu_device_group,
2686	.domain_get_attr = amd_iommu_domain_get_attr,
2687	.get_resv_regions = amd_iommu_get_resv_regions,
2688	.put_resv_regions = generic_iommu_put_resv_regions,
2689	.is_attach_deferred = amd_iommu_is_attach_deferred,
2690	.pgsize_bitmap	= AMD_IOMMU_PGSIZES,
2691	.flush_iotlb_all = amd_iommu_flush_iotlb_all,
2692	.iotlb_sync = amd_iommu_iotlb_sync,
2693	.def_domain_type = amd_iommu_def_domain_type,
2694};
2695
2696/*****************************************************************************
2697 *
2698 * The next functions do a basic initialization of the IOMMU for
2699 * passthrough mode.
2700 *
2701 * In passthrough mode the IOMMU is initialized and enabled but not used for
2702 * DMA-API translation.
2703 *
2704 *****************************************************************************/
2705
2706/* IOMMUv2 specific functions */
2707int amd_iommu_register_ppr_notifier(struct notifier_block *nb)
2708{
2709	return atomic_notifier_chain_register(&ppr_notifier, nb);
2710}
2711EXPORT_SYMBOL(amd_iommu_register_ppr_notifier);
2712
2713int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb)
2714{
2715	return atomic_notifier_chain_unregister(&ppr_notifier, nb);
2716}
2717EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
2718
2719void amd_iommu_domain_direct_map(struct iommu_domain *dom)
2720{
2721	struct protection_domain *domain = to_pdomain(dom);
2722	struct domain_pgtable pgtable;
2723	unsigned long flags;
2724
2725	spin_lock_irqsave(&domain->lock, flags);
2726
2727	/* First save the pgtable configuration */
2728	amd_iommu_domain_get_pgtable(domain, &pgtable);
2729
2730	/* Remove page-table from domain */
2731	amd_iommu_domain_clr_pt_root(domain);
2732
2733	/* Make changes visible to IOMMUs */
2734	update_domain(domain);
2735
2736	/* Page-table is not visible to IOMMU anymore, so free it */
2737	free_pagetable(&pgtable);
2738
2739	spin_unlock_irqrestore(&domain->lock, flags);
2740}
2741EXPORT_SYMBOL(amd_iommu_domain_direct_map);
2742
2743int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
2744{
2745	struct protection_domain *domain = to_pdomain(dom);
2746	unsigned long flags;
2747	int levels, ret;
2748
2749	if (pasids <= 0 || pasids > (PASID_MASK + 1))
2750		return -EINVAL;
2751
2752	/* Number of GCR3 table levels required */
2753	for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9)
2754		levels += 1;
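	/*
	 * Each GCR3 table level indexes 9 bits of the PASID, so the loop
	 * above counts how many 9-bit chunks sit above the lowest one.
	 * For example, pasids = 1 << 16 gives levels = 1, i.e. a two-level
	 * GCR3 table: bits 17:9 index the root table, bits 8:0 the leaf.
	 */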
2755
2756	if (levels > amd_iommu_max_glx_val)
2757		return -EINVAL;
2758
2759	spin_lock_irqsave(&domain->lock, flags);
2760
2761	/*
2762	 * Spare us the sanity checks of whether every device already in
2763	 * the domain supports IOMMUv2; simply require that the domain has
2764	 * no devices attached when it is switched into IOMMUv2 mode.
2765	 */
2766	ret = -EBUSY;
2767	if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK)
2768		goto out;
2769
2770	ret = -ENOMEM;
2771	domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
2772	if (domain->gcr3_tbl == NULL)
2773		goto out;
2774
2775	domain->glx      = levels;
2776	domain->flags   |= PD_IOMMUV2_MASK;
2777
2778	update_domain(domain);
2779
2780	ret = 0;
2781
2782out:
2783	spin_unlock_irqrestore(&domain->lock, flags);
2784
2785	return ret;
2786}
2787EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
2788
2789static int __flush_pasid(struct protection_domain *domain, int pasid,
2790			 u64 address, bool size)
2791{
2792	struct iommu_dev_data *dev_data;
2793	struct iommu_cmd cmd;
2794	int i, ret;
2795
2796	if (!(domain->flags & PD_IOMMUV2_MASK))
2797		return -EINVAL;
2798
2799	build_inv_iommu_pasid(&cmd, domain->id, pasid, address, size);
2800
2801	/*
2802	 * The IOMMU TLB needs to be flushed before the device TLB to
2803	 * prevent the device TLB from being refilled from stale IOMMU TLB entries.
2804	 */
2805	for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
2806		if (domain->dev_iommu[i] == 0)
2807			continue;
2808
2809		ret = iommu_queue_command(amd_iommus[i], &cmd);
2810		if (ret != 0)
2811			goto out;
2812	}
2813
2814	/* Wait until IOMMU TLB flushes are complete */
2815	domain_flush_complete(domain);
2816
2817	/* Now flush device TLBs */
2818	list_for_each_entry(dev_data, &domain->dev_list, list) {
2819		struct amd_iommu *iommu;
2820		int qdep;
2821
2822		/*
2823		 * There might be non-IOMMUv2 capable devices in an IOMMUv2
2824		 * domain.
2825		 */
2826		if (!dev_data->ats.enabled)
2827			continue;
2828
2829		qdep  = dev_data->ats.qdep;
2830		iommu = amd_iommu_rlookup_table[dev_data->devid];
2831
2832		build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid,
2833				      qdep, address, size);
2834
2835		ret = iommu_queue_command(iommu, &cmd);
2836		if (ret != 0)
2837			goto out;
2838	}
2839
2840	/* Wait until all device TLBs are flushed */
2841	domain_flush_complete(domain);
2842
2843	ret = 0;
2844
2845out:
2846
2847	return ret;
2848}
2849
2850static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid,
2851				  u64 address)
2852{
2853	return __flush_pasid(domain, pasid, address, false);
2854}
2855
2856int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
2857			 u64 address)
2858{
2859	struct protection_domain *domain = to_pdomain(dom);
2860	unsigned long flags;
2861	int ret;
2862
2863	spin_lock_irqsave(&domain->lock, flags);
2864	ret = __amd_iommu_flush_page(domain, pasid, address);
2865	spin_unlock_irqrestore(&domain->lock, flags);
2866
2867	return ret;
2868}
2869EXPORT_SYMBOL(amd_iommu_flush_page);
2870
2871static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid)
2872{
2873	return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
2874			     true);
2875}
2876
2877int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
2878{
2879	struct protection_domain *domain = to_pdomain(dom);
2880	unsigned long flags;
2881	int ret;
2882
2883	spin_lock_irqsave(&domain->lock, flags);
2884	ret = __amd_iommu_flush_tlb(domain, pasid);
2885	spin_unlock_irqrestore(&domain->lock, flags);
2886
2887	return ret;
2888}
2889EXPORT_SYMBOL(amd_iommu_flush_tlb);
2890
2891static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
2892{
2893	int index;
2894	u64 *pte;
2895
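	/*
	 * Walk the GCR3 table from the top level down to level 0, consuming
	 * 9 PASID bits per level. E.g. with level == 1 and pasid == 0x1234,
	 * the level-1 index is (0x1234 >> 9) & 0x1ff = 0x9 and the level-0
	 * index is 0x1234 & 0x1ff = 0x34; a pointer into the leaf table is
	 * returned.
	 */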
2896	while (true) {
2897
2898		index = (pasid >> (9 * level)) & 0x1ff;
2899		pte   = &root[index];
2900
2901		if (level == 0)
2902			break;
2903
2904		if (!(*pte & GCR3_VALID)) {
2905			if (!alloc)
2906				return NULL;
2907
2908			root = (void *)get_zeroed_page(GFP_ATOMIC);
2909			if (root == NULL)
2910				return NULL;
2911
2912			*pte = iommu_virt_to_phys(root) | GCR3_VALID;
2913		}
2914
2915		root = iommu_phys_to_virt(*pte & PAGE_MASK);
2916
2917		level -= 1;
2918	}
2919
2920	return pte;
2921}
2922
2923static int __set_gcr3(struct protection_domain *domain, int pasid,
2924		      unsigned long cr3)
2925{
2926	struct domain_pgtable pgtable;
2927	u64 *pte;
2928
2929	amd_iommu_domain_get_pgtable(domain, &pgtable);
2930	if (pgtable.mode != PAGE_MODE_NONE)
2931		return -EINVAL;
2932
2933	pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
2934	if (pte == NULL)
2935		return -ENOMEM;
2936
2937	*pte = (cr3 & PAGE_MASK) | GCR3_VALID;
2938
2939	return __amd_iommu_flush_tlb(domain, pasid);
2940}
2941
2942static int __clear_gcr3(struct protection_domain *domain, int pasid)
2943{
2944	struct domain_pgtable pgtable;
2945	u64 *pte;
2946
2947	amd_iommu_domain_get_pgtable(domain, &pgtable);
2948	if (pgtable.mode != PAGE_MODE_NONE)
2949		return -EINVAL;
2950
2951	pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
2952	if (pte == NULL)
2953		return 0;
2954
2955	*pte = 0;
2956
2957	return __amd_iommu_flush_tlb(domain, pasid);
2958}
2959
2960int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
2961			      unsigned long cr3)
2962{
2963	struct protection_domain *domain = to_pdomain(dom);
2964	unsigned long flags;
2965	int ret;
2966
2967	spin_lock_irqsave(&domain->lock, flags);
2968	ret = __set_gcr3(domain, pasid, cr3);
2969	spin_unlock_irqrestore(&domain->lock, flags);
2970
2971	return ret;
2972}
2973EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
2974
2975int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
2976{
2977	struct protection_domain *domain = to_pdomain(dom);
2978	unsigned long flags;
2979	int ret;
2980
2981	spin_lock_irqsave(&domain->lock, flags);
2982	ret = __clear_gcr3(domain, pasid);
2983	spin_unlock_irqrestore(&domain->lock, flags);
2984
2985	return ret;
2986}
2987EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
2988
2989int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
2990			   int status, int tag)
2991{
2992	struct iommu_dev_data *dev_data;
2993	struct amd_iommu *iommu;
2994	struct iommu_cmd cmd;
2995
2996	dev_data = dev_iommu_priv_get(&pdev->dev);
2997	iommu    = amd_iommu_rlookup_table[dev_data->devid];
2998
2999	build_complete_ppr(&cmd, dev_data->devid, pasid, status,
3000			   tag, dev_data->pri_tlp);
3001
3002	return iommu_queue_command(iommu, &cmd);
3003}
3004EXPORT_SYMBOL(amd_iommu_complete_ppr);
3005
3006struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
3007{
3008	struct protection_domain *pdomain;
3009	struct iommu_dev_data *dev_data;
3010	struct device *dev = &pdev->dev;
3011	struct iommu_domain *io_domain;
3012
3013	if (!check_device(dev))
3014		return NULL;
3015
3016	dev_data  = dev_iommu_priv_get(&pdev->dev);
3017	pdomain   = dev_data->domain;
3018	io_domain = iommu_get_domain_for_dev(dev);
3019
3020	if (pdomain == NULL && dev_data->defer_attach) {
3021		dev_data->defer_attach = false;
3022		pdomain = to_pdomain(io_domain);
3023		attach_device(dev, pdomain);
3024	}
3025
3026	if (pdomain == NULL)
3027		return NULL;
3028
3029	if (io_domain->type != IOMMU_DOMAIN_DMA)
3030		return NULL;
3031
3032	/* Only return IOMMUv2 domains */
3033	if (!(pdomain->flags & PD_IOMMUV2_MASK))
3034		return NULL;
3035
3036	return &pdomain->domain;
3037}
3038EXPORT_SYMBOL(amd_iommu_get_v2_domain);
3039
3040void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
3041{
3042	struct iommu_dev_data *dev_data;
3043
3044	if (!amd_iommu_v2_supported())
3045		return;
3046
3047	dev_data = dev_iommu_priv_get(&pdev->dev);
3048	dev_data->errata |= (1 << erratum);
3049}
3050EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
3051
3052int amd_iommu_device_info(struct pci_dev *pdev,
3053                          struct amd_iommu_device_info *info)
3054{
3055	int max_pasids;
3056	int pos;
3057
3058	if (pdev == NULL || info == NULL)
3059		return -EINVAL;
3060
3061	if (!amd_iommu_v2_supported())
3062		return -EINVAL;
3063
3064	memset(info, 0, sizeof(*info));
3065
3066	if (pci_ats_supported(pdev))
3067		info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
3068
3069	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
3070	if (pos)
3071		info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
3072
3073	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
3074	if (pos) {
3075		int features;
3076
3077		max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1));
3078		max_pasids = min(max_pasids, (1 << 20));
3079
3080		info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
3081		info->max_pasids = min(pci_max_pasids(pdev), max_pasids);
3082
3083		features = pci_pasid_features(pdev);
3084		if (features & PCI_PASID_CAP_EXEC)
3085			info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
3086		if (features & PCI_PASID_CAP_PRIV)
3087			info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
3088	}
3089
3090	return 0;
3091}
3092EXPORT_SYMBOL(amd_iommu_device_info);
3093
3094#ifdef CONFIG_IRQ_REMAP
3095
3096/*****************************************************************************
3097 *
3098 * Interrupt Remapping Implementation
3099 *
3100 *****************************************************************************/
3101
3102static struct irq_chip amd_ir_chip;
3103static DEFINE_SPINLOCK(iommu_table_lock);
3104
3105static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
3106{
3107	u64 dte;
3108
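	/*
	 * DTE data[2] holds the interrupt-remapping configuration: the
	 * physical address of the per-device IRQ table plus the IntCtl,
	 * table-length and remap-enable bits set below.
	 */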
3109	dte	= amd_iommu_dev_table[devid].data[2];
3110	dte	&= ~DTE_IRQ_PHYS_ADDR_MASK;
3111	dte	|= iommu_virt_to_phys(table->table);
3112	dte	|= DTE_IRQ_REMAP_INTCTL;
3113	dte	|= DTE_IRQ_TABLE_LEN;
3114	dte	|= DTE_IRQ_REMAP_ENABLE;
3115
3116	amd_iommu_dev_table[devid].data[2] = dte;
3117}
3118
3119static struct irq_remap_table *get_irq_table(u16 devid)
3120{
3121	struct irq_remap_table *table;
3122
3123	if (WARN_ONCE(!amd_iommu_rlookup_table[devid],
3124		      "%s: no iommu for devid %x\n", __func__, devid))
3125		return NULL;
3126
3127	table = irq_lookup_table[devid];
3128	if (WARN_ONCE(!table, "%s: no table for devid %x\n", __func__, devid))
3129		return NULL;
3130
3131	return table;
3132}
3133
3134static struct irq_remap_table *__alloc_irq_table(void)
3135{
3136	struct irq_remap_table *table;
3137
3138	table = kzalloc(sizeof(*table), GFP_KERNEL);
3139	if (!table)
3140		return NULL;
3141
3142	table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL);
3143	if (!table->table) {
3144		kfree(table);
3145		return NULL;
3146	}
3147	raw_spin_lock_init(&table->lock);
3148
3149	if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
3150		memset(table->table, 0,
3151		       MAX_IRQS_PER_TABLE * sizeof(u32));
3152	else
3153		memset(table->table, 0,
3154		       (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
3155	return table;
3156}
3157
3158static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid,
3159				  struct irq_remap_table *table)
3160{
3161	irq_lookup_table[devid] = table;
3162	set_dte_irq_entry(devid, table);
3163	iommu_flush_dte(iommu, devid);
3164}
3165
3166static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias,
3167				       void *data)
3168{
3169	struct irq_remap_table *table = data;
3170
3171	irq_lookup_table[alias] = table;
3172	set_dte_irq_entry(alias, table);
3173
3174	iommu_flush_dte(amd_iommu_rlookup_table[alias], alias);
3175
3176	return 0;
3177}
3178
3179static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev)
3180{
3181	struct irq_remap_table *table = NULL;
3182	struct irq_remap_table *new_table = NULL;
3183	struct amd_iommu *iommu;
3184	unsigned long flags;
3185	u16 alias;
3186
3187	spin_lock_irqsave(&iommu_table_lock, flags);
3188
3189	iommu = amd_iommu_rlookup_table[devid];
3190	if (!iommu)
3191		goto out_unlock;
3192
3193	table = irq_lookup_table[devid];
3194	if (table)
3195		goto out_unlock;
3196
3197	alias = amd_iommu_alias_table[devid];
3198	table = irq_lookup_table[alias];
3199	if (table) {
3200		set_remap_table_entry(iommu, devid, table);
3201		goto out_wait;
3202	}
3203	spin_unlock_irqrestore(&iommu_table_lock, flags);
3204
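	/*
	 * The table lock cannot be held across the sleeping allocation, so
	 * it is dropped here; both the devid and alias slots are re-checked
	 * once it is re-taken, and an unused new_table is freed at the end
	 * if someone else won the race.
	 */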
3205	/* Nothing there yet, allocate new irq remapping table */
3206	new_table = __alloc_irq_table();
3207	if (!new_table)
3208		return NULL;
3209
3210	spin_lock_irqsave(&iommu_table_lock, flags);
3211
3212	table = irq_lookup_table[devid];
3213	if (table)
3214		goto out_unlock;
3215
3216	table = irq_lookup_table[alias];
3217	if (table) {
3218		set_remap_table_entry(iommu, devid, table);
3219		goto out_wait;
3220	}
3221
3222	table = new_table;
3223	new_table = NULL;
3224
3225	if (pdev)
3226		pci_for_each_dma_alias(pdev, set_remap_table_entry_alias,
3227				       table);
3228	else
3229		set_remap_table_entry(iommu, devid, table);
3230
3231	if (devid != alias)
3232		set_remap_table_entry(iommu, alias, table);
3233
3234out_wait:
3235	iommu_completion_wait(iommu);
3236
3237out_unlock:
3238	spin_unlock_irqrestore(&iommu_table_lock, flags);
3239
3240	if (new_table) {
3241		kmem_cache_free(amd_iommu_irq_cache, new_table->table);
3242		kfree(new_table);
3243	}
3244	return table;
3245}
3246
3247static int alloc_irq_index(u16 devid, int count, bool align,
3248			   struct pci_dev *pdev)
3249{
3250	struct irq_remap_table *table;
3251	int index, c, alignment = 1;
3252	unsigned long flags;
3253	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
3254
3255	if (!iommu)
3256		return -ENODEV;
3257
3258	table = alloc_irq_table(devid, pdev);
3259	if (!table)
3260		return -ENODEV;
3261
3262	if (align)
3263		alignment = roundup_pow_of_two(count);
3264
3265	raw_spin_lock_irqsave(&table->lock, flags);
3266
3267	/* Scan table for free entries */
3268	for (index = ALIGN(table->min_index, alignment), c = 0;
3269	     index < MAX_IRQS_PER_TABLE;) {
3270		if (!iommu->irte_ops->is_allocated(table, index)) {
3271			c += 1;
3272		} else {
3273			c     = 0;
3274			index = ALIGN(index + 1, alignment);
3275			continue;
3276		}
3277
3278		if (c == count)	{
3279			for (; c != 0; --c)
3280				iommu->irte_ops->set_allocated(table, index - c + 1);
3281
3282			index -= count - 1;
3283			goto out;
3284		}
3285
3286		index++;
3287	}
3288
3289	index = -ENOSPC;
3290
3291out:
3292	raw_spin_unlock_irqrestore(&table->lock, flags);
3293
3294	return index;
3295}
3296
3297static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
3298			  struct amd_ir_data *data)
3299{
3300	bool ret;
3301	struct irq_remap_table *table;
3302	struct amd_iommu *iommu;
3303	unsigned long flags;
3304	struct irte_ga *entry;
3305
3306	iommu = amd_iommu_rlookup_table[devid];
3307	if (iommu == NULL)
3308		return -EINVAL;
3309
3310	table = get_irq_table(devid);
3311	if (!table)
3312		return -ENOMEM;
3313
3314	raw_spin_lock_irqsave(&table->lock, flags);
3315
3316	entry = (struct irte_ga *)table->table;
3317	entry = &entry[index];
3318
3319	ret = cmpxchg_double(&entry->lo.val, &entry->hi.val,
3320			     entry->lo.val, entry->hi.val,
3321			     irte->lo.val, irte->hi.val);
3322	/*
3323	 * We use cmpxchg16 to atomically update the 128-bit IRTE. The
3324	 * entry cannot be modified behind our back by the hardware or by
3325	 * other processors, so the cmpxchg is expected to find the old
3326	 * value we passed in and the update must succeed.
3327	 */
3328	WARN_ON(!ret);
3329
3330	if (data)
3331		data->ref = entry;
3332
3333	raw_spin_unlock_irqrestore(&table->lock, flags);
3334
3335	iommu_flush_irt(iommu, devid);
3336	iommu_completion_wait(iommu);
3337
3338	return 0;
3339}
3340
3341static int modify_irte(u16 devid, int index, union irte *irte)
3342{
3343	struct irq_remap_table *table;
3344	struct amd_iommu *iommu;
3345	unsigned long flags;
3346
3347	iommu = amd_iommu_rlookup_table[devid];
3348	if (iommu == NULL)
3349		return -EINVAL;
3350
3351	table = get_irq_table(devid);
3352	if (!table)
3353		return -ENOMEM;
3354
3355	raw_spin_lock_irqsave(&table->lock, flags);
3356	table->table[index] = irte->val;
3357	raw_spin_unlock_irqrestore(&table->lock, flags);
3358
3359	iommu_flush_irt(iommu, devid);
3360	iommu_completion_wait(iommu);
3361
3362	return 0;
3363}
3364
3365static void free_irte(u16 devid, int index)
3366{
3367	struct irq_remap_table *table;
3368	struct amd_iommu *iommu;
3369	unsigned long flags;
3370
3371	iommu = amd_iommu_rlookup_table[devid];
3372	if (iommu == NULL)
3373		return;
3374
3375	table = get_irq_table(devid);
3376	if (!table)
3377		return;
3378
3379	raw_spin_lock_irqsave(&table->lock, flags);
3380	iommu->irte_ops->clear_allocated(table, index);
3381	raw_spin_unlock_irqrestore(&table->lock, flags);
3382
3383	iommu_flush_irt(iommu, devid);
3384	iommu_completion_wait(iommu);
3385}
3386
3387static void irte_prepare(void *entry,
3388			 u32 delivery_mode, u32 dest_mode,
3389			 u8 vector, u32 dest_apicid, int devid)
3390{
3391	union irte *irte = (union irte *) entry;
3392
3393	irte->val                = 0;
3394	irte->fields.vector      = vector;
3395	irte->fields.int_type    = delivery_mode;
3396	irte->fields.destination = dest_apicid;
3397	irte->fields.dm          = dest_mode;
3398	irte->fields.valid       = 1;
3399}
3400
3401static void irte_ga_prepare(void *entry,
3402			    u32 delivery_mode, u32 dest_mode,
3403			    u8 vector, u32 dest_apicid, int devid)
3404{
3405	struct irte_ga *irte = (struct irte_ga *) entry;
3406
3407	irte->lo.val                      = 0;
3408	irte->hi.val                      = 0;
3409	irte->lo.fields_remap.int_type    = delivery_mode;
3410	irte->lo.fields_remap.dm          = dest_mode;
3411	irte->hi.fields.vector            = vector;
3412	irte->lo.fields_remap.destination = APICID_TO_IRTE_DEST_LO(dest_apicid);
3413	irte->hi.fields.destination       = APICID_TO_IRTE_DEST_HI(dest_apicid);
3414	irte->lo.fields_remap.valid       = 1;
3415}
3416
3417static void irte_activate(void *entry, u16 devid, u16 index)
3418{
3419	union irte *irte = (union irte *) entry;
3420
3421	irte->fields.valid = 1;
3422	modify_irte(devid, index, irte);
3423}
3424
3425static void irte_ga_activate(void *entry, u16 devid, u16 index)
3426{
3427	struct irte_ga *irte = (struct irte_ga *) entry;
3428
3429	irte->lo.fields_remap.valid = 1;
3430	modify_irte_ga(devid, index, irte, NULL);
3431}
3432
3433static void irte_deactivate(void *entry, u16 devid, u16 index)
3434{
3435	union irte *irte = (union irte *) entry;
3436
3437	irte->fields.valid = 0;
3438	modify_irte(devid, index, irte);
3439}
3440
3441static void irte_ga_deactivate(void *entry, u16 devid, u16 index)
3442{
3443	struct irte_ga *irte = (struct irte_ga *) entry;
3444
3445	irte->lo.fields_remap.valid = 0;
3446	modify_irte_ga(devid, index, irte, NULL);
3447}
3448
3449static void irte_set_affinity(void *entry, u16 devid, u16 index,
3450			      u8 vector, u32 dest_apicid)
3451{
3452	union irte *irte = (union irte *) entry;
3453
3454	irte->fields.vector = vector;
3455	irte->fields.destination = dest_apicid;
3456	modify_irte(devid, index, irte);
3457}
3458
3459static void irte_ga_set_affinity(void *entry, u16 devid, u16 index,
3460				 u8 vector, u32 dest_apicid)
3461{
3462	struct irte_ga *irte = (struct irte_ga *) entry;
3463
3464	if (!irte->lo.fields_remap.guest_mode) {
3465		irte->hi.fields.vector = vector;
3466		irte->lo.fields_remap.destination =
3467					APICID_TO_IRTE_DEST_LO(dest_apicid);
3468		irte->hi.fields.destination =
3469					APICID_TO_IRTE_DEST_HI(dest_apicid);
3470		modify_irte_ga(devid, index, irte, NULL);
3471	}
3472}
3473
3474#define IRTE_ALLOCATED (~1U)
3475static void irte_set_allocated(struct irq_remap_table *table, int index)
3476{
3477	table->table[index] = IRTE_ALLOCATED;
3478}
3479
3480static void irte_ga_set_allocated(struct irq_remap_table *table, int index)
3481{
3482	struct irte_ga *ptr = (struct irte_ga *)table->table;
3483	struct irte_ga *irte = &ptr[index];
3484
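	/*
	 * For the 128-bit IRTE format a non-zero vector is what marks a
	 * slot as allocated (see irte_ga_is_allocated()), so 0xff is
	 * written as a placeholder.
	 */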
3485	memset(&irte->lo.val, 0, sizeof(u64));
3486	memset(&irte->hi.val, 0, sizeof(u64));
3487	irte->hi.fields.vector = 0xff;
3488}
3489
3490static bool irte_is_allocated(struct irq_remap_table *table, int index)
3491{
3492	union irte *ptr = (union irte *)table->table;
3493	union irte *irte = &ptr[index];
3494
3495	return irte->val != 0;
3496}
3497
3498static bool irte_ga_is_allocated(struct irq_remap_table *table, int index)
3499{
3500	struct irte_ga *ptr = (struct irte_ga *)table->table;
3501	struct irte_ga *irte = &ptr[index];
3502
3503	return irte->hi.fields.vector != 0;
3504}
3505
3506static void irte_clear_allocated(struct irq_remap_table *table, int index)
3507{
3508	table->table[index] = 0;
3509}
3510
3511static void irte_ga_clear_allocated(struct irq_remap_table *table, int index)
3512{
3513	struct irte_ga *ptr = (struct irte_ga *)table->table;
3514	struct irte_ga *irte = &ptr[index];
3515
3516	memset(&irte->lo.val, 0, sizeof(u64));
3517	memset(&irte->hi.val, 0, sizeof(u64));
3518}
3519
3520static int get_devid(struct irq_alloc_info *info)
3521{
3522	int devid = -1;
3523
3524	switch (info->type) {
3525	case X86_IRQ_ALLOC_TYPE_IOAPIC:
3526		devid     = get_ioapic_devid(info->ioapic_id);
3527		break;
3528	case X86_IRQ_ALLOC_TYPE_HPET:
3529		devid     = get_hpet_devid(info->hpet_id);
3530		break;
3531	case X86_IRQ_ALLOC_TYPE_MSI:
3532	case X86_IRQ_ALLOC_TYPE_MSIX:
3533		devid = get_device_id(&info->msi_dev->dev);
3534		break;
3535	default:
3536		BUG_ON(1);
3537		break;
3538	}
3539
3540	return devid;
3541}
3542
3543static struct irq_domain *get_ir_irq_domain(struct irq_alloc_info *info)
3544{
3545	struct amd_iommu *iommu;
3546	int devid;
3547
3548	if (!info)
3549		return NULL;
3550
3551	devid = get_devid(info);
3552	if (devid >= 0) {
3553		iommu = amd_iommu_rlookup_table[devid];
3554		if (iommu)
3555			return iommu->ir_domain;
3556	}
3557
3558	return NULL;
3559}
3560
3561static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
3562{
3563	struct amd_iommu *iommu;
3564	int devid;
3565
3566	if (!info)
3567		return NULL;
3568
3569	switch (info->type) {
3570	case X86_IRQ_ALLOC_TYPE_MSI:
3571	case X86_IRQ_ALLOC_TYPE_MSIX:
3572		devid = get_device_id(&info->msi_dev->dev);
3573		if (devid < 0)
3574			return NULL;
3575
3576		iommu = amd_iommu_rlookup_table[devid];
3577		if (iommu)
3578			return iommu->msi_domain;
3579		break;
3580	default:
3581		break;
3582	}
3583
3584	return NULL;
3585}
3586
3587struct irq_remap_ops amd_iommu_irq_ops = {
3588	.prepare		= amd_iommu_prepare,
3589	.enable			= amd_iommu_enable,
3590	.disable		= amd_iommu_disable,
3591	.reenable		= amd_iommu_reenable,
3592	.enable_faulting	= amd_iommu_enable_faulting,
3593	.get_ir_irq_domain	= get_ir_irq_domain,
3594	.get_irq_domain		= get_irq_domain,
3595};
3596
3597static void irq_remapping_prepare_irte(struct amd_ir_data *data,
3598				       struct irq_cfg *irq_cfg,
3599				       struct irq_alloc_info *info,
3600				       int devid, int index, int sub_handle)
3601{
3602	struct irq_2_irte *irte_info = &data->irq_2_irte;
3603	struct msi_msg *msg = &data->msi_entry;
3604	struct IO_APIC_route_entry *entry;
3605	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
3606
3607	if (!iommu)
3608		return;
3609
3610	data->irq_2_irte.devid = devid;
3611	data->irq_2_irte.index = index + sub_handle;
3612	iommu->irte_ops->prepare(data->entry, apic->irq_delivery_mode,
3613				 apic->irq_dest_mode, irq_cfg->vector,
3614				 irq_cfg->dest_apicid, devid);
3615
3616	switch (info->type) {
3617	case X86_IRQ_ALLOC_TYPE_IOAPIC:
3618		/* Setup IOAPIC entry */
3619		entry = info->ioapic_entry;
3620		info->ioapic_entry = NULL;
3621		memset(entry, 0, sizeof(*entry));
3622		entry->vector        = index;
3623		entry->mask          = 0;
3624		entry->trigger       = info->ioapic_trigger;
3625		entry->polarity      = info->ioapic_polarity;
3626		/* Mask level triggered irqs. */
3627		if (info->ioapic_trigger)
3628			entry->mask = 1;
3629		break;
3630
3631	case X86_IRQ_ALLOC_TYPE_HPET:
3632	case X86_IRQ_ALLOC_TYPE_MSI:
3633	case X86_IRQ_ALLOC_TYPE_MSIX:
3634		msg->address_hi = MSI_ADDR_BASE_HI;
3635		msg->address_lo = MSI_ADDR_BASE_LO;
3636		msg->data = irte_info->index;
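		/*
		 * With remapping enabled the MSI data programmed into the
		 * device is just the IRTE index; the IOMMU looks up the real
		 * vector and destination in the per-device IRQ table.
		 */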
3637		break;
3638
3639	default:
3640		BUG_ON(1);
3641		break;
3642	}
3643}
3644
3645struct amd_irte_ops irte_32_ops = {
3646	.prepare = irte_prepare,
3647	.activate = irte_activate,
3648	.deactivate = irte_deactivate,
3649	.set_affinity = irte_set_affinity,
3650	.set_allocated = irte_set_allocated,
3651	.is_allocated = irte_is_allocated,
3652	.clear_allocated = irte_clear_allocated,
3653};
3654
3655struct amd_irte_ops irte_128_ops = {
3656	.prepare = irte_ga_prepare,
3657	.activate = irte_ga_activate,
3658	.deactivate = irte_ga_deactivate,
3659	.set_affinity = irte_ga_set_affinity,
3660	.set_allocated = irte_ga_set_allocated,
3661	.is_allocated = irte_ga_is_allocated,
3662	.clear_allocated = irte_ga_clear_allocated,
3663};
3664
3665static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
3666			       unsigned int nr_irqs, void *arg)
3667{
3668	struct irq_alloc_info *info = arg;
3669	struct irq_data *irq_data;
3670	struct amd_ir_data *data = NULL;
3671	struct irq_cfg *cfg;
3672	int i, ret, devid;
3673	int index;
3674
3675	if (!info)
3676		return -EINVAL;
3677	if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_MSI &&
3678	    info->type != X86_IRQ_ALLOC_TYPE_MSIX)
3679		return -EINVAL;
3680
3681	/*
3682	 * With IRQ remapping enabled, we don't need contiguous CPU vectors
3683	 * to support multiple MSI interrupts.
3684	 */
3685	if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
3686		info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
3687
3688	devid = get_devid(info);
3689	if (devid < 0)
3690		return -EINVAL;
3691
3692	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
3693	if (ret < 0)
3694		return ret;
3695
3696	if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
3697		struct irq_remap_table *table;
3698		struct amd_iommu *iommu;
3699
3700		table = alloc_irq_table(devid, NULL);
3701		if (table) {
3702			if (!table->min_index) {
3703				/*
3704				 * Keep the first 32 indexes free for IOAPIC
3705				 * interrupts.
3706				 */
3707				table->min_index = 32;
3708				iommu = amd_iommu_rlookup_table[devid];
3709				for (i = 0; i < 32; ++i)
3710					iommu->irte_ops->set_allocated(table, i);
3711			}
3712			WARN_ON(table->min_index != 32);
3713			index = info->ioapic_pin;
3714		} else {
3715			index = -ENOMEM;
3716		}
3717	} else if (info->type == X86_IRQ_ALLOC_TYPE_MSI ||
3718		   info->type == X86_IRQ_ALLOC_TYPE_MSIX) {
3719		bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);
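		/*
		 * Multi-message MSI varies only the low bits of the data
		 * value to select a vector, so the block of IRTE indexes is
		 * presumably required to be power-of-two aligned; MSI-X has
		 * per-vector data and needs no alignment.
		 */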
3720
3721		index = alloc_irq_index(devid, nr_irqs, align, info->msi_dev);
3722	} else {
3723		index = alloc_irq_index(devid, nr_irqs, false, NULL);
3724	}
3725
3726	if (index < 0) {
3727		pr_warn("Failed to allocate IRTE\n");
3728		ret = index;
3729		goto out_free_parent;
3730	}
3731
3732	for (i = 0; i < nr_irqs; i++) {
3733		irq_data = irq_domain_get_irq_data(domain, virq + i);
3734		cfg = irqd_cfg(irq_data);
3735		if (!irq_data || !cfg) {
3736			ret = -EINVAL;
3737			goto out_free_data;
3738		}
3739
3740		ret = -ENOMEM;
3741		data = kzalloc(sizeof(*data), GFP_KERNEL);
3742		if (!data)
3743			goto out_free_data;
3744
3745		if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
3746			data->entry = kzalloc(sizeof(union irte), GFP_KERNEL);
3747		else
3748			data->entry = kzalloc(sizeof(struct irte_ga),
3749						     GFP_KERNEL);
3750		if (!data->entry) {
3751			kfree(data);
3752			goto out_free_data;
3753		}
3754
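		/*
		 * The hwirq encodes the device ID in the upper 16 bits and
		 * the sub-handle in the lower bits; chip_data carries the
		 * per-interrupt remapping state used by amd_ir_chip.
		 */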
3755		irq_data->hwirq = (devid << 16) + i;
3756		irq_data->chip_data = data;
3757		irq_data->chip = &amd_ir_chip;
3758		irq_remapping_prepare_irte(data, cfg, info, devid, index, i);
3759		irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
3760	}
3761
3762	return 0;
3763
3764out_free_data:
3765	for (i--; i >= 0; i--) {
3766		irq_data = irq_domain_get_irq_data(domain, virq + i);
3767		if (irq_data)
3768			kfree(irq_data->chip_data);
3769	}
3770	for (i = 0; i < nr_irqs; i++)
3771		free_irte(devid, index + i);
3772out_free_parent:
3773	irq_domain_free_irqs_common(domain, virq, nr_irqs);
3774	return ret;
3775}
3776
3777static void irq_remapping_free(struct irq_domain *domain, unsigned int virq,
3778			       unsigned int nr_irqs)
3779{
3780	struct irq_2_irte *irte_info;
3781	struct irq_data *irq_data;
3782	struct amd_ir_data *data;
3783	int i;
3784
3785	for (i = 0; i < nr_irqs; i++) {
3786		irq_data = irq_domain_get_irq_data(domain, virq  + i);
3787		if (irq_data && irq_data->chip_data) {
3788			data = irq_data->chip_data;
3789			irte_info = &data->irq_2_irte;
3790			free_irte(irte_info->devid, irte_info->index);
3791			kfree(data->entry);
3792			kfree(data);
3793		}
3794	}
3795	irq_domain_free_irqs_common(domain, virq, nr_irqs);
3796}
3797
3798static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
3799			       struct amd_ir_data *ir_data,
3800			       struct irq_2_irte *irte_info,
3801			       struct irq_cfg *cfg);
3802
3803static int irq_remapping_activate(struct irq_domain *domain,
3804				  struct irq_data *irq_data, bool reserve)
3805{
3806	struct amd_ir_data *data = irq_data->chip_data;
3807	struct irq_2_irte *irte_info = &data->irq_2_irte;
3808	struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
3809	struct irq_cfg *cfg = irqd_cfg(irq_data);
3810
3811	if (!iommu)
3812		return 0;
3813
3814	iommu->irte_ops->activate(data->entry, irte_info->devid,
3815				  irte_info->index);
3816	amd_ir_update_irte(irq_data, iommu, data, irte_info, cfg);
3817	return 0;
3818}
3819
3820static void irq_remapping_deactivate(struct irq_domain *domain,
3821				     struct irq_data *irq_data)
3822{
3823	struct amd_ir_data *data = irq_data->chip_data;
3824	struct irq_2_irte *irte_info = &data->irq_2_irte;
3825	struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
3826
3827	if (iommu)
3828		iommu->irte_ops->deactivate(data->entry, irte_info->devid,
3829					    irte_info->index);
3830}
3831
3832static const struct irq_domain_ops amd_ir_domain_ops = {
3833	.alloc = irq_remapping_alloc,
3834	.free = irq_remapping_free,
3835	.activate = irq_remapping_activate,
3836	.deactivate = irq_remapping_deactivate,
3837};
3838
3839int amd_iommu_activate_guest_mode(void *data)
3840{
3841	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
3842	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
3843	u64 valid;
3844
3845	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
3846	    !entry || entry->lo.fields_vapic.guest_mode)
3847		return 0;
3848
3849	valid = entry->lo.fields_vapic.valid;
3850
3851	entry->lo.val = 0;
3852	entry->hi.val = 0;
3853
3854	entry->lo.fields_vapic.valid       = valid;
3855	entry->lo.fields_vapic.guest_mode  = 1;
3856	entry->lo.fields_vapic.ga_log_intr = 1;
3857	entry->hi.fields.ga_root_ptr       = ir_data->ga_root_ptr;
3858	entry->hi.fields.vector            = ir_data->ga_vector;
3859	entry->lo.fields_vapic.ga_tag      = ir_data->ga_tag;
3860
3861	return modify_irte_ga(ir_data->irq_2_irte.devid,
3862			      ir_data->irq_2_irte.index, entry, ir_data);
3863}
3864EXPORT_SYMBOL(amd_iommu_activate_guest_mode);
3865
3866int amd_iommu_deactivate_guest_mode(void *data)
3867{
3868	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
3869	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
3870	struct irq_cfg *cfg = ir_data->cfg;
3871	u64 valid;
3872
3873	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
3874	    !entry || !entry->lo.fields_vapic.guest_mode)
3875		return 0;
3876
3877	valid = entry->lo.fields_remap.valid;
3878
3879	entry->lo.val = 0;
3880	entry->hi.val = 0;
3881
3882	entry->lo.fields_remap.valid       = valid;
3883	entry->lo.fields_remap.dm          = apic->irq_dest_mode;
3884	entry->lo.fields_remap.int_type    = apic->irq_delivery_mode;
3885	entry->hi.fields.vector            = cfg->vector;
3886	entry->lo.fields_remap.destination =
3887				APICID_TO_IRTE_DEST_LO(cfg->dest_apicid);
3888	entry->hi.fields.destination =
3889				APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
3890
3891	return modify_irte_ga(ir_data->irq_2_irte.devid,
3892			      ir_data->irq_2_irte.index, entry, ir_data);
3893}
3894EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
3895
3896static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
3897{
3898	int ret;
3899	struct amd_iommu *iommu;
3900	struct amd_iommu_pi_data *pi_data = vcpu_info;
3901	struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data;
3902	struct amd_ir_data *ir_data = data->chip_data;
3903	struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
3904	struct iommu_dev_data *dev_data = search_dev_data(irte_info->devid);
3905
3906	/* Note:
3907	 * This device has never been set up for guest mode,
3908	 * so we should not modify the IRTE.
3909	 */
3910	if (!dev_data || !dev_data->use_vapic)
3911		return 0;
3912
3913	ir_data->cfg = irqd_cfg(data);
3914	pi_data->ir_data = ir_data;
3915
3916	/* Note:
3917	 * SVM tries to set up VAPIC mode, but the IOMMU is running in
3918	 * legacy interrupt-remapping mode, so we force legacy mode instead.
3919	 */
3920	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
3921		pr_debug("%s: Fall back to using intr legacy remap\n",
3922			 __func__);
3923		pi_data->is_guest_mode = false;
3924	}
3925
3926	iommu = amd_iommu_rlookup_table[irte_info->devid];
3927	if (iommu == NULL)
3928		return -EINVAL;
3929
3930	pi_data->prev_ga_tag = ir_data->cached_ga_tag;
3931	if (pi_data->is_guest_mode) {
3932		ir_data->ga_root_ptr = (pi_data->base >> 12);
3933		ir_data->ga_vector = vcpu_pi_info->vector;
3934		ir_data->ga_tag = pi_data->ga_tag;
3935		ret = amd_iommu_activate_guest_mode(ir_data);
3936		if (!ret)
3937			ir_data->cached_ga_tag = pi_data->ga_tag;
3938	} else {
3939		ret = amd_iommu_deactivate_guest_mode(ir_data);
3940
3941		/*
3942		 * This communicates the ga_tag back to the caller
3943		 * so that it can do all the necessary clean up.
3944		 */
3945		if (!ret)
3946			ir_data->cached_ga_tag = 0;
3947	}
3948
3949	return ret;
3950}
3951
3952
3953static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
3954			       struct amd_ir_data *ir_data,
3955			       struct irq_2_irte *irte_info,
3956			       struct irq_cfg *cfg)
3957{
3958
3959	/*
3960	 * Atomically update the IRTE with the new destination and vector,
3961	 * then flush the interrupt entry cache.
3962	 */
3963	iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid,
3964				      irte_info->index, cfg->vector,
3965				      cfg->dest_apicid);
3966}
3967
3968static int amd_ir_set_affinity(struct irq_data *data,
3969			       const struct cpumask *mask, bool force)
3970{
3971	struct amd_ir_data *ir_data = data->chip_data;
3972	struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
3973	struct irq_cfg *cfg = irqd_cfg(data);
3974	struct irq_data *parent = data->parent_data;
3975	struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
3976	int ret;
3977
3978	if (!iommu)
3979		return -ENODEV;
3980
3981	ret = parent->chip->irq_set_affinity(parent, mask, force);
3982	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
3983		return ret;
3984
3985	amd_ir_update_irte(data, iommu, ir_data, irte_info, cfg);
3986	/*
3987	 * After this point, all the interrupts will start arriving
3988	 * at the new destination. So, time to clean up the previous
3989	 * vector allocation.
3990	 */
3991	send_cleanup_vector(cfg);
3992
3993	return IRQ_SET_MASK_OK_DONE;
3994}
3995
3996static void ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
3997{
3998	struct amd_ir_data *ir_data = irq_data->chip_data;
3999
4000	*msg = ir_data->msi_entry;
4001}
4002
4003static struct irq_chip amd_ir_chip = {
4004	.name			= "AMD-IR",
4005	.irq_ack		= apic_ack_irq,
4006	.irq_set_affinity	= amd_ir_set_affinity,
4007	.irq_set_vcpu_affinity	= amd_ir_set_vcpu_affinity,
4008	.irq_compose_msi_msg	= ir_compose_msi_msg,
4009};
4010
4011int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
4012{
4013	struct fwnode_handle *fn;
4014
4015	fn = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index);
4016	if (!fn)
4017		return -ENOMEM;
4018	iommu->ir_domain = irq_domain_create_tree(fn, &amd_ir_domain_ops, iommu);
4019	if (!iommu->ir_domain) {
4020		irq_domain_free_fwnode(fn);
4021		return -ENOMEM;
4022	}
4023
4024	iommu->ir_domain->parent = arch_get_ir_parent_domain();
4025	iommu->msi_domain = arch_create_remap_msi_irq_domain(iommu->ir_domain,
4026							     "AMD-IR-MSI",
4027							     iommu->index);
4028	return 0;
4029}
4030
4031int amd_iommu_update_ga(int cpu, bool is_run, void *data)
4032{
4033	unsigned long flags;
4034	struct amd_iommu *iommu;
4035	struct irq_remap_table *table;
4036	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
4037	int devid = ir_data->irq_2_irte.devid;
4038	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
4039	struct irte_ga *ref = (struct irte_ga *) ir_data->ref;
4040
4041	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
4042	    !ref || !entry || !entry->lo.fields_vapic.guest_mode)
4043		return 0;
4044
4045	iommu = amd_iommu_rlookup_table[devid];
4046	if (!iommu)
4047		return -ENODEV;
4048
4049	table = get_irq_table(devid);
4050	if (!table)
4051		return -ENODEV;
4052
4053	raw_spin_lock_irqsave(&table->lock, flags);
4054
4055	if (ref->lo.fields_vapic.guest_mode) {
4056		if (cpu >= 0) {
4057			ref->lo.fields_vapic.destination =
4058						APICID_TO_IRTE_DEST_LO(cpu);
4059			ref->hi.fields.destination =
4060						APICID_TO_IRTE_DEST_HI(cpu);
4061		}
4062		ref->lo.fields_vapic.is_run = is_run;
4063		barrier();
4064	}
4065
4066	raw_spin_unlock_irqrestore(&table->lock, flags);
4067
4068	iommu_flush_irt(iommu, devid);
4069	iommu_completion_wait(iommu);
4070	return 0;
4071}
4072EXPORT_SYMBOL(amd_iommu_update_ga);
4073#endif