   1/*
   2 * IOMMU API for ARM architected SMMUv3 implementations.
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License version 2 as
   6 * published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11 * GNU General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public License
  14 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  15 *
  16 * Copyright (C) 2015 ARM Limited
  17 *
  18 * Author: Will Deacon <will.deacon@arm.com>
  19 *
  20 * This driver is powered by bad coffee and bombay mix.
  21 */
  22
  23#include <linux/delay.h>
  24#include <linux/dma-iommu.h>
  25#include <linux/err.h>
  26#include <linux/interrupt.h>
  27#include <linux/iommu.h>
  28#include <linux/iopoll.h>
  29#include <linux/module.h>
  30#include <linux/msi.h>
  31#include <linux/of.h>
  32#include <linux/of_address.h>
  33#include <linux/of_platform.h>
  34#include <linux/pci.h>
  35#include <linux/platform_device.h>
  36
  37#include "io-pgtable.h"
  38
  39/* MMIO registers */
  40#define ARM_SMMU_IDR0			0x0
  41#define IDR0_ST_LVL_SHIFT		27
  42#define IDR0_ST_LVL_MASK		0x3
  43#define IDR0_ST_LVL_2LVL		(1 << IDR0_ST_LVL_SHIFT)
  44#define IDR0_STALL_MODEL_SHIFT		24
  45#define IDR0_STALL_MODEL_MASK		0x3
  46#define IDR0_STALL_MODEL_STALL		(0 << IDR0_STALL_MODEL_SHIFT)
  47#define IDR0_STALL_MODEL_FORCE		(2 << IDR0_STALL_MODEL_SHIFT)
  48#define IDR0_TTENDIAN_SHIFT		21
  49#define IDR0_TTENDIAN_MASK		0x3
  50#define IDR0_TTENDIAN_LE		(2 << IDR0_TTENDIAN_SHIFT)
  51#define IDR0_TTENDIAN_BE		(3 << IDR0_TTENDIAN_SHIFT)
  52#define IDR0_TTENDIAN_MIXED		(0 << IDR0_TTENDIAN_SHIFT)
  53#define IDR0_CD2L			(1 << 19)
  54#define IDR0_VMID16			(1 << 18)
  55#define IDR0_PRI			(1 << 16)
  56#define IDR0_SEV			(1 << 14)
  57#define IDR0_MSI			(1 << 13)
  58#define IDR0_ASID16			(1 << 12)
  59#define IDR0_ATS			(1 << 10)
  60#define IDR0_HYP			(1 << 9)
  61#define IDR0_COHACC			(1 << 4)
  62#define IDR0_TTF_SHIFT			2
  63#define IDR0_TTF_MASK			0x3
  64#define IDR0_TTF_AARCH64		(2 << IDR0_TTF_SHIFT)
  65#define IDR0_TTF_AARCH32_64		(3 << IDR0_TTF_SHIFT)
  66#define IDR0_S1P			(1 << 1)
  67#define IDR0_S2P			(1 << 0)
  68
  69#define ARM_SMMU_IDR1			0x4
  70#define IDR1_TABLES_PRESET		(1 << 30)
  71#define IDR1_QUEUES_PRESET		(1 << 29)
  72#define IDR1_REL			(1 << 28)
  73#define IDR1_CMDQ_SHIFT			21
  74#define IDR1_CMDQ_MASK			0x1f
  75#define IDR1_EVTQ_SHIFT			16
  76#define IDR1_EVTQ_MASK			0x1f
  77#define IDR1_PRIQ_SHIFT			11
  78#define IDR1_PRIQ_MASK			0x1f
  79#define IDR1_SSID_SHIFT			6
  80#define IDR1_SSID_MASK			0x1f
  81#define IDR1_SID_SHIFT			0
  82#define IDR1_SID_MASK			0x3f
  83
  84#define ARM_SMMU_IDR5			0x14
  85#define IDR5_STALL_MAX_SHIFT		16
  86#define IDR5_STALL_MAX_MASK		0xffff
  87#define IDR5_GRAN64K			(1 << 6)
  88#define IDR5_GRAN16K			(1 << 5)
  89#define IDR5_GRAN4K			(1 << 4)
  90#define IDR5_OAS_SHIFT			0
  91#define IDR5_OAS_MASK			0x7
  92#define IDR5_OAS_32_BIT			(0 << IDR5_OAS_SHIFT)
  93#define IDR5_OAS_36_BIT			(1 << IDR5_OAS_SHIFT)
  94#define IDR5_OAS_40_BIT			(2 << IDR5_OAS_SHIFT)
  95#define IDR5_OAS_42_BIT			(3 << IDR5_OAS_SHIFT)
  96#define IDR5_OAS_44_BIT			(4 << IDR5_OAS_SHIFT)
  97#define IDR5_OAS_48_BIT			(5 << IDR5_OAS_SHIFT)
  98
  99#define ARM_SMMU_CR0			0x20
 100#define CR0_CMDQEN			(1 << 3)
 101#define CR0_EVTQEN			(1 << 2)
 102#define CR0_PRIQEN			(1 << 1)
 103#define CR0_SMMUEN			(1 << 0)
 104
 105#define ARM_SMMU_CR0ACK			0x24
 106
 107#define ARM_SMMU_CR1			0x28
 108#define CR1_SH_NSH			0
 109#define CR1_SH_OSH			2
 110#define CR1_SH_ISH			3
 111#define CR1_CACHE_NC			0
 112#define CR1_CACHE_WB			1
 113#define CR1_CACHE_WT			2
 114#define CR1_TABLE_SH_SHIFT		10
 115#define CR1_TABLE_OC_SHIFT		8
 116#define CR1_TABLE_IC_SHIFT		6
 117#define CR1_QUEUE_SH_SHIFT		4
 118#define CR1_QUEUE_OC_SHIFT		2
 119#define CR1_QUEUE_IC_SHIFT		0
 120
 121#define ARM_SMMU_CR2			0x2c
 122#define CR2_PTM				(1 << 2)
 123#define CR2_RECINVSID			(1 << 1)
 124#define CR2_E2H				(1 << 0)
 125
 126#define ARM_SMMU_IRQ_CTRL		0x50
 127#define IRQ_CTRL_EVTQ_IRQEN		(1 << 2)
 128#define IRQ_CTRL_PRIQ_IRQEN		(1 << 1)
 129#define IRQ_CTRL_GERROR_IRQEN		(1 << 0)
 130
 131#define ARM_SMMU_IRQ_CTRLACK		0x54
 132
 133#define ARM_SMMU_GERROR			0x60
 134#define GERROR_SFM_ERR			(1 << 8)
 135#define GERROR_MSI_GERROR_ABT_ERR	(1 << 7)
 136#define GERROR_MSI_PRIQ_ABT_ERR		(1 << 6)
 137#define GERROR_MSI_EVTQ_ABT_ERR		(1 << 5)
 138#define GERROR_MSI_CMDQ_ABT_ERR		(1 << 4)
 139#define GERROR_PRIQ_ABT_ERR		(1 << 3)
 140#define GERROR_EVTQ_ABT_ERR		(1 << 2)
 141#define GERROR_CMDQ_ERR			(1 << 0)
 142#define GERROR_ERR_MASK			0xfd
 143
 144#define ARM_SMMU_GERRORN		0x64
 145
 146#define ARM_SMMU_GERROR_IRQ_CFG0	0x68
 147#define ARM_SMMU_GERROR_IRQ_CFG1	0x70
 148#define ARM_SMMU_GERROR_IRQ_CFG2	0x74
 149
 150#define ARM_SMMU_STRTAB_BASE		0x80
 151#define STRTAB_BASE_RA			(1UL << 62)
 152#define STRTAB_BASE_ADDR_SHIFT		6
 153#define STRTAB_BASE_ADDR_MASK		0x3ffffffffffUL
 154
 155#define ARM_SMMU_STRTAB_BASE_CFG	0x88
 156#define STRTAB_BASE_CFG_LOG2SIZE_SHIFT	0
 157#define STRTAB_BASE_CFG_LOG2SIZE_MASK	0x3f
 158#define STRTAB_BASE_CFG_SPLIT_SHIFT	6
 159#define STRTAB_BASE_CFG_SPLIT_MASK	0x1f
 160#define STRTAB_BASE_CFG_FMT_SHIFT	16
 161#define STRTAB_BASE_CFG_FMT_MASK	0x3
 162#define STRTAB_BASE_CFG_FMT_LINEAR	(0 << STRTAB_BASE_CFG_FMT_SHIFT)
 163#define STRTAB_BASE_CFG_FMT_2LVL	(1 << STRTAB_BASE_CFG_FMT_SHIFT)
 164
 165#define ARM_SMMU_CMDQ_BASE		0x90
 166#define ARM_SMMU_CMDQ_PROD		0x98
 167#define ARM_SMMU_CMDQ_CONS		0x9c
 168
 169#define ARM_SMMU_EVTQ_BASE		0xa0
 170#define ARM_SMMU_EVTQ_PROD		0x100a8
 171#define ARM_SMMU_EVTQ_CONS		0x100ac
 172#define ARM_SMMU_EVTQ_IRQ_CFG0		0xb0
 173#define ARM_SMMU_EVTQ_IRQ_CFG1		0xb8
 174#define ARM_SMMU_EVTQ_IRQ_CFG2		0xbc
 175
 176#define ARM_SMMU_PRIQ_BASE		0xc0
 177#define ARM_SMMU_PRIQ_PROD		0x100c8
 178#define ARM_SMMU_PRIQ_CONS		0x100cc
 179#define ARM_SMMU_PRIQ_IRQ_CFG0		0xd0
 180#define ARM_SMMU_PRIQ_IRQ_CFG1		0xd8
 181#define ARM_SMMU_PRIQ_IRQ_CFG2		0xdc
 182
 183/* Common MSI config fields */
 184#define MSI_CFG0_ADDR_SHIFT		2
 185#define MSI_CFG0_ADDR_MASK		0x3fffffffffffUL
 186#define MSI_CFG2_SH_SHIFT		4
 187#define MSI_CFG2_SH_NSH			(0UL << MSI_CFG2_SH_SHIFT)
 188#define MSI_CFG2_SH_OSH			(2UL << MSI_CFG2_SH_SHIFT)
 189#define MSI_CFG2_SH_ISH			(3UL << MSI_CFG2_SH_SHIFT)
 190#define MSI_CFG2_MEMATTR_SHIFT		0
 191#define MSI_CFG2_MEMATTR_DEVICE_nGnRE	(0x1 << MSI_CFG2_MEMATTR_SHIFT)
 192
 193#define Q_IDX(q, p)			((p) & ((1 << (q)->max_n_shift) - 1))
 194#define Q_WRP(q, p)			((p) & (1 << (q)->max_n_shift))
 195#define Q_OVERFLOW_FLAG			(1 << 31)
 196#define Q_OVF(q, p)			((p) & Q_OVERFLOW_FLAG)
 197#define Q_ENT(q, p)			((q)->base +			\
 198					 Q_IDX(q, p) * (q)->ent_dwords)
 199
 200#define Q_BASE_RWA			(1UL << 62)
 201#define Q_BASE_ADDR_SHIFT		5
 202#define Q_BASE_ADDR_MASK		0xfffffffffffUL
 203#define Q_BASE_LOG2SIZE_SHIFT		0
 204#define Q_BASE_LOG2SIZE_MASK		0x1fUL
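/*
 * Queue pointer encoding (an illustrative summary of the macros above):
 * bits [max_n_shift - 1:0] hold the entry index, bit max_n_shift is the
 * wrap flag and bit 31 is the overflow flag. The queue is empty when the
 * indices match and the wrap flags agree, and full when the indices match
 * but the wrap flags differ (see queue_empty()/queue_full() below).
 */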
 205
 206/*
 207 * Stream table.
 208 *
 209 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
 210 * 2lvl: 128k L1 entries,
 211 *       256 lazy entries per table (each table covers a PCI bus)
 212 */
 213#define STRTAB_L1_SZ_SHIFT		20
 214#define STRTAB_SPLIT			8
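/*
 * The low STRTAB_SPLIT bits of a StreamID index an STE within a 256-entry
 * L2 table, while the remaining upper bits select the L1 descriptor. With
 * the SID == RID assumption used below, SID[15:8] is the PCI bus number,
 * which is why each lazily-allocated L2 table covers exactly one bus.
 */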
 215
 216#define STRTAB_L1_DESC_DWORDS		1
 217#define STRTAB_L1_DESC_SPAN_SHIFT	0
 218#define STRTAB_L1_DESC_SPAN_MASK	0x1fUL
 219#define STRTAB_L1_DESC_L2PTR_SHIFT	6
 220#define STRTAB_L1_DESC_L2PTR_MASK	0x3ffffffffffUL
 221
 222#define STRTAB_STE_DWORDS		8
 223#define STRTAB_STE_0_V			(1UL << 0)
 224#define STRTAB_STE_0_CFG_SHIFT		1
 225#define STRTAB_STE_0_CFG_MASK		0x7UL
 226#define STRTAB_STE_0_CFG_ABORT		(0UL << STRTAB_STE_0_CFG_SHIFT)
 227#define STRTAB_STE_0_CFG_BYPASS		(4UL << STRTAB_STE_0_CFG_SHIFT)
 228#define STRTAB_STE_0_CFG_S1_TRANS	(5UL << STRTAB_STE_0_CFG_SHIFT)
 229#define STRTAB_STE_0_CFG_S2_TRANS	(6UL << STRTAB_STE_0_CFG_SHIFT)
 230
 231#define STRTAB_STE_0_S1FMT_SHIFT	4
 232#define STRTAB_STE_0_S1FMT_LINEAR	(0UL << STRTAB_STE_0_S1FMT_SHIFT)
 233#define STRTAB_STE_0_S1CTXPTR_SHIFT	6
 234#define STRTAB_STE_0_S1CTXPTR_MASK	0x3ffffffffffUL
 235#define STRTAB_STE_0_S1CDMAX_SHIFT	59
 236#define STRTAB_STE_0_S1CDMAX_MASK	0x1fUL
 237
 238#define STRTAB_STE_1_S1C_CACHE_NC	0UL
 239#define STRTAB_STE_1_S1C_CACHE_WBRA	1UL
 240#define STRTAB_STE_1_S1C_CACHE_WT	2UL
 241#define STRTAB_STE_1_S1C_CACHE_WB	3UL
 242#define STRTAB_STE_1_S1C_SH_NSH		0UL
 243#define STRTAB_STE_1_S1C_SH_OSH		2UL
 244#define STRTAB_STE_1_S1C_SH_ISH		3UL
 245#define STRTAB_STE_1_S1CIR_SHIFT	2
 246#define STRTAB_STE_1_S1COR_SHIFT	4
 247#define STRTAB_STE_1_S1CSH_SHIFT	6
 248
 249#define STRTAB_STE_1_S1STALLD		(1UL << 27)
 250
 251#define STRTAB_STE_1_EATS_ABT		0UL
 252#define STRTAB_STE_1_EATS_TRANS		1UL
 253#define STRTAB_STE_1_EATS_S1CHK		2UL
 254#define STRTAB_STE_1_EATS_SHIFT		28
 255
 256#define STRTAB_STE_1_STRW_NSEL1		0UL
 257#define STRTAB_STE_1_STRW_EL2		2UL
 258#define STRTAB_STE_1_STRW_SHIFT		30
 259
 260#define STRTAB_STE_1_SHCFG_INCOMING	1UL
 261#define STRTAB_STE_1_SHCFG_SHIFT	44
 262
 263#define STRTAB_STE_2_S2VMID_SHIFT	0
 264#define STRTAB_STE_2_S2VMID_MASK	0xffffUL
 265#define STRTAB_STE_2_VTCR_SHIFT		32
 266#define STRTAB_STE_2_VTCR_MASK		0x7ffffUL
 267#define STRTAB_STE_2_S2AA64		(1UL << 51)
 268#define STRTAB_STE_2_S2ENDI		(1UL << 52)
 269#define STRTAB_STE_2_S2PTW		(1UL << 54)
 270#define STRTAB_STE_2_S2R		(1UL << 58)
 271
 272#define STRTAB_STE_3_S2TTB_SHIFT	4
 273#define STRTAB_STE_3_S2TTB_MASK		0xfffffffffffUL
 274
 275/* Context descriptor (stage-1 only) */
 276#define CTXDESC_CD_DWORDS		8
 277#define CTXDESC_CD_0_TCR_T0SZ_SHIFT	0
 278#define ARM64_TCR_T0SZ_SHIFT		0
 279#define ARM64_TCR_T0SZ_MASK		0x1fUL
 280#define CTXDESC_CD_0_TCR_TG0_SHIFT	6
 281#define ARM64_TCR_TG0_SHIFT		14
 282#define ARM64_TCR_TG0_MASK		0x3UL
 283#define CTXDESC_CD_0_TCR_IRGN0_SHIFT	8
 284#define ARM64_TCR_IRGN0_SHIFT		8
 285#define ARM64_TCR_IRGN0_MASK		0x3UL
 286#define CTXDESC_CD_0_TCR_ORGN0_SHIFT	10
 287#define ARM64_TCR_ORGN0_SHIFT		10
 288#define ARM64_TCR_ORGN0_MASK		0x3UL
 289#define CTXDESC_CD_0_TCR_SH0_SHIFT	12
 290#define ARM64_TCR_SH0_SHIFT		12
 291#define ARM64_TCR_SH0_MASK		0x3UL
 292#define CTXDESC_CD_0_TCR_EPD0_SHIFT	14
 293#define ARM64_TCR_EPD0_SHIFT		7
 294#define ARM64_TCR_EPD0_MASK		0x1UL
 295#define CTXDESC_CD_0_TCR_EPD1_SHIFT	30
 296#define ARM64_TCR_EPD1_SHIFT		23
 297#define ARM64_TCR_EPD1_MASK		0x1UL
 298
 299#define CTXDESC_CD_0_ENDI		(1UL << 15)
 300#define CTXDESC_CD_0_V			(1UL << 31)
 301
 302#define CTXDESC_CD_0_TCR_IPS_SHIFT	32
 303#define ARM64_TCR_IPS_SHIFT		32
 304#define ARM64_TCR_IPS_MASK		0x7UL
 305#define CTXDESC_CD_0_TCR_TBI0_SHIFT	38
 306#define ARM64_TCR_TBI0_SHIFT		37
 307#define ARM64_TCR_TBI0_MASK		0x1UL
 308
 309#define CTXDESC_CD_0_AA64		(1UL << 41)
 310#define CTXDESC_CD_0_R			(1UL << 45)
 311#define CTXDESC_CD_0_A			(1UL << 46)
 312#define CTXDESC_CD_0_ASET_SHIFT		47
 313#define CTXDESC_CD_0_ASET_SHARED	(0UL << CTXDESC_CD_0_ASET_SHIFT)
 314#define CTXDESC_CD_0_ASET_PRIVATE	(1UL << CTXDESC_CD_0_ASET_SHIFT)
 315#define CTXDESC_CD_0_ASID_SHIFT		48
 316#define CTXDESC_CD_0_ASID_MASK		0xffffUL
 317
 318#define CTXDESC_CD_1_TTB0_SHIFT		4
 319#define CTXDESC_CD_1_TTB0_MASK		0xfffffffffffUL
 320
 321#define CTXDESC_CD_3_MAIR_SHIFT		0
 322
 323/* Convert between AArch64 (CPU) TCR format and SMMU CD format */
 324#define ARM_SMMU_TCR2CD(tcr, fld)					\
 325	(((tcr) >> ARM64_TCR_##fld##_SHIFT & ARM64_TCR_##fld##_MASK)	\
 326	 << CTXDESC_CD_0_TCR_##fld##_SHIFT)
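/*
 * For example, ARM_SMMU_TCR2CD(tcr, TG0) expands to
 * (((tcr) >> 14 & 0x3UL) << 6): it extracts TG0 from the CPU's TCR_EL1
 * layout and repositions it at the TG0 field offset used by the CD.
 */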
 327
 328/* Command queue */
 329#define CMDQ_ENT_DWORDS			2
 330#define CMDQ_MAX_SZ_SHIFT		8
 331
 332#define CMDQ_ERR_SHIFT			24
 333#define CMDQ_ERR_MASK			0x7f
 334#define CMDQ_ERR_CERROR_NONE_IDX	0
 335#define CMDQ_ERR_CERROR_ILL_IDX		1
 336#define CMDQ_ERR_CERROR_ABT_IDX		2
 337
 338#define CMDQ_0_OP_SHIFT			0
 339#define CMDQ_0_OP_MASK			0xffUL
 340#define CMDQ_0_SSV			(1UL << 11)
 341
 342#define CMDQ_PREFETCH_0_SID_SHIFT	32
 343#define CMDQ_PREFETCH_1_SIZE_SHIFT	0
 344#define CMDQ_PREFETCH_1_ADDR_MASK	~0xfffUL
 345
 346#define CMDQ_CFGI_0_SID_SHIFT		32
 347#define CMDQ_CFGI_0_SID_MASK		0xffffffffUL
 348#define CMDQ_CFGI_1_LEAF		(1UL << 0)
 349#define CMDQ_CFGI_1_RANGE_SHIFT		0
 350#define CMDQ_CFGI_1_RANGE_MASK		0x1fUL
 351
 352#define CMDQ_TLBI_0_VMID_SHIFT		32
 353#define CMDQ_TLBI_0_ASID_SHIFT		48
 354#define CMDQ_TLBI_1_LEAF		(1UL << 0)
 355#define CMDQ_TLBI_1_VA_MASK		~0xfffUL
 356#define CMDQ_TLBI_1_IPA_MASK		0xfffffffff000UL
 357
 358#define CMDQ_PRI_0_SSID_SHIFT		12
 359#define CMDQ_PRI_0_SSID_MASK		0xfffffUL
 360#define CMDQ_PRI_0_SID_SHIFT		32
 361#define CMDQ_PRI_0_SID_MASK		0xffffffffUL
 362#define CMDQ_PRI_1_GRPID_SHIFT		0
 363#define CMDQ_PRI_1_GRPID_MASK		0x1ffUL
 364#define CMDQ_PRI_1_RESP_SHIFT		12
 365#define CMDQ_PRI_1_RESP_DENY		(0UL << CMDQ_PRI_1_RESP_SHIFT)
 366#define CMDQ_PRI_1_RESP_FAIL		(1UL << CMDQ_PRI_1_RESP_SHIFT)
 367#define CMDQ_PRI_1_RESP_SUCC		(2UL << CMDQ_PRI_1_RESP_SHIFT)
 368
 369#define CMDQ_SYNC_0_CS_SHIFT		12
 370#define CMDQ_SYNC_0_CS_NONE		(0UL << CMDQ_SYNC_0_CS_SHIFT)
 371#define CMDQ_SYNC_0_CS_SEV		(2UL << CMDQ_SYNC_0_CS_SHIFT)
 372
 373/* Event queue */
 374#define EVTQ_ENT_DWORDS			4
 375#define EVTQ_MAX_SZ_SHIFT		7
 376
 377#define EVTQ_0_ID_SHIFT			0
 378#define EVTQ_0_ID_MASK			0xffUL
 379
 380/* PRI queue */
 381#define PRIQ_ENT_DWORDS			2
 382#define PRIQ_MAX_SZ_SHIFT		8
 383
 384#define PRIQ_0_SID_SHIFT		0
 385#define PRIQ_0_SID_MASK			0xffffffffUL
 386#define PRIQ_0_SSID_SHIFT		32
 387#define PRIQ_0_SSID_MASK		0xfffffUL
 388#define PRIQ_0_PERM_PRIV		(1UL << 58)
 389#define PRIQ_0_PERM_EXEC		(1UL << 59)
 390#define PRIQ_0_PERM_READ		(1UL << 60)
 391#define PRIQ_0_PERM_WRITE		(1UL << 61)
 392#define PRIQ_0_PRG_LAST			(1UL << 62)
 393#define PRIQ_0_SSID_V			(1UL << 63)
 394
 395#define PRIQ_1_PRG_IDX_SHIFT		0
 396#define PRIQ_1_PRG_IDX_MASK		0x1ffUL
 397#define PRIQ_1_ADDR_SHIFT		12
 398#define PRIQ_1_ADDR_MASK		0xfffffffffffffUL
 399
 400/* High-level queue structures */
 401#define ARM_SMMU_POLL_TIMEOUT_US	100
 402
 403static bool disable_bypass;
 404module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
 405MODULE_PARM_DESC(disable_bypass,
 406	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
 407
 408enum pri_resp {
 409	PRI_RESP_DENY,
 410	PRI_RESP_FAIL,
 411	PRI_RESP_SUCC,
 412};
 413
 414enum arm_smmu_msi_index {
 415	EVTQ_MSI_INDEX,
 416	GERROR_MSI_INDEX,
 417	PRIQ_MSI_INDEX,
 418	ARM_SMMU_MAX_MSIS,
 419};
 420
 421static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
 422	[EVTQ_MSI_INDEX] = {
 423		ARM_SMMU_EVTQ_IRQ_CFG0,
 424		ARM_SMMU_EVTQ_IRQ_CFG1,
 425		ARM_SMMU_EVTQ_IRQ_CFG2,
 426	},
 427	[GERROR_MSI_INDEX] = {
 428		ARM_SMMU_GERROR_IRQ_CFG0,
 429		ARM_SMMU_GERROR_IRQ_CFG1,
 430		ARM_SMMU_GERROR_IRQ_CFG2,
 431	},
 432	[PRIQ_MSI_INDEX] = {
 433		ARM_SMMU_PRIQ_IRQ_CFG0,
 434		ARM_SMMU_PRIQ_IRQ_CFG1,
 435		ARM_SMMU_PRIQ_IRQ_CFG2,
 436	},
 437};
 438
 439struct arm_smmu_cmdq_ent {
 440	/* Common fields */
 441	u8				opcode;
 442	bool				substream_valid;
 443
 444	/* Command-specific fields */
 445	union {
 446		#define CMDQ_OP_PREFETCH_CFG	0x1
 447		struct {
 448			u32			sid;
 449			u8			size;
 450			u64			addr;
 451		} prefetch;
 452
 453		#define CMDQ_OP_CFGI_STE	0x3
 454		#define CMDQ_OP_CFGI_ALL	0x4
 455		struct {
 456			u32			sid;
 457			union {
 458				bool		leaf;
 459				u8		span;
 460			};
 461		} cfgi;
 462
 463		#define CMDQ_OP_TLBI_NH_ASID	0x11
 464		#define CMDQ_OP_TLBI_NH_VA	0x12
 465		#define CMDQ_OP_TLBI_EL2_ALL	0x20
 466		#define CMDQ_OP_TLBI_S12_VMALL	0x28
 467		#define CMDQ_OP_TLBI_S2_IPA	0x2a
 468		#define CMDQ_OP_TLBI_NSNH_ALL	0x30
 469		struct {
 470			u16			asid;
 471			u16			vmid;
 472			bool			leaf;
 473			u64			addr;
 474		} tlbi;
 475
 476		#define CMDQ_OP_PRI_RESP	0x41
 477		struct {
 478			u32			sid;
 479			u32			ssid;
 480			u16			grpid;
 481			enum pri_resp		resp;
 482		} pri;
 483
 484		#define CMDQ_OP_CMD_SYNC	0x46
 485	};
 486};
 487
 488struct arm_smmu_queue {
 489	int				irq; /* Wired interrupt */
 490
 491	__le64				*base;
 492	dma_addr_t			base_dma;
 493	u64				q_base;
 494
 495	size_t				ent_dwords;
 496	u32				max_n_shift;
 497	u32				prod;
 498	u32				cons;
 499
 500	u32 __iomem			*prod_reg;
 501	u32 __iomem			*cons_reg;
 502};
 503
 504struct arm_smmu_cmdq {
 505	struct arm_smmu_queue		q;
 506	spinlock_t			lock;
 507};
 508
 509struct arm_smmu_evtq {
 510	struct arm_smmu_queue		q;
 511	u32				max_stalls;
 512};
 513
 514struct arm_smmu_priq {
 515	struct arm_smmu_queue		q;
 516};
 517
 518/* High-level stream table and context descriptor structures */
 519struct arm_smmu_strtab_l1_desc {
 520	u8				span;
 521
 522	__le64				*l2ptr;
 523	dma_addr_t			l2ptr_dma;
 524};
 525
 526struct arm_smmu_s1_cfg {
 527	__le64				*cdptr;
 528	dma_addr_t			cdptr_dma;
 529
 530	struct arm_smmu_ctx_desc {
 531		u16	asid;
 532		u64	ttbr;
 533		u64	tcr;
 534		u64	mair;
 535	}				cd;
 536};
 537
 538struct arm_smmu_s2_cfg {
 539	u16				vmid;
 540	u64				vttbr;
 541	u64				vtcr;
 542};
 543
 544struct arm_smmu_strtab_ent {
 545	bool				valid;
 546
 547	bool				bypass;	/* Overrides s1/s2 config */
 548	struct arm_smmu_s1_cfg		*s1_cfg;
 549	struct arm_smmu_s2_cfg		*s2_cfg;
 550};
 551
 552struct arm_smmu_strtab_cfg {
 553	__le64				*strtab;
 554	dma_addr_t			strtab_dma;
 555	struct arm_smmu_strtab_l1_desc	*l1_desc;
 556	unsigned int			num_l1_ents;
 557
 558	u64				strtab_base;
 559	u32				strtab_base_cfg;
 560};
 561
 562/* An SMMUv3 instance */
 563struct arm_smmu_device {
 564	struct device			*dev;
 565	void __iomem			*base;
 566
 567#define ARM_SMMU_FEAT_2_LVL_STRTAB	(1 << 0)
 568#define ARM_SMMU_FEAT_2_LVL_CDTAB	(1 << 1)
 569#define ARM_SMMU_FEAT_TT_LE		(1 << 2)
 570#define ARM_SMMU_FEAT_TT_BE		(1 << 3)
 571#define ARM_SMMU_FEAT_PRI		(1 << 4)
 572#define ARM_SMMU_FEAT_ATS		(1 << 5)
 573#define ARM_SMMU_FEAT_SEV		(1 << 6)
 574#define ARM_SMMU_FEAT_MSI		(1 << 7)
 575#define ARM_SMMU_FEAT_COHERENCY		(1 << 8)
 576#define ARM_SMMU_FEAT_TRANS_S1		(1 << 9)
 577#define ARM_SMMU_FEAT_TRANS_S2		(1 << 10)
 578#define ARM_SMMU_FEAT_STALLS		(1 << 11)
 579#define ARM_SMMU_FEAT_HYP		(1 << 12)
 580	u32				features;
 581
 582#define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
 583	u32				options;
 584
 585	struct arm_smmu_cmdq		cmdq;
 586	struct arm_smmu_evtq		evtq;
 587	struct arm_smmu_priq		priq;
 588
 589	int				gerr_irq;
 590
 591	unsigned long			ias; /* IPA */
 592	unsigned long			oas; /* PA */
 593
 594#define ARM_SMMU_MAX_ASIDS		(1 << 16)
 595	unsigned int			asid_bits;
 596	DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
 597
 598#define ARM_SMMU_MAX_VMIDS		(1 << 16)
 599	unsigned int			vmid_bits;
 600	DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
 601
 602	unsigned int			ssid_bits;
 603	unsigned int			sid_bits;
 604
 605	struct arm_smmu_strtab_cfg	strtab_cfg;
 606};
 607
 608/* SMMU private data for an IOMMU group */
 609struct arm_smmu_group {
 610	struct arm_smmu_device		*smmu;
 611	struct arm_smmu_domain		*domain;
 612	int				num_sids;
 613	u32				*sids;
 614	struct arm_smmu_strtab_ent	ste;
 615};
 616
 617/* SMMU private data for an IOMMU domain */
 618enum arm_smmu_domain_stage {
 619	ARM_SMMU_DOMAIN_S1 = 0,
 620	ARM_SMMU_DOMAIN_S2,
 621	ARM_SMMU_DOMAIN_NESTED,
 622};
 623
 624struct arm_smmu_domain {
 625	struct arm_smmu_device		*smmu;
 626	struct mutex			init_mutex; /* Protects smmu pointer */
 627
 628	struct io_pgtable_ops		*pgtbl_ops;
 629	spinlock_t			pgtbl_lock;
 630
 631	enum arm_smmu_domain_stage	stage;
 632	union {
 633		struct arm_smmu_s1_cfg	s1_cfg;
 634		struct arm_smmu_s2_cfg	s2_cfg;
 635	};
 636
 637	struct iommu_domain		domain;
 638};
 639
 640struct arm_smmu_option_prop {
 641	u32 opt;
 642	const char *prop;
 643};
 644
 645static struct arm_smmu_option_prop arm_smmu_options[] = {
 646	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
 647	{ 0, NULL},
 648};
 649
 650static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
 651{
 652	return container_of(dom, struct arm_smmu_domain, domain);
 653}
 654
 655static void parse_driver_options(struct arm_smmu_device *smmu)
 656{
 657	int i = 0;
 658
 659	do {
 660		if (of_property_read_bool(smmu->dev->of_node,
 661						arm_smmu_options[i].prop)) {
 662			smmu->options |= arm_smmu_options[i].opt;
 663			dev_notice(smmu->dev, "option %s\n",
 664				arm_smmu_options[i].prop);
 665		}
 666	} while (arm_smmu_options[++i].opt);
 667}
 668
 669/* Low-level queue manipulation functions */
 670static bool queue_full(struct arm_smmu_queue *q)
 671{
 672	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
 673	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
 674}
 675
 676static bool queue_empty(struct arm_smmu_queue *q)
 677{
 678	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
 679	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
 680}
 681
 682static void queue_sync_cons(struct arm_smmu_queue *q)
 683{
 684	q->cons = readl_relaxed(q->cons_reg);
 685}
 686
 687static void queue_inc_cons(struct arm_smmu_queue *q)
 688{
 689	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
 690
 691	q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
 692	writel(q->cons, q->cons_reg);
 693}
 694
 695static int queue_sync_prod(struct arm_smmu_queue *q)
 696{
 697	int ret = 0;
 698	u32 prod = readl_relaxed(q->prod_reg);
 699
 700	if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
 701		ret = -EOVERFLOW;
 702
 703	q->prod = prod;
 704	return ret;
 705}
 706
 707static void queue_inc_prod(struct arm_smmu_queue *q)
 708{
 709	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
 710
 711	q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
 712	writel(q->prod, q->prod_reg);
 713}
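/*
 * Worked example: for a queue with max_n_shift == 8, Q_IDX() is
 * prod & 0xff and Q_WRP() is prod & 0x100, so incrementing the combined
 * value past index 0xff rolls the index back to 0 and toggles the wrap
 * flag, while Q_OVF() carries the overflow flag (bit 31) across unchanged.
 */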
 714
 715static bool __queue_cons_before(struct arm_smmu_queue *q, u32 until)
 716{
 717	if (Q_WRP(q, q->cons) == Q_WRP(q, until))
 718		return Q_IDX(q, q->cons) < Q_IDX(q, until);
 719
 720	return Q_IDX(q, q->cons) >= Q_IDX(q, until);
 721}
 722
 723static int queue_poll_cons(struct arm_smmu_queue *q, u32 until, bool wfe)
 724{
 725	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
 726
 727	while (queue_sync_cons(q), __queue_cons_before(q, until)) {
 728		if (ktime_compare(ktime_get(), timeout) > 0)
 729			return -ETIMEDOUT;
 730
 731		if (wfe) {
 732			wfe();
 733		} else {
 734			cpu_relax();
 735			udelay(1);
 736		}
 737	}
 738
 739	return 0;
 740}
 741
 742static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
 743{
 744	int i;
 745
 746	for (i = 0; i < n_dwords; ++i)
 747		*dst++ = cpu_to_le64(*src++);
 748}
 749
 750static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
 751{
 752	if (queue_full(q))
 753		return -ENOSPC;
 754
 755	queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
 756	queue_inc_prod(q);
 757	return 0;
 758}
 759
 760static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
 761{
 762	int i;
 763
 764	for (i = 0; i < n_dwords; ++i)
 765		*dst++ = le64_to_cpu(*src++);
 766}
 767
 768static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
 769{
 770	if (queue_empty(q))
 771		return -EAGAIN;
 772
 773	queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
 774	queue_inc_cons(q);
 775	return 0;
 776}
 777
 778/* High-level queue accessors */
 779static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 780{
 781	memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
 782	cmd[0] |= (ent->opcode & CMDQ_0_OP_MASK) << CMDQ_0_OP_SHIFT;
 783
 784	switch (ent->opcode) {
 785	case CMDQ_OP_TLBI_EL2_ALL:
 786	case CMDQ_OP_TLBI_NSNH_ALL:
 787		break;
 788	case CMDQ_OP_PREFETCH_CFG:
 789		cmd[0] |= (u64)ent->prefetch.sid << CMDQ_PREFETCH_0_SID_SHIFT;
 790		cmd[1] |= ent->prefetch.size << CMDQ_PREFETCH_1_SIZE_SHIFT;
 791		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
 792		break;
 793	case CMDQ_OP_CFGI_STE:
 794		cmd[0] |= (u64)ent->cfgi.sid << CMDQ_CFGI_0_SID_SHIFT;
 795		cmd[1] |= ent->cfgi.leaf ? CMDQ_CFGI_1_LEAF : 0;
 796		break;
 797	case CMDQ_OP_CFGI_ALL:
 798		/* Cover the entire SID range */
 799		cmd[1] |= CMDQ_CFGI_1_RANGE_MASK << CMDQ_CFGI_1_RANGE_SHIFT;
 800		break;
 801	case CMDQ_OP_TLBI_NH_VA:
 802		cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
 803		cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
 804		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
 805		break;
 806	case CMDQ_OP_TLBI_S2_IPA:
 807		cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
 808		cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
 809		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
 810		break;
 811	case CMDQ_OP_TLBI_NH_ASID:
 812		cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
 813		/* Fallthrough */
 814	case CMDQ_OP_TLBI_S12_VMALL:
 815		cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
 816		break;
 817	case CMDQ_OP_PRI_RESP:
 818		cmd[0] |= ent->substream_valid ? CMDQ_0_SSV : 0;
 819		cmd[0] |= ent->pri.ssid << CMDQ_PRI_0_SSID_SHIFT;
 820		cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
 821		cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
 822		switch (ent->pri.resp) {
 823		case PRI_RESP_DENY:
 824			cmd[1] |= CMDQ_PRI_1_RESP_DENY;
 825			break;
 826		case PRI_RESP_FAIL:
 827			cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
 828			break;
 829		case PRI_RESP_SUCC:
 830			cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
 831			break;
 832		default:
 833			return -EINVAL;
 834		}
 835		break;
 836	case CMDQ_OP_CMD_SYNC:
 837		cmd[0] |= CMDQ_SYNC_0_CS_SEV;
 838		break;
 839	default:
 840		return -ENOENT;
 841	}
 842
 843	return 0;
 844}
 845
 846static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 847{
 848	static const char *cerror_str[] = {
 849		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
 850		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
 851		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
 852	};
 853
 854	int i;
 855	u64 cmd[CMDQ_ENT_DWORDS];
 856	struct arm_smmu_queue *q = &smmu->cmdq.q;
 857	u32 cons = readl_relaxed(q->cons_reg);
 858	u32 idx = cons >> CMDQ_ERR_SHIFT & CMDQ_ERR_MASK;
 859	struct arm_smmu_cmdq_ent cmd_sync = {
 860		.opcode = CMDQ_OP_CMD_SYNC,
 861	};
 862
 863	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
 864		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
 865
 866	switch (idx) {
 867	case CMDQ_ERR_CERROR_ABT_IDX:
 868		dev_err(smmu->dev, "retrying command fetch\n");
 869	case CMDQ_ERR_CERROR_NONE_IDX:
 870		return;
 871	case CMDQ_ERR_CERROR_ILL_IDX:
 872		/* Fallthrough */
 873	default:
 874		break;
 875	}
 876
 877	/*
 878	 * We may have concurrent producers, so we need to be careful
 879	 * not to touch any of the shadow cmdq state.
 880	 */
 881	queue_read(cmd, Q_ENT(q, idx), q->ent_dwords);
 882	dev_err(smmu->dev, "skipping command in error state:\n");
 883	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
 884		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
 885
 886	/* Convert the erroneous command into a CMD_SYNC */
 887	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
 888		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
 889		return;
 890	}
 891
 892	queue_write(cmd, Q_ENT(q, idx), q->ent_dwords);
 893}
 894
 895static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 896				    struct arm_smmu_cmdq_ent *ent)
 897{
 898	u32 until;
 899	u64 cmd[CMDQ_ENT_DWORDS];
 900	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
 901	struct arm_smmu_queue *q = &smmu->cmdq.q;
 902
 903	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
 904		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
 905			 ent->opcode);
 906		return;
 907	}
 908
 909	spin_lock(&smmu->cmdq.lock);
 910	while (until = q->prod + 1, queue_insert_raw(q, cmd) == -ENOSPC) {
 911		/*
 912		 * Keep the queue locked, otherwise the producer could wrap
 913		 * twice and we could see a future consumer pointer that looks
 914		 * like it's behind us.
 915		 */
 916		if (queue_poll_cons(q, until, wfe))
 917			dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
 918	}
 919
 920	if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, until, wfe))
 921		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
 922	spin_unlock(&smmu->cmdq.lock);
 923}
 924
 925/* Context descriptor manipulation functions */
 926static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
 927{
 928	u64 val = 0;
 929
 930	/* Repack the TCR. Just care about TTBR0 for now */
 931	val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
 932	val |= ARM_SMMU_TCR2CD(tcr, TG0);
 933	val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
 934	val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
 935	val |= ARM_SMMU_TCR2CD(tcr, SH0);
 936	val |= ARM_SMMU_TCR2CD(tcr, EPD0);
 937	val |= ARM_SMMU_TCR2CD(tcr, EPD1);
 938	val |= ARM_SMMU_TCR2CD(tcr, IPS);
 939	val |= ARM_SMMU_TCR2CD(tcr, TBI0);
 940
 941	return val;
 942}
 943
 944static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
 945				    struct arm_smmu_s1_cfg *cfg)
 946{
 947	u64 val;
 948
 949	/*
 950	 * We don't need to issue any invalidation here, as we'll invalidate
 951	 * the STE when installing the new entry anyway.
 952	 */
 953	val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
 954#ifdef __BIG_ENDIAN
 955	      CTXDESC_CD_0_ENDI |
 956#endif
 957	      CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
 958	      CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
 959	      CTXDESC_CD_0_V;
 960	cfg->cdptr[0] = cpu_to_le64(val);
 961
 962	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
 963	cfg->cdptr[1] = cpu_to_le64(val);
 964
 965	cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
 966}
 967
 968/* Stream table manipulation functions */
 969static void
 970arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
 971{
 972	u64 val = 0;
 973
 974	val |= (desc->span & STRTAB_L1_DESC_SPAN_MASK)
 975		<< STRTAB_L1_DESC_SPAN_SHIFT;
 976	val |= desc->l2ptr_dma &
 977	       STRTAB_L1_DESC_L2PTR_MASK << STRTAB_L1_DESC_L2PTR_SHIFT;
 978
 979	*dst = cpu_to_le64(val);
 980}
 981
 982static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
 983{
 984	struct arm_smmu_cmdq_ent cmd = {
 985		.opcode	= CMDQ_OP_CFGI_STE,
 986		.cfgi	= {
 987			.sid	= sid,
 988			.leaf	= true,
 989		},
 990	};
 991
 992	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 993	cmd.opcode = CMDQ_OP_CMD_SYNC;
 994	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 995}
 996
 997static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 998				      __le64 *dst, struct arm_smmu_strtab_ent *ste)
 999{
1000	/*
1001	 * This is hideously complicated, but we only really care about
1002	 * three cases at the moment:
1003	 *
1004	 * 1. Invalid (all zero) -> bypass  (init)
1005	 * 2. Bypass -> translation (attach)
1006	 * 3. Translation -> bypass (detach)
1007	 *
1008	 * Given that we can't update the STE atomically and the SMMU
1009	 * doesn't read the thing in a defined order, that leaves us
1010	 * with the following maintenance requirements:
1011	 *
1012	 * 1. Update Config, return (init time STEs aren't live)
1013	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1014	 * 3. Update Config, sync
1015	 */
1016	u64 val = le64_to_cpu(dst[0]);
1017	bool ste_live = false;
1018	struct arm_smmu_cmdq_ent prefetch_cmd = {
1019		.opcode		= CMDQ_OP_PREFETCH_CFG,
1020		.prefetch	= {
1021			.sid	= sid,
1022		},
1023	};
1024
1025	if (val & STRTAB_STE_0_V) {
1026		u64 cfg;
1027
1028		cfg = val & STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT;
1029		switch (cfg) {
1030		case STRTAB_STE_0_CFG_BYPASS:
1031			break;
1032		case STRTAB_STE_0_CFG_S1_TRANS:
1033		case STRTAB_STE_0_CFG_S2_TRANS:
1034			ste_live = true;
1035			break;
1036		default:
1037			BUG(); /* STE corruption */
1038		}
1039	}
1040
1041	/* Nuke the existing Config, as we're going to rewrite it */
1042	val &= ~(STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT);
1043
1044	if (ste->valid)
1045		val |= STRTAB_STE_0_V;
1046	else
1047		val &= ~STRTAB_STE_0_V;
1048
1049	if (ste->bypass) {
1050		val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
1051				      : STRTAB_STE_0_CFG_BYPASS;
1052		dst[0] = cpu_to_le64(val);
1053		dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
1054			 << STRTAB_STE_1_SHCFG_SHIFT);
1055		dst[2] = 0; /* Nuke the VMID */
1056		if (ste_live)
1057			arm_smmu_sync_ste_for_sid(smmu, sid);
1058		return;
1059	}
1060
1061	if (ste->s1_cfg) {
1062		BUG_ON(ste_live);
1063		dst[1] = cpu_to_le64(
1064			 STRTAB_STE_1_S1C_CACHE_WBRA
1065			 << STRTAB_STE_1_S1CIR_SHIFT |
1066			 STRTAB_STE_1_S1C_CACHE_WBRA
1067			 << STRTAB_STE_1_S1COR_SHIFT |
1068			 STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
1069#ifdef CONFIG_PCI_ATS
1070			 STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
1071#endif
1072			 STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
1073
1074		if (smmu->features & ARM_SMMU_FEAT_STALLS)
1075			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1076
1077		val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
1078		        << STRTAB_STE_0_S1CTXPTR_SHIFT) |
1079			STRTAB_STE_0_CFG_S1_TRANS;
1080
1081	}
1082
1083	if (ste->s2_cfg) {
1084		BUG_ON(ste_live);
1085		dst[2] = cpu_to_le64(
1086			 ste->s2_cfg->vmid << STRTAB_STE_2_S2VMID_SHIFT |
1087			 (ste->s2_cfg->vtcr & STRTAB_STE_2_VTCR_MASK)
1088			  << STRTAB_STE_2_VTCR_SHIFT |
1089#ifdef __BIG_ENDIAN
1090			 STRTAB_STE_2_S2ENDI |
1091#endif
1092			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1093			 STRTAB_STE_2_S2R);
1094
1095		dst[3] = cpu_to_le64(ste->s2_cfg->vttbr &
1096			 STRTAB_STE_3_S2TTB_MASK << STRTAB_STE_3_S2TTB_SHIFT);
1097
1098		val |= STRTAB_STE_0_CFG_S2_TRANS;
1099	}
1100
1101	arm_smmu_sync_ste_for_sid(smmu, sid);
1102	dst[0] = cpu_to_le64(val);
1103	arm_smmu_sync_ste_for_sid(smmu, sid);
1104
1105	/* It's likely that we'll want to use the new STE soon */
1106	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1107		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1108}
1109
1110static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1111{
1112	unsigned int i;
1113	struct arm_smmu_strtab_ent ste = {
1114		.valid	= true,
1115		.bypass	= true,
1116	};
1117
1118	for (i = 0; i < nent; ++i) {
1119		arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1120		strtab += STRTAB_STE_DWORDS;
1121	}
1122}
1123
1124static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1125{
1126	size_t size;
1127	void *strtab;
1128	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1129	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1130
1131	if (desc->l2ptr)
1132		return 0;
1133
1134	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1135	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1136
1137	desc->span = STRTAB_SPLIT + 1;
1138	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1139					  GFP_KERNEL | __GFP_ZERO);
1140	if (!desc->l2ptr) {
1141		dev_err(smmu->dev,
1142			"failed to allocate l2 stream table for SID %u\n",
1143			sid);
1144		return -ENOMEM;
1145	}
1146
1147	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1148	arm_smmu_write_strtab_l1_desc(strtab, desc);
1149	return 0;
1150}
1151
1152/* IRQ and event handlers */
1153static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1154{
1155	int i;
1156	struct arm_smmu_device *smmu = dev;
1157	struct arm_smmu_queue *q = &smmu->evtq.q;
1158	u64 evt[EVTQ_ENT_DWORDS];
1159
1160	while (!queue_remove_raw(q, evt)) {
1161		u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
1162
1163		dev_info(smmu->dev, "event 0x%02x received:\n", id);
1164		for (i = 0; i < ARRAY_SIZE(evt); ++i)
1165			dev_info(smmu->dev, "\t0x%016llx\n",
1166				 (unsigned long long)evt[i]);
1167	}
1168
1169	/* Sync our overflow flag, as we believe we're up to speed */
1170	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1171	return IRQ_HANDLED;
1172}
1173
1174static irqreturn_t arm_smmu_evtq_handler(int irq, void *dev)
1175{
1176	irqreturn_t ret = IRQ_WAKE_THREAD;
1177	struct arm_smmu_device *smmu = dev;
1178	struct arm_smmu_queue *q = &smmu->evtq.q;
1179
1180	/*
1181	 * Not much we can do on overflow, so scream and pretend we're
1182	 * trying harder.
1183	 */
1184	if (queue_sync_prod(q) == -EOVERFLOW)
1185		dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1186	else if (queue_empty(q))
1187		ret = IRQ_NONE;
1188
1189	return ret;
1190}
1191
1192static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1193{
1194	struct arm_smmu_device *smmu = dev;
1195	struct arm_smmu_queue *q = &smmu->priq.q;
1196	u64 evt[PRIQ_ENT_DWORDS];
1197
1198	while (!queue_remove_raw(q, evt)) {
1199		u32 sid, ssid;
1200		u16 grpid;
1201		bool ssv, last;
1202
1203		sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
1204		ssv = evt[0] & PRIQ_0_SSID_V;
1205		ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
1206		last = evt[0] & PRIQ_0_PRG_LAST;
1207		grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
1208
1209		dev_info(smmu->dev, "unexpected PRI request received:\n");
1210		dev_info(smmu->dev,
1211			 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1212			 sid, ssid, grpid, last ? "L" : "",
1213			 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1214			 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1215			 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1216			 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1217			 evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
1218
1219		if (last) {
1220			struct arm_smmu_cmdq_ent cmd = {
1221				.opcode			= CMDQ_OP_PRI_RESP,
1222				.substream_valid	= ssv,
1223				.pri			= {
1224					.sid	= sid,
1225					.ssid	= ssid,
1226					.grpid	= grpid,
1227					.resp	= PRI_RESP_DENY,
1228				},
1229			};
1230
1231			arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1232		}
1233	}
1234
1235	/* Sync our overflow flag, as we believe we're up to speed */
1236	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1237	return IRQ_HANDLED;
1238}
1239
1240static irqreturn_t arm_smmu_priq_handler(int irq, void *dev)
1241{
1242	irqreturn_t ret = IRQ_WAKE_THREAD;
1243	struct arm_smmu_device *smmu = dev;
1244	struct arm_smmu_queue *q = &smmu->priq.q;
1245
1246	/* PRIQ overflow indicates a programming error */
1247	if (queue_sync_prod(q) == -EOVERFLOW)
1248		dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1249	else if (queue_empty(q))
1250		ret = IRQ_NONE;
1251
1252	return ret;
1253}
1254
1255static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
1256{
1257	/* We don't actually use CMD_SYNC interrupts for anything */
1258	return IRQ_HANDLED;
1259}
1260
1261static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1262
1263static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1264{
1265	u32 gerror, gerrorn, active;
1266	struct arm_smmu_device *smmu = dev;
1267
1268	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1269	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1270
1271	active = gerror ^ gerrorn;
1272	if (!(active & GERROR_ERR_MASK))
1273		return IRQ_NONE; /* No errors pending */
1274
1275	dev_warn(smmu->dev,
1276		 "unexpected global error reported (0x%08x), this could be serious\n",
1277		 active);
1278
1279	if (active & GERROR_SFM_ERR) {
1280		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1281		arm_smmu_device_disable(smmu);
1282	}
1283
1284	if (active & GERROR_MSI_GERROR_ABT_ERR)
1285		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1286
1287	if (active & GERROR_MSI_PRIQ_ABT_ERR) {
1288		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1289		arm_smmu_priq_handler(irq, smmu->dev);
1290	}
1291
1292	if (active & GERROR_MSI_EVTQ_ABT_ERR) {
1293		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1294		arm_smmu_evtq_handler(irq, smmu->dev);
1295	}
1296
1297	if (active & GERROR_MSI_CMDQ_ABT_ERR) {
1298		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1299		arm_smmu_cmdq_sync_handler(irq, smmu->dev);
1300	}
1301
1302	if (active & GERROR_PRIQ_ABT_ERR)
1303		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1304
1305	if (active & GERROR_EVTQ_ABT_ERR)
1306		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1307
1308	if (active & GERROR_CMDQ_ERR)
1309		arm_smmu_cmdq_skip_err(smmu);
1310
1311	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1312	return IRQ_HANDLED;
1313}
1314
1315/* IO_PGTABLE API */
1316static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1317{
1318	struct arm_smmu_cmdq_ent cmd;
1319
1320	cmd.opcode = CMDQ_OP_CMD_SYNC;
1321	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1322}
1323
1324static void arm_smmu_tlb_sync(void *cookie)
1325{
1326	struct arm_smmu_domain *smmu_domain = cookie;
1327	__arm_smmu_tlb_sync(smmu_domain->smmu);
1328}
1329
1330static void arm_smmu_tlb_inv_context(void *cookie)
1331{
1332	struct arm_smmu_domain *smmu_domain = cookie;
1333	struct arm_smmu_device *smmu = smmu_domain->smmu;
1334	struct arm_smmu_cmdq_ent cmd;
1335
1336	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1337		cmd.opcode	= CMDQ_OP_TLBI_NH_ASID;
1338		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1339		cmd.tlbi.vmid	= 0;
1340	} else {
1341		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1342		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1343	}
1344
1345	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1346	__arm_smmu_tlb_sync(smmu);
1347}
1348
1349static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1350					  size_t granule, bool leaf, void *cookie)
1351{
1352	struct arm_smmu_domain *smmu_domain = cookie;
1353	struct arm_smmu_device *smmu = smmu_domain->smmu;
1354	struct arm_smmu_cmdq_ent cmd = {
1355		.tlbi = {
1356			.leaf	= leaf,
1357			.addr	= iova,
1358		},
1359	};
1360
1361	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1362		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
1363		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1364	} else {
1365		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1366		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1367	}
1368
1369	do {
1370		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1371		cmd.tlbi.addr += granule;
1372	} while (size -= granule);
1373}
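/*
 * Note that the loop above issues one TLBI command per granule: invalidating,
 * say, a 16K range with a 4K granule queues four commands, and the CMD_SYNC
 * is only emitted later, by the tlb_sync callback.
 */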
1374
1375static struct iommu_gather_ops arm_smmu_gather_ops = {
1376	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1377	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
1378	.tlb_sync	= arm_smmu_tlb_sync,
1379};
1380
1381/* IOMMU API */
1382static bool arm_smmu_capable(enum iommu_cap cap)
1383{
1384	switch (cap) {
1385	case IOMMU_CAP_CACHE_COHERENCY:
1386		return true;
1387	case IOMMU_CAP_INTR_REMAP:
1388		return true; /* MSIs are just memory writes */
1389	case IOMMU_CAP_NOEXEC:
1390		return true;
1391	default:
1392		return false;
1393	}
1394}
1395
1396static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1397{
1398	struct arm_smmu_domain *smmu_domain;
1399
1400	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
1401		return NULL;
1402
1403	/*
1404	 * Allocate the domain and initialise some of its data structures.
1405	 * We can't really do anything meaningful until we've added a
1406	 * master.
1407	 */
1408	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1409	if (!smmu_domain)
1410		return NULL;
1411
1412	if (type == IOMMU_DOMAIN_DMA &&
1413	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1414		kfree(smmu_domain);
1415		return NULL;
1416	}
1417
1418	mutex_init(&smmu_domain->init_mutex);
1419	spin_lock_init(&smmu_domain->pgtbl_lock);
1420	return &smmu_domain->domain;
1421}
1422
1423static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1424{
1425	int idx, size = 1 << span;
1426
1427	do {
1428		idx = find_first_zero_bit(map, size);
1429		if (idx == size)
1430			return -ENOSPC;
1431	} while (test_and_set_bit(idx, map));
1432
1433	return idx;
1434}
1435
1436static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1437{
1438	clear_bit(idx, map);
1439}
1440
1441static void arm_smmu_domain_free(struct iommu_domain *domain)
1442{
1443	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1444	struct arm_smmu_device *smmu = smmu_domain->smmu;
1445
1446	iommu_put_dma_cookie(domain);
1447	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1448
1449	/* Free the CD and ASID, if we allocated them */
1450	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1451		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1452
1453		if (cfg->cdptr) {
1454			dmam_free_coherent(smmu_domain->smmu->dev,
1455					   CTXDESC_CD_DWORDS << 3,
1456					   cfg->cdptr,
1457					   cfg->cdptr_dma);
1458
1459			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1460		}
1461	} else {
1462		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1463		if (cfg->vmid)
1464			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1465	}
1466
1467	kfree(smmu_domain);
1468}
1469
1470static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1471				       struct io_pgtable_cfg *pgtbl_cfg)
1472{
1473	int ret;
1474	int asid;
1475	struct arm_smmu_device *smmu = smmu_domain->smmu;
1476	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1477
1478	asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1479	if (IS_ERR_VALUE(asid))
1480		return asid;
1481
1482	cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1483					 &cfg->cdptr_dma,
1484					 GFP_KERNEL | __GFP_ZERO);
1485	if (!cfg->cdptr) {
1486		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1487		ret = -ENOMEM;
1488		goto out_free_asid;
1489	}
1490
1491	cfg->cd.asid	= (u16)asid;
1492	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1493	cfg->cd.tcr	= pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1494	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1495	return 0;
1496
1497out_free_asid:
1498	arm_smmu_bitmap_free(smmu->asid_map, asid);
1499	return ret;
1500}
1501
1502static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1503				       struct io_pgtable_cfg *pgtbl_cfg)
1504{
1505	int vmid;
1506	struct arm_smmu_device *smmu = smmu_domain->smmu;
1507	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1508
1509	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1510	if (IS_ERR_VALUE(vmid))
1511		return vmid;
1512
1513	cfg->vmid	= (u16)vmid;
1514	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1515	cfg->vtcr	= pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1516	return 0;
1517}
1518
1519static struct iommu_ops arm_smmu_ops;
1520
1521static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1522{
1523	int ret;
1524	unsigned long ias, oas;
1525	enum io_pgtable_fmt fmt;
1526	struct io_pgtable_cfg pgtbl_cfg;
1527	struct io_pgtable_ops *pgtbl_ops;
1528	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1529				 struct io_pgtable_cfg *);
1530	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1531	struct arm_smmu_device *smmu = smmu_domain->smmu;
1532
1533	/* Restrict the stage to what we can actually support */
1534	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1535		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1536	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1537		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1538
1539	switch (smmu_domain->stage) {
1540	case ARM_SMMU_DOMAIN_S1:
1541		ias = VA_BITS;
1542		oas = smmu->ias;
1543		fmt = ARM_64_LPAE_S1;
1544		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1545		break;
1546	case ARM_SMMU_DOMAIN_NESTED:
1547	case ARM_SMMU_DOMAIN_S2:
1548		ias = smmu->ias;
1549		oas = smmu->oas;
1550		fmt = ARM_64_LPAE_S2;
1551		finalise_stage_fn = arm_smmu_domain_finalise_s2;
1552		break;
1553	default:
1554		return -EINVAL;
1555	}
1556
1557	pgtbl_cfg = (struct io_pgtable_cfg) {
1558		.pgsize_bitmap	= arm_smmu_ops.pgsize_bitmap,
1559		.ias		= ias,
1560		.oas		= oas,
1561		.tlb		= &arm_smmu_gather_ops,
1562		.iommu_dev	= smmu->dev,
1563	};
1564
1565	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1566	if (!pgtbl_ops)
1567		return -ENOMEM;
1568
1569	arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1570	smmu_domain->pgtbl_ops = pgtbl_ops;
1571
1572	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1573	if (IS_ERR_VALUE(ret))
1574		free_io_pgtable_ops(pgtbl_ops);
1575
1576	return ret;
1577}
1578
1579static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
1580{
1581	struct iommu_group *group;
1582	struct arm_smmu_group *smmu_group;
1583
1584	group = iommu_group_get(dev);
1585	if (!group)
1586		return NULL;
1587
1588	smmu_group = iommu_group_get_iommudata(group);
1589	iommu_group_put(group);
1590	return smmu_group;
1591}
1592
1593static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1594{
1595	__le64 *step;
1596	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1597
1598	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1599		struct arm_smmu_strtab_l1_desc *l1_desc;
1600		int idx;
1601
1602		/* Two-level walk */
1603		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1604		l1_desc = &cfg->l1_desc[idx];
1605		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1606		step = &l1_desc->l2ptr[idx];
1607	} else {
1608		/* Simple linear lookup */
1609		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1610	}
1611
1612	return step;
1613}
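/*
 * For example, with the default STRTAB_SPLIT of 8 and a two-level table,
 * SID 0x305 resolves to L1 descriptor 3 (0x305 >> 8) and then to the STE
 * at index 5 (0x305 & 0xff) within that descriptor's L2 table.
 */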
1614
1615static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
1616{
1617	int i;
1618	struct arm_smmu_domain *smmu_domain = smmu_group->domain;
1619	struct arm_smmu_strtab_ent *ste = &smmu_group->ste;
1620	struct arm_smmu_device *smmu = smmu_group->smmu;
1621
1622	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1623		ste->s1_cfg = &smmu_domain->s1_cfg;
1624		ste->s2_cfg = NULL;
1625		arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1626	} else {
1627		ste->s1_cfg = NULL;
1628		ste->s2_cfg = &smmu_domain->s2_cfg;
1629	}
1630
1631	for (i = 0; i < smmu_group->num_sids; ++i) {
1632		u32 sid = smmu_group->sids[i];
1633		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1634
1635		arm_smmu_write_strtab_ent(smmu, sid, step, ste);
1636	}
1637
1638	return 0;
1639}
1640
1641static void arm_smmu_detach_dev(struct device *dev)
1642{
1643	struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1644
1645	smmu_group->ste.bypass = true;
1646	if (IS_ERR_VALUE(arm_smmu_install_ste_for_group(smmu_group)))
1647		dev_warn(dev, "failed to install bypass STE\n");
1648
1649	smmu_group->domain = NULL;
1650}
1651
1652static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1653{
1654	int ret = 0;
1655	struct arm_smmu_device *smmu;
1656	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1657	struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1658
1659	if (!smmu_group)
1660		return -ENOENT;
1661
1662	/* Already attached to a different domain? */
1663	if (smmu_group->domain && smmu_group->domain != smmu_domain)
1664		arm_smmu_detach_dev(dev);
1665
1666	smmu = smmu_group->smmu;
1667	mutex_lock(&smmu_domain->init_mutex);
1668
1669	if (!smmu_domain->smmu) {
1670		smmu_domain->smmu = smmu;
1671		ret = arm_smmu_domain_finalise(domain);
1672		if (ret) {
1673			smmu_domain->smmu = NULL;
1674			goto out_unlock;
1675		}
1676	} else if (smmu_domain->smmu != smmu) {
1677		dev_err(dev,
1678			"cannot attach to SMMU %s (upstream of %s)\n",
1679			dev_name(smmu_domain->smmu->dev),
1680			dev_name(smmu->dev));
1681		ret = -ENXIO;
1682		goto out_unlock;
1683	}
1684
1685	/* Group already attached to this domain? */
1686	if (smmu_group->domain)
1687		goto out_unlock;
1688
1689	smmu_group->domain	= smmu_domain;
1690
1691	/*
1692	 * FIXME: This should always be "false" once we have IOMMU-backed
1693	 * DMA ops for all devices behind the SMMU.
1694	 */
1695	smmu_group->ste.bypass	= domain->type == IOMMU_DOMAIN_DMA;
1696
1697	ret = arm_smmu_install_ste_for_group(smmu_group);
1698	if (IS_ERR_VALUE(ret))
1699		smmu_group->domain = NULL;
1700
1701out_unlock:
1702	mutex_unlock(&smmu_domain->init_mutex);
1703	return ret;
1704}
1705
1706static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1707			phys_addr_t paddr, size_t size, int prot)
1708{
1709	int ret;
1710	unsigned long flags;
1711	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1712	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1713
1714	if (!ops)
1715		return -ENODEV;
1716
1717	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1718	ret = ops->map(ops, iova, paddr, size, prot);
1719	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1720	return ret;
1721}
1722
1723static size_t
1724arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1725{
1726	size_t ret;
1727	unsigned long flags;
1728	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1729	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1730
1731	if (!ops)
1732		return 0;
1733
1734	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1735	ret = ops->unmap(ops, iova, size);
1736	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1737	return ret;
1738}
1739
1740static phys_addr_t
1741arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1742{
1743	phys_addr_t ret;
1744	unsigned long flags;
1745	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1746	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1747
1748	if (!ops)
1749		return 0;
1750
1751	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1752	ret = ops->iova_to_phys(ops, iova);
1753	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1754
1755	return ret;
1756}
1757
1758static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *sidp)
1759{
1760	*(u32 *)sidp = alias;
1761	return 0; /* Continue walking */
1762}
1763
1764static void __arm_smmu_release_pci_iommudata(void *data)
1765{
1766	kfree(data);
1767}
1768
1769static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
1770{
1771	struct device_node *of_node;
1772	struct platform_device *smmu_pdev;
1773	struct arm_smmu_device *smmu = NULL;
1774	struct pci_bus *bus = pdev->bus;
1775
1776	/* Walk up to the root bus */
1777	while (!pci_is_root_bus(bus))
1778		bus = bus->parent;
1779
1780	/* Follow the "iommus" phandle from the host controller */
1781	of_node = of_parse_phandle(bus->bridge->parent->of_node, "iommus", 0);
1782	if (!of_node)
1783		return NULL;
1784
1785	/* See if we can find an SMMU corresponding to the phandle */
1786	smmu_pdev = of_find_device_by_node(of_node);
1787	if (smmu_pdev)
1788		smmu = platform_get_drvdata(smmu_pdev);
1789
1790	of_node_put(of_node);
1791	return smmu;
1792}
1793
1794static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1795{
1796	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1797
1798	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1799		limit *= 1UL << STRTAB_SPLIT;
1800
1801	return sid < limit;
1802}
1803
1804static int arm_smmu_add_device(struct device *dev)
1805{
1806	int i, ret;
1807	u32 sid, *sids;
1808	struct pci_dev *pdev;
1809	struct iommu_group *group;
1810	struct arm_smmu_group *smmu_group;
1811	struct arm_smmu_device *smmu;
1812
1813	/* We only support PCI, for now */
1814	if (!dev_is_pci(dev))
1815		return -ENODEV;
1816
1817	pdev = to_pci_dev(dev);
1818	group = iommu_group_get_for_dev(dev);
1819	if (IS_ERR(group))
1820		return PTR_ERR(group);
1821
1822	smmu_group = iommu_group_get_iommudata(group);
1823	if (!smmu_group) {
1824		smmu = arm_smmu_get_for_pci_dev(pdev);
1825		if (!smmu) {
1826			ret = -ENOENT;
1827			goto out_remove_dev;
1828		}
1829
1830		smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
1831		if (!smmu_group) {
1832			ret = -ENOMEM;
1833			goto out_remove_dev;
1834		}
1835
1836		smmu_group->ste.valid	= true;
1837		smmu_group->smmu	= smmu;
1838		iommu_group_set_iommudata(group, smmu_group,
1839					  __arm_smmu_release_pci_iommudata);
1840	} else {
1841		smmu = smmu_group->smmu;
1842	}
1843
1844	/* Assume SID == RID until firmware tells us otherwise */
1845	pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
1846	for (i = 0; i < smmu_group->num_sids; ++i) {
1847		/* If we already know about this SID, then we're done */
1848		if (smmu_group->sids[i] == sid)
1849			goto out_put_group;
1850	}
1851
1852	/* Check the SID is in range of the SMMU and our stream table */
1853	if (!arm_smmu_sid_in_range(smmu, sid)) {
1854		ret = -ERANGE;
1855		goto out_remove_dev;
1856	}
1857
1858	/* Ensure l2 strtab is initialised */
1859	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1860		ret = arm_smmu_init_l2_strtab(smmu, sid);
1861		if (ret)
1862			goto out_remove_dev;
1863	}
1864
1865	/* Resize the SID array for the group */
1866	smmu_group->num_sids++;
1867	sids = krealloc(smmu_group->sids, smmu_group->num_sids * sizeof(*sids),
1868			GFP_KERNEL);
1869	if (!sids) {
1870		smmu_group->num_sids--;
1871		ret = -ENOMEM;
1872		goto out_remove_dev;
1873	}
1874
1875	/* Add the new SID */
1876	sids[smmu_group->num_sids - 1] = sid;
1877	smmu_group->sids = sids;
1878
1879out_put_group:
1880	iommu_group_put(group);
1881	return 0;
1882
1883out_remove_dev:
1884	iommu_group_remove_device(dev);
1885	iommu_group_put(group);
1886	return ret;
1887}
1888
1889static void arm_smmu_remove_device(struct device *dev)
1890{
1891	iommu_group_remove_device(dev);
1892}
1893
1894static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1895				    enum iommu_attr attr, void *data)
1896{
1897	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1898
1899	switch (attr) {
1900	case DOMAIN_ATTR_NESTING:
1901		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1902		return 0;
1903	default:
1904		return -ENODEV;
1905	}
1906}
1907
1908static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1909				    enum iommu_attr attr, void *data)
1910{
1911	int ret = 0;
1912	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1913
1914	mutex_lock(&smmu_domain->init_mutex);
1915
1916	switch (attr) {
1917	case DOMAIN_ATTR_NESTING:
1918		if (smmu_domain->smmu) {
1919			ret = -EPERM;
1920			goto out_unlock;
1921		}
1922
1923		if (*(int *)data)
1924			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1925		else
1926			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1927
1928		break;
1929	default:
1930		ret = -ENODEV;
1931	}
1932
1933out_unlock:
1934	mutex_unlock(&smmu_domain->init_mutex);
1935	return ret;
1936}
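/*
 * Sketch of how a caller would request nested translation (assumed usage,
 * not taken from this file): the attribute must be set on a freshly
 * allocated domain, before the first attach finalises the stage.
 *
 *	struct iommu_domain *domain = iommu_domain_alloc(&pci_bus_type);
 *	int nesting = 1;
 *
 *	iommu_domain_set_attr(domain, DOMAIN_ATTR_NESTING, &nesting);
 *	iommu_attach_device(domain, dev);	/* stage chosen here */
 */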
1937
1938static struct iommu_ops arm_smmu_ops = {
1939	.capable		= arm_smmu_capable,
1940	.domain_alloc		= arm_smmu_domain_alloc,
1941	.domain_free		= arm_smmu_domain_free,
1942	.attach_dev		= arm_smmu_attach_dev,
1943	.map			= arm_smmu_map,
1944	.unmap			= arm_smmu_unmap,
1945	.iova_to_phys		= arm_smmu_iova_to_phys,
1946	.add_device		= arm_smmu_add_device,
1947	.remove_device		= arm_smmu_remove_device,
1948	.device_group		= pci_device_group,
1949	.domain_get_attr	= arm_smmu_domain_get_attr,
1950	.domain_set_attr	= arm_smmu_domain_set_attr,
1951	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
1952};
1953
1954/* Probing and initialisation functions */
1955static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
1956				   struct arm_smmu_queue *q,
1957				   unsigned long prod_off,
1958				   unsigned long cons_off,
1959				   size_t dwords)
1960{
1961	size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
1962
1963	q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
1964	if (!q->base) {
1965		dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
1966			qsz);
1967		return -ENOMEM;
1968	}
1969
1970	q->prod_reg	= smmu->base + prod_off;
1971	q->cons_reg	= smmu->base + cons_off;
1972	q->ent_dwords	= dwords;
1973
1974	q->q_base  = Q_BASE_RWA;
1975	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK << Q_BASE_ADDR_SHIFT;
1976	q->q_base |= (q->max_n_shift & Q_BASE_LOG2SIZE_MASK)
1977		     << Q_BASE_LOG2SIZE_SHIFT;
1978
1979	q->prod = q->cons = 0;
1980	return 0;
1981}
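/*
 * Worked example of the size calculation above (constants hedged; they are
 * defined earlier in this file): with max_n_shift == 8 and
 * dwords == CMDQ_ENT_DWORDS == 2, the allocation is
 * (1 << 8) * 2 * 8 == 4096 bytes, i.e. a single page for the command queue.
 */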
1982
1983static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
1984{
1985	int ret;
1986
1987	/* cmdq */
1988	spin_lock_init(&smmu->cmdq.lock);
1989	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
1990				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
1991	if (ret)
1992		return ret;
1993
1994	/* evtq */
1995	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
1996				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
1997	if (ret)
1998		return ret;
1999
2000	/* priq */
2001	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2002		return 0;
2003
2004	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2005				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2006}
2007
2008static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2009{
2010	unsigned int i;
2011	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2012	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2013	void *strtab = smmu->strtab_cfg.strtab;
2014
2015	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2016	if (!cfg->l1_desc) {
2017		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2018		return -ENOMEM;
2019	}
2020
2021	for (i = 0; i < cfg->num_l1_ents; ++i) {
2022		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2023		strtab += STRTAB_L1_DESC_DWORDS << 3;
2024	}
2025
2026	return 0;
2027}
2028
2029static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2030{
2031	void *strtab;
2032	u64 reg;
2033	u32 size, l1size;
2034	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2035
2036	/*
2037	 * If we can resolve everything with a single L2 table, then we
2038	 * just need a single L1 descriptor. Otherwise, calculate the L1
2039	 * size, capped to the SIDSIZE.
2040	 */
2041	if (smmu->sid_bits < STRTAB_SPLIT) {
2042		size = 0;
2043	} else {
2044		size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2045		size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2046	}
2047	cfg->num_l1_ents = 1 << size;
2048
2049	size += STRTAB_SPLIT;
2050	if (size < smmu->sid_bits)
2051		dev_warn(smmu->dev,
2052			 "2-level strtab only covers %u/%u bits of SID\n",
2053			 size, smmu->sid_bits);
2054
2055	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2056	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2057				     GFP_KERNEL | __GFP_ZERO);
2058	if (!strtab) {
2059		dev_err(smmu->dev,
2060			"failed to allocate l1 stream table (%u bytes)\n",
2061			l1size);
2062		return -ENOMEM;
2063	}
2064	cfg->strtab = strtab;
2065
2066	/* Configure strtab_base_cfg for 2 levels */
2067	reg  = STRTAB_BASE_CFG_FMT_2LVL;
2068	reg |= (size & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2069		<< STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2070	reg |= (STRTAB_SPLIT & STRTAB_BASE_CFG_SPLIT_MASK)
2071		<< STRTAB_BASE_CFG_SPLIT_SHIFT;
2072	cfg->strtab_base_cfg = reg;
2073
2074	return arm_smmu_init_l1_strtab(smmu);
2075}
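/*
 * Worked example of the sizing above (values for illustration; the
 * constants are defined earlier in this file): with sid_bits == 16,
 * STRTAB_SPLIT == 8 and 8-byte L1 descriptors, the L1 table needs
 * 1 << (16 - 8) == 256 descriptors, i.e. a 2KB allocation, and the
 * combined 2-level table then spans all 16 bits of StreamID.
 */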
2076
2077static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2078{
2079	void *strtab;
2080	u64 reg;
2081	u32 size;
2082	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2083
2084	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2085	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2086				     GFP_KERNEL | __GFP_ZERO);
2087	if (!strtab) {
2088		dev_err(smmu->dev,
2089			"failed to allocate linear stream table (%u bytes)\n",
2090			size);
2091		return -ENOMEM;
2092	}
2093	cfg->strtab = strtab;
2094	cfg->num_l1_ents = 1 << smmu->sid_bits;
2095
2096	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2097	reg  = STRTAB_BASE_CFG_FMT_LINEAR;
2098	reg |= (smmu->sid_bits & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2099		<< STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2100	cfg->strtab_base_cfg = reg;
2101
2102	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2103	return 0;
2104}
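/*
 * Worked example (values for illustration; STRTAB_STE_DWORDS is defined
 * earlier in this file): with sid_bits == 8 and 64-byte STEs, the linear
 * table is 256 * 64 == 16KB, one STE per possible StreamID, all of them
 * initialised to bypass until a device is attached.
 */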
2105
2106static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2107{
2108	u64 reg;
2109	int ret;
2110
2111	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2112		ret = arm_smmu_init_strtab_2lvl(smmu);
2113	else
2114		ret = arm_smmu_init_strtab_linear(smmu);
2115
2116	if (ret)
2117		return ret;
2118
2119	/* Set the strtab base address */
2120	reg  = smmu->strtab_cfg.strtab_dma &
2121	       STRTAB_BASE_ADDR_MASK << STRTAB_BASE_ADDR_SHIFT;
2122	reg |= STRTAB_BASE_RA;
2123	smmu->strtab_cfg.strtab_base = reg;
2124
2125	/* Allocate the first VMID for stage-2 bypass STEs */
2126	set_bit(0, smmu->vmid_map);
2127	return 0;
2128}
2129
2130static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2131{
2132	int ret;
2133
2134	ret = arm_smmu_init_queues(smmu);
2135	if (ret)
2136		return ret;
2137
2138	return arm_smmu_init_strtab(smmu);
2139}
2140
2141static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2142				   unsigned int reg_off, unsigned int ack_off)
2143{
2144	u32 reg;
2145
2146	writel_relaxed(val, smmu->base + reg_off);
2147	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2148					  1, ARM_SMMU_POLL_TIMEOUT_US);
2149}
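/*
 * The write-then-poll-for-ack pattern above is used for CR0 and IRQ_CTRL
 * throughout the reset path.  A sketch of a typical call (assumed usage):
 *
 *	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
 *				      ARM_SMMU_CR0ACK);
 *	if (ret)
 *		;	/* the SMMU never reflected the new value in CR0ACK */
 */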
2150
2151static void arm_smmu_free_msis(void *data)
2152{
2153	struct device *dev = data;
2154	platform_msi_domain_free_irqs(dev);
2155}
2156
2157static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2158{
2159	phys_addr_t doorbell;
2160	struct device *dev = msi_desc_to_dev(desc);
2161	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2162	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2163
2164	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2165	doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
2166
2167	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2168	writel_relaxed(msg->data, smmu->base + cfg[1]);
2169	writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2170}
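/*
 * Each arm_smmu_msi_cfg[] entry is assumed to hold the {ADDR, DATA, ATTR}
 * register offsets for one interrupt source, so the write-back above
 * programs a 64-bit doorbell address, the 32-bit payload and a
 * Device-nGnRE memory attribute for every MSI the SMMU can generate.
 */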
2171
2172static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2173{
2174	struct msi_desc *desc;
2175	int ret, nvec = ARM_SMMU_MAX_MSIS;
2176	struct device *dev = smmu->dev;
2177
2178	/* Clear the MSI address regs */
2179	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2180	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2181
2182	if (smmu->features & ARM_SMMU_FEAT_PRI)
2183		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2184	else
2185		nvec--;
2186
2187	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2188		return;
2189
2190	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2191	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2192	if (ret) {
2193		dev_warn(dev, "failed to allocate MSIs\n");
2194		return;
2195	}
2196
2197	for_each_msi_entry(desc, dev) {
2198		switch (desc->platform.msi_index) {
2199		case EVTQ_MSI_INDEX:
2200			smmu->evtq.q.irq = desc->irq;
2201			break;
2202		case GERROR_MSI_INDEX:
2203			smmu->gerr_irq = desc->irq;
2204			break;
2205		case PRIQ_MSI_INDEX:
2206			smmu->priq.q.irq = desc->irq;
2207			break;
2208		default:	/* Unknown */
2209			continue;
2210		}
2211	}
2212
2213	/* Add callback to free MSIs on teardown */
2214	devm_add_action(dev, arm_smmu_free_msis, dev);
2215}
2216
2217static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2218{
2219	int ret, irq;
2220	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2221
2222	/* Disable IRQs first */
2223	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2224				      ARM_SMMU_IRQ_CTRLACK);
2225	if (ret) {
2226		dev_err(smmu->dev, "failed to disable irqs\n");
2227		return ret;
2228	}
2229
2230	arm_smmu_setup_msis(smmu);
2231
2232	/* Request interrupt lines */
2233	irq = smmu->evtq.q.irq;
2234	if (irq) {
2235		ret = devm_request_threaded_irq(smmu->dev, irq,
2236						arm_smmu_evtq_handler,
2237						arm_smmu_evtq_thread,
2238						0, "arm-smmu-v3-evtq", smmu);
2239		if (ret < 0)
2240			dev_warn(smmu->dev, "failed to enable evtq irq\n");
2241	}
2242
2243	irq = smmu->cmdq.q.irq;
2244	if (irq) {
2245		ret = devm_request_irq(smmu->dev, irq,
2246				       arm_smmu_cmdq_sync_handler, 0,
2247				       "arm-smmu-v3-cmdq-sync", smmu);
2248		if (ret < 0)
2249			dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
2250	}
2251
2252	irq = smmu->gerr_irq;
2253	if (irq) {
2254		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2255				       0, "arm-smmu-v3-gerror", smmu);
2256		if (ret < 0)
2257			dev_warn(smmu->dev, "failed to enable gerror irq\n");
2258	}
2259
2260	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2261		irq = smmu->priq.q.irq;
2262		if (irq) {
2263			ret = devm_request_threaded_irq(smmu->dev, irq,
2264							arm_smmu_priq_handler,
2265							arm_smmu_priq_thread,
2266							0, "arm-smmu-v3-priq",
2267							smmu);
2268			if (ret < 0)
2269				dev_warn(smmu->dev,
2270					 "failed to enable priq irq\n");
2271			else
2272				irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2273		}
2274	}
2275
2276	/* Enable interrupt generation on the SMMU */
2277	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2278				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2279	if (ret)
2280		dev_warn(smmu->dev, "failed to enable irqs\n");
2281
2282	return 0;
2283}
2284
2285static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2286{
2287	int ret;
2288
2289	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2290	if (ret)
2291		dev_err(smmu->dev, "failed to clear cr0\n");
2292
2293	return ret;
2294}
2295
2296static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
2297{
2298	int ret;
2299	u32 reg, enables;
2300	struct arm_smmu_cmdq_ent cmd;
2301
2302	/* Clear CR0 and sync (disables SMMU and queue processing) */
2303	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2304	if (reg & CR0_SMMUEN)
2305		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2306
2307	ret = arm_smmu_device_disable(smmu);
2308	if (ret)
2309		return ret;
2310
2311	/* CR1 (table and queue memory attributes) */
2312	reg = (CR1_SH_ISH << CR1_TABLE_SH_SHIFT) |
2313	      (CR1_CACHE_WB << CR1_TABLE_OC_SHIFT) |
2314	      (CR1_CACHE_WB << CR1_TABLE_IC_SHIFT) |
2315	      (CR1_SH_ISH << CR1_QUEUE_SH_SHIFT) |
2316	      (CR1_CACHE_WB << CR1_QUEUE_OC_SHIFT) |
2317	      (CR1_CACHE_WB << CR1_QUEUE_IC_SHIFT);
2318	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2319
2320	/* CR2 (private TLBs, invalid-SID event recording, E2H regime) */
2321	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2322	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2323
2324	/* Stream table */
2325	writeq_relaxed(smmu->strtab_cfg.strtab_base,
2326		       smmu->base + ARM_SMMU_STRTAB_BASE);
2327	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2328		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2329
2330	/* Command queue */
2331	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2332	writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2333	writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2334
2335	enables = CR0_CMDQEN;
2336	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2337				      ARM_SMMU_CR0ACK);
2338	if (ret) {
2339		dev_err(smmu->dev, "failed to enable command queue\n");
2340		return ret;
2341	}
2342
2343	/* Invalidate any cached configuration */
2344	cmd.opcode = CMDQ_OP_CFGI_ALL;
2345	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2346	cmd.opcode = CMDQ_OP_CMD_SYNC;
2347	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2348
2349	/* Invalidate any stale TLB entries */
2350	if (smmu->features & ARM_SMMU_FEAT_HYP) {
2351		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2352		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2353	}
2354
2355	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2356	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2357	cmd.opcode = CMDQ_OP_CMD_SYNC;
2358	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2359
2360	/* Event queue */
2361	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2362	writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
2363	writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
2364
2365	enables |= CR0_EVTQEN;
2366	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2367				      ARM_SMMU_CR0ACK);
2368	if (ret) {
2369		dev_err(smmu->dev, "failed to enable event queue\n");
2370		return ret;
2371	}
2372
2373	/* PRI queue */
2374	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2375		writeq_relaxed(smmu->priq.q.q_base,
2376			       smmu->base + ARM_SMMU_PRIQ_BASE);
2377		writel_relaxed(smmu->priq.q.prod,
2378			       smmu->base + ARM_SMMU_PRIQ_PROD);
2379		writel_relaxed(smmu->priq.q.cons,
2380			       smmu->base + ARM_SMMU_PRIQ_CONS);
2381
2382		enables |= CR0_PRIQEN;
2383		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2384					      ARM_SMMU_CR0ACK);
2385		if (ret) {
2386			dev_err(smmu->dev, "failed to enable PRI queue\n");
2387			return ret;
2388		}
2389	}
2390
2391	ret = arm_smmu_setup_irqs(smmu);
2392	if (ret) {
2393		dev_err(smmu->dev, "failed to setup irqs\n");
2394		return ret;
2395	}
2396
2397	/* Enable the SMMU interface */
2398	enables |= CR0_SMMUEN;
2399	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2400				      ARM_SMMU_CR0ACK);
2401	if (ret) {
2402		dev_err(smmu->dev, "failed to enable SMMU interface\n");
2403		return ret;
2404	}
2405
2406	return 0;
2407}
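/*
 * Note on the bring-up order above: the command queue has to be enabled
 * before the CFGI/TLBI invalidation commands can be issued, and SMMUEN is
 * only set last, once the stream table, queues and interrupts are all in
 * place, so no transaction is ever translated against stale state.
 */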
2408
2409static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
2410{
2411	u32 reg;
2412	bool coherent;
2413	unsigned long pgsize_bitmap = 0;
2414
2415	/* IDR0 */
2416	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2417
2418	/* 2-level structures */
2419	if ((reg & IDR0_ST_LVL_MASK << IDR0_ST_LVL_SHIFT) == IDR0_ST_LVL_2LVL)
2420		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2421
2422	if (reg & IDR0_CD2L)
2423		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2424
2425	/*
2426	 * Translation table endianness.
2427	 * We currently require the same endianness as the CPU, but this
2428	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
2429	 */
2430	switch (reg & IDR0_TTENDIAN_MASK << IDR0_TTENDIAN_SHIFT) {
2431	case IDR0_TTENDIAN_MIXED:
2432		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2433		break;
2434#ifdef __BIG_ENDIAN
2435	case IDR0_TTENDIAN_BE:
2436		smmu->features |= ARM_SMMU_FEAT_TT_BE;
2437		break;
2438#else
2439	case IDR0_TTENDIAN_LE:
2440		smmu->features |= ARM_SMMU_FEAT_TT_LE;
2441		break;
2442#endif
2443	default:
2444		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2445		return -ENXIO;
2446	}
2447
2448	/* Boolean feature flags */
2449	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2450		smmu->features |= ARM_SMMU_FEAT_PRI;
2451
2452	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2453		smmu->features |= ARM_SMMU_FEAT_ATS;
2454
2455	if (reg & IDR0_SEV)
2456		smmu->features |= ARM_SMMU_FEAT_SEV;
2457
2458	if (reg & IDR0_MSI)
2459		smmu->features |= ARM_SMMU_FEAT_MSI;
2460
2461	if (reg & IDR0_HYP)
2462		smmu->features |= ARM_SMMU_FEAT_HYP;
2463
2464	/*
2465	 * The dma-coherent property is used in preference to the ID
2466	 * register, but warn on mismatch.
2467	 */
2468	coherent = of_dma_is_coherent(smmu->dev->of_node);
2469	if (coherent)
2470		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2471
2472	if (!!(reg & IDR0_COHACC) != coherent)
2473		dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
2474			 coherent ? "true" : "false");
2475
2476	switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
2477	case IDR0_STALL_MODEL_STALL:
2478		/* Fallthrough */
2479	case IDR0_STALL_MODEL_FORCE:
2480		smmu->features |= ARM_SMMU_FEAT_STALLS;
2481	}
2482
2483	if (reg & IDR0_S1P)
2484		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2485
2486	if (reg & IDR0_S2P)
2487		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2488
2489	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2490		dev_err(smmu->dev, "no translation support!\n");
2491		return -ENXIO;
2492	}
2493
2494	/* We only support the AArch64 table format at present */
2495	switch (reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) {
2496	case IDR0_TTF_AARCH32_64:
2497		smmu->ias = 40;
2498		/* Fallthrough */
2499	case IDR0_TTF_AARCH64:
2500		break;
2501	default:
2502		dev_err(smmu->dev, "AArch64 table format not supported!\n");
2503		return -ENXIO;
2504	}
2505
2506	/* ASID/VMID sizes */
2507	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2508	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2509
2510	/* IDR1 */
2511	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2512	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2513		dev_err(smmu->dev, "embedded implementation not supported\n");
2514		return -ENXIO;
2515	}
2516
2517	/* Queue sizes, capped at 4k */
2518	smmu->cmdq.q.max_n_shift = min((u32)CMDQ_MAX_SZ_SHIFT,
2519				       reg >> IDR1_CMDQ_SHIFT & IDR1_CMDQ_MASK);
2520	if (!smmu->cmdq.q.max_n_shift) {
2521		/* Odd alignment restrictions on the base, so ignore for now */
2522		dev_err(smmu->dev, "unit-length command queue not supported\n");
2523		return -ENXIO;
2524	}
2525
2526	smmu->evtq.q.max_n_shift = min((u32)EVTQ_MAX_SZ_SHIFT,
2527				       reg >> IDR1_EVTQ_SHIFT & IDR1_EVTQ_MASK);
2528	smmu->priq.q.max_n_shift = min((u32)PRIQ_MAX_SZ_SHIFT,
2529				       reg >> IDR1_PRIQ_SHIFT & IDR1_PRIQ_MASK);
2530
2531	/* SID/SSID sizes */
2532	smmu->ssid_bits = reg >> IDR1_SSID_SHIFT & IDR1_SSID_MASK;
2533	smmu->sid_bits = reg >> IDR1_SID_SHIFT & IDR1_SID_MASK;
2534
2535	/* IDR5 */
2536	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2537
2538	/* Maximum number of outstanding stalls */
2539	smmu->evtq.max_stalls = reg >> IDR5_STALL_MAX_SHIFT
2540				& IDR5_STALL_MAX_MASK;
2541
2542	/* Page sizes */
2543	if (reg & IDR5_GRAN64K)
2544		pgsize_bitmap |= SZ_64K | SZ_512M;
2545	if (reg & IDR5_GRAN16K)
2546		pgsize_bitmap |= SZ_16K | SZ_32M;
2547	if (reg & IDR5_GRAN4K)
2548		pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2549
2550	arm_smmu_ops.pgsize_bitmap &= pgsize_bitmap;
2551
2552	/* Output address size */
2553	switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
2554	case IDR5_OAS_32_BIT:
2555		smmu->oas = 32;
2556		break;
2557	case IDR5_OAS_36_BIT:
2558		smmu->oas = 36;
2559		break;
2560	case IDR5_OAS_40_BIT:
2561		smmu->oas = 40;
2562		break;
2563	case IDR5_OAS_42_BIT:
2564		smmu->oas = 42;
2565		break;
2566	case IDR5_OAS_44_BIT:
2567		smmu->oas = 44;
2568		break;
2569	default:
2570		dev_info(smmu->dev,
2571			"unknown output address size. Truncating to 48-bit\n");
2572		/* Fallthrough */
2573	case IDR5_OAS_48_BIT:
2574		smmu->oas = 48;
2575	}
2576
2577	/* Set the DMA mask for our table walker */
2578	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2579		dev_warn(smmu->dev,
2580			 "failed to set DMA mask for table walker\n");
2581
2582	smmu->ias = max(smmu->ias, smmu->oas);
2583
2584	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2585		 smmu->ias, smmu->oas, smmu->features);
2586	return 0;
2587}
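/*
 * Example of the pgsize_bitmap restriction above (illustrative values):
 * an SMMU reporting only 4K granules ends up with SZ_4K | SZ_2M | SZ_1G,
 * i.e. the page and block sizes a 4K AArch64 table walk can produce;
 * arm_smmu_ops.pgsize_bitmap starts out as -1UL and is narrowed to that
 * set here.
 */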
2588
2589static int arm_smmu_device_dt_probe(struct platform_device *pdev)
2590{
2591	int irq, ret;
2592	struct resource *res;
2593	struct arm_smmu_device *smmu;
2594	struct device *dev = &pdev->dev;
2595
2596	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2597	if (!smmu) {
2598		dev_err(dev, "failed to allocate arm_smmu_device\n");
2599		return -ENOMEM;
2600	}
2601	smmu->dev = dev;
2602
2603	/* Base address */
2604	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2605	if (!res || resource_size(res) + 1 < SZ_128K) {
2606		dev_err(dev, "MMIO region too small (%pr)\n", res);
2607		return -EINVAL;
2608	}
2609
2610	smmu->base = devm_ioremap_resource(dev, res);
2611	if (IS_ERR(smmu->base))
2612		return PTR_ERR(smmu->base);
2613
2614	/* Interrupt lines */
2615	irq = platform_get_irq_byname(pdev, "eventq");
2616	if (irq > 0)
2617		smmu->evtq.q.irq = irq;
2618
2619	irq = platform_get_irq_byname(pdev, "priq");
2620	if (irq > 0)
2621		smmu->priq.q.irq = irq;
2622
2623	irq = platform_get_irq_byname(pdev, "cmdq-sync");
2624	if (irq > 0)
2625		smmu->cmdq.q.irq = irq;
2626
2627	irq = platform_get_irq_byname(pdev, "gerror");
2628	if (irq > 0)
2629		smmu->gerr_irq = irq;
2630
2631	parse_driver_options(smmu);
2632
2633	/* Probe the h/w */
2634	ret = arm_smmu_device_probe(smmu);
2635	if (ret)
2636		return ret;
2637
2638	/* Initialise in-memory data structures */
2639	ret = arm_smmu_init_structures(smmu);
2640	if (ret)
2641		return ret;
2642
2643	/* Record our private device structure */
2644	platform_set_drvdata(pdev, smmu);
2645
2646	/* Reset the device */
2647	return arm_smmu_device_reset(smmu);
2648}
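/*
 * For illustration only, a devicetree node this probe would accept; the
 * addresses and interrupt numbers are hypothetical, but the 128K register
 * region and the interrupt-names match the checks and lookups above.
 *
 *	smmu: iommu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 79 IRQ_TYPE_EDGE_RISING>;
 *		interrupt-names = "eventq", "priq", "cmdq-sync", "gerror";
 *	};
 */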
2649
2650static int arm_smmu_device_remove(struct platform_device *pdev)
2651{
2652	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2653
2654	arm_smmu_device_disable(smmu);
2655	return 0;
2656}
2657
2658static const struct of_device_id arm_smmu_of_match[] = {
2659	{ .compatible = "arm,smmu-v3", },
2660	{ },
2661};
2662MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2663
2664static struct platform_driver arm_smmu_driver = {
2665	.driver	= {
2666		.name		= "arm-smmu-v3",
2667		.of_match_table	= of_match_ptr(arm_smmu_of_match),
2668	},
2669	.probe	= arm_smmu_device_dt_probe,
2670	.remove	= arm_smmu_device_remove,
2671};
2672
2673static int __init arm_smmu_init(void)
2674{
2675	struct device_node *np;
2676	int ret;
2677
2678	np = of_find_matching_node(NULL, arm_smmu_of_match);
2679	if (!np)
2680		return 0;
2681
2682	of_node_put(np);
2683
2684	ret = platform_driver_register(&arm_smmu_driver);
2685	if (ret)
2686		return ret;
2687
2688	return bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2689}
2690
2691static void __exit arm_smmu_exit(void)
2692{
2693	platform_driver_unregister(&arm_smmu_driver);
2694}
2695
2696subsys_initcall(arm_smmu_init);
2697module_exit(arm_smmu_exit);
2698
2699MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2700MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2701MODULE_LICENSE("GPL v2");