   1/*
   2 * Copyright (C) 2012 ARM Ltd.
   3 * Author: Marc Zyngier <marc.zyngier@arm.com>
   4 *
   5 * This program is free software; you can redistribute it and/or modify
   6 * it under the terms of the GNU General Public License version 2 as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write to the Free Software
  16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17 */
  18
  19#include <linux/cpu.h>
  20#include <linux/kvm.h>
  21#include <linux/kvm_host.h>
  22#include <linux/interrupt.h>
  23#include <linux/io.h>
  24#include <linux/of.h>
  25#include <linux/of_address.h>
  26#include <linux/of_irq.h>
  27#include <linux/uaccess.h>
  28
  29#include <linux/irqchip/arm-gic.h>
  30
  31#include <asm/kvm_emulate.h>
  32#include <asm/kvm_arm.h>
  33#include <asm/kvm_mmu.h>
  34
  35/*
  36 * How the whole thing works (courtesy of Christoffer Dall):
  37 *
  38 * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
  39 *   something is pending
  40 * - VGIC pending interrupts are stored on the vgic.irq_state vgic
  41 *   bitmap (this bitmap is updated by both user land ioctls and guest
  42 *   mmio ops, and other in-kernel peripherals such as the
  43 *   arch. timers) and indicate the 'wire' state.
  44 * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
  45 *   recalculated
  46 * - To calculate the oracle, we need info for each cpu from
  47 *   compute_pending_for_cpu, which considers:
  48 *   - PPI: dist->irq_state & dist->irq_enabled
  49 *   - SPI: dist->irq_state & dist->irq_enabled & dist->irq_spi_target
  50 *   - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
  51 *     registers, stored on each vcpu. We only keep one bit of
  52 *     information per interrupt, making sure that only one vcpu can
  53 *     accept the interrupt.
  54 * - The same is true when injecting an interrupt, except that we only
  55 *   consider a single interrupt at a time. The irq_spi_cpu array
  56 *   contains the target CPU for each SPI.
  57 *
  58 * The handling of level interrupts adds some extra complexity. We
  59 * need to track when the interrupt has been EOIed, so we can sample
  60 * the 'line' again. This is achieved as such:
  61 *
  62 * - When a level interrupt is moved onto a vcpu, the corresponding
  63 *   bit in irq_active is set. As long as this bit is set, the line
  64 *   will be ignored for further interrupts. The interrupt is injected
  65 *   into the vcpu with the GICH_LR_EOI bit set (generate a
  66 *   maintenance interrupt on EOI).
  67 * - When the interrupt is EOIed, the maintenance interrupt fires,
  68 *   and clears the corresponding bit in irq_active. This allows the
  69 *   interrupt line to be sampled again.
  70 */
  71
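/*
 * Illustrative sketch (not part of the original file): the flow above,
 * expressed with helpers defined later in this file.  Raising the
 * 'wire' of an interrupt and consulting the oracle boils down to:
 *
 *	vgic_dist_irq_set(vcpu, irq);		(update the 'wire' state)
 *	vgic_update_state(vcpu->kvm);		(recompute irq_pending_on_cpu)
 *	if (kvm_vgic_vcpu_pending_irq(vcpu))	(ask the oracle)
 *		kvm_vcpu_kick(vcpu);
 *
 * which is roughly what vgic_update_irq_state() and vgic_kick_vcpus()
 * do on behalf of kvm_vgic_inject_irq().
 */
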
  72#define VGIC_ADDR_UNDEF		(-1)
  73#define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
  74
  75#define PRODUCT_ID_KVM		0x4b	/* ASCII code K */
  76#define IMPLEMENTER_ARM		0x43b
  77#define GICC_ARCH_VERSION_V2	0x2
  78
  79/* Physical address of vgic virtual cpu interface */
  80static phys_addr_t vgic_vcpu_base;
  81
  82/* Virtual control interface base address */
  83static void __iomem *vgic_vctrl_base;
  84
  85static struct device_node *vgic_node;
  86
  87#define ACCESS_READ_VALUE	(1 << 0)
  88#define ACCESS_READ_RAZ		(0 << 0)
  89#define ACCESS_READ_MASK(x)	((x) & (1 << 0))
  90#define ACCESS_WRITE_IGNORED	(0 << 1)
  91#define ACCESS_WRITE_SETBIT	(1 << 1)
  92#define ACCESS_WRITE_CLEARBIT	(2 << 1)
  93#define ACCESS_WRITE_VALUE	(3 << 1)
  94#define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))
  95
  96static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
  97static void vgic_update_state(struct kvm *kvm);
  98static void vgic_kick_vcpus(struct kvm *kvm);
  99static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
 100static u32 vgic_nr_lr;
 101
 102static unsigned int vgic_maint_irq;
 103
 104static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
 105				int cpuid, u32 offset)
 106{
 107	offset >>= 2;
 108	if (!offset)
 109		return x->percpu[cpuid].reg;
 110	else
 111		return x->shared.reg + offset - 1;
 112}
 113
 114static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
 115				   int cpuid, int irq)
 116{
 117	if (irq < VGIC_NR_PRIVATE_IRQS)
 118		return test_bit(irq, x->percpu[cpuid].reg_ul);
 119
 120	return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul);
 121}
 122
 123static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
 124				    int irq, int val)
 125{
 126	unsigned long *reg;
 127
 128	if (irq < VGIC_NR_PRIVATE_IRQS) {
 129		reg = x->percpu[cpuid].reg_ul;
 130	} else {
 131		reg =  x->shared.reg_ul;
 132		irq -= VGIC_NR_PRIVATE_IRQS;
 133	}
 134
 135	if (val)
 136		set_bit(irq, reg);
 137	else
 138		clear_bit(irq, reg);
 139}
 140
 141static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
 142{
 143	if (unlikely(cpuid >= VGIC_MAX_CPUS))
 144		return NULL;
 145	return x->percpu[cpuid].reg_ul;
 146}
 147
 148static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
 149{
 150	return x->shared.reg_ul;
 151}
 152
 153static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
 154{
 155	offset >>= 2;
 156	BUG_ON(offset > (VGIC_NR_IRQS / 4));
 157	if (offset < 8)
 158		return x->percpu[cpuid] + offset;
 159	else
 160		return x->shared + offset - 8;
 161}
 162
 163#define VGIC_CFG_LEVEL	0
 164#define VGIC_CFG_EDGE	1
 165
 166static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq)
 167{
 168	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 169	int irq_val;
 170
 171	irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq);
 172	return irq_val == VGIC_CFG_EDGE;
 173}
 174
 175static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
 176{
 177	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 178
 179	return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
 180}
 181
 182static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
 183{
 184	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 185
 186	return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
 187}
 188
 189static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
 190{
 191	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 192
 193	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
 194}
 195
 196static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
 197{
 198	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 199
 200	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
 201}
 202
 203static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
 204{
 205	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 206
 207	return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
 208}
 209
 210static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
 211{
 212	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 213
 214	vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1);
 215}
 216
 217static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq)
 218{
 219	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 220
 221	vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0);
 222}
 223
 224static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
 225{
 226	if (irq < VGIC_NR_PRIVATE_IRQS)
 227		set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
 228	else
 229		set_bit(irq - VGIC_NR_PRIVATE_IRQS,
 230			vcpu->arch.vgic_cpu.pending_shared);
 231}
 232
 233static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
 234{
 235	if (irq < VGIC_NR_PRIVATE_IRQS)
 236		clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
 237	else
 238		clear_bit(irq - VGIC_NR_PRIVATE_IRQS,
 239			  vcpu->arch.vgic_cpu.pending_shared);
 240}
 241
 242static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
 243{
 244	return *((u32 *)mmio->data) & mask;
 245}
 246
 247static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
 248{
 249	*((u32 *)mmio->data) = value & mask;
 250}
 251
 252/**
 253 * vgic_reg_access - access vgic register
 254 * @mmio:   pointer to the data describing the mmio access
 255 * @reg:    pointer to the virtual backing of vgic distributor data
 256 * @offset: least significant 2 bits used for word offset
 257 * @mode:   ACCESS_ mode (see defines above)
 258 *
 259 * Helper to make vgic register access easier using one of the access
 260 * modes defined for vgic register access
 261 * (read,raz,write-ignored,setbit,clearbit,write)
 262 */
 263static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
 264			    phys_addr_t offset, int mode)
 265{
 266	int word_offset = (offset & 3) * 8;
 267	u32 mask = (1UL << (mmio->len * 8)) - 1;
 268	u32 regval;
 269
 270	/*
 271	 * Any alignment fault should have been delivered to the guest
 272	 * directly (ARM ARM B3.12.7 "Prioritization of aborts").
 273	 */
 274
 275	if (reg) {
 276		regval = *reg;
 277	} else {
 278		BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED));
 279		regval = 0;
 280	}
 281
 282	if (mmio->is_write) {
 283		u32 data = mmio_data_read(mmio, mask) << word_offset;
 284		switch (ACCESS_WRITE_MASK(mode)) {
 285		case ACCESS_WRITE_IGNORED:
 286			return;
 287
 288		case ACCESS_WRITE_SETBIT:
 289			regval |= data;
 290			break;
 291
 292		case ACCESS_WRITE_CLEARBIT:
 293			regval &= ~data;
 294			break;
 295
 296		case ACCESS_WRITE_VALUE:
 297			regval = (regval & ~(mask << word_offset)) | data;
 298			break;
 299		}
 300		*reg = regval;
 301	} else {
 302		switch (ACCESS_READ_MASK(mode)) {
 303		case ACCESS_READ_RAZ:
 304			regval = 0;
 305			/* fall through */
 306
 307		case ACCESS_READ_VALUE:
 308			mmio_data_write(mmio, mask, regval >> word_offset);
 309		}
 310	}
 311}
 312
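/*
 * Worked example for the helper above (added for illustration, not in
 * the original): a single-byte guest write to the third byte of a
 * word-aligned register reaches vgic_reg_access() with mmio->len == 1
 * and (offset & 3) == 2, so
 *
 *	word_offset = 2 * 8 = 16;
 *	mask        = (1UL << 8) - 1 = 0xff;
 *
 * and an ACCESS_WRITE_VALUE access only replaces bits [23:16] of the
 * backing word, leaving the other three bytes untouched.
 */
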
 313static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
 314			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
 315{
 316	u32 reg;
 317	u32 word_offset = offset & 3;
 318
 319	switch (offset & ~3) {
 320	case 0:			/* GICD_CTLR */
 321		reg = vcpu->kvm->arch.vgic.enabled;
 322		vgic_reg_access(mmio, &reg, word_offset,
 323				ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
 324		if (mmio->is_write) {
 325			vcpu->kvm->arch.vgic.enabled = reg & 1;
 326			vgic_update_state(vcpu->kvm);
 327			return true;
 328		}
 329		break;
 330
 331	case 4:			/* GICD_TYPER */
 332		reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
 333		reg |= (VGIC_NR_IRQS >> 5) - 1;
 334		vgic_reg_access(mmio, &reg, word_offset,
 335				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
 336		break;
 337
 338	case 8:			/* GICD_IIDR */
 339		reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
 340		vgic_reg_access(mmio, &reg, word_offset,
 341				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
 342		break;
 343	}
 344
 345	return false;
 346}
 347
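/*
 * Example of the GICD_TYPER value built above (illustrative only,
 * assuming VGIC_NR_IRQS is 256 as in this version of the code): a
 * guest with 4 VCPUs reads
 *
 *	reg = (4 - 1) << 5 | ((256 >> 5) - 1) = 0x60 | 0x7 = 0x67
 *
 * i.e. "4 CPU interfaces, 8 * 32 = 256 interrupt IDs implemented".
 */
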
 348static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
 349			       struct kvm_exit_mmio *mmio, phys_addr_t offset)
 350{
 351	vgic_reg_access(mmio, NULL, offset,
 352			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
 353	return false;
 354}
 355
 356static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
 357				       struct kvm_exit_mmio *mmio,
 358				       phys_addr_t offset)
 359{
 360	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
 361				       vcpu->vcpu_id, offset);
 362	vgic_reg_access(mmio, reg, offset,
 363			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
 364	if (mmio->is_write) {
 365		vgic_update_state(vcpu->kvm);
 366		return true;
 367	}
 368
 369	return false;
 370}
 371
 372static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
 373					 struct kvm_exit_mmio *mmio,
 374					 phys_addr_t offset)
 375{
 376	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
 377				       vcpu->vcpu_id, offset);
 378	vgic_reg_access(mmio, reg, offset,
 379			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
 380	if (mmio->is_write) {
 381		if (offset < 4) /* Force SGI enabled */
 382			*reg |= 0xffff;
 383		vgic_retire_disabled_irqs(vcpu);
 384		vgic_update_state(vcpu->kvm);
 385		return true;
 386	}
 387
 388	return false;
 389}
 390
 391static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 392					struct kvm_exit_mmio *mmio,
 393					phys_addr_t offset)
 394{
 395	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
 396				       vcpu->vcpu_id, offset);
 397	vgic_reg_access(mmio, reg, offset,
 398			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
 399	if (mmio->is_write) {
 400		vgic_update_state(vcpu->kvm);
 401		return true;
 402	}
 403
 404	return false;
 405}
 406
 407static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 408					  struct kvm_exit_mmio *mmio,
 409					  phys_addr_t offset)
 410{
 411	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
 412				       vcpu->vcpu_id, offset);
 413	vgic_reg_access(mmio, reg, offset,
 414			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
 415	if (mmio->is_write) {
 416		vgic_update_state(vcpu->kvm);
 417		return true;
 418	}
 419
 420	return false;
 421}
 422
 423static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
 424				     struct kvm_exit_mmio *mmio,
 425				     phys_addr_t offset)
 426{
 427	u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
 428					vcpu->vcpu_id, offset);
 429	vgic_reg_access(mmio, reg, offset,
 430			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
 431	return false;
 432}
 433
 434#define GICD_ITARGETSR_SIZE	32
 435#define GICD_CPUTARGETS_BITS	8
 436#define GICD_IRQS_PER_ITARGETSR	(GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
 437static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
 438{
 439	struct vgic_dist *dist = &kvm->arch.vgic;
 440	int i;
 441	u32 val = 0;
 442
 443	irq -= VGIC_NR_PRIVATE_IRQS;
 444
 445	for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
 446		val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8);
 447
 448	return val;
 449}
 450
 451static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
 452{
 453	struct vgic_dist *dist = &kvm->arch.vgic;
 454	struct kvm_vcpu *vcpu;
 455	int i, c;
 456	unsigned long *bmap;
 457	u32 target;
 458
 459	irq -= VGIC_NR_PRIVATE_IRQS;
 460
 461	/*
 462	 * Pick the LSB in each byte. This ensures we target exactly
 463	 * one vcpu per IRQ. If the byte is null, assume we target
 464	 * CPU0.
 465	 */
 466	for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
 467		int shift = i * GICD_CPUTARGETS_BITS;
 468		target = ffs((val >> shift) & 0xffU);
 469		target = target ? (target - 1) : 0;
 470		dist->irq_spi_cpu[irq + i] = target;
 471		kvm_for_each_vcpu(c, vcpu, kvm) {
 472			bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
 473			if (c == target)
 474				set_bit(irq + i, bmap);
 475			else
 476				clear_bit(irq + i, bmap);
 477		}
 478	}
 479}
 480
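/*
 * Worked example for the two helpers above (illustrative, not in the
 * original): a 32-bit write of 0x00000201 to the GICD_ITARGETSR word
 * covering IRQs 32-35 (handled by handle_mmio_target_reg() below)
 * reaches vgic_set_target_reg() with irq = 32:
 *
 *	byte 0 = 0x01 -> ffs() = 1 -> SPI 32 targets VCPU 0
 *	byte 1 = 0x02 -> ffs() = 2 -> SPI 33 targets VCPU 1
 *	byte 2 = 0x00 -> ffs() = 0 -> SPI 34 defaults to VCPU 0
 *	byte 3 = 0x00 -> ffs() = 0 -> SPI 35 defaults to VCPU 0
 *
 * and the per-vcpu irq_spi_target bitmaps are updated so that exactly
 * one vcpu has each of these SPIs set.
 */
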
 481static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
 482				   struct kvm_exit_mmio *mmio,
 483				   phys_addr_t offset)
 484{
 485	u32 reg;
 486
 487	/* We treat the banked interrupt targets as read-only */
 488	if (offset < 32) {
 489		u32 roreg = 1 << vcpu->vcpu_id;
 490		roreg |= roreg << 8;
 491		roreg |= roreg << 16;
 492
 493		vgic_reg_access(mmio, &roreg, offset,
 494				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
 495		return false;
 496	}
 497
 498	reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
 499	vgic_reg_access(mmio, &reg, offset,
 500			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
 501	if (mmio->is_write) {
 502		vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
 503		vgic_update_state(vcpu->kvm);
 504		return true;
 505	}
 506
 507	return false;
 508}
 509
 510static u32 vgic_cfg_expand(u16 val)
 511{
 512	u32 res = 0;
 513	int i;
 514
 515	/*
 516	 * Turn a 16bit value like abcd...mnop into a 32bit word
 517	 * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is.
 518	 */
 519	for (i = 0; i < 16; i++)
 520		res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1);
 521
 522	return res;
 523}
 524
 525static u16 vgic_cfg_compress(u32 val)
 526{
 527	u16 res = 0;
 528	int i;
 529
 530	/*
 531	 * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like
 532	 * abcd...mnop which is what we really care about.
 533	 */
 534	for (i = 0; i < 16; i++)
 535		res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i;
 536
 537	return res;
 538}
 539
 540/*
 541 * The distributor uses 2 bits per IRQ for the CFG register, but the
 542 * LSB is always 0. As such, we only keep the upper bit, and use the
 543 * two above functions to compress/expand the bits
 544 */
 545static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
 546				struct kvm_exit_mmio *mmio, phys_addr_t offset)
 547{
 548	u32 val;
 549	u32 *reg;
 550
 551	reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
 552				  vcpu->vcpu_id, offset >> 1);
 553
 554	if (offset & 4)
 555		val = *reg >> 16;
 556	else
 557		val = *reg & 0xffff;
 558
 559	val = vgic_cfg_expand(val);
 560	vgic_reg_access(mmio, &val, offset,
 561			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
 562	if (mmio->is_write) {
 563		if (offset < 8) {
 564			*reg = ~0U; /* Force PPIs/SGIs to 1 */
 565			return false;
 566		}
 567
 568		val = vgic_cfg_compress(val);
 569		if (offset & 4) {
 570			*reg &= 0xffff;
 571			*reg |= val << 16;
 572		} else {
 573			*reg &= 0xffff << 16;
 574			*reg |= val;
 575		}
 576	}
 577
 578	return false;
 579}
 580
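/*
 * Quick sanity example for the helpers above (illustrative only):
 *
 *	vgic_cfg_expand(0x0003)   == 0x0000000a   (bits 1 and 3 set)
 *	vgic_cfg_compress(0x000a) == 0x0003
 *
 * i.e. each configuration bit ends up in the odd bit position of its
 * 2-bit field, which is where the hardware GICD_ICFGRn keeps it.
 */
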
 581static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
 582				struct kvm_exit_mmio *mmio, phys_addr_t offset)
 583{
 584	u32 reg;
 585	vgic_reg_access(mmio, &reg, offset,
 586			ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
 587	if (mmio->is_write) {
 588		vgic_dispatch_sgi(vcpu, reg);
 589		vgic_update_state(vcpu->kvm);
 590		return true;
 591	}
 592
 593	return false;
 594}
 595
 596#define LR_CPUID(lr)	\
 597	(((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
 598#define LR_IRQID(lr)	\
 599	((lr) & GICH_LR_VIRTUALID)
 600
 601static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu)
 602{
 603	clear_bit(lr_nr, vgic_cpu->lr_used);
 604	vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE;
 605	vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
 606}
 607
 608/**
 609 * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
 610 * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
 611 *
 612 * Move any pending IRQs that have already been assigned to LRs back to the
 613 * emulated distributor state so that the complete emulated state can be read
 614 * from the main emulation structures without investigating the LRs.
 615 *
 616 * Note that IRQs in the active state in the LRs get their pending state moved
 617 * to the distributor but the active state stays in the LRs, because we don't
 618 * track the active state on the distributor side.
 619 */
 620static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 621{
 622	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 623	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 624	int vcpu_id = vcpu->vcpu_id;
 625	int i, irq, source_cpu;
 626	u32 *lr;
 627
 628	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
 629		lr = &vgic_cpu->vgic_lr[i];
 630		irq = LR_IRQID(*lr);
 631		source_cpu = LR_CPUID(*lr);
 632
 633		/*
 634		 * There are three options for the state bits:
 635		 *
 636		 * 01: pending
 637		 * 10: active
 638		 * 11: pending and active
 639		 *
 640		 * If the LR holds only an active interrupt (not pending) then
 641		 * just leave it alone.
 642		 */
 643		if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT)
 644			continue;
 645
 646		/*
 647		 * Reestablish the pending state on the distributor and the
 648		 * CPU interface.  It may have already been pending, but that
 649		 * is fine, then we are only setting a few bits that were
 650		 * already set.
 651		 */
 652		vgic_dist_irq_set(vcpu, irq);
 653		if (irq < VGIC_NR_SGIS)
 654			dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu;
 655		*lr &= ~GICH_LR_PENDING_BIT;
 656
 657		/*
 658		 * If there's no state left on the LR (it could still be
 659		 * active), then the LR does not hold any useful info and can
 660		 * be marked as free for other use.
 661		 */
 662		if (!(*lr & GICH_LR_STATE))
 663			vgic_retire_lr(i, irq, vgic_cpu);
 664
 665		/* Finally update the VGIC state. */
 666		vgic_update_state(vcpu->kvm);
 667	}
 668}
 669
 670/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */
 671static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 672					struct kvm_exit_mmio *mmio,
 673					phys_addr_t offset)
 674{
 675	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 676	int sgi;
 677	int min_sgi = (offset & ~0x3);
 678	int max_sgi = min_sgi + 3;
 679	int vcpu_id = vcpu->vcpu_id;
 680	u32 reg = 0;
 681
 682	/* Copy source SGIs from distributor side */
 683	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
 684		int shift = 8 * (sgi - min_sgi);
 685		reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift;
 686	}
 687
 688	mmio_data_write(mmio, ~0, reg);
 689	return false;
 690}
 691
 692static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 693					 struct kvm_exit_mmio *mmio,
 694					 phys_addr_t offset, bool set)
 695{
 696	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 697	int sgi;
 698	int min_sgi = (offset & ~0x3);
 699	int max_sgi = min_sgi + 3;
 700	int vcpu_id = vcpu->vcpu_id;
 701	u32 reg;
 702	bool updated = false;
 703
 704	reg = mmio_data_read(mmio, ~0);
 705
 706	/* Set or clear pending SGI sources on the distributor */
 707	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
 708		u8 mask = reg >> (8 * (sgi - min_sgi));
 709		if (set) {
 710			if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask)
 711				updated = true;
 712			dist->irq_sgi_sources[vcpu_id][sgi] |= mask;
 713		} else {
 714			if (dist->irq_sgi_sources[vcpu_id][sgi] & mask)
 715				updated = true;
 716			dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask;
 717		}
 718	}
 719
 720	if (updated)
 721		vgic_update_state(vcpu->kvm);
 722
 723	return updated;
 724}
 725
 726static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
 727				struct kvm_exit_mmio *mmio,
 728				phys_addr_t offset)
 729{
 730	if (!mmio->is_write)
 731		return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
 732	else
 733		return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
 734}
 735
 736static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
 737				  struct kvm_exit_mmio *mmio,
 738				  phys_addr_t offset)
 739{
 740	if (!mmio->is_write)
 741		return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
 742	else
 743		return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
 744}
 745
 746/*
 747 * I would have liked to use the kvm_bus_io_*() API instead, but it
 748 * cannot cope with banked registers (only the VM pointer is passed
 749 * around, and we need the vcpu). One of these days, someone please
 750 * fix it!
 751 */
 752struct mmio_range {
 753	phys_addr_t base;
 754	unsigned long len;
 755	bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
 756			    phys_addr_t offset);
 757};
 758
 759static const struct mmio_range vgic_dist_ranges[] = {
 760	{
 761		.base		= GIC_DIST_CTRL,
 762		.len		= 12,
 763		.handle_mmio	= handle_mmio_misc,
 764	},
 765	{
 766		.base		= GIC_DIST_IGROUP,
 767		.len		= VGIC_NR_IRQS / 8,
 768		.handle_mmio	= handle_mmio_raz_wi,
 769	},
 770	{
 771		.base		= GIC_DIST_ENABLE_SET,
 772		.len		= VGIC_NR_IRQS / 8,
 773		.handle_mmio	= handle_mmio_set_enable_reg,
 774	},
 775	{
 776		.base		= GIC_DIST_ENABLE_CLEAR,
 777		.len		= VGIC_NR_IRQS / 8,
 778		.handle_mmio	= handle_mmio_clear_enable_reg,
 779	},
 780	{
 781		.base		= GIC_DIST_PENDING_SET,
 782		.len		= VGIC_NR_IRQS / 8,
 783		.handle_mmio	= handle_mmio_set_pending_reg,
 784	},
 785	{
 786		.base		= GIC_DIST_PENDING_CLEAR,
 787		.len		= VGIC_NR_IRQS / 8,
 788		.handle_mmio	= handle_mmio_clear_pending_reg,
 789	},
 790	{
 791		.base		= GIC_DIST_ACTIVE_SET,
 792		.len		= VGIC_NR_IRQS / 8,
 793		.handle_mmio	= handle_mmio_raz_wi,
 794	},
 795	{
 796		.base		= GIC_DIST_ACTIVE_CLEAR,
 797		.len		= VGIC_NR_IRQS / 8,
 798		.handle_mmio	= handle_mmio_raz_wi,
 799	},
 800	{
 801		.base		= GIC_DIST_PRI,
 802		.len		= VGIC_NR_IRQS,
 803		.handle_mmio	= handle_mmio_priority_reg,
 804	},
 805	{
 806		.base		= GIC_DIST_TARGET,
 807		.len		= VGIC_NR_IRQS,
 808		.handle_mmio	= handle_mmio_target_reg,
 809	},
 810	{
 811		.base		= GIC_DIST_CONFIG,
 812		.len		= VGIC_NR_IRQS / 4,
 813		.handle_mmio	= handle_mmio_cfg_reg,
 814	},
 815	{
 816		.base		= GIC_DIST_SOFTINT,
 817		.len		= 4,
 818		.handle_mmio	= handle_mmio_sgi_reg,
 819	},
 820	{
 821		.base		= GIC_DIST_SGI_PENDING_CLEAR,
 822		.len		= VGIC_NR_SGIS,
 823		.handle_mmio	= handle_mmio_sgi_clear,
 824	},
 825	{
 826		.base		= GIC_DIST_SGI_PENDING_SET,
 827		.len		= VGIC_NR_SGIS,
 828		.handle_mmio	= handle_mmio_sgi_set,
 829	},
 830	{}
 831};
 832
 833static const
 834struct mmio_range *find_matching_range(const struct mmio_range *ranges,
 835				       struct kvm_exit_mmio *mmio,
 836				       phys_addr_t offset)
 837{
 838	const struct mmio_range *r = ranges;
 839
 840	while (r->len) {
 841		if (offset >= r->base &&
 842		    (offset + mmio->len) <= (r->base + r->len))
 843			return r;
 844		r++;
 845	}
 846
 847	return NULL;
 848}
 849
 850/**
 851 * vgic_handle_mmio - handle an in-kernel MMIO access
 852 * @vcpu:	pointer to the vcpu performing the access
 853 * @run:	pointer to the kvm_run structure
 854 * @mmio:	pointer to the data describing the access
 855 *
 856 * returns true if the MMIO access has been performed in kernel space,
 857 * and false if it needs to be emulated in user space.
 858 */
 859bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 860		      struct kvm_exit_mmio *mmio)
 861{
 862	const struct mmio_range *range;
 863	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 864	unsigned long base = dist->vgic_dist_base;
 865	bool updated_state;
 866	unsigned long offset;
 867
 868	if (!irqchip_in_kernel(vcpu->kvm) ||
 869	    mmio->phys_addr < base ||
 870	    (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE))
 871		return false;
 872
 873	/* We don't support ldrd / strd or ldm / stm to the emulated vgic */
 874	if (mmio->len > 4) {
 875		kvm_inject_dabt(vcpu, mmio->phys_addr);
 876		return true;
 877	}
 878
 879	offset = mmio->phys_addr - base;
 880	range = find_matching_range(vgic_dist_ranges, mmio, offset);
 881	if (unlikely(!range || !range->handle_mmio)) {
 882		pr_warn("Unhandled access %d %08llx %d\n",
 883			mmio->is_write, mmio->phys_addr, mmio->len);
 884		return false;
 885	}
 886
 887	spin_lock(&vcpu->kvm->arch.vgic.lock);
 888	offset = mmio->phys_addr - range->base - base;
 889	updated_state = range->handle_mmio(vcpu, mmio, offset);
 890	spin_unlock(&vcpu->kvm->arch.vgic.lock);
 891	kvm_prepare_mmio(run, mmio);
 892	kvm_handle_mmio_return(vcpu, run);
 893
 894	if (updated_state)
 895		vgic_kick_vcpus(vcpu->kvm);
 896
 897	return true;
 898}
 899
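/*
 * Illustrative walk-through (not in the original): a 32-bit guest
 * store to vgic_dist_base + 0x104 lands in vgic_handle_mmio(), where
 * find_matching_range() selects the GIC_DIST_ENABLE_SET entry
 * (base 0x100).  The handler then runs with offset = 0x4, which
 * vgic_bitmap_get_reg() turns into the shared enable word for
 * IRQs 32-63, and the write triggers vgic_update_state() to
 * re-evaluate the pending oracle.
 */
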
 900static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 901{
 902	struct kvm *kvm = vcpu->kvm;
 903	struct vgic_dist *dist = &kvm->arch.vgic;
 904	int nrcpus = atomic_read(&kvm->online_vcpus);
 905	u8 target_cpus;
 906	int sgi, mode, c, vcpu_id;
 907
 908	vcpu_id = vcpu->vcpu_id;
 909
 910	sgi = reg & 0xf;
 911	target_cpus = (reg >> 16) & 0xff;
 912	mode = (reg >> 24) & 3;
 913
 914	switch (mode) {
 915	case 0:
 916		if (!target_cpus)
 917			return;
 918		break;
 919
 920	case 1:
 921		target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
 922		break;
 923
 924	case 2:
 925		target_cpus = 1 << vcpu_id;
 926		break;
 927	}
 928
 929	kvm_for_each_vcpu(c, vcpu, kvm) {
 930		if (target_cpus & 1) {
 931			/* Flag the SGI as pending */
 932			vgic_dist_irq_set(vcpu, sgi);
 933			dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
 934			kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
 935		}
 936
 937		target_cpus >>= 1;
 938	}
 939}
 940
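/*
 * Example decode for the GICD_SGIR write handled above (illustrative
 * only): a guest write of 0x000a0001 from VCPU 0 gives
 *
 *	sgi         = 0x1	(bits [3:0])
 *	target_cpus = 0x0a	(bits [23:16] -> VCPUs 1 and 3)
 *	mode        = 0		(bits [25:24] -> use the target list)
 *
 * so SGI 1 is flagged pending on VCPUs 1 and 3, with VCPU 0 recorded
 * in their irq_sgi_sources bytes.
 */
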
 941static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 942{
 943	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 944	unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
 945	unsigned long pending_private, pending_shared;
 946	int vcpu_id;
 947
 948	vcpu_id = vcpu->vcpu_id;
 949	pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
 950	pend_shared = vcpu->arch.vgic_cpu.pending_shared;
 951
 952	pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id);
 953	enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
 954	bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
 955
 956	pending = vgic_bitmap_get_shared_map(&dist->irq_state);
 957	enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
 958	bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
 959	bitmap_and(pend_shared, pend_shared,
 960		   vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
 961		   VGIC_NR_SHARED_IRQS);
 962
 963	pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
 964	pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
 965	return (pending_private < VGIC_NR_PRIVATE_IRQS ||
 966		pending_shared < VGIC_NR_SHARED_IRQS);
 967}
 968
 969/*
 970 * Update the interrupt state and determine which CPUs have pending
 971 * interrupts. Must be called with distributor lock held.
 972 */
 973static void vgic_update_state(struct kvm *kvm)
 974{
 975	struct vgic_dist *dist = &kvm->arch.vgic;
 976	struct kvm_vcpu *vcpu;
 977	int c;
 978
 979	if (!dist->enabled) {
 980		set_bit(0, &dist->irq_pending_on_cpu);
 981		return;
 982	}
 983
 984	kvm_for_each_vcpu(c, vcpu, kvm) {
 985		if (compute_pending_for_cpu(vcpu)) {
 986			pr_debug("CPU%d has pending interrupts\n", c);
 987			set_bit(c, &dist->irq_pending_on_cpu);
 988		}
 989	}
 990}
 991
 992#define MK_LR_PEND(src, irq)	\
 993	(GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
 994
 995/*
 996 * An interrupt may have been disabled after being made pending on the
 997 * CPU interface (the classic case is a timer running while we're
 998 * rebooting the guest - the interrupt would kick as soon as the CPU
 999 * interface gets enabled, with deadly consequences).
1000 *
1001 * The solution is to examine already active LRs, and check the
1002 * interrupt is still enabled. If not, just retire it.
1003 */
1004static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
1005{
1006	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1007	int lr;
1008
1009	for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
1010		int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
1011
1012		if (!vgic_irq_is_enabled(vcpu, irq)) {
1013			vgic_retire_lr(lr, irq, vgic_cpu);
1014			if (vgic_irq_is_active(vcpu, irq))
1015				vgic_irq_clear_active(vcpu, irq);
1016		}
1017	}
1018}
1019
1020/*
1021 * Queue an interrupt to a CPU virtual interface. Return true on success,
1022 * or false if it wasn't possible to queue it.
1023 */
1024static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
1025{
1026	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1027	int lr;
1028
1029	/* Sanitize the input... */
1030	BUG_ON(sgi_source_id & ~7);
1031	BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
1032	BUG_ON(irq >= VGIC_NR_IRQS);
1033
1034	kvm_debug("Queue IRQ%d\n", irq);
1035
1036	lr = vgic_cpu->vgic_irq_lr_map[irq];
1037
1038	/* Do we have an active interrupt for the same CPUID? */
1039	if (lr != LR_EMPTY &&
1040	    (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) {
1041		kvm_debug("LR%d piggyback for IRQ%d %x\n",
1042			  lr, irq, vgic_cpu->vgic_lr[lr]);
1043		BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
1044		vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT;
1045		return true;
1046	}
1047
1048	/* Try to use another LR for this interrupt */
1049	lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
1050			       vgic_cpu->nr_lr);
1051	if (lr >= vgic_cpu->nr_lr)
1052		return false;
1053
1054	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
1055	vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
1056	vgic_cpu->vgic_irq_lr_map[irq] = lr;
1057	set_bit(lr, vgic_cpu->lr_used);
1058
1059	if (!vgic_irq_is_edge(vcpu, irq))
1060		vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI;
1061
1062	return true;
1063}
1064
1065static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
1066{
1067	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1068	unsigned long sources;
1069	int vcpu_id = vcpu->vcpu_id;
1070	int c;
1071
1072	sources = dist->irq_sgi_sources[vcpu_id][irq];
1073
1074	for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
1075		if (vgic_queue_irq(vcpu, c, irq))
1076			clear_bit(c, &sources);
1077	}
1078
1079	dist->irq_sgi_sources[vcpu_id][irq] = sources;
1080
1081	/*
1082	 * If the sources bitmap has been cleared it means that we
1083	 * could queue all the SGIs onto link registers (see the
1084	 * clear_bit above), and therefore we are done with them in
1085	 * our emulated gic and can get rid of them.
1086	 */
1087	if (!sources) {
1088		vgic_dist_irq_clear(vcpu, irq);
1089		vgic_cpu_irq_clear(vcpu, irq);
1090		return true;
1091	}
1092
1093	return false;
1094}
1095
1096static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
1097{
1098	if (vgic_irq_is_active(vcpu, irq))
1099		return true; /* level interrupt, already queued */
1100
1101	if (vgic_queue_irq(vcpu, 0, irq)) {
1102		if (vgic_irq_is_edge(vcpu, irq)) {
1103			vgic_dist_irq_clear(vcpu, irq);
1104			vgic_cpu_irq_clear(vcpu, irq);
1105		} else {
1106			vgic_irq_set_active(vcpu, irq);
1107		}
1108
1109		return true;
1110	}
1111
1112	return false;
1113}
1114
1115/*
1116 * Fill the list registers with pending interrupts before running the
1117 * guest.
1118 */
1119static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1120{
1121	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1122	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1123	int i, vcpu_id;
1124	int overflow = 0;
1125
1126	vcpu_id = vcpu->vcpu_id;
1127
1128	/*
1129	 * We may not have any pending interrupt, or the interrupts
1130	 * may have been serviced from another vcpu. In all cases,
1131	 * move along.
1132	 */
1133	if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
1134		pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
1135		goto epilog;
1136	}
1137
1138	/* SGIs */
1139	for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
1140		if (!vgic_queue_sgi(vcpu, i))
1141			overflow = 1;
1142	}
1143
1144	/* PPIs */
1145	for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
1146		if (!vgic_queue_hwirq(vcpu, i))
1147			overflow = 1;
1148	}
1149
1150	/* SPIs */
1151	for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) {
1152		if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
1153			overflow = 1;
1154	}
1155
1156epilog:
1157	if (overflow) {
1158		vgic_cpu->vgic_hcr |= GICH_HCR_UIE;
1159	} else {
1160		vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
1161		/*
1162		 * We're about to run this VCPU, and we've consumed
1163		 * everything the distributor had in store for
1164		 * us. Claim we don't have anything pending. We'll
1165		 * adjust that if needed while exiting.
1166		 */
1167		clear_bit(vcpu_id, &dist->irq_pending_on_cpu);
1168	}
1169}
1170
1171static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1172{
1173	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1174	bool level_pending = false;
1175
1176	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
1177
1178	if (vgic_cpu->vgic_misr & GICH_MISR_EOI) {
1179		/*
1180		 * Some level interrupts have been EOIed. Clear their
1181		 * active bit.
1182		 */
1183		int lr, irq;
1184
1185		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
1186				 vgic_cpu->nr_lr) {
1187			irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
1188
1189			vgic_irq_clear_active(vcpu, irq);
1190			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI;
1191
1192			/* Any additional pending interrupt? */
1193			if (vgic_dist_irq_is_pending(vcpu, irq)) {
1194				vgic_cpu_irq_set(vcpu, irq);
1195				level_pending = true;
1196			} else {
1197				vgic_cpu_irq_clear(vcpu, irq);
1198			}
1199
1200			/*
1201			 * Despite being EOIed, the LR may not have
1202			 * been marked as empty.
1203			 */
1204			set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
1205			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
1206		}
1207	}
1208
1209	if (vgic_cpu->vgic_misr & GICH_MISR_U)
1210		vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
1211
1212	return level_pending;
1213}
1214
1215/*
1216 * Sync back the VGIC state after a guest run. The distributor lock is
1217 * needed so we don't get preempted in the middle of the state processing.
1218 */
1219static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1220{
1221	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1222	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1223	int lr, pending;
1224	bool level_pending;
1225
1226	level_pending = vgic_process_maintenance(vcpu);
1227
1228	/* Clear mappings for empty LRs */
1229	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
1230			 vgic_cpu->nr_lr) {
1231		int irq;
1232
1233		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
1234			continue;
1235
1236		irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
1237
1238		BUG_ON(irq >= VGIC_NR_IRQS);
1239		vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
1240	}
1241
1242	/* Check if we still have something up our sleeve... */
1243	pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
1244				      vgic_cpu->nr_lr);
1245	if (level_pending || pending < vgic_cpu->nr_lr)
1246		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
1247}
1248
1249void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1250{
1251	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1252
1253	if (!irqchip_in_kernel(vcpu->kvm))
1254		return;
1255
1256	spin_lock(&dist->lock);
1257	__kvm_vgic_flush_hwstate(vcpu);
1258	spin_unlock(&dist->lock);
1259}
1260
1261void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1262{
1263	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1264
1265	if (!irqchip_in_kernel(vcpu->kvm))
1266		return;
1267
1268	spin_lock(&dist->lock);
1269	__kvm_vgic_sync_hwstate(vcpu);
1270	spin_unlock(&dist->lock);
1271}
1272
1273int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
1274{
1275	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1276
1277	if (!irqchip_in_kernel(vcpu->kvm))
1278		return 0;
1279
1280	return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
1281}
1282
1283static void vgic_kick_vcpus(struct kvm *kvm)
1284{
1285	struct kvm_vcpu *vcpu;
1286	int c;
1287
1288	/*
1289	 * We've injected an interrupt, time to find out who deserves
1290	 * a good kick...
1291	 */
1292	kvm_for_each_vcpu(c, vcpu, kvm) {
1293		if (kvm_vgic_vcpu_pending_irq(vcpu))
1294			kvm_vcpu_kick(vcpu);
1295	}
1296}
1297
1298static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
1299{
1300	int is_edge = vgic_irq_is_edge(vcpu, irq);
1301	int state = vgic_dist_irq_is_pending(vcpu, irq);
1302
1303	/*
1304	 * Only inject an interrupt if:
1305	 * - edge triggered and we have a rising edge
1306	 * - level triggered and we change level
1307	 */
1308	if (is_edge)
1309		return level > state;
1310	else
1311		return level != state;
1312}
1313
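/*
 * Illustrative truth table for the check above (not in the original):
 *
 *	edge IRQ,  level 1, state 0  -> inject (rising edge)
 *	edge IRQ,  level 1, state 1  -> ignore (already pending)
 *	edge IRQ,  level 0, state x  -> ignore (falling edge is a no-op)
 *	level IRQ, level != state    -> inject (the line changed)
 *	level IRQ, level == state    -> ignore (nothing new to sample)
 */
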
1314static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
1315				  unsigned int irq_num, bool level)
1316{
1317	struct vgic_dist *dist = &kvm->arch.vgic;
1318	struct kvm_vcpu *vcpu;
1319	int is_edge, is_level;
1320	int enabled;
1321	bool ret = true;
1322
1323	spin_lock(&dist->lock);
1324
1325	vcpu = kvm_get_vcpu(kvm, cpuid);
1326	is_edge = vgic_irq_is_edge(vcpu, irq_num);
1327	is_level = !is_edge;
1328
1329	if (!vgic_validate_injection(vcpu, irq_num, level)) {
1330		ret = false;
1331		goto out;
1332	}
1333
1334	if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
1335		cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
1336		vcpu = kvm_get_vcpu(kvm, cpuid);
1337	}
1338
1339	kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
1340
1341	if (level)
1342		vgic_dist_irq_set(vcpu, irq_num);
1343	else
1344		vgic_dist_irq_clear(vcpu, irq_num);
1345
1346	enabled = vgic_irq_is_enabled(vcpu, irq_num);
1347
1348	if (!enabled) {
1349		ret = false;
1350		goto out;
1351	}
1352
1353	if (is_level && vgic_irq_is_active(vcpu, irq_num)) {
1354		/*
1355		 * Level interrupt in progress, will be picked up
1356		 * when EOId.
1357		 */
1358		ret = false;
1359		goto out;
1360	}
1361
1362	if (level) {
1363		vgic_cpu_irq_set(vcpu, irq_num);
1364		set_bit(cpuid, &dist->irq_pending_on_cpu);
1365	}
1366
1367out:
1368	spin_unlock(&dist->lock);
1369
1370	return ret;
1371}
1372
1373/**
1374 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
1375 * @kvm:     The VM structure pointer
1376 * @cpuid:   The CPU for PPIs
1377 * @irq_num: The IRQ number that is assigned to the device
1378 * @level:   Edge-triggered:  true:  to trigger the interrupt
1379 *			      false: to ignore the call
1380 *	     Level-sensitive  true:  activates an interrupt
1381 *			      false: deactivates an interrupt
1382 *
1383 * The GIC is not concerned with devices being active-LOW or active-HIGH for
1384 * level-sensitive interrupts.  You can think of the level parameter as 1
1385 * being HIGH and 0 being LOW and all devices being active-HIGH.
1386 */
1387int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
1388			bool level)
1389{
1390	if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
1391		vgic_kick_vcpus(kvm);
1392
1393	return 0;
1394}
1395
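/*
 * Usage sketch (illustrative, not in the original): an in-kernel
 * device model that owns SPI number "spi" (a hypothetical value >= 32)
 * would assert and later deassert a level-triggered line with
 *
 *	kvm_vgic_inject_irq(kvm, 0, spi, true);
 *	...
 *	kvm_vgic_inject_irq(kvm, 0, spi, false);
 *
 * The cpuid argument selects the per-vcpu banked state for PPIs; for
 * SPIs the destination vcpu comes from dist->irq_spi_cpu, i.e. the
 * guest-programmed GICD_ITARGETSRn state.
 */
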
1396static irqreturn_t vgic_maintenance_handler(int irq, void *data)
1397{
1398	/*
1399	 * We cannot rely on the vgic maintenance interrupt to be
1400	 * delivered synchronously. This means we can only use it to
1401	 * exit the VM, and we perform the handling of EOIed
1402	 * interrupts on the exit path (see vgic_process_maintenance).
1403	 */
1404	return IRQ_HANDLED;
1405}
1406
1407/**
1408 * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
1409 * @vcpu: pointer to the vcpu struct
1410 *
1411 * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
1412 * this vcpu and enable the VGIC for this VCPU
1413 */
1414int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
1415{
1416	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1417	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1418	int i;
1419
1420	if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
1421		return -EBUSY;
1422
1423	for (i = 0; i < VGIC_NR_IRQS; i++) {
1424		if (i < VGIC_NR_PPIS)
1425			vgic_bitmap_set_irq_val(&dist->irq_enabled,
1426						vcpu->vcpu_id, i, 1);
1427		if (i < VGIC_NR_PRIVATE_IRQS)
1428			vgic_bitmap_set_irq_val(&dist->irq_cfg,
1429						vcpu->vcpu_id, i, VGIC_CFG_EDGE);
1430
1431		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
1432	}
1433
1434	/*
1435	 * By forcing VMCR to zero, the GIC will restore the binary
1436	 * points to their reset values. Anything else resets to zero
1437	 * anyway.
1438	 */
1439	vgic_cpu->vgic_vmcr = 0;
1440
1441	vgic_cpu->nr_lr = vgic_nr_lr;
1442	vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
1443
1444	return 0;
1445}
1446
1447static void vgic_init_maintenance_interrupt(void *info)
1448{
1449	enable_percpu_irq(vgic_maint_irq, 0);
1450}
1451
1452static int vgic_cpu_notify(struct notifier_block *self,
1453			   unsigned long action, void *cpu)
1454{
1455	switch (action) {
1456	case CPU_STARTING:
1457	case CPU_STARTING_FROZEN:
1458		vgic_init_maintenance_interrupt(NULL);
1459		break;
1460	case CPU_DYING:
1461	case CPU_DYING_FROZEN:
1462		disable_percpu_irq(vgic_maint_irq);
1463		break;
1464	}
1465
1466	return NOTIFY_OK;
1467}
1468
1469static struct notifier_block vgic_cpu_nb = {
1470	.notifier_call = vgic_cpu_notify,
1471};
1472
1473int kvm_vgic_hyp_init(void)
1474{
1475	int ret;
1476	struct resource vctrl_res;
1477	struct resource vcpu_res;
1478
1479	vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
1480	if (!vgic_node) {
1481		kvm_err("error: no compatible vgic node in DT\n");
1482		return -ENODEV;
1483	}
1484
1485	vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0);
1486	if (!vgic_maint_irq) {
1487		kvm_err("error getting vgic maintenance irq from DT\n");
1488		ret = -ENXIO;
1489		goto out;
1490	}
1491
1492	ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler,
1493				 "vgic", kvm_get_running_vcpus());
1494	if (ret) {
1495		kvm_err("Cannot register interrupt %d\n", vgic_maint_irq);
1496		goto out;
1497	}
1498
1499	ret = __register_cpu_notifier(&vgic_cpu_nb);
1500	if (ret) {
1501		kvm_err("Cannot register vgic CPU notifier\n");
1502		goto out_free_irq;
1503	}
1504
1505	ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
1506	if (ret) {
1507		kvm_err("Cannot obtain VCTRL resource\n");
1508		goto out_free_irq;
1509	}
1510
1511	vgic_vctrl_base = of_iomap(vgic_node, 2);
1512	if (!vgic_vctrl_base) {
1513		kvm_err("Cannot ioremap VCTRL\n");
1514		ret = -ENOMEM;
1515		goto out_free_irq;
1516	}
1517
1518	vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR);
1519	vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1;
1520
1521	ret = create_hyp_io_mappings(vgic_vctrl_base,
1522				     vgic_vctrl_base + resource_size(&vctrl_res),
1523				     vctrl_res.start);
1524	if (ret) {
1525		kvm_err("Cannot map VCTRL into hyp\n");
1526		goto out_unmap;
1527	}
1528
1529	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
1530		 vctrl_res.start, vgic_maint_irq);
1531	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
1532
1533	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
1534		kvm_err("Cannot obtain VCPU resource\n");
1535		ret = -ENXIO;
1536		goto out_unmap;
1537	}
1538	vgic_vcpu_base = vcpu_res.start;
1539
1540	goto out;
1541
1542out_unmap:
1543	iounmap(vgic_vctrl_base);
1544out_free_irq:
1545	free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus());
1546out:
1547	of_node_put(vgic_node);
1548	return ret;
1549}
1550
1551/**
1552 * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
1553 * @kvm: pointer to the kvm struct
1554 *
1555 * Map the virtual CPU interface into the VM before running any VCPUs.  We
1556 * can't do this at creation time, because user space must first set the
1557 * virtual CPU interface address in the guest physical address space.  Also
1558 * initialize the ITARGETSRn regs to 0 on the emulated distributor.
1559 */
1560int kvm_vgic_init(struct kvm *kvm)
1561{
1562	int ret = 0, i;
1563
1564	if (!irqchip_in_kernel(kvm))
1565		return 0;
1566
1567	mutex_lock(&kvm->lock);
1568
1569	if (vgic_initialized(kvm))
1570		goto out;
1571
1572	if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
1573	    IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
1574		kvm_err("Need to set vgic cpu and dist addresses first\n");
1575		ret = -ENXIO;
1576		goto out;
1577	}
1578
1579	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
1580				    vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE);
1581	if (ret) {
1582		kvm_err("Unable to remap VGIC CPU to VCPU\n");
1583		goto out;
1584	}
1585
1586	for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
1587		vgic_set_target_reg(kvm, 0, i);
1588
1589	kvm->arch.vgic.ready = true;
1590out:
1591	mutex_unlock(&kvm->lock);
1592	return ret;
1593}
1594
1595int kvm_vgic_create(struct kvm *kvm)
1596{
1597	int i, vcpu_lock_idx = -1, ret = 0;
1598	struct kvm_vcpu *vcpu;
1599
1600	mutex_lock(&kvm->lock);
1601
1602	if (kvm->arch.vgic.vctrl_base) {
1603		ret = -EEXIST;
1604		goto out;
1605	}
1606
1607	/*
1608	 * Any time a vcpu is run, vcpu_load is called which tries to grab the
1609	 * vcpu->mutex.  By grabbing the vcpu->mutex of all VCPUs we ensure
1610	 * that no other VCPUs are run while we create the vgic.
1611	 */
1612	kvm_for_each_vcpu(i, vcpu, kvm) {
1613		if (!mutex_trylock(&vcpu->mutex))
1614			goto out_unlock;
1615		vcpu_lock_idx = i;
1616	}
1617
1618	kvm_for_each_vcpu(i, vcpu, kvm) {
1619		if (vcpu->arch.has_run_once) {
1620			ret = -EBUSY;
1621			goto out_unlock;
1622		}
1623	}
1624
1625	spin_lock_init(&kvm->arch.vgic.lock);
1626	kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
1627	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
1628	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
1629
1630out_unlock:
1631	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
1632		vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
1633		mutex_unlock(&vcpu->mutex);
1634	}
1635
1636out:
1637	mutex_unlock(&kvm->lock);
1638	return ret;
1639}
1640
1641static bool vgic_ioaddr_overlap(struct kvm *kvm)
1642{
1643	phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
1644	phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
1645
1646	if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
1647		return 0;
1648	if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
1649	    (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
1650		return -EBUSY;
1651	return 0;
1652}
1653
1654static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
1655			      phys_addr_t addr, phys_addr_t size)
1656{
1657	int ret;
1658
1659	if (addr & ~KVM_PHYS_MASK)
1660		return -E2BIG;
1661
1662	if (addr & (SZ_4K - 1))
1663		return -EINVAL;
1664
1665	if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
1666		return -EEXIST;
1667	if (addr + size < addr)
1668		return -EINVAL;
1669
1670	*ioaddr = addr;
1671	ret = vgic_ioaddr_overlap(kvm);
1672	if (ret)
1673		*ioaddr = VGIC_ADDR_UNDEF;
1674
1675	return ret;
1676}
1677
1678/**
1679 * kvm_vgic_addr - set or get vgic VM base addresses
1680 * @kvm:   pointer to the vm struct
1681 * @type:  the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX
1682 * @addr:  pointer to address value
1683 * @write: if true set the address in the VM address space, if false read the
1684 *          address
1685 *
1686 * Set or get the vgic base addresses for the distributor and the virtual CPU
1687 * interface in the VM physical address space.  These addresses are properties
1688 * of the emulated core/SoC and therefore user space initially knows this
1689 * information.
1690 */
1691int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
1692{
1693	int r = 0;
1694	struct vgic_dist *vgic = &kvm->arch.vgic;
1695
1696	mutex_lock(&kvm->lock);
1697	switch (type) {
1698	case KVM_VGIC_V2_ADDR_TYPE_DIST:
1699		if (write) {
1700			r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
1701					       *addr, KVM_VGIC_V2_DIST_SIZE);
1702		} else {
1703			*addr = vgic->vgic_dist_base;
1704		}
1705		break;
1706	case KVM_VGIC_V2_ADDR_TYPE_CPU:
1707		if (write) {
1708			r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
1709					       *addr, KVM_VGIC_V2_CPU_SIZE);
1710		} else {
1711			*addr = vgic->vgic_cpu_base;
1712		}
1713		break;
1714	default:
1715		r = -ENODEV;
1716	}
1717
1718	mutex_unlock(&kvm->lock);
1719	return r;
1720}
1721
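/*
 * Userspace view of the helper above (illustrative sketch, assuming
 * the generic KVM device API): before the first vcpu runs, the VMM
 * typically places the distributor with something like
 *
 *	u64 gpa = ...;				(guest-chosen base address)
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
 *		.attr  = KVM_VGIC_V2_ADDR_TYPE_DIST,
 *		.addr  = (u64)(unsigned long)&gpa,
 *	};
 *	ioctl(vgic_dev_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * which reaches vgic_set_attr() below and lands in kvm_vgic_addr()
 * above with write == true.
 */
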
1722static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
1723				 struct kvm_exit_mmio *mmio, phys_addr_t offset)
1724{
1725	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1726	u32 reg, mask = 0, shift = 0;
1727	bool updated = false;
1728
1729	switch (offset & ~0x3) {
1730	case GIC_CPU_CTRL:
1731		mask = GICH_VMCR_CTRL_MASK;
1732		shift = GICH_VMCR_CTRL_SHIFT;
1733		break;
1734	case GIC_CPU_PRIMASK:
1735		mask = GICH_VMCR_PRIMASK_MASK;
1736		shift = GICH_VMCR_PRIMASK_SHIFT;
1737		break;
1738	case GIC_CPU_BINPOINT:
1739		mask = GICH_VMCR_BINPOINT_MASK;
1740		shift = GICH_VMCR_BINPOINT_SHIFT;
1741		break;
1742	case GIC_CPU_ALIAS_BINPOINT:
1743		mask = GICH_VMCR_ALIAS_BINPOINT_MASK;
1744		shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT;
1745		break;
1746	}
1747
1748	if (!mmio->is_write) {
1749		reg = (vgic_cpu->vgic_vmcr & mask) >> shift;
1750		mmio_data_write(mmio, ~0, reg);
1751	} else {
1752		reg = mmio_data_read(mmio, ~0);
1753		reg = (reg << shift) & mask;
1754		if (reg != (vgic_cpu->vgic_vmcr & mask))
1755			updated = true;
1756		vgic_cpu->vgic_vmcr &= ~mask;
1757		vgic_cpu->vgic_vmcr |= reg;
1758	}
1759	return updated;
1760}
1761
1762static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
1763			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
1764{
1765	return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
1766}
1767
1768static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
1769				  struct kvm_exit_mmio *mmio,
1770				  phys_addr_t offset)
1771{
1772	u32 reg;
1773
1774	if (mmio->is_write)
1775		return false;
1776
1777	/* GICC_IIDR */
1778	reg = (PRODUCT_ID_KVM << 20) |
1779	      (GICC_ARCH_VERSION_V2 << 16) |
1780	      (IMPLEMENTER_ARM << 0);
1781	mmio_data_write(mmio, ~0, reg);
1782	return false;
1783}
1784
1785/*
1786 * CPU Interface Register accesses - these are not accessed by the VM, but by
1787 * user space for saving and restoring VGIC state.
1788 */
1789static const struct mmio_range vgic_cpu_ranges[] = {
1790	{
1791		.base		= GIC_CPU_CTRL,
1792		.len		= 12,
1793		.handle_mmio	= handle_cpu_mmio_misc,
1794	},
1795	{
1796		.base		= GIC_CPU_ALIAS_BINPOINT,
1797		.len		= 4,
1798		.handle_mmio	= handle_mmio_abpr,
1799	},
1800	{
1801		.base		= GIC_CPU_ACTIVEPRIO,
1802		.len		= 16,
1803		.handle_mmio	= handle_mmio_raz_wi,
1804	},
1805	{
1806		.base		= GIC_CPU_IDENT,
1807		.len		= 4,
1808		.handle_mmio	= handle_cpu_mmio_ident,
1809	},
1810};
1811
1812static int vgic_attr_regs_access(struct kvm_device *dev,
1813				 struct kvm_device_attr *attr,
1814				 u32 *reg, bool is_write)
1815{
1816	const struct mmio_range *r = NULL, *ranges;
1817	phys_addr_t offset;
1818	int ret, cpuid, c;
1819	struct kvm_vcpu *vcpu, *tmp_vcpu;
1820	struct vgic_dist *vgic;
1821	struct kvm_exit_mmio mmio;
1822
1823	offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
1824	cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
1825		KVM_DEV_ARM_VGIC_CPUID_SHIFT;
1826
1827	mutex_lock(&dev->kvm->lock);
1828
1829	if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
1830		ret = -EINVAL;
1831		goto out;
1832	}
1833
1834	vcpu = kvm_get_vcpu(dev->kvm, cpuid);
1835	vgic = &dev->kvm->arch.vgic;
1836
1837	mmio.len = 4;
1838	mmio.is_write = is_write;
1839	if (is_write)
1840		mmio_data_write(&mmio, ~0, *reg);
1841	switch (attr->group) {
1842	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
1843		mmio.phys_addr = vgic->vgic_dist_base + offset;
1844		ranges = vgic_dist_ranges;
1845		break;
1846	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
1847		mmio.phys_addr = vgic->vgic_cpu_base + offset;
1848		ranges = vgic_cpu_ranges;
1849		break;
1850	default:
1851		BUG();
1852	}
1853	r = find_matching_range(ranges, &mmio, offset);
1854
1855	if (unlikely(!r || !r->handle_mmio)) {
1856		ret = -ENXIO;
1857		goto out;
1858	}
1859
1860
1861	spin_lock(&vgic->lock);
1862
1863	/*
1864	 * Ensure that no other VCPU is running by checking the vcpu->cpu
1865	 * field.  If no other VCPUs are running we can safely access the VGIC
1866	 * state, because even if another VCPU is run after this point, that
1867	 * VCPU will not touch the vgic state, because it will block on
1868	 * getting the vgic->lock in kvm_vgic_sync_hwstate().
1869	 */
1870	kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
1871		if (unlikely(tmp_vcpu->cpu != -1)) {
1872			ret = -EBUSY;
1873			goto out_vgic_unlock;
1874		}
1875	}
1876
1877	/*
1878	 * Move all pending IRQs from the LRs on all VCPUs so the pending
1879	 * state can be properly represented in the register state accessible
1880	 * through this API.
1881	 */
1882	kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
1883		vgic_unqueue_irqs(tmp_vcpu);
1884
1885	offset -= r->base;
1886	r->handle_mmio(vcpu, &mmio, offset);
1887
1888	if (!is_write)
1889		*reg = mmio_data_read(&mmio, ~0);
1890
1891	ret = 0;
1892out_vgic_unlock:
1893	spin_unlock(&vgic->lock);
1894out:
1895	mutex_unlock(&dev->kvm->lock);
1896	return ret;
1897}
1898
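/*
 * Illustrative sketch of the save/restore path above (not in the
 * original): to read a distributor register for VCPU "c" during
 * migration, userspace encodes the cpuid and byte offset into the
 * attribute, e.g. for GICD_CTLR (offset 0):
 *
 *	u32 val;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
 *		.attr  = ((u64)c << KVM_DEV_ARM_VGIC_CPUID_SHIFT) | 0x0,
 *		.addr  = (u64)(unsigned long)&val,
 *	};
 *	ioctl(vgic_dev_fd, KVM_GET_DEVICE_ATTR, &attr);
 *
 * vgic_attr_regs_access() then checks that no vcpu is running, pulls
 * pending state out of the LRs with vgic_unqueue_irqs(), and invokes
 * the same handler the guest MMIO path would use.
 */
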
1899static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1900{
1901	int r;
1902
1903	switch (attr->group) {
1904	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
1905		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
1906		u64 addr;
1907		unsigned long type = (unsigned long)attr->attr;
1908
1909		if (copy_from_user(&addr, uaddr, sizeof(addr)))
1910			return -EFAULT;
1911
1912		r = kvm_vgic_addr(dev->kvm, type, &addr, true);
1913		return (r == -ENODEV) ? -ENXIO : r;
1914	}
1915
1916	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
1917	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
1918		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
1919		u32 reg;
1920
1921		if (get_user(reg, uaddr))
1922			return -EFAULT;
1923
1924		return vgic_attr_regs_access(dev, attr, &reg, true);
1925	}
1926
1927	}
1928
1929	return -ENXIO;
1930}
1931
1932static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1933{
1934	int r = -ENXIO;
1935
1936	switch (attr->group) {
1937	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
1938		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
1939		u64 addr;
1940		unsigned long type = (unsigned long)attr->attr;
1941
1942		r = kvm_vgic_addr(dev->kvm, type, &addr, false);
1943		if (r)
1944			return (r == -ENODEV) ? -ENXIO : r;
1945
1946		if (copy_to_user(uaddr, &addr, sizeof(addr)))
1947			return -EFAULT;
1948		break;
1949	}
1950
1951	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
1952	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
1953		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
1954		u32 reg = 0;
1955
1956		r = vgic_attr_regs_access(dev, attr, &reg, false);
1957		if (r)
1958			return r;
1959		r = put_user(reg, uaddr);
1960		break;
1961	}
1962
1963	}
1964
1965	return r;
1966}
1967
1968static int vgic_has_attr_regs(const struct mmio_range *ranges,
1969			      phys_addr_t offset)
1970{
1971	struct kvm_exit_mmio dev_attr_mmio;
1972
1973	dev_attr_mmio.len = 4;
1974	if (find_matching_range(ranges, &dev_attr_mmio, offset))
1975		return 0;
1976	else
1977		return -ENXIO;
1978}
1979
1980static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1981{
1982	phys_addr_t offset;
1983
1984	switch (attr->group) {
1985	case KVM_DEV_ARM_VGIC_GRP_ADDR:
1986		switch (attr->attr) {
1987		case KVM_VGIC_V2_ADDR_TYPE_DIST:
1988		case KVM_VGIC_V2_ADDR_TYPE_CPU:
1989			return 0;
1990		}
1991		break;
1992	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
1993		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
1994		return vgic_has_attr_regs(vgic_dist_ranges, offset);
1995	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
1996		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
1997		return vgic_has_attr_regs(vgic_cpu_ranges, offset);
1998	}
1999	return -ENXIO;
2000}
2001
2002static void vgic_destroy(struct kvm_device *dev)
2003{
2004	kfree(dev);
2005}
2006
2007static int vgic_create(struct kvm_device *dev, u32 type)
2008{
2009	return kvm_vgic_create(dev->kvm);
2010}
2011
2012struct kvm_device_ops kvm_arm_vgic_v2_ops = {
2013	.name = "kvm-arm-vgic",
2014	.create = vgic_create,
2015	.destroy = vgic_destroy,
2016	.set_attr = vgic_set_attr,
2017	.get_attr = vgic_get_attr,
2018	.has_attr = vgic_has_attr,
2019};