   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
   4 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
   5 */
   6
   7#include <linux/bug.h>
   8#include <linux/cpu_pm.h>
   9#include <linux/errno.h>
  10#include <linux/err.h>
  11#include <linux/kvm_host.h>
  12#include <linux/list.h>
  13#include <linux/module.h>
  14#include <linux/vmalloc.h>
  15#include <linux/fs.h>
  16#include <linux/mman.h>
  17#include <linux/sched.h>
  18#include <linux/kvm.h>
  19#include <linux/kvm_irqfd.h>
  20#include <linux/irqbypass.h>
  21#include <linux/sched/stat.h>
  22#include <trace/events/kvm.h>
  23#include <kvm/arm_pmu.h>
  24#include <kvm/arm_psci.h>
  25
  26#define CREATE_TRACE_POINTS
  27#include "trace.h"
  28
  29#include <linux/uaccess.h>
  30#include <asm/ptrace.h>
  31#include <asm/mman.h>
  32#include <asm/tlbflush.h>
  33#include <asm/cacheflush.h>
  34#include <asm/cpufeature.h>
  35#include <asm/virt.h>
  36#include <asm/kvm_arm.h>
  37#include <asm/kvm_asm.h>
  38#include <asm/kvm_mmu.h>
  39#include <asm/kvm_emulate.h>
  40#include <asm/kvm_coproc.h>
  41#include <asm/sections.h>
  42
  43#ifdef REQUIRES_VIRT
  44__asm__(".arch_extension	virt");
  45#endif
  46
  47DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data);
  48static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
  49
  50/* Per-CPU variable containing the currently running vcpu. */
  51static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
  52
  53/* The VMID used in the VTTBR */
  54static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
  55static u32 kvm_next_vmid;
  56static DEFINE_SPINLOCK(kvm_vmid_lock);
  57
  58static bool vgic_present;
  59
  60static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
  61
  62static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
  63{
  64	__this_cpu_write(kvm_arm_running_vcpu, vcpu);
  65}
  66
  67DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
  68
  69/**
  70 * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
  71 * Must be called from non-preemptible context
  72 */
  73struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
  74{
  75	return __this_cpu_read(kvm_arm_running_vcpu);
  76}
  77
  78/**
  79 * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.
  80 */
  81struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
  82{
  83	return &kvm_arm_running_vcpu;
  84}
  85
  86int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
  87{
  88	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
  89}
  90
  91int kvm_arch_hardware_setup(void)
  92{
  93	return 0;
  94}
  95
  96int kvm_arch_check_processor_compat(void)
  97{
  98	return 0;
  99}
 100
 101
 102/**
 103 * kvm_arch_init_vm - initializes a VM data structure
  104 * @kvm:	pointer to the KVM struct
 * @type:	VM type, passed through to kvm_arm_setup_stage2()
 105 */
 106int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 107{
 108	int ret, cpu;
 109
 110	ret = kvm_arm_setup_stage2(kvm, type);
 111	if (ret)
 112		return ret;
 113
 114	kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
 115	if (!kvm->arch.last_vcpu_ran)
 116		return -ENOMEM;
 117
 118	for_each_possible_cpu(cpu)
 119		*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
 120
 121	ret = kvm_alloc_stage2_pgd(kvm);
 122	if (ret)
 123		goto out_fail_alloc;
 124
 125	ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
 126	if (ret)
 127		goto out_free_stage2_pgd;
 128
 129	kvm_vgic_early_init(kvm);
 130
 131	/* Mark the initial VMID generation invalid */
 132	kvm->arch.vmid.vmid_gen = 0;
 133
 134	/* The maximum number of VCPUs is limited by the host's GIC model */
 135	kvm->arch.max_vcpus = vgic_present ?
 136				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
 137
 138	return ret;
 139out_free_stage2_pgd:
 140	kvm_free_stage2_pgd(kvm);
 141out_fail_alloc:
 142	free_percpu(kvm->arch.last_vcpu_ran);
 143	kvm->arch.last_vcpu_ran = NULL;
 144	return ret;
 145}
 146
 147int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
 148{
 149	return 0;
 150}
 151
 152vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 153{
 154	return VM_FAULT_SIGBUS;
 155}
 156
 157
 158/**
 159 * kvm_arch_destroy_vm - destroy the VM data structure
 160 * @kvm:	pointer to the KVM struct
 161 */
 162void kvm_arch_destroy_vm(struct kvm *kvm)
 163{
 164	int i;
 165
 166	kvm_vgic_destroy(kvm);
 167
 168	free_percpu(kvm->arch.last_vcpu_ran);
 169	kvm->arch.last_vcpu_ran = NULL;
 170
 171	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 172		if (kvm->vcpus[i]) {
 173			kvm_arch_vcpu_free(kvm->vcpus[i]);
 174			kvm->vcpus[i] = NULL;
 175		}
 176	}
 177	atomic_set(&kvm->online_vcpus, 0);
 178}
 179
 180int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 181{
 182	int r;
 183	switch (ext) {
 184	case KVM_CAP_IRQCHIP:
 185		r = vgic_present;
 186		break;
 187	case KVM_CAP_IOEVENTFD:
 188	case KVM_CAP_DEVICE_CTRL:
 189	case KVM_CAP_USER_MEMORY:
 190	case KVM_CAP_SYNC_MMU:
 191	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 192	case KVM_CAP_ONE_REG:
 193	case KVM_CAP_ARM_PSCI:
 194	case KVM_CAP_ARM_PSCI_0_2:
 195	case KVM_CAP_READONLY_MEM:
 196	case KVM_CAP_MP_STATE:
 197	case KVM_CAP_IMMEDIATE_EXIT:
 198	case KVM_CAP_VCPU_EVENTS:
 199	case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
 200		r = 1;
 201		break;
 202	case KVM_CAP_ARM_SET_DEVICE_ADDR:
 203		r = 1;
 204		break;
 205	case KVM_CAP_NR_VCPUS:
 206		r = num_online_cpus();
 207		break;
 208	case KVM_CAP_MAX_VCPUS:
 209		r = KVM_MAX_VCPUS;
 210		break;
 211	case KVM_CAP_MAX_VCPU_ID:
 212		r = KVM_MAX_VCPU_ID;
 213		break;
 214	case KVM_CAP_MSI_DEVID:
 215		if (!kvm)
 216			r = -EINVAL;
 217		else
 218			r = kvm->arch.vgic.msis_require_devid;
 219		break;
 220	case KVM_CAP_ARM_USER_IRQ:
 221		/*
 222		 * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
 223		 * (bump this number if adding more devices)
 224		 */
 225		r = 1;
 226		break;
 227	default:
 228		r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
 229		break;
 230	}
 231	return r;
 232}
 233
 234long kvm_arch_dev_ioctl(struct file *filp,
 235			unsigned int ioctl, unsigned long arg)
 236{
 237	return -EINVAL;
 238}
 239
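/*
 * The kvm structure is mapped into Hyp on non-VHE hosts (see the
 * create_hyp_mappings() call in kvm_arch_init_vm()), so it is allocated
 * with kzalloc() there; VHE hosts need no Hyp mapping and can use the
 * vmalloc area for this fairly large structure.
 */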
 240struct kvm *kvm_arch_alloc_vm(void)
 241{
 242	if (!has_vhe())
 243		return kzalloc(sizeof(struct kvm), GFP_KERNEL);
 244
 245	return vzalloc(sizeof(struct kvm));
 246}
 247
 248void kvm_arch_free_vm(struct kvm *kvm)
 249{
 250	if (!has_vhe())
 251		kfree(kvm);
 252	else
 253		vfree(kvm);
 254}
 255
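/*
 * Allocate and initialise a new vcpu and map it into Hyp. Creation is
 * refused once an in-kernel vgic has been initialised, and the vcpu id
 * must stay below the limit imposed by the host's GIC model
 * (kvm->arch.max_vcpus).
 */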
 256struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 257{
 258	int err;
 259	struct kvm_vcpu *vcpu;
 260
 261	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
 262		err = -EBUSY;
 263		goto out;
 264	}
 265
 266	if (id >= kvm->arch.max_vcpus) {
 267		err = -EINVAL;
 268		goto out;
 269	}
 270
 271	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 272	if (!vcpu) {
 273		err = -ENOMEM;
 274		goto out;
 275	}
 276
 277	err = kvm_vcpu_init(vcpu, kvm, id);
 278	if (err)
 279		goto free_vcpu;
 280
 281	err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
 282	if (err)
 283		goto vcpu_uninit;
 284
 285	return vcpu;
 286vcpu_uninit:
 287	kvm_vcpu_uninit(vcpu);
 288free_vcpu:
 289	kmem_cache_free(kvm_vcpu_cache, vcpu);
 290out:
 291	return ERR_PTR(err);
 292}
 293
 294void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 295{
 296}
 297
 298void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 299{
 300	if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
 301		static_branch_dec(&userspace_irqchip_in_use);
 302
 303	kvm_mmu_free_memory_caches(vcpu);
 304	kvm_timer_vcpu_terminate(vcpu);
 305	kvm_pmu_vcpu_destroy(vcpu);
 306	kvm_vcpu_uninit(vcpu);
 307	kmem_cache_free(kvm_vcpu_cache, vcpu);
 308}
 309
 310void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 311{
 312	kvm_arch_vcpu_free(vcpu);
 313}
 314
 315int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 316{
 317	return kvm_timer_is_pending(vcpu);
 318}
 319
 320void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 321{
 322	/*
 323	 * If we're about to block (most likely because we've just hit a
 324	 * WFI), we need to sync back the state of the GIC CPU interface
  325	 * so that we have the latest PMR and group enables. This ensures
 326	 * that kvm_arch_vcpu_runnable has up-to-date data to decide
 327	 * whether we have pending interrupts.
 328	 */
 329	preempt_disable();
 330	kvm_vgic_vmcr_sync(vcpu);
 331	preempt_enable();
 332
 333	kvm_vgic_v4_enable_doorbell(vcpu);
 334}
 335
 336void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 337{
 338	kvm_vgic_v4_disable_doorbell(vcpu);
 339}
 340
 341int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 342{
 343	/* Force users to call KVM_ARM_VCPU_INIT */
 344	vcpu->arch.target = -1;
 345	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 346
 347	/* Set up the timer */
 348	kvm_timer_vcpu_init(vcpu);
 349
 350	kvm_pmu_vcpu_init(vcpu);
 351
 352	kvm_arm_reset_debug_ptr(vcpu);
 353
 354	return kvm_vgic_vcpu_init(vcpu);
 355}
 356
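/*
 * Load a vcpu's state onto the physical CPU it is about to run on: flush
 * this CPU's stage-2 TLB entries if a different vcpu of this VM ran here
 * last, hand the vgic, timer, system register, FP and PMU state to the
 * hardware, and only trap WFE when the CPU is shared with other tasks.
 */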
 357void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 358{
 359	int *last_ran;
 360	kvm_host_data_t *cpu_data;
 361
 362	last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
 363	cpu_data = this_cpu_ptr(&kvm_host_data);
 364
 365	/*
 366	 * We might get preempted before the vCPU actually runs, but
 367	 * over-invalidation doesn't affect correctness.
 368	 */
 369	if (*last_ran != vcpu->vcpu_id) {
 370		kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
 371		*last_ran = vcpu->vcpu_id;
 372	}
 373
 374	vcpu->cpu = cpu;
 375	vcpu->arch.host_cpu_context = &cpu_data->host_ctxt;
 376
 377	kvm_arm_set_running_vcpu(vcpu);
 378	kvm_vgic_load(vcpu);
 379	kvm_timer_vcpu_load(vcpu);
 380	kvm_vcpu_load_sysregs(vcpu);
 381	kvm_arch_vcpu_load_fp(vcpu);
 382	kvm_vcpu_pmu_restore_guest(vcpu);
 383
 384	if (single_task_running())
 385		vcpu_clear_wfe_traps(vcpu);
 386	else
 387		vcpu_set_wfe_traps(vcpu);
 388
 389	vcpu_ptrauth_setup_lazy(vcpu);
 390}
 391
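/*
 * Counterpart of kvm_arch_vcpu_load(): save the FP, system register,
 * timer, vgic and PMU state back and mark the vcpu as no longer resident
 * on any physical CPU.
 */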
 392void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 393{
 394	kvm_arch_vcpu_put_fp(vcpu);
 395	kvm_vcpu_put_sysregs(vcpu);
 396	kvm_timer_vcpu_put(vcpu);
 397	kvm_vgic_put(vcpu);
 398	kvm_vcpu_pmu_restore_host(vcpu);
 399
 400	vcpu->cpu = -1;
 401
 402	kvm_arm_set_running_vcpu(NULL);
 403}
 404
 405static void vcpu_power_off(struct kvm_vcpu *vcpu)
 406{
 407	vcpu->arch.power_off = true;
 408	kvm_make_request(KVM_REQ_SLEEP, vcpu);
 409	kvm_vcpu_kick(vcpu);
 410}
 411
 412int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 413				    struct kvm_mp_state *mp_state)
 414{
 415	if (vcpu->arch.power_off)
 416		mp_state->mp_state = KVM_MP_STATE_STOPPED;
 417	else
 418		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
 419
 420	return 0;
 421}
 422
 423int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 424				    struct kvm_mp_state *mp_state)
 425{
 426	int ret = 0;
 427
 428	switch (mp_state->mp_state) {
 429	case KVM_MP_STATE_RUNNABLE:
 430		vcpu->arch.power_off = false;
 431		break;
 432	case KVM_MP_STATE_STOPPED:
 433		vcpu_power_off(vcpu);
 434		break;
 435	default:
 436		ret = -EINVAL;
 437	}
 438
 439	return ret;
 440}
 441
 442/**
 443 * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
 444 * @v:		The VCPU pointer
 445 *
 446 * If the guest CPU is not waiting for interrupts or an interrupt line is
 447 * asserted, the CPU is by definition runnable.
 448 */
 449int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 450{
 451	bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
 452	return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
 453		&& !v->arch.power_off && !v->arch.pause);
 454}
 455
 456bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
 457{
 458	return vcpu_mode_priv(vcpu);
 459}
 460
 461/* Just ensure a guest exit from a particular CPU */
 462static void exit_vm_noop(void *info)
 463{
 464}
 465
 466void force_vm_exit(const cpumask_t *mask)
 467{
 468	preempt_disable();
 469	smp_call_function_many(mask, exit_vm_noop, NULL, true);
 470	preempt_enable();
 471}
 472
 473/**
 474 * need_new_vmid_gen - check that the VMID is still valid
 475 * @vmid: The VMID to check
 476 *
 477 * return true if there is a new generation of VMIDs being used
 478 *
 479 * The hardware supports a limited set of values with the value zero reserved
 480 * for the host, so we check if an assigned value belongs to a previous
  481 * generation, which requires us to assign a new value. If we're the
 482 * first to use a VMID for the new generation, we must flush necessary caches
 483 * and TLBs on all CPUs.
 484 */
 485static bool need_new_vmid_gen(struct kvm_vmid *vmid)
 486{
 487	u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
 488	smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
 489	return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
 490}
 491
 492/**
 493 * update_vmid - Update the vmid with a valid VMID for the current generation
 495 * @vmid: The stage-2 VMID information struct
 496 */
 497static void update_vmid(struct kvm_vmid *vmid)
 498{
 499	if (!need_new_vmid_gen(vmid))
 500		return;
 501
 502	spin_lock(&kvm_vmid_lock);
 503
 504	/*
 505	 * We need to re-check the vmid_gen here to ensure that if another vcpu
 506	 * already allocated a valid vmid for this vm, then this vcpu should
 507	 * use the same vmid.
 508	 */
 509	if (!need_new_vmid_gen(vmid)) {
 510		spin_unlock(&kvm_vmid_lock);
 511		return;
 512	}
 513
 514	/* First user of a new VMID generation? */
 515	if (unlikely(kvm_next_vmid == 0)) {
 516		atomic64_inc(&kvm_vmid_gen);
 517		kvm_next_vmid = 1;
 518
 519		/*
 520		 * On SMP we know no other CPUs can use this CPU's or each
 521		 * other's VMID after force_vm_exit returns since the
 522		 * kvm_vmid_lock blocks them from reentry to the guest.
 523		 */
 524		force_vm_exit(cpu_all_mask);
 525		/*
 526		 * Now broadcast TLB + ICACHE invalidation over the inner
 527		 * shareable domain to make sure all data structures are
 528		 * clean.
 529		 */
 530		kvm_call_hyp(__kvm_flush_vm_context);
 531	}
 532
 533	vmid->vmid = kvm_next_vmid;
 534	kvm_next_vmid++;
 535	kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
 536
 537	smp_wmb();
 538	WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
 539
 540	spin_unlock(&kvm_vmid_lock);
 541}
 542
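/*
 * Lazy first-run initialisation, performed on the first KVM_RUN rather
 * than at vcpu creation time: require the vcpu to be finalized, map the
 * VGIC resources when an in-kernel irqchip is used, and enable the timer
 * and the PMU for this vcpu.
 */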
 543static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 544{
 545	struct kvm *kvm = vcpu->kvm;
 546	int ret = 0;
 547
 548	if (likely(vcpu->arch.has_run_once))
 549		return 0;
 550
 551	if (!kvm_arm_vcpu_is_finalized(vcpu))
 552		return -EPERM;
 553
 554	vcpu->arch.has_run_once = true;
 555
 556	if (likely(irqchip_in_kernel(kvm))) {
 557		/*
 558		 * Map the VGIC hardware resources before running a vcpu the
 559		 * first time on this VM.
 560		 */
 561		if (unlikely(!vgic_ready(kvm))) {
 562			ret = kvm_vgic_map_resources(kvm);
 563			if (ret)
 564				return ret;
 565		}
 566	} else {
 567		/*
 568		 * Tell the rest of the code that there are userspace irqchip
 569		 * VMs in the wild.
 570		 */
 571		static_branch_inc(&userspace_irqchip_in_use);
 572	}
 573
 574	ret = kvm_timer_enable(vcpu);
 575	if (ret)
 576		return ret;
 577
 578	ret = kvm_arm_pmu_v3_enable(vcpu);
 579
 580	return ret;
 581}
 582
 583bool kvm_arch_intc_initialized(struct kvm *kvm)
 584{
 585	return vgic_initialized(kvm);
 586}
 587
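/*
 * kvm_arm_halt_guest() pauses every vcpu of a VM and forces them to sleep
 * via KVM_REQ_SLEEP; kvm_arm_resume_guest() clears the pause flag and
 * wakes them again. The irq bypass start/stop callbacks below use this
 * pair while interrupt forwarding is reconfigured.
 */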
 588void kvm_arm_halt_guest(struct kvm *kvm)
 589{
 590	int i;
 591	struct kvm_vcpu *vcpu;
 592
 593	kvm_for_each_vcpu(i, vcpu, kvm)
 594		vcpu->arch.pause = true;
 595	kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
 596}
 597
 598void kvm_arm_resume_guest(struct kvm *kvm)
 599{
 600	int i;
 601	struct kvm_vcpu *vcpu;
 602
 603	kvm_for_each_vcpu(i, vcpu, kvm) {
 604		vcpu->arch.pause = false;
 605		swake_up_one(kvm_arch_vcpu_wq(vcpu));
 606	}
 607}
 608
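/*
 * Handle KVM_REQ_SLEEP: wait on the vcpu wait queue until the vcpu is
 * neither powered off nor paused, re-arming the request if we were only
 * woken up by a signal.
 */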
 609static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
 610{
 611	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
 612
 613	swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
 614				       (!vcpu->arch.pause)));
 615
 616	if (vcpu->arch.power_off || vcpu->arch.pause) {
  617		/* Awoken to handle a signal, request that we sleep again later. */
 618		kvm_make_request(KVM_REQ_SLEEP, vcpu);
 619	}
 620
 621	/*
 622	 * Make sure we will observe a potential reset request if we've
 623	 * observed a change to the power state. Pairs with the smp_wmb() in
 624	 * kvm_psci_vcpu_on().
 625	 */
 626	smp_rmb();
 627}
 628
 629static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
 630{
 631	return vcpu->arch.target >= 0;
 632}
 633
 634static void check_vcpu_requests(struct kvm_vcpu *vcpu)
 635{
 636	if (kvm_request_pending(vcpu)) {
 637		if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
 638			vcpu_req_sleep(vcpu);
 639
 640		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
 641			kvm_reset_vcpu(vcpu);
 642
 643		/*
 644		 * Clear IRQ_PENDING requests that were made to guarantee
 645		 * that a VCPU sees new virtual interrupts.
 646		 */
 647		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
 648	}
 649}
 650
 651/**
 652 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
 653 * @vcpu:	The VCPU pointer
 654 * @run:	The kvm_run structure pointer used for userspace state exchange
 655 *
  656 * This function is called through the KVM_RUN ioctl from user space. It
  657 * will execute guest code in a loop until the time slice for the process is
  658 * used up or some emulation is needed from user space, in which case the
  659 * function will return with a return value of 0 and with the kvm_run structure
  660 * filled in with the required data for the requested emulation.
 661 */
 662int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 663{
 664	int ret;
 665
 666	if (unlikely(!kvm_vcpu_initialized(vcpu)))
 667		return -ENOEXEC;
 668
 669	ret = kvm_vcpu_first_run_init(vcpu);
 670	if (ret)
 671		return ret;
 672
 673	if (run->exit_reason == KVM_EXIT_MMIO) {
 674		ret = kvm_handle_mmio_return(vcpu, vcpu->run);
 675		if (ret)
 676			return ret;
 677	}
 678
 679	if (run->immediate_exit)
 680		return -EINTR;
 681
 682	vcpu_load(vcpu);
 683
 684	kvm_sigset_activate(vcpu);
 685
 686	ret = 1;
 687	run->exit_reason = KVM_EXIT_UNKNOWN;
 688	while (ret > 0) {
 689		/*
 690		 * Check conditions before entering the guest
 691		 */
 692		cond_resched();
 693
 694		update_vmid(&vcpu->kvm->arch.vmid);
 695
 696		check_vcpu_requests(vcpu);
 697
 698		/*
 699		 * Preparing the interrupts to be injected also
 700		 * involves poking the GIC, which must be done in a
 701		 * non-preemptible context.
 702		 */
 703		preempt_disable();
 704
 705		kvm_pmu_flush_hwstate(vcpu);
 706
 707		local_irq_disable();
 708
 709		kvm_vgic_flush_hwstate(vcpu);
 710
 711		/*
 712		 * Exit if we have a signal pending so that we can deliver the
 713		 * signal to user space.
 714		 */
 715		if (signal_pending(current)) {
 716			ret = -EINTR;
 717			run->exit_reason = KVM_EXIT_INTR;
 718		}
 719
 720		/*
 721		 * If we're using a userspace irqchip, then check if we need
 722		 * to tell a userspace irqchip about timer or PMU level
 723		 * changes and if so, exit to userspace (the actual level
 724		 * state gets updated in kvm_timer_update_run and
 725		 * kvm_pmu_update_run below).
 726		 */
 727		if (static_branch_unlikely(&userspace_irqchip_in_use)) {
 728			if (kvm_timer_should_notify_user(vcpu) ||
 729			    kvm_pmu_should_notify_user(vcpu)) {
 730				ret = -EINTR;
 731				run->exit_reason = KVM_EXIT_INTR;
 732			}
 733		}
 734
 735		/*
 736		 * Ensure we set mode to IN_GUEST_MODE after we disable
 737		 * interrupts and before the final VCPU requests check.
 738		 * See the comment in kvm_vcpu_exiting_guest_mode() and
 739		 * Documentation/virt/kvm/vcpu-requests.rst
 740		 */
 741		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
 742
 743		if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) ||
 744		    kvm_request_pending(vcpu)) {
 745			vcpu->mode = OUTSIDE_GUEST_MODE;
 746			isb(); /* Ensure work in x_flush_hwstate is committed */
 747			kvm_pmu_sync_hwstate(vcpu);
 748			if (static_branch_unlikely(&userspace_irqchip_in_use))
 749				kvm_timer_sync_hwstate(vcpu);
 750			kvm_vgic_sync_hwstate(vcpu);
 751			local_irq_enable();
 752			preempt_enable();
 753			continue;
 754		}
 755
 756		kvm_arm_setup_debug(vcpu);
 757
 758		/**************************************************************
 759		 * Enter the guest
 760		 */
 761		trace_kvm_entry(*vcpu_pc(vcpu));
 762		guest_enter_irqoff();
 763
 764		if (has_vhe()) {
 765			kvm_arm_vhe_guest_enter();
 766			ret = kvm_vcpu_run_vhe(vcpu);
 767			kvm_arm_vhe_guest_exit();
 768		} else {
 769			ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
 770		}
 771
 772		vcpu->mode = OUTSIDE_GUEST_MODE;
 773		vcpu->stat.exits++;
 774		/*
 775		 * Back from guest
 776		 *************************************************************/
 777
 778		kvm_arm_clear_debug(vcpu);
 779
 780		/*
 781		 * We must sync the PMU state before the vgic state so
 782		 * that the vgic can properly sample the updated state of the
 783		 * interrupt line.
 784		 */
 785		kvm_pmu_sync_hwstate(vcpu);
 786
 787		/*
 788		 * Sync the vgic state before syncing the timer state because
 789		 * the timer code needs to know if the virtual timer
 790		 * interrupts are active.
 791		 */
 792		kvm_vgic_sync_hwstate(vcpu);
 793
 794		/*
 795		 * Sync the timer hardware state before enabling interrupts as
 796		 * we don't want vtimer interrupts to race with syncing the
 797		 * timer virtual interrupt state.
 798		 */
 799		if (static_branch_unlikely(&userspace_irqchip_in_use))
 800			kvm_timer_sync_hwstate(vcpu);
 801
 802		kvm_arch_vcpu_ctxsync_fp(vcpu);
 803
 804		/*
 805		 * We may have taken a host interrupt in HYP mode (ie
 806		 * while executing the guest). This interrupt is still
 807		 * pending, as we haven't serviced it yet!
 808		 *
 809		 * We're now back in SVC mode, with interrupts
 810		 * disabled.  Enabling the interrupts now will have
 811		 * the effect of taking the interrupt again, in SVC
 812		 * mode this time.
 813		 */
 814		local_irq_enable();
 815
 816		/*
 817		 * We do local_irq_enable() before calling guest_exit() so
 818		 * that if a timer interrupt hits while running the guest we
 819		 * account that tick as being spent in the guest.  We enable
 820		 * preemption after calling guest_exit() so that if we get
  821		 * preempted we make sure ticks after that are not counted as
 822		 * guest time.
 823		 */
 824		guest_exit();
 825		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
 826
 827		/* Exit types that need handling before we can be preempted */
 828		handle_exit_early(vcpu, run, ret);
 829
 830		preempt_enable();
 831
 832		ret = handle_exit(vcpu, run, ret);
 833	}
 834
 835	/* Tell userspace about in-kernel device output levels */
 836	if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
 837		kvm_timer_update_run(vcpu);
 838		kvm_pmu_update_run(vcpu);
 839	}
 840
 841	kvm_sigset_deactivate(vcpu);
 842
 843	vcpu_put(vcpu);
 844	return ret;
 845}
 846
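/*
 * Assert or retire the virtual IRQ or FIQ line of a vcpu for the
 * userspace irqchip model by setting or clearing HCR_VI/HCR_VF, kicking
 * the vcpu only if the line state actually changed.
 */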
 847static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
 848{
 849	int bit_index;
 850	bool set;
 851	unsigned long *hcr;
 852
 853	if (number == KVM_ARM_IRQ_CPU_IRQ)
 854		bit_index = __ffs(HCR_VI);
 855	else /* KVM_ARM_IRQ_CPU_FIQ */
 856		bit_index = __ffs(HCR_VF);
 857
 858	hcr = vcpu_hcr(vcpu);
 859	if (level)
 860		set = test_and_set_bit(bit_index, hcr);
 861	else
 862		set = test_and_clear_bit(bit_index, hcr);
 863
 864	/*
 865	 * If we didn't change anything, no need to wake up or kick other CPUs
 866	 */
 867	if (set == level)
 868		return 0;
 869
 870	/*
  871	 * The vcpu's virtual irq state was updated, wake up sleeping VCPUs and
 872	 * trigger a world-switch round on the running physical CPU to set the
 873	 * virtual IRQ/FIQ fields in the HCR appropriately.
 874	 */
 875	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 876	kvm_vcpu_kick(vcpu);
 877
 878	return 0;
 879}
 880
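/*
 * Handle the KVM_IRQ_LINE ioctl. The irq field packs the interrupt type
 * (CPU, PPI or SPI), the target vcpu index and the interrupt number using
 * the KVM_ARM_IRQ_* shifts and masks. CPU interrupts are only valid with
 * a userspace irqchip; PPIs and SPIs require the in-kernel vgic.
 */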
 881int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 882			  bool line_status)
 883{
 884	u32 irq = irq_level->irq;
 885	unsigned int irq_type, vcpu_idx, irq_num;
 886	int nrcpus = atomic_read(&kvm->online_vcpus);
 887	struct kvm_vcpu *vcpu = NULL;
 888	bool level = irq_level->level;
 889
 890	irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
 891	vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
 892	vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
 893	irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
 894
 895	trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
 896
 897	switch (irq_type) {
 898	case KVM_ARM_IRQ_TYPE_CPU:
 899		if (irqchip_in_kernel(kvm))
 900			return -ENXIO;
 901
 902		if (vcpu_idx >= nrcpus)
 903			return -EINVAL;
 904
 905		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
 906		if (!vcpu)
 907			return -EINVAL;
 908
 909		if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
 910			return -EINVAL;
 911
 912		return vcpu_interrupt_line(vcpu, irq_num, level);
 913	case KVM_ARM_IRQ_TYPE_PPI:
 914		if (!irqchip_in_kernel(kvm))
 915			return -ENXIO;
 916
 917		if (vcpu_idx >= nrcpus)
 918			return -EINVAL;
 919
 920		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
 921		if (!vcpu)
 922			return -EINVAL;
 923
 924		if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
 925			return -EINVAL;
 926
 927		return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
 928	case KVM_ARM_IRQ_TYPE_SPI:
 929		if (!irqchip_in_kernel(kvm))
 930			return -ENXIO;
 931
 932		if (irq_num < VGIC_NR_PRIVATE_IRQS)
 933			return -EINVAL;
 934
 935		return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
 936	}
 937
 938	return -EINVAL;
 939}
 940
 941static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 942			       const struct kvm_vcpu_init *init)
 943{
 944	unsigned int i, ret;
 945	int phys_target = kvm_target_cpu();
 946
 947	if (init->target != phys_target)
 948		return -EINVAL;
 949
 950	/*
 951	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
 952	 * use the same target.
 953	 */
 954	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
 955		return -EINVAL;
 956
 957	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
 958	for (i = 0; i < sizeof(init->features) * 8; i++) {
 959		bool set = (init->features[i / 32] & (1 << (i % 32)));
 960
 961		if (set && i >= KVM_VCPU_MAX_FEATURES)
 962			return -ENOENT;
 963
 964		/*
 965		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
 966		 * use the same feature set.
 967		 */
 968		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
 969		    test_bit(i, vcpu->arch.features) != set)
 970			return -EINVAL;
 971
 972		if (set)
 973			set_bit(i, vcpu->arch.features);
 974	}
 975
 976	vcpu->arch.target = phys_target;
 977
 978	/* Now we know what it is, we can reset it. */
 979	ret = kvm_reset_vcpu(vcpu);
 980	if (ret) {
 981		vcpu->arch.target = -1;
 982		bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 983	}
 984
 985	return ret;
 986}
 987
 988static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 989					 struct kvm_vcpu_init *init)
 990{
 991	int ret;
 992
 993	ret = kvm_vcpu_set_target(vcpu, init);
 994	if (ret)
 995		return ret;
 996
 997	/*
 998	 * Ensure a rebooted VM will fault in RAM pages and detect if the
 999	 * guest MMU is turned off and flush the caches as needed.
1000	 */
1001	if (vcpu->arch.has_run_once)
1002		stage2_unmap_vm(vcpu->kvm);
1003
1004	vcpu_reset_hcr(vcpu);
1005
1006	/*
1007	 * Handle the "start in power-off" case.
1008	 */
1009	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
1010		vcpu_power_off(vcpu);
1011	else
1012		vcpu->arch.power_off = false;
1013
1014	return 0;
1015}
1016
1017static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
1018				 struct kvm_device_attr *attr)
1019{
1020	int ret = -ENXIO;
1021
1022	switch (attr->group) {
1023	default:
1024		ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
1025		break;
1026	}
1027
1028	return ret;
1029}
1030
1031static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
1032				 struct kvm_device_attr *attr)
1033{
1034	int ret = -ENXIO;
1035
1036	switch (attr->group) {
1037	default:
1038		ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
1039		break;
1040	}
1041
1042	return ret;
1043}
1044
1045static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
1046				 struct kvm_device_attr *attr)
1047{
1048	int ret = -ENXIO;
1049
1050	switch (attr->group) {
1051	default:
1052		ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
1053		break;
1054	}
1055
1056	return ret;
1057}
1058
1059static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
1060				   struct kvm_vcpu_events *events)
1061{
1062	memset(events, 0, sizeof(*events));
1063
1064	return __kvm_arm_vcpu_get_events(vcpu, events);
1065}
1066
1067static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
1068				   struct kvm_vcpu_events *events)
1069{
1070	int i;
1071
1072	/* check whether the reserved field is zero */
1073	for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
1074		if (events->reserved[i])
1075			return -EINVAL;
1076
1077	/* check whether the pad field is zero */
1078	for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
1079		if (events->exception.pad[i])
1080			return -EINVAL;
1081
1082	return __kvm_arm_vcpu_set_events(vcpu, events);
1083}
1084
1085long kvm_arch_vcpu_ioctl(struct file *filp,
1086			 unsigned int ioctl, unsigned long arg)
1087{
1088	struct kvm_vcpu *vcpu = filp->private_data;
1089	void __user *argp = (void __user *)arg;
1090	struct kvm_device_attr attr;
1091	long r;
1092
1093	switch (ioctl) {
1094	case KVM_ARM_VCPU_INIT: {
1095		struct kvm_vcpu_init init;
1096
1097		r = -EFAULT;
1098		if (copy_from_user(&init, argp, sizeof(init)))
1099			break;
1100
1101		r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
1102		break;
1103	}
1104	case KVM_SET_ONE_REG:
1105	case KVM_GET_ONE_REG: {
1106		struct kvm_one_reg reg;
1107
1108		r = -ENOEXEC;
1109		if (unlikely(!kvm_vcpu_initialized(vcpu)))
1110			break;
1111
1112		r = -EFAULT;
1113		if (copy_from_user(&reg, argp, sizeof(reg)))
1114			break;
1115
1116		if (ioctl == KVM_SET_ONE_REG)
1117			r = kvm_arm_set_reg(vcpu, &reg);
1118		else
1119			r = kvm_arm_get_reg(vcpu, &reg);
1120		break;
1121	}
1122	case KVM_GET_REG_LIST: {
1123		struct kvm_reg_list __user *user_list = argp;
1124		struct kvm_reg_list reg_list;
1125		unsigned n;
1126
1127		r = -ENOEXEC;
1128		if (unlikely(!kvm_vcpu_initialized(vcpu)))
1129			break;
1130
1131		r = -EPERM;
1132		if (!kvm_arm_vcpu_is_finalized(vcpu))
1133			break;
1134
1135		r = -EFAULT;
1136		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
1137			break;
1138		n = reg_list.n;
1139		reg_list.n = kvm_arm_num_regs(vcpu);
1140		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
1141			break;
1142		r = -E2BIG;
1143		if (n < reg_list.n)
1144			break;
1145		r = kvm_arm_copy_reg_indices(vcpu, user_list->reg);
1146		break;
1147	}
1148	case KVM_SET_DEVICE_ATTR: {
1149		r = -EFAULT;
1150		if (copy_from_user(&attr, argp, sizeof(attr)))
1151			break;
1152		r = kvm_arm_vcpu_set_attr(vcpu, &attr);
1153		break;
1154	}
1155	case KVM_GET_DEVICE_ATTR: {
1156		r = -EFAULT;
1157		if (copy_from_user(&attr, argp, sizeof(attr)))
1158			break;
1159		r = kvm_arm_vcpu_get_attr(vcpu, &attr);
1160		break;
1161	}
1162	case KVM_HAS_DEVICE_ATTR: {
1163		r = -EFAULT;
1164		if (copy_from_user(&attr, argp, sizeof(attr)))
1165			break;
1166		r = kvm_arm_vcpu_has_attr(vcpu, &attr);
1167		break;
1168	}
1169	case KVM_GET_VCPU_EVENTS: {
1170		struct kvm_vcpu_events events;
1171
1172		if (kvm_arm_vcpu_get_events(vcpu, &events))
1173			return -EINVAL;
1174
1175		if (copy_to_user(argp, &events, sizeof(events)))
1176			return -EFAULT;
1177
1178		return 0;
1179	}
1180	case KVM_SET_VCPU_EVENTS: {
1181		struct kvm_vcpu_events events;
1182
1183		if (copy_from_user(&events, argp, sizeof(events)))
1184			return -EFAULT;
1185
1186		return kvm_arm_vcpu_set_events(vcpu, &events);
1187	}
1188	case KVM_ARM_VCPU_FINALIZE: {
1189		int what;
1190
1191		if (!kvm_vcpu_initialized(vcpu))
1192			return -ENOEXEC;
1193
1194		if (get_user(what, (const int __user *)argp))
1195			return -EFAULT;
1196
1197		return kvm_arm_vcpu_finalize(vcpu, what);
1198	}
1199	default:
1200		r = -EINVAL;
1201	}
1202
1203	return r;
1204}
1205
1206/**
1207 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
1208 * @kvm: kvm instance
1209 * @log: slot id and address to which we copy the log
1210 *
1211 * Steps 1-4 below provide general overview of dirty page logging. See
1212 * kvm_get_dirty_log_protect() function description for additional details.
1213 *
 1214 * We call kvm_get_dirty_log_protect() to handle steps 1-3; upon return we
 1215 * always flush the TLB (step 4) even if the previous step failed and the dirty
 1216 * bitmap may be corrupt. Regardless of the previous outcome, the KVM logging
 1217 * API does not preclude subsequent dirty log reads by user space. Flushing the
 1218 * TLB ensures writes will be marked dirty for the next log read.
1219 *
1220 *   1. Take a snapshot of the bit and clear it if needed.
1221 *   2. Write protect the corresponding page.
1222 *   3. Copy the snapshot to the userspace.
 1223 *   4. Flush TLBs if needed.
1224 */
1225int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
1226{
1227	bool flush = false;
1228	int r;
1229
1230	mutex_lock(&kvm->slots_lock);
1231
1232	r = kvm_get_dirty_log_protect(kvm, log, &flush);
1233
1234	if (flush)
1235		kvm_flush_remote_tlbs(kvm);
1236
1237	mutex_unlock(&kvm->slots_lock);
1238	return r;
1239}
1240
1241int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
1242{
1243	bool flush = false;
1244	int r;
1245
1246	mutex_lock(&kvm->slots_lock);
1247
1248	r = kvm_clear_dirty_log_protect(kvm, log, &flush);
1249
1250	if (flush)
1251		kvm_flush_remote_tlbs(kvm);
1252
1253	mutex_unlock(&kvm->slots_lock);
1254	return r;
1255}
1256
1257static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
1258					struct kvm_arm_device_addr *dev_addr)
1259{
1260	unsigned long dev_id, type;
1261
1262	dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
1263		KVM_ARM_DEVICE_ID_SHIFT;
1264	type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
1265		KVM_ARM_DEVICE_TYPE_SHIFT;
1266
1267	switch (dev_id) {
1268	case KVM_ARM_DEVICE_VGIC_V2:
1269		if (!vgic_present)
1270			return -ENXIO;
1271		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
1272	default:
1273		return -ENODEV;
1274	}
1275}
1276
1277long kvm_arch_vm_ioctl(struct file *filp,
1278		       unsigned int ioctl, unsigned long arg)
1279{
1280	struct kvm *kvm = filp->private_data;
1281	void __user *argp = (void __user *)arg;
1282
1283	switch (ioctl) {
1284	case KVM_CREATE_IRQCHIP: {
1285		int ret;
1286		if (!vgic_present)
1287			return -ENXIO;
1288		mutex_lock(&kvm->lock);
1289		ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
1290		mutex_unlock(&kvm->lock);
1291		return ret;
1292	}
1293	case KVM_ARM_SET_DEVICE_ADDR: {
1294		struct kvm_arm_device_addr dev_addr;
1295
1296		if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
1297			return -EFAULT;
1298		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
1299	}
1300	case KVM_ARM_PREFERRED_TARGET: {
1301		int err;
1302		struct kvm_vcpu_init init;
1303
1304		err = kvm_vcpu_preferred_target(&init);
1305		if (err)
1306			return err;
1307
1308		if (copy_to_user(argp, &init, sizeof(init)))
1309			return -EFAULT;
1310
1311		return 0;
1312	}
1313	default:
1314		return -EINVAL;
1315	}
1316}
1317
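/*
 * Per-CPU Hyp initialisation: replace the Hyp stub vectors with KVM's
 * init vectors, then initialise EL2 with the Hyp page table base, the
 * per-CPU Hyp stack and the Hyp vectors, and set up stage-2 translation.
 */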
1318static void cpu_init_hyp_mode(void *dummy)
1319{
1320	phys_addr_t pgd_ptr;
1321	unsigned long hyp_stack_ptr;
1322	unsigned long stack_page;
1323	unsigned long vector_ptr;
1324
1325	/* Switch from the HYP stub to our own HYP init vector */
1326	__hyp_set_vectors(kvm_get_idmap_vector());
1327
1328	pgd_ptr = kvm_mmu_get_httbr();
1329	stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
1330	hyp_stack_ptr = stack_page + PAGE_SIZE;
1331	vector_ptr = (unsigned long)kvm_get_hyp_vector();
1332
1333	__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
1334	__cpu_init_stage2();
1335}
1336
1337static void cpu_hyp_reset(void)
1338{
1339	if (!is_kernel_in_hyp_mode())
1340		__hyp_reset_vectors();
1341}
1342
1343static void cpu_hyp_reinit(void)
1344{
1345	kvm_init_host_cpu_context(&this_cpu_ptr(&kvm_host_data)->host_ctxt);
1346
1347	cpu_hyp_reset();
1348
1349	if (is_kernel_in_hyp_mode())
1350		kvm_timer_init_vhe();
1351	else
1352		cpu_init_hyp_mode(NULL);
1353
1354	kvm_arm_init_debug();
1355
1356	if (vgic_present)
1357		kvm_vgic_init_cpu_hardware();
1358}
1359
1360static void _kvm_arch_hardware_enable(void *discard)
1361{
1362	if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
1363		cpu_hyp_reinit();
1364		__this_cpu_write(kvm_arm_hardware_enabled, 1);
1365	}
1366}
1367
1368int kvm_arch_hardware_enable(void)
1369{
1370	_kvm_arch_hardware_enable(NULL);
1371	return 0;
1372}
1373
1374static void _kvm_arch_hardware_disable(void *discard)
1375{
1376	if (__this_cpu_read(kvm_arm_hardware_enabled)) {
1377		cpu_hyp_reset();
1378		__this_cpu_write(kvm_arm_hardware_enabled, 0);
1379	}
1380}
1381
1382void kvm_arch_hardware_disable(void)
1383{
1384	_kvm_arch_hardware_disable(NULL);
1385}
1386
1387#ifdef CONFIG_CPU_PM
1388static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
1389				    unsigned long cmd,
1390				    void *v)
1391{
1392	/*
1393	 * kvm_arm_hardware_enabled is left with its old value over
1394	 * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
1395	 * re-enable hyp.
1396	 */
1397	switch (cmd) {
1398	case CPU_PM_ENTER:
1399		if (__this_cpu_read(kvm_arm_hardware_enabled))
1400			/*
1401			 * don't update kvm_arm_hardware_enabled here
1402			 * so that the hardware will be re-enabled
1403			 * when we resume. See below.
1404			 */
1405			cpu_hyp_reset();
1406
1407		return NOTIFY_OK;
1408	case CPU_PM_ENTER_FAILED:
1409	case CPU_PM_EXIT:
1410		if (__this_cpu_read(kvm_arm_hardware_enabled))
1411			/* The hardware was enabled before suspend. */
1412			cpu_hyp_reinit();
1413
1414		return NOTIFY_OK;
1415
1416	default:
1417		return NOTIFY_DONE;
1418	}
1419}
1420
1421static struct notifier_block hyp_init_cpu_pm_nb = {
1422	.notifier_call = hyp_init_cpu_pm_notifier,
1423};
1424
1425static void __init hyp_cpu_pm_init(void)
1426{
1427	cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
1428}
1429static void __init hyp_cpu_pm_exit(void)
1430{
1431	cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
1432}
1433#else
1434static inline void hyp_cpu_pm_init(void)
1435{
1436}
1437static inline void hyp_cpu_pm_exit(void)
1438{
1439}
1440#endif
1441
1442static int init_common_resources(void)
1443{
1444	kvm_set_ipa_limit();
1445
1446	return 0;
1447}
1448
1449static int init_subsystems(void)
1450{
1451	int err = 0;
1452
1453	/*
1454	 * Enable hardware so that subsystem initialisation can access EL2.
1455	 */
1456	on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);
1457
1458	/*
 1459	 * Register the CPU low-power notifier
1460	 */
1461	hyp_cpu_pm_init();
1462
1463	/*
1464	 * Init HYP view of VGIC
1465	 */
1466	err = kvm_vgic_hyp_init();
1467	switch (err) {
1468	case 0:
1469		vgic_present = true;
1470		break;
1471	case -ENODEV:
1472	case -ENXIO:
1473		vgic_present = false;
1474		err = 0;
1475		break;
1476	default:
1477		goto out;
1478	}
1479
1480	/*
1481	 * Init HYP architected timer support
1482	 */
1483	err = kvm_timer_hyp_init(vgic_present);
1484	if (err)
1485		goto out;
1486
1487	kvm_perf_init();
1488	kvm_coproc_table_init();
1489
1490out:
1491	on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
1492
1493	return err;
1494}
1495
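/*
 * Undo init_hyp_mode(): free the Hyp page tables and the per-CPU Hyp
 * stack pages, and unregister the CPU PM notifier.
 */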
1496static void teardown_hyp_mode(void)
1497{
1498	int cpu;
1499
1500	free_hyp_pgds();
1501	for_each_possible_cpu(cpu)
1502		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
1503	hyp_cpu_pm_exit();
1504}
1505
1506/**
 1507 * Initializes Hyp-mode on all online CPUs
1508 */
1509static int init_hyp_mode(void)
1510{
1511	int cpu;
1512	int err = 0;
1513
1514	/*
1515	 * Allocate Hyp PGD and setup Hyp identity mapping
1516	 */
1517	err = kvm_mmu_init();
1518	if (err)
1519		goto out_err;
1520
1521	/*
1522	 * Allocate stack pages for Hypervisor-mode
1523	 */
1524	for_each_possible_cpu(cpu) {
1525		unsigned long stack_page;
1526
1527		stack_page = __get_free_page(GFP_KERNEL);
1528		if (!stack_page) {
1529			err = -ENOMEM;
1530			goto out_err;
1531		}
1532
1533		per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
1534	}
1535
1536	/*
1537	 * Map the Hyp-code called directly from the host
1538	 */
1539	err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
1540				  kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
1541	if (err) {
1542		kvm_err("Cannot map world-switch code\n");
1543		goto out_err;
1544	}
1545
1546	err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
1547				  kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
1548	if (err) {
1549		kvm_err("Cannot map rodata section\n");
1550		goto out_err;
1551	}
1552
1553	err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
1554				  kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
1555	if (err) {
1556		kvm_err("Cannot map bss section\n");
1557		goto out_err;
1558	}
1559
1560	err = kvm_map_vectors();
1561	if (err) {
1562		kvm_err("Cannot map vectors\n");
1563		goto out_err;
1564	}
1565
1566	/*
1567	 * Map the Hyp stack pages
1568	 */
1569	for_each_possible_cpu(cpu) {
1570		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
1571		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
1572					  PAGE_HYP);
1573
1574		if (err) {
1575			kvm_err("Cannot map hyp stack\n");
1576			goto out_err;
1577		}
1578	}
1579
1580	for_each_possible_cpu(cpu) {
1581		kvm_host_data_t *cpu_data;
1582
1583		cpu_data = per_cpu_ptr(&kvm_host_data, cpu);
1584		err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP);
1585
1586		if (err) {
1587			kvm_err("Cannot map host CPU state: %d\n", err);
1588			goto out_err;
1589		}
1590	}
1591
1592	err = hyp_map_aux_data();
1593	if (err)
1594		kvm_err("Cannot map host auxiliary data: %d\n", err);
1595
1596	return 0;
1597
1598out_err:
1599	teardown_hyp_mode();
1600	kvm_err("error initializing Hyp mode: %d\n", err);
1601	return err;
1602}
1603
1604static void check_kvm_target_cpu(void *ret)
1605{
1606	*(int *)ret = kvm_target_cpu();
1607}
1608
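/*
 * Look up the vcpu whose MPIDR affinity bits match @mpidr, or return NULL
 * if no such vcpu exists in this VM.
 */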
1609struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
1610{
1611	struct kvm_vcpu *vcpu;
1612	int i;
1613
1614	mpidr &= MPIDR_HWID_BITMASK;
1615	kvm_for_each_vcpu(i, vcpu, kvm) {
1616		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
1617			return vcpu;
1618	}
1619	return NULL;
1620}
1621
1622bool kvm_arch_has_irq_bypass(void)
1623{
1624	return true;
1625}
1626
1627int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
1628				      struct irq_bypass_producer *prod)
1629{
1630	struct kvm_kernel_irqfd *irqfd =
1631		container_of(cons, struct kvm_kernel_irqfd, consumer);
1632
1633	return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
1634					  &irqfd->irq_entry);
1635}
1636void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
1637				      struct irq_bypass_producer *prod)
1638{
1639	struct kvm_kernel_irqfd *irqfd =
1640		container_of(cons, struct kvm_kernel_irqfd, consumer);
1641
1642	kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
1643				     &irqfd->irq_entry);
1644}
1645
1646void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
1647{
1648	struct kvm_kernel_irqfd *irqfd =
1649		container_of(cons, struct kvm_kernel_irqfd, consumer);
1650
1651	kvm_arm_halt_guest(irqfd->kvm);
1652}
1653
1654void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
1655{
1656	struct kvm_kernel_irqfd *irqfd =
1657		container_of(cons, struct kvm_kernel_irqfd, consumer);
1658
1659	kvm_arm_resume_guest(irqfd->kvm);
1660}
1661
1662/**
1663 * Initialize Hyp-mode and memory mappings on all CPUs.
1664 */
1665int kvm_arch_init(void *opaque)
1666{
1667	int err;
1668	int ret, cpu;
1669	bool in_hyp_mode;
1670
1671	if (!is_hyp_mode_available()) {
1672		kvm_info("HYP mode not available\n");
1673		return -ENODEV;
1674	}
1675
1676	in_hyp_mode = is_kernel_in_hyp_mode();
1677
1678	if (!in_hyp_mode && kvm_arch_requires_vhe()) {
1679		kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n");
1680		return -ENODEV;
1681	}
1682
1683	for_each_online_cpu(cpu) {
1684		smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
1685		if (ret < 0) {
1686			kvm_err("Error, CPU %d not supported!\n", cpu);
1687			return -ENODEV;
1688		}
1689	}
1690
1691	err = init_common_resources();
1692	if (err)
1693		return err;
1694
1695	err = kvm_arm_init_sve();
1696	if (err)
1697		return err;
1698
1699	if (!in_hyp_mode) {
1700		err = init_hyp_mode();
1701		if (err)
1702			goto out_err;
1703	}
1704
1705	err = init_subsystems();
1706	if (err)
1707		goto out_hyp;
1708
1709	if (in_hyp_mode)
1710		kvm_info("VHE mode initialized successfully\n");
1711	else
1712		kvm_info("Hyp mode initialized successfully\n");
1713
1714	return 0;
1715
1716out_hyp:
1717	if (!in_hyp_mode)
1718		teardown_hyp_mode();
1719out_err:
1720	return err;
1721}
1722
1723/* NOP: Compiling as a module not supported */
1724void kvm_arch_exit(void)
1725{
1726	kvm_perf_teardown();
1727}
1728
1729static int arm_init(void)
1730{
1731	int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
1732	return rc;
1733}
1734
1735module_init(arm_init);