   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * KVM Microsoft Hyper-V emulation
   4 *
   5 * derived from arch/x86/kvm/x86.c
   6 *
   7 * Copyright (C) 2006 Qumranet, Inc.
   8 * Copyright (C) 2008 Qumranet, Inc.
   9 * Copyright IBM Corporation, 2008
  10 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  11 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
  12 *
  13 * Authors:
  14 *   Avi Kivity   <avi@qumranet.com>
  15 *   Yaniv Kamay  <yaniv@qumranet.com>
  16 *   Amit Shah    <amit.shah@qumranet.com>
  17 *   Ben-Ami Yassour <benami@il.ibm.com>
  18 *   Andrey Smetanin <asmetanin@virtuozzo.com>
  19 */
  20
  21#include "x86.h"
  22#include "lapic.h"
  23#include "ioapic.h"
  24#include "cpuid.h"
  25#include "hyperv.h"
  26#include "mmu.h"
  27#include "xen.h"
  28
  29#include <linux/cpu.h>
  30#include <linux/kvm_host.h>
  31#include <linux/highmem.h>
  32#include <linux/sched/cputime.h>
  33#include <linux/spinlock.h>
  34#include <linux/eventfd.h>
  35
  36#include <asm/apicdef.h>
  37#include <asm/mshyperv.h>
  38#include <trace/events/kvm.h>
  39
  40#include "trace.h"
  41#include "irq.h"
  42#include "fpu.h"
  43
  44#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, HV_VCPUS_PER_SPARSE_BANK)
  45
  46static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
  47				bool vcpu_kick);
  48
  49static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
  50{
  51	return atomic64_read(&synic->sint[sint]);
  52}
  53
  54static inline int synic_get_sint_vector(u64 sint_value)
  55{
  56	if (sint_value & HV_SYNIC_SINT_MASKED)
  57		return -1;
  58	return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
  59}
  60
  61static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
  62				      int vector)
  63{
  64	int i;
  65
  66	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
  67		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
  68			return true;
  69	}
  70	return false;
  71}
  72
  73static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
  74				     int vector)
  75{
  76	int i;
  77	u64 sint_value;
  78
  79	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
  80		sint_value = synic_read_sint(synic, i);
  81		if (synic_get_sint_vector(sint_value) == vector &&
  82		    sint_value & HV_SYNIC_SINT_AUTO_EOI)
  83			return true;
  84	}
  85	return false;
  86}
  87
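/*
 * Recompute the SynIC bitmaps for 'vector': set it in vec_bitmap if any SINT
 * is connected to it and in auto_eoi_bitmap if any such SINT has AutoEOI
 * enabled.  APICv is inhibited for the VM while at least one SINT uses
 * AutoEOI, as that mode relies on KVM injecting IRQs manually.
 */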
  88static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
  89				int vector)
  90{
  91	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
  92	struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
  93	bool auto_eoi_old, auto_eoi_new;
  94
  95	if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
  96		return;
  97
  98	if (synic_has_vector_connected(synic, vector))
  99		__set_bit(vector, synic->vec_bitmap);
 100	else
 101		__clear_bit(vector, synic->vec_bitmap);
 102
 103	auto_eoi_old = !bitmap_empty(synic->auto_eoi_bitmap, 256);
 104
 105	if (synic_has_vector_auto_eoi(synic, vector))
 106		__set_bit(vector, synic->auto_eoi_bitmap);
 107	else
 108		__clear_bit(vector, synic->auto_eoi_bitmap);
 109
 110	auto_eoi_new = !bitmap_empty(synic->auto_eoi_bitmap, 256);
 111
 112	if (auto_eoi_old == auto_eoi_new)
 113		return;
 114
 115	if (!enable_apicv)
 116		return;
 117
 118	down_write(&vcpu->kvm->arch.apicv_update_lock);
 119
 120	if (auto_eoi_new)
 121		hv->synic_auto_eoi_used++;
 122	else
 123		hv->synic_auto_eoi_used--;
 124
 125	/*
 126	 * Inhibit APICv if any vCPU is using SynIC's AutoEOI, which relies on
 127	 * the hypervisor to manually inject IRQs.
 128	 */
 129	__kvm_set_or_clear_apicv_inhibit(vcpu->kvm,
 130					 APICV_INHIBIT_REASON_HYPERV,
 131					 !!hv->synic_auto_eoi_used);
 132
 133	up_write(&vcpu->kvm->arch.apicv_update_lock);
 134}
 135
 136static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
 137			  u64 data, bool host)
 138{
 139	int vector, old_vector;
 140	bool masked;
 141
 142	vector = data & HV_SYNIC_SINT_VECTOR_MASK;
 143	masked = data & HV_SYNIC_SINT_MASKED;
 144
 145	/*
  146	 * Valid vectors are 16-255; however, nested Hyper-V attempts to write the
  147	 * default '0x10000' value on boot and this should not #GP. Zero-initializing
  148	 * the register from the host also needs to be allowed.
 149	 */
 150	if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
 151		return 1;
 152	/*
 153	 * Guest may configure multiple SINTs to use the same vector, so
 154	 * we maintain a bitmap of vectors handled by synic, and a
 155	 * bitmap of vectors with auto-eoi behavior.  The bitmaps are
 156	 * updated here, and atomically queried on fast paths.
 157	 */
 158	old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;
 159
 160	atomic64_set(&synic->sint[sint], data);
 161
 162	synic_update_vector(synic, old_vector);
 163
 164	synic_update_vector(synic, vector);
 165
 166	/* Load SynIC vectors into EOI exit bitmap */
 167	kvm_make_request(KVM_REQ_SCAN_IOAPIC, hv_synic_to_vcpu(synic));
 168	return 0;
 169}
 170
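/*
 * Look up a vCPU by its Hyper-V VP index.  The common case, where the VP
 * index equals the KVM vCPU index, is a direct lookup; otherwise fall back
 * to a linear scan over all vCPUs.
 */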
 171static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
 172{
 173	struct kvm_vcpu *vcpu = NULL;
 174	unsigned long i;
 175
 176	if (vpidx >= KVM_MAX_VCPUS)
 177		return NULL;
 178
 179	vcpu = kvm_get_vcpu(kvm, vpidx);
 180	if (vcpu && kvm_hv_get_vpindex(vcpu) == vpidx)
 181		return vcpu;
 182	kvm_for_each_vcpu(i, vcpu, kvm)
 183		if (kvm_hv_get_vpindex(vcpu) == vpidx)
 184			return vcpu;
 185	return NULL;
 186}
 187
 188static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
 189{
 190	struct kvm_vcpu *vcpu;
 191	struct kvm_vcpu_hv_synic *synic;
 192
 193	vcpu = get_vcpu_by_vpidx(kvm, vpidx);
 194	if (!vcpu || !to_hv_vcpu(vcpu))
 195		return NULL;
 196	synic = to_hv_synic(vcpu);
 197	return (synic->active) ? synic : NULL;
 198}
 199
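/*
 * Called when the interrupt programmed into 'sint' has been acknowledged:
 * re-arm any enabled, message-mode synthetic timers that still have a
 * message pending on this SINT and notify the GSI (if any) routed to it.
 */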
 200static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
 201{
 202	struct kvm *kvm = vcpu->kvm;
 203	struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
 204	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 205	struct kvm_vcpu_hv_stimer *stimer;
 206	int gsi, idx;
 207
 208	trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);
 209
  210	/* Try to deliver pending Hyper-V SynIC timer messages */
 211	for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
 212		stimer = &hv_vcpu->stimer[idx];
 213		if (stimer->msg_pending && stimer->config.enable &&
 214		    !stimer->config.direct_mode &&
 215		    stimer->config.sintx == sint)
 216			stimer_mark_pending(stimer, false);
 217	}
 218
 219	idx = srcu_read_lock(&kvm->irq_srcu);
 220	gsi = atomic_read(&synic->sint_to_gsi[sint]);
 221	if (gsi != -1)
 222		kvm_notify_acked_gsi(kvm, gsi);
 223	srcu_read_unlock(&kvm->irq_srcu, idx);
 224}
 225
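/*
 * Report a guest-initiated SynIC MSR change to userspace via a
 * KVM_EXIT_HYPERV_SYNIC exit carrying the current control, event page and
 * message page values.
 */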
 226static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
 227{
 228	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
 229	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 230
 231	hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
 232	hv_vcpu->exit.u.synic.msr = msr;
 233	hv_vcpu->exit.u.synic.control = synic->control;
 234	hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
 235	hv_vcpu->exit.u.synic.msg_page = synic->msg_page;
 236
 237	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
 238}
 239
 240static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
 241			 u32 msr, u64 data, bool host)
 242{
 243	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
 244	int ret;
 245
 246	if (!synic->active && (!host || data))
 247		return 1;
 248
 249	trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
 250
 251	ret = 0;
 252	switch (msr) {
 253	case HV_X64_MSR_SCONTROL:
 254		synic->control = data;
 255		if (!host)
 256			synic_exit(synic, msr);
 257		break;
 258	case HV_X64_MSR_SVERSION:
 259		if (!host) {
 260			ret = 1;
 261			break;
 262		}
 263		synic->version = data;
 264		break;
 265	case HV_X64_MSR_SIEFP:
 266		if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
 267		    !synic->dont_zero_synic_pages)
 268			if (kvm_clear_guest(vcpu->kvm,
 269					    data & PAGE_MASK, PAGE_SIZE)) {
 270				ret = 1;
 271				break;
 272			}
 273		synic->evt_page = data;
 274		if (!host)
 275			synic_exit(synic, msr);
 276		break;
 277	case HV_X64_MSR_SIMP:
 278		if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
 279		    !synic->dont_zero_synic_pages)
 280			if (kvm_clear_guest(vcpu->kvm,
 281					    data & PAGE_MASK, PAGE_SIZE)) {
 282				ret = 1;
 283				break;
 284			}
 285		synic->msg_page = data;
 286		if (!host)
 287			synic_exit(synic, msr);
 288		break;
 289	case HV_X64_MSR_EOM: {
 290		int i;
 291
 292		if (!synic->active)
 293			break;
 294
 295		for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
 296			kvm_hv_notify_acked_sint(vcpu, i);
 297		break;
 298	}
 299	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
 300		ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
 301		break;
 302	default:
 303		ret = 1;
 304		break;
 305	}
 306	return ret;
 307}
 308
 309static bool kvm_hv_is_syndbg_enabled(struct kvm_vcpu *vcpu)
 310{
 311	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 312
 313	return hv_vcpu->cpuid_cache.syndbg_cap_eax &
 314		HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
 315}
 316
 317static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu)
 318{
 319	struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
 320
 321	if (vcpu->run->hyperv.u.syndbg.msr == HV_X64_MSR_SYNDBG_CONTROL)
 322		hv->hv_syndbg.control.status =
 323			vcpu->run->hyperv.u.syndbg.status;
 324	return 1;
 325}
 326
 327static void syndbg_exit(struct kvm_vcpu *vcpu, u32 msr)
 328{
 329	struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);
 330	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 331
 332	hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNDBG;
 333	hv_vcpu->exit.u.syndbg.msr = msr;
 334	hv_vcpu->exit.u.syndbg.control = syndbg->control.control;
 335	hv_vcpu->exit.u.syndbg.send_page = syndbg->control.send_page;
 336	hv_vcpu->exit.u.syndbg.recv_page = syndbg->control.recv_page;
 337	hv_vcpu->exit.u.syndbg.pending_page = syndbg->control.pending_page;
 338	vcpu->arch.complete_userspace_io =
 339			kvm_hv_syndbg_complete_userspace;
 340
 341	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
 342}
 343
 344static int syndbg_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 345{
 346	struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);
 347
 348	if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
 349		return 1;
 350
 351	trace_kvm_hv_syndbg_set_msr(vcpu->vcpu_id,
 352				    to_hv_vcpu(vcpu)->vp_index, msr, data);
 353	switch (msr) {
 354	case HV_X64_MSR_SYNDBG_CONTROL:
 355		syndbg->control.control = data;
 356		if (!host)
 357			syndbg_exit(vcpu, msr);
 358		break;
 359	case HV_X64_MSR_SYNDBG_STATUS:
 360		syndbg->control.status = data;
 361		break;
 362	case HV_X64_MSR_SYNDBG_SEND_BUFFER:
 363		syndbg->control.send_page = data;
 364		break;
 365	case HV_X64_MSR_SYNDBG_RECV_BUFFER:
 366		syndbg->control.recv_page = data;
 367		break;
 368	case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
 369		syndbg->control.pending_page = data;
 370		if (!host)
 371			syndbg_exit(vcpu, msr);
 372		break;
 373	case HV_X64_MSR_SYNDBG_OPTIONS:
 374		syndbg->options = data;
 375		break;
 376	default:
 377		break;
 378	}
 379
 380	return 0;
 381}
 382
 383static int syndbg_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
 384{
 385	struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);
 386
 387	if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
 388		return 1;
 389
 390	switch (msr) {
 391	case HV_X64_MSR_SYNDBG_CONTROL:
 392		*pdata = syndbg->control.control;
 393		break;
 394	case HV_X64_MSR_SYNDBG_STATUS:
 395		*pdata = syndbg->control.status;
 396		break;
 397	case HV_X64_MSR_SYNDBG_SEND_BUFFER:
 398		*pdata = syndbg->control.send_page;
 399		break;
 400	case HV_X64_MSR_SYNDBG_RECV_BUFFER:
 401		*pdata = syndbg->control.recv_page;
 402		break;
 403	case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
 404		*pdata = syndbg->control.pending_page;
 405		break;
 406	case HV_X64_MSR_SYNDBG_OPTIONS:
 407		*pdata = syndbg->options;
 408		break;
 409	default:
 410		break;
 411	}
 412
 413	trace_kvm_hv_syndbg_get_msr(vcpu->vcpu_id, kvm_hv_get_vpindex(vcpu), msr, *pdata);
 414
 415	return 0;
 416}
 417
 418static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
 419			 bool host)
 420{
 421	int ret;
 422
 423	if (!synic->active && !host)
 424		return 1;
 425
 426	ret = 0;
 427	switch (msr) {
 428	case HV_X64_MSR_SCONTROL:
 429		*pdata = synic->control;
 430		break;
 431	case HV_X64_MSR_SVERSION:
 432		*pdata = synic->version;
 433		break;
 434	case HV_X64_MSR_SIEFP:
 435		*pdata = synic->evt_page;
 436		break;
 437	case HV_X64_MSR_SIMP:
 438		*pdata = synic->msg_page;
 439		break;
 440	case HV_X64_MSR_EOM:
 441		*pdata = 0;
 442		break;
 443	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
 444		*pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
 445		break;
 446	default:
 447		ret = 1;
 448		break;
 449	}
 450	return ret;
 451}
 452
 453static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
 454{
 455	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
 456	struct kvm_lapic_irq irq;
 457	int ret, vector;
 458
 459	if (KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm))
 460		return -EINVAL;
 461
 462	if (sint >= ARRAY_SIZE(synic->sint))
 463		return -EINVAL;
 464
 465	vector = synic_get_sint_vector(synic_read_sint(synic, sint));
 466	if (vector < 0)
 467		return -ENOENT;
 468
 469	memset(&irq, 0, sizeof(irq));
 470	irq.shorthand = APIC_DEST_SELF;
 471	irq.dest_mode = APIC_DEST_PHYSICAL;
 472	irq.delivery_mode = APIC_DM_FIXED;
 473	irq.vector = vector;
 474	irq.level = 1;
 475
 476	ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
 477	trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
 478	return ret;
 479}
 480
 481int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
 482{
 483	struct kvm_vcpu_hv_synic *synic;
 484
 485	synic = synic_get(kvm, vpidx);
 486	if (!synic)
 487		return -EINVAL;
 488
 489	return synic_set_irq(synic, sint);
 490}
 491
 492void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
 493{
 494	struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
 495	int i;
 496
 497	trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);
 498
 499	for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
 500		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
 501			kvm_hv_notify_acked_sint(vcpu, i);
 502}
 503
 504static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
 505{
 506	struct kvm_vcpu_hv_synic *synic;
 507
 508	synic = synic_get(kvm, vpidx);
 509	if (!synic)
 510		return -EINVAL;
 511
 512	if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
 513		return -EINVAL;
 514
 515	atomic_set(&synic->sint_to_gsi[sint], gsi);
 516	return 0;
 517}
 518
 519void kvm_hv_irq_routing_update(struct kvm *kvm)
 520{
 521	struct kvm_irq_routing_table *irq_rt;
 522	struct kvm_kernel_irq_routing_entry *e;
 523	u32 gsi;
 524
 525	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
 526					lockdep_is_held(&kvm->irq_lock));
 527
 528	for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
 529		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
 530			if (e->type == KVM_IRQ_ROUTING_HV_SINT)
 531				kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
 532						    e->hv_sint.sint, gsi);
 533		}
 534	}
 535}
 536
 537static void synic_init(struct kvm_vcpu_hv_synic *synic)
 538{
 539	int i;
 540
 541	memset(synic, 0, sizeof(*synic));
 542	synic->version = HV_SYNIC_VERSION_1;
 543	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
 544		atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
 545		atomic_set(&synic->sint_to_gsi[i], -1);
 546	}
 547}
 548
 549static u64 get_time_ref_counter(struct kvm *kvm)
 550{
 551	struct kvm_hv *hv = to_kvm_hv(kvm);
 552	struct kvm_vcpu *vcpu;
 553	u64 tsc;
 554
 555	/*
 556	 * Fall back to get_kvmclock_ns() when TSC page hasn't been set up,
 557	 * is broken, disabled or being updated.
 558	 */
 559	if (hv->hv_tsc_page_status != HV_TSC_PAGE_SET)
 560		return div_u64(get_kvmclock_ns(kvm), 100);
 561
 562	vcpu = kvm_get_vcpu(kvm, 0);
 563	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
 564	return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
 565		+ hv->tsc_ref.tsc_offset;
 566}
 567
 568static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
 569				bool vcpu_kick)
 570{
 571	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
 572
 573	set_bit(stimer->index,
 574		to_hv_vcpu(vcpu)->stimer_pending_bitmap);
 575	kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
 576	if (vcpu_kick)
 577		kvm_vcpu_kick(vcpu);
 578}
 579
 580static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
 581{
 582	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
 583
 584	trace_kvm_hv_stimer_cleanup(hv_stimer_to_vcpu(stimer)->vcpu_id,
 585				    stimer->index);
 586
 587	hrtimer_cancel(&stimer->timer);
 588	clear_bit(stimer->index,
 589		  to_hv_vcpu(vcpu)->stimer_pending_bitmap);
 590	stimer->msg_pending = false;
 591	stimer->exp_time = 0;
 592}
 593
 594static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
 595{
 596	struct kvm_vcpu_hv_stimer *stimer;
 597
 598	stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
 599	trace_kvm_hv_stimer_callback(hv_stimer_to_vcpu(stimer)->vcpu_id,
 600				     stimer->index);
 601	stimer_mark_pending(stimer, true);
 602
 603	return HRTIMER_NORESTART;
 604}
 605
 606/*
 607 * stimer_start() assumptions:
 608 * a) stimer->count is not equal to 0
 609 * b) stimer->config has HV_STIMER_ENABLE flag
 610 */
 611static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
 612{
 613	u64 time_now;
 614	ktime_t ktime_now;
 615
 616	time_now = get_time_ref_counter(hv_stimer_to_vcpu(stimer)->kvm);
 617	ktime_now = ktime_get();
 618
 619	if (stimer->config.periodic) {
 620		if (stimer->exp_time) {
 621			if (time_now >= stimer->exp_time) {
 622				u64 remainder;
 623
 624				div64_u64_rem(time_now - stimer->exp_time,
 625					      stimer->count, &remainder);
 626				stimer->exp_time =
 627					time_now + (stimer->count - remainder);
 628			}
 629		} else
 630			stimer->exp_time = time_now + stimer->count;
 631
 632		trace_kvm_hv_stimer_start_periodic(
 633					hv_stimer_to_vcpu(stimer)->vcpu_id,
 634					stimer->index,
 635					time_now, stimer->exp_time);
 636
 637		hrtimer_start(&stimer->timer,
 638			      ktime_add_ns(ktime_now,
 639					   100 * (stimer->exp_time - time_now)),
 640			      HRTIMER_MODE_ABS);
 641		return 0;
 642	}
 643	stimer->exp_time = stimer->count;
 644	if (time_now >= stimer->count) {
 645		/*
 646		 * Expire timer according to Hypervisor Top-Level Functional
  647		 * specification v4 (15.3.1):
 648		 * "If a one shot is enabled and the specified count is in
 649		 * the past, it will expire immediately."
 650		 */
 651		stimer_mark_pending(stimer, false);
 652		return 0;
 653	}
 654
 655	trace_kvm_hv_stimer_start_one_shot(hv_stimer_to_vcpu(stimer)->vcpu_id,
 656					   stimer->index,
 657					   time_now, stimer->count);
 658
 659	hrtimer_start(&stimer->timer,
 660		      ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
 661		      HRTIMER_MODE_ABS);
 662	return 0;
 663}
 664
 665static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
 666			     bool host)
 667{
 668	union hv_stimer_config new_config = {.as_uint64 = config},
 669		old_config = {.as_uint64 = stimer->config.as_uint64};
 670	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
 671	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 672	struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
 673
 674	if (!synic->active && (!host || config))
 675		return 1;
 676
 677	if (unlikely(!host && hv_vcpu->enforce_cpuid && new_config.direct_mode &&
 678		     !(hv_vcpu->cpuid_cache.features_edx &
 679		       HV_STIMER_DIRECT_MODE_AVAILABLE)))
 680		return 1;
 681
 682	trace_kvm_hv_stimer_set_config(hv_stimer_to_vcpu(stimer)->vcpu_id,
 683				       stimer->index, config, host);
 684
 685	stimer_cleanup(stimer);
 686	if (old_config.enable &&
 687	    !new_config.direct_mode && new_config.sintx == 0)
 688		new_config.enable = 0;
 689	stimer->config.as_uint64 = new_config.as_uint64;
 690
 691	if (stimer->config.enable)
 692		stimer_mark_pending(stimer, false);
 693
 694	return 0;
 695}
 696
 697static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
 698			    bool host)
 699{
 700	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
 701	struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
 702
 703	if (!synic->active && (!host || count))
 704		return 1;
 705
 706	trace_kvm_hv_stimer_set_count(hv_stimer_to_vcpu(stimer)->vcpu_id,
 707				      stimer->index, count, host);
 708
 709	stimer_cleanup(stimer);
 710	stimer->count = count;
 711	if (stimer->count == 0)
 712		stimer->config.enable = 0;
 713	else if (stimer->config.auto_enable)
 714		stimer->config.enable = 1;
 715
 716	if (stimer->config.enable)
 717		stimer_mark_pending(stimer, false);
 718
 719	return 0;
 720}
 721
 722static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
 723{
 724	*pconfig = stimer->config.as_uint64;
 725	return 0;
 726}
 727
 728static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
 729{
 730	*pcount = stimer->count;
 731	return 0;
 732}
 733
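/*
 * Copy 'src_msg' into the SINT's slot in the guest's SynIC message page and
 * raise the corresponding interrupt.  If the slot is still occupied, set the
 * 'msg_pending' flag (unless 'no_retry') and return -EAGAIN so delivery is
 * retried once the guest acknowledges the previous message.
 */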
 734static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
 735			     struct hv_message *src_msg, bool no_retry)
 736{
 737	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
 738	int msg_off = offsetof(struct hv_message_page, sint_message[sint]);
 739	gfn_t msg_page_gfn;
 740	struct hv_message_header hv_hdr;
 741	int r;
 742
 743	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
 744		return -ENOENT;
 745
 746	msg_page_gfn = synic->msg_page >> PAGE_SHIFT;
 747
 748	/*
 749	 * Strictly following the spec-mandated ordering would assume setting
 750	 * .msg_pending before checking .message_type.  However, this function
 751	 * is only called in vcpu context so the entire update is atomic from
 752	 * guest POV and thus the exact order here doesn't matter.
 753	 */
 754	r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type,
 755				     msg_off + offsetof(struct hv_message,
 756							header.message_type),
 757				     sizeof(hv_hdr.message_type));
 758	if (r < 0)
 759		return r;
 760
 761	if (hv_hdr.message_type != HVMSG_NONE) {
 762		if (no_retry)
 763			return 0;
 764
 765		hv_hdr.message_flags.msg_pending = 1;
 766		r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn,
 767					      &hv_hdr.message_flags,
 768					      msg_off +
 769					      offsetof(struct hv_message,
 770						       header.message_flags),
 771					      sizeof(hv_hdr.message_flags));
 772		if (r < 0)
 773			return r;
 774		return -EAGAIN;
 775	}
 776
 777	r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, src_msg, msg_off,
 778				      sizeof(src_msg->header) +
 779				      src_msg->header.payload_size);
 780	if (r < 0)
 781		return r;
 782
 783	r = synic_set_irq(synic, sint);
 784	if (r < 0)
 785		return r;
 786	if (r == 0)
 787		return -EFAULT;
 788	return 0;
 789}
 790
 791static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
 792{
 793	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
 794	struct hv_message *msg = &stimer->msg;
 795	struct hv_timer_message_payload *payload =
 796			(struct hv_timer_message_payload *)&msg->u.payload;
 797
 798	/*
 799	 * To avoid piling up periodic ticks, don't retry message
  800	 * delivery for them (in line with the "lazy" lost-ticks policy).
 801	 */
 802	bool no_retry = stimer->config.periodic;
 803
 804	payload->expiration_time = stimer->exp_time;
 805	payload->delivery_time = get_time_ref_counter(vcpu->kvm);
 806	return synic_deliver_msg(to_hv_synic(vcpu),
 807				 stimer->config.sintx, msg,
 808				 no_retry);
 809}
 810
 811static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
 812{
 813	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
 814	struct kvm_lapic_irq irq = {
 815		.delivery_mode = APIC_DM_FIXED,
 816		.vector = stimer->config.apic_vector
 817	};
 818
 819	if (lapic_in_kernel(vcpu))
 820		return !kvm_apic_set_irq(vcpu, &irq, NULL);
 821	return 0;
 822}
 823
 824static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
 825{
 826	int r, direct = stimer->config.direct_mode;
 827
 828	stimer->msg_pending = true;
 829	if (!direct)
 830		r = stimer_send_msg(stimer);
 831	else
 832		r = stimer_notify_direct(stimer);
 833	trace_kvm_hv_stimer_expiration(hv_stimer_to_vcpu(stimer)->vcpu_id,
 834				       stimer->index, direct, r);
 835	if (!r) {
 836		stimer->msg_pending = false;
 837		if (!(stimer->config.periodic))
 838			stimer->config.enable = 0;
 839	}
 840}
 841
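/*
 * Process synthetic timers flagged in stimer_pending_bitmap: deliver
 * expirations whose deadline has passed, (re)start timers that remain
 * enabled with a non-zero count, and clean up the rest.
 */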
 842void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
 843{
 844	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 845	struct kvm_vcpu_hv_stimer *stimer;
 846	u64 time_now, exp_time;
 847	int i;
 848
 849	if (!hv_vcpu)
 850		return;
 851
 852	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
 853		if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
 854			stimer = &hv_vcpu->stimer[i];
 855			if (stimer->config.enable) {
 856				exp_time = stimer->exp_time;
 857
 858				if (exp_time) {
 859					time_now =
 860						get_time_ref_counter(vcpu->kvm);
 861					if (time_now >= exp_time)
 862						stimer_expiration(stimer);
 863				}
 864
 865				if ((stimer->config.enable) &&
 866				    stimer->count) {
 867					if (!stimer->msg_pending)
 868						stimer_start(stimer);
 869				} else
 870					stimer_cleanup(stimer);
 871			}
 872		}
 873}
 874
 875void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
 876{
 877	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 878	int i;
 879
 880	if (!hv_vcpu)
 881		return;
 882
 883	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
 884		stimer_cleanup(&hv_vcpu->stimer[i]);
 885
 886	kfree(hv_vcpu);
 887	vcpu->arch.hyperv = NULL;
 888}
 889
 890bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
 891{
 892	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 893
 894	if (!hv_vcpu)
 895		return false;
 896
 897	if (!(hv_vcpu->hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
 898		return false;
 899	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
 900}
 901EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
 902
 903int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu)
 904{
 905	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 906
 907	if (!hv_vcpu || !kvm_hv_assist_page_enabled(vcpu))
 908		return -EFAULT;
 909
 910	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
 911				     &hv_vcpu->vp_assist_page, sizeof(struct hv_vp_assist_page));
 912}
 913EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
 914
 915static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
 916{
 917	struct hv_message *msg = &stimer->msg;
 918	struct hv_timer_message_payload *payload =
 919			(struct hv_timer_message_payload *)&msg->u.payload;
 920
 921	memset(&msg->header, 0, sizeof(msg->header));
 922	msg->header.message_type = HVMSG_TIMER_EXPIRED;
 923	msg->header.payload_size = sizeof(*payload);
 924
 925	payload->timer_index = stimer->index;
 926	payload->expiration_time = 0;
 927	payload->delivery_time = 0;
 928}
 929
 930static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
 931{
 932	memset(stimer, 0, sizeof(*stimer));
 933	stimer->index = timer_index;
 934	hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 935	stimer->timer.function = stimer_timer_callback;
 936	stimer_prepare_msg(stimer);
 937}
 938
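/*
 * Lazily allocate and initialize the per-vCPU Hyper-V context (SynIC,
 * synthetic timers, TLB flush fifos).  The VP index defaults to the vCPU
 * index.  Calling this again is a no-op once the context exists.
 */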
 939int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
 940{
 941	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 942	int i;
 943
 944	if (hv_vcpu)
 945		return 0;
 946
 947	hv_vcpu = kzalloc(sizeof(struct kvm_vcpu_hv), GFP_KERNEL_ACCOUNT);
 948	if (!hv_vcpu)
 949		return -ENOMEM;
 950
 951	vcpu->arch.hyperv = hv_vcpu;
 952	hv_vcpu->vcpu = vcpu;
 953
 954	synic_init(&hv_vcpu->synic);
 955
 956	bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
 957	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
 958		stimer_init(&hv_vcpu->stimer[i], i);
 959
 960	hv_vcpu->vp_index = vcpu->vcpu_idx;
 961
 962	for (i = 0; i < HV_NR_TLB_FLUSH_FIFOS; i++) {
 963		INIT_KFIFO(hv_vcpu->tlb_flush_fifo[i].entries);
 964		spin_lock_init(&hv_vcpu->tlb_flush_fifo[i].write_lock);
 965	}
 966
 967	return 0;
 968}
 969
 970int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
 971{
 972	struct kvm_vcpu_hv_synic *synic;
 973	int r;
 974
 975	r = kvm_hv_vcpu_init(vcpu);
 976	if (r)
 977		return r;
 978
 979	synic = to_hv_synic(vcpu);
 980
 981	synic->active = true;
 982	synic->dont_zero_synic_pages = dont_zero_synic_pages;
 983	synic->control = HV_SYNIC_CONTROL_ENABLE;
 984	return 0;
 985}
 986
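/*
 * Partition-wide MSRs live in struct kvm_hv rather than in per-vCPU state
 * and are therefore accessed under hv->hv_lock, see kvm_hv_set_msr_common()
 * and kvm_hv_get_msr_common().
 */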
 987static bool kvm_hv_msr_partition_wide(u32 msr)
 988{
 989	bool r = false;
 990
 991	switch (msr) {
 992	case HV_X64_MSR_GUEST_OS_ID:
 993	case HV_X64_MSR_HYPERCALL:
 994	case HV_X64_MSR_REFERENCE_TSC:
 995	case HV_X64_MSR_TIME_REF_COUNT:
 996	case HV_X64_MSR_CRASH_CTL:
 997	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 998	case HV_X64_MSR_RESET:
 999	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
1000	case HV_X64_MSR_TSC_EMULATION_CONTROL:
1001	case HV_X64_MSR_TSC_EMULATION_STATUS:
1002	case HV_X64_MSR_SYNDBG_OPTIONS:
1003	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
1004		r = true;
1005		break;
1006	}
1007
1008	return r;
1009}
1010
1011static int kvm_hv_msr_get_crash_data(struct kvm *kvm, u32 index, u64 *pdata)
1012{
1013	struct kvm_hv *hv = to_kvm_hv(kvm);
1014	size_t size = ARRAY_SIZE(hv->hv_crash_param);
1015
1016	if (WARN_ON_ONCE(index >= size))
1017		return -EINVAL;
1018
1019	*pdata = hv->hv_crash_param[array_index_nospec(index, size)];
1020	return 0;
1021}
1022
1023static int kvm_hv_msr_get_crash_ctl(struct kvm *kvm, u64 *pdata)
1024{
1025	struct kvm_hv *hv = to_kvm_hv(kvm);
1026
1027	*pdata = hv->hv_crash_ctl;
1028	return 0;
1029}
1030
1031static int kvm_hv_msr_set_crash_ctl(struct kvm *kvm, u64 data)
1032{
1033	struct kvm_hv *hv = to_kvm_hv(kvm);
1034
1035	hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY;
1036
1037	return 0;
1038}
1039
1040static int kvm_hv_msr_set_crash_data(struct kvm *kvm, u32 index, u64 data)
1041{
1042	struct kvm_hv *hv = to_kvm_hv(kvm);
1043	size_t size = ARRAY_SIZE(hv->hv_crash_param);
1044
1045	if (WARN_ON_ONCE(index >= size))
1046		return -EINVAL;
1047
1048	hv->hv_crash_param[array_index_nospec(index, size)] = data;
1049	return 0;
1050}
1051
1052/*
1053 * The kvmclock and Hyper-V TSC page use similar formulas, and converting
1054 * between them is possible:
1055 *
1056 * kvmclock formula:
1057 *    nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
1058 *           + system_time
1059 *
1060 * Hyper-V formula:
1061 *    nsec/100 = ticks * scale / 2^64 + offset
1062 *
1063 * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
1064 * By dividing the kvmclock formula by 100 and equating what's left we get:
1065 *    ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
1066 *            scale / 2^64 =         tsc_to_system_mul * 2^(tsc_shift-32) / 100
1067 *            scale        =         tsc_to_system_mul * 2^(32+tsc_shift) / 100
1068 *
1069 * Now expand the kvmclock formula and divide by 100:
1070 *    nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
1071 *           - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
1072 *           + system_time
1073 *    nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
1074 *               - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
1075 *               + system_time / 100
1076 *
1077 * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
1078 *    nsec/100 = ticks * scale / 2^64
1079 *               - tsc_timestamp * scale / 2^64
1080 *               + system_time / 100
1081 *
1082 * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
1083 *    offset = system_time / 100 - tsc_timestamp * scale / 2^64
1084 *
1085 * These two equivalencies are implemented in this function.
1086 */
1087static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
1088					struct ms_hyperv_tsc_page *tsc_ref)
1089{
1090	u64 max_mul;
1091
1092	if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
1093		return false;
1094
1095	/*
 1096	 * Check if the scale would overflow; if so, use the time ref counter:
1097	 *    tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
1098	 *    tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
1099	 *    tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
1100	 */
1101	max_mul = 100ull << (32 - hv_clock->tsc_shift);
1102	if (hv_clock->tsc_to_system_mul >= max_mul)
1103		return false;
1104
1105	/*
1106	 * Otherwise compute the scale and offset according to the formulas
1107	 * derived above.
1108	 */
1109	tsc_ref->tsc_scale =
1110		mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
1111				hv_clock->tsc_to_system_mul,
1112				100);
1113
1114	tsc_ref->tsc_offset = hv_clock->system_time;
1115	do_div(tsc_ref->tsc_offset, 100);
1116	tsc_ref->tsc_offset -=
1117		mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
1118	return true;
1119}
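
/*
 * Illustrative example (not from the spec): with a 1 GHz guest TSC one tick
 * is 1 ns, i.e. tsc_to_system_mul * 2^(tsc_shift-32) == 1, so tsc_scale
 * becomes 2^64 / 100 and the Hyper-V formula reduces to ticks / 100, the
 * expected number of 100ns units.
 */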
1120
1121/*
1122 * Don't touch TSC page values if the guest has opted for TSC emulation after
1123 * migration. KVM doesn't fully support reenlightenment notifications and TSC
 1124 * access emulation, and Hyper-V is known to expect the values in the TSC page
 1125 * to stay constant until TSC access emulation is disabled from the guest side
1126 * (HV_X64_MSR_TSC_EMULATION_STATUS). KVM userspace is expected to preserve TSC
1127 * frequency and guest visible TSC value across migration (and prevent it when
1128 * TSC scaling is unsupported).
1129 */
1130static inline bool tsc_page_update_unsafe(struct kvm_hv *hv)
1131{
1132	return (hv->hv_tsc_page_status != HV_TSC_PAGE_GUEST_CHANGED) &&
1133		hv->hv_tsc_emulation_control;
1134}
1135
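/*
 * (Re)populate the guest's reference TSC page: zero the sequence first so
 * the guest temporarily falls back to the time reference count MSR, write
 * the updated scale/offset, then publish a new non-zero sequence.
 */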
1136void kvm_hv_setup_tsc_page(struct kvm *kvm,
1137			   struct pvclock_vcpu_time_info *hv_clock)
1138{
1139	struct kvm_hv *hv = to_kvm_hv(kvm);
1140	u32 tsc_seq;
1141	u64 gfn;
1142
1143	BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
1144	BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
1145
1146	mutex_lock(&hv->hv_lock);
1147
1148	if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
1149	    hv->hv_tsc_page_status == HV_TSC_PAGE_SET ||
1150	    hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET)
1151		goto out_unlock;
1152
1153	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
1154		goto out_unlock;
1155
1156	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
1157	/*
1158	 * Because the TSC parameters only vary when there is a
1159	 * change in the master clock, do not bother with caching.
1160	 */
1161	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
1162				    &tsc_seq, sizeof(tsc_seq))))
1163		goto out_err;
1164
1165	if (tsc_seq && tsc_page_update_unsafe(hv)) {
1166		if (kvm_read_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
1167			goto out_err;
1168
1169		hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
1170		goto out_unlock;
1171	}
1172
1173	/*
1174	 * While we're computing and writing the parameters, force the
1175	 * guest to use the time reference count MSR.
1176	 */
1177	hv->tsc_ref.tsc_sequence = 0;
1178	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
1179			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
1180		goto out_err;
1181
1182	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
1183		goto out_err;
1184
1185	/* Ensure sequence is zero before writing the rest of the struct.  */
1186	smp_wmb();
1187	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
1188		goto out_err;
1189
1190	/*
1191	 * Now switch to the TSC page mechanism by writing the sequence.
1192	 */
1193	tsc_seq++;
1194	if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
1195		tsc_seq = 1;
1196
1197	/* Write the struct entirely before the non-zero sequence.  */
1198	smp_wmb();
1199
1200	hv->tsc_ref.tsc_sequence = tsc_seq;
1201	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
1202			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
1203		goto out_err;
1204
1205	hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
1206	goto out_unlock;
1207
1208out_err:
1209	hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
1210out_unlock:
1211	mutex_unlock(&hv->hv_lock);
1212}
1213
1214void kvm_hv_request_tsc_page_update(struct kvm *kvm)
1215{
1216	struct kvm_hv *hv = to_kvm_hv(kvm);
1217
1218	mutex_lock(&hv->hv_lock);
1219
1220	if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET &&
1221	    !tsc_page_update_unsafe(hv))
1222		hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;
1223
1224	mutex_unlock(&hv->hv_lock);
1225}
1226
1227static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
1228{
1229	if (!hv_vcpu->enforce_cpuid)
1230		return true;
1231
1232	switch (msr) {
1233	case HV_X64_MSR_GUEST_OS_ID:
1234	case HV_X64_MSR_HYPERCALL:
1235		return hv_vcpu->cpuid_cache.features_eax &
1236			HV_MSR_HYPERCALL_AVAILABLE;
1237	case HV_X64_MSR_VP_RUNTIME:
1238		return hv_vcpu->cpuid_cache.features_eax &
1239			HV_MSR_VP_RUNTIME_AVAILABLE;
1240	case HV_X64_MSR_TIME_REF_COUNT:
1241		return hv_vcpu->cpuid_cache.features_eax &
1242			HV_MSR_TIME_REF_COUNT_AVAILABLE;
1243	case HV_X64_MSR_VP_INDEX:
1244		return hv_vcpu->cpuid_cache.features_eax &
1245			HV_MSR_VP_INDEX_AVAILABLE;
1246	case HV_X64_MSR_RESET:
1247		return hv_vcpu->cpuid_cache.features_eax &
1248			HV_MSR_RESET_AVAILABLE;
1249	case HV_X64_MSR_REFERENCE_TSC:
1250		return hv_vcpu->cpuid_cache.features_eax &
1251			HV_MSR_REFERENCE_TSC_AVAILABLE;
1252	case HV_X64_MSR_SCONTROL:
1253	case HV_X64_MSR_SVERSION:
1254	case HV_X64_MSR_SIEFP:
1255	case HV_X64_MSR_SIMP:
1256	case HV_X64_MSR_EOM:
1257	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
1258		return hv_vcpu->cpuid_cache.features_eax &
1259			HV_MSR_SYNIC_AVAILABLE;
1260	case HV_X64_MSR_STIMER0_CONFIG:
1261	case HV_X64_MSR_STIMER1_CONFIG:
1262	case HV_X64_MSR_STIMER2_CONFIG:
1263	case HV_X64_MSR_STIMER3_CONFIG:
1264	case HV_X64_MSR_STIMER0_COUNT:
1265	case HV_X64_MSR_STIMER1_COUNT:
1266	case HV_X64_MSR_STIMER2_COUNT:
1267	case HV_X64_MSR_STIMER3_COUNT:
1268		return hv_vcpu->cpuid_cache.features_eax &
1269			HV_MSR_SYNTIMER_AVAILABLE;
1270	case HV_X64_MSR_EOI:
1271	case HV_X64_MSR_ICR:
1272	case HV_X64_MSR_TPR:
1273	case HV_X64_MSR_VP_ASSIST_PAGE:
1274		return hv_vcpu->cpuid_cache.features_eax &
1275			HV_MSR_APIC_ACCESS_AVAILABLE;
1276		break;
1277	case HV_X64_MSR_TSC_FREQUENCY:
1278	case HV_X64_MSR_APIC_FREQUENCY:
1279		return hv_vcpu->cpuid_cache.features_eax &
1280			HV_ACCESS_FREQUENCY_MSRS;
1281	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
1282	case HV_X64_MSR_TSC_EMULATION_CONTROL:
1283	case HV_X64_MSR_TSC_EMULATION_STATUS:
1284		return hv_vcpu->cpuid_cache.features_eax &
1285			HV_ACCESS_REENLIGHTENMENT;
1286	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
1287	case HV_X64_MSR_CRASH_CTL:
1288		return hv_vcpu->cpuid_cache.features_edx &
1289			HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
1290	case HV_X64_MSR_SYNDBG_OPTIONS:
1291	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
1292		return hv_vcpu->cpuid_cache.features_edx &
1293			HV_FEATURE_DEBUG_MSRS_AVAILABLE;
1294	default:
1295		break;
1296	}
1297
1298	return false;
1299}
1300
1301static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
1302			     bool host)
1303{
1304	struct kvm *kvm = vcpu->kvm;
1305	struct kvm_hv *hv = to_kvm_hv(kvm);
1306
1307	if (unlikely(!host && !hv_check_msr_access(to_hv_vcpu(vcpu), msr)))
1308		return 1;
1309
1310	switch (msr) {
1311	case HV_X64_MSR_GUEST_OS_ID:
1312		hv->hv_guest_os_id = data;
1313		/* setting guest os id to zero disables hypercall page */
1314		if (!hv->hv_guest_os_id)
1315			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1316		break;
1317	case HV_X64_MSR_HYPERCALL: {
1318		u8 instructions[9];
1319		int i = 0;
1320		u64 addr;
1321
 1322		/* if guest os id is not set, the hypercall page should remain disabled */
1323		if (!hv->hv_guest_os_id)
1324			break;
1325		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1326			hv->hv_hypercall = data;
1327			break;
1328		}
1329
1330		/*
1331		 * If Xen and Hyper-V hypercalls are both enabled, disambiguate
 1332		 * the same way Xen itself does, by setting bit 31 of EAX, which
 1333		 * is RsvdZ in the 32-bit Hyper-V hypercall ABI and will simply be
 1334		 * clobbered on 64-bit.
1335		 */
1336		if (kvm_xen_hypercall_enabled(kvm)) {
1337			/* orl $0x80000000, %eax */
1338			instructions[i++] = 0x0d;
1339			instructions[i++] = 0x00;
1340			instructions[i++] = 0x00;
1341			instructions[i++] = 0x00;
1342			instructions[i++] = 0x80;
1343		}
1344
1345		/* vmcall/vmmcall */
1346		static_call(kvm_x86_patch_hypercall)(vcpu, instructions + i);
1347		i += 3;
1348
1349		/* ret */
1350		((unsigned char *)instructions)[i++] = 0xc3;
1351
1352		addr = data & HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK;
1353		if (kvm_vcpu_write_guest(vcpu, addr, instructions, i))
1354			return 1;
1355		hv->hv_hypercall = data;
1356		break;
1357	}
1358	case HV_X64_MSR_REFERENCE_TSC:
1359		hv->hv_tsc_page = data;
1360		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
1361			if (!host)
1362				hv->hv_tsc_page_status = HV_TSC_PAGE_GUEST_CHANGED;
1363			else
1364				hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;
1365			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
1366		} else {
1367			hv->hv_tsc_page_status = HV_TSC_PAGE_UNSET;
1368		}
1369		break;
1370	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
1371		return kvm_hv_msr_set_crash_data(kvm,
1372						 msr - HV_X64_MSR_CRASH_P0,
1373						 data);
1374	case HV_X64_MSR_CRASH_CTL:
1375		if (host)
1376			return kvm_hv_msr_set_crash_ctl(kvm, data);
1377
1378		if (data & HV_CRASH_CTL_CRASH_NOTIFY) {
1379			vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
1380				   hv->hv_crash_param[0],
1381				   hv->hv_crash_param[1],
1382				   hv->hv_crash_param[2],
1383				   hv->hv_crash_param[3],
1384				   hv->hv_crash_param[4]);
1385
1386			/* Send notification about crash to user space */
1387			kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
1388		}
1389		break;
1390	case HV_X64_MSR_RESET:
1391		if (data == 1) {
1392			vcpu_debug(vcpu, "hyper-v reset requested\n");
1393			kvm_make_request(KVM_REQ_HV_RESET, vcpu);
1394		}
1395		break;
1396	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
1397		hv->hv_reenlightenment_control = data;
1398		break;
1399	case HV_X64_MSR_TSC_EMULATION_CONTROL:
1400		hv->hv_tsc_emulation_control = data;
1401		break;
1402	case HV_X64_MSR_TSC_EMULATION_STATUS:
1403		if (data && !host)
1404			return 1;
1405
1406		hv->hv_tsc_emulation_status = data;
1407		break;
1408	case HV_X64_MSR_TIME_REF_COUNT:
1409		/* read-only, but still ignore it if host-initiated */
1410		if (!host)
1411			return 1;
1412		break;
1413	case HV_X64_MSR_SYNDBG_OPTIONS:
1414	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
1415		return syndbg_set_msr(vcpu, msr, data, host);
1416	default:
1417		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
1418			    msr, data);
1419		return 1;
1420	}
1421	return 0;
1422}
1423
1424/* Calculate cpu time spent by current task in 100ns units */
1425static u64 current_task_runtime_100ns(void)
1426{
1427	u64 utime, stime;
1428
1429	task_cputime_adjusted(current, &utime, &stime);
1430
1431	return div_u64(utime + stime, 100);
1432}
1433
1434static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
1435{
1436	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
1437
1438	if (unlikely(!host && !hv_check_msr_access(hv_vcpu, msr)))
1439		return 1;
1440
1441	switch (msr) {
1442	case HV_X64_MSR_VP_INDEX: {
1443		struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
1444		u32 new_vp_index = (u32)data;
1445
1446		if (!host || new_vp_index >= KVM_MAX_VCPUS)
1447			return 1;
1448
1449		if (new_vp_index == hv_vcpu->vp_index)
1450			return 0;
1451
1452		/*
1453		 * The VP index is initialized to vcpu_index by
 1454		 * kvm_hv_vcpu_postcreate so they initially match.  Now that the
 1455		 * VP index is changing, adjust num_mismatched_vp_indexes if
1456		 * it now matches or no longer matches vcpu_idx.
1457		 */
1458		if (hv_vcpu->vp_index == vcpu->vcpu_idx)
1459			atomic_inc(&hv->num_mismatched_vp_indexes);
1460		else if (new_vp_index == vcpu->vcpu_idx)
1461			atomic_dec(&hv->num_mismatched_vp_indexes);
1462
1463		hv_vcpu->vp_index = new_vp_index;
1464		break;
1465	}
1466	case HV_X64_MSR_VP_ASSIST_PAGE: {
1467		u64 gfn;
1468		unsigned long addr;
1469
1470		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
1471			hv_vcpu->hv_vapic = data;
1472			if (kvm_lapic_set_pv_eoi(vcpu, 0, 0))
1473				return 1;
1474			break;
1475		}
1476		gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
1477		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
1478		if (kvm_is_error_hva(addr))
1479			return 1;
1480
1481		/*
 1482		 * Clear only the apic_assist portion of struct hv_vp_assist_page;
 1483		 * there can be valuable data in the rest which needs to be
 1484		 * preserved, e.g. on migration.
1485		 */
1486		if (__put_user(0, (u32 __user *)addr))
1487			return 1;
1488		hv_vcpu->hv_vapic = data;
1489		kvm_vcpu_mark_page_dirty(vcpu, gfn);
1490		if (kvm_lapic_set_pv_eoi(vcpu,
1491					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
1492					    sizeof(struct hv_vp_assist_page)))
1493			return 1;
1494		break;
1495	}
1496	case HV_X64_MSR_EOI:
1497		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1498	case HV_X64_MSR_ICR:
1499		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1500	case HV_X64_MSR_TPR:
1501		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1502	case HV_X64_MSR_VP_RUNTIME:
1503		if (!host)
1504			return 1;
1505		hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
1506		break;
1507	case HV_X64_MSR_SCONTROL:
1508	case HV_X64_MSR_SVERSION:
1509	case HV_X64_MSR_SIEFP:
1510	case HV_X64_MSR_SIMP:
1511	case HV_X64_MSR_EOM:
1512	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
1513		return synic_set_msr(to_hv_synic(vcpu), msr, data, host);
1514	case HV_X64_MSR_STIMER0_CONFIG:
1515	case HV_X64_MSR_STIMER1_CONFIG:
1516	case HV_X64_MSR_STIMER2_CONFIG:
1517	case HV_X64_MSR_STIMER3_CONFIG: {
1518		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
1519
1520		return stimer_set_config(to_hv_stimer(vcpu, timer_index),
1521					 data, host);
1522	}
1523	case HV_X64_MSR_STIMER0_COUNT:
1524	case HV_X64_MSR_STIMER1_COUNT:
1525	case HV_X64_MSR_STIMER2_COUNT:
1526	case HV_X64_MSR_STIMER3_COUNT: {
1527		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
1528
1529		return stimer_set_count(to_hv_stimer(vcpu, timer_index),
1530					data, host);
1531	}
1532	case HV_X64_MSR_TSC_FREQUENCY:
1533	case HV_X64_MSR_APIC_FREQUENCY:
1534		/* read-only, but still ignore it if host-initiated */
1535		if (!host)
1536			return 1;
1537		break;
1538	default:
1539		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
1540			    msr, data);
1541		return 1;
1542	}
1543
1544	return 0;
1545}
1546
1547static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
1548			     bool host)
1549{
1550	u64 data = 0;
1551	struct kvm *kvm = vcpu->kvm;
1552	struct kvm_hv *hv = to_kvm_hv(kvm);
1553
1554	if (unlikely(!host && !hv_check_msr_access(to_hv_vcpu(vcpu), msr)))
1555		return 1;
1556
1557	switch (msr) {
1558	case HV_X64_MSR_GUEST_OS_ID:
1559		data = hv->hv_guest_os_id;
1560		break;
1561	case HV_X64_MSR_HYPERCALL:
1562		data = hv->hv_hypercall;
1563		break;
1564	case HV_X64_MSR_TIME_REF_COUNT:
1565		data = get_time_ref_counter(kvm);
1566		break;
1567	case HV_X64_MSR_REFERENCE_TSC:
1568		data = hv->hv_tsc_page;
1569		break;
1570	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
1571		return kvm_hv_msr_get_crash_data(kvm,
1572						 msr - HV_X64_MSR_CRASH_P0,
1573						 pdata);
1574	case HV_X64_MSR_CRASH_CTL:
1575		return kvm_hv_msr_get_crash_ctl(kvm, pdata);
1576	case HV_X64_MSR_RESET:
1577		data = 0;
1578		break;
1579	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
1580		data = hv->hv_reenlightenment_control;
1581		break;
1582	case HV_X64_MSR_TSC_EMULATION_CONTROL:
1583		data = hv->hv_tsc_emulation_control;
1584		break;
1585	case HV_X64_MSR_TSC_EMULATION_STATUS:
1586		data = hv->hv_tsc_emulation_status;
1587		break;
1588	case HV_X64_MSR_SYNDBG_OPTIONS:
1589	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
1590		return syndbg_get_msr(vcpu, msr, pdata, host);
1591	default:
1592		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1593		return 1;
1594	}
1595
1596	*pdata = data;
1597	return 0;
1598}
1599
1600static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
1601			  bool host)
1602{
1603	u64 data = 0;
1604	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
1605
1606	if (unlikely(!host && !hv_check_msr_access(hv_vcpu, msr)))
1607		return 1;
1608
1609	switch (msr) {
1610	case HV_X64_MSR_VP_INDEX:
1611		data = hv_vcpu->vp_index;
1612		break;
1613	case HV_X64_MSR_EOI:
1614		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
1615	case HV_X64_MSR_ICR:
1616		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
1617	case HV_X64_MSR_TPR:
1618		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
1619	case HV_X64_MSR_VP_ASSIST_PAGE:
1620		data = hv_vcpu->hv_vapic;
1621		break;
1622	case HV_X64_MSR_VP_RUNTIME:
1623		data = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
1624		break;
1625	case HV_X64_MSR_SCONTROL:
1626	case HV_X64_MSR_SVERSION:
1627	case HV_X64_MSR_SIEFP:
1628	case HV_X64_MSR_SIMP:
1629	case HV_X64_MSR_EOM:
1630	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
1631		return synic_get_msr(to_hv_synic(vcpu), msr, pdata, host);
1632	case HV_X64_MSR_STIMER0_CONFIG:
1633	case HV_X64_MSR_STIMER1_CONFIG:
1634	case HV_X64_MSR_STIMER2_CONFIG:
1635	case HV_X64_MSR_STIMER3_CONFIG: {
1636		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
1637
1638		return stimer_get_config(to_hv_stimer(vcpu, timer_index),
1639					 pdata);
1640	}
1641	case HV_X64_MSR_STIMER0_COUNT:
1642	case HV_X64_MSR_STIMER1_COUNT:
1643	case HV_X64_MSR_STIMER2_COUNT:
1644	case HV_X64_MSR_STIMER3_COUNT: {
1645		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
1646
1647		return stimer_get_count(to_hv_stimer(vcpu, timer_index),
1648					pdata);
1649	}
1650	case HV_X64_MSR_TSC_FREQUENCY:
1651		data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
1652		break;
1653	case HV_X64_MSR_APIC_FREQUENCY:
1654		data = APIC_BUS_FREQUENCY;
1655		break;
1656	default:
1657		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1658		return 1;
1659	}
1660	*pdata = data;
1661	return 0;
1662}
1663
1664int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
1665{
1666	struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
1667
1668	if (!host && !vcpu->arch.hyperv_enabled)
1669		return 1;
1670
1671	if (kvm_hv_vcpu_init(vcpu))
1672		return 1;
1673
1674	if (kvm_hv_msr_partition_wide(msr)) {
1675		int r;
1676
1677		mutex_lock(&hv->hv_lock);
1678		r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
1679		mutex_unlock(&hv->hv_lock);
1680		return r;
1681	} else
1682		return kvm_hv_set_msr(vcpu, msr, data, host);
1683}
1684
1685int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
1686{
1687	struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
1688
1689	if (!host && !vcpu->arch.hyperv_enabled)
1690		return 1;
1691
1692	if (kvm_hv_vcpu_init(vcpu))
1693		return 1;
1694
1695	if (kvm_hv_msr_partition_wide(msr)) {
1696		int r;
1697
1698		mutex_lock(&hv->hv_lock);
1699		r = kvm_hv_get_msr_pw(vcpu, msr, pdata, host);
1700		mutex_unlock(&hv->hv_lock);
1701		return r;
1702	} else
1703		return kvm_hv_get_msr(vcpu, msr, pdata, host);
1704}
1705
1706static void sparse_set_to_vcpu_mask(struct kvm *kvm, u64 *sparse_banks,
1707				    u64 valid_bank_mask, unsigned long *vcpu_mask)
1708{
1709	struct kvm_hv *hv = to_kvm_hv(kvm);
1710	bool has_mismatch = atomic_read(&hv->num_mismatched_vp_indexes);
1711	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
1712	struct kvm_vcpu *vcpu;
1713	int bank, sbank = 0;
1714	unsigned long i;
1715	u64 *bitmap;
1716
1717	BUILD_BUG_ON(sizeof(vp_bitmap) >
1718		     sizeof(*vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS));
1719
1720	/*
1721	 * If vp_index == vcpu_idx for all vCPUs, fill vcpu_mask directly, else
1722	 * fill a temporary buffer and manually test each vCPU's VP index.
1723	 */
1724	if (likely(!has_mismatch))
1725		bitmap = (u64 *)vcpu_mask;
1726	else
1727		bitmap = vp_bitmap;
1728
1729	/*
1730	 * Each set of 64 VPs is packed into sparse_banks, with valid_bank_mask
1731	 * having a '1' for each bank that exists in sparse_banks.  Sets must
1732	 * be in ascending order, i.e. bank0..bankN.
1733	 */
1734	memset(bitmap, 0, sizeof(vp_bitmap));
1735	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
1736			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
1737		bitmap[bank] = sparse_banks[sbank++];
1738
1739	if (likely(!has_mismatch))
1740		return;
1741
1742	bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
1743	kvm_for_each_vcpu(i, vcpu, kvm) {
1744		if (test_bit(kvm_hv_get_vpindex(vcpu), (unsigned long *)vp_bitmap))
1745			__set_bit(i, vcpu_mask);
1746	}
1747}
1748
1749static bool hv_is_vp_in_sparse_set(u32 vp_id, u64 valid_bank_mask, u64 sparse_banks[])
1750{
1751	int valid_bit_nr = vp_id / HV_VCPUS_PER_SPARSE_BANK;
1752	unsigned long sbank;
1753
1754	if (!test_bit(valid_bit_nr, (unsigned long *)&valid_bank_mask))
1755		return false;
1756
1757	/*
1758	 * The index into the sparse bank is the number of preceding bits in
1759	 * the valid mask.  Optimize for VMs with <64 vCPUs by skipping the
1760	 * fancy math if there can't possibly be preceding bits.
1761	 */
1762	if (valid_bit_nr)
1763		sbank = hweight64(valid_bank_mask & GENMASK_ULL(valid_bit_nr - 1, 0));
1764	else
1765		sbank = 0;
1766
1767	return test_bit(vp_id % HV_VCPUS_PER_SPARSE_BANK,
1768			(unsigned long *)&sparse_banks[sbank]);
1769}
1770
1771struct kvm_hv_hcall {
1772	/* Hypercall input data */
1773	u64 param;
1774	u64 ingpa;
1775	u64 outgpa;
1776	u16 code;
1777	u16 var_cnt;
1778	u16 rep_cnt;
1779	u16 rep_idx;
1780	bool fast;
1781	bool rep;
1782	sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS];
1783
1784	/*
1785	 * Current read offset when KVM reads hypercall input data gradually,
1786	 * either offset in bytes from 'ingpa' for regular hypercalls or the
1787	 * number of already consumed 'XMM halves' for 'fast' hypercalls.
1788	 */
1789	union {
1790		gpa_t data_offset;
1791		int consumed_xmm_halves;
1792	};
1793};
1794
1795
1796static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
1797			      u16 orig_cnt, u16 cnt_cap, u64 *data)
1798{
1799	/*
 1800	 * Preserve the original count when ignoring entries via a "cap"; KVM
1801	 * still needs to validate the guest input (though the non-XMM path
1802	 * punts on the checks).
1803	 */
1804	u16 cnt = min(orig_cnt, cnt_cap);
1805	int i, j;
1806
1807	if (hc->fast) {
1808		/*
1809		 * Each XMM holds two sparse banks, but do not count halves that
1810		 * have already been consumed for hypercall parameters.
1811		 */
1812		if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - hc->consumed_xmm_halves)
1813			return HV_STATUS_INVALID_HYPERCALL_INPUT;
1814
1815		for (i = 0; i < cnt; i++) {
1816			j = i + hc->consumed_xmm_halves;
1817			if (j % 2)
1818				data[i] = sse128_hi(hc->xmm[j / 2]);
1819			else
1820				data[i] = sse128_lo(hc->xmm[j / 2]);
1821		}
1822		return 0;
1823	}
1824
1825	return kvm_read_guest(kvm, hc->ingpa + hc->data_offset, data,
1826			      cnt * sizeof(*data));
1827}
1828
1829static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
1830				 u64 *sparse_banks)
1831{
1832	if (hc->var_cnt > HV_MAX_SPARSE_VCPU_BANKS)
1833		return -EINVAL;
1834
1835	/* Cap var_cnt to ignore banks that cannot contain a legal VP index. */
1836	return kvm_hv_get_hc_data(kvm, hc, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS,
1837				  sparse_banks);
1838}
1839
1840static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[])
1841{
1842	return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt, entries);
1843}
1844
1845static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu,
1846				 struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo,
1847				 u64 *entries, int count)
1848{
1849	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
1850	u64 flush_all_entry = KVM_HV_TLB_FLUSHALL_ENTRY;
1851
1852	if (!hv_vcpu)
1853		return;
1854
1855	spin_lock(&tlb_flush_fifo->write_lock);
1856
1857	/*
 1858	 * All entries should fit into the fifo while leaving one slot free for a
 1859	 * 'flush all' entry in case another request comes in. If there's not
 1860	 * enough space, just put a 'flush all' entry there instead.
1861	 */
1862	if (count && entries && count < kfifo_avail(&tlb_flush_fifo->entries)) {
1863		WARN_ON(kfifo_in(&tlb_flush_fifo->entries, entries, count) != count);
1864		goto out_unlock;
1865	}
1866
1867	/*
 1868	 * Note: a full fifo always contains a 'flush all' entry, so there is no
 1869	 * need to check the return value.
1870	 */
1871	kfifo_in(&tlb_flush_fifo->entries, &flush_all_entry, 1);
1872
1873out_unlock:
1874	spin_unlock(&tlb_flush_fifo->write_lock);
1875}
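
/*
 * Illustrative sketch (not part of the kernel source): the queueing policy
 * used above, on a plain fixed-size array instead of a kfifo.  Requested
 * entries are queued only if they all fit while still leaving at least one
 * slot free; otherwise the queue degrades to a single 'flush all' marker.
 * Size, marker value and names are hypothetical.
 */
#include <stddef.h>
#include <stdint.h>

#define FLUSH_FIFO_SIZE	16
#define FLUSH_ALL_ENTRY	(~0ULL)

struct flush_queue {
	uint64_t entries[FLUSH_FIFO_SIZE];
	size_t len;
};

static void enqueue_flush(struct flush_queue *q, const uint64_t *entries,
			  size_t count)
{
	size_t avail = FLUSH_FIFO_SIZE - q->len;
	size_t i;

	if (count && entries && count < avail) {
		for (i = 0; i < count; i++)
			q->entries[q->len++] = entries[i];
		return;
	}

	/*
	 * Degrade to 'flush all'.  The queue can only ever become full via
	 * this branch, so a full queue already ends with the marker and can
	 * be left alone.
	 */
	if (q->len < FLUSH_FIFO_SIZE)
		q->entries[q->len++] = FLUSH_ALL_ENTRY;
}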
1876
1877int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
1878{
1879	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
1880	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
1881	u64 entries[KVM_HV_TLB_FLUSH_FIFO_SIZE];
1882	int i, j, count;
1883	gva_t gva;
1884
1885	if (!tdp_enabled || !hv_vcpu)
1886		return -EINVAL;
1887
1888	tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(vcpu, is_guest_mode(vcpu));
1889
1890	count = kfifo_out(&tlb_flush_fifo->entries, entries, KVM_HV_TLB_FLUSH_FIFO_SIZE);
1891
1892	for (i = 0; i < count; i++) {
1893		if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY)
1894			goto out_flush_all;
1895
1896		/*
1897		 * Lower 12 bits of 'address' encode the number of additional
1898		 * pages to flush.
1899		 */
1900		gva = entries[i] & PAGE_MASK;
1901		for (j = 0; j < (entries[i] & ~PAGE_MASK) + 1; j++)
1902			static_call(kvm_x86_flush_tlb_gva)(vcpu, gva + j * PAGE_SIZE);
1903
1904		++vcpu->stat.tlb_flush;
1905	}
1906	return 0;
1907
1908out_flush_all:
1909	kfifo_reset_out(&tlb_flush_fifo->entries);
1910
1911	/* Fall back to full flush. */
1912	return -ENOSPC;
1913}
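
/*
 * Illustrative sketch (not part of the kernel source): decoding of a single
 * TLB flush entry as consumed above.  The upper bits carry the page-aligned
 * GVA and the low 12 bits the number of *additional* 4KiB pages, so an entry
 * of 0x7f8000003 covers four pages starting at 0x7f8000000.  Hypothetical
 * userspace names.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define FLUSH_PAGE_SHIFT	12
#define FLUSH_PAGE_SIZE		(1ULL << FLUSH_PAGE_SHIFT)
#define FLUSH_PAGE_MASK		(~(FLUSH_PAGE_SIZE - 1))

static void decode_flush_entry(uint64_t entry)
{
	uint64_t gva = entry & FLUSH_PAGE_MASK;
	uint64_t extra_pages = entry & ~FLUSH_PAGE_MASK;
	uint64_t i;

	for (i = 0; i < extra_pages + 1; i++)
		printf("flush gva 0x%" PRIx64 "\n", gva + i * FLUSH_PAGE_SIZE);
}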
1914
1915static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
1916{
1917	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
1918	u64 *sparse_banks = hv_vcpu->sparse_banks;
1919	struct kvm *kvm = vcpu->kvm;
1920	struct hv_tlb_flush_ex flush_ex;
1921	struct hv_tlb_flush flush;
1922	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
1923	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
1924	/*
1925	 * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE'
1926	 * entries in the TLB flush fifo. The last slot, however, must always be
1927	 * left free for the 'flush all' entry, which gets placed there when
1928	 * there is not enough space for all the requested entries.
1929	 */
1930	u64 __tlb_flush_entries[KVM_HV_TLB_FLUSH_FIFO_SIZE - 1];
1931	u64 *tlb_flush_entries;
1932	u64 valid_bank_mask;
1933	struct kvm_vcpu *v;
1934	unsigned long i;
1935	bool all_cpus;
1936
1937	/*
1938	 * The Hyper-V TLFS doesn't allow more than HV_MAX_SPARSE_VCPU_BANKS
1939	 * sparse banks. Fail the build if KVM's max allowed number of
1940	 * vCPUs (>4096) exceeds this limit.
1941	 */
1942	BUILD_BUG_ON(KVM_HV_MAX_SPARSE_VCPU_SET_BITS > HV_MAX_SPARSE_VCPU_BANKS);
1943
1944	/*
1945	 * 'Slow' hypercall's first parameter is the address in guest's memory
1946	 * where hypercall parameters are placed. This is either a GPA or a
1947	 * nested GPA when KVM is handling the call from L2 ('direct' TLB
1948	 * flush).  Translate the address here so the memory can be uniformly
1949	 * read with kvm_read_guest().
1950	 */
1951	if (!hc->fast && is_guest_mode(vcpu)) {
1952		hc->ingpa = translate_nested_gpa(vcpu, hc->ingpa, 0, NULL);
1953		if (unlikely(hc->ingpa == INVALID_GPA))
1954			return HV_STATUS_INVALID_HYPERCALL_INPUT;
1955	}
1956
1957	if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST ||
1958	    hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE) {
1959		if (hc->fast) {
1960			flush.address_space = hc->ingpa;
1961			flush.flags = hc->outgpa;
1962			flush.processor_mask = sse128_lo(hc->xmm[0]);
1963			hc->consumed_xmm_halves = 1;
1964		} else {
1965			if (unlikely(kvm_read_guest(kvm, hc->ingpa,
1966						    &flush, sizeof(flush))))
1967				return HV_STATUS_INVALID_HYPERCALL_INPUT;
1968			hc->data_offset = sizeof(flush);
1969		}
1970
1971		trace_kvm_hv_flush_tlb(flush.processor_mask,
1972				       flush.address_space, flush.flags,
1973				       is_guest_mode(vcpu));
1974
1975		valid_bank_mask = BIT_ULL(0);
1976		sparse_banks[0] = flush.processor_mask;
1977
1978		/*
1979		 * Work around possible WS2012 bug: it sends hypercalls
1980		 * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
1981		 * while also expecting us to flush something and crashing if
1982		 * we don't. Let's treat processor_mask == 0 the same as
1983		 * HV_FLUSH_ALL_PROCESSORS.
1984		 */
1985		all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
1986			flush.processor_mask == 0;
1987	} else {
1988		if (hc->fast) {
1989			flush_ex.address_space = hc->ingpa;
1990			flush_ex.flags = hc->outgpa;
1991			memcpy(&flush_ex.hv_vp_set,
1992			       &hc->xmm[0], sizeof(hc->xmm[0]));
1993			hc->consumed_xmm_halves = 2;
1994		} else {
1995			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
1996						    sizeof(flush_ex))))
1997				return HV_STATUS_INVALID_HYPERCALL_INPUT;
1998			hc->data_offset = sizeof(flush_ex);
1999		}
2000
2001		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
2002					  flush_ex.hv_vp_set.format,
2003					  flush_ex.address_space,
2004					  flush_ex.flags, is_guest_mode(vcpu));
2005
2006		valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
2007		all_cpus = flush_ex.hv_vp_set.format !=
2008			HV_GENERIC_SET_SPARSE_4K;
2009
2010		if (hc->var_cnt != hweight64(valid_bank_mask))
2011			return HV_STATUS_INVALID_HYPERCALL_INPUT;
2012
2013		if (!all_cpus) {
2014			if (!hc->var_cnt)
2015				goto ret_success;
2016
2017			if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks))
2018				return HV_STATUS_INVALID_HYPERCALL_INPUT;
2019		}
2020
2021		/*
2022		 * Hyper-V TLFS doesn't explicitly forbid non-empty sparse vCPU
2023		 * banks (and, thus, non-zero 'var_cnt') for the 'all vCPUs'
2024		 * case (HV_GENERIC_SET_ALL).  Always adjust data_offset and
2025		 * consumed_xmm_halves to make sure TLB flush entries are read
2026		 * from the correct offset.
2027		 */
2028		if (hc->fast)
2029			hc->consumed_xmm_halves += hc->var_cnt;
2030		else
2031			hc->data_offset += hc->var_cnt * sizeof(sparse_banks[0]);
2032	}
2033
2034	if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE ||
2035	    hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX ||
2036	    hc->rep_cnt > ARRAY_SIZE(__tlb_flush_entries)) {
2037		tlb_flush_entries = NULL;
2038	} else {
2039		if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries))
2040			return HV_STATUS_INVALID_HYPERCALL_INPUT;
2041		tlb_flush_entries = __tlb_flush_entries;
2042	}
2043
2044	/*
2045	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs, so we can't
2046	 * analyze it here; flush the TLB regardless of the specified address space.
2047	 */
2048	if (all_cpus && !is_guest_mode(vcpu)) {
2049		kvm_for_each_vcpu(i, v, kvm) {
2050			tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, false);
2051			hv_tlb_flush_enqueue(v, tlb_flush_fifo,
2052					     tlb_flush_entries, hc->rep_cnt);
2053		}
2054
2055		kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH);
2056	} else if (!is_guest_mode(vcpu)) {
2057		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);
2058
2059		for_each_set_bit(i, vcpu_mask, KVM_MAX_VCPUS) {
2060			v = kvm_get_vcpu(kvm, i);
2061			if (!v)
2062				continue;
2063			tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, false);
2064			hv_tlb_flush_enqueue(v, tlb_flush_fifo,
2065					     tlb_flush_entries, hc->rep_cnt);
2066		}
2067
2068		kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
2069	} else {
2070		struct kvm_vcpu_hv *hv_v;
2071
2072		bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
2073
2074		kvm_for_each_vcpu(i, v, kvm) {
2075			hv_v = to_hv_vcpu(v);
2076
2077			/*
2078			 * The following check races with nested vCPUs entering/exiting
2079			 * and/or migrating between L1's vCPUs, however the only case when
2080			 * KVM *must* flush the TLB is when the target L2 vCPU keeps
2081			 * running on the same L1 vCPU from the moment of the request until
2082			 * kvm_hv_flush_tlb() returns. TLB is fully flushed in all other
2083			 * cases, e.g. when the target L2 vCPU migrates to a different L1
2084			 * vCPU or when the corresponding L1 vCPU temporarily switches to a
2085			 * different L2 vCPU while the request is being processed.
2086			 */
2087			if (!hv_v || hv_v->nested.vm_id != hv_vcpu->nested.vm_id)
2088				continue;
2089
2090			if (!all_cpus &&
2091			    !hv_is_vp_in_sparse_set(hv_v->nested.vp_id, valid_bank_mask,
2092						    sparse_banks))
2093				continue;
2094
2095			__set_bit(i, vcpu_mask);
2096			tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, true);
2097			hv_tlb_flush_enqueue(v, tlb_flush_fifo,
2098					     tlb_flush_entries, hc->rep_cnt);
2099		}
2100
2101		kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
2102	}
2103
2104ret_success:
2105	/* We always do a full TLB flush, so set 'Reps completed' = 'Rep Count'. */
2106	return (u64)HV_STATUS_SUCCESS |
2107		((u64)hc->rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
2108}
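
/*
 * Illustrative sketch (not part of the kernel source): how the return value
 * above is assembled.  Per the TLFS the low 16 bits of the result carry the
 * status code and a higher field the number of completed repetitions; since
 * the flush is always performed in full, 'reps completed' simply mirrors the
 * requested rep count.  The bit position (32) and the names are assumptions
 * restated from the code above.
 */
#include <stdint.h>

#define REPS_COMPLETE_SHIFT	32

static uint64_t make_hv_result(uint16_t status, uint16_t reps_completed)
{
	return (uint64_t)status |
	       ((uint64_t)reps_completed << REPS_COMPLETE_SHIFT);
}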
2109
2110static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
2111				    u64 *sparse_banks, u64 valid_bank_mask)
2112{
2113	struct kvm_lapic_irq irq = {
2114		.delivery_mode = APIC_DM_FIXED,
2115		.vector = vector
2116	};
2117	struct kvm_vcpu *vcpu;
2118	unsigned long i;
2119
2120	kvm_for_each_vcpu(i, vcpu, kvm) {
2121		if (sparse_banks &&
2122		    !hv_is_vp_in_sparse_set(kvm_hv_get_vpindex(vcpu),
2123					    valid_bank_mask, sparse_banks))
2124			continue;
2125
2126		/* We fail only when APIC is disabled */
2127		kvm_apic_set_irq(vcpu, &irq, NULL);
2128	}
2129}
2130
2131static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
2132{
2133	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
2134	u64 *sparse_banks = hv_vcpu->sparse_banks;
2135	struct kvm *kvm = vcpu->kvm;
2136	struct hv_send_ipi_ex send_ipi_ex;
2137	struct hv_send_ipi send_ipi;
2138	u64 valid_bank_mask;
2139	u32 vector;
2140	bool all_cpus;
2141
2142	if (hc->code == HVCALL_SEND_IPI) {
2143		if (!hc->fast) {
2144			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi,
2145						    sizeof(send_ipi))))
2146				return HV_STATUS_INVALID_HYPERCALL_INPUT;
2147			sparse_banks[0] = send_ipi.cpu_mask;
2148			vector = send_ipi.vector;
2149		} else {
2150			/* 'reserved' part of hv_send_ipi should be 0 */
2151			if (unlikely(hc->ingpa >> 32 != 0))
2152				return HV_STATUS_INVALID_HYPERCALL_INPUT;
2153			sparse_banks[0] = hc->outgpa;
2154			vector = (u32)hc->ingpa;
2155		}
2156		all_cpus = false;
2157		valid_bank_mask = BIT_ULL(0);
2158
2159		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
2160	} else {
2161		if (!hc->fast) {
2162			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex,
2163						    sizeof(send_ipi_ex))))
2164				return HV_STATUS_INVALID_HYPERCALL_INPUT;
2165		} else {
2166			send_ipi_ex.vector = (u32)hc->ingpa;
2167			send_ipi_ex.vp_set.format = hc->outgpa;
2168			send_ipi_ex.vp_set.valid_bank_mask = sse128_lo(hc->xmm[0]);
2169		}
2170
2171		trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
2172					 send_ipi_ex.vp_set.format,
2173					 send_ipi_ex.vp_set.valid_bank_mask);
2174
2175		vector = send_ipi_ex.vector;
2176		valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
2177		all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
2178
2179		if (hc->var_cnt != hweight64(valid_bank_mask))
2180			return HV_STATUS_INVALID_HYPERCALL_INPUT;
2181
2182		if (all_cpus)
2183			goto check_and_send_ipi;
2184
2185		if (!hc->var_cnt)
2186			goto ret_success;
2187
2188		if (!hc->fast)
2189			hc->data_offset = offsetof(struct hv_send_ipi_ex,
2190						   vp_set.bank_contents);
2191		else
2192			hc->consumed_xmm_halves = 1;
2193
2194		if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks))
2195			return HV_STATUS_INVALID_HYPERCALL_INPUT;
2196	}
2197
2198check_and_send_ipi:
2199	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
2200		return HV_STATUS_INVALID_HYPERCALL_INPUT;
2201
2202	if (all_cpus)
2203		kvm_hv_send_ipi_to_many(kvm, vector, NULL, 0);
2204	else
2205		kvm_hv_send_ipi_to_many(kvm, vector, sparse_banks, valid_bank_mask);
2206
2207ret_success:
2208	return HV_STATUS_SUCCESS;
2209}
2210
2211void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu, bool hyperv_enabled)
2212{
2213	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
2214	struct kvm_cpuid_entry2 *entry;
2215
2216	vcpu->arch.hyperv_enabled = hyperv_enabled;
2217
2218	if (!hv_vcpu) {
2219		/*
2220		 * KVM should have already allocated kvm_vcpu_hv if Hyper-V is
2221		 * enabled in CPUID.
2222		 */
2223		WARN_ON_ONCE(vcpu->arch.hyperv_enabled);
2224		return;
2225	}
2226
2227	memset(&hv_vcpu->cpuid_cache, 0, sizeof(hv_vcpu->cpuid_cache));
2228
2229	if (!vcpu->arch.hyperv_enabled)
2230		return;
2231
2232	entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
2233	if (entry) {
2234		hv_vcpu->cpuid_cache.features_eax = entry->eax;
2235		hv_vcpu->cpuid_cache.features_ebx = entry->ebx;
2236		hv_vcpu->cpuid_cache.features_edx = entry->edx;
2237	}
2238
2239	entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
2240	if (entry) {
2241		hv_vcpu->cpuid_cache.enlightenments_eax = entry->eax;
2242		hv_vcpu->cpuid_cache.enlightenments_ebx = entry->ebx;
2243	}
2244
2245	entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
2246	if (entry)
2247		hv_vcpu->cpuid_cache.syndbg_cap_eax = entry->eax;
2248
2249	entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_NESTED_FEATURES);
2250	if (entry) {
2251		hv_vcpu->cpuid_cache.nested_eax = entry->eax;
2252		hv_vcpu->cpuid_cache.nested_ebx = entry->ebx;
2253	}
2254}
2255
2256int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce)
2257{
2258	struct kvm_vcpu_hv *hv_vcpu;
2259	int ret = 0;
2260
2261	if (!to_hv_vcpu(vcpu)) {
2262		if (enforce) {
2263			ret = kvm_hv_vcpu_init(vcpu);
2264			if (ret)
2265				return ret;
2266		} else {
2267			return 0;
2268		}
2269	}
2270
2271	hv_vcpu = to_hv_vcpu(vcpu);
2272	hv_vcpu->enforce_cpuid = enforce;
2273
2274	return ret;
2275}
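
/*
 * Illustrative sketch (not part of the kernel source): how userspace is
 * expected to turn on the enforcement handled above, assuming the per-vCPU
 * KVM_CAP_HYPERV_ENFORCE_CPUID capability is enabled through KVM_ENABLE_CAP
 * on the vCPU file descriptor.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_hv_enforce_cpuid(int vcpu_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
		.args = { 1 },	/* non-zero: enforce the Hyper-V CPUID bits */
	};

	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}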
2276
2277static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
2278{
2279	bool longmode;
2280
2281	longmode = is_64_bit_hypercall(vcpu);
2282	if (longmode)
2283		kvm_rax_write(vcpu, result);
2284	else {
2285		kvm_rdx_write(vcpu, result >> 32);
2286		kvm_rax_write(vcpu, result & 0xffffffff);
2287	}
2288}
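
/*
 * Illustrative sketch (not part of the kernel source): the register
 * convention applied above.  A 64-bit caller receives the whole result in
 * RAX, while a 32-bit caller receives the high half in EDX and the low half
 * in EAX.  Hypothetical userspace names.
 */
#include <stdbool.h>
#include <stdint.h>

struct hc_result_regs {
	uint64_t rax;
	uint64_t rdx;
};

static void set_hc_result(struct hc_result_regs *regs, bool longmode,
			  uint64_t result)
{
	if (longmode) {
		regs->rax = result;
	} else {
		regs->rdx = result >> 32;
		regs->rax = result & 0xffffffff;
	}
}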
2289
2290static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
2291{
2292	u32 tlb_lock_count = 0;
2293	int ret;
2294
2295	if (hv_result_success(result) && is_guest_mode(vcpu) &&
2296	    kvm_hv_is_tlb_flush_hcall(vcpu) &&
2297	    kvm_read_guest(vcpu->kvm, to_hv_vcpu(vcpu)->nested.pa_page_gpa,
2298			   &tlb_lock_count, sizeof(tlb_lock_count)))
2299		result = HV_STATUS_INVALID_HYPERCALL_INPUT;
2300
2301	trace_kvm_hv_hypercall_done(result);
2302	kvm_hv_hypercall_set_result(vcpu, result);
2303	++vcpu->stat.hypercalls;
2304
2305	ret = kvm_skip_emulated_instruction(vcpu);
2306
2307	if (tlb_lock_count)
2308		kvm_x86_ops.nested_ops->hv_inject_synthetic_vmexit_post_tlb_flush(vcpu);
2309
2310	return ret;
2311}
2312
2313static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
2314{
2315	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
2316}
2317
2318static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
2319{
2320	struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
2321	struct eventfd_ctx *eventfd;
2322
2323	if (unlikely(!hc->fast)) {
2324		int ret;
2325		gpa_t gpa = hc->ingpa;
2326
2327		if ((gpa & (__alignof__(hc->ingpa) - 1)) ||
2328		    offset_in_page(gpa) + sizeof(hc->ingpa) > PAGE_SIZE)
2329			return HV_STATUS_INVALID_ALIGNMENT;
2330
2331		ret = kvm_vcpu_read_guest(vcpu, gpa,
2332					  &hc->ingpa, sizeof(hc->ingpa));
2333		if (ret < 0)
2334			return HV_STATUS_INVALID_ALIGNMENT;
2335	}
2336
2337	/*
2338	 * Per spec, bits 32-47 contain the extra "flag number".  However, we
2339	 * have no use for it, and in all known use cases it is zero, so just
2340	 * report lookup failure if it isn't.
2341	 */
2342	if (hc->ingpa & 0xffff00000000ULL)
2343		return HV_STATUS_INVALID_PORT_ID;
2344	/* remaining bits are reserved-zero */
2345	if (hc->ingpa & ~KVM_HYPERV_CONN_ID_MASK)
2346		return HV_STATUS_INVALID_HYPERCALL_INPUT;
2347
2348	/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
2349	rcu_read_lock();
2350	eventfd = idr_find(&hv->conn_to_evt, hc->ingpa);
2351	rcu_read_unlock();
2352	if (!eventfd)
2353		return HV_STATUS_INVALID_PORT_ID;
2354
2355	eventfd_signal(eventfd, 1);
2356	return HV_STATUS_SUCCESS;
2357}
2358
2359static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc)
2360{
2361	switch (hc->code) {
2362	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
2363	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
2364	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
2365	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
2366	case HVCALL_SEND_IPI_EX:
2367		return true;
2368	}
2369
2370	return false;
2371}
2372
2373static void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)
2374{
2375	int reg;
2376
2377	kvm_fpu_get();
2378	for (reg = 0; reg < HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
2379		_kvm_read_sse_reg(reg, &hc->xmm[reg]);
2380	kvm_fpu_put();
2381}
2382
2383static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
2384{
2385	if (!hv_vcpu->enforce_cpuid)
2386		return true;
2387
2388	switch (code) {
2389	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
2390		return hv_vcpu->cpuid_cache.enlightenments_ebx &&
2391			hv_vcpu->cpuid_cache.enlightenments_ebx != U32_MAX;
2392	case HVCALL_POST_MESSAGE:
2393		return hv_vcpu->cpuid_cache.features_ebx & HV_POST_MESSAGES;
2394	case HVCALL_SIGNAL_EVENT:
2395		return hv_vcpu->cpuid_cache.features_ebx & HV_SIGNAL_EVENTS;
2396	case HVCALL_POST_DEBUG_DATA:
2397	case HVCALL_RETRIEVE_DEBUG_DATA:
2398	case HVCALL_RESET_DEBUG_SESSION:
2399		/*
2400		 * Return 'true' when SynDBG is disabled so the resulting code
2401		 * will be HV_STATUS_INVALID_HYPERCALL_CODE.
2402		 */
2403		return !kvm_hv_is_syndbg_enabled(hv_vcpu->vcpu) ||
2404			hv_vcpu->cpuid_cache.features_ebx & HV_DEBUGGING;
2405	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
2406	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
2407		if (!(hv_vcpu->cpuid_cache.enlightenments_eax &
2408		      HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
2409			return false;
2410		fallthrough;
2411	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
2412	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
2413		return hv_vcpu->cpuid_cache.enlightenments_eax &
2414			HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
2415	case HVCALL_SEND_IPI_EX:
2416		if (!(hv_vcpu->cpuid_cache.enlightenments_eax &
2417		      HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
2418			return false;
2419		fallthrough;
2420	case HVCALL_SEND_IPI:
2421		return hv_vcpu->cpuid_cache.enlightenments_eax &
2422			HV_X64_CLUSTER_IPI_RECOMMENDED;
2423	default:
2424		break;
2425	}
2426
2427	return true;
2428}
2429
2430int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
2431{
2432	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
2433	struct kvm_hv_hcall hc;
2434	u64 ret = HV_STATUS_SUCCESS;
2435
2436	/*
2437	 * Per the Hyper-V spec, the hypercall generates a #UD when invoked
2438	 * from non-zero CPL or from real mode.
2439	 */
2440	if (static_call(kvm_x86_get_cpl)(vcpu) != 0 || !is_protmode(vcpu)) {
2441		kvm_queue_exception(vcpu, UD_VECTOR);
2442		return 1;
2443	}
2444
2445#ifdef CONFIG_X86_64
2446	if (is_64_bit_hypercall(vcpu)) {
2447		hc.param = kvm_rcx_read(vcpu);
2448		hc.ingpa = kvm_rdx_read(vcpu);
2449		hc.outgpa = kvm_r8_read(vcpu);
2450	} else
2451#endif
2452	{
2453		hc.param = ((u64)kvm_rdx_read(vcpu) << 32) |
2454			    (kvm_rax_read(vcpu) & 0xffffffff);
2455		hc.ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
2456			    (kvm_rcx_read(vcpu) & 0xffffffff);
2457		hc.outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
2458			     (kvm_rsi_read(vcpu) & 0xffffffff);
2459	}
2460
2461	hc.code = hc.param & 0xffff;
2462	hc.var_cnt = (hc.param & HV_HYPERCALL_VARHEAD_MASK) >> HV_HYPERCALL_VARHEAD_OFFSET;
2463	hc.fast = !!(hc.param & HV_HYPERCALL_FAST_BIT);
2464	hc.rep_cnt = (hc.param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
2465	hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
2466	hc.rep = !!(hc.rep_cnt || hc.rep_idx);
2467
2468	trace_kvm_hv_hypercall(hc.code, hc.fast, hc.var_cnt, hc.rep_cnt,
2469			       hc.rep_idx, hc.ingpa, hc.outgpa);
2470
2471	if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) {
2472		ret = HV_STATUS_ACCESS_DENIED;
2473		goto hypercall_complete;
2474	}
2475
2476	if (unlikely(hc.param & HV_HYPERCALL_RSVD_MASK)) {
2477		ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
2478		goto hypercall_complete;
2479	}
2480
2481	if (hc.fast && is_xmm_fast_hypercall(&hc)) {
2482		if (unlikely(hv_vcpu->enforce_cpuid &&
2483			     !(hv_vcpu->cpuid_cache.features_edx &
2484			       HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) {
2485			kvm_queue_exception(vcpu, UD_VECTOR);
2486			return 1;
2487		}
2488
2489		kvm_hv_hypercall_read_xmm(&hc);
2490	}
2491
2492	switch (hc.code) {
2493	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
2494		if (unlikely(hc.rep || hc.var_cnt)) {
2495			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
2496			break;
2497		}
2498		kvm_vcpu_on_spin(vcpu, true);
2499		break;
2500	case HVCALL_SIGNAL_EVENT:
2501		if (unlikely(hc.rep || hc.var_cnt)) {
2502			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
2503			break;
2504		}
2505		ret = kvm_hvcall_signal_event(vcpu, &hc);
2506		if (ret != HV_STATUS_INVALID_PORT_ID)
2507			break;
2508		fallthrough;	/* maybe userspace knows this conn_id */
2509	case HVCALL_POST_MESSAGE:
2510		/* don't bother userspace if it has no way to handle it */
2511		if (unlikely(hc.rep || hc.var_cnt || !to_hv_synic(vcpu)->active)) {
2512			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
2513			break;
2514		}
2515		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
2516		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
2517		vcpu->run->hyperv.u.hcall.input = hc.param;
2518		vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
2519		vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
2520		vcpu->arch.complete_userspace_io =
2521				kvm_hv_hypercall_complete_userspace;
2522		return 0;
2523	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
2524		if (unlikely(hc.var_cnt)) {
2525			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
2526			break;
2527		}
2528		fallthrough;
2529	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
2530		if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
2531			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
2532			break;
2533		}
2534		ret = kvm_hv_flush_tlb(vcpu, &hc);
2535		break;
2536	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
2537		if (unlikely(hc.var_cnt)) {
2538			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
2539			break;
2540		}
2541		fallthrough;
2542	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
2543		if (unlikely(hc.rep)) {
2544			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
2545			break;
2546		}
2547		ret = kvm_hv_flush_tlb(vcpu, &hc);
2548		break;
2549	case HVCALL_SEND_IPI:
2550		if (unlikely(hc.var_cnt)) {
2551			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
2552			break;
2553		}
2554		fallthrough;
2555	case HVCALL_SEND_IPI_EX:
2556		if (unlikely(hc.rep)) {
2557			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
2558			break;
2559		}
2560		ret = kvm_hv_send_ipi(vcpu, &hc);
2561		break;
2562	case HVCALL_POST_DEBUG_DATA:
2563	case HVCALL_RETRIEVE_DEBUG_DATA:
2564		if (unlikely(hc.fast)) {
2565			ret = HV_STATUS_INVALID_PARAMETER;
2566			break;
2567		}
2568		fallthrough;
2569	case HVCALL_RESET_DEBUG_SESSION: {
2570		struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);
2571
2572		if (!kvm_hv_is_syndbg_enabled(vcpu)) {
2573			ret = HV_STATUS_INVALID_HYPERCALL_CODE;
2574			break;
2575		}
2576
2577		if (!(syndbg->options & HV_X64_SYNDBG_OPTION_USE_HCALLS)) {
2578			ret = HV_STATUS_OPERATION_DENIED;
2579			break;
2580		}
2581		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
2582		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
2583		vcpu->run->hyperv.u.hcall.input = hc.param;
2584		vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
2585		vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
2586		vcpu->arch.complete_userspace_io =
2587				kvm_hv_hypercall_complete_userspace;
2588		return 0;
2589	}
2590	default:
2591		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
2592		break;
2593	}
2594
2595hypercall_complete:
2596	return kvm_hv_hypercall_complete(vcpu, ret);
2597}
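
/*
 * Illustrative sketch (not part of the kernel source): decoding of the
 * hypercall input value as done at the top of kvm_hv_hypercall().  The field
 * positions (call code in bits 0-15, 'fast' flag in bit 16, variable header
 * size from bit 17, rep count from bit 32, rep start index from bit 48) are
 * assumptions restated from the TLFS layout used by the code above; the
 * kernel uses its own HV_HYPERCALL_* constants for this.
 */
#include <stdbool.h>
#include <stdint.h>

struct hc_header {
	uint16_t code;
	uint16_t var_cnt;
	uint16_t rep_cnt;
	uint16_t rep_idx;
	bool fast;
	bool rep;
};

static struct hc_header decode_hc_input(uint64_t param)
{
	struct hc_header h;

	h.code    = param & 0xffff;
	h.fast    = param & (1ULL << 16);
	h.var_cnt = (param >> 17) & 0x3ff;
	h.rep_cnt = (param >> 32) & 0xfff;
	h.rep_idx = (param >> 48) & 0xfff;
	h.rep     = h.rep_cnt || h.rep_idx;

	return h;
}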
2598
2599void kvm_hv_init_vm(struct kvm *kvm)
2600{
2601	struct kvm_hv *hv = to_kvm_hv(kvm);
2602
2603	mutex_init(&hv->hv_lock);
2604	idr_init(&hv->conn_to_evt);
2605}
2606
2607void kvm_hv_destroy_vm(struct kvm *kvm)
2608{
2609	struct kvm_hv *hv = to_kvm_hv(kvm);
2610	struct eventfd_ctx *eventfd;
2611	int i;
2612
2613	idr_for_each_entry(&hv->conn_to_evt, eventfd, i)
2614		eventfd_ctx_put(eventfd);
2615	idr_destroy(&hv->conn_to_evt);
2616}
2617
2618static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
2619{
2620	struct kvm_hv *hv = to_kvm_hv(kvm);
2621	struct eventfd_ctx *eventfd;
2622	int ret;
2623
2624	eventfd = eventfd_ctx_fdget(fd);
2625	if (IS_ERR(eventfd))
2626		return PTR_ERR(eventfd);
2627
2628	mutex_lock(&hv->hv_lock);
2629	ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
2630			GFP_KERNEL_ACCOUNT);
2631	mutex_unlock(&hv->hv_lock);
2632
2633	if (ret >= 0)
2634		return 0;
2635
2636	if (ret == -ENOSPC)
2637		ret = -EEXIST;
2638	eventfd_ctx_put(eventfd);
2639	return ret;
2640}
2641
2642static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
2643{
2644	struct kvm_hv *hv = to_kvm_hv(kvm);
2645	struct eventfd_ctx *eventfd;
2646
2647	mutex_lock(&hv->hv_lock);
2648	eventfd = idr_remove(&hv->conn_to_evt, conn_id);
2649	mutex_unlock(&hv->hv_lock);
2650
2651	if (!eventfd)
2652		return -ENOENT;
2653
2654	synchronize_srcu(&kvm->srcu);
2655	eventfd_ctx_put(eventfd);
2656	return 0;
2657}
2658
2659int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
2660{
2661	if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
2662	    (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
2663		return -EINVAL;
2664
2665	if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
2666		return kvm_hv_eventfd_deassign(kvm, args->conn_id);
2667	return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
2668}
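
/*
 * Illustrative sketch (not part of the kernel source): wiring a Hyper-V
 * connection ID to an eventfd via the KVM_HYPERV_EVENTFD ioctl handled
 * above.  Error handling is omitted and the surrounding setup (an open VM
 * file descriptor) is assumed.
 */
#include <stdint.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int assign_conn_eventfd(int vm_fd, uint32_t conn_id, int *evt_fd)
{
	struct kvm_hyperv_eventfd hvevfd = {
		.conn_id = conn_id,
		.fd = eventfd(0, EFD_CLOEXEC),
		.flags = 0,
	};

	*evt_fd = hvevfd.fd;
	/* Pass KVM_HYPERV_EVENTFD_DEASSIGN in .flags to undo the binding. */
	return ioctl(vm_fd, KVM_HYPERV_EVENTFD, &hvevfd);
}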
2669
2670int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
2671		     struct kvm_cpuid_entry2 __user *entries)
2672{
2673	uint16_t evmcs_ver = 0;
2674	struct kvm_cpuid_entry2 cpuid_entries[] = {
2675		{ .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS },
2676		{ .function = HYPERV_CPUID_INTERFACE },
2677		{ .function = HYPERV_CPUID_VERSION },
2678		{ .function = HYPERV_CPUID_FEATURES },
2679		{ .function = HYPERV_CPUID_ENLIGHTMENT_INFO },
2680		{ .function = HYPERV_CPUID_IMPLEMENT_LIMITS },
2681		{ .function = HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS },
2682		{ .function = HYPERV_CPUID_SYNDBG_INTERFACE },
2683		{ .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES	},
2684		{ .function = HYPERV_CPUID_NESTED_FEATURES },
2685	};
2686	int i, nent = ARRAY_SIZE(cpuid_entries);
2687
2688	if (kvm_x86_ops.nested_ops->get_evmcs_version)
2689		evmcs_ver = kvm_x86_ops.nested_ops->get_evmcs_version(vcpu);
2690
2691	if (cpuid->nent < nent)
2692		return -E2BIG;
2693
2694	if (cpuid->nent > nent)
2695		cpuid->nent = nent;
2696
2697	for (i = 0; i < nent; i++) {
2698		struct kvm_cpuid_entry2 *ent = &cpuid_entries[i];
2699		u32 signature[3];
2700
2701		switch (ent->function) {
2702		case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS:
2703			memcpy(signature, "Linux KVM Hv", 12);
2704
2705			ent->eax = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES;
2706			ent->ebx = signature[0];
2707			ent->ecx = signature[1];
2708			ent->edx = signature[2];
2709			break;
2710
2711		case HYPERV_CPUID_INTERFACE:
2712			ent->eax = HYPERV_CPUID_SIGNATURE_EAX;
2713			break;
2714
2715		case HYPERV_CPUID_VERSION:
2716			/*
2717			 * We implement some Hyper-V 2016 functions, so let's use
2718			 * this version.
2719			 */
2720			ent->eax = 0x00003839;
2721			ent->ebx = 0x000A0000;
2722			break;
2723
2724		case HYPERV_CPUID_FEATURES:
2725			ent->eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
2726			ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
2727			ent->eax |= HV_MSR_SYNIC_AVAILABLE;
2728			ent->eax |= HV_MSR_SYNTIMER_AVAILABLE;
2729			ent->eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
2730			ent->eax |= HV_MSR_HYPERCALL_AVAILABLE;
2731			ent->eax |= HV_MSR_VP_INDEX_AVAILABLE;
2732			ent->eax |= HV_MSR_RESET_AVAILABLE;
2733			ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
2734			ent->eax |= HV_ACCESS_FREQUENCY_MSRS;
2735			ent->eax |= HV_ACCESS_REENLIGHTENMENT;
2736
2737			ent->ebx |= HV_POST_MESSAGES;
2738			ent->ebx |= HV_SIGNAL_EVENTS;
2739
2740			ent->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
2741			ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
2742			ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
2743
2744			ent->ebx |= HV_DEBUGGING;
2745			ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE;
2746			ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
2747			ent->edx |= HV_FEATURE_EXT_GVA_RANGES_FLUSH;
2748
2749			/*
2750			 * Direct Synthetic timers only make sense with an
2751			 * in-kernel LAPIC.
2752			 */
2753			if (!vcpu || lapic_in_kernel(vcpu))
2754				ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
2755
2756			break;
2757
2758		case HYPERV_CPUID_ENLIGHTMENT_INFO:
2759			ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
2760			ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
2761			ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
2762			ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
2763			ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
2764			if (evmcs_ver)
2765				ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
2766			if (!cpu_smt_possible())
2767				ent->eax |= HV_X64_NO_NONARCH_CORESHARING;
2768
2769			ent->eax |= HV_DEPRECATING_AEOI_RECOMMENDED;
2770			/*
2771			 * Default number of spinlock retry attempts; matches
2772			 * Hyper-V 2016.
2773			 */
2774			ent->ebx = 0x00000FFF;
2775
2776			break;
2777
2778		case HYPERV_CPUID_IMPLEMENT_LIMITS:
2779			/* Maximum number of virtual processors */
2780			ent->eax = KVM_MAX_VCPUS;
2781			/*
2782			 * Maximum number of logical processors; matches
2783			 * Hyper-V 2016.
2784			 */
2785			ent->ebx = 64;
2786
2787			break;
2788
2789		case HYPERV_CPUID_NESTED_FEATURES:
2790			ent->eax = evmcs_ver;
2791			ent->eax |= HV_X64_NESTED_DIRECT_FLUSH;
2792			ent->eax |= HV_X64_NESTED_MSR_BITMAP;
2793			ent->ebx |= HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL;
2794			break;
2795
2796		case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS:
2797			memcpy(signature, "Linux KVM Hv", 12);
2798
2799			ent->eax = 0;
2800			ent->ebx = signature[0];
2801			ent->ecx = signature[1];
2802			ent->edx = signature[2];
2803			break;
2804
2805		case HYPERV_CPUID_SYNDBG_INTERFACE:
2806			memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12);
2807			ent->eax = signature[0];
2808			break;
2809
2810		case HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES:
2811			ent->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
2812			break;
2813
2814		default:
2815			break;
2816		}
2817	}
2818
2819	if (copy_to_user(entries, cpuid_entries,
2820			 nent * sizeof(struct kvm_cpuid_entry2)))
2821		return -EFAULT;
2822
2823	return 0;
2824}
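
/*
 * Illustrative sketch (not part of the kernel source): retrieving the table
 * built above from userspace with the KVM_GET_SUPPORTED_HV_CPUID ioctl.  The
 * entry count (ten leaves here) and the target file descriptor (a vCPU fd;
 * newer kernels also accept the /dev/kvm fd) are assumptions stated for
 * illustration.
 */
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static struct kvm_cpuid2 *get_hv_cpuid(int vcpu_fd, int nent)
{
	struct kvm_cpuid2 *cpuid;

	cpuid = calloc(1, sizeof(*cpuid) + nent * sizeof(cpuid->entries[0]));
	if (!cpuid)
		return NULL;

	cpuid->nent = nent;
	if (ioctl(vcpu_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid) < 0) {
		free(cpuid);
		return NULL;
	}
	return cpuid;
}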