kvm_host.h - arch/x86/include/asm/kvm_host.h - Linux diff v4.10.11

 
   1/*
   2 * Kernel-based Virtual Machine driver for Linux
   3 *
   4 * This header defines architecture specific interfaces, x86 version
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2.  See
   7 * the COPYING file in the top-level directory.
   8 *
   9 */
  10
  11#ifndef _ASM_X86_KVM_HOST_H
  12#define _ASM_X86_KVM_HOST_H
  13
  14#include <linux/types.h>
  15#include <linux/mm.h>
  16#include <linux/mmu_notifier.h>
  17#include <linux/tracepoint.h>
  18#include <linux/cpumask.h>
  19#include <linux/irq_work.h>
 
 
  20
  21#include <linux/kvm.h>
  22#include <linux/kvm_para.h>
  23#include <linux/kvm_types.h>
  24#include <linux/perf_event.h>
  25#include <linux/pvclock_gtod.h>
  26#include <linux/clocksource.h>
  27#include <linux/irqbypass.h>
  28#include <linux/hyperv.h>
 
  29
  30#include <asm/apic.h>
  31#include <asm/pvclock-abi.h>
  32#include <asm/desc.h>
  33#include <asm/mtrr.h>
  34#include <asm/msr-index.h>
  35#include <asm/asm.h>
  36#include <asm/kvm_page_track.h>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  37
  38#define KVM_MAX_VCPUS 288
  39#define KVM_SOFT_MAX_VCPUS 240
  40#define KVM_MAX_VCPU_ID 1023
  41#define KVM_USER_MEM_SLOTS 509
  42/* memory slots that are not exposed to userspace */
  43#define KVM_PRIVATE_MEM_SLOTS 3
  44#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
  45
  46#define KVM_PIO_PAGE_OFFSET 1
  47#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
  48#define KVM_HALT_POLL_NS_DEFAULT 400000
  49
  50#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
  51
 
 
 
 
 
 
 
 
 
  52/* x86-specific vcpu->requests bit members */
  53#define KVM_REQ_MIGRATE_TIMER      8
  54#define KVM_REQ_REPORT_TPR_ACCESS  9
  55#define KVM_REQ_TRIPLE_FAULT      10
  56#define KVM_REQ_MMU_SYNC          11
  57#define KVM_REQ_CLOCK_UPDATE      12
  58#define KVM_REQ_DEACTIVATE_FPU    13
  59#define KVM_REQ_EVENT             14
  60#define KVM_REQ_APF_HALT          15
  61#define KVM_REQ_STEAL_UPDATE      16
  62#define KVM_REQ_NMI               17
  63#define KVM_REQ_PMU               18
  64#define KVM_REQ_PMI               19
  65#define KVM_REQ_SMI               20
  66#define KVM_REQ_MASTERCLOCK_UPDATE 21
  67#define KVM_REQ_MCLOCK_INPROGRESS 22
  68#define KVM_REQ_SCAN_IOAPIC       23
  69#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24
  70#define KVM_REQ_APIC_PAGE_RELOAD  25
  71#define KVM_REQ_HV_CRASH          26
  72#define KVM_REQ_IOAPIC_EOI_EXIT   27
  73#define KVM_REQ_HV_RESET          28
  74#define KVM_REQ_HV_EXIT           29
  75#define KVM_REQ_HV_STIMER         30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  76
  77#define CR0_RESERVED_BITS                                               \
  78	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
  79			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
  80			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
  81
  82#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
  83#define CR3_PCID_INVD		 BIT_64(63)
  84#define CR4_RESERVED_BITS                                               \
  85	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
  86			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
  87			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
  88			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
  89			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \
  90			  | X86_CR4_PKE))
  91
  92#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
  93
  94
  95
  96#define INVALID_PAGE (~(hpa_t)0)
  97#define VALID_PAGE(x) ((x) != INVALID_PAGE)
  98
  99#define UNMAPPED_GVA (~(gpa_t)0)
 100
 101/* KVM Hugepage definitions for x86 */
 102#define KVM_NR_PAGE_SIZES	3
 
 103#define KVM_HPAGE_GFN_SHIFT(x)	(((x) - 1) * 9)
 104#define KVM_HPAGE_SHIFT(x)	(PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
 105#define KVM_HPAGE_SIZE(x)	(1UL << KVM_HPAGE_SHIFT(x))
 106#define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
 107#define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 108
 109static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
 110{
 111	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
 112	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
 113		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 114}
 115
 116#define KVM_PERMILLE_MMU_PAGES 20
 117#define KVM_MIN_ALLOC_MMU_PAGES 64
 118#define KVM_MMU_HASH_SHIFT 10
 119#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
 120#define KVM_MIN_FREE_MMU_PAGES 5
 121#define KVM_REFILL_PAGES 25
 122#define KVM_MAX_CPUID_ENTRIES 80
 123#define KVM_NR_FIXED_MTRR_REGION 88
 124#define KVM_NR_VAR_MTRR 8
 125
 126#define ASYNC_PF_PER_VCPU 64
 127
 128enum kvm_reg {
 129	VCPU_REGS_RAX = 0,
 130	VCPU_REGS_RCX = 1,
 131	VCPU_REGS_RDX = 2,
 132	VCPU_REGS_RBX = 3,
 133	VCPU_REGS_RSP = 4,
 134	VCPU_REGS_RBP = 5,
 135	VCPU_REGS_RSI = 6,
 136	VCPU_REGS_RDI = 7,
 137#ifdef CONFIG_X86_64
 138	VCPU_REGS_R8 = 8,
 139	VCPU_REGS_R9 = 9,
 140	VCPU_REGS_R10 = 10,
 141	VCPU_REGS_R11 = 11,
 142	VCPU_REGS_R12 = 12,
 143	VCPU_REGS_R13 = 13,
 144	VCPU_REGS_R14 = 14,
 145	VCPU_REGS_R15 = 15,
 146#endif
 147	VCPU_REGS_RIP,
 148	NR_VCPU_REGS
 149};
 150
 151enum kvm_reg_ex {
 152	VCPU_EXREG_PDPTR = NR_VCPU_REGS,
 
 153	VCPU_EXREG_CR3,
 
 154	VCPU_EXREG_RFLAGS,
 155	VCPU_EXREG_SEGMENTS,
 
 
 156};
 157
 158enum {
 159	VCPU_SREG_ES,
 160	VCPU_SREG_CS,
 161	VCPU_SREG_SS,
 162	VCPU_SREG_DS,
 163	VCPU_SREG_FS,
 164	VCPU_SREG_GS,
 165	VCPU_SREG_TR,
 166	VCPU_SREG_LDTR,
 167};
 168
 169#include <asm/kvm_emulate.h>
 
 
 
 
 
 170
 171#define KVM_NR_MEM_OBJS 40
 
 
 
 
 172
 173#define KVM_NR_DB_REGS	4
 174
 
 175#define DR6_BD		(1 << 13)
 176#define DR6_BS		(1 << 14)
 
 177#define DR6_RTM		(1 << 16)
 178#define DR6_FIXED_1	0xfffe0ff0
 179#define DR6_INIT	0xffff0ff0
 180#define DR6_VOLATILE	0x0001e00f
 
 
 
 
 
 
 
 
 181
 182#define DR7_BP_EN_MASK	0x000000ff
 183#define DR7_GE		(1 << 9)
 184#define DR7_GD		(1 << 13)
 185#define DR7_FIXED_1	0x00000400
 186#define DR7_VOLATILE	0xffff2bff
 187
 
 
 
 
 
 
 
 
 
 
 188#define PFERR_PRESENT_BIT 0
 189#define PFERR_WRITE_BIT 1
 190#define PFERR_USER_BIT 2
 191#define PFERR_RSVD_BIT 3
 192#define PFERR_FETCH_BIT 4
 193#define PFERR_PK_BIT 5
 
 194#define PFERR_GUEST_FINAL_BIT 32
 195#define PFERR_GUEST_PAGE_BIT 33
 
 196
 197#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
 198#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
 199#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
 200#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
 201#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
 202#define PFERR_PK_MASK (1U << PFERR_PK_BIT)
 203#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
 204#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
 
 
 205
 206#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK |	\
 207				 PFERR_USER_MASK |		\
 208				 PFERR_WRITE_MASK |		\
 209				 PFERR_PRESENT_MASK)
 210
 211/* apic attention bits */
 212#define KVM_APIC_CHECK_VAPIC	0
 213/*
 214 * The following bit is set with PV-EOI, unset on EOI.
 215 * We detect PV-EOI changes by guest by comparing
 216 * this bit with PV-EOI in guest memory.
 217 * See the implementation in apic_update_pv_eoi.
 218 */
 219#define KVM_APIC_PV_EOI_PENDING	1
 220
 221struct kvm_kernel_irq_routing_entry;
 222
 223/*
 224 * We don't want allocation failures within the mmu code, so we preallocate
 225 * enough memory for a single page fault in a cache.
 226 */
 227struct kvm_mmu_memory_cache {
 228	int nobjs;
 229	void *objects[KVM_NR_MEM_OBJS];
 230};
 231
 232/*
 233 * the pages used as guest page table on soft mmu are tracked by
 234 * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
 235 * by indirect shadow page can not be more than 15 bits.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 236 *
 237 * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access,
 238 * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
 239 */
 240union kvm_mmu_page_role {
 241	unsigned word;
 242	struct {
 243		unsigned level:4;
 244		unsigned cr4_pae:1;
 245		unsigned quadrant:2;
 246		unsigned direct:1;
 247		unsigned access:3;
 248		unsigned invalid:1;
 249		unsigned nxe:1;
 250		unsigned cr0_wp:1;
 251		unsigned smep_andnot_wp:1;
 252		unsigned smap_andnot_wp:1;
 253		unsigned :8;
 
 
 
 254
 255		/*
 256		 * This is left at the top of the word so that
 257		 * kvm_memslots_for_spte_role can extract it with a
 258		 * simple shift.  While there is room, give it a whole
 259		 * byte so it is also faster to load it from memory.
 260		 */
 261		unsigned smm:8;
 262	};
 263};
 264
 265struct kvm_rmap_head {
 266	unsigned long val;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 267};
 268
 269struct kvm_mmu_page {
 270	struct list_head link;
 271	struct hlist_node hash_link;
 272
 273	/*
 274	 * The following two entries are used to key the shadow page in the
 275	 * hash table.
 276	 */
 277	gfn_t gfn;
 278	union kvm_mmu_page_role role;
 279
 280	u64 *spt;
 281	/* hold the gfn of each spte inside spt */
 282	gfn_t *gfns;
 283	bool unsync;
 284	int root_count;          /* Currently serving as active root */
 285	unsigned int unsync_children;
 286	struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
 287
 288	/* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen.  */
 289	unsigned long mmu_valid_gen;
 290
 291	DECLARE_BITMAP(unsync_child_bitmap, 512);
 292
 293#ifdef CONFIG_X86_32
 294	/*
 295	 * Used out of the mmu-lock to avoid reading spte values while an
 296	 * update is in progress; see the comments in __get_spte_lockless().
 297	 */
 298	int clear_spte_count;
 299#endif
 300
 301	/* Number of writes since the last time traversal visited this page.  */
 302	atomic_t write_flooding_count;
 303};
 304
 305struct kvm_pio_request {
 
 306	unsigned long count;
 307	int in;
 308	int port;
 309	int size;
 310};
 311
 
 
 312struct rsvd_bits_validate {
 313	u64 rsvd_bits_mask[2][4];
 314	u64 bad_mt_xwr;
 315};
 316
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 317/*
 318 * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
 319 * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
 320 * mode.
 321 */
 322struct kvm_mmu {
 323	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
 324	unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
 325	u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
 326	int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err,
 327			  bool prefault);
 328	void (*inject_page_fault)(struct kvm_vcpu *vcpu,
 329				  struct x86_exception *fault);
 330	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
 
 331			    struct x86_exception *exception);
 332	gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 333			       struct x86_exception *exception);
 334	int (*sync_page)(struct kvm_vcpu *vcpu,
 335			 struct kvm_mmu_page *sp);
 336	void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
 337	void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 338			   u64 *spte, const void *pte);
 339	hpa_t root_hpa;
 340	int root_level;
 341	int shadow_root_level;
 342	union kvm_mmu_page_role base_role;
 343	bool direct_map;
 344
 345	/*
 346	 * Bitmap; bit set = permission fault
 347	 * Byte index: page fault error code [4:1]
 348	 * Bit index: pte permissions in ACC_* format
 349	 */
 350	u8 permissions[16];
 351
 352	/*
 353	* The pkru_mask indicates if protection key checks are needed.  It
 354	* consists of 16 domains indexed by page fault error code bits [4:1],
 355	* with PFEC.RSVD replaced by ACC_USER_MASK from the page tables.
 356	* Each domain has 2 bits which are ANDed with AD and WD from PKRU.
 357	*/
 358	u32 pkru_mask;
 359
 
 
 
 
 
 
 
 
 
 360	u64 *pae_root;
 361	u64 *lm_root;
 
 362
 363	/*
 364	 * check zero bits on shadow page table entries, these
 365	 * bits include not only hardware reserved bits but also
 366	 * the bits spte never used.
 367	 */
 368	struct rsvd_bits_validate shadow_zero_check;
 369
 370	struct rsvd_bits_validate guest_rsvd_check;
 371
 372	/* Can have large pages at levels 2..last_nonleaf_level-1. */
 373	u8 last_nonleaf_level;
 374
 375	bool nx;
 376
 377	u64 pdptrs[4]; /* pae */
 378};
 379
 
 
 
 
 
 380enum pmc_type {
 381	KVM_PMC_GP = 0,
 382	KVM_PMC_FIXED,
 383};
 384
 385struct kvm_pmc {
 386	enum pmc_type type;
 387	u8 idx;
 
 
 388	u64 counter;
 
 389	u64 eventsel;
 390	struct perf_event *perf_event;
 391	struct kvm_vcpu *vcpu;
 
 
 
 
 
 
 392};
 393
 
 
 
 
 
 
 394struct kvm_pmu {
 395	unsigned nr_arch_gp_counters;
 396	unsigned nr_arch_fixed_counters;
 397	unsigned available_event_types;
 398	u64 fixed_ctr_ctrl;
 
 399	u64 global_ctrl;
 400	u64 global_status;
 401	u64 global_ovf_ctrl;
 402	u64 counter_bitmask[2];
 403	u64 global_ctrl_mask;
 
 404	u64 reserved_bits;
 
 405	u8 version;
 406	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
 407	struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
 408	struct irq_work irq_work;
 409	u64 reprogram_pmi;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 410};
 411
 412struct kvm_pmu_ops;
 413
 414enum {
 415	KVM_DEBUGREG_BP_ENABLED = 1,
 416	KVM_DEBUGREG_WONT_EXIT = 2,
 417	KVM_DEBUGREG_RELOAD = 4,
 418};
 419
 420struct kvm_mtrr_range {
 421	u64 base;
 422	u64 mask;
 423	struct list_head node;
 424};
 425
 426struct kvm_mtrr {
 427	struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
 428	mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
 429	u64 deftype;
 430
 431	struct list_head head;
 432};
 433
 434/* Hyper-V SynIC timer */
 435struct kvm_vcpu_hv_stimer {
 436	struct hrtimer timer;
 437	int index;
 438	u64 config;
 439	u64 count;
 440	u64 exp_time;
 441	struct hv_message msg;
 442	bool msg_pending;
 443};
 444
 445/* Hyper-V synthetic interrupt controller (SynIC)*/
 446struct kvm_vcpu_hv_synic {
 447	u64 version;
 448	u64 control;
 449	u64 msg_page;
 450	u64 evt_page;
 451	atomic64_t sint[HV_SYNIC_SINT_COUNT];
 452	atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT];
 453	DECLARE_BITMAP(auto_eoi_bitmap, 256);
 454	DECLARE_BITMAP(vec_bitmap, 256);
 455	bool active;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 456};
 457
 458/* Hyper-V per vcpu emulation context */
 459struct kvm_vcpu_hv {
 
 
 460	u64 hv_vapic;
 461	s64 runtime_offset;
 462	struct kvm_vcpu_hv_synic synic;
 463	struct kvm_hyperv_exit exit;
 464	struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
 465	DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 466};
 467
 468struct kvm_vcpu_arch {
 469	/*
 470	 * rip and regs accesses must go through
 471	 * kvm_{register,rip}_{read,write} functions.
 472	 */
 473	unsigned long regs[NR_VCPU_REGS];
 474	u32 regs_avail;
 475	u32 regs_dirty;
 476
 477	unsigned long cr0;
 478	unsigned long cr0_guest_owned_bits;
 479	unsigned long cr2;
 480	unsigned long cr3;
 481	unsigned long cr4;
 482	unsigned long cr4_guest_owned_bits;
 
 483	unsigned long cr8;
 
 
 484	u32 hflags;
 485	u64 efer;
 486	u64 apic_base;
 487	struct kvm_lapic *apic;    /* kernel irqchip context */
 488	bool apicv_active;
 489	DECLARE_BITMAP(ioapic_handled_vectors, 256);
 490	unsigned long apic_attention;
 491	int32_t apic_arb_prio;
 492	int mp_state;
 493	u64 ia32_misc_enable_msr;
 494	u64 smbase;
 
 
 495	bool tpr_access_reporting;
 
 
 496	u64 ia32_xss;
 
 
 
 497
 498	/*
 499	 * Paging state of the vcpu
 500	 *
 501	 * If the vcpu runs in guest mode with two level paging this still saves
 502	 * the paging mode of the l1 guest. This context is always used to
 503	 * handle faults.
 504	 */
 505	struct kvm_mmu mmu;
 
 
 
 
 
 
 506
 507	/*
 508	 * Paging state of an L2 guest (used for nested npt)
 509	 *
 510	 * This context will save all necessary information to walk page tables
 511	 * of the an L2 guest. This context is only initialized for page table
 512	 * walking and not for faulting since we never handle l2 page faults on
 513	 * the host.
 514	 */
 515	struct kvm_mmu nested_mmu;
 516
 517	/*
 518	 * Pointer to the mmu context currently used for
 519	 * gva_to_gpa translations.
 520	 */
 521	struct kvm_mmu *walk_mmu;
 522
 523	struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
 524	struct kvm_mmu_memory_cache mmu_page_cache;
 
 525	struct kvm_mmu_memory_cache mmu_page_header_cache;
 526
 527	struct fpu guest_fpu;
 
 
 
 
 
 
 
 
 
 
 
 
 528	u64 xcr0;
 529	u64 guest_supported_xcr0;
 530	u32 guest_xstate_size;
 531
 532	struct kvm_pio_request pio;
 533	void *pio_data;
 
 
 534
 535	u8 event_exit_inst_len;
 536
 537	struct kvm_queued_exception {
 538		bool pending;
 539		bool has_error_code;
 540		bool reinject;
 541		u8 nr;
 542		u32 error_code;
 543	} exception;
 544
 545	struct kvm_queued_interrupt {
 546		bool pending;
 547		bool soft;
 548		u8 nr;
 549	} interrupt;
 550
 551	int halt_request; /* real mode on Intel only */
 552
 553	int cpuid_nent;
 554	struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
 
 555
 
 556	int maxphyaddr;
 557
 558	/* emulate context */
 559
 560	struct x86_emulate_ctxt emulate_ctxt;
 561	bool emulate_regs_need_sync_to_vcpu;
 562	bool emulate_regs_need_sync_from_vcpu;
 563	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
 564
 565	gpa_t time;
 566	struct pvclock_vcpu_time_info hv_clock;
 567	unsigned int hw_tsc_khz;
 568	struct gfn_to_hva_cache pv_time;
 569	bool pv_time_enabled;
 570	/* set guest stopped flag in pvclock flags field */
 571	bool pvclock_set_guest_stopped_request;
 572
 573	struct {
 
 574		u64 msr_val;
 575		u64 last_steal;
 576		struct gfn_to_hva_cache stime;
 577		struct kvm_steal_time steal;
 578	} st;
 579
 580	u64 tsc_offset;
 
 581	u64 last_guest_tsc;
 582	u64 last_host_tsc;
 583	u64 tsc_offset_adjustment;
 584	u64 this_tsc_nsec;
 585	u64 this_tsc_write;
 586	u64 this_tsc_generation;
 587	bool tsc_catchup;
 588	bool tsc_always_catchup;
 589	s8 virtual_tsc_shift;
 590	u32 virtual_tsc_mult;
 591	u32 virtual_tsc_khz;
 592	s64 ia32_tsc_adjust_msr;
 593	u64 tsc_scaling_ratio;
 
 
 594
 595	atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
 596	unsigned nmi_pending; /* NMI queued after currently running handler */
 597	bool nmi_injected;    /* Trying to inject an NMI this entry */
 598	bool smi_pending;    /* SMI queued after currently running handler */
 
 599
 600	struct kvm_mtrr mtrr_state;
 601	u64 pat;
 602
 603	unsigned switch_db_regs;
 604	unsigned long db[KVM_NR_DB_REGS];
 605	unsigned long dr6;
 606	unsigned long dr7;
 607	unsigned long eff_db[KVM_NR_DB_REGS];
 608	unsigned long guest_debug_dr7;
 
 
 609
 610	u64 mcg_cap;
 611	u64 mcg_status;
 612	u64 mcg_ctl;
 613	u64 mcg_ext_ctl;
 614	u64 *mce_banks;
 
 615
 616	/* Cache MMIO info */
 617	u64 mmio_gva;
 618	unsigned access;
 619	gfn_t mmio_gfn;
 620	u64 mmio_gen;
 621
 622	struct kvm_pmu pmu;
 623
 624	/* used for guest single stepping over the given code position */
 625	unsigned long singlestep_rip;
 626
 627	struct kvm_vcpu_hv hyperv;
 
 
 628
 629	cpumask_var_t wbinvd_dirty_mask;
 630
 631	unsigned long last_retry_eip;
 632	unsigned long last_retry_addr;
 633
 634	struct {
 635		bool halted;
 636		gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
 637		struct gfn_to_hva_cache data;
 638		u64 msr_val;
 
 
 639		u32 id;
 640		bool send_user_only;
 
 
 
 641	} apf;
 642
 643	/* OSVW MSRs (AMD only) */
 644	struct {
 645		u64 length;
 646		u64 status;
 647	} osvw;
 648
 649	struct {
 650		u64 msr_val;
 651		struct gfn_to_hva_cache data;
 652	} pv_eoi;
 653
 
 
 654	/*
 655	 * Indicate whether the access faults on its page table in guest
 656	 * which is set when fix page fault and used to detect unhandeable
 657	 * instruction.
 
 
 
 
 
 
 
 
 
 
 658	 */
 659	bool write_fault_to_shadow_pgtable;
 660
 661	/* set at EPT violation at this point */
 662	unsigned long exit_qualification;
 663
 664	/* pv related host specific info */
 665	struct {
 666		bool pv_unhalted;
 667	} pv;
 668
 669	int pending_ioapic_eoi;
 670	int pending_external_vector;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 671};
 672
 673struct kvm_lpage_info {
 674	int disallow_lpage;
 675};
 676
 677struct kvm_arch_memory_slot {
 678	struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
 679	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 680	unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
 681};
 682
 683/*
 684 * We use as the mode the number of bits allocated in the LDR for the
 685 * logical processor ID.  It happens that these are all powers of two.
 686 * This makes it is very easy to detect cases where the APICs are
 687 * configured for multiple modes; in that case, we cannot use the map and
 688 * hence cannot use kvm_irq_delivery_to_apic_fast either.
 689 */
 690#define KVM_APIC_MODE_XAPIC_CLUSTER          4
 691#define KVM_APIC_MODE_XAPIC_FLAT             8
 692#define KVM_APIC_MODE_X2APIC                16
 693
 694struct kvm_apic_map {
 695	struct rcu_head rcu;
 696	u8 mode;
 697	u32 max_apic_id;
 698	union {
 699		struct kvm_lapic *xapic_flat_map[8];
 700		struct kvm_lapic *xapic_cluster_map[16][4];
 701	};
 702	struct kvm_lapic *phys_map[];
 703};
 704
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 705/* Hyper-V emulation context */
 706struct kvm_hv {
 707	struct mutex hv_lock;
 708	u64 hv_guest_os_id;
 709	u64 hv_hypercall;
 710	u64 hv_tsc_page;
 
 711
 712	/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
 713	u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
 714	u64 hv_crash_ctl;
 715
 716	HV_REFERENCE_TSC_PAGE tsc_ref;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 717};
 718
 719struct kvm_arch {
 720	unsigned int n_used_mmu_pages;
 721	unsigned int n_requested_mmu_pages;
 722	unsigned int n_max_mmu_pages;
 723	unsigned int indirect_shadow_pages;
 724	unsigned long mmu_valid_gen;
 725	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 726	/*
 727	 * Hash table of struct kvm_mmu_page.
 728	 */
 729	struct list_head active_mmu_pages;
 730	struct list_head zapped_obsolete_pages;
 
 
 
 
 
 
 
 
 
 
 
 
 731	struct kvm_page_track_notifier_node mmu_sp_tracker;
 732	struct kvm_page_track_notifier_head track_notifier_head;
 
 
 
 
 
 
 
 733
 734	struct list_head assigned_dev_head;
 735	struct iommu_domain *iommu_domain;
 736	bool iommu_noncoherent;
 737#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
 738	atomic_t noncoherent_dma_count;
 739#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
 740	atomic_t assigned_device_count;
 741	struct kvm_pic *vpic;
 742	struct kvm_ioapic *vioapic;
 743	struct kvm_pit *vpit;
 744	atomic_t vapics_in_nmi_mode;
 745	struct mutex apic_map_lock;
 746	struct kvm_apic_map *apic_map;
 
 747
 748	unsigned int tss_addr;
 749	bool apic_access_page_done;
 
 
 
 750
 751	gpa_t wall_clock;
 752
 753	bool ept_identity_pagetable_done;
 754	gpa_t ept_identity_map_addr;
 
 
 755
 756	unsigned long irq_sources_bitmap;
 757	s64 kvmclock_offset;
 
 
 
 
 
 758	raw_spinlock_t tsc_write_lock;
 759	u64 last_tsc_nsec;
 760	u64 last_tsc_write;
 761	u32 last_tsc_khz;
 
 762	u64 cur_tsc_nsec;
 763	u64 cur_tsc_write;
 764	u64 cur_tsc_offset;
 765	u64 cur_tsc_generation;
 766	int nr_vcpus_matched_tsc;
 767
 768	spinlock_t pvclock_gtod_sync_lock;
 
 
 769	bool use_master_clock;
 770	u64 master_kernel_ns;
 771	u64 master_cycle_now;
 772	struct delayed_work kvmclock_update_work;
 773	struct delayed_work kvmclock_sync_work;
 774
 775	struct kvm_xen_hvm_config xen_hvm_config;
 776
 777	/* reads protected by irq_srcu, writes by irq_lock */
 778	struct hlist_head mask_notifier_list;
 779
 780	struct kvm_hv hyperv;
 
 781
 782	#ifdef CONFIG_KVM_MMU_AUDIT
 783	int audit_point;
 784	#endif
 785
 786	bool boot_vcpu_runs_old_kvmclock;
 787	u32 bsp_vcpu_id;
 788
 789	u64 disabled_quirks;
 
 790
 791	bool irqchip_split;
 792	u8 nr_reserved_ioapic_pins;
 793
 794	bool disabled_lapic_found;
 795
 796	/* Struct members for AVIC */
 797	u32 avic_vm_id;
 798	u32 ldr_mode;
 799	struct page *avic_logical_id_table_page;
 800	struct page *avic_physical_id_table_page;
 801	struct hlist_node hnode;
 802
 803	bool x2apic_format;
 804	bool x2apic_broadcast_quirk_disabled;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 805};
 806
 807struct kvm_vm_stat {
 808	ulong mmu_shadow_zapped;
 809	ulong mmu_pte_write;
 810	ulong mmu_pte_updated;
 811	ulong mmu_pde_zapped;
 812	ulong mmu_flooded;
 813	ulong mmu_recycled;
 814	ulong mmu_cache_miss;
 815	ulong mmu_unsync;
 816	ulong remote_tlb_flush;
 817	ulong lpages;
 
 
 
 
 
 
 
 
 
 818};
 819
 820struct kvm_vcpu_stat {
 
 
 821	u64 pf_fixed;
 
 
 
 
 822	u64 pf_guest;
 823	u64 tlb_flush;
 824	u64 invlpg;
 825
 826	u64 exits;
 827	u64 io_exits;
 828	u64 mmio_exits;
 829	u64 signal_exits;
 830	u64 irq_window_exits;
 831	u64 nmi_window_exits;
 
 832	u64 halt_exits;
 833	u64 halt_successful_poll;
 834	u64 halt_attempted_poll;
 835	u64 halt_poll_invalid;
 836	u64 halt_wakeup;
 837	u64 request_irq_exits;
 838	u64 irq_exits;
 839	u64 host_state_reload;
 840	u64 efer_reload;
 841	u64 fpu_reload;
 842	u64 insn_emulation;
 843	u64 insn_emulation_fail;
 844	u64 hypercalls;
 845	u64 irq_injections;
 846	u64 nmi_injections;
 
 
 
 
 
 
 
 
 847};
 848
 849struct x86_instruction_info;
 850
 851struct msr_data {
 852	bool host_initiated;
 853	u32 index;
 854	u64 data;
 855};
 856
 857struct kvm_lapic_irq {
 858	u32 vector;
 859	u16 delivery_mode;
 860	u16 dest_mode;
 861	bool level;
 862	u16 trig_mode;
 863	u32 shorthand;
 864	u32 dest_id;
 865	bool msi_redir_hint;
 866};
 867
 
 
 
 
 
 868struct kvm_x86_ops {
 869	int (*cpu_has_kvm_support)(void);          /* __init */
 870	int (*disabled_by_bios)(void);             /* __init */
 871	int (*hardware_enable)(void);
 872	void (*hardware_disable)(void);
 873	void (*check_processor_compatibility)(void *rtn);
 874	int (*hardware_setup)(void);               /* __init */
 875	void (*hardware_unsetup)(void);            /* __exit */
 876	bool (*cpu_has_accelerated_tpr)(void);
 877	bool (*cpu_has_high_real_mode_segbase)(void);
 878	void (*cpuid_update)(struct kvm_vcpu *vcpu);
 879
 
 880	int (*vm_init)(struct kvm *kvm);
 881	void (*vm_destroy)(struct kvm *kvm);
 882
 883	/* Create, but do not attach this VCPU */
 884	struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
 
 885	void (*vcpu_free)(struct kvm_vcpu *vcpu);
 886	void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
 887
 888	void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
 889	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 890	void (*vcpu_put)(struct kvm_vcpu *vcpu);
 891
 892	void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
 893	int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
 894	int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
 895	u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
 896	void (*get_segment)(struct kvm_vcpu *vcpu,
 897			    struct kvm_segment *var, int seg);
 898	int (*get_cpl)(struct kvm_vcpu *vcpu);
 899	void (*set_segment)(struct kvm_vcpu *vcpu,
 900			    struct kvm_segment *var, int seg);
 901	void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
 902	void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
 903	void (*decache_cr3)(struct kvm_vcpu *vcpu);
 904	void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
 905	void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
 906	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
 907	int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
 908	void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
 
 909	void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 910	void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 911	void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 912	void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 913	u64 (*get_dr6)(struct kvm_vcpu *vcpu);
 914	void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
 915	void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
 916	void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
 917	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 918	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 919	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
 920	u32 (*get_pkru)(struct kvm_vcpu *vcpu);
 921	void (*fpu_activate)(struct kvm_vcpu *vcpu);
 922	void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
 923
 924	void (*tlb_flush)(struct kvm_vcpu *vcpu);
 925
 926	void (*run)(struct kvm_vcpu *vcpu);
 927	int (*handle_exit)(struct kvm_vcpu *vcpu);
 928	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 929	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
 930	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
 931	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
 932				unsigned char *hypercall_addr);
 933	void (*set_irq)(struct kvm_vcpu *vcpu);
 934	void (*set_nmi)(struct kvm_vcpu *vcpu);
 935	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 936				bool has_error_code, u32 error_code,
 937				bool reinject);
 938	void (*cancel_injection)(struct kvm_vcpu *vcpu);
 939	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
 940	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
 941	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
 942	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
 943	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
 944	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 945	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
 946	bool (*get_enable_apicv)(void);
 947	void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
 948	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
 949	void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
 
 950	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 951	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
 952	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
 953	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
 954	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 
 955	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 956	int (*get_tdp_level)(void);
 957	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 958	int (*get_lpage_level)(void);
 959	bool (*rdtscp_supported)(void);
 960	bool (*invpcid_supported)(void);
 961
 962	void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
 963
 964	void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry);
 965
 966	bool (*has_wbinvd_exit)(void);
 967
 
 
 968	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
 969
 970	void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
 
 
 
 
 
 
 971
 972	int (*check_intercept)(struct kvm_vcpu *vcpu,
 973			       struct x86_instruction_info *info,
 974			       enum x86_intercept_stage stage);
 975	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
 976	bool (*mpx_supported)(void);
 977	bool (*xsaves_supported)(void);
 978
 979	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 980
 981	void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
 982
 983	/*
 984	 * Arch-specific dirty logging hooks. These hooks are only supposed to
 985	 * be valid if the specific arch has hardware-accelerated dirty logging
 986	 * mechanism. Currently only for PML on VMX.
 987	 *
 988	 *  - slot_enable_log_dirty:
 989	 *	called when enabling log dirty mode for the slot.
 990	 *  - slot_disable_log_dirty:
 991	 *	called when disabling log dirty mode for the slot.
 992	 *	also called when slot is created with log dirty disabled.
 993	 *  - flush_log_dirty:
 994	 *	called before reporting dirty_bitmap to userspace.
 995	 *  - enable_log_dirty_pt_masked:
 996	 *	called when reenabling log dirty for the GFNs in the mask after
 997	 *	corresponding bits are cleared in slot->dirty_bitmap.
 998	 */
 999	void (*slot_enable_log_dirty)(struct kvm *kvm,
1000				      struct kvm_memory_slot *slot);
1001	void (*slot_disable_log_dirty)(struct kvm *kvm,
1002				       struct kvm_memory_slot *slot);
1003	void (*flush_log_dirty)(struct kvm *kvm);
1004	void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
1005					   struct kvm_memory_slot *slot,
1006					   gfn_t offset, unsigned long mask);
1007	/* pmu operations of sub-arch */
1008	const struct kvm_pmu_ops *pmu_ops;
1009
1010	/*
1011	 * Architecture specific hooks for vCPU blocking due to
1012	 * HLT instruction.
1013	 * Returns for .pre_block():
1014	 *    - 0 means continue to block the vCPU.
1015	 *    - 1 means we cannot block the vCPU since some event
1016	 *        happens during this period, such as, 'ON' bit in
1017	 *        posted-interrupts descriptor is set.
1018	 */
1019	int (*pre_block)(struct kvm_vcpu *vcpu);
1020	void (*post_block)(struct kvm_vcpu *vcpu);
 
 
1021
1022	void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
1023	void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
1024
1025	int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
1026			      uint32_t guest_irq, bool set);
 
1027	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
 
1028
1029	int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
 
1030	void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
1031
1032	void (*setup_mce)(struct kvm_vcpu *vcpu);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1033};
1034
1035struct kvm_arch_async_pf {
1036	u32 token;
1037	gfn_t gfn;
1038	unsigned long cr3;
1039	bool direct_map;
1040};
1041
1042extern struct kvm_x86_ops *kvm_x86_ops;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1043
1044int kvm_mmu_module_init(void);
1045void kvm_mmu_module_exit(void);
 
1046
1047void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
1048int kvm_mmu_create(struct kvm_vcpu *vcpu);
1049void kvm_mmu_setup(struct kvm_vcpu *vcpu);
1050void kvm_mmu_init_vm(struct kvm *kvm);
1051void kvm_mmu_uninit_vm(struct kvm *kvm);
1052void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
1053		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask);
1054
 
1055void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
1056void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
1057				      struct kvm_memory_slot *memslot);
 
 
 
 
 
 
 
 
1058void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
1059				   const struct kvm_memory_slot *memslot);
1060void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
1061				   struct kvm_memory_slot *memslot);
1062void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
1063					struct kvm_memory_slot *memslot);
1064void kvm_mmu_slot_set_dirty(struct kvm *kvm,
1065			    struct kvm_memory_slot *memslot);
1066void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
1067				   struct kvm_memory_slot *slot,
1068				   gfn_t gfn_offset, unsigned long mask);
1069void kvm_mmu_zap_all(struct kvm *kvm);
1070void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots);
1071unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
1072void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
1073
1074int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
1075bool pdptrs_changed(struct kvm_vcpu *vcpu);
1076
1077int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1078			  const void *val, int bytes);
1079
1080struct kvm_irq_mask_notifier {
1081	void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
1082	int irq;
1083	struct hlist_node link;
1084};
1085
1086void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
1087				    struct kvm_irq_mask_notifier *kimn);
1088void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
1089				      struct kvm_irq_mask_notifier *kimn);
1090void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
1091			     bool mask);
1092
1093extern bool tdp_enabled;
1094
1095u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
1096
1097/* control of guest tsc rate supported? */
1098extern bool kvm_has_tsc_control;
1099/* maximum supported tsc_khz for guests */
1100extern u32  kvm_max_guest_tsc_khz;
1101/* number of bits of the fractional part of the TSC scaling ratio */
1102extern u8   kvm_tsc_scaling_ratio_frac_bits;
1103/* maximum allowed value of TSC scaling ratio */
1104extern u64  kvm_max_tsc_scaling_ratio;
1105/* 1ull << kvm_tsc_scaling_ratio_frac_bits */
1106extern u64  kvm_default_tsc_scaling_ratio;
1107
1108extern u64 kvm_mce_cap_supported;
1109
/* Result codes of instruction emulation. */
enum emulation_result {
	/* no further processing */
	EMULATE_DONE,
	/* kvm_run ready for userspace exit */
	EMULATE_USER_EXIT,
	/* can't emulate this instruction */
	EMULATE_FAIL,
};
1115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Single-bit flags that may be OR-ed together; the emulation_type argument
 * of x86_emulate_instruction() / emulate_instruction() takes these.
 */
#define EMULTYPE_NO_DECODE		(1 << 0)
#define EMULTYPE_TRAP_UD		(1 << 1)
#define EMULTYPE_SKIP			(1 << 2)
#define EMULTYPE_RETRY			(1 << 3)
#define EMULTYPE_NO_REEXECUTE		(1 << 4)
1121int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
1122			    int emulation_type, void *insn, int insn_len);
1123
/*
 * Convenience wrapper around x86_emulate_instruction() for callers that have
 * neither a cr2 value nor prefetched instruction bytes: passes cr2 = 0,
 * insn = NULL and insn_len = 0, and returns the callee's result unchanged.
 */
static inline int emulate_instruction(struct kvm_vcpu *vcpu,
				      int emulation_type)
{
	int ret;

	ret = x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);

	return ret;
}
 
 
1129
1130void kvm_enable_efer_bits(u64);
1131bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
1132int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
1133int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
1134
1135struct x86_emulate_ctxt;
 
 
 
 
 
 
1136
1137int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
1138int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port);
1139int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
1140int kvm_emulate_halt(struct kvm_vcpu *vcpu);
1141int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
 
1142int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
1143
1144void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 
1145int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
1146void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
1147
1148int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
1149		    int reason, bool has_error_code, u32 error_code);
1150
 
 
1151int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
1152int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
1153int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
1154int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
1155int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
1156int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
1157unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
1158void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
1159void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
1160int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
1161
1162int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
1163int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
1164
1165unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
1166void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
1167bool kvm_rdpmc(struct kvm_vcpu *vcpu);
1168
1169void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
1170void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 
1171void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
1172void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
1173void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
1174int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
1175			    gfn_t gfn, void *data, int offset, int len,
1176			    u32 access);
1177bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
1178bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
1179
/*
 * Update one source's contribution to an interrupt line and report the
 * resulting line level.
 *
 * @irq_state:     bitmap word with one bit per IRQ source.
 * @irq_source_id: bit index of the source being updated.
 * @level:         non-zero sets the source's bit, zero clears it.
 *
 * Returns 1 if any bit remains set in *irq_state after the update (the
 * logical OR used for level-triggered interrupts), otherwise 0.
 */
static inline int __kvm_irq_line_state(unsigned long *irq_state,
				       int irq_source_id, int level)
{
	if (!level)
		__clear_bit(irq_source_id, irq_state);
	else
		__set_bit(irq_source_id, irq_state);

	return *irq_state != 0;
}
1191
 
 
 
 
1192int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
1193void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
1194
1195void kvm_inject_nmi(struct kvm_vcpu *vcpu);
1196
 
 
1197int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
1198int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
1199void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
1200int kvm_mmu_load(struct kvm_vcpu *vcpu);
1201void kvm_mmu_unload(struct kvm_vcpu *vcpu);
1202void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
1203gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
1204			   struct x86_exception *exception);
1205gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
1206			      struct x86_exception *exception);
1207gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
1208			       struct x86_exception *exception);
1209gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
1210			       struct x86_exception *exception);
1211gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
1212				struct x86_exception *exception);
1213
1214void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
1215
1216int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
 
 
 
1217
1218int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
1219		       void *insn, int insn_len);
1220void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
1221void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
1222
1223void kvm_enable_tdp(void);
1224void kvm_disable_tdp(void);
1225
1226static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
1227				  struct x86_exception *exception)
1228{
1229	return gpa;
1230}
1231
1232static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 
1233{
1234	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
1235
1236	return (struct kvm_mmu_page *)page_private(page);
1237}
1238
 
 
 
 
 
 
 
 
 
 
 
 
 
1239static inline u16 kvm_read_ldt(void)
1240{
1241	u16 ldt;
1242	asm("sldt %0" : "=g"(ldt));
1243	return ldt;
1244}
1245
1246static inline void kvm_load_ldt(u16 sel)
1247{
1248	asm("lldt %0" : : "rm"(sel));
1249}
1250
#ifdef CONFIG_X86_64
/*
 * Read MSR @msr with rdmsrl() and return its value.  Only built for
 * CONFIG_X86_64.
 */
static inline unsigned long read_msr(unsigned long msr)
{
	u64 msr_val;

	rdmsrl(msr, msr_val);

	return msr_val;
}
#endif
1260
1261static inline u32 get_rdx_init_val(void)
1262{
1263	return 0x600; /* P6 family */
1264}
1265
1266static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
1267{
1268	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
1269}
1270
1271static inline u64 get_canonical(u64 la)
1272{
1273	return ((int64_t)la << 16) >> 16;
1274}
1275
1276static inline bool is_noncanonical_address(u64 la)
1277{
1278#ifdef CONFIG_X86_64
1279	return get_canonical(la) != la;
1280#else
1281	return false;
1282#endif
1283}
1284
/* TSS layout constants, all in bytes. */
#define TSS_IOPB_BASE_OFFSET	0x66
#define TSS_BASE_SIZE		0x68
#define TSS_IOPB_SIZE		(65536 / 8)	/* one bit per 65536 entries */
#define TSS_REDIRECTION_SIZE	(256 / 8)	/* one bit per 256 entries */
/* Base + redirection bitmap + I/O permission bitmap + one extra byte. */
#define RMODE_TSS_SIZE \
	(TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
1291
/*
 * Task switch reason codes (kvm_task_switch() takes a 'reason' argument;
 * NOTE(review): presumably one of these -- confirm at the call sites).
 */
enum {
	TASK_SWITCH_CALL	= 0,
	TASK_SWITCH_IRET	= 1,
	TASK_SWITCH_JMP		= 2,
	TASK_SWITCH_GATE	= 3,
};
1298
/*
 * HF_* single-bit flags.  HF_SMM_MASK is tested against vcpu->arch.hflags by
 * kvm_arch_vcpu_memslots_id() below.
 */
#define HF_GIF_MASK			(1 << 0)
#define HF_HIF_MASK			(1 << 1)
#define HF_VINTR_MASK			(1 << 2)
#define HF_NMI_MASK			(1 << 3)
#define HF_IRET_MASK			(1 << 4)
#define HF_GUEST_MASK			(1 << 5) /* VCPU is in guest-mode */
#define HF_SMM_MASK			(1 << 6)
#define HF_SMM_INSIDE_NMI_MASK		(1 << 7)

#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
#define KVM_ADDRESS_SPACE_NUM 2

/* Memslot address space id for a vCPU: 1 while HF_SMM_MASK is set, else 0. */
#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
/* Memslots for a shadow-page role: selected by the role's smm field. */
#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
1313
1314/*
1315 * Hardware virtualization extension instructions may fault if a
1316 * reboot turns off virtualization while processes are running.
1317 * Trap the fault and ignore the instruction if that happens.
1318 */
1319asmlinkage void kvm_spurious_fault(void);
1320
/*
 * Build the inline-asm text for @insn with fault recovery:
 *
 *   666: @insn
 *   668: (resume point)
 *   667: fixup stub in .fixup: runs @cleanup_insn, then jumps back to 668
 *        if kvm_rebooting is non-zero; otherwise pushes the address of 666
 *        and calls kvm_spurious_fault().
 *
 * An exception table entry routes a fault at 666 to 667.
 */
#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn)	\
	"666: " insn "\n\t"					\
	"668: \n\t"						\
	".pushsection .fixup, \"ax\" \n"			\
	"667: \n\t"						\
	cleanup_insn "\n\t"					\
	"cmpb $0, kvm_rebooting \n\t"				\
	"jne 668b \n\t"						\
	__ASM_SIZE(push) " $666b \n\t"				\
	"call kvm_spurious_fault \n\t"				\
	".popsection \n\t"					\
	_ASM_EXTABLE(666b, 667b)

/* Same as ____kvm_handle_fault_on_reboot() with an empty cleanup instruction. */
#define __kvm_handle_fault_on_reboot(insn)		\
	____kvm_handle_fault_on_reboot(insn, "")
1336
1337#define KVM_ARCH_WANT_MMU_NOTIFIER
1338int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
1339int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
1340int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
1341int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
1342void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
1343int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
1344int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 
1345int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
1346int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
1347void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
1348void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
1349void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
1350					   unsigned long address);
1351
1352void kvm_define_shared_msr(unsigned index, u32 msr);
1353int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
 
 
 
 
1354
1355u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
 
 
 
 
 
1356u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
 
 
1357
1358unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
1359bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
1360
1361void kvm_make_mclock_inprogress_request(struct kvm *kvm);
1362void kvm_make_scan_ioapic_request(struct kvm *kvm);
 
 
1363
1364void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1365				     struct kvm_async_pf *work);
1366void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1367				 struct kvm_async_pf *work);
1368void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1369			       struct kvm_async_pf *work);
1370bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
 
1371extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
1372
1373int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
1374int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
 
1375
1376int kvm_is_in_guest(void);
1377
1378int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
1379int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
1380bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
1381bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
1382
1383bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
1384			     struct kvm_vcpu **dest_vcpu);
1385
1386void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
1387		     struct kvm_lapic_irq *irq);
1388
 
 
 
 
 
 
 
1389static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
1390{
1391	if (kvm_x86_ops->vcpu_blocking)
1392		kvm_x86_ops->vcpu_blocking(vcpu);
1393}
1394
1395static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
1396{
1397	if (kvm_x86_ops->vcpu_unblocking)
1398		kvm_x86_ops->vcpu_unblocking(vcpu);
1399}
1400
/* Intentionally empty on x86: no work is done here. */
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
}
1402
1403static inline int kvm_cpu_get_apicid(int mps_cpu)
1404{
1405#ifdef CONFIG_X86_LOCAL_APIC
1406	return __default_cpu_present_to_apicid(mps_cpu);
1407#else
1408	WARN_ON_ONCE(1);
1409	return BAD_APICID;
1410#endif
1411}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1412
1413#endif /* _ASM_X86_KVM_HOST_H */

   1/* SPDX-License-Identifier: GPL-2.0-only */
   2/*
   3 * Kernel-based Virtual Machine driver for Linux
   4 *
   5 * This header defines architecture specific interfaces, x86 version
 
 
 
 
   6 */
   7
   8#ifndef _ASM_X86_KVM_HOST_H
   9#define _ASM_X86_KVM_HOST_H
  10
  11#include <linux/types.h>
  12#include <linux/mm.h>
  13#include <linux/mmu_notifier.h>
  14#include <linux/tracepoint.h>
  15#include <linux/cpumask.h>
  16#include <linux/irq_work.h>
  17#include <linux/irq.h>
  18#include <linux/workqueue.h>
  19
  20#include <linux/kvm.h>
  21#include <linux/kvm_para.h>
  22#include <linux/kvm_types.h>
  23#include <linux/perf_event.h>
  24#include <linux/pvclock_gtod.h>
  25#include <linux/clocksource.h>
  26#include <linux/irqbypass.h>
  27#include <linux/hyperv.h>
  28#include <linux/kfifo.h>
  29
  30#include <asm/apic.h>
  31#include <asm/pvclock-abi.h>
  32#include <asm/desc.h>
  33#include <asm/mtrr.h>
  34#include <asm/msr-index.h>
  35#include <asm/asm.h>
  36#include <asm/kvm_page_track.h>
  37#include <asm/kvm_vcpu_regs.h>
  38#include <asm/hyperv-tlfs.h>
  39
  40#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
  41
  42#define KVM_MAX_VCPUS 1024
  43
  44/*
  45 * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
  46 * might be larger than the actual number of VCPUs because the
  47 * APIC ID encodes CPU topology information.
  48 *
  49 * In the worst case, we'll need less than one extra bit for the
  50 * Core ID, and less than one extra bit for the Package (Die) ID,
  51 * so ratio of 4 should be enough.
  52 */
  53#define KVM_VCPU_ID_RATIO 4
  54#define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO)
  55
 
 
 
 
  56/* memory slots that are not exposed to userspace */
  57#define KVM_INTERNAL_MEM_SLOTS 3
 
  58
  59#define KVM_HALT_POLL_NS_DEFAULT 200000
 
 
  60
  61#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
  62
  63#define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
  64					KVM_DIRTY_LOG_INITIALLY_SET)
  65
  66#define KVM_BUS_LOCK_DETECTION_VALID_MODE	(KVM_BUS_LOCK_DETECTION_OFF | \
  67						 KVM_BUS_LOCK_DETECTION_EXIT)
  68
  69#define KVM_X86_NOTIFY_VMEXIT_VALID_BITS	(KVM_X86_NOTIFY_VMEXIT_ENABLED | \
  70						 KVM_X86_NOTIFY_VMEXIT_USER)
  71
  72/* x86-specific vcpu->requests bit members */
  73#define KVM_REQ_MIGRATE_TIMER		KVM_ARCH_REQ(0)
  74#define KVM_REQ_REPORT_TPR_ACCESS	KVM_ARCH_REQ(1)
  75#define KVM_REQ_TRIPLE_FAULT		KVM_ARCH_REQ(2)
  76#define KVM_REQ_MMU_SYNC		KVM_ARCH_REQ(3)
  77#define KVM_REQ_CLOCK_UPDATE		KVM_ARCH_REQ(4)
  78#define KVM_REQ_LOAD_MMU_PGD		KVM_ARCH_REQ(5)
  79#define KVM_REQ_EVENT			KVM_ARCH_REQ(6)
  80#define KVM_REQ_APF_HALT		KVM_ARCH_REQ(7)
  81#define KVM_REQ_STEAL_UPDATE		KVM_ARCH_REQ(8)
  82#define KVM_REQ_NMI			KVM_ARCH_REQ(9)
  83#define KVM_REQ_PMU			KVM_ARCH_REQ(10)
  84#define KVM_REQ_PMI			KVM_ARCH_REQ(11)
  85#ifdef CONFIG_KVM_SMM
  86#define KVM_REQ_SMI			KVM_ARCH_REQ(12)
  87#endif
  88#define KVM_REQ_MASTERCLOCK_UPDATE	KVM_ARCH_REQ(13)
  89#define KVM_REQ_MCLOCK_INPROGRESS \
  90	KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
  91#define KVM_REQ_SCAN_IOAPIC \
  92	KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
  93#define KVM_REQ_GLOBAL_CLOCK_UPDATE	KVM_ARCH_REQ(16)
  94#define KVM_REQ_APIC_PAGE_RELOAD \
  95	KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
  96#define KVM_REQ_HV_CRASH		KVM_ARCH_REQ(18)
  97#define KVM_REQ_IOAPIC_EOI_EXIT		KVM_ARCH_REQ(19)
  98#define KVM_REQ_HV_RESET		KVM_ARCH_REQ(20)
  99#define KVM_REQ_HV_EXIT			KVM_ARCH_REQ(21)
 100#define KVM_REQ_HV_STIMER		KVM_ARCH_REQ(22)
 101#define KVM_REQ_LOAD_EOI_EXITMAP	KVM_ARCH_REQ(23)
 102#define KVM_REQ_GET_NESTED_STATE_PAGES	KVM_ARCH_REQ(24)
 103#define KVM_REQ_APICV_UPDATE \
 104	KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 105#define KVM_REQ_TLB_FLUSH_CURRENT	KVM_ARCH_REQ(26)
 106#define KVM_REQ_TLB_FLUSH_GUEST \
 107	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 108#define KVM_REQ_APF_READY		KVM_ARCH_REQ(28)
 109#define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
 110#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
 111	KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 112#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
 113	KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 114#define KVM_REQ_HV_TLB_FLUSH \
 115	KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 116
 117#define CR0_RESERVED_BITS                                               \
 118	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
 119			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
 120			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
 121
 
 
 122#define CR4_RESERVED_BITS                                               \
 123	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 124			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
 125			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
 126			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
 127			  | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
 128			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
 129
 130#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 131
 132
 133
 134#define INVALID_PAGE (~(hpa_t)0)
 135#define VALID_PAGE(x) ((x) != INVALID_PAGE)
 136
 137#define INVALID_GPA (~(gpa_t)0)
 138
 139/* KVM Hugepage definitions for x86 */
 140#define KVM_MAX_HUGEPAGE_LEVEL	PG_LEVEL_1G
 141#define KVM_NR_PAGE_SIZES	(KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1)
 142#define KVM_HPAGE_GFN_SHIFT(x)	(((x) - 1) * 9)
 143#define KVM_HPAGE_SHIFT(x)	(PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
 144#define KVM_HPAGE_SIZE(x)	(1UL << KVM_HPAGE_SHIFT(x))
 145#define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
 146#define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 147
 148#define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50
 149#define KVM_MIN_ALLOC_MMU_PAGES 64UL
 150#define KVM_MMU_HASH_SHIFT 12
 
 
 
 
 
 
 
 151#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
 152#define KVM_MIN_FREE_MMU_PAGES 5
 153#define KVM_REFILL_PAGES 25
 154#define KVM_MAX_CPUID_ENTRIES 256
 155#define KVM_NR_FIXED_MTRR_REGION 88
 156#define KVM_NR_VAR_MTRR 8
 157
 158#define ASYNC_PF_PER_VCPU 64
 159
 160enum kvm_reg {
 161	VCPU_REGS_RAX = __VCPU_REGS_RAX,
 162	VCPU_REGS_RCX = __VCPU_REGS_RCX,
 163	VCPU_REGS_RDX = __VCPU_REGS_RDX,
 164	VCPU_REGS_RBX = __VCPU_REGS_RBX,
 165	VCPU_REGS_RSP = __VCPU_REGS_RSP,
 166	VCPU_REGS_RBP = __VCPU_REGS_RBP,
 167	VCPU_REGS_RSI = __VCPU_REGS_RSI,
 168	VCPU_REGS_RDI = __VCPU_REGS_RDI,
 169#ifdef CONFIG_X86_64
 170	VCPU_REGS_R8  = __VCPU_REGS_R8,
 171	VCPU_REGS_R9  = __VCPU_REGS_R9,
 172	VCPU_REGS_R10 = __VCPU_REGS_R10,
 173	VCPU_REGS_R11 = __VCPU_REGS_R11,
 174	VCPU_REGS_R12 = __VCPU_REGS_R12,
 175	VCPU_REGS_R13 = __VCPU_REGS_R13,
 176	VCPU_REGS_R14 = __VCPU_REGS_R14,
 177	VCPU_REGS_R15 = __VCPU_REGS_R15,
 178#endif
 179	VCPU_REGS_RIP,
 180	NR_VCPU_REGS,
 
 181
 
 182	VCPU_EXREG_PDPTR = NR_VCPU_REGS,
 183	VCPU_EXREG_CR0,
 184	VCPU_EXREG_CR3,
 185	VCPU_EXREG_CR4,
 186	VCPU_EXREG_RFLAGS,
 187	VCPU_EXREG_SEGMENTS,
 188	VCPU_EXREG_EXIT_INFO_1,
 189	VCPU_EXREG_EXIT_INFO_2,
 190};
 191
 192enum {
 193	VCPU_SREG_ES,
 194	VCPU_SREG_CS,
 195	VCPU_SREG_SS,
 196	VCPU_SREG_DS,
 197	VCPU_SREG_FS,
 198	VCPU_SREG_GS,
 199	VCPU_SREG_TR,
 200	VCPU_SREG_LDTR,
 201};
 202
 203enum exit_fastpath_completion {
 204	EXIT_FASTPATH_NONE,
 205	EXIT_FASTPATH_REENTER_GUEST,
 206	EXIT_FASTPATH_EXIT_HANDLED,
 207};
 208typedef enum exit_fastpath_completion fastpath_t;
 209
 210struct x86_emulate_ctxt;
 211struct x86_exception;
 212union kvm_smram;
 213enum x86_intercept;
 214enum x86_intercept_stage;
 215
 216#define KVM_NR_DB_REGS	4
 217
 218#define DR6_BUS_LOCK   (1 << 11)
 219#define DR6_BD		(1 << 13)
 220#define DR6_BS		(1 << 14)
 221#define DR6_BT		(1 << 15)
 222#define DR6_RTM		(1 << 16)
 223/*
 224 * DR6_ACTIVE_LOW combines fixed-1 and active-low bits.
 225 * We can regard all the bits in DR6_FIXED_1 as active_low bits;
 226 * they will never be 0 for now, but when they are defined
 227 * in the future it will require no code change.
 228 *
 229 * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
 230 */
 231#define DR6_ACTIVE_LOW	0xffff0ff0
 232#define DR6_VOLATILE	0x0001e80f
 233#define DR6_FIXED_1	(DR6_ACTIVE_LOW & ~DR6_VOLATILE)
 234
 235#define DR7_BP_EN_MASK	0x000000ff
 236#define DR7_GE		(1 << 9)
 237#define DR7_GD		(1 << 13)
 238#define DR7_FIXED_1	0x00000400
 239#define DR7_VOLATILE	0xffff2bff
 240
 241#define KVM_GUESTDBG_VALID_MASK \
 242	(KVM_GUESTDBG_ENABLE | \
 243	KVM_GUESTDBG_SINGLESTEP | \
 244	KVM_GUESTDBG_USE_HW_BP | \
 245	KVM_GUESTDBG_USE_SW_BP | \
 246	KVM_GUESTDBG_INJECT_BP | \
 247	KVM_GUESTDBG_INJECT_DB | \
 248	KVM_GUESTDBG_BLOCKIRQ)
 249
 250
 251#define PFERR_PRESENT_BIT 0
 252#define PFERR_WRITE_BIT 1
 253#define PFERR_USER_BIT 2
 254#define PFERR_RSVD_BIT 3
 255#define PFERR_FETCH_BIT 4
 256#define PFERR_PK_BIT 5
 257#define PFERR_SGX_BIT 15
 258#define PFERR_GUEST_FINAL_BIT 32
 259#define PFERR_GUEST_PAGE_BIT 33
 260#define PFERR_IMPLICIT_ACCESS_BIT 48
 261
 262#define PFERR_PRESENT_MASK	BIT(PFERR_PRESENT_BIT)
 263#define PFERR_WRITE_MASK	BIT(PFERR_WRITE_BIT)
 264#define PFERR_USER_MASK		BIT(PFERR_USER_BIT)
 265#define PFERR_RSVD_MASK		BIT(PFERR_RSVD_BIT)
 266#define PFERR_FETCH_MASK	BIT(PFERR_FETCH_BIT)
 267#define PFERR_PK_MASK		BIT(PFERR_PK_BIT)
 268#define PFERR_SGX_MASK		BIT(PFERR_SGX_BIT)
 269#define PFERR_GUEST_FINAL_MASK	BIT_ULL(PFERR_GUEST_FINAL_BIT)
 270#define PFERR_GUEST_PAGE_MASK	BIT_ULL(PFERR_GUEST_PAGE_BIT)
 271#define PFERR_IMPLICIT_ACCESS	BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
 272
 273#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK |	\
 
 274				 PFERR_WRITE_MASK |		\
 275				 PFERR_PRESENT_MASK)
 276
 277/* apic attention bits */
 278#define KVM_APIC_CHECK_VAPIC	0
 279/*
 280 * The following bit is set with PV-EOI, unset on EOI.
 281 * We detect PV-EOI changes by guest by comparing
 282 * this bit with PV-EOI in guest memory.
 283 * See the implementation in apic_update_pv_eoi.
 284 */
 285#define KVM_APIC_PV_EOI_PENDING	1
 286
 287struct kvm_kernel_irq_routing_entry;
 288
 289/*
 290 * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
 291 * also includes TDP pages) to determine whether or not a page can be used in
 292 * the given MMU context.  This is a subset of the overall kvm_cpu_role to
 293 * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
 294 * 2 bytes per gfn instead of 4 bytes per gfn.
 295 *
 296 * Upper-level shadow pages having gptes are tracked for write-protection via
 297 * gfn_track.  As above, gfn_track is a 16 bit counter, so KVM must not create
 298 * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
 299 * gfn_track will overflow and explosions will ensue.
 300 *
 301 * A unique shadow page (SP) for a gfn is created if and only if an existing SP
 302 * cannot be reused.  The ability to reuse a SP is tracked by its role, which
 303 * incorporates various mode bits and properties of the SP.  Roughly speaking,
 304 * the number of unique SPs that can theoretically be created is 2^n, where n
 305 * is the number of bits that are used to compute the role.
 306 *
 307 * But, even though there are 19 bits in the mask below, not all combinations
 308 * of modes and flags are possible:
 309 *
 310 *   - invalid shadow pages are not accounted, so the bits are effectively 18
 311 *
 312 *   - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
 313 *     execonly and ad_disabled are only used for nested EPT which has
 314 *     has_4_byte_gpte=0.  Therefore, 2 bits are always unused.
 315 *
 316 *   - the 4 bits of level are effectively limited to the values 2/3/4/5,
 317 *     as 4k SPs are not tracked (allowed to go unsync).  In addition non-PAE
 318 *     paging has exactly one upper level, making level completely redundant
 319 *     when has_4_byte_gpte=1.
 320 *
 321 *   - on top of this, smep_andnot_wp and smap_andnot_wp are only set if
 322 *     cr0_wp=0, therefore these three bits only give rise to 5 possibilities.
 323 *
 324 * Therefore, the maximum number of possible upper-level shadow pages for a
 325 * single gfn is a bit less than 2^13.
 326 */
 327union kvm_mmu_page_role {
 328	u32 word;
 329	struct {
 330		unsigned level:4;
 331		unsigned has_4_byte_gpte:1;
 332		unsigned quadrant:2;
 333		unsigned direct:1;
 334		unsigned access:3;
 335		unsigned invalid:1;
 336		unsigned efer_nx:1;
 337		unsigned cr0_wp:1;
 338		unsigned smep_andnot_wp:1;
 339		unsigned smap_andnot_wp:1;
 340		unsigned ad_disabled:1;
 341		unsigned guest_mode:1;
 342		unsigned passthrough:1;
 343		unsigned :5;
 344
 345		/*
 346		 * This is left at the top of the word so that
 347		 * kvm_memslots_for_spte_role can extract it with a
 348		 * simple shift.  While there is room, give it a whole
 349		 * byte so it is also faster to load it from memory.
 350		 */
 351		unsigned smm:8;
 352	};
 353};
 354
 355/*
 356 * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties
 357 * relevant to the current MMU configuration.   When loading CR0, CR4, or EFER,
 358 * including on nested transitions, if nothing in the full role changes then
 359 * MMU re-configuration can be skipped. @valid bit is set on first usage so we
 360 * don't treat all-zero structure as valid data.
 361 *
 362 * The properties that are tracked in the extended role but not the page role
 363 * are for things that either (a) do not affect the validity of the shadow page
 364 * or (b) are indirectly reflected in the shadow page's role.  For example,
 365 * CR4.PKE only affects permission checks for software walks of the guest page
 366 * tables (because KVM doesn't support Protection Keys with shadow paging), and
 367 * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level.
 368 *
 369 * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role.
 370 * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and
 371 * SMAP, but the MMU's permission checks for software walks need to be SMEP and
 372 * SMAP aware regardless of CR0.WP.
 373 */
 374union kvm_mmu_extended_role {
 375	u32 word;
 376	struct {
 377		unsigned int valid:1;
 378		unsigned int execonly:1;
 379		unsigned int cr4_pse:1;
 380		unsigned int cr4_pke:1;
 381		unsigned int cr4_smap:1;
 382		unsigned int cr4_smep:1;
 383		unsigned int cr4_la57:1;
 384		unsigned int efer_lma:1;
 385	};
 386};
 387
 388union kvm_cpu_role {
 389	u64 as_u64;
 390	struct {
 391		union kvm_mmu_page_role base;
 392		union kvm_mmu_extended_role ext;
 393	};
 394};
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 395
 396struct kvm_rmap_head {
 397	unsigned long val;
 398};
 399
 400struct kvm_pio_request {
 401	unsigned long linear_rip;
 402	unsigned long count;
 403	int in;
 404	int port;
 405	int size;
 406};
 407
 408#define PT64_ROOT_MAX_LEVEL 5
 409
 410struct rsvd_bits_validate {
 411	u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL];
 412	u64 bad_mt_xwr;
 413};
 414
 415struct kvm_mmu_root_info {
 416	gpa_t pgd;
 417	hpa_t hpa;
 418};
 419
 420#define KVM_MMU_ROOT_INFO_INVALID \
 421	((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE })
 422
 /* Number of entries in kvm_mmu.prev_roots[]. */
 423#define KVM_MMU_NUM_PREV_ROOTS 3
 424
 /*
  * Valueless marker macro.
  * NOTE(review): presumably tells generic KVM code that mmu_lock is a
  * reader/writer lock on x86 - confirm where the macro is tested.
  */
 425#define KVM_HAVE_MMU_RWLOCK
 426
 427struct kvm_mmu_page;
 428struct kvm_page_fault;
 429
 430/*
 431 * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
 432 * and 2-level 32-bit).  The kvm_mmu structure abstracts the details of the
 433 * current mmu mode.
 *
 * The function pointers at the top are the mode-specific callbacks; the
 * remaining members hold the current and previous roots, the CPU/root roles
 * and cached permission and reserved-bit data.
 434 */
 435struct kvm_mmu {
 436	unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu);
 
 437	u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
 438	int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
 
 439	void (*inject_page_fault)(struct kvm_vcpu *vcpu,
 440				  struct x86_exception *fault);
 441	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 442			    gpa_t gva_or_gpa, u64 access,
 443			    struct x86_exception *exception);
 
 
 444	int (*sync_page)(struct kvm_vcpu *vcpu,
 445			 struct kvm_mmu_page *sp);
 446	void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
 447	struct kvm_mmu_root_info root;
 448	union kvm_cpu_role cpu_role;
 449	union kvm_mmu_page_role root_role;
 
 
 
 
 
 
 
 
 
 
 
 450
 451	/*
 452	 * The pkru_mask indicates if protection key checks are needed.  It
 453	 * consists of 16 domains indexed by page fault error code bits [4:1],
 454	 * with PFEC.RSVD replaced by ACC_USER_MASK from the page tables.
 455	 * Each domain has 2 bits which are ANDed with AD and WD from PKRU.
 456	 */
 457	u32 pkru_mask;
 458
	/* KVM_MMU_NUM_PREV_ROOTS entries; presumably recently used roots - confirm */
 459	struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
 460
 461	/*
 462	 * Bitmap; bit set = permission fault
 463	 * Byte index: page fault error code [4:1]
 464	 * Bit index: pte permissions in ACC_* format
 465	 */
 466	u8 permissions[16];
 467
 468	u64 *pae_root;
 469	u64 *pml4_root;
 470	u64 *pml5_root;
 471
 472	/*
 473	 * check zero bits on shadow page table entries, these
 474	 * bits include not only hardware reserved bits but also
 475	 * the bits spte never used.
 476	 */
 477	struct rsvd_bits_validate shadow_zero_check;
 478
 479	struct rsvd_bits_validate guest_rsvd_check;
 480
 
 
 
 
 
 481	u64 pdptrs[4]; /* pae */
 482};
 483
 /*
  * A range of guest frame numbers: @pages frames starting at @start_gfn.
  * NOTE(review): presumably the argument of a ranged TLB flush - confirm at
  * the users of this type.
  */
 484struct kvm_tlb_range {
 485	u64 start_gfn;
 486	u64 pages;
 487};
 488
 /* Kind of performance counter stored in struct kvm_pmc.type. */
 489enum pmc_type {
 490	KVM_PMC_GP = 0,		/* general purpose counter */
 491	KVM_PMC_FIXED,		/* fixed counter (implicit value 1) */
 492};
 493
 /*
  * State of one virtual performance counter.  Instances live in the
  * gp_counters[] and fixed_counters[] arrays of struct kvm_pmu.
  */
 494struct kvm_pmc {
 495	enum pmc_type type;	/* KVM_PMC_GP or KVM_PMC_FIXED */
 496	u8 idx;
 497	bool is_paused;
 498	bool intr;
 499	u64 counter;
 500	u64 prev_counter;
 501	u64 eventsel;
 502	struct perf_event *perf_event;	/* perf event associated with this counter, if any */
 503	struct kvm_vcpu *vcpu;
 504	/*
 505	 * only for creating or reusing perf_event,
 506	 * eventsel value for general purpose counters,
 507	 * ctrl value for fixed counters.
 508	 */
 509	u64 current_config;
 510};
 511
 512/* More counters may conflict with other existing Architectural MSRs */
 513#define KVM_INTEL_PMC_MAX_GENERIC	8
 /* Last MSR index of the KVM_INTEL_PMC_MAX_GENERIC-sized range starting at PERFCTR0. */
 514#define MSR_ARCH_PERFMON_PERFCTR_MAX	(MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
 /* Last MSR index of the KVM_INTEL_PMC_MAX_GENERIC-sized range starting at EVENTSEL0. */
 515#define MSR_ARCH_PERFMON_EVENTSEL_MAX	(MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
 /* Size of kvm_pmu.fixed_counters[]. */
 516#define KVM_PMC_MAX_FIXED	3
 /* Smaller than the Intel limit, so kvm_pmu.gp_counters[] (sized by the Intel limit) can hold it. */
 517#define KVM_AMD_PMC_MAX_GENERIC	6
 /*
  * Virtual PMU state (embedded as kvm_vcpu_arch.pmu): counter counts, global
  * control/status values and their masks, the counters themselves, PEBS
  * related values and perf_event bookkeeping.
  */
 518struct kvm_pmu {
 519	unsigned nr_arch_gp_counters;
 520	unsigned nr_arch_fixed_counters;
 521	unsigned available_event_types;
 522	u64 fixed_ctr_ctrl;
 523	u64 fixed_ctr_ctrl_mask;
 524	u64 global_ctrl;
 525	u64 global_status;
 
 526	u64 counter_bitmask[2];	/* NOTE(review): presumably indexed by enum pmc_type - confirm */
 527	u64 global_ctrl_mask;
 528	u64 global_ovf_ctrl_mask;
 529	u64 reserved_bits;
 530	u64 raw_event_mask;
 531	u8 version;
 532	struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
 533	struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
 534	struct irq_work irq_work;
 535
 536	/*
 537	 * Overlay the bitmap with a 64-bit atomic so that all bits can be
 538	 * set in a single access, e.g. to reprogram all counters when the PMU
 539	 * filter changes.
 540	 */
 541	union {
 542		DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
 543		atomic64_t __reprogram_pmi;
 544	};
 545	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
 546	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
 547
 548	u64 ds_area;
 549	u64 pebs_enable;
 550	u64 pebs_enable_mask;
 551	u64 pebs_data_cfg;
 552	u64 pebs_data_cfg_mask;
 553
 554	/*
 555	 * If a guest counter is cross-mapped to host counter with different
 556	 * index, its PEBS capability will be temporarily disabled.
 557	 *
 558	 * The user should make sure that this mask is updated
 559	 * after disabling interrupts and before perf_guest_get_msrs();
 560	 */
 561	u64 host_cross_mapped_mask;
 562
 563	/*
 564	 * The gate to release perf_events not marked in
 565	 * pmc_in_use only once in a vcpu time slice.
 566	 */
 567	bool need_cleanup;
 568
 569	/*
 570	 * The total number of programmed perf_events; it helps to avoid a
 571	 * redundant check before cleanup if the guest doesn't use the vPMU at all.
 572	 */
 573	u8 event_count;
 574};
 575
 576struct kvm_pmu_ops;
 577
 /*
  * Debug-register flags; the values are distinct bits (1 and 2) so they can
  * be combined.
  * NOTE(review): presumably stored in kvm_vcpu_arch.switch_db_regs - confirm.
  */
 578enum {
 579	KVM_DEBUGREG_BP_ENABLED = 1,
 580	KVM_DEBUGREG_WONT_EXIT = 2,
 
 581};
 582
 /*
  * One variable-range MTRR as a base/mask pair, linkable into a list via
  * @node (struct kvm_mtrr provides a matching list head).
  */
 583struct kvm_mtrr_range {
 584	u64 base;
 585	u64 mask;
 586	struct list_head node;
 587};
 588
 /*
  * Guest MTRR state (embedded as kvm_vcpu_arch.mtrr_state): the variable
  * ranges, the fixed-range memory types, the default type value and a list
  * head.
  * NOTE(review): @head presumably links the var_ranges[] entries through
  * their @node members - confirm in the MTRR code.
  */
 589struct kvm_mtrr {
 590	struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
 591	mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
 592	u64 deftype;
 593
 594	struct list_head head;
 595};
 596
 597/* Hyper-V SynIC timer */
 598struct kvm_vcpu_hv_stimer {
 599	struct hrtimer timer;		/* host high-resolution timer */
 600	int index;			/* presumably the slot in kvm_vcpu_hv.stimer[] - confirm */
 601	union hv_stimer_config config;
 602	u64 count;
 603	u64 exp_time;
 604	struct hv_message msg;
 605	bool msg_pending;
 606};
 607
 608/* Hyper-V synthetic interrupt controller (SynIC) */
 609struct kvm_vcpu_hv_synic {
 610	u64 version;
 611	u64 control;
 612	u64 msg_page;
 613	u64 evt_page;
 614	atomic64_t sint[HV_SYNIC_SINT_COUNT];		/* one 64-bit value per SINT */
 615	atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT];	/* one entry per SINT */
 616	DECLARE_BITMAP(auto_eoi_bitmap, 256);		/* 256 bits; presumably one per vector - confirm */
 617	DECLARE_BITMAP(vec_bitmap, 256);		/* 256 bits; presumably one per vector - confirm */
 618	bool active;
 619	bool dont_zero_synic_pages;
 620};
 621
 622/* The maximum number of entries on the TLB flush fifo. */
 623#define KVM_HV_TLB_FLUSH_FIFO_SIZE (16)
 624/*
 625 * Note: the following 'magic' entry is made up by KVM to avoid putting
 626 * anything besides GVA on the TLB flush fifo. It is theoretically possible
 627 * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000
 628 * which will look identical. KVM's action to 'flush everything' instead of
 629 * flushing these particular addresses is, however, fully legitimate as
 630 * flushing more than requested is always OK.
 *
 * The value is (u64)-1, i.e. a 64-bit entry with every bit set.
 631 */
 632#define KVM_HV_TLB_FLUSHALL_ENTRY  ((u64)-1)
 633
 /* Indexes into kvm_vcpu_hv.tlb_flush_fifo[]; the last enumerator is the array size. */
 634enum hv_tlb_flush_fifos {
 635	HV_L1_TLB_FLUSH_FIFO,
 636	HV_L2_TLB_FLUSH_FIFO,
 637	HV_NR_TLB_FLUSH_FIFOS,
 638};
 639
 /*
  * A kfifo of KVM_HV_TLB_FLUSH_FIFO_SIZE u64 entries plus a spinlock.
  * NOTE(review): going by its name, @write_lock serializes producers only -
  * confirm the locking rules at the users.
  */
 640struct kvm_vcpu_hv_tlb_flush_fifo {
 641	spinlock_t write_lock;
 642	DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE);
 643};
 644
 645/* Hyper-V per vcpu emulation context */
 646struct kvm_vcpu_hv {
 647	struct kvm_vcpu *vcpu;
 648	u32 vp_index;
 649	u64 hv_vapic;
 650	s64 runtime_offset;
 651	struct kvm_vcpu_hv_synic synic;
 652	struct kvm_hyperv_exit exit;
 653	struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
 	/* one bit per entry of stimer[] */
 654	DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
 655	bool enforce_cpuid;
 	/* Cached Hyper-V CPUID register values; each field names its leaf and register. */
 656	struct {
 657		u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */
 658		u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */
 659		u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */
 660		u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
 661		u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */
 662		u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
 663		u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */
 664		u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */
 665	} cpuid_cache;
 666
 	/* indexed by enum hv_tlb_flush_fifos */
 667	struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS];
 668
 669	/* Preallocated buffer for handling hypercalls passing sparse vCPU set */
 670	u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];
 671
 672	struct hv_vp_assist_page vp_assist_page;
 673
 674	struct {
 675		u64 pa_page_gpa;
 676		u64 vm_id;
 677		u32 vp_id;
 678	} nested;
 679};
 680
 681/* Xen HVM per vcpu emulation context */
 682struct kvm_vcpu_xen {
 683	u64 hypercall_rip;
 684	u32 current_runstate;
 685	u8 upcall_vector;
 686	struct gfn_to_pfn_cache vcpu_info_cache;
 687	struct gfn_to_pfn_cache vcpu_time_info_cache;
 688	struct gfn_to_pfn_cache runstate_cache;
 689	struct gfn_to_pfn_cache runstate2_cache;
 690	u64 last_steal;
 691	u64 runstate_entry_time;
 692	u64 runstate_times[4];	/* NOTE(review): presumably one slot per runstate value - confirm */
 693	unsigned long evtchn_pending_sel;
 694	u32 vcpu_id; /* The Xen / ACPI vCPU ID */
 695	u32 timer_virq;
 696	u64 timer_expires; /* In guest epoch */
 697	atomic_t timer_pending;
 698	struct hrtimer timer;
 699	int poll_evtchn;
 700	struct timer_list poll_timer;
 701};
 702
 /*
  * One queued exception: its vector, an optional error code, an optional
  * payload and pending/injected state flags.  struct kvm_vcpu_arch holds two
  * instances (exception and exception_vmexit).
  */
 703struct kvm_queued_exception {
 704	bool pending;
 705	bool injected;
 706	bool has_error_code;	/* flag paired with @error_code */
 707	u8 vector;
 708	u32 error_code;
 709	unsigned long payload;
 710	bool has_payload;	/* flag paired with @payload */
 711};
 712
 /*
  * x86-specific vCPU state: register cache and control registers, APIC
  * context, MMU contexts and their memory caches, guest FPU state, pending
  * events, clock/TSC bookkeeping, debug registers, machine-check and PMU
  * state, Hyper-V/Xen contexts and paravirtual feature state.
  * NOTE(review): presumably embedded in the generic struct kvm_vcpu - confirm.
  */
 713struct kvm_vcpu_arch {
 714	/*
 715	 * rip and regs accesses must go through
 716	 * kvm_{register,rip}_{read,write} functions.
 717	 */
 718	unsigned long regs[NR_VCPU_REGS];
 719	u32 regs_avail;
 720	u32 regs_dirty;
 721
 722	unsigned long cr0;
 723	unsigned long cr0_guest_owned_bits;
 724	unsigned long cr2;
 725	unsigned long cr3;
 726	unsigned long cr4;
 727	unsigned long cr4_guest_owned_bits;
 728	unsigned long cr4_guest_rsvd_bits;
 729	unsigned long cr8;
 730	u32 host_pkru;
 731	u32 pkru;
 732	u32 hflags;
 733	u64 efer;
 734	u64 apic_base;
 735	struct kvm_lapic *apic;    /* kernel irqchip context */
 736	bool load_eoi_exitmap_pending;
 737	DECLARE_BITMAP(ioapic_handled_vectors, 256);
 738	unsigned long apic_attention;
 739	int32_t apic_arb_prio;
 740	int mp_state;
 741	u64 ia32_misc_enable_msr;
 742	u64 smbase;
 743	u64 smi_count;
 744	bool at_instruction_boundary;
 745	bool tpr_access_reporting;
 746	bool xsaves_enabled;
 747	bool xfd_no_write_intercept;
 748	u64 ia32_xss;
 749	u64 microcode_version;
 750	u64 arch_capabilities;
 751	u64 perf_capabilities;
 752
 753	/*
 754	 * Paging state of the vcpu
 755	 *
 756	 * If the vcpu runs in guest mode with two level paging this still saves
 757	 * the paging mode of the l1 guest. This context is always used to
 758	 * handle faults.
 759	 */
 760	struct kvm_mmu *mmu;
 761
 762	/* Non-nested MMU for L1 */
 763	struct kvm_mmu root_mmu;
 764
 765	/* L1 MMU when running nested */
 766	struct kvm_mmu guest_mmu;
 767
 768	/*
 769	 * Paging state of an L2 guest (used for nested npt)
 770	 *
 771	 * This context will save all necessary information to walk page tables
 772	 * of an L2 guest. This context is only initialized for page table
 773	 * walking and not for faulting since we never handle l2 page faults on
 774	 * the host.
 775	 */
 776	struct kvm_mmu nested_mmu;
 777
 778	/*
 779	 * Pointer to the mmu context currently used for
 780	 * gva_to_gpa translations.
 781	 */
 782	struct kvm_mmu *walk_mmu;
 783
 784	struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
 785	struct kvm_mmu_memory_cache mmu_shadow_page_cache;
 786	struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
 787	struct kvm_mmu_memory_cache mmu_page_header_cache;
 788
 789	/*
 790	 * QEMU userspace and the guest each have their own FPU state.
 791	 * In vcpu_run, we switch between the user and guest FPU contexts.
 792	 * While running a VCPU, the VCPU thread will have the guest FPU
 793	 * context.
 794	 *
 795	 * Note that while the PKRU state lives inside the fpu registers,
 796	 * it is switched out separately at VMENTER and VMEXIT time. The
 797	 * "guest_fpstate" state here contains the guest FPU context, with the
 798	 * host PKRU bits.
 799	 */
 800	struct fpu_guest guest_fpu;
 801
 802	u64 xcr0;
 803	u64 guest_supported_xcr0;
 
 804
 805	struct kvm_pio_request pio;
 806	void *pio_data;
 807	void *sev_pio_data;
 808	unsigned sev_pio_count;
 809
 810	u8 event_exit_inst_len;
 811
 812	bool exception_from_userspace;
 813
 814	/* Exceptions to be injected to the guest. */
 815	struct kvm_queued_exception exception;
 816	/* Exception VM-Exits to be synthesized to L1. */
 817	struct kvm_queued_exception exception_vmexit;
 
 818
 819	struct kvm_queued_interrupt {
 820		bool injected;
 821		bool soft;
 822		u8 nr;
 823	} interrupt;
 824
 825	int halt_request; /* real mode on Intel only */
 826
 827	int cpuid_nent;
 828	struct kvm_cpuid_entry2 *cpuid_entries;
 829	u32 kvm_cpuid_base;
 830
 831	u64 reserved_gpa_bits;
 832	int maxphyaddr;
 833
 834	/* emulate context */
 835
 836	struct x86_emulate_ctxt *emulate_ctxt;
 837	bool emulate_regs_need_sync_to_vcpu;
 838	bool emulate_regs_need_sync_from_vcpu;
 839	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
 840
 841	gpa_t time;
 842	struct pvclock_vcpu_time_info hv_clock;
 843	unsigned int hw_tsc_khz;
 844	struct gfn_to_pfn_cache pv_time;
 
 845	/* set guest stopped flag in pvclock flags field */
 846	bool pvclock_set_guest_stopped_request;
 847
 848	struct {
 849		u8 preempted;
 850		u64 msr_val;
 851		u64 last_steal;
 852		struct gfn_to_hva_cache cache;
 
 853	} st;
 854
 855	u64 l1_tsc_offset;
 856	u64 tsc_offset; /* current tsc offset */
 857	u64 last_guest_tsc;
 858	u64 last_host_tsc;
 859	u64 tsc_offset_adjustment;
 860	u64 this_tsc_nsec;
 861	u64 this_tsc_write;
 862	u64 this_tsc_generation;
 863	bool tsc_catchup;
 864	bool tsc_always_catchup;
 865	s8 virtual_tsc_shift;
 866	u32 virtual_tsc_mult;
 867	u32 virtual_tsc_khz;
 868	s64 ia32_tsc_adjust_msr;
 869	u64 msr_ia32_power_ctl;
 870	u64 l1_tsc_scaling_ratio;
 871	u64 tsc_scaling_ratio; /* current scaling ratio */
 872
 873	atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
 874	unsigned nmi_pending; /* NMI queued after currently running handler */
 875	bool nmi_injected;    /* Trying to inject an NMI this entry */
 876	bool smi_pending;    /* SMI queued after currently running handler */
 877	u8 handling_intr_from_guest;
 878
 879	struct kvm_mtrr mtrr_state;
 880	u64 pat;
 881
 882	unsigned switch_db_regs;
 883	unsigned long db[KVM_NR_DB_REGS];
 884	unsigned long dr6;
 885	unsigned long dr7;
 886	unsigned long eff_db[KVM_NR_DB_REGS];
 887	unsigned long guest_debug_dr7;
 888	u64 msr_platform_info;
 889	u64 msr_misc_features_enables;
 890
 891	u64 mcg_cap;
 892	u64 mcg_status;
 893	u64 mcg_ctl;
 894	u64 mcg_ext_ctl;
 895	u64 *mce_banks;
 896	u64 *mci_ctl2_banks;
 897
 898	/* Cache MMIO info */
 899	u64 mmio_gva;
 900	unsigned mmio_access;
 901	gfn_t mmio_gfn;
 902	u64 mmio_gen;
 903
 904	struct kvm_pmu pmu;
 905
 906	/* used for guest single stepping over the given code position */
 907	unsigned long singlestep_rip;
 908
 909	bool hyperv_enabled;
 910	struct kvm_vcpu_hv *hyperv;
 911	struct kvm_vcpu_xen xen;
 912
 913	cpumask_var_t wbinvd_dirty_mask;
 914
 915	unsigned long last_retry_eip;
 916	unsigned long last_retry_addr;
 917
 918	struct {
 919		bool halted;
 920		gfn_t gfns[ASYNC_PF_PER_VCPU];
 921		struct gfn_to_hva_cache data;
 922		u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */
 923		u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */
 924		u16 vec;
 925		u32 id;
 926		bool send_user_only;
 927		u32 host_apf_flags;
 928		bool delivery_as_pf_vmexit;
 929		bool pageready_pending;
 930	} apf;
 931
 932	/* OSVW MSRs (AMD only) */
 933	struct {
 934		u64 length;
 935		u64 status;
 936	} osvw;
 937
 938	struct {
 939		u64 msr_val;
 940		struct gfn_to_hva_cache data;
 941	} pv_eoi;
 942
 943	u64 msr_kvm_poll_control;
 944
 945	/*
 946	 * Indicates the guest is trying to write a gfn that contains one or
 947	 * more of the PTEs used to translate the write itself, i.e. the access
 948	 * is changing its own translation in the guest page tables.  KVM exits
 949	 * to userspace if emulation of the faulting instruction fails and this
 950	 * flag is set, as KVM cannot make forward progress.
 951	 *
 952	 * If emulation fails for a write to guest page tables, KVM unprotects
 953	 * (zaps) the shadow page for the target gfn and resumes the guest to
 954	 * retry the non-emulatable instruction (on hardware).  Unprotecting the
 955	 * gfn doesn't allow forward progress for a self-changing access because
 956	 * doing so also zaps the translation for the gfn, i.e. retrying the
 957	 * instruction will hit a !PRESENT fault, which results in a new shadow
 958	 * page and sends KVM back to square one.
 959	 */
 960	bool write_fault_to_shadow_pgtable;
 961
 962	/* set at EPT violation at this point */
 963	unsigned long exit_qualification;
 964
 965	/* pv related host specific info */
 966	struct {
 967		bool pv_unhalted;
 968	} pv;
 969
 970	int pending_ioapic_eoi;
 971	int pending_external_vector;
 972
 973	/* the vCPU was preempted while in kernel mode (CPL 0) */
 974	bool preempted_in_kernel;
 975
 976	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
 977	bool l1tf_flush_l1d;
 978
 979	/* Host CPU on which VM-entry was most recently attempted */
 980	int last_vmentry_cpu;
 981
 982	/* AMD MSRC001_0015 Hardware Configuration */
 983	u64 msr_hwcr;
 984
 985	/* pv related cpuid info */
 986	struct {
 987		/*
 988		 * value of the eax register in the KVM_CPUID_FEATURES CPUID
 989		 * leaf.
 990		 */
 991		u32 features;
 992
 993		/*
 994		 * indicates whether pv emulation should be disabled if features
 995		 * are not present in the guest's cpuid
 996		 */
 997		bool enforce;
 998	} pv_cpuid;
 999
1000	/* Protected Guests */
1001	bool guest_state_protected;
1002
1003	/*
1004	 * Set when the PDPTRs were loaded directly by userspace without
1005	 * reading the guest memory
1006	 */
1007	bool pdptrs_from_userspace;
1008
1009#if IS_ENABLED(CONFIG_HYPERV)
1010	hpa_t hv_root_tdp;
1011#endif
1012};
1013
/*
 * Large-page bookkeeping entry holding a single integer.
 * NOTE(review): presumably a count of reasons a large page must not be used
 * (non-zero = disallowed) - confirm in the MMU code.
 */
1014struct kvm_lpage_info {
1015	int disallow_lpage;
1016};
1017
/*
 * x86-specific per-memslot data: arrays of pointers indexed by page size
 * (@rmap has KVM_NR_PAGE_SIZES entries, @lpage_info one fewer) and by page
 * tracking mode (@gfn_track).
 * NOTE(review): @lpage_info presumably omits the smallest page size - confirm.
 */
1018struct kvm_arch_memory_slot {
1019	struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
1020	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
1021	unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
1022};
1023
1024/*
1025 * We use as the mode the number of bits allocated in the LDR for the
1026 * logical processor ID.  It happens that these are all powers of two.
1027 * This makes it very easy to detect cases where the APICs are
1028 * configured for multiple modes; in that case, we cannot use the map and
1029 * hence cannot use kvm_irq_delivery_to_apic_fast either.
1030 */
1031#define KVM_APIC_MODE_XAPIC_CLUSTER          4
1032#define KVM_APIC_MODE_XAPIC_FLAT             8
1033#define KVM_APIC_MODE_X2APIC                16
1034
/*
 * Lookup tables from APIC IDs to local APICs.  Carries an rcu_head, and
 * kvm_arch.apic_map points to it through an __rcu pointer.  The union holds
 * the logical-mode table for either flat (8 entries) or cluster (16 x 4
 * entries) xAPIC mode; @phys_map is a flexible array member.
 * NOTE(review): @phys_map presumably has max_apic_id + 1 entries - confirm
 * at the allocation site.
 */
1035struct kvm_apic_map {
1036	struct rcu_head rcu;
1037	u8 mode;	/* holds KVM_APIC_MODE_* values */
1038	u32 max_apic_id;
1039	union {
1040		struct kvm_lapic *xapic_flat_map[8];
1041		struct kvm_lapic *xapic_cluster_map[16][4];
1042	};
1043	struct kvm_lapic *phys_map[];
1044};
1045
1046/* Hyper-V synthetic debugger (SynDbg) */
1047struct kvm_hv_syndbg {
	/* Note: the nested struct and its first member are both named "control". */
1048	struct {
1049		u64 control;
1050		u64 status;
1051		u64 send_page;
1052		u64 recv_page;
1053		u64 pending_page;
1054	} control;
1055	u64 options;
1056};
1057
1058/* Current state of Hyper-V TSC page clocksource (stored in kvm_hv.hv_tsc_page_status) */
1059enum hv_tsc_page_status {
1060	/* TSC page was not set up or disabled */
1061	HV_TSC_PAGE_UNSET = 0,
1062	/* TSC page MSR was written by the guest, update pending */
1063	HV_TSC_PAGE_GUEST_CHANGED,
1064	/* TSC page update was triggered from the host side */
1065	HV_TSC_PAGE_HOST_CHANGED,
1066	/* TSC page was properly set up and is currently active */
1067	HV_TSC_PAGE_SET,
1068	/* TSC page was set up with an inaccessible GPA */
1069	HV_TSC_PAGE_BROKEN,
1070};
1071
1072/* Hyper-V emulation context (embedded as kvm_arch.hyperv) */
1073struct kvm_hv {
1074	struct mutex hv_lock;
1075	u64 hv_guest_os_id;
1076	u64 hv_hypercall;
1077	u64 hv_tsc_page;
1078	enum hv_tsc_page_status hv_tsc_page_status;
1079
1080	/* Hyper-V based guest crash (NT kernel bugcheck) parameters */
1081	u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
1082	u64 hv_crash_ctl;
1083
1084	struct ms_hyperv_tsc_page tsc_ref;
1085
1086	struct idr conn_to_evt;
1087
1088	u64 hv_reenlightenment_control;
1089	u64 hv_tsc_emulation_control;
1090	u64 hv_tsc_emulation_status;
1091
1092	/* How many vCPUs have VP index != vCPU index */
1093	atomic_t num_mismatched_vp_indexes;
1094
1095	/*
1096	 * How many SynICs use 'AutoEOI' feature
1097	 * (protected by arch.apicv_update_lock)
1098	 */
1099	unsigned int synic_auto_eoi_used;
1100
1101	struct hv_partition_assist_pg *hv_pa_pg;
1102	struct kvm_hv_syndbg hv_syndbg;
1103};
1104
/*
 * One range of an MSR filter (see kvm_x86_msr_filter.ranges[]).
 * NOTE(review): presumably covers @nmsrs MSRs starting at index @base, with
 * one bit per MSR in @bitmap - confirm in the filter code.
 */
1105struct msr_bitmap_range {
1106	u32 flags;
1107	u32 nmsrs;
1108	u32 base;
1109	unsigned long *bitmap;
1110};
1111
1112/* Xen emulation context (embedded as kvm_arch.xen) */
1113struct kvm_xen {
1114	struct mutex xen_lock;
1115	u32 xen_version;
1116	bool long_mode;
1117	bool runstate_update_flag;
1118	u8 upcall_vector;
1119	struct gfn_to_pfn_cache shinfo_cache;
1120	struct idr evtchn_ports;
	/* bitmap with room for KVM_MAX_VCPUS bits */
1121	unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
1122};
1123
/* How the VM's interrupt controller was created (stored in kvm_arch.irqchip_mode). */
1124enum kvm_irqchip_mode {
1125	KVM_IRQCHIP_NONE,
1126	KVM_IRQCHIP_KERNEL,       /* created with KVM_CREATE_IRQCHIP */
1127	KVM_IRQCHIP_SPLIT,        /* created with KVM_CAP_SPLIT_IRQCHIP */
1128};
1129
/*
 * An MSR filter: up to 16 ranges plus a one-bit default policy.
 * kvm_arch.msr_filter points to an instance through an __rcu pointer.
 * NOTE(review): @count is presumably the number of valid ranges[] entries -
 * confirm in the filter code.
 */
1130struct kvm_x86_msr_filter {
1131	u8 count;
1132	bool default_allow:1;
1133	struct msr_bitmap_range ranges[16];
1134};
1135
/*
 * Reasons for inhibiting APIC acceleration.  The enumerators have no
 * explicit values, so they number consecutively from 0.
 * NOTE(review): presumably used as bit numbers in
 * kvm_arch.apicv_inhibit_reasons - confirm at the users.
 */
1136enum kvm_apicv_inhibit {
1137
1138	/********************************************************************/
1139	/* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */
1140	/********************************************************************/
1141
1142	/*
1143	 * APIC acceleration is disabled by a module parameter
1144	 * and/or not supported in hardware.
1145	 */
1146	APICV_INHIBIT_REASON_DISABLE,
1147
1148	/*
1149	 * APIC acceleration is inhibited because AutoEOI feature is
1150	 * being used by a HyperV guest.
1151	 */
1152	APICV_INHIBIT_REASON_HYPERV,
1153
1154	/*
1155	 * APIC acceleration is inhibited because the userspace didn't yet
1156	 * enable the kernel/split irqchip.
1157	 */
1158	APICV_INHIBIT_REASON_ABSENT,
1159
1160	/* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ
1161	 * (out of band, debug measure of blocking all interrupts on this vCPU)
1162	 * was enabled, to avoid AVIC/APICv bypassing it.
1163	 */
1164	APICV_INHIBIT_REASON_BLOCKIRQ,
1165
1166	/*
1167	 * For simplicity, the APIC acceleration is inhibited
1168	 * first time either APIC ID or APIC base are changed by the guest
1169	 * from their reset values.
1170	 */
1171	APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
1172	APICV_INHIBIT_REASON_APIC_BASE_MODIFIED,
1173
1174	/******************************************************/
1175	/* INHIBITs that are relevant only to the AMD's AVIC. */
1176	/******************************************************/
1177
1178	/*
1179	 * AVIC is inhibited on a vCPU because it runs a nested guest.
1180	 *
1181	 * This is needed because unlike APICv, the peers of this vCPU
1182	 * cannot use the doorbell mechanism to signal interrupts via AVIC when
1183	 * a vCPU runs nested.
1184	 */
1185	APICV_INHIBIT_REASON_NESTED,
1186
1187	/*
1188	 * On SVM, the wait for the IRQ window is implemented with pending vIRQ,
1189	 * which cannot be injected when the AVIC is enabled, thus AVIC
1190	 * is inhibited while KVM waits for IRQ window.
1191	 */
1192	APICV_INHIBIT_REASON_IRQWIN,
1193
1194	/*
1195	 * PIT (i8254) 're-inject' mode, relies on EOI intercept,
1196	 * which AVIC doesn't support for edge triggered interrupts.
1197	 */
1198	APICV_INHIBIT_REASON_PIT_REINJ,
1199
1200	/*
1201	 * AVIC is disabled because SEV doesn't support it.
1202	 */
1203	APICV_INHIBIT_REASON_SEV,
1204};
1205
/*
 * x86-specific VM-wide state: shadow MMU bookkeeping, interrupt controller
 * and APIC map state, clock/TSC tracking, Hyper-V and Xen contexts,
 * userspace-selected capabilities and quirks, TDP MMU state (64-bit only)
 * and the memory caches used for eager page splitting.
 * NOTE(review): presumably embedded in the generic struct kvm - confirm.
 */
1206struct kvm_arch {
1207	unsigned long n_used_mmu_pages;
1208	unsigned long n_requested_mmu_pages;
1209	unsigned long n_max_mmu_pages;
1210	unsigned int indirect_shadow_pages;
1211	u8 mmu_valid_gen;
1212	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 
 
 
1213	struct list_head active_mmu_pages;
1214	struct list_head zapped_obsolete_pages;
1215	/*
1216	 * A list of kvm_mmu_page structs that, if zapped, could possibly be
1217	 * replaced by an NX huge page.  A shadow page is on this list if its
1218	 * existence disallows an NX huge page (nx_huge_page_disallowed is set)
1219	 * and there are no other conditions that prevent a huge page, e.g.
1220	 * the backing host page is huge, dirty logging is not enabled for its
1221	 * memslot, etc...  Note, zapping shadow pages on this list doesn't
1222	 * guarantee an NX huge page will be created in its stead, e.g. if the
1223	 * guest attempts to execute from the region then KVM obviously can't
1224	 * create an NX huge page (without hanging the guest).
1225	 */
1226	struct list_head possible_nx_huge_pages;
1227	struct kvm_page_track_notifier_node mmu_sp_tracker;
1228	struct kvm_page_track_notifier_head track_notifier_head;
1229	/*
1230	 * Protects marking pages unsync during page faults, as TDP MMU page
1231	 * faults only take mmu_lock for read.  For simplicity, the unsync
1232	 * pages lock is always taken when marking pages unsync regardless of
1233	 * whether mmu_lock is held for read or write.
1234	 */
1235	spinlock_t mmu_unsync_pages_lock;
1236
1237	struct list_head assigned_dev_head;
1238	struct iommu_domain *iommu_domain;
1239	bool iommu_noncoherent;
1240#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
1241	atomic_t noncoherent_dma_count;
1242#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
1243	atomic_t assigned_device_count;
1244	struct kvm_pic *vpic;
1245	struct kvm_ioapic *vioapic;
1246	struct kvm_pit *vpit;
1247	atomic_t vapics_in_nmi_mode;
1248	struct mutex apic_map_lock;
1249	struct kvm_apic_map __rcu *apic_map;
1250	atomic_t apic_map_dirty;
1251
1252	/* Protects apic_access_memslot_enabled and apicv_inhibit_reasons */
1253	struct rw_semaphore apicv_update_lock;
1254
1255	bool apic_access_memslot_enabled;
1256	unsigned long apicv_inhibit_reasons;
1257
1258	gpa_t wall_clock;
1259
1260	bool mwait_in_guest;
1261	bool hlt_in_guest;
1262	bool pause_in_guest;
1263	bool cstate_in_guest;
1264
1265	unsigned long irq_sources_bitmap;
1266	s64 kvmclock_offset;
1267
1268	/*
1269	 * This also protects nr_vcpus_matched_tsc which is read from a
1270	 * preemption-disabled region, so it must be a raw spinlock.
1271	 */
1272	raw_spinlock_t tsc_write_lock;
1273	u64 last_tsc_nsec;
1274	u64 last_tsc_write;
1275	u32 last_tsc_khz;
1276	u64 last_tsc_offset;
1277	u64 cur_tsc_nsec;
1278	u64 cur_tsc_write;
1279	u64 cur_tsc_offset;
1280	u64 cur_tsc_generation;
1281	int nr_vcpus_matched_tsc;
1282
1283	u32 default_tsc_khz;
1284
1285	seqcount_raw_spinlock_t pvclock_sc;
1286	bool use_master_clock;
1287	u64 master_kernel_ns;
1288	u64 master_cycle_now;
1289	struct delayed_work kvmclock_update_work;
1290	struct delayed_work kvmclock_sync_work;
1291
1292	struct kvm_xen_hvm_config xen_hvm_config;
1293
1294	/* reads protected by irq_srcu, writes by irq_lock */
1295	struct hlist_head mask_notifier_list;
1296
1297	struct kvm_hv hyperv;
1298	struct kvm_xen xen;
1299
1300	bool backwards_tsc_observed;
 
 
 
1301	bool boot_vcpu_runs_old_kvmclock;
1302	u32 bsp_vcpu_id;
1303
1304	u64 disabled_quirks;
1305	int cpu_dirty_logging_count;
1306
1307	enum kvm_irqchip_mode irqchip_mode;
1308	u8 nr_reserved_ioapic_pins;
1309
1310	bool disabled_lapic_found;
1311
 
 
 
 
 
 
 
1312	bool x2apic_format;
1313	bool x2apic_broadcast_quirk_disabled;
1314
1315	bool guest_can_read_msr_platform_info;
1316	bool exception_payload_enabled;
1317
1318	bool triple_fault_event;
1319
1320	bool bus_lock_detection_enabled;
1321	bool enable_pmu;
1322
1323	u32 notify_window;
1324	u32 notify_vmexit_flags;
1325	/*
1326	 * If exit_on_emulation_error is set, and the in-kernel instruction
1327	 * emulator fails to emulate an instruction, allow userspace
1328	 * the opportunity to look at it.
1329	 */
1330	bool exit_on_emulation_error;
1331
1332	/* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
1333	u32 user_space_msr_mask;
1334	struct kvm_x86_msr_filter __rcu *msr_filter;
1335
1336	u32 hypercall_exit_enabled;
1337
1338	/* Guest can access the SGX PROVISIONKEY. */
1339	bool sgx_provisioning_allowed;
1340
1341	struct kvm_pmu_event_filter __rcu *pmu_event_filter;
1342	struct task_struct *nx_huge_page_recovery_thread;
1343
1344#ifdef CONFIG_X86_64
1345	/*
1346	 * Whether the TDP MMU is enabled for this VM. This contains a
1347	 * snapshot of the TDP MMU module parameter from when the VM was
1348	 * created and remains unchanged for the life of the VM. If this is
1349	 * true, TDP MMU handler functions will run for various MMU
1350	 * operations.
1351	 */
1352	bool tdp_mmu_enabled;
1353
1354	/* The number of TDP MMU pages across all roots. */
1355	atomic64_t tdp_mmu_pages;
1356
1357	/*
1358	 * List of kvm_mmu_page structs being used as roots.
1359	 * All kvm_mmu_page structs in the list should have
1360	 * tdp_mmu_page set.
1361	 *
1362	 * For reads, this list is protected by:
1363	 *	the MMU lock in read mode + RCU or
1364	 *	the MMU lock in write mode
1365	 *
1366	 * For writes, this list is protected by:
1367	 *	the MMU lock in read mode + the tdp_mmu_pages_lock or
1368	 *	the MMU lock in write mode
1369	 *
1370	 * Roots will remain in the list until their tdp_mmu_root_count
1371	 * drops to zero, at which point the thread that decremented the
1372	 * count to zero should remove the root from the list and clean
1373	 * it up, freeing the root after an RCU grace period.
1374	 */
1375	struct list_head tdp_mmu_roots;
1376
1377	/*
1378	 * Protects accesses to the following fields when the MMU lock
1379	 * is held in read mode:
1380	 *  - tdp_mmu_roots (above)
1381	 *  - the link field of kvm_mmu_page structs used by the TDP MMU
1382	 *  - possible_nx_huge_pages
1383	 *  - the possible_nx_huge_page_link field of kvm_mmu_page structs used
1384	 *    by the TDP MMU
1385	 * It is acceptable, but not necessary, to acquire this lock when
1386	 * the thread holds the MMU lock in write mode.
1387	 */
1388	spinlock_t tdp_mmu_pages_lock;
1389	struct workqueue_struct *tdp_mmu_zap_wq;
1390#endif /* CONFIG_X86_64 */
1391
1392	/*
1393	 * If set, at least one shadow root has been allocated. This flag
1394	 * is used as one input when determining whether certain memslot
1395	 * related allocations are necessary.
1396	 */
1397	bool shadow_root_allocated;
1398
1399#if IS_ENABLED(CONFIG_HYPERV)
1400	hpa_t	hv_root_tdp;
1401	spinlock_t hv_root_tdp_lock;
1402#endif
1403	/*
1404	 * VM-scope maximum vCPU ID. Used to determine the size of structures
1405	 * that increase along with the maximum vCPU ID, in which case, using
1406	 * the global KVM_MAX_VCPU_IDS may lead to significant memory waste.
1407	 */
1408	u32 max_vcpu_ids;
1409
1410	bool disable_nx_huge_pages;
1411
1412	/*
1413	 * Memory caches used to allocate shadow pages when performing eager
1414	 * page splitting. No need for a shadowed_info_cache since eager page
1415	 * splitting only allocates direct shadow pages.
1416	 *
1417	 * Protected by kvm->slots_lock.
1418	 */
1419	struct kvm_mmu_memory_cache split_shadow_page_cache;
1420	struct kvm_mmu_memory_cache split_page_header_cache;
1421
1422	/*
1423	 * Memory cache used to allocate pte_list_desc structs while splitting
1424	 * huge pages. In the worst case, to split one huge page, 512
1425	 * pte_list_desc structs are needed to add each lower level leaf sptep
1426	 * to the rmap plus 1 to extend the parent_ptes rmap of the lower level
1427	 * page table.
1428	 *
1429	 * Protected by kvm->slots_lock.
1430	 */
1431#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
1432	struct kvm_mmu_memory_cache split_desc_cache;
1433};
1434
/*
 * VM-wide statistics counters for x86, following the generic counters in
 * @generic.  The anonymous union lets the per-page-size counters be accessed
 * either by name (pages_4k/2m/1g) or by index (pages[]).
 * NOTE(review): assumes KVM_NR_PAGE_SIZES is 3 so both views cover the same
 * storage - confirm against its definition.
 */
1435struct kvm_vm_stat {
1436	struct kvm_vm_stat_generic generic;
1437	u64 mmu_shadow_zapped;
1438	u64 mmu_pte_write;
1439	u64 mmu_pde_zapped;
1440	u64 mmu_flooded;
1441	u64 mmu_recycled;
1442	u64 mmu_cache_miss;
1443	u64 mmu_unsync;
1444	union {
1445		struct {
1446			atomic64_t pages_4k;
1447			atomic64_t pages_2m;
1448			atomic64_t pages_1g;
1449		};
1450		atomic64_t pages[KVM_NR_PAGE_SIZES];
1451	};
1452	u64 nx_lpage_splits;
1453	u64 max_mmu_page_hash_collisions;
1454	u64 max_mmu_rmap_size;
1455};
1456
/*
 * Per-vCPU statistics counters for x86, following the generic counters in
 * @generic.  Every x86 member is a plain u64; the names indicate the event
 * being counted (page faults, exits by cause, injections, ...).
 */
1457struct kvm_vcpu_stat {
1458	struct kvm_vcpu_stat_generic generic;
1459	u64 pf_taken;
1460	u64 pf_fixed;
1461	u64 pf_emulate;
1462	u64 pf_spurious;
1463	u64 pf_fast;
1464	u64 pf_mmio_spte_created;
1465	u64 pf_guest;
1466	u64 tlb_flush;
1467	u64 invlpg;
1468
1469	u64 exits;
1470	u64 io_exits;
1471	u64 mmio_exits;
1472	u64 signal_exits;
1473	u64 irq_window_exits;
1474	u64 nmi_window_exits;
1475	u64 l1d_flush;
1476	u64 halt_exits;
 
 
 
 
1477	u64 request_irq_exits;
1478	u64 irq_exits;
1479	u64 host_state_reload;
 
1480	u64 fpu_reload;
1481	u64 insn_emulation;
1482	u64 insn_emulation_fail;
1483	u64 hypercalls;
1484	u64 irq_injections;
1485	u64 nmi_injections;
1486	u64 req_event;
1487	u64 nested_run;
1488	u64 directed_yield_attempted;
1489	u64 directed_yield_successful;
1490	u64 preemption_reported;
1491	u64 preemption_other;
1492	u64 guest_mode;
1493	u64 notify_window_exits;
1494};
1495
1496struct x86_instruction_info;
1497
1498struct msr_data {
1499	bool host_initiated;
1500	u32 index;
1501	u64 data;
1502};
1503
1504struct kvm_lapic_irq {
1505	u32 vector;
1506	u16 delivery_mode;
1507	u16 dest_mode;
1508	bool level;
1509	u16 trig_mode;
1510	u32 shorthand;
1511	u32 dest_id;
1512	bool msi_redir_hint;
1513};
1514
1515static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
1516{
1517	return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
1518}
1519
1520struct kvm_x86_ops {
1521	const char *name;
1522
1523	int (*hardware_enable)(void);
1524	void (*hardware_disable)(void);
1525	void (*hardware_unsetup)(void);
1526	bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
1527	void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
 
 
 
1528
1529	unsigned int vm_size;
1530	int (*vm_init)(struct kvm *kvm);
1531	void (*vm_destroy)(struct kvm *kvm);
1532
1533	/* Create, but do not attach this VCPU */
1534	int (*vcpu_precreate)(struct kvm *kvm);
1535	int (*vcpu_create)(struct kvm_vcpu *vcpu);
1536	void (*vcpu_free)(struct kvm_vcpu *vcpu);
1537	void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
1538
1539	void (*prepare_switch_to_guest)(struct kvm_vcpu *vcpu);
1540	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
1541	void (*vcpu_put)(struct kvm_vcpu *vcpu);
1542
1543	void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
1544	int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
1545	int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
1546	u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
1547	void (*get_segment)(struct kvm_vcpu *vcpu,
1548			    struct kvm_segment *var, int seg);
1549	int (*get_cpl)(struct kvm_vcpu *vcpu);
1550	void (*set_segment)(struct kvm_vcpu *vcpu,
1551			    struct kvm_segment *var, int seg);
1552	void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
 
 
 
1553	void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
1554	void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
1555	bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
1556	void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
1557	int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
1558	void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
1559	void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
1560	void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
1561	void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 
 
1562	void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
1563	void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
1564	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
1565	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
1566	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
1567	bool (*get_if_flag)(struct kvm_vcpu *vcpu);
1568
1569	void (*flush_tlb_all)(struct kvm_vcpu *vcpu);
1570	void (*flush_tlb_current)(struct kvm_vcpu *vcpu);
1571	int  (*tlb_remote_flush)(struct kvm *kvm);
1572	int  (*tlb_remote_flush_with_range)(struct kvm *kvm,
1573			struct kvm_tlb_range *range);
1574
1575	/*
1576	 * Flush any TLB entries associated with the given GVA.
1577	 * Does not need to flush GPA->HPA mappings.
1578	 * Can potentially get non-canonical addresses through INVLPGs, which
1579	 * the implementation may choose to ignore if appropriate.
1580	 */
1581	void (*flush_tlb_gva)(struct kvm_vcpu *vcpu, gva_t addr);
1582
1583	/*
1584	 * Flush any TLB entries created by the guest.  Like tlb_flush_gva(),
1585	 * does not need to flush GPA->HPA mappings.
1586	 */
1587	void (*flush_tlb_guest)(struct kvm_vcpu *vcpu);
1588
1589	int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
1590	enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu);
1591	int (*handle_exit)(struct kvm_vcpu *vcpu,
1592		enum exit_fastpath_completion exit_fastpath);
1593	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
1594	void (*update_emulated_instruction)(struct kvm_vcpu *vcpu);
1595	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
1596	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
1597	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
1598				unsigned char *hypercall_addr);
1599	void (*inject_irq)(struct kvm_vcpu *vcpu, bool reinjected);
1600	void (*inject_nmi)(struct kvm_vcpu *vcpu);
1601	void (*inject_exception)(struct kvm_vcpu *vcpu);
 
 
1602	void (*cancel_injection)(struct kvm_vcpu *vcpu);
1603	int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
1604	int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
1605	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
1606	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
1607	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
1608	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
1609	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
1610	bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
1611	void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
1612	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
1613	void (*hwapic_isr_update)(int isr);
1614	bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
1615	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
1616	void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
1617	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
1618	void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode,
1619				  int trig_mode, int vector);
1620	int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
1621	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
1622	int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
1623	u8 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 
 
 
1624
1625	void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
1626			     int root_level);
 
1627
1628	bool (*has_wbinvd_exit)(void);
1629
1630	u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
1631	u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
1632	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
1633	void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
1634
1635	/*
1636	 * Retrieve somewhat arbitrary exit information.  Intended to
1637	 * be used only from within tracepoints or error paths.
1638	 */
1639	void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
1640			      u64 *info1, u64 *info2,
1641			      u32 *exit_int_info, u32 *exit_int_info_err_code);
1642
1643	int (*check_intercept)(struct kvm_vcpu *vcpu,
1644			       struct x86_instruction_info *info,
1645			       enum x86_intercept_stage stage,
1646			       struct x86_exception *exception);
1647	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
 
1648
1649	void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
1650
1651	void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
1652
1653	/*
1654	 * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer.  A zero
1655	 * value indicates CPU dirty logging is unsupported or disabled.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1656	 */
1657	int cpu_dirty_log_size;
1658	void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu);
1659
1660	const struct kvm_x86_nested_ops *nested_ops;
1661
1662	void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
1663	void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
1664
1665	int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq,
1666			      uint32_t guest_irq, bool set);
1667	void (*pi_start_assignment)(struct kvm *kvm);
1668	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
1669	bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
1670
1671	int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
1672			    bool *expired);
1673	void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
1674
1675	void (*setup_mce)(struct kvm_vcpu *vcpu);
1676
1677#ifdef CONFIG_KVM_SMM
1678	int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
1679	int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram);
1680	int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram);
1681	void (*enable_smi_window)(struct kvm_vcpu *vcpu);
1682#endif
1683
1684	int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
1685	int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
1686	int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
1687	int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
1688	int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
1689	void (*guest_memory_reclaimed)(struct kvm *kvm);
1690
1691	int (*get_msr_feature)(struct kvm_msr_entry *entry);
1692
1693	bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
1694					void *insn, int insn_len);
1695
1696	bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
1697	int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
1698
1699	void (*migrate_timers)(struct kvm_vcpu *vcpu);
1700	void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
1701	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
1702
1703	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
1704
1705	/*
1706	 * Returns vCPU specific APICv inhibit reasons
1707	 */
1708	unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
1709};
1710
1711struct kvm_x86_nested_ops {
1712	void (*leave_nested)(struct kvm_vcpu *vcpu);
1713	bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector,
1714				    u32 error_code);
1715	int (*check_events)(struct kvm_vcpu *vcpu);
1716	bool (*has_events)(struct kvm_vcpu *vcpu);
1717	void (*triple_fault)(struct kvm_vcpu *vcpu);
1718	int (*get_state)(struct kvm_vcpu *vcpu,
1719			 struct kvm_nested_state __user *user_kvm_nested_state,
1720			 unsigned user_data_size);
1721	int (*set_state)(struct kvm_vcpu *vcpu,
1722			 struct kvm_nested_state __user *user_kvm_nested_state,
1723			 struct kvm_nested_state *kvm_state);
1724	bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu);
1725	int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
1726
1727	int (*enable_evmcs)(struct kvm_vcpu *vcpu,
1728			    uint16_t *vmcs_version);
1729	uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu);
1730	void (*hv_inject_synthetic_vmexit_post_tlb_flush)(struct kvm_vcpu *vcpu);
1731};
1732
/*
 * Setup-time callbacks, together with pointers to the runtime callback
 * table (runtime_ops) and the PMU callback table (pmu_ops).
 */
struct kvm_x86_init_ops {
	int (*cpu_has_kvm_support)(void);
	int (*disabled_by_bios)(void);
	int (*check_processor_compatibility)(void);
	int (*hardware_setup)(void);
	unsigned int (*handle_intel_pt_intr)(void);

	struct kvm_x86_ops *runtime_ops;
	struct kvm_pmu_ops *pmu_ops;
};
1743
1744struct kvm_arch_async_pf {
1745	u32 token;
1746	gfn_t gfn;
1747	unsigned long cr3;
1748	bool direct_map;
1749};
1750
1751extern u32 __read_mostly kvm_nr_uret_msrs;
1752extern u64 __read_mostly host_efer;
1753extern bool __read_mostly allow_smaller_maxphyaddr;
1754extern bool __read_mostly enable_apicv;
1755extern struct kvm_x86_ops kvm_x86_ops;
1756
1757#define KVM_X86_OP(func) \
1758	DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func));
1759#define KVM_X86_OP_OPTIONAL KVM_X86_OP
1760#define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP
1761#include <asm/kvm-x86-ops.h>
1762
1763#define __KVM_HAVE_ARCH_VM_ALLOC
1764static inline struct kvm *kvm_arch_alloc_vm(void)
1765{
1766	return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
1767}
1768
1769#define __KVM_HAVE_ARCH_VM_FREE
1770void kvm_arch_free_vm(struct kvm *kvm);
1771
1772#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
1773static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
1774{
1775	if (kvm_x86_ops.tlb_remote_flush &&
1776	    !static_call(kvm_x86_tlb_remote_flush)(kvm))
1777		return 0;
1778	else
1779		return -ENOTSUPP;
1780}
1781
1782#define kvm_arch_pmi_in_guest(vcpu) \
1783	((vcpu) && (vcpu)->arch.handling_intr_from_guest)
1784
1785void __init kvm_mmu_x86_module_init(void);
1786int kvm_mmu_vendor_module_init(void);
1787void kvm_mmu_vendor_module_exit(void);
1788
1789void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
1790int kvm_mmu_create(struct kvm_vcpu *vcpu);
1791int kvm_mmu_init_vm(struct kvm *kvm);
 
1792void kvm_mmu_uninit_vm(struct kvm *kvm);
 
 
1793
1794void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
1795void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
1796void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
1797				      const struct kvm_memory_slot *memslot,
1798				      int start_level);
1799void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
1800				       const struct kvm_memory_slot *memslot,
1801				       int target_level);
1802void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
1803				  const struct kvm_memory_slot *memslot,
1804				  u64 start, u64 end,
1805				  int target_level);
1806void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
1807				   const struct kvm_memory_slot *memslot);
1808void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
1809				   const struct kvm_memory_slot *memslot);
 
 
 
 
 
 
 
1810void kvm_mmu_zap_all(struct kvm *kvm);
1811void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
1812void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
 
1813
1814int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 
1815
1816int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1817			  const void *val, int bytes);
1818
1819struct kvm_irq_mask_notifier {
1820	void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
1821	int irq;
1822	struct hlist_node link;
1823};
1824
1825void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
1826				    struct kvm_irq_mask_notifier *kimn);
1827void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
1828				      struct kvm_irq_mask_notifier *kimn);
1829void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
1830			     bool mask);
1831
1832extern bool tdp_enabled;
1833
1834u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
1835
/*
 * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
 *			userspace I/O) to indicate that the emulation context
 *			should be reused as is, i.e. skip initialization of
 *			emulation context, instruction fetch and decode.
 *
 * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
 *		      Indicates that only select instructions (tagged with
 *		      EmulateOnUD) should be emulated (to minimize the emulator
 *		      attack surface).  See also EMULTYPE_TRAP_UD_FORCED.
 *
 * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
 *		   decode the instruction length.  For use *only* by
 *		   kvm_x86_ops.skip_emulated_instruction() implementations if
 *		   EMULTYPE_COMPLETE_USER_EXIT is not set.
 *
 * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to
 *			     retry native execution under certain conditions.
 *			     Can only be set in conjunction with EMULTYPE_PF.
 *
 * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
 *			     triggered by KVM's magic "force emulation" prefix,
 *			     which is opt in via module param (off by default).
 *			     Bypasses EmulateOnUD restriction despite emulating
 *			     due to an intercepted #UD (see EMULTYPE_TRAP_UD).
 *			     Used to test the full emulator from userspace.
 *
 * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
 *			backdoor emulation, which is opt in via module param.
 *			VMware backdoor emulation handles select instructions
 *			and reinjects the #GP for all other cases.
 *
 * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
 *		 case the CR2/GPA value passed on the stack is valid.
 *
 * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
 *				 state and inject single-step #DBs after skipping
 *				 an instruction (after completing userspace I/O).
 */
1875#define EMULTYPE_NO_DECODE	    (1 << 0)
1876#define EMULTYPE_TRAP_UD	    (1 << 1)
1877#define EMULTYPE_SKIP		    (1 << 2)
1878#define EMULTYPE_ALLOW_RETRY_PF	    (1 << 3)
1879#define EMULTYPE_TRAP_UD_FORCED	    (1 << 4)
1880#define EMULTYPE_VMWARE_GP	    (1 << 5)
1881#define EMULTYPE_PF		    (1 << 6)
1882#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
1883
1884int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
1885int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
1886					void *insn, int insn_len);
1887void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
1888					  u64 *data, u8 ndata);
1889void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
1890
1891void kvm_enable_efer_bits(u64);
1892bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
1893int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
1894int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
1895int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
1896int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
1897int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
1898int kvm_emulate_as_nop(struct kvm_vcpu *vcpu);
1899int kvm_emulate_invd(struct kvm_vcpu *vcpu);
1900int kvm_emulate_mwait(struct kvm_vcpu *vcpu);
1901int kvm_handle_invalid_op(struct kvm_vcpu *vcpu);
1902int kvm_emulate_monitor(struct kvm_vcpu *vcpu);
1903
1904int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
 
1905int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
1906int kvm_emulate_halt(struct kvm_vcpu *vcpu);
1907int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu);
1908int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
1909int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
1910
1911void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
1912void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
1913int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
1914void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
1915
1916int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
1917		    int reason, bool has_error_code, u32 error_code);
1918
1919void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0);
1920void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4);
1921int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
1922int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
1923int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
1924int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
1925int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
1926void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
1927unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
1928void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
1929int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);
 
1930
1931int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
1932int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
1933
1934unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
1935void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
1936int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
1937
1938void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
1939void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
1940void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
1941void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
1942void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
1943void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
1944void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
1945				    struct x86_exception *fault);
 
1946bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
1947bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
1948
/*
 * Record the level asserted by one interrupt source and return the
 * resulting line state.
 *
 * Each source owns bit 'irq_source_id' in *irq_state.  The return value is
 * 1 if any bit is still set after the update and 0 otherwise, i.e. the
 * logical OR over all sources (level-triggered semantics).
 */
static inline int __kvm_irq_line_state(unsigned long *irq_state,
				       int irq_source_id, int level)
{
	if (!level)
		__clear_bit(irq_source_id, irq_state);
	else
		__set_bit(irq_source_id, irq_state);

	return *irq_state != 0;
}
1960
/*
 * Bitmask selectors for MMU roots (used e.g. as the roots_to_free argument
 * of kvm_mmu_free_roots()): bit 0 is the current root, bit 1 + i is the
 * i-th previous root, and KVM_MMU_ROOTS_ALL selects every root.
 *
 * The argument of KVM_MMU_ROOT_PREVIOUS() is parenthesized so that
 * low-precedence expressions (e.g. a conditional) are added as a whole.
 */
#define KVM_MMU_ROOT_CURRENT		BIT(0)
#define KVM_MMU_ROOT_PREVIOUS(i)	BIT(1 + (i))
#define KVM_MMU_ROOTS_ALL		(~0UL)
1964
1965int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
1966void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
1967
1968void kvm_inject_nmi(struct kvm_vcpu *vcpu);
1969
1970void kvm_update_dr7(struct kvm_vcpu *vcpu);
1971
1972int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
1973void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
1974			ulong roots_to_free);
1975void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu);
 
 
 
 
1976gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
1977			      struct x86_exception *exception);
 
 
1978gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
1979			       struct x86_exception *exception);
1980gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
1981				struct x86_exception *exception);
1982
1983bool kvm_apicv_activated(struct kvm *kvm);
1984bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu);
1985void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
1986void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
1987				      enum kvm_apicv_inhibit reason, bool set);
1988void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
1989				    enum kvm_apicv_inhibit reason, bool set);
1990
1991static inline void kvm_set_apicv_inhibit(struct kvm *kvm,
1992					 enum kvm_apicv_inhibit reason)
 
 
 
 
 
 
 
 
1993{
1994	kvm_set_or_clear_apicv_inhibit(kvm, reason, true);
1995}
1996
1997static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
1998					   enum kvm_apicv_inhibit reason)
1999{
2000	kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
 
 
2001}
2002
2003int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
2004
2005int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
2006		       void *insn, int insn_len);
2007void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
2008void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
2009			    gva_t gva, hpa_t root_hpa);
2010void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
2011void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
2012
2013void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
2014		       int tdp_max_root_level, int tdp_huge_page_level);
2015
2016static inline u16 kvm_read_ldt(void)
2017{
2018	u16 ldt;
2019	asm("sldt %0" : "=g"(ldt));
2020	return ldt;
2021}
2022
2023static inline void kvm_load_ldt(u16 sel)
2024{
2025	asm("lldt %0" : : "rm"(sel));
2026}
2027
#ifdef CONFIG_X86_64
/* Read MSR 'msr' via rdmsrl() and return its value (64-bit builds only). */
static inline unsigned long read_msr(unsigned long msr)
{
	u64 val;

	rdmsrl(msr, val);
	return val;
}
#endif
2037
 
 
 
 
 
2038static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
2039{
2040	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
2041}
2042
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2043#define TSS_IOPB_BASE_OFFSET 0x66
2044#define TSS_BASE_SIZE 0x68
2045#define TSS_IOPB_SIZE (65536 / 8)
2046#define TSS_REDIRECTION_SIZE (256 / 8)
2047#define RMODE_TSS_SIZE							\
2048	(TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
2049
2050enum {
2051	TASK_SWITCH_CALL = 0,
2052	TASK_SWITCH_IRET = 1,
2053	TASK_SWITCH_JMP = 2,
2054	TASK_SWITCH_GATE = 3,
2055};
2056
2057#define HF_GIF_MASK		(1 << 0)
 
 
2058#define HF_NMI_MASK		(1 << 3)
2059#define HF_IRET_MASK		(1 << 4)
2060#define HF_GUEST_MASK		(1 << 5) /* VCPU is in guest-mode */
2061
2062#ifdef CONFIG_KVM_SMM
2063#define HF_SMM_MASK		(1 << 6)
2064#define HF_SMM_INSIDE_NMI_MASK	(1 << 7)
2065
2066# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
2067# define KVM_ADDRESS_SPACE_NUM 2
2068# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
2069# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
2070#else
2071# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0)
2072#endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2073
2074#define KVM_ARCH_WANT_MMU_NOTIFIER
2075
 
 
 
 
2076int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
2077int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
2078int kvm_cpu_has_extint(struct kvm_vcpu *v);
2079int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
2080int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
2081void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 
 
 
2082
2083int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
2084		    unsigned long ipi_bitmap_high, u32 min,
2085		    unsigned long icr, int op_64_bit);
2086
2087int kvm_add_user_return_msr(u32 msr);
2088int kvm_find_user_return_msr(u32 msr);
2089int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
2090
2091static inline bool kvm_is_supported_user_return_msr(u32 msr)
2092{
2093	return kvm_find_user_return_msr(msr) >= 0;
2094}
2095
2096u64 kvm_scale_tsc(u64 tsc, u64 ratio);
2097u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
2098u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier);
2099u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier);
2100
2101unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
2102bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
2103
 
2104void kvm_make_scan_ioapic_request(struct kvm *kvm);
2105void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
2106				       unsigned long *vcpu_bitmap);
2107
2108bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2109				     struct kvm_async_pf *work);
2110void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2111				 struct kvm_async_pf *work);
2112void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2113			       struct kvm_async_pf *work);
2114void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu);
2115bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
2116extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
2117
2118int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
2119int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
2120void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
2121
2122void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
2123				     u32 size);
 
 
2124bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
2125bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
2126
2127bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
2128			     struct kvm_vcpu **dest_vcpu);
2129
2130void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
2131		     struct kvm_lapic_irq *irq);
2132
2133static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
2134{
2135	/* We can only post Fixed and LowPrio IRQs */
2136	return (irq->delivery_mode == APIC_DM_FIXED ||
2137		irq->delivery_mode == APIC_DM_LOWEST);
2138}
2139
2140static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
2141{
2142	static_call_cond(kvm_x86_vcpu_blocking)(vcpu);
 
2143}
2144
2145static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
2146{
2147	static_call_cond(kvm_x86_vcpu_unblocking)(vcpu);
 
2148}
2149
 
 
2150static inline int kvm_cpu_get_apicid(int mps_cpu)
2151{
2152#ifdef CONFIG_X86_LOCAL_APIC
2153	return default_cpu_present_to_apicid(mps_cpu);
2154#else
2155	WARN_ON_ONCE(1);
2156	return BAD_APICID;
2157#endif
2158}
2159
2160int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
2161
2162#define KVM_CLOCK_VALID_FLAGS						\
2163	(KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
2164
2165#define KVM_X86_VALID_QUIRKS			\
2166	(KVM_X86_QUIRK_LINT0_REENABLED |	\
2167	 KVM_X86_QUIRK_CD_NW_CLEARED |		\
2168	 KVM_X86_QUIRK_LAPIC_MMIO_HOLE |	\
2169	 KVM_X86_QUIRK_OUT_7E_INC_RIP |		\
2170	 KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT |	\
2171	 KVM_X86_QUIRK_FIX_HYPERCALL_INSN |	\
2172	 KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS)
2173
2174#endif /* _ASM_X86_KVM_HOST_H */