arch/x86/kvm/vmx/vmx.h (Linux v6.13.7)
  1/* SPDX-License-Identifier: GPL-2.0 */
  2#ifndef __KVM_X86_VMX_H
  3#define __KVM_X86_VMX_H
  4
  5#include <linux/kvm_host.h>
  6
  7#include <asm/kvm.h>
  8#include <asm/intel_pt.h>
  9#include <asm/perf_event.h>
 10#include <asm/posted_intr.h>
 11
 12#include "capabilities.h"
 13#include "../kvm_cache_regs.h"
 14#include "vmcs.h"
 15#include "vmx_ops.h"
 16#include "../cpuid.h"
 17#include "run_flags.h"
 18#include "../mmu.h"
 19
 20#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
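
/*
 * Illustrative example, not part of the upstream header: X2APIC_MSR()
 * converts an xAPIC MMIO register offset into its x2APIC MSR index by
 * dropping the low four bits.  For instance, the TPR at offset 0x80
 * (APIC_TASKPRI) maps to MSR 0x808, since APIC_BASE_MSR is 0x800.
 */
static inline u32 example_x2apic_tpr_msr(void)
{
	return X2APIC_MSR(APIC_TASKPRI);	/* 0x800 + (0x80 >> 4) == 0x808 */
}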
 21
 22#ifdef CONFIG_X86_64
 23#define MAX_NR_USER_RETURN_MSRS	7
 24#else
 25#define MAX_NR_USER_RETURN_MSRS	4
 26#endif
 27
 28#define MAX_NR_LOADSTORE_MSRS	8
 29
 30struct vmx_msrs {
 31	unsigned int		nr;
 32	struct vmx_msr_entry	val[MAX_NR_LOADSTORE_MSRS];
 33};
 34
 35struct vmx_uret_msr {
 36	bool load_into_hardware;
 37	u64 data;
 38	u64 mask;
 39};
 40
 41enum segment_cache_field {
 42	SEG_FIELD_SEL = 0,
 43	SEG_FIELD_BASE = 1,
 44	SEG_FIELD_LIMIT = 2,
 45	SEG_FIELD_AR = 3,
 46
 47	SEG_FIELD_NR = 4
 48};
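
/*
 * Illustrative sketch only (assumption): the segment cache in vmx.c packs
 * one valid bit per (segment, field) pair into segment_cache.bitmask, i.e.
 * SEG_FIELD_NR bits per segment, matching the "4 bits per segment" comment
 * further down in this header.
 */
static inline u32 example_segment_cache_bit(int seg, enum segment_cache_field field)
{
	return 1u << (seg * SEG_FIELD_NR + field);	/* e.g. CS base -> bit 5 */
}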
 49
 50#define RTIT_ADDR_RANGE		4
 51
 52struct pt_ctx {
 53	u64 ctl;
 54	u64 status;
 55	u64 output_base;
 56	u64 output_mask;
 57	u64 cr3_match;
 58	u64 addr_a[RTIT_ADDR_RANGE];
 59	u64 addr_b[RTIT_ADDR_RANGE];
 60};
 61
 62struct pt_desc {
 63	u64 ctl_bitmask;
 64	u32 num_address_ranges;
 65	u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
 66	struct pt_ctx host;
 67	struct pt_ctx guest;
 68};
 69
 70union vmx_exit_reason {
 71	struct {
 72		u32	basic			: 16;
 73		u32	reserved16		: 1;
 74		u32	reserved17		: 1;
 75		u32	reserved18		: 1;
 76		u32	reserved19		: 1;
 77		u32	reserved20		: 1;
 78		u32	reserved21		: 1;
 79		u32	reserved22		: 1;
 80		u32	reserved23		: 1;
 81		u32	reserved24		: 1;
 82		u32	reserved25		: 1;
 83		u32	bus_lock_detected	: 1;
 84		u32	enclave_mode		: 1;
 85		u32	smi_pending_mtf		: 1;
 86		u32	smi_from_vmx_root	: 1;
 87		u32	reserved30		: 1;
 88		u32	failed_vmentry		: 1;
 89	};
 90	u32 full;
 91};
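
/*
 * Illustrative sketch, not upstream code: the union lets KVM read the raw
 * 32-bit exit reason from the VMCS once and then test individual bits,
 * e.g. to tell a failed VM-entry apart from a normal exit.
 */
static inline bool example_is_failed_vmentry(u32 raw_exit_reason)
{
	union vmx_exit_reason reason = { .full = raw_exit_reason };

	return reason.failed_vmentry;	/* bit 31 of the exit reason */
}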
 92
 93struct lbr_desc {
 94	/* Basic info about guest LBR records. */
 95	struct x86_pmu_lbr records;
 96
 97	/*
 98	 * Emulate LBR feature via passthrough LBR registers when the
 99	 * per-vcpu guest LBR event is scheduled on the current pcpu.
100	 *
101	 * The records may be inaccurate if the host reclaims the LBR.
102	 */
103	struct perf_event *event;
104
105	/* True if LBRs are marked as not intercepted in the MSR bitmap */
106	bool msr_passthrough;
107};
108
109extern struct x86_pmu_lbr vmx_lbr_caps;
110
111/*
112 * The nested_vmx structure is part of vcpu_vmx, and holds information we need
113 * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
114 */
115struct nested_vmx {
116	/* Has the level1 guest done vmxon? */
117	bool vmxon;
118	gpa_t vmxon_ptr;
119	bool pml_full;
120
121	/* The guest-physical address of the current VMCS L1 keeps for L2 */
122	gpa_t current_vmptr;
123	/*
124	 * Cache of the guest's VMCS, existing outside of guest memory.
125	 * Loaded from guest memory during VMPTRLD. Flushed to guest
126	 * memory during VMCLEAR and VMPTRLD.
127	 */
128	struct vmcs12 *cached_vmcs12;
129	/*
130	 * Cache of the guest's shadow VMCS, existing outside of guest
131	 * memory. Loaded from guest memory during VM entry. Flushed
132	 * to guest memory during VM exit.
133	 */
134	struct vmcs12 *cached_shadow_vmcs12;
135
136	/*
137	 * GPA to HVA cache for accessing vmcs12->vmcs_link_pointer
138	 */
139	struct gfn_to_hva_cache shadow_vmcs12_cache;
140
141	/*
142	 * GPA to HVA cache for VMCS12
143	 */
144	struct gfn_to_hva_cache vmcs12_cache;
145
146	/*
147	 * Indicates if the shadow vmcs or enlightened vmcs must be updated
148	 * with the data held by struct vmcs12.
149	 */
150	bool need_vmcs12_to_shadow_sync;
151	bool dirty_vmcs12;
152
153	/*
154	 * Indicates whether MSR bitmap for L2 needs to be rebuilt due to
155	 * changes in MSR bitmap for L1 or switching to a different L2. Note,
156	 * this flag can only be used reliably in conjunction with a paravirt L1
157	 * which informs L0 whether any changes to MSR bitmap for L2 were done
158	 * on its side.
159	 */
160	bool force_msr_bitmap_recalc;
161
162	/*
163	 * Indicates lazily loaded guest state has not yet been decached from
164	 * vmcs02.
165	 */
166	bool need_sync_vmcs02_to_vmcs12_rare;
167
168	/*
169	 * vmcs02 has been initialized, i.e. state that is constant for
170	 * vmcs02 has been written to the backing VMCS.  Initialization
171	 * is delayed until L1 actually attempts to run a nested VM.
172	 */
173	bool vmcs02_initialized;
174
175	bool change_vmcs01_virtual_apic_mode;
176	bool reload_vmcs01_apic_access_page;
177	bool update_vmcs01_cpu_dirty_logging;
178	bool update_vmcs01_apicv_status;
179	bool update_vmcs01_hwapic_isr;
180
181	/*
182	 * Enlightened VMCS has been enabled. It does not mean that L1 has to
183	 * use it. However, VMX features available to L1 will be limited based
184	 * on what the enlightened VMCS supports.
185	 */
186	bool enlightened_vmcs_enabled;
187
188	/* L2 must run next, and mustn't decide to exit to L1. */
189	bool nested_run_pending;
190
191	/* Pending MTF VM-exit into L1.  */
192	bool mtf_pending;
193
194	struct loaded_vmcs vmcs02;
195
196	/*
197	 * Guest pages referred to in the vmcs02 with host-physical
198	 * pointers, so we must keep them pinned while L2 runs.
199	 */
200	struct kvm_host_map apic_access_page_map;
201	struct kvm_host_map virtual_apic_map;
202	struct kvm_host_map pi_desc_map;
203
204	struct pi_desc *pi_desc;
205	bool pi_pending;
206	u16 posted_intr_nv;
207
208	struct hrtimer preemption_timer;
209	u64 preemption_timer_deadline;
210	bool has_preemption_timer_deadline;
211	bool preemption_timer_expired;
212
213	/*
214	 * Used to snapshot MSRs that are conditionally loaded on VM-Enter in
215	 * order to propagate the guest's pre-VM-Enter value into vmcs02.  For
216	 * emulation of VMLAUNCH/VMRESUME, the snapshot will be of L1's value.
217	 * For KVM_SET_NESTED_STATE, the snapshot is of L2's value, _if_
218	 * userspace restores MSRs before nested state.  If userspace restores
219	 * MSRs after nested state, the snapshot holds garbage, but KVM can't
220	 * detect that, and the garbage value in vmcs02 will be overwritten by
221	 * MSR restoration in any case.
222	 */
223	u64 pre_vmenter_debugctl;
224	u64 pre_vmenter_bndcfgs;
225
226	/* to migrate it to L1 if L2 writes to L1's CR8 directly */
227	int l1_tpr_threshold;
228
229	u16 vpid02;
230	u16 last_vpid;
231
232	struct nested_vmx_msrs msrs;
233
234	/* SMM related state */
235	struct {
236		/* in VMX operation on SMM entry? */
237		bool vmxon;
238		/* in guest mode on SMM entry? */
239		bool guest_mode;
240	} smm;
241
242#ifdef CONFIG_KVM_HYPERV
243	gpa_t hv_evmcs_vmptr;
244	struct kvm_host_map hv_evmcs_map;
245	struct hv_enlightened_vmcs *hv_evmcs;
246#endif
247};
248
249struct vcpu_vmx {
250	struct kvm_vcpu       vcpu;
251	u8                    fail;
252	u8		      x2apic_msr_bitmap_mode;
253
254	/*
255	 * If true, host state has been stored in vmx->loaded_vmcs for
256	 * the CPU registers that only need to be switched when transitioning
257	 * to/from the kernel, and the registers have been loaded with guest
258	 * values.  If false, host state is loaded in the CPU registers
259	 * and vmx->loaded_vmcs->host_state is invalid.
260	 */
261	bool		      guest_state_loaded;
262
263	unsigned long         exit_qualification;
264	u32                   exit_intr_info;
265	u32                   idt_vectoring_info;
266	ulong                 rflags;
267
268	/*
269	 * User return MSRs are always emulated when enabled in the guest, but
270	 * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
271	 * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
272	 * be loaded into hardware if those conditions aren't met.
273	 */
274	struct vmx_uret_msr   guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
275	bool                  guest_uret_msrs_loaded;
276#ifdef CONFIG_X86_64
277	u64		      msr_host_kernel_gs_base;
278	u64		      msr_guest_kernel_gs_base;
279#endif
280
281	u64		      spec_ctrl;
282	u32		      msr_ia32_umwait_control;
283
284	/*
285	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
286	 * non-nested (L1) guest, it always points to vmcs01. For a nested
287	 * guest (L2), it points to a different VMCS.
288	 */
289	struct loaded_vmcs    vmcs01;
290	struct loaded_vmcs   *loaded_vmcs;
291
292	struct msr_autoload {
293		struct vmx_msrs guest;
294		struct vmx_msrs host;
295	} msr_autoload;
296
297	struct msr_autostore {
298		struct vmx_msrs guest;
299	} msr_autostore;
300
301	struct {
302		int vm86_active;
303		ulong save_rflags;
304		struct kvm_segment segs[8];
305	} rmode;
306	struct {
307		u32 bitmask; /* 4 bits per segment (1 bit per field) */
308		struct kvm_save_segment {
309			u16 selector;
310			unsigned long base;
311			u32 limit;
312			u32 ar;
313		} seg[8];
314	} segment_cache;
315	int vpid;
316	bool emulation_required;
317
318	union vmx_exit_reason exit_reason;
319
320	/* Posted interrupt descriptor */
321	struct pi_desc pi_desc;
322
323	/* Used if this vCPU is waiting for PI notification wakeup. */
324	struct list_head pi_wakeup_list;
325
326	/* Support for a guest hypervisor (nested VMX) */
327	struct nested_vmx nested;
328
329	/* Dynamic PLE window. */
330	unsigned int ple_window;
331	bool ple_window_dirty;
332
333	/* Support for PML */
334#define PML_ENTITY_NUM		512
335	struct page *pml_pg;
336
337	/* apic deadline value in host tsc */
338	u64 hv_deadline_tsc;
339
340	/*
341	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
342	 * msr_ia32_feature_control. FEAT_CTL_LOCKED is always included
343	 * in msr_ia32_feature_control_valid_bits.
344	 */
345	u64 msr_ia32_feature_control;
346	u64 msr_ia32_feature_control_valid_bits;
347	/* SGX Launch Control public key hash */
348	u64 msr_ia32_sgxlepubkeyhash[4];
349	u64 msr_ia32_mcu_opt_ctrl;
350	bool disable_fb_clear;
351
352	struct pt_desc pt_desc;
353	struct lbr_desc lbr_desc;
354
355	/* Save desired MSR intercept (read: pass-through) state */
356#define MAX_POSSIBLE_PASSTHROUGH_MSRS	16
357	struct {
358		DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
359		DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
360	} shadow_msr_intercept;
361
362	/* ve_info must be page aligned. */
363	struct vmx_ve_information *ve_info;
364};
365
366struct kvm_vmx {
367	struct kvm kvm;
368
369	unsigned int tss_addr;
370	bool ept_identity_pagetable_done;
371	gpa_t ept_identity_map_addr;
372	/* Posted Interrupt Descriptor (PID) table for IPI virtualization */
373	u64 *pid_table;
374};
375
376void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
377			struct loaded_vmcs *buddy);
378int allocate_vpid(void);
379void free_vpid(int vpid);
380void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
381void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
382void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
383			unsigned long fs_base, unsigned long gs_base);
384int vmx_get_cpl(struct kvm_vcpu *vcpu);
385int vmx_get_cpl_no_cache(struct kvm_vcpu *vcpu);
386bool vmx_emulation_required(struct kvm_vcpu *vcpu);
387unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
388void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
389u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
390void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask);
391int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
392void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
393void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
394void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
395void ept_save_pdptrs(struct kvm_vcpu *vcpu);
396void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
397void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
398u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
399
400bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu);
401void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
402bool vmx_nmi_blocked(struct kvm_vcpu *vcpu);
403bool __vmx_interrupt_blocked(struct kvm_vcpu *vcpu);
404bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu);
405bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
406void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
407void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
408struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
409void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
410void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
411void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
412unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
413bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
414		    unsigned int flags);
415int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
416void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
417
418void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
419void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
420
421u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
422u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
423
424gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
425
426static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
427					     int type, bool value)
428{
429	if (value)
430		vmx_enable_intercept_for_msr(vcpu, msr, type);
431	else
432		vmx_disable_intercept_for_msr(vcpu, msr, type);
433}
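
/*
 * Illustrative sketch, not part of the upstream header: callers typically
 * derive the "intercept or not" decision from guest CPUID and let the
 * wrapper above pick enable vs. disable.  MSR_TYPE_RW lives in KVM's
 * common x86 headers in this kernel version (it was moved out of vmx.h).
 */
static inline void example_update_spec_ctrl_intercept(struct kvm_vcpu *vcpu,
						      bool guest_has_spec_ctrl)
{
	vmx_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW,
				  !guest_has_spec_ctrl);
}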
434
435void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
436
437/*
438 * Note, early Intel manuals have the write-low and read-high bitmap offsets
439 * the wrong way round.  The bitmaps control MSRs 0x00000000-0x00001fff and
440 * 0xc0000000-0xc0001fff.  The former (low) uses bytes 0-0x3ff for reads and
441 * 0x800-0xbff for writes.  The latter (high) uses 0x400-0x7ff for reads and
442 * 0xc00-0xfff for writes.  MSRs not covered by either of the ranges always
443 * VM-Exit.
444 */
445#define __BUILD_VMX_MSR_BITMAP_HELPER(rtype, action, bitop, access, base)      \
446static inline rtype vmx_##action##_msr_bitmap_##access(unsigned long *bitmap,  \
447						       u32 msr)		       \
448{									       \
449	int f = sizeof(unsigned long);					       \
450									       \
451	if (msr <= 0x1fff)						       \
452		return bitop##_bit(msr, bitmap + base / f);		       \
453	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))		       \
454		return bitop##_bit(msr & 0x1fff, bitmap + (base + 0x400) / f); \
455	return (rtype)true;						       \
456}
457#define BUILD_VMX_MSR_BITMAP_HELPERS(ret_type, action, bitop)		       \
458	__BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, read,  0x0)     \
459	__BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 0x800)
460
461BUILD_VMX_MSR_BITMAP_HELPERS(bool, test, test)
462BUILD_VMX_MSR_BITMAP_HELPERS(void, clear, __clear)
463BUILD_VMX_MSR_BITMAP_HELPERS(void, set, __set)
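
/*
 * Illustrative sketch, not upstream code: the helpers generated above are
 * named vmx_<action>_msr_bitmap_<access>().  Per the layout comment, the
 * read-intercept bit for e.g. MSR_EFER (0xc0000080) lives at byte
 * 0x400 + (0x80 >> 3) = 0x410, bit 0.  Forcing reads of IA32_TSC to
 * VM-Exit while letting writes through could look like this:
 */
static inline void example_intercept_tsc_reads(unsigned long *msr_bitmap)
{
	vmx_set_msr_bitmap_read(msr_bitmap, MSR_IA32_TSC);	/* reads exit */
	vmx_clear_msr_bitmap_write(msr_bitmap, MSR_IA32_TSC);	/* writes pass */
}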
464
465static inline u8 vmx_get_rvi(void)
466{
467	return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
468}
469
470#define __KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS				\
471	(VM_ENTRY_LOAD_DEBUG_CONTROLS)
472#ifdef CONFIG_X86_64
473	#define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS			\
474		(__KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS |			\
475		 VM_ENTRY_IA32E_MODE)
476#else
477	#define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS			\
478		__KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS
479#endif
480#define KVM_OPTIONAL_VMX_VM_ENTRY_CONTROLS				\
481	(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |				\
482	 VM_ENTRY_LOAD_IA32_PAT |					\
483	 VM_ENTRY_LOAD_IA32_EFER |					\
484	 VM_ENTRY_LOAD_BNDCFGS |					\
485	 VM_ENTRY_PT_CONCEAL_PIP |					\
486	 VM_ENTRY_LOAD_IA32_RTIT_CTL)
487
488#define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS				\
489	(VM_EXIT_SAVE_DEBUG_CONTROLS |					\
490	 VM_EXIT_ACK_INTR_ON_EXIT)
491#ifdef CONFIG_X86_64
492	#define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS			\
493		(__KVM_REQUIRED_VMX_VM_EXIT_CONTROLS |			\
494		 VM_EXIT_HOST_ADDR_SPACE_SIZE)
495#else
496	#define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS			\
497		__KVM_REQUIRED_VMX_VM_EXIT_CONTROLS
498#endif
499#define KVM_OPTIONAL_VMX_VM_EXIT_CONTROLS				\
500	      (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |			\
501	       VM_EXIT_SAVE_IA32_PAT |					\
502	       VM_EXIT_LOAD_IA32_PAT |					\
503	       VM_EXIT_SAVE_IA32_EFER |					\
504	       VM_EXIT_SAVE_VMX_PREEMPTION_TIMER |			\
505	       VM_EXIT_LOAD_IA32_EFER |					\
506	       VM_EXIT_CLEAR_BNDCFGS |					\
507	       VM_EXIT_PT_CONCEAL_PIP |					\
508	       VM_EXIT_CLEAR_IA32_RTIT_CTL)
509
510#define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL			\
511	(PIN_BASED_EXT_INTR_MASK |					\
512	 PIN_BASED_NMI_EXITING)
513#define KVM_OPTIONAL_VMX_PIN_BASED_VM_EXEC_CONTROL			\
514	(PIN_BASED_VIRTUAL_NMIS |					\
515	 PIN_BASED_POSTED_INTR |					\
516	 PIN_BASED_VMX_PREEMPTION_TIMER)
517
518#define __KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL			\
519	(CPU_BASED_HLT_EXITING |					\
520	 CPU_BASED_CR3_LOAD_EXITING |					\
521	 CPU_BASED_CR3_STORE_EXITING |					\
522	 CPU_BASED_UNCOND_IO_EXITING |					\
523	 CPU_BASED_MOV_DR_EXITING |					\
524	 CPU_BASED_USE_TSC_OFFSETTING |					\
525	 CPU_BASED_MWAIT_EXITING |					\
526	 CPU_BASED_MONITOR_EXITING |					\
527	 CPU_BASED_INVLPG_EXITING |					\
528	 CPU_BASED_RDPMC_EXITING |					\
529	 CPU_BASED_INTR_WINDOW_EXITING)
530
531#ifdef CONFIG_X86_64
532	#define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL		\
533		(__KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL |		\
534		 CPU_BASED_CR8_LOAD_EXITING |				\
535		 CPU_BASED_CR8_STORE_EXITING)
536#else
537	#define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL		\
538		__KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL
539#endif
540
541#define KVM_OPTIONAL_VMX_CPU_BASED_VM_EXEC_CONTROL			\
542	(CPU_BASED_RDTSC_EXITING |					\
543	 CPU_BASED_TPR_SHADOW |						\
544	 CPU_BASED_USE_IO_BITMAPS |					\
545	 CPU_BASED_MONITOR_TRAP_FLAG |					\
546	 CPU_BASED_USE_MSR_BITMAPS |					\
547	 CPU_BASED_NMI_WINDOW_EXITING |					\
548	 CPU_BASED_PAUSE_EXITING |					\
549	 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS |			\
550	 CPU_BASED_ACTIVATE_TERTIARY_CONTROLS)
551
552#define KVM_REQUIRED_VMX_SECONDARY_VM_EXEC_CONTROL 0
553#define KVM_OPTIONAL_VMX_SECONDARY_VM_EXEC_CONTROL			\
554	(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |			\
555	 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |			\
556	 SECONDARY_EXEC_WBINVD_EXITING |				\
557	 SECONDARY_EXEC_ENABLE_VPID |					\
558	 SECONDARY_EXEC_ENABLE_EPT |					\
559	 SECONDARY_EXEC_UNRESTRICTED_GUEST |				\
560	 SECONDARY_EXEC_PAUSE_LOOP_EXITING |				\
561	 SECONDARY_EXEC_DESC |						\
562	 SECONDARY_EXEC_ENABLE_RDTSCP |					\
563	 SECONDARY_EXEC_ENABLE_INVPCID |				\
564	 SECONDARY_EXEC_APIC_REGISTER_VIRT |				\
565	 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |				\
566	 SECONDARY_EXEC_SHADOW_VMCS |					\
567	 SECONDARY_EXEC_ENABLE_XSAVES |					\
568	 SECONDARY_EXEC_RDSEED_EXITING |				\
569	 SECONDARY_EXEC_RDRAND_EXITING |				\
570	 SECONDARY_EXEC_ENABLE_PML |					\
571	 SECONDARY_EXEC_TSC_SCALING |					\
572	 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |				\
573	 SECONDARY_EXEC_PT_USE_GPA |					\
574	 SECONDARY_EXEC_PT_CONCEAL_VMX |				\
575	 SECONDARY_EXEC_ENABLE_VMFUNC |					\
576	 SECONDARY_EXEC_BUS_LOCK_DETECTION |				\
577	 SECONDARY_EXEC_NOTIFY_VM_EXITING |				\
578	 SECONDARY_EXEC_ENCLS_EXITING |					\
579	 SECONDARY_EXEC_EPT_VIOLATION_VE)
580
581#define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL 0
582#define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL			\
583	(TERTIARY_EXEC_IPI_VIRT)
584
585#define BUILD_CONTROLS_SHADOW(lname, uname, bits)						\
586static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val)			\
587{												\
588	if (vmx->loaded_vmcs->controls_shadow.lname != val) {					\
589		vmcs_write##bits(uname, val);							\
590		vmx->loaded_vmcs->controls_shadow.lname = val;					\
591	}											\
592}												\
593static inline u##bits __##lname##_controls_get(struct loaded_vmcs *vmcs)			\
594{												\
595	return vmcs->controls_shadow.lname;							\
596}												\
597static inline u##bits lname##_controls_get(struct vcpu_vmx *vmx)				\
598{												\
599	return __##lname##_controls_get(vmx->loaded_vmcs);					\
600}												\
601static __always_inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u##bits val)		\
602{												\
603	BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname)));		\
604	lname##_controls_set(vmx, lname##_controls_get(vmx) | val);				\
605}												\
606static __always_inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##bits val)	\
607{												\
608	BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname)));		\
609	lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val);				\
610}
611BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32)
612BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32)
613BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL, 32)
614BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL, 32)
615BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL, 32)
616BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
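
/*
 * Illustrative sketch, not part of the upstream header: the macro above
 * expands to accessors such as secondary_exec_controls_get()/_setbit()/
 * _clearbit().  The BUILD_BUG_ON() in the *bit() helpers rejects any bit
 * that is neither required nor optional for that control field.  Turning
 * on RDTSCP for the guest, assuming the cpu_has_vmx_rdtscp() capability
 * helper from capabilities.h, could look like this:
 */
static inline void example_enable_rdtscp_for_guest(struct vcpu_vmx *vmx)
{
	if (cpu_has_vmx_rdtscp())
		secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_ENABLE_RDTSCP);
}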
617
618/*
619 * VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the
620 * cache on demand.  Other registers not listed here are synced to
621 * the cache immediately after VM-Exit.
622 */
623#define VMX_REGS_LAZY_LOAD_SET	((1 << VCPU_REGS_RIP) |         \
624				(1 << VCPU_REGS_RSP) |          \
625				(1 << VCPU_EXREG_RFLAGS) |      \
626				(1 << VCPU_EXREG_PDPTR) |       \
627				(1 << VCPU_EXREG_SEGMENTS) |    \
628				(1 << VCPU_EXREG_CR0) |         \
629				(1 << VCPU_EXREG_CR3) |         \
630				(1 << VCPU_EXREG_CR4) |         \
631				(1 << VCPU_EXREG_EXIT_INFO_1) | \
632				(1 << VCPU_EXREG_EXIT_INFO_2))
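
/*
 * Illustrative only (assumption): a register in VMX_REGS_LAZY_LOAD_SET is
 * read back from the VMCS on first use after a VM-Exit, as in
 * vmx_get_exit_qual() below; everything else is refreshed unconditionally.
 */
static inline bool example_vmx_reg_is_lazily_synced(int reg)
{
	return VMX_REGS_LAZY_LOAD_SET & BIT(reg);
}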
633
634static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
635{
636	unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS;
637
638	/*
639	 * CR0.WP needs to be intercepted when KVM is shadowing legacy paging
640	 * in order to construct shadow PTEs with the correct protections.
641	 * Note!  CR0.WP technically can be passed through to the guest if
642	 * paging is disabled, but checking CR0.PG would generate a cyclical
643	 * dependency of sorts due to forcing the caller to ensure CR0 holds
644	 * the correct value prior to determining which CR0 bits can be owned
645	 * by L1.  Keep it simple and limit the optimization to EPT.
646	 */
647	if (!enable_ept)
648		bits &= ~X86_CR0_WP;
649	return bits;
650}
651
652static __always_inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
653{
654	return container_of(kvm, struct kvm_vmx, kvm);
655}
656
657static __always_inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
658{
659	return container_of(vcpu, struct vcpu_vmx, vcpu);
660}
661
662static inline struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu)
663{
664	return &to_vmx(vcpu)->lbr_desc;
665}
666
667static inline struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu)
668{
669	return &vcpu_to_lbr_desc(vcpu)->records;
670}
671
672static inline bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
673{
674	return !!vcpu_to_lbr_records(vcpu)->nr;
675}
676
677void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
678int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
679void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);
680
681static __always_inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
682{
683	struct vcpu_vmx *vmx = to_vmx(vcpu);
684
685	if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1))
686		vmx->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
687
688	return vmx->exit_qualification;
689}
690
691static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
692{
693	struct vcpu_vmx *vmx = to_vmx(vcpu);
694
695	if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2))
696		vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
697
698	return vmx->exit_intr_info;
699}
700
701struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags);
702void free_vmcs(struct vmcs *vmcs);
703int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
704void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
705void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs);
706
707static inline struct vmcs *alloc_vmcs(bool shadow)
708{
709	return alloc_vmcs_cpu(shadow, raw_smp_processor_id(),
710			      GFP_KERNEL_ACCOUNT);
711}
712
713static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
714{
715	return secondary_exec_controls_get(vmx) &
716		SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
717}
718
719static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
720{
721	if (!enable_ept)
722		return true;
723
724	return allow_smaller_maxphyaddr &&
725	       cpuid_maxphyaddr(vcpu) < kvm_host.maxphyaddr;
726}
727
728static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)
729{
730	return enable_unrestricted_guest && (!is_guest_mode(vcpu) ||
731	    (secondary_exec_controls_get(to_vmx(vcpu)) &
732	    SECONDARY_EXEC_UNRESTRICTED_GUEST));
733}
734
735bool __vmx_guest_state_valid(struct kvm_vcpu *vcpu);
736static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
737{
738	return is_unrestricted_guest(vcpu) || __vmx_guest_state_valid(vcpu);
739}
740
741void dump_vmcs(struct kvm_vcpu *vcpu);
742
743static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
744{
745	return (vmx_instr_info >> 28) & 0xf;
746}
747
748static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu)
749{
750	return  lapic_in_kernel(vcpu) && enable_ipiv;
751}
752
753static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
754{
755	vmx->segment_cache.bitmask = 0;
756}
757
758#endif /* __KVM_X86_VMX_H */