sgx.c - arch/x86/kvm/vmx/sgx.c - Linux diff v6.13.7

  1// SPDX-License-Identifier: GPL-2.0
  2/*  Copyright(c) 2021 Intel Corporation. */
  3#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  4
  5#include <asm/sgx.h>
  6
  7#include "x86.h"
  8#include "kvm_cache_regs.h"
  9#include "nested.h"
 10#include "sgx.h"
 11#include "vmx.h"
 
 12
 /* Backing variable for the read-only (0444) "sgx" module parameter; defaults to enabled. */
 13bool __read_mostly enable_sgx = 1;
 14module_param_named(sgx, enable_sgx, bool, 0444);
 15
 16/* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */
 17static u64 sgx_pubkey_hash[4] __ro_after_init;
 18
 19/*
 20 * ENCLS's memory operands use a fixed segment (DS) and a fixed
 21 * address size based on the mode.  Related prefixes are ignored.
 22 */
 23static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
 24			     int size, int alignment, gva_t *gva)
 25{
 26	struct kvm_segment s;
 27	bool fault;
 28
 29	/* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
 30	*gva = offset;
 31	if (!is_64_bit_mode(vcpu)) {
 32		vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
 33		*gva += s.base;
 34	}
 35
 36	if (!IS_ALIGNED(*gva, alignment)) {
 37		fault = true;
 38	} else if (likely(is_64_bit_mode(vcpu))) {
 39		*gva = vmx_get_untagged_addr(vcpu, *gva, 0);
 40		fault = is_noncanonical_address(*gva, vcpu, 0);
 41	} else {
 42		*gva &= 0xffffffff;
 43		fault = (s.unusable) ||
 44			(s.type != 2 && s.type != 3) ||
 45			(*gva > s.limit) ||
 46			((s.base != 0 || s.limit != 0xffffffff) &&
 47			(((u64)*gva + size - 1) > s.limit + 1));
 48	}
 49	if (fault)
 50		kvm_inject_gp(vcpu, 0);
 51	return fault ? -EINVAL : 0;
 52}
 53
 54static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
 55					 unsigned int size)
 56{
 57	uint64_t data[2] = { addr, size };
 58
 59	__kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
 60}
 61
 62static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
 63			unsigned int size)
 64{
 65	if (__copy_from_user(data, (void __user *)hva, size)) {
 66		sgx_handle_emulation_failure(vcpu, hva, size);
 67		return -EFAULT;
 68	}
 69
 70	return 0;
 71}
 72
 73static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
 74			  gpa_t *gpa)
 75{
 76	struct x86_exception ex;
 77
 78	if (write)
 79		*gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
 80	else
 81		*gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);
 82
 83	if (*gpa == INVALID_GPA) {
 84		kvm_inject_emulated_page_fault(vcpu, &ex);
 85		return -EFAULT;
 86	}
 87
 88	return 0;
 89}
 90
 91static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
 92{
 93	*hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
 94	if (kvm_is_error_hva(*hva)) {
 95		sgx_handle_emulation_failure(vcpu, gpa, 1);
 96		return -EFAULT;
 97	}
 98
 99	*hva |= gpa & ~PAGE_MASK;
100
101	return 0;
102}
103
104static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
105{
106	struct x86_exception ex;
107
108	/*
109	 * A non-EPCM #PF indicates a bad userspace HVA.  This *should* check
110	 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
111	 * but the error code isn't (yet) plumbed through the ENCLS helpers.
112	 */
113	if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
114		kvm_prepare_emulation_failure_exit(vcpu);
115		return 0;
116	}
117
118	/*
119	 * If the guest thinks it's running on SGX2 hardware, inject an SGX
120	 * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
121	 * #PF on SGX2).  The assumption is that EPCM faults are much more
122	 * likely than a bad userspace address.
123	 */
124	if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
125	    guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
126		memset(&ex, 0, sizeof(ex));
127		ex.vector = PF_VECTOR;
128		ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
129				PFERR_SGX_MASK;
130		ex.address = gva;
131		ex.error_code_valid = true;
132		ex.nested_page_fault = false;
133		kvm_inject_emulated_page_fault(vcpu, &ex);
134	} else {
135		kvm_inject_gp(vcpu, 0);
136	}
137	return 1;
138}
139
140static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
141				  struct sgx_pageinfo *pageinfo,
142				  unsigned long secs_hva,
143				  gva_t secs_gva)
144{
145	struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
146	struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
147	u64 attributes, xfrm, size;
148	u32 miscselect;
149	u8 max_size_log2;
150	int trapnr, ret;
151
152	sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
153	sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
154	if (!sgx_12_0 || !sgx_12_1) {
155		kvm_prepare_emulation_failure_exit(vcpu);
156		return 0;
157	}
158
159	miscselect = contents->miscselect;
160	attributes = contents->attributes;
161	xfrm = contents->xfrm;
162	size = contents->size;
163
164	/* Enforce restriction of access to the PROVISIONKEY. */
165	if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
166	    (attributes & SGX_ATTR_PROVISIONKEY)) {
167		if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
168			pr_warn_once("SGX PROVISIONKEY advertised but not allowed\n");
169		kvm_inject_gp(vcpu, 0);
170		return 1;
171	}
172
173	/*
174	 * Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM.  Note
175	 * that the allowed XFRM (XFeature Request Mask) isn't strictly bound
176	 * by the supported XCR0.  FP+SSE *must* be set in XFRM, even if XSAVE
177	 * is unsupported, i.e. even if XCR0 itself is completely unsupported.
178	 */
179	if ((u32)miscselect & ~sgx_12_0->ebx ||
180	    (u32)attributes & ~sgx_12_1->eax ||
181	    (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
182	    (u32)xfrm & ~sgx_12_1->ecx ||
183	    (u32)(xfrm >> 32) & ~sgx_12_1->edx ||
184	    xfrm & ~(vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE) ||
185	    (xfrm & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
186		kvm_inject_gp(vcpu, 0);
187		return 1;
188	}
189
190	/* Enforce CPUID restriction on max enclave size. */
191	max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
192							    sgx_12_0->edx;
193	if (size >= BIT_ULL(max_size_log2)) {
194		kvm_inject_gp(vcpu, 0);
195		return 1;
196	}
197
198	/*
199	 * sgx_virt_ecreate() returns:
200	 *  1) 0:	ECREATE was successful
201	 *  2) -EFAULT:	ECREATE was run but faulted, and trapnr was set to the
202	 *		exception number.
203	 *  3) -EINVAL:	access_ok() on @secs_hva failed. This should never
204	 *		happen as KVM checks host addresses at memslot creation.
205	 *		sgx_virt_ecreate() has already warned in this case.
206	 */
207	ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
208	if (!ret)
209		return kvm_skip_emulated_instruction(vcpu);
210	if (ret == -EFAULT)
211		return sgx_inject_fault(vcpu, secs_gva, trapnr);
212
213	return ret;
214}
215
216static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
217{
218	gva_t pageinfo_gva, secs_gva;
219	gva_t metadata_gva, contents_gva;
220	gpa_t metadata_gpa, contents_gpa, secs_gpa;
221	unsigned long metadata_hva, contents_hva, secs_hva;
222	struct sgx_pageinfo pageinfo;
223	struct sgx_secs *contents;
224	struct x86_exception ex;
225	int r;
226
227	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
228	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
229		return 1;
230
231	/*
232	 * Copy the PAGEINFO to local memory, its pointers need to be
233	 * translated, i.e. we need to do a deep copy/translate.
234	 */
235	r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
236				sizeof(pageinfo), &ex);
237	if (r == X86EMUL_PROPAGATE_FAULT) {
238		kvm_inject_emulated_page_fault(vcpu, &ex);
239		return 1;
240	} else if (r != X86EMUL_CONTINUE) {
241		sgx_handle_emulation_failure(vcpu, pageinfo_gva,
242					     sizeof(pageinfo));
243		return 0;
244	}
245
246	if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
247	    sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
248			      &contents_gva))
249		return 1;
250
251	/*
252	 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
253	 * Resume the guest on failure to inject a #PF.
254	 */
255	if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
256	    sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
257	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
258		return 1;
259
260	/*
261	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
262	 * KVM doesn't have to fully process one address at a time.  Exit to
263	 * userspace if a GPA is invalid.
264	 */
265	if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
266	    sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
267	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
268		return 0;
269
270	/*
271	 * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
272	 * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
273	 * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
274	 * enforce restriction of access to the PROVISIONKEY.
275	 */
276	contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL);
277	if (!contents)
278		return -ENOMEM;
279
280	/* Exit to userspace if copying from a host userspace address fails. */
281	if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
282		free_page((unsigned long)contents);
283		return 0;
284	}
285
286	pageinfo.metadata = metadata_hva;
287	pageinfo.contents = (u64)contents;
288
289	r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);
290
291	free_page((unsigned long)contents);
292
293	return r;
294}
295
296static int handle_encls_einit(struct kvm_vcpu *vcpu)
297{
298	unsigned long sig_hva, secs_hva, token_hva, rflags;
299	struct vcpu_vmx *vmx = to_vmx(vcpu);
300	gva_t sig_gva, secs_gva, token_gva;
301	gpa_t sig_gpa, secs_gpa, token_gpa;
302	int ret, trapnr;
303
304	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
305	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
306	    sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
307		return 1;
308
309	/*
310	 * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
311	 * Resume the guest on failure to inject a #PF.
312	 */
313	if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
314	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
315	    sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
316		return 1;
317
318	/*
319	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
320	 * KVM doesn't have to fully process one address at a time.  Exit to
321	 * userspace if a GPA is invalid.  Note, all structures are aligned and
322	 * cannot split pages.
323	 */
324	if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
325	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
326	    sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
327		return 0;
328
329	ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
330			     (void __user *)secs_hva,
331			     vmx->msr_ia32_sgxlepubkeyhash, &trapnr);
332
333	if (ret == -EFAULT)
334		return sgx_inject_fault(vcpu, secs_gva, trapnr);
335
336	/*
337	 * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
338	 * @token_hva or @secs_hva. This should never happen as KVM checks host
339	 * addresses at memslot creation. sgx_virt_einit() has already warned
340	 * in this case, so just return.
341	 */
342	if (ret < 0)
343		return ret;
344
345	rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
346					  X86_EFLAGS_AF | X86_EFLAGS_SF |
347					  X86_EFLAGS_OF);
348	if (ret)
349		rflags |= X86_EFLAGS_ZF;
350	else
351		rflags &= ~X86_EFLAGS_ZF;
352	vmx_set_rflags(vcpu, rflags);
353
354	kvm_rax_write(vcpu, ret);
355	return kvm_skip_emulated_instruction(vcpu);
356}
357
358static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
359{
360	/*
361	 * ENCLS generates a #UD if SGX1 isn't supported, i.e. this point will
362	 * be reached if and only if the SGX1 leafs are enabled.
363	 */
364	if (leaf >= ECREATE && leaf <= ETRACK)
365		return true;
366
367	if (leaf >= EAUG && leaf <= EMODT)
368		return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);
369
370	return false;
371}
372
373static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
374{
375	const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;
376
377	return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
378}
379
380int handle_encls(struct kvm_vcpu *vcpu)
381{
382	u32 leaf = (u32)kvm_rax_read(vcpu);
383
384	if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
385	    !guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
386		kvm_queue_exception(vcpu, UD_VECTOR);
387	} else if (!encls_leaf_enabled_in_guest(vcpu, leaf) ||
388		   !sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) {
389		kvm_inject_gp(vcpu, 0);
390	} else {
391		if (leaf == ECREATE)
392			return handle_encls_ecreate(vcpu);
393		if (leaf == EINIT)
394			return handle_encls_einit(vcpu);
395		WARN_ONCE(1, "unexpected exit on ENCLS[%u]", leaf);
396		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
397		vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
398		return 0;
399	}
400	return 1;
401}
402
403void setup_default_sgx_lepubkeyhash(void)
404{
405	/*
406	 * Use Intel's default value for Skylake hardware if Launch Control is
407	 * not supported, i.e. Intel's hash is hardcoded into silicon, or if
408	 * Launch Control is supported and enabled, i.e. mimic the reset value
409	 * and let the guest write the MSRs at will.  If Launch Control is
410	 * supported but disabled, then use the current MSR values as the hash
411	 * MSRs exist but are read-only (locked and not writable).
412	 */
413	if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
414	    rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
415		sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
416		sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
417		sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
418		sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
419	} else {
420		/* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
421		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
422		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
423		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
424	}
425}
426
427void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
428{
429	struct vcpu_vmx *vmx = to_vmx(vcpu);
430
431	memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
432	       sizeof(sgx_pubkey_hash));
433}
434
435/*
436 * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
437 * restrictions if the guest's allowed-1 settings diverge from hardware.
438 */
439static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
440{
441	struct kvm_cpuid_entry2 *guest_cpuid;
442	u32 eax, ebx, ecx, edx;
443
444	if (!vcpu->kvm->arch.sgx_provisioning_allowed)
445		return true;
446
447	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
448	if (!guest_cpuid)
449		return true;
450
451	cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
452	if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
453		return true;
454
455	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
456	if (!guest_cpuid)
457		return true;
458
459	cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
460	if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
461	    guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
462		return true;
463
464	return false;
465}
466
467void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
468{
469	/*
470	 * There is no software enable bit for SGX that is virtualized by
471	 * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
472	 * guest (either by the host or by the guest's BIOS) but enabled in the
473	 * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
474	 * the expected system behavior for ENCLS.
475	 */
476	u64 bitmap = -1ull;
477
478	/* Nothing to do if hardware doesn't support SGX */
479	if (!cpu_has_vmx_encls_vmexit())
480		return;
481
482	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
483	    sgx_enabled_in_guest_bios(vcpu)) {
484		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
485			bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
486			if (sgx_intercept_encls_ecreate(vcpu))
487				bitmap |= (1 << ECREATE);
488		}
489
490		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
491			bitmap &= ~GENMASK_ULL(EMODT, EAUG);
492
493		/*
494		 * Trap and execute EINIT if launch control is enabled in the
495		 * host using the guest's values for launch control MSRs, even
496		 * if the guest's values are fixed to hardware default values.
497		 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
498		 * the MSRs is extraordinarily expensive.
499		 */
500		if (boot_cpu_has(X86_FEATURE_SGX_LC))
501			bitmap |= (1 << EINIT);
502
503		if (!vmcs12 && is_guest_mode(vcpu))
504			vmcs12 = get_vmcs12(vcpu);
505		if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
506			bitmap |= vmcs12->encls_exiting_bitmap;
507	}
508	vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
509}

  1// SPDX-License-Identifier: GPL-2.0
  2/*  Copyright(c) 2021 Intel Corporation. */
  3#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  4
  5#include <asm/sgx.h>
  6
  7#include "cpuid.h"
  8#include "kvm_cache_regs.h"
  9#include "nested.h"
 10#include "sgx.h"
 11#include "vmx.h"
 12#include "x86.h"
 13
 /* Backing variable for the read-only (0444) "sgx" module parameter; defaults to enabled. */
 14bool __read_mostly enable_sgx = 1;
 15module_param_named(sgx, enable_sgx, bool, 0444);
 16
 17/* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */
 18static u64 sgx_pubkey_hash[4] __ro_after_init;
 19
 20/*
 21 * ENCLS's memory operands use a fixed segment (DS) and a fixed
 22 * address size based on the mode.  Related prefixes are ignored.
 23 */
 24static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
 25			     int size, int alignment, gva_t *gva)
 26{
 27	struct kvm_segment s;
 28	bool fault;
 29
 30	/* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
 31	*gva = offset;
 32	if (!is_64_bit_mode(vcpu)) {
 33		vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
 34		*gva += s.base;
 35	}
 36
 37	if (!IS_ALIGNED(*gva, alignment)) {
 38		fault = true;
 39	} else if (likely(is_64_bit_mode(vcpu))) {
 40		*gva = vmx_get_untagged_addr(vcpu, *gva, 0);
 41		fault = is_noncanonical_address(*gva, vcpu);
 42	} else {
 43		*gva &= 0xffffffff;
 44		fault = (s.unusable) ||
 45			(s.type != 2 && s.type != 3) ||
 46			(*gva > s.limit) ||
 47			((s.base != 0 || s.limit != 0xffffffff) &&
 48			(((u64)*gva + size - 1) > s.limit + 1));
 49	}
 50	if (fault)
 51		kvm_inject_gp(vcpu, 0);
 52	return fault ? -EINVAL : 0;
 53}
 54
 55static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
 56					 unsigned int size)
 57{
 58	uint64_t data[2] = { addr, size };
 59
 60	__kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
 61}
 62
 63static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
 64			unsigned int size)
 65{
 66	if (__copy_from_user(data, (void __user *)hva, size)) {
 67		sgx_handle_emulation_failure(vcpu, hva, size);
 68		return -EFAULT;
 69	}
 70
 71	return 0;
 72}
 73
 74static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
 75			  gpa_t *gpa)
 76{
 77	struct x86_exception ex;
 78
 79	if (write)
 80		*gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
 81	else
 82		*gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);
 83
 84	if (*gpa == INVALID_GPA) {
 85		kvm_inject_emulated_page_fault(vcpu, &ex);
 86		return -EFAULT;
 87	}
 88
 89	return 0;
 90}
 91
 92static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
 93{
 94	*hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
 95	if (kvm_is_error_hva(*hva)) {
 96		sgx_handle_emulation_failure(vcpu, gpa, 1);
 97		return -EFAULT;
 98	}
 99
100	*hva |= gpa & ~PAGE_MASK;
101
102	return 0;
103}
104
105static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
106{
107	struct x86_exception ex;
108
109	/*
110	 * A non-EPCM #PF indicates a bad userspace HVA.  This *should* check
111	 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
112	 * but the error code isn't (yet) plumbed through the ENCLS helpers.
113	 */
114	if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
115		kvm_prepare_emulation_failure_exit(vcpu);
116		return 0;
117	}
118
119	/*
120	 * If the guest thinks it's running on SGX2 hardware, inject an SGX
121	 * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
122	 * #PF on SGX2).  The assumption is that EPCM faults are much more
123	 * likely than a bad userspace address.
124	 */
125	if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
126	    guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
127		memset(&ex, 0, sizeof(ex));
128		ex.vector = PF_VECTOR;
129		ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
130				PFERR_SGX_MASK;
131		ex.address = gva;
132		ex.error_code_valid = true;
133		ex.nested_page_fault = false;
134		kvm_inject_emulated_page_fault(vcpu, &ex);
135	} else {
136		kvm_inject_gp(vcpu, 0);
137	}
138	return 1;
139}
140
141static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
142				  struct sgx_pageinfo *pageinfo,
143				  unsigned long secs_hva,
144				  gva_t secs_gva)
145{
146	struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
147	struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
148	u64 attributes, xfrm, size;
149	u32 miscselect;
150	u8 max_size_log2;
151	int trapnr, ret;
152
153	sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
154	sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
155	if (!sgx_12_0 || !sgx_12_1) {
156		kvm_prepare_emulation_failure_exit(vcpu);
157		return 0;
158	}
159
160	miscselect = contents->miscselect;
161	attributes = contents->attributes;
162	xfrm = contents->xfrm;
163	size = contents->size;
164
165	/* Enforce restriction of access to the PROVISIONKEY. */
166	if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
167	    (attributes & SGX_ATTR_PROVISIONKEY)) {
168		if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
169			pr_warn_once("SGX PROVISIONKEY advertised but not allowed\n");
170		kvm_inject_gp(vcpu, 0);
171		return 1;
172	}
173
174	/*
175	 * Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM.  Note
176	 * that the allowed XFRM (XFeature Request Mask) isn't strictly bound
177	 * by the supported XCR0.  FP+SSE *must* be set in XFRM, even if XSAVE
178	 * is unsupported, i.e. even if XCR0 itself is completely unsupported.
179	 */
180	if ((u32)miscselect & ~sgx_12_0->ebx ||
181	    (u32)attributes & ~sgx_12_1->eax ||
182	    (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
183	    (u32)xfrm & ~sgx_12_1->ecx ||
184	    (u32)(xfrm >> 32) & ~sgx_12_1->edx ||
185	    xfrm & ~(vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE) ||
186	    (xfrm & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
187		kvm_inject_gp(vcpu, 0);
188		return 1;
189	}
190
191	/* Enforce CPUID restriction on max enclave size. */
192	max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
193							    sgx_12_0->edx;
194	if (size >= BIT_ULL(max_size_log2)) {
195		kvm_inject_gp(vcpu, 0);
196		return 1;
197	}
198
199	/*
200	 * sgx_virt_ecreate() returns:
201	 *  1) 0:	ECREATE was successful
202	 *  2) -EFAULT:	ECREATE was run but faulted, and trapnr was set to the
203	 *		exception number.
204	 *  3) -EINVAL:	access_ok() on @secs_hva failed. This should never
205	 *		happen as KVM checks host addresses at memslot creation.
206	 *		sgx_virt_ecreate() has already warned in this case.
207	 */
208	ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
209	if (!ret)
210		return kvm_skip_emulated_instruction(vcpu);
211	if (ret == -EFAULT)
212		return sgx_inject_fault(vcpu, secs_gva, trapnr);
213
214	return ret;
215}
216
217static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
218{
219	gva_t pageinfo_gva, secs_gva;
220	gva_t metadata_gva, contents_gva;
221	gpa_t metadata_gpa, contents_gpa, secs_gpa;
222	unsigned long metadata_hva, contents_hva, secs_hva;
223	struct sgx_pageinfo pageinfo;
224	struct sgx_secs *contents;
225	struct x86_exception ex;
226	int r;
227
228	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
229	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
230		return 1;
231
232	/*
233	 * Copy the PAGEINFO to local memory, its pointers need to be
234	 * translated, i.e. we need to do a deep copy/translate.
235	 */
236	r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
237				sizeof(pageinfo), &ex);
238	if (r == X86EMUL_PROPAGATE_FAULT) {
239		kvm_inject_emulated_page_fault(vcpu, &ex);
240		return 1;
241	} else if (r != X86EMUL_CONTINUE) {
242		sgx_handle_emulation_failure(vcpu, pageinfo_gva,
243					     sizeof(pageinfo));
244		return 0;
245	}
246
247	if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
248	    sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
249			      &contents_gva))
250		return 1;
251
252	/*
253	 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
254	 * Resume the guest on failure to inject a #PF.
255	 */
256	if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
257	    sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
258	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
259		return 1;
260
261	/*
262	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
263	 * KVM doesn't have to fully process one address at a time.  Exit to
264	 * userspace if a GPA is invalid.
265	 */
266	if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
267	    sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
268	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
269		return 0;
270
271	/*
272	 * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
273	 * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
274	 * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
275	 * enforce restriction of access to the PROVISIONKEY.
276	 */
277	contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT);
278	if (!contents)
279		return -ENOMEM;
280
281	/* Exit to userspace if copying from a host userspace address fails. */
282	if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
283		free_page((unsigned long)contents);
284		return 0;
285	}
286
287	pageinfo.metadata = metadata_hva;
288	pageinfo.contents = (u64)contents;
289
290	r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);
291
292	free_page((unsigned long)contents);
293
294	return r;
295}
296
297static int handle_encls_einit(struct kvm_vcpu *vcpu)
298{
299	unsigned long sig_hva, secs_hva, token_hva, rflags;
300	struct vcpu_vmx *vmx = to_vmx(vcpu);
301	gva_t sig_gva, secs_gva, token_gva;
302	gpa_t sig_gpa, secs_gpa, token_gpa;
303	int ret, trapnr;
304
305	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
306	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
307	    sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
308		return 1;
309
310	/*
311	 * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
312	 * Resume the guest on failure to inject a #PF.
313	 */
314	if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
315	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
316	    sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
317		return 1;
318
319	/*
320	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
321	 * KVM doesn't have to fully process one address at a time.  Exit to
322	 * userspace if a GPA is invalid.  Note, all structures are aligned and
323	 * cannot split pages.
324	 */
325	if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
326	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
327	    sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
328		return 0;
329
330	ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
331			     (void __user *)secs_hva,
332			     vmx->msr_ia32_sgxlepubkeyhash, &trapnr);
333
334	if (ret == -EFAULT)
335		return sgx_inject_fault(vcpu, secs_gva, trapnr);
336
337	/*
338	 * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
339	 * @token_hva or @secs_hva. This should never happen as KVM checks host
340	 * addresses at memslot creation. sgx_virt_einit() has already warned
341	 * in this case, so just return.
342	 */
343	if (ret < 0)
344		return ret;
345
346	rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
347					  X86_EFLAGS_AF | X86_EFLAGS_SF |
348					  X86_EFLAGS_OF);
349	if (ret)
350		rflags |= X86_EFLAGS_ZF;
351	else
352		rflags &= ~X86_EFLAGS_ZF;
353	vmx_set_rflags(vcpu, rflags);
354
355	kvm_rax_write(vcpu, ret);
356	return kvm_skip_emulated_instruction(vcpu);
357}
358
359static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
360{
361	/*
362	 * ENCLS generates a #UD if SGX1 isn't supported, i.e. this point will
363	 * be reached if and only if the SGX1 leafs are enabled.
364	 */
365	if (leaf >= ECREATE && leaf <= ETRACK)
366		return true;
367
368	if (leaf >= EAUG && leaf <= EMODT)
369		return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);
370
371	return false;
372}
373
374static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
375{
376	const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;
377
378	return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
379}
380
381int handle_encls(struct kvm_vcpu *vcpu)
382{
383	u32 leaf = (u32)kvm_rax_read(vcpu);
384
385	if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
386	    !guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
387		kvm_queue_exception(vcpu, UD_VECTOR);
388	} else if (!encls_leaf_enabled_in_guest(vcpu, leaf) ||
389		   !sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) {
390		kvm_inject_gp(vcpu, 0);
391	} else {
392		if (leaf == ECREATE)
393			return handle_encls_ecreate(vcpu);
394		if (leaf == EINIT)
395			return handle_encls_einit(vcpu);
396		WARN_ONCE(1, "unexpected exit on ENCLS[%u]", leaf);
397		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
398		vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
399		return 0;
400	}
401	return 1;
402}
403
404void setup_default_sgx_lepubkeyhash(void)
405{
406	/*
407	 * Use Intel's default value for Skylake hardware if Launch Control is
408	 * not supported, i.e. Intel's hash is hardcoded into silicon, or if
409	 * Launch Control is supported and enabled, i.e. mimic the reset value
410	 * and let the guest write the MSRs at will.  If Launch Control is
411	 * supported but disabled, then use the current MSR values as the hash
412	 * MSRs exist but are read-only (locked and not writable).
413	 */
414	if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
415	    rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
416		sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
417		sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
418		sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
419		sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
420	} else {
421		/* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
422		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
423		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
424		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
425	}
426}
427
428void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
429{
430	struct vcpu_vmx *vmx = to_vmx(vcpu);
431
432	memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
433	       sizeof(sgx_pubkey_hash));
434}
435
436/*
437 * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
438 * restrictions if the guest's allowed-1 settings diverge from hardware.
439 */
440static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
441{
442	struct kvm_cpuid_entry2 *guest_cpuid;
443	u32 eax, ebx, ecx, edx;
444
445	if (!vcpu->kvm->arch.sgx_provisioning_allowed)
446		return true;
447
448	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
449	if (!guest_cpuid)
450		return true;
451
452	cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
453	if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
454		return true;
455
456	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
457	if (!guest_cpuid)
458		return true;
459
460	cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
461	if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
462	    guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
463		return true;
464
465	return false;
466}
467
468void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
469{
470	/*
471	 * There is no software enable bit for SGX that is virtualized by
472	 * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
473	 * guest (either by the host or by the guest's BIOS) but enabled in the
474	 * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
475	 * the expected system behavior for ENCLS.
476	 */
477	u64 bitmap = -1ull;
478
479	/* Nothing to do if hardware doesn't support SGX */
480	if (!cpu_has_vmx_encls_vmexit())
481		return;
482
483	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
484	    sgx_enabled_in_guest_bios(vcpu)) {
485		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
486			bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
487			if (sgx_intercept_encls_ecreate(vcpu))
488				bitmap |= (1 << ECREATE);
489		}
490
491		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
492			bitmap &= ~GENMASK_ULL(EMODT, EAUG);
493
494		/*
495		 * Trap and execute EINIT if launch control is enabled in the
496		 * host using the guest's values for launch control MSRs, even
497		 * if the guest's values are fixed to hardware default values.
498		 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
499		 * the MSRs is extraordinarily expensive.
500		 */
501		if (boot_cpu_has(X86_FEATURE_SGX_LC))
502			bitmap |= (1 << EINIT);
503
504		if (!vmcs12 && is_guest_mode(vcpu))
505			vmcs12 = get_vmcs12(vcpu);
506		if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
507			bitmap |= vmcs12->encls_exiting_bitmap;
508	}
509	vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
510}