Linux Audio

Check our new training course

Embedded Linux training

Mar 31-Apr 8, 2025
Register
Loading...
Note: File does not exist in v5.4.
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Device driver to expose SGX enclave memory to KVM guests.
  4 *
  5 * Copyright(c) 2021 Intel Corporation.
  6 */
  7
  8#include <linux/miscdevice.h>
  9#include <linux/mm.h>
 10#include <linux/mman.h>
 11#include <linux/sched/mm.h>
 12#include <linux/sched/signal.h>
 13#include <linux/slab.h>
 14#include <linux/xarray.h>
 15#include <asm/sgx.h>
 16#include <uapi/asm/sgx.h>
 17
 18#include "encls.h"
 19#include "sgx.h"
 20
/*
 * Per-open state of a virtual EPC instance.  One is allocated in
 * sgx_vepc_open(), stored in file->private_data and freed in
 * sgx_vepc_release().
 */
struct sgx_vepc {
	/*
	 * EPC pages handed out to this instance, keyed by page index
	 * (vma->vm_pgoff plus the page offset of the faulting address
	 * within the VMA).
	 */
	struct xarray page_array;
	/* Held by sgx_vepc_fault() while a page is looked up or added. */
	struct mutex lock;
};
 25
 26/*
 27 * Temporary SECS pages that cannot be EREMOVE'd due to having child in other
 28 * virtual EPC instances, and the lock to protect it.
 29 */
 30static struct mutex zombie_secs_pages_lock;
 31static struct list_head zombie_secs_pages;
 32
 33static int __sgx_vepc_fault(struct sgx_vepc *vepc,
 34			    struct vm_area_struct *vma, unsigned long addr)
 35{
 36	struct sgx_epc_page *epc_page;
 37	unsigned long index, pfn;
 38	int ret;
 39
 40	WARN_ON(!mutex_is_locked(&vepc->lock));
 41
 42	/* Calculate index of EPC page in virtual EPC's page_array */
 43	index = vma->vm_pgoff + PFN_DOWN(addr - vma->vm_start);
 44
 45	epc_page = xa_load(&vepc->page_array, index);
 46	if (epc_page)
 47		return 0;
 48
 49	epc_page = sgx_alloc_epc_page(vepc, false);
 50	if (IS_ERR(epc_page))
 51		return PTR_ERR(epc_page);
 52
 53	ret = xa_err(xa_store(&vepc->page_array, index, epc_page, GFP_KERNEL));
 54	if (ret)
 55		goto err_free;
 56
 57	pfn = PFN_DOWN(sgx_get_epc_phys_addr(epc_page));
 58
 59	ret = vmf_insert_pfn(vma, addr, pfn);
 60	if (ret != VM_FAULT_NOPAGE) {
 61		ret = -EFAULT;
 62		goto err_delete;
 63	}
 64
 65	return 0;
 66
 67err_delete:
 68	xa_erase(&vepc->page_array, index);
 69err_free:
 70	sgx_free_epc_page(epc_page);
 71	return ret;
 72}
 73
 74static vm_fault_t sgx_vepc_fault(struct vm_fault *vmf)
 75{
 76	struct vm_area_struct *vma = vmf->vma;
 77	struct sgx_vepc *vepc = vma->vm_private_data;
 78	int ret;
 79
 80	mutex_lock(&vepc->lock);
 81	ret = __sgx_vepc_fault(vepc, vma, vmf->address);
 82	mutex_unlock(&vepc->lock);
 83
 84	if (!ret)
 85		return VM_FAULT_NOPAGE;
 86
 87	if (ret == -EBUSY && (vmf->flags & FAULT_FLAG_ALLOW_RETRY)) {
 88		mmap_read_unlock(vma->vm_mm);
 89		return VM_FAULT_RETRY;
 90	}
 91
 92	return VM_FAULT_SIGBUS;
 93}
 94
/*
 * VMA operations installed by sgx_vepc_mmap(); only a fault handler is
 * provided.
 */
static const struct vm_operations_struct sgx_vepc_vm_ops = {
	.fault = sgx_vepc_fault,
};
 98
 99static int sgx_vepc_mmap(struct file *file, struct vm_area_struct *vma)
100{
101	struct sgx_vepc *vepc = file->private_data;
102
103	if (!(vma->vm_flags & VM_SHARED))
104		return -EINVAL;
105
106	vma->vm_ops = &sgx_vepc_vm_ops;
107	/* Don't copy VMA in fork() */
108	vma->vm_flags |= VM_PFNMAP | VM_IO | VM_DONTDUMP | VM_DONTCOPY;
109	vma->vm_private_data = vepc;
110
111	return 0;
112}
113
/*
 * Run EREMOVE on an EPC page that was owned by a guest.
 *
 * The guest cannot be trusted to have left the page in a clean state, so
 * EREMOVE is always executed.  If the guest already EREMOVE'd the page
 * itself, the extra EREMOVE is harmless.
 *
 * Return: the result of __eremove() for the page.
 */
static int sgx_vepc_remove_page(struct sgx_epc_page *epc_page)
{
	void *page_va = sgx_get_epc_virt_addr(epc_page);

	return __eremove(page_va);
}
127
128static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
129{
130	int ret = sgx_vepc_remove_page(epc_page);
131	if (ret) {
132		/*
133		 * Only SGX_CHILD_PRESENT is expected, which is because of
134		 * EREMOVE'ing an SECS still with child, in which case it can
135		 * be handled by EREMOVE'ing the SECS again after all pages in
136		 * virtual EPC have been EREMOVE'd. See comments in below in
137		 * sgx_vepc_release().
138		 *
139		 * The user of virtual EPC (KVM) needs to guarantee there's no
140		 * logical processor is still running in the enclave in guest,
141		 * otherwise EREMOVE will get SGX_ENCLAVE_ACT which cannot be
142		 * handled here.
143		 */
144		WARN_ONCE(ret != SGX_CHILD_PRESENT, EREMOVE_ERROR_MESSAGE,
145			  ret, ret);
146		return ret;
147	}
148
149	sgx_free_epc_page(epc_page);
150	return 0;
151}
152
153static long sgx_vepc_remove_all(struct sgx_vepc *vepc)
154{
155	struct sgx_epc_page *entry;
156	unsigned long index;
157	long failures = 0;
158
159	xa_for_each(&vepc->page_array, index, entry) {
160		int ret = sgx_vepc_remove_page(entry);
161		if (ret) {
162			if (ret == SGX_CHILD_PRESENT) {
163				/* The page is a SECS, userspace will retry.  */
164				failures++;
165			} else {
166				/*
167				 * Report errors due to #GP or SGX_ENCLAVE_ACT; do not
168				 * WARN, as userspace can induce said failures by
169				 * calling the ioctl concurrently on multiple vEPCs or
170				 * while one or more CPUs is running the enclave.  Only
171				 * a #PF on EREMOVE indicates a kernel/hardware issue.
172				 */
173				WARN_ON_ONCE(encls_faulted(ret) &&
174					     ENCLS_TRAPNR(ret) != X86_TRAP_GP);
175				return -EBUSY;
176			}
177		}
178		cond_resched();
179	}
180
181	/*
182	 * Return the number of SECS pages that failed to be removed, so
183	 * userspace knows that it has to retry.
184	 */
185	return failures;
186}
187
188static int sgx_vepc_release(struct inode *inode, struct file *file)
189{
190	struct sgx_vepc *vepc = file->private_data;
191	struct sgx_epc_page *epc_page, *tmp, *entry;
192	unsigned long index;
193
194	LIST_HEAD(secs_pages);
195
196	xa_for_each(&vepc->page_array, index, entry) {
197		/*
198		 * Remove all normal, child pages.  sgx_vepc_free_page()
199		 * will fail if EREMOVE fails, but this is OK and expected on
200		 * SECS pages.  Those can only be EREMOVE'd *after* all their
201		 * child pages. Retries below will clean them up.
202		 */
203		if (sgx_vepc_free_page(entry))
204			continue;
205
206		xa_erase(&vepc->page_array, index);
207	}
208
209	/*
210	 * Retry EREMOVE'ing pages.  This will clean up any SECS pages that
211	 * only had children in this 'epc' area.
212	 */
213	xa_for_each(&vepc->page_array, index, entry) {
214		epc_page = entry;
215		/*
216		 * An EREMOVE failure here means that the SECS page still
217		 * has children.  But, since all children in this 'sgx_vepc'
218		 * have been removed, the SECS page must have a child on
219		 * another instance.
220		 */
221		if (sgx_vepc_free_page(epc_page))
222			list_add_tail(&epc_page->list, &secs_pages);
223
224		xa_erase(&vepc->page_array, index);
225	}
226
227	/*
228	 * SECS pages are "pinned" by child pages, and "unpinned" once all
229	 * children have been EREMOVE'd.  A child page in this instance
230	 * may have pinned an SECS page encountered in an earlier release(),
231	 * creating a zombie.  Since some children were EREMOVE'd above,
232	 * try to EREMOVE all zombies in the hopes that one was unpinned.
233	 */
234	mutex_lock(&zombie_secs_pages_lock);
235	list_for_each_entry_safe(epc_page, tmp, &zombie_secs_pages, list) {
236		/*
237		 * Speculatively remove the page from the list of zombies,
238		 * if the page is successfully EREMOVE'd it will be added to
239		 * the list of free pages.  If EREMOVE fails, throw the page
240		 * on the local list, which will be spliced on at the end.
241		 */
242		list_del(&epc_page->list);
243
244		if (sgx_vepc_free_page(epc_page))
245			list_add_tail(&epc_page->list, &secs_pages);
246	}
247
248	if (!list_empty(&secs_pages))
249		list_splice_tail(&secs_pages, &zombie_secs_pages);
250	mutex_unlock(&zombie_secs_pages_lock);
251
252	xa_destroy(&vepc->page_array);
253	kfree(vepc);
254
255	return 0;
256}
257
258static int sgx_vepc_open(struct inode *inode, struct file *file)
259{
260	struct sgx_vepc *vepc;
261
262	vepc = kzalloc(sizeof(struct sgx_vepc), GFP_KERNEL);
263	if (!vepc)
264		return -ENOMEM;
265	mutex_init(&vepc->lock);
266	xa_init(&vepc->page_array);
267
268	file->private_data = vepc;
269
270	return 0;
271}
272
273static long sgx_vepc_ioctl(struct file *file,
274			   unsigned int cmd, unsigned long arg)
275{
276	struct sgx_vepc *vepc = file->private_data;
277
278	switch (cmd) {
279	case SGX_IOC_VEPC_REMOVE_ALL:
280		if (arg)
281			return -EINVAL;
282		return sgx_vepc_remove_all(vepc);
283
284	default:
285		return -ENOTTY;
286	}
287}
288
289static const struct file_operations sgx_vepc_fops = {
290	.owner		= THIS_MODULE,
291	.open		= sgx_vepc_open,
292	.unlocked_ioctl	= sgx_vepc_ioctl,
293	.compat_ioctl	= sgx_vepc_ioctl,
294	.release	= sgx_vepc_release,
295	.mmap		= sgx_vepc_mmap,
296};
297
298static struct miscdevice sgx_vepc_dev = {
299	.minor		= MISC_DYNAMIC_MINOR,
300	.name		= "sgx_vepc",
301	.nodename	= "sgx_vepc",
302	.fops		= &sgx_vepc_fops,
303};
304
305int __init sgx_vepc_init(void)
306{
307	/* SGX virtualization requires KVM to work */
308	if (!cpu_feature_enabled(X86_FEATURE_VMX))
309		return -ENODEV;
310
311	INIT_LIST_HEAD(&zombie_secs_pages);
312	mutex_init(&zombie_secs_pages_lock);
313
314	return misc_register(&sgx_vepc_dev);
315}
316
317/**
318 * sgx_virt_ecreate() - Run ECREATE on behalf of guest
319 * @pageinfo:	Pointer to PAGEINFO structure
320 * @secs:	Userspace pointer to SECS page
321 * @trapnr:	trap number injected to guest in case of ECREATE error
322 *
323 * Run ECREATE on behalf of guest after KVM traps ECREATE for the purpose
324 * of enforcing policies of guest's enclaves, and return the trap number
325 * which should be injected to guest in case of any ECREATE error.
326 *
327 * Return:
328 * -  0:	ECREATE was successful.
329 * - <0:	on error.
330 */
331int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
332		     int *trapnr)
333{
334	int ret;
335
336	/*
337	 * @secs is an untrusted, userspace-provided address.  It comes from
338	 * KVM and is assumed to be a valid pointer which points somewhere in
339	 * userspace.  This can fault and call SGX or other fault handlers when
340	 * userspace mapping @secs doesn't exist.
341	 *
342	 * Add a WARN() to make sure @secs is already valid userspace pointer
343	 * from caller (KVM), who should already have handled invalid pointer
344	 * case (for instance, made by malicious guest).  All other checks,
345	 * such as alignment of @secs, are deferred to ENCLS itself.
346	 */
347	if (WARN_ON_ONCE(!access_ok(secs, PAGE_SIZE)))
348		return -EINVAL;
349
350	__uaccess_begin();
351	ret = __ecreate(pageinfo, (void *)secs);
352	__uaccess_end();
353
354	if (encls_faulted(ret)) {
355		*trapnr = ENCLS_TRAPNR(ret);
356		return -EFAULT;
357	}
358
359	/* ECREATE doesn't return an error code, it faults or succeeds. */
360	WARN_ON_ONCE(ret);
361	return 0;
362}
363EXPORT_SYMBOL_GPL(sgx_virt_ecreate);
364
365static int __sgx_virt_einit(void __user *sigstruct, void __user *token,
366			    void __user *secs)
367{
368	int ret;
369
370	/*
371	 * Make sure all userspace pointers from caller (KVM) are valid.
372	 * All other checks deferred to ENCLS itself.  Also see comment
373	 * for @secs in sgx_virt_ecreate().
374	 */
375#define SGX_EINITTOKEN_SIZE	304
376	if (WARN_ON_ONCE(!access_ok(sigstruct, sizeof(struct sgx_sigstruct)) ||
377			 !access_ok(token, SGX_EINITTOKEN_SIZE) ||
378			 !access_ok(secs, PAGE_SIZE)))
379		return -EINVAL;
380
381	__uaccess_begin();
382	ret = __einit((void *)sigstruct, (void *)token, (void *)secs);
383	__uaccess_end();
384
385	return ret;
386}
387
388/**
389 * sgx_virt_einit() - Run EINIT on behalf of guest
390 * @sigstruct:		Userspace pointer to SIGSTRUCT structure
391 * @token:		Userspace pointer to EINITTOKEN structure
392 * @secs:		Userspace pointer to SECS page
393 * @lepubkeyhash:	Pointer to guest's *virtual* SGX_LEPUBKEYHASH MSR values
394 * @trapnr:		trap number injected to guest in case of EINIT error
395 *
396 * Run EINIT on behalf of guest after KVM traps EINIT. If SGX_LC is available
397 * in host, SGX driver may rewrite the hardware values at wish, therefore KVM
398 * needs to update hardware values to guest's virtual MSR values in order to
399 * ensure EINIT is executed with expected hardware values.
400 *
401 * Return:
402 * -  0:	EINIT was successful.
403 * - <0:	on error.
404 */
405int sgx_virt_einit(void __user *sigstruct, void __user *token,
406		   void __user *secs, u64 *lepubkeyhash, int *trapnr)
407{
408	int ret;
409
410	if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) {
411		ret = __sgx_virt_einit(sigstruct, token, secs);
412	} else {
413		preempt_disable();
414
415		sgx_update_lepubkeyhash(lepubkeyhash);
416
417		ret = __sgx_virt_einit(sigstruct, token, secs);
418		preempt_enable();
419	}
420
421	/* Propagate up the error from the WARN_ON_ONCE in __sgx_virt_einit() */
422	if (ret == -EINVAL)
423		return ret;
424
425	if (encls_faulted(ret)) {
426		*trapnr = ENCLS_TRAPNR(ret);
427		return -EFAULT;
428	}
429
430	return ret;
431}
432EXPORT_SYMBOL_GPL(sgx_virt_einit);