Linux Audio

Check our new training course

Loading...
Note: this file does not exist in kernel version 3.1.
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 *  Implement mseal() syscall.
  4 *
  5 *  Copyright (c) 2023,2024 Google, Inc.
  6 *
  7 *  Author: Jeff Xu <jeffxu@chromium.org>
  8 */
  9
 10#include <linux/mempolicy.h>
 11#include <linux/mman.h>
 12#include <linux/mm.h>
 13#include <linux/mm_inline.h>
 14#include <linux/mmu_context.h>
 15#include <linux/syscalls.h>
 16#include <linux/sched.h>
 17#include "internal.h"
 18
/* Mark @vma as sealed by setting VM_SEALED in its vm_flags. */
static inline void set_vma_sealed(struct vm_area_struct *vma)
{
	vm_flags_set(vma, VM_SEALED);
}
 23
 24static bool is_madv_discard(int behavior)
 25{
 26	switch (behavior) {
 27	case MADV_FREE:
 28	case MADV_DONTNEED:
 29	case MADV_DONTNEED_LOCKED:
 30	case MADV_REMOVE:
 31	case MADV_DONTFORK:
 32	case MADV_WIPEONFORK:
 33	case MADV_GUARD_INSTALL:
 34		return true;
 35	}
 36
 37	return false;
 38}
 39
 40static bool is_ro_anon(struct vm_area_struct *vma)
 41{
 42	/* check anonymous mapping. */
 43	if (vma->vm_file || vma->vm_flags & VM_SHARED)
 44		return false;
 45
 46	/*
 47	 * check for non-writable:
 48	 * PROT=RO or PKRU is not writeable.
 49	 */
 50	if (!(vma->vm_flags & VM_WRITE) ||
 51		!arch_vma_access_permitted(vma, true, false, false))
 52		return true;
 53
 54	return false;
 55}
 56
 57/*
 58 * Check if a vma is allowed to be modified by madvise.
 59 */
 60bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior)
 61{
 62	if (!is_madv_discard(behavior))
 63		return true;
 64
 65	if (unlikely(!can_modify_vma(vma) && is_ro_anon(vma)))
 66		return false;
 67
 68	/* Allow by default. */
 69	return true;
 70}
 71
 72static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
 73		struct vm_area_struct **prev, unsigned long start,
 74		unsigned long end, vm_flags_t newflags)
 75{
 76	int ret = 0;
 77	vm_flags_t oldflags = vma->vm_flags;
 78
 79	if (newflags == oldflags)
 80		goto out;
 81
 82	vma = vma_modify_flags(vmi, *prev, vma, start, end, newflags);
 83	if (IS_ERR(vma)) {
 84		ret = PTR_ERR(vma);
 85		goto out;
 86	}
 87
 88	set_vma_sealed(vma);
 89out:
 90	*prev = vma;
 91	return ret;
 92}
 93
 94/*
 95 * Check for do_mseal:
 96 * 1> start is part of a valid vma.
 97 * 2> end is part of a valid vma.
 98 * 3> No gap (unallocated address) between start and end.
 99 * 4> map is sealable.
100 */
101static int check_mm_seal(unsigned long start, unsigned long end)
102{
103	struct vm_area_struct *vma;
104	unsigned long nstart = start;
105
106	VMA_ITERATOR(vmi, current->mm, start);
107
108	/* going through each vma to check. */
109	for_each_vma_range(vmi, vma, end) {
110		if (vma->vm_start > nstart)
111			/* unallocated memory found. */
112			return -ENOMEM;
113
114		if (vma->vm_end >= end)
115			return 0;
116
117		nstart = vma->vm_end;
118	}
119
120	return -ENOMEM;
121}
122
123/*
124 * Apply sealing.
125 */
/*
 * Apply sealing.
 *
 * Walks every vma in [start, end) and sets VM_SEALED on each, clamping
 * the first/last vma to the requested boundaries via mseal_fixup().
 * Caller holds mmap_lock for write and has already validated the range
 * with check_mm_seal(), so no gaps or NULL vmas are expected here.
 * Returns 0 on success or a negative errno from mseal_fixup().
 */
static int apply_mm_seal(unsigned long start, unsigned long end)
{
	unsigned long nstart;
	struct vm_area_struct *vma, *prev;

	VMA_ITERATOR(vmi, current->mm, start);

	vma = vma_iter_load(&vmi);
	/*
	 * Note: check_mm_seal should already checked ENOMEM case.
	 * so vma should not be null, same for the other ENOMEM cases.
	 */
	prev = vma_prev(&vmi);
	/* If start falls inside the vma, the vma itself acts as "prev". */
	if (start > vma->vm_start)
		prev = vma;

	nstart = start;
	for_each_vma_range(vmi, vma, end) {
		int error;
		unsigned long tmp;
		vm_flags_t newflags;

		newflags = vma->vm_flags | VM_SEALED;
		/* Clamp the last vma to the requested end. */
		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mseal_fixup(&vmi, vma, &prev, nstart, tmp, newflags);
		if (error)
			return error;
		/* Continue from wherever the (possibly split) vma now ends. */
		nstart = vma_iter_end(&vmi);
	}

	return 0;
}
160
161/*
162 * mseal(2) seals the VM's meta data from
163 * selected syscalls.
164 *
165 * addr/len: VM address range.
166 *
167 *  The address range by addr/len must meet:
168 *   start (addr) must be in a valid VMA.
169 *   end (addr + len) must be in a valid VMA.
170 *   no gap (unallocated memory) between start and end.
171 *   start (addr) must be page aligned.
172 *
173 *  len: len will be page aligned implicitly.
174 *
175 *   Below VMA operations are blocked after sealing.
176 *   1> Unmapping, moving to another location, and shrinking
177 *	the size, via munmap() and mremap(), can leave an empty
178 *	space, therefore can be replaced with a VMA with a new
179 *	set of attributes.
180 *   2> Moving or expanding a different vma into the current location,
181 *	via mremap().
182 *   3> Modifying a VMA via mmap(MAP_FIXED).
183 *   4> Size expansion, via mremap(), does not appear to pose any
184 *	specific risks to sealed VMAs. It is included anyway because
185 *	the use case is unclear. In any case, users can rely on
186 *	merging to expand a sealed VMA.
187 *   5> mprotect and pkey_mprotect.
188 *   6> Some destructive madvice() behavior (e.g. MADV_DONTNEED)
189 *      for anonymous memory, when users don't have write permission to the
190 *	memory. Those behaviors can alter region contents by discarding pages,
191 *	effectively a memset(0) for anonymous memory.
192 *
193 *  flags: reserved.
194 *
195 * return values:
196 *  zero: success.
197 *  -EINVAL:
198 *   invalid input flags.
199 *   start address is not page aligned.
200 *   Address arange (start + len) overflow.
201 *  -ENOMEM:
202 *   addr is not a valid address (not allocated).
203 *   end (start + len) is not a valid address.
204 *   a gap (unallocated memory) between start and end.
205 *  -EPERM:
206 *  - In 32 bit architecture, sealing is not supported.
207 * Note:
208 *  user can call mseal(2) multiple times, adding a seal on an
209 *  already sealed memory is a no-action (no error).
210 *
211 *  unseal() is not supported.
212 */
213int do_mseal(unsigned long start, size_t len_in, unsigned long flags)
214{
215	size_t len;
216	int ret = 0;
217	unsigned long end;
218	struct mm_struct *mm = current->mm;
219
220	ret = can_do_mseal(flags);
221	if (ret)
222		return ret;
223
224	start = untagged_addr(start);
225	if (!PAGE_ALIGNED(start))
226		return -EINVAL;
227
228	len = PAGE_ALIGN(len_in);
229	/* Check to see whether len was rounded up from small -ve to zero. */
230	if (len_in && !len)
231		return -EINVAL;
232
233	end = start + len;
234	if (end < start)
235		return -EINVAL;
236
237	if (end == start)
238		return 0;
239
240	if (mmap_write_lock_killable(mm))
241		return -EINTR;
242
243	/*
244	 * First pass, this helps to avoid
245	 * partial sealing in case of error in input address range,
246	 * e.g. ENOMEM error.
247	 */
248	ret = check_mm_seal(start, end);
249	if (ret)
250		goto out;
251
252	/*
253	 * Second pass, this should success, unless there are errors
254	 * from vma_modify_flags, e.g. merge/split error, or process
255	 * reaching the max supported VMAs, however, those cases shall
256	 * be rare.
257	 */
258	ret = apply_mm_seal(start, end);
259
260out:
261	mmap_write_unlock(current->mm);
262	return ret;
263}
264
/* mseal(2) syscall entry point; all the work is done in do_mseal(). */
SYSCALL_DEFINE3(mseal, unsigned long, start, size_t, len, unsigned long,
		flags)
{
	return do_mseal(start, len, flags);
}