// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2007 Andi Kleen, SUSE Labs.
 *
 * This contains most of the x86 vDSO kernel-side code.
 */
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
#include <linux/time_namespace.h>

#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/vvar.h>
#include <asm/tlb.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
#include <clocksource/hyperv_timer.h>

#undef _ASM_X86_VVAR_H
#define EMIT_VVAR(name, offset)	\
	const size_t name ## _offset = offset;
#include <asm/vvar.h>

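/*
 * Re-including <asm/vvar.h> with the EMIT_VVAR() definition above turns each
 * vvar declaration into a "<name>_offset" constant holding that variable's
 * byte offset within the vvar page.  arch_get_vdso_data() uses the offset of
 * the generic vdso_data to locate it inside a given vvar page.
 */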
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
	return (struct vdso_data *)(vvar_page + _vdso_data_offset);
}
#undef EMIT_VVAR

unsigned int vclocks_used __read_mostly;

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif

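/*
 * Called at boot for each vdso image: sanity-check the blob size and patch
 * the alternative instructions embedded in the image for the running CPU.
 */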
void __init init_vdso_image(const struct vdso_image *image)
{
	BUG_ON(image->size % PAGE_SIZE != 0);

	apply_alternatives((struct alt_instr *)(image->data + image->alt),
			   (struct alt_instr *)(image->data + image->alt +
						image->alt_len));
}

static const struct vm_special_mapping vvar_mapping;
struct linux_binprm;

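/*
 * Fault handler for the [vdso] mapping: hand back the page of the vdso text
 * blob that corresponds to the faulting offset.
 */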
static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
		      struct vm_area_struct *vma, struct vm_fault *vmf)
{
	const struct vdso_image *image = vma->vm_mm->context.vdso_image;

	if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
		return VM_FAULT_SIGBUS;

	vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
	get_page(vmf->page);
	return 0;
}

static void vdso_fix_landing(const struct vdso_image *image,
		struct vm_area_struct *new_vma)
{
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	if (in_ia32_syscall() && image == &vdso_image_32) {
		struct pt_regs *regs = current_pt_regs();
		unsigned long vdso_land = image->sym_int80_landing_pad;
		unsigned long old_land_addr = vdso_land +
			(unsigned long)current->mm->context.vdso;

		/* Fixing userspace landing - look at do_fast_syscall_32 */
		if (regs->ip == old_land_addr)
			regs->ip = new_vma->vm_start + vdso_land;
	}
#endif
}

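/*
 * Called when the vdso VMA is moved (e.g. by mremap()): record the new
 * location and, for the 32-bit vdso, fix up a return IP that still points
 * at the old int80 landing pad.
 */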
static int vdso_mremap(const struct vm_special_mapping *sm,
		struct vm_area_struct *new_vma)
{
	const struct vdso_image *image = current->mm->context.vdso_image;

	vdso_fix_landing(image, new_vma);
	current->mm->context.vdso = (void __user *)new_vma->vm_start;

	return 0;
}

#ifdef CONFIG_TIME_NS
/*
 * The vvar page layout depends on whether a task belongs to the root or
 * non-root time namespace. Whenever a task changes its namespace, the VVAR
 * page tables are cleared and then they will be re-faulted with a
 * corresponding layout.
 * See also the comment near timens_setup_vdso_data() for details.
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_read_lock(mm);
	for_each_vma(vmi, vma) {
		unsigned long size = vma->vm_end - vma->vm_start;

		if (vma_is_special_mapping(vma, &vvar_mapping))
			zap_page_range(vma, vma->vm_start, size);
	}
	mmap_read_unlock(mm);

	return 0;
}
#endif

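/*
 * Fault handler for the [vvar] mapping: translate the faulting offset into a
 * symbol offset relative to the vdso text and install the matching pfn
 * (vvar data, pvclock, Hyper-V TSC or timens page).
 */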
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
		      struct vm_area_struct *vma, struct vm_fault *vmf)
{
	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
	unsigned long pfn;
	long sym_offset;

	if (!image)
		return VM_FAULT_SIGBUS;

	sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
		image->sym_vvar_start;

	/*
	 * Sanity check: a symbol offset of zero means that the page
	 * does not exist for this vdso image, not that the page is at
	 * offset zero relative to the text mapping. This should be
	 * impossible here, because sym_offset should only be zero for
	 * the page past the end of the vvar mapping.
	 */
	if (sym_offset == 0)
		return VM_FAULT_SIGBUS;

	if (sym_offset == image->sym_vvar_page) {
		struct page *timens_page = find_timens_vvar_page(vma);

		pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;

		/*
		 * If a task belongs to a time namespace then a namespace
		 * specific VVAR is mapped with the sym_vvar_page offset and
		 * the real VVAR page is mapped with the sym_timens_page
		 * offset.
		 * See also the comment near timens_setup_vdso_data().
		 */
		if (timens_page) {
			unsigned long addr;
			vm_fault_t err;

			/*
			 * Optimization: when running inside a time namespace,
			 * pre-fault the real VVAR page here as well. The timens
			 * page only carries clock offsets relative to VVAR, so
			 * the vDSO code will fault the VVAR page in shortly anyway.
			 */
			addr = vmf->address + (image->sym_timens_page - sym_offset);
			err = vmf_insert_pfn(vma, addr, pfn);
			if (unlikely(err & VM_FAULT_ERROR))
				return err;

			pfn = page_to_pfn(timens_page);
		}

		return vmf_insert_pfn(vma, vmf->address, pfn);
	} else if (sym_offset == image->sym_pvclock_page) {
		struct pvclock_vsyscall_time_info *pvti =
			pvclock_get_pvti_cpu0_va();
		if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK)) {
			return vmf_insert_pfn_prot(vma, vmf->address,
					__pa(pvti) >> PAGE_SHIFT,
					pgprot_decrypted(vma->vm_page_prot));
		}
	} else if (sym_offset == image->sym_hvclock_page) {
		pfn = hv_get_tsc_pfn();

		if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
			return vmf_insert_pfn(vma, vmf->address, pfn);
	} else if (sym_offset == image->sym_timens_page) {
		struct page *timens_page = find_timens_vvar_page(vma);

		if (!timens_page)
			return VM_FAULT_SIGBUS;

		pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
		return vmf_insert_pfn(vma, vmf->address, pfn);
	}

	return VM_FAULT_SIGBUS;
}

static const struct vm_special_mapping vdso_mapping = {
	.name = "[vdso]",
	.fault = vdso_fault,
	.mremap = vdso_mremap,
};
static const struct vm_special_mapping vvar_mapping = {
	.name = "[vvar]",
	.fault = vvar_fault,
};

/*
 * Add vdso and vvar mappings to current process.
 * @image - blob to map
 * @addr - request a specific address (zero to map at free addr)
 */
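/*
 * Note that sym_vvar_start is negative: the vvar pages sit below the vdso
 * text, so the total reservation is image->size - image->sym_vvar_start bytes
 * and the text begins -image->sym_vvar_start bytes into it.
 */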
static int map_vdso(const struct vdso_image *image, unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long text_start;
	int ret = 0;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm,
				       text_start,
				       image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &vdso_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm,
				       addr,
				       -image->sym_vvar_start,
				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
				       VM_PFNMAP,
				       &vvar_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		do_munmap(mm, text_start, image->size, NULL);
	} else {
		current->mm->context.vdso = (void __user *)text_start;
		current->mm->context.vdso_image = image;
	}

up_fail:
	mmap_write_unlock(mm);
	return ret;
}

#ifdef CONFIG_X86_64
/*
 * Put the vdso above the (randomized) stack with another randomized
 * offset. This way there is no hole in the middle of address space.
 * To save memory make sure it is still in the same PTE as the stack
 * top. This doesn't give that many random bits.
 *
 * Note that this algorithm is imperfect: the distribution of the vdso
 * start address within a PMD is biased toward the end.
 *
 * Only used for the 64-bit and x32 vdsos.
 */
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
	unsigned long addr, end;
	unsigned offset;

	/*
	 * Round up the start address. It can start out unaligned as a result
	 * of stack start randomization.
	 */
	start = PAGE_ALIGN(start);

	/* Round the lowest possible end address up to a PMD boundary. */
	end = (start + len + PMD_SIZE - 1) & PMD_MASK;
	if (end >= TASK_SIZE_MAX)
		end = TASK_SIZE_MAX;
	end -= len;

	if (end > start) {
		offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1);
		addr = start + (offset << PAGE_SHIFT);
	} else {
		addr = start;
	}

	/*
	 * Forcibly align the final address in case we have a hardware
	 * issue that requires alignment for performance reasons.
	 */
	addr = align_vdso_addr(addr);

	return addr;
}

static int map_vdso_randomized(const struct vdso_image *image)
{
	unsigned long addr = vdso_addr(current->mm->start_stack, image->size - image->sym_vvar_start);

	return map_vdso(image, addr);
}
#endif

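/*
 * Map a vdso blob exactly once per mm, e.g. for the ARCH_MAP_VDSO_*
 * arch_prctl() requests; refuse if any vdso/vvar special mapping is
 * already present.
 */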
int map_vdso_once(const struct vdso_image *image, unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_write_lock(mm);
	/*
	 * Check if we have already mapped the vdso blob - fail if so, to
	 * keep userspace from abusing install_special_mapping(), which may
	 * not handle accounting and rlimits correctly.
	 * We could search the VMAs near context.vdso, but this is a slow
	 * path, so explicitly check all VMAs to be completely sure.
	 */
	for_each_vma(vmi, vma) {
		if (vma_is_special_mapping(vma, &vdso_mapping) ||
				vma_is_special_mapping(vma, &vvar_mapping)) {
			mmap_write_unlock(mm);
			return -EEXIST;
		}
	}
	mmap_write_unlock(mm);

	return map_vdso(image, addr);
}

#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static int load_vdso32(void)
{
	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
		return 0;

	return map_vdso(&vdso_image_32, 0);
}
#endif

#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	if (!vdso64_enabled)
		return 0;

	return map_vdso_randomized(&vdso_image_64);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp, bool x32)
{
#ifdef CONFIG_X86_X32_ABI
	if (x32) {
		if (!vdso64_enabled)
			return 0;
		return map_vdso_randomized(&vdso_image_x32);
	}
#endif
#ifdef CONFIG_IA32_EMULATION
	return load_vdso32();
#else
	return 0;
#endif
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return load_vdso32();
}
#endif

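/*
 * Report whether a trapped user IP sits on one of the 32-bit vdso sigreturn
 * landing pads, so generic syscall handling can treat vdso-issued sigreturn
 * specially.
 */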
bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
{
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
	const struct vdso_image *image = current->mm->context.vdso_image;
	unsigned long vdso = (unsigned long) current->mm->context.vdso;

	if (in_ia32_syscall() && image == &vdso_image_32) {
		if (regs->ip == vdso + image->sym_vdso32_sigreturn_landing_pad ||
		    regs->ip == vdso + image->sym_vdso32_rt_sigreturn_landing_pad)
			return true;
	}
#endif
	return false;
}

#ifdef CONFIG_X86_64
static __init int vdso_setup(char *s)
{
	vdso64_enabled = simple_strtoul(s, NULL, 0);
	return 1;
}
__setup("vdso=", vdso_setup);

static int __init init_vdso(void)
{
	BUILD_BUG_ON(VDSO_CLOCKMODE_MAX >= 32);

	init_vdso_image(&vdso_image_64);

#ifdef CONFIG_X86_X32_ABI
	init_vdso_image(&vdso_image_x32);
#endif

	return 0;
}
subsys_initcall(init_vdso);
#endif /* CONFIG_X86_64 */
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2007 Andi Kleen, SUSE Labs.
 *
 * This contains most of the x86 vDSO kernel-side code.
 */
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
#include <linux/time_namespace.h>

#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/tlb.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
#include <asm/vdso/vsyscall.h>
#include <clocksource/hyperv_timer.h>

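/*
 * In this layout the generic vdso_data lives at the very start of the vvar
 * page, so no offset arithmetic is needed here.
 */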
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
	return (struct vdso_data *)vvar_page;
}

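/* Page-aligned kernel-side storage backing the vvar data page. */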
static union vdso_data_store vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;

unsigned int vclocks_used __read_mostly;

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif

int __init init_vdso_image(const struct vdso_image *image)
{
	BUILD_BUG_ON(VDSO_CLOCKMODE_MAX >= 32);
	BUG_ON(image->size % PAGE_SIZE != 0);

	apply_alternatives((struct alt_instr *)(image->data + image->alt),
			   (struct alt_instr *)(image->data + image->alt +
						image->alt_len),
			   NULL);

	return 0;
}

static const struct vm_special_mapping vvar_mapping;
struct linux_binprm;

static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
		      struct vm_area_struct *vma, struct vm_fault *vmf)
{
	const struct vdso_image *image = vma->vm_mm->context.vdso_image;

	if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
		return VM_FAULT_SIGBUS;

	vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
	get_page(vmf->page);
	return 0;
}

static void vdso_fix_landing(const struct vdso_image *image,
		struct vm_area_struct *new_vma)
{
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	if (in_ia32_syscall() && image == &vdso_image_32) {
		struct pt_regs *regs = current_pt_regs();
		unsigned long vdso_land = image->sym_int80_landing_pad;
		unsigned long old_land_addr = vdso_land +
			(unsigned long)current->mm->context.vdso;

		/* Fixing userspace landing - look at do_fast_syscall_32 */
		if (regs->ip == old_land_addr)
			regs->ip = new_vma->vm_start + vdso_land;
	}
#endif
}

static int vdso_mremap(const struct vm_special_mapping *sm,
		struct vm_area_struct *new_vma)
{
	const struct vdso_image *image = current->mm->context.vdso_image;

	vdso_fix_landing(image, new_vma);
	current->mm->context.vdso = (void __user *)new_vma->vm_start;

	return 0;
}

#ifdef CONFIG_TIME_NS
/*
 * The vvar page layout depends on whether a task belongs to the root or
 * non-root time namespace. Whenever a task changes its namespace, the VVAR
 * page tables are cleared and then they will be re-faulted with a
 * corresponding layout.
 * See also the comment near timens_setup_vdso_data() for details.
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_read_lock(mm);
	for_each_vma(vmi, vma) {
		if (vma_is_special_mapping(vma, &vvar_mapping))
			zap_vma_pages(vma);
	}
	mmap_read_unlock(mm);

	return 0;
}
#endif

static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
		      struct vm_area_struct *vma, struct vm_fault *vmf)
{
	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
	unsigned long pfn;
	long sym_offset;

	if (!image)
		return VM_FAULT_SIGBUS;

	sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
		image->sym_vvar_start;

	/*
	 * Sanity check: a symbol offset of zero means that the page
	 * does not exist for this vdso image, not that the page is at
	 * offset zero relative to the text mapping. This should be
	 * impossible here, because sym_offset should only be zero for
	 * the page past the end of the vvar mapping.
	 */
	if (sym_offset == 0)
		return VM_FAULT_SIGBUS;

	if (sym_offset == image->sym_vvar_page) {
		struct page *timens_page = find_timens_vvar_page(vma);

		pfn = __pa_symbol(vdso_data) >> PAGE_SHIFT;

		/*
		 * If a task belongs to a time namespace then a namespace
		 * specific VVAR is mapped with the sym_vvar_page offset and
		 * the real VVAR page is mapped with the sym_timens_page
		 * offset.
		 * See also the comment near timens_setup_vdso_data().
		 */
		if (timens_page) {
			unsigned long addr;
			vm_fault_t err;

			/*
			 * Optimization: when running inside a time namespace,
			 * pre-fault the real VVAR page here as well. The timens
			 * page only carries clock offsets relative to VVAR, so
			 * the vDSO code will fault the VVAR page in shortly anyway.
			 */
			addr = vmf->address + (image->sym_timens_page - sym_offset);
			err = vmf_insert_pfn(vma, addr, pfn);
			if (unlikely(err & VM_FAULT_ERROR))
				return err;

			pfn = page_to_pfn(timens_page);
		}

		return vmf_insert_pfn(vma, vmf->address, pfn);

	} else if (sym_offset == image->sym_timens_page) {
		struct page *timens_page = find_timens_vvar_page(vma);

		if (!timens_page)
			return VM_FAULT_SIGBUS;

		pfn = __pa_symbol(vdso_data) >> PAGE_SHIFT;
		return vmf_insert_pfn(vma, vmf->address, pfn);
	}

	return VM_FAULT_SIGBUS;
}

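/*
 * Fault handler for the separate [vvar_vclock] mapping: install the pvclock
 * or Hyper-V TSC page, selected by page offset, when the corresponding clock
 * mode has been used.
 */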
static vm_fault_t vvar_vclock_fault(const struct vm_special_mapping *sm,
				    struct vm_area_struct *vma, struct vm_fault *vmf)
{
	switch (vmf->pgoff) {
#ifdef CONFIG_PARAVIRT_CLOCK
	case VDSO_PAGE_PVCLOCK_OFFSET:
	{
		struct pvclock_vsyscall_time_info *pvti =
			pvclock_get_pvti_cpu0_va();

		if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK))
			return vmf_insert_pfn_prot(vma, vmf->address,
					__pa(pvti) >> PAGE_SHIFT,
					pgprot_decrypted(vma->vm_page_prot));
		break;
	}
#endif /* CONFIG_PARAVIRT_CLOCK */
#ifdef CONFIG_HYPERV_TIMER
	case VDSO_PAGE_HVCLOCK_OFFSET:
	{
		unsigned long pfn = hv_get_tsc_pfn();

		if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
			return vmf_insert_pfn(vma, vmf->address, pfn);
		break;
	}
#endif /* CONFIG_HYPERV_TIMER */
	}

	return VM_FAULT_SIGBUS;
}

static const struct vm_special_mapping vdso_mapping = {
	.name = "[vdso]",
	.fault = vdso_fault,
	.mremap = vdso_mremap,
};
static const struct vm_special_mapping vvar_mapping = {
	.name = "[vvar]",
	.fault = vvar_fault,
};
static const struct vm_special_mapping vvar_vclock_mapping = {
	.name = "[vvar_vclock]",
	.fault = vvar_vclock_fault,
};

/*
 * Add vdso and vvar mappings to current process.
 * @image - blob to map
 * @addr - request a specific address (zero to map at free addr)
 */
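/*
 * Three special mappings are installed here: the vdso text, the ordinary
 * vvar pages, and a trailing [vvar_vclock] area for the hypervisor clock
 * pages.
 */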
static int map_vdso(const struct vdso_image *image, unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long text_start;
	int ret = 0;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm,
				       text_start,
				       image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &vdso_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm,
				       addr,
				       (__VVAR_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE,
				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
				       VM_PFNMAP,
				       &vvar_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		do_munmap(mm, text_start, image->size, NULL);
		goto up_fail;
	}

	vma = _install_special_mapping(mm,
				       addr + (__VVAR_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE,
				       VDSO_NR_VCLOCK_PAGES * PAGE_SIZE,
				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
				       VM_PFNMAP,
				       &vvar_vclock_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		do_munmap(mm, text_start, image->size, NULL);
		do_munmap(mm, addr, image->size, NULL);
		goto up_fail;
	}

	current->mm->context.vdso = (void __user *)text_start;
	current->mm->context.vdso_image = image;

up_fail:
	mmap_write_unlock(mm);
	return ret;
}

int map_vdso_once(const struct vdso_image *image, unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_write_lock(mm);
	/*
	 * Check if we have already mapped the vdso blob - fail if so, to
	 * keep userspace from abusing install_special_mapping(), which may
	 * not handle accounting and rlimits correctly.
	 * We could search the VMAs near context.vdso, but this is a slow
	 * path, so explicitly check all VMAs to be completely sure.
	 */
	for_each_vma(vmi, vma) {
		if (vma_is_special_mapping(vma, &vdso_mapping) ||
				vma_is_special_mapping(vma, &vvar_mapping) ||
				vma_is_special_mapping(vma, &vvar_vclock_mapping)) {
			mmap_write_unlock(mm);
			return -EEXIST;
		}
	}
	mmap_write_unlock(mm);

	return map_vdso(image, addr);
}

#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static int load_vdso32(void)
{
	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
		return 0;

	return map_vdso(&vdso_image_32, 0);
}
#endif

#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	if (!vdso64_enabled)
		return 0;

	return map_vdso(&vdso_image_64, 0);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp, bool x32)
{
#ifdef CONFIG_X86_X32_ABI
	if (x32) {
		if (!vdso64_enabled)
			return 0;
		return map_vdso(&vdso_image_x32, 0);
	}
#endif
#ifdef CONFIG_IA32_EMULATION
	return load_vdso32();
#else
	return 0;
#endif
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return load_vdso32();
}
#endif

bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
{
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
	const struct vdso_image *image = current->mm->context.vdso_image;
	unsigned long vdso = (unsigned long) current->mm->context.vdso;

	if (in_ia32_syscall() && image == &vdso_image_32) {
		if (regs->ip == vdso + image->sym_vdso32_sigreturn_landing_pad ||
		    regs->ip == vdso + image->sym_vdso32_rt_sigreturn_landing_pad)
			return true;
	}
#endif
	return false;
}

#ifdef CONFIG_X86_64
static __init int vdso_setup(char *s)
{
	vdso64_enabled = simple_strtoul(s, NULL, 0);
	return 1;
}
__setup("vdso=", vdso_setup);
#endif /* CONFIG_X86_64 */