Linux Audio

Check our new training course

Linux kernel drivers training

Mar 31-Apr 9, 2025, special US time zones
Register
Loading...
Note: File does not exist in v6.8.
  1/*
  2 *  Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
  3 *  Copyright 2003 Andi Kleen, SuSE Labs.
  4 *
  5 *  [ NOTE: this mechanism is now deprecated in favor of the vDSO. ]
  6 *
  7 *  Thanks to hpa@transmeta.com for some useful hint.
  8 *  Special thanks to Ingo Molnar for his early experience with
  9 *  a different vsyscall implementation for Linux/IA32 and for the name.
 10 *
 11 *  vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
 12 *  at virtual address -10Mbyte+1024bytes etc... There are at max 4
 13 *  vsyscalls. One vsyscall can reserve more than 1 slot to avoid
 14 *  jumping out of line if necessary. We cannot add more with this
 15 *  mechanism because older kernels won't return -ENOSYS.
 16 *
 17 *  Note: the concept clashes with user mode linux.  UML users should
 18 *  use the vDSO.
 19 */
 20
 21#include <linux/time.h>
 22#include <linux/init.h>
 23#include <linux/kernel.h>
 24#include <linux/timer.h>
 25#include <linux/seqlock.h>
 26#include <linux/jiffies.h>
 27#include <linux/sysctl.h>
 28#include <linux/clocksource.h>
 29#include <linux/getcpu.h>
 30#include <linux/cpu.h>
 31#include <linux/smp.h>
 32#include <linux/notifier.h>
 33#include <linux/syscalls.h>
 34#include <linux/ratelimit.h>
 35
 36#include <asm/vsyscall.h>
 37#include <asm/pgtable.h>
 38#include <asm/compat.h>
 39#include <asm/page.h>
 40#include <asm/unistd.h>
 41#include <asm/fixmap.h>
 42#include <asm/errno.h>
 43#include <asm/io.h>
 44#include <asm/segment.h>
 45#include <asm/desc.h>
 46#include <asm/topology.h>
 47#include <asm/vgtod.h>
 48#include <asm/traps.h>
 49
 50#define CREATE_TRACE_POINTS
 51#include "vsyscall_trace.h"
 52
 53DEFINE_VVAR(int, vgetcpu_mode);
 54DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
 55{
 56	.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
 57};
 58
 59static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE;
 60
 61static int __init vsyscall_setup(char *str)
 62{
 63	if (str) {
 64		if (!strcmp("emulate", str))
 65			vsyscall_mode = EMULATE;
 66		else if (!strcmp("native", str))
 67			vsyscall_mode = NATIVE;
 68		else if (!strcmp("none", str))
 69			vsyscall_mode = NONE;
 70		else
 71			return -EINVAL;
 72
 73		return 0;
 74	}
 75
 76	return -EINVAL;
 77}
 78early_param("vsyscall", vsyscall_setup);
 79
 80void update_vsyscall_tz(void)
 81{
 82	unsigned long flags;
 83
 84	write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
 85	/* sys_tz has changed */
 86	vsyscall_gtod_data.sys_tz = sys_tz;
 87	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 88}
 89
 90void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
 91			struct clocksource *clock, u32 mult)
 92{
 93	unsigned long flags;
 94
 95	write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
 96
 97	/* copy vsyscall data */
 98	vsyscall_gtod_data.clock.vclock_mode	= clock->archdata.vclock_mode;
 99	vsyscall_gtod_data.clock.cycle_last	= clock->cycle_last;
100	vsyscall_gtod_data.clock.mask		= clock->mask;
101	vsyscall_gtod_data.clock.mult		= mult;
102	vsyscall_gtod_data.clock.shift		= clock->shift;
103	vsyscall_gtod_data.wall_time_sec	= wall_time->tv_sec;
104	vsyscall_gtod_data.wall_time_nsec	= wall_time->tv_nsec;
105	vsyscall_gtod_data.wall_to_monotonic	= *wtm;
106	vsyscall_gtod_data.wall_time_coarse	= __current_kernel_time();
107
108	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
109}
110
111static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
112			      const char *message)
113{
114	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
115	struct task_struct *tsk;
116
117	if (!show_unhandled_signals || !__ratelimit(&rs))
118		return;
119
120	tsk = current;
121
122	printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
123	       level, tsk->comm, task_pid_nr(tsk),
124	       message, regs->ip, regs->cs,
125	       regs->sp, regs->ax, regs->si, regs->di);
126}
127
128static int addr_to_vsyscall_nr(unsigned long addr)
129{
130	int nr;
131
132	if ((addr & ~0xC00UL) != VSYSCALL_START)
133		return -EINVAL;
134
135	nr = (addr & 0xC00UL) >> 10;
136	if (nr >= 3)
137		return -EINVAL;
138
139	return nr;
140}
141
142bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
143{
144	struct task_struct *tsk;
145	unsigned long caller;
146	int vsyscall_nr;
147	long ret;
148
149	/*
150	 * No point in checking CS -- the only way to get here is a user mode
151	 * trap to a high address, which means that we're in 64-bit user code.
152	 */
153
154	WARN_ON_ONCE(address != regs->ip);
155
156	if (vsyscall_mode == NONE) {
157		warn_bad_vsyscall(KERN_INFO, regs,
158				  "vsyscall attempted with vsyscall=none");
159		return false;
160	}
161
162	vsyscall_nr = addr_to_vsyscall_nr(address);
163
164	trace_emulate_vsyscall(vsyscall_nr);
165
166	if (vsyscall_nr < 0) {
167		warn_bad_vsyscall(KERN_WARNING, regs,
168				  "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
169		goto sigsegv;
170	}
171
172	if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
173		warn_bad_vsyscall(KERN_WARNING, regs,
174				  "vsyscall with bad stack (exploit attempt?)");
175		goto sigsegv;
176	}
177
178	tsk = current;
179	if (seccomp_mode(&tsk->seccomp))
180		do_exit(SIGKILL);
181
182	switch (vsyscall_nr) {
183	case 0:
184		ret = sys_gettimeofday(
185			(struct timeval __user *)regs->di,
186			(struct timezone __user *)regs->si);
187		break;
188
189	case 1:
190		ret = sys_time((time_t __user *)regs->di);
191		break;
192
193	case 2:
194		ret = sys_getcpu((unsigned __user *)regs->di,
195				 (unsigned __user *)regs->si,
196				 0);
197		break;
198	}
199
200	if (ret == -EFAULT) {
201		/*
202		 * Bad news -- userspace fed a bad pointer to a vsyscall.
203		 *
204		 * With a real vsyscall, that would have caused SIGSEGV.
205		 * To make writing reliable exploits using the emulated
206		 * vsyscalls harder, generate SIGSEGV here as well.
207		 */
208		warn_bad_vsyscall(KERN_INFO, regs,
209				  "vsyscall fault (exploit attempt?)");
210		goto sigsegv;
211	}
212
213	regs->ax = ret;
214
215	/* Emulate a ret instruction. */
216	regs->ip = caller;
217	regs->sp += 8;
218
219	return true;
220
221sigsegv:
222	force_sig(SIGSEGV, current);
223	return true;
224}
225
226/*
227 * Assume __initcall executes before all user space. Hopefully kmod
228 * doesn't violate that. We'll find out if it does.
229 */
230static void __cpuinit vsyscall_set_cpu(int cpu)
231{
232	unsigned long d;
233	unsigned long node = 0;
234#ifdef CONFIG_NUMA
235	node = cpu_to_node(cpu);
236#endif
237	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
238		write_rdtscp_aux((node << 12) | cpu);
239
240	/*
241	 * Store cpu number in limit so that it can be loaded quickly
242	 * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
243	 */
244	d = 0x0f40000000000ULL;
245	d |= cpu;
246	d |= (node & 0xf) << 12;
247	d |= (node >> 4) << 48;
248
249	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
250}
251
252static void __cpuinit cpu_vsyscall_init(void *arg)
253{
254	/* preemption should be already off */
255	vsyscall_set_cpu(raw_smp_processor_id());
256}
257
258static int __cpuinit
259cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
260{
261	long cpu = (long)arg;
262
263	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
264		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
265
266	return NOTIFY_DONE;
267}
268
269void __init map_vsyscall(void)
270{
271	extern char __vsyscall_page;
272	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
273	extern char __vvar_page;
274	unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
275
276	__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
277		     vsyscall_mode == NATIVE
278		     ? PAGE_KERNEL_VSYSCALL
279		     : PAGE_KERNEL_VVAR);
280	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) !=
281		     (unsigned long)VSYSCALL_START);
282
283	__set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
284	BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) !=
285		     (unsigned long)VVAR_ADDRESS);
286}
287
288static int __init vsyscall_init(void)
289{
290	BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE));
291
292	on_each_cpu(cpu_vsyscall_init, NULL, 1);
293	/* notifier priority > KVM */
294	hotcpu_notifier(cpu_vsyscall_notifier, 30);
295
296	return 0;
297}
298__initcall(vsyscall_init);