/*
 *  Kernel Probes Jump Optimization (Optprobes)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>

#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>

#include "kprobes-common.h"

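/*
 * Recover the original instruction bytes at @addr when @addr falls inside
 * the relative jump written by an optimized kprobe.  The displaced bytes
 * are kept in op->optinsn.copied_insn; the recovered instruction is built
 * in @buf and its address returned, or @addr is returned unchanged when no
 * optimized kprobe covers it.
 */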
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct optimized_kprobe *op;
	struct kprobe *kp;
	long offs;
	int i;

	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
		kp = get_kprobe((void *)addr - i);
		/* This function only handles jump-optimized kprobes */
		if (kp && kprobe_optimized(kp)) {
			op = container_of(kp, struct optimized_kprobe, kp);
			/* If op->list is not empty, op is under optimization */
			if (list_empty(&op->list))
				goto found;
		}
	}

	return addr;
found:
	/*
	 * If the kprobe has been optimized, the original bytes may have been
	 * overwritten by the jump destination address.  In that case the
	 * original bytes must be recovered from the op->optinsn.copied_insn
	 * buffer.
	 */
	memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
	if (addr == (unsigned long)kp->addr) {
		buf[0] = kp->opcode;
		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
	} else {
		offs = addr - (unsigned long)kp->addr - 1;
		memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
	}

	return (unsigned long)buf;
}

/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
	/* movabs $val, %rdi */
	*addr++ = 0x48;
	*addr++ = 0xbf;
#else
	/* movl $val, %eax */
	*addr++ = 0xb8;
#endif
	*(unsigned long *)addr = val;
}

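/*
 * This dummy function only holds the assembly template that is copied into
 * every optimized instruction slot.  The ASM_NOP5 placeholders at
 * optprobe_template_val and optprobe_template_call are later overwritten by
 * synthesize_set_arg1() and synthesize_relcall() in
 * arch_prepare_optimized_kprobe().
 */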
static void __used __kprobes kprobes_optinsn_template_holder(void)
{
	asm volatile (
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			/* We don't bother saving the ss register */
			"	pushq %rsp\n"
			"	pushfq\n"
			SAVE_REGS_STRING
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Move flags to rsp */
			"	movq 144(%rsp), %rdx\n"
			"	movq %rdx, 152(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addq $8, %rsp\n"
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushf\n"
			SAVE_REGS_STRING
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			RESTORE_REGS_STRING
			"	addl $4, %esp\n"	/* skip cs */
			"	popf\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n");
}

#define TMPL_MOVE_IDX \
	((long)&optprobe_template_val - (long)&optprobe_template_entry)
#define TMPL_CALL_IDX \
	((long)&optprobe_template_call - (long)&optprobe_template_entry)
#define TMPL_END_IDX \
	((long)&optprobe_template_end - (long)&optprobe_template_entry)

#define INT3_SIZE sizeof(kprobe_opcode_t)

/* Optimized kprobe callback function: called from optinsn */
static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
	unsigned long flags;

	/* This is possible if op is under delayed unoptimization */
	if (kprobe_disabled(&op->kp))
		return;

	local_irq_save(flags);
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		/* Save skipped registers */
#ifdef CONFIG_X86_64
		regs->cs = __KERNEL_CS;
#else
		regs->cs = __KERNEL_CS | get_kernel_rpl();
		regs->gs = 0;
#endif
		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
		regs->orig_ax = ~0UL;

		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
	local_irq_restore(flags);
}

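/*
 * Copy and relocate instructions from @src into @dest until at least
 * RELATIVEJUMP_SIZE bytes are covered, so that everything clobbered by the
 * relative jump can be executed out of line.  Returns the copied length,
 * -EINVAL if an instruction cannot be copied or boosted, or -EBUSY if the
 * range is reserved by ftrace, alternatives or jump labels.
 */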
static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
{
	int len = 0, ret;

	while (len < RELATIVEJUMP_SIZE) {
		ret = __copy_instruction(dest + len, src + len);
		if (!ret || !can_boost(dest + len))
			return -EINVAL;
		len += ret;
	}
	/* Check whether the address range is reserved */
	if (ftrace_text_reserved(src, src + len - 1) ||
	    alternatives_text_reserved(src, src + len - 1) ||
	    jump_label_text_reserved(src, src + len - 1))
		return -EBUSY;

	return len;
}

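/*
 * Opcode 0xff with ModRM reg field 4 or 5 encodes a near or far indirect
 * jump, and 0xea is the legacy direct far jump.  Their targets cannot be
 * checked statically, so they always block optimization.
 */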
/* Check whether insn is indirect jump */
static int __kprobes insn_is_indirect_jump(struct insn *insn)
{
	return ((insn->opcode.bytes[0] == 0xff &&
		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
}

/* Check whether insn jumps into specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
	unsigned long target = 0;

	switch (insn->opcode.bytes[0]) {
	case 0xe0:	/* loopne */
	case 0xe1:	/* loope */
	case 0xe2:	/* loop */
	case 0xe3:	/* jcxz */
	case 0xe9:	/* near relative jump */
	case 0xeb:	/* short relative jump */
		break;
	case 0x0f:
		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
			break;
		return 0;
	default:
		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
			break;
		return 0;
	}
	target = (unsigned long)insn->next_byte + insn->immediate.value;

	return (start <= target && target <= start + len);
}

/* Decode the whole function to ensure no instruction jumps into the target */
static int __kprobes can_optimize(unsigned long paddr)
{
	unsigned long addr, size = 0, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* Lookup symbol including addr */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/*
	 * Do not optimize in the entry code due to the unstable
	 * stack handling.
	 */
	if ((paddr >= (unsigned long)__entry_text_start) &&
	    (paddr <  (unsigned long)__entry_text_end))
		return 0;

	/* Check there is enough space for a relative jump. */
	if (size - offset < RELATIVEJUMP_SIZE)
		return 0;

	/* Decode instructions */
	addr = paddr - offset;
	while (addr < paddr - offset + size) { /* Decode until function end */
		if (search_exception_tables(addr))
			/*
			 * Since some fixup code may jump back into this
			 * function, we can't optimize kprobes in it.
			 */
			return 0;
		kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr));
		insn_get_length(&insn);
		/* Another subsystem puts a breakpoint */
		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
			return 0;
		/* Recover address */
		insn.kaddr = (void *)addr;
		insn.next_byte = (void *)(addr + insn.length);
		/* Check that no instruction jumps into the target */
		if (insn_is_indirect_jump(&insn) ||
		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
					 RELATIVE_ADDR_SIZE))
			return 0;
		addr += insn.length;
	}

	return 1;
}

/* Check optimized_kprobe can actually be optimized. */
int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	int i;
	struct kprobe *p;

	for (i = 1; i < op->optinsn.size; i++) {
		p = get_kprobe(op->kp.addr + i);
		if (p && !kprobe_disabled(p))
			return -EEXIST;
	}

	return 0;
}

/* Check the addr is within the optimized instructions. */
int __kprobes
arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + op->optinsn.size > addr);
}

/* Free optimized instruction slot */
static __kprobes
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	if (op->optinsn.insn) {
		free_optinsn_slot(op->optinsn.insn, dirty);
		op->optinsn.insn = NULL;
		op->optinsn.size = 0;
	}
}

void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy the instructions that will be replaced by the jump at the probe site.
 * The target instructions MUST be relocatable (checked inside).
 * This is called when a new aggr(opt)probe is allocated or reused.
 */
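/*
 * Layout of the out-of-line buffer prepared below:
 *
 *	[ template copy             ]  TMPL_END_IDX bytes
 *	[ relocated original insns  ]  op->optinsn.size bytes
 *	[ jmp back behind the probe ]  RELATIVEJUMP_SIZE bytes
 *
 * The template's NOP placeholders are patched so that the buffer passes
 * &op as the first argument and calls optimized_callback().
 */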
int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
{
	u8 *buf;
	int ret;
	long rel;

	if (!can_optimize((unsigned long)op->kp.addr))
		return -EILSEQ;

	op->optinsn.insn = get_optinsn_slot();
	if (!op->optinsn.insn)
		return -ENOMEM;

	/*
	 * Verify that the address gap fits in the 2GB range reachable by
	 * a relative jump.
	 */
	rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
	if (abs(rel) > 0x7fffffff)
		return -ERANGE;

	buf = (u8 *)op->optinsn.insn;

	/* Copy instructions into the out-of-line buffer */
	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
	if (ret < 0) {
		__arch_remove_optimized_kprobe(op, 0);
		return ret;
	}
	op->optinsn.size = ret;

	/* Copy arch-dep-instance from template */
	memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);

	/* Set probe information */
	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

	/* Set probe function call */
	synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);

	/* Set returning jmp instruction at the tail of out-of-line buffer */
	synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
			   (u8 *)op->kp.addr + op->optinsn.size);

	flush_icache_range((unsigned long) buf,
			   (unsigned long) buf + TMPL_END_IDX +
			   op->optinsn.size + RELATIVEJUMP_SIZE);
	return 0;
}

#define MAX_OPTIMIZE_PROBES 256
static struct text_poke_param *jump_poke_params;
static struct jump_poke_buffer {
	u8 buf[RELATIVEJUMP_SIZE];
} *jump_poke_bufs;

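/*
 * Prepare one text_poke_param that rewrites the probed int3 plus the
 * following RELATIVE_ADDR_SIZE bytes into a relative jump to the
 * out-of-line buffer.  The displaced bytes are saved in
 * op->optinsn.copied_insn first so they can be restored later.
 */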
static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
					    u8 *insn_buf,
					    struct optimized_kprobe *op)
{
	s32 rel = (s32)((long)op->optinsn.insn -
			((long)op->kp.addr + RELATIVEJUMP_SIZE));

	/* Backup instructions which will be replaced by jump address */
	memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
	       RELATIVE_ADDR_SIZE);

	insn_buf[0] = RELATIVEJUMP_OPCODE;
	*(s32 *)(&insn_buf[1]) = rel;

	tprm->addr = op->kp.addr;
	tprm->opcode = insn_buf;
	tprm->len = RELATIVEJUMP_SIZE;
}

/*
 * Replace breakpoints (int3) with relative jumps.
 * Caller must hold kprobe_mutex and text_mutex.
 */
void __kprobes arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;
	int c = 0;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		WARN_ON(kprobe_disabled(&op->kp));
		/* Setup param */
		setup_optimize_kprobe(&jump_poke_params[c],
				      jump_poke_bufs[c].buf, op);
		list_del_init(&op->list);
		if (++c >= MAX_OPTIMIZE_PROBES)
			break;
	}

	/*
	 * text_poke_smp doesn't support NMI/MCE code modifying.
	 * However, since kprobes itself also doesn't support NMI/MCE
	 * code probing, it's not a problem.
	 */
	text_poke_smp_batch(jump_poke_params, c);
}

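/*
 * Prepare one text_poke_param that restores the original probed site:
 * an int3 for the kprobe followed by the saved instruction bytes.
 */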
static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
					      u8 *insn_buf,
					      struct optimized_kprobe *op)
{
	/* Set int3 to first byte for kprobes */
	insn_buf[0] = BREAKPOINT_INSTRUCTION;
	memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);

	tprm->addr = op->kp.addr;
	tprm->opcode = insn_buf;
	tprm->len = RELATIVEJUMP_SIZE;
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * Caller must hold kprobe_mutex.
 */
extern void arch_unoptimize_kprobes(struct list_head *oplist,
				    struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;
	int c = 0;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		/* Setup param */
		setup_unoptimize_kprobe(&jump_poke_params[c],
					jump_poke_bufs[c].buf, op);
		list_move(&op->list, done_list);
		if (++c >= MAX_OPTIMIZE_PROBES)
			break;
	}

	/*
	 * text_poke_smp doesn't support NMI/MCE code modifying.
	 * However, since kprobes itself also doesn't support NMI/MCE
	 * code probing, it's not a problem.
	 */
	text_poke_smp_batch(jump_poke_params, c);
}

/* Replace a relative jump with a breakpoint (int3).  */
void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	u8 buf[RELATIVEJUMP_SIZE];

	/* Set int3 to first byte for kprobes */
	buf[0] = BREAKPOINT_INSTRUCTION;
	memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
	text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
}

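/*
 * If the kprobe hit via int3 is jump-optimized, divert execution to the
 * relocated instructions in the out-of-line buffer instead of
 * single-stepping.  Returns 1 when the detour was taken.
 */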
int __kprobes
setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
	struct optimized_kprobe *op;

	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
		/* This kprobe is really able to run optimized path. */
		op = container_of(p, struct optimized_kprobe, kp);
		/* Detour through copied instructions */
		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
		if (!reenter)
			reset_current_kprobe();
		preempt_enable_no_resched();
		return 1;
	}
	return 0;
}

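/*
 * Allocate the poke parameter array and per-probe instruction buffers used
 * for batched (un)optimization above.
 */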
int __kprobes arch_init_optprobes(void)
{
	/* Allocate code buffer and parameter array */
	jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
				 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
	if (!jump_poke_bufs)
		return -ENOMEM;

	jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
				   MAX_OPTIMIZE_PROBES, GFP_KERNEL);
	if (!jump_poke_params) {
		kfree(jump_poke_bufs);
		jump_poke_bufs = NULL;
		return -ENOMEM;
	}

	return 0;
}
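
For reference, a minimal sketch (not part of the file above) of a module that registers a probe this optimizer can convert to a relative jump: only a pre_handler is set, since kprobes with a post_handler or break_handler are not jump-optimized. The probed symbol "do_fork" and the messages are illustrative assumptions; any probe-able kernel function outside the entry code would do, and whether the probe is actually optimized depends on CONFIG_OPTPROBES and can_optimize().

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>

/* Runs before the probed instruction; keep it short and non-sleeping. */
static int sample_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("optprobe sample: hit at %p\n", p->addr);
	return 0;	/* continue with the original instruction */
}

static struct kprobe sample_kp = {
	.symbol_name	= "do_fork",	/* assumed probe target */
	.pre_handler	= sample_pre_handler,
	/* no post_handler: required for jump optimization */
};

static int __init sample_init(void)
{
	return register_kprobe(&sample_kp);
}

static void __exit sample_exit(void)
{
	unregister_kprobe(&sample_kp);
}

module_init(sample_init);
module_exit(sample_exit);
MODULE_LICENSE("GPL");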