Linux kernel source: arch/x86/kernel/kprobes/opt.c — two snapshots of the same file follow, first v5.4, then v6.2.
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 *  Kernel Probes Jump Optimization (Optprobes)
  4 *
  5 * Copyright (C) IBM Corporation, 2002, 2004
  6 * Copyright (C) Hitachi Ltd., 2012
  7 */
  8#include <linux/kprobes.h>
 
  9#include <linux/ptrace.h>
 10#include <linux/string.h>
 11#include <linux/slab.h>
 12#include <linux/hardirq.h>
 13#include <linux/preempt.h>
 14#include <linux/extable.h>
 15#include <linux/kdebug.h>
 16#include <linux/kallsyms.h>
 
 17#include <linux/ftrace.h>
 18#include <linux/frame.h>
 
 
 19
 20#include <asm/text-patching.h>
 21#include <asm/cacheflush.h>
 22#include <asm/desc.h>
 23#include <asm/pgtable.h>
 24#include <linux/uaccess.h>
 25#include <asm/alternative.h>
 26#include <asm/insn.h>
 27#include <asm/debugreg.h>
 28#include <asm/set_memory.h>
 29#include <asm/sections.h>
 30#include <asm/nospec-branch.h>
 31
 32#include "common.h"
 33
/*
 * Recover the original instruction bytes at @addr into @buf when @addr
 * falls inside the 5-byte relative jump installed by an optimized
 * kprobe.  Returns @addr unchanged when no optimized kprobe covers it,
 * 0UL when the kernel read fails, or the address of @buf containing
 * the recovered bytes.
 */
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct optimized_kprobe *op;
	struct kprobe *kp;
	long offs;
	int i;

	/* Walk back up to the jump size looking for a kprobe covering addr */
	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
		kp = get_kprobe((void *)addr - i);
		/* This function only handles jump-optimized kprobe */
		if (kp && kprobe_optimized(kp)) {
			op = container_of(kp, struct optimized_kprobe, kp);
			/* If op->list is not empty, op is under optimizing */
			if (list_empty(&op->list))
				goto found;
		}
	}

	return addr;
found:
	/*
	 * If the kprobe can be optimized, original bytes which can be
	 * overwritten by jump destination address. In this case, original
	 * bytes must be recovered from op->optinsn.copied_insn buffer.
	 */
	if (probe_kernel_read(buf, (void *)addr,
		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
		return 0UL;

	if (addr == (unsigned long)kp->addr) {
		/* First byte is the saved opcode, rest from the copy buffer */
		buf[0] = kp->opcode;
		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
	} else {
		/* addr lands inside the displacement; copy the tail of it */
		offs = addr - (unsigned long)kp->addr - 1;
		memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
	}

	return (unsigned long)buf;
}
 73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 74/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
 75static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
 76{
 77#ifdef CONFIG_X86_64
 78	*addr++ = 0x48;
 79	*addr++ = 0xbf;
 80#else
 81	*addr++ = 0xb8;
 82#endif
 83	*(unsigned long *)addr = val;
 84}
 85
/*
 * Optprobe trampoline template, emitted into .rodata and copied into
 * each probe's out-of-line slot.  It builds a pt_regs frame on the
 * stack, loads the optimized_kprobe pointer and calls
 * optimized_callback(), then restores registers.  The NOP runs at the
 * _val/_call labels are patched later by arch_prepare_optimized_kprobe().
 */
asm (
			".pushsection .rodata\n"
			"optprobe_template_func:\n"
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			/* We don't bother saving the ss register */
			"	pushq %rsp\n"
			"	pushfq\n"
			SAVE_REGS_STRING
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Move flags to rsp */
			"	movq 18*8(%rsp), %rdx\n"
			"	movq %rdx, 19*8(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addq $8, %rsp\n"
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushl %esp\n"
			"	pushfl\n"
			SAVE_REGS_STRING
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Move flags into esp */
			"	movl 14*4(%esp), %edx\n"
			"	movl %edx, 15*4(%esp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addl $4, %esp\n"
			"	popfl\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n"
			".popsection\n");

/* Declared so objtool can be told not to validate this data-only "function" */
void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);
136
 
 
/* Byte offsets of the patchable sites within the trampoline template. */
#define TMPL_MOVE_IDX \
	((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
	((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
	((long)optprobe_template_end - (long)optprobe_template_entry)

/* Size of the int3 breakpoint instruction (one opcode byte). */
#define INT3_SIZE sizeof(kprobe_opcode_t)
145
/* Optimized kprobe call back function: called from optinsn */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	/* This is possible if op is under delayed unoptimizing */
	if (kprobe_disabled(&op->kp))
		return;

	preempt_disable();
	if (kprobe_running()) {
		/* Nested probe hit: just account the miss */
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
		/* Save skipped registers */
		regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
		regs->cs |= get_kernel_rpl();
		regs->gs = 0;
#endif
		/* Make regs->ip look like a real int3 trap at the probe */
		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
		regs->orig_ax = ~0UL;

		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
	preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);
176
/*
 * Copy whole instructions from @src to @dest (relocated to execute at
 * @real) until at least RELATIVEJUMP_SIZE bytes are covered.  Returns
 * the copied length, -EINVAL if an instruction cannot be copied or
 * boosted, or -EBUSY if the range is reserved by another text-patching
 * subsystem.
 */
static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
	struct insn insn;
	int len = 0, ret;

	while (len < RELATIVEJUMP_SIZE) {
		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
		if (!ret || !can_boost(&insn, src + len))
			return -EINVAL;
		len += ret;
	}
	/* Check whether the address range is reserved */
	if (ftrace_text_reserved(src, src + len - 1) ||
	    alternatives_text_reserved(src, src + len - 1) ||
	    jump_label_text_reserved(src, src + len - 1))
		return -EBUSY;

	return len;
}
196
197/* Check whether insn is indirect jump */
198static int __insn_is_indirect_jump(struct insn *insn)
199{
200	return ((insn->opcode.bytes[0] == 0xff &&
201		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
202		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
203}
204
205/* Check whether insn jumps into specified address range */
206static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
207{
208	unsigned long target = 0;
209
210	switch (insn->opcode.bytes[0]) {
211	case 0xe0:	/* loopne */
212	case 0xe1:	/* loope */
213	case 0xe2:	/* loop */
214	case 0xe3:	/* jcxz */
215	case 0xe9:	/* near relative jump */
216	case 0xeb:	/* short relative jump */
217		break;
218	case 0x0f:
219		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
220			break;
221		return 0;
222	default:
223		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
224			break;
225		return 0;
226	}
227	target = (unsigned long)insn->next_byte + insn->immediate.value;
228
229	return (start <= target && target <= start + len);
230}
231
/* Check whether insn is an indirect jump, including jumps to retpoline thunks */
static int insn_is_indirect_jump(struct insn *insn)
{
	int ret = __insn_is_indirect_jump(insn);

#ifdef CONFIG_RETPOLINE
	/*
	 * Jump to x86_indirect_thunk_* is treated as an indirect jump.
	 * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
	 * older gcc may use indirect jump. So we add this check instead of
	 * replace indirect-jump check.
	 */
	if (!ret)
		ret = insn_jump_into_range(insn,
				(unsigned long)__indirect_thunk_start,
				(unsigned long)__indirect_thunk_end -
				(unsigned long)__indirect_thunk_start);
#endif
	return ret;
}
251
/* Decode whole function to ensure any instructions don't jump into target */
static int can_optimize(unsigned long paddr)
{
	unsigned long addr, size = 0, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* Lookup symbol including addr */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/*
	 * Do not optimize in the entry code due to the unstable
	 * stack handling and registers setup.
	 */
	if (((paddr >= (unsigned long)__entry_text_start) &&
	     (paddr <  (unsigned long)__entry_text_end)) ||
	    ((paddr >= (unsigned long)__irqentry_text_start) &&
	     (paddr <  (unsigned long)__irqentry_text_end)))
		return 0;

	/* Check there is enough space for a relative jump. */
	if (size - offset < RELATIVEJUMP_SIZE)
		return 0;

	/* Decode instructions */
	addr = paddr - offset;	/* start of the enclosing function */
	while (addr < paddr - offset + size) { /* Decode until function end */
		unsigned long recovered_insn;

		if (search_exception_tables(addr))
			/*
			 * Since some fixup code will jumps into this function,
			 * we can't optimize kprobe in this function.
			 */
			return 0;
		recovered_insn = recover_probed_instruction(buf, addr);
		if (!recovered_insn)
			return 0;
		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
		insn_get_length(&insn);
		/* Another subsystem puts a breakpoint */
		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
			return 0;
		/* Recover address */
		insn.kaddr = (void *)addr;
		insn.next_byte = (void *)(addr + insn.length);
		/* Check any instructions don't jump into target */
		if (insn_is_indirect_jump(&insn) ||
		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
					 RELATIVE_ADDR_SIZE))
			return 0;
		addr += insn.length;
	}

	return 1;
}
308
309/* Check optimized_kprobe can actually be optimized. */
310int arch_check_optimized_kprobe(struct optimized_kprobe *op)
311{
312	int i;
313	struct kprobe *p;
314
315	for (i = 1; i < op->optinsn.size; i++) {
316		p = get_kprobe(op->kp.addr + i);
317		if (p && !kprobe_disabled(p))
318			return -EEXIST;
319	}
320
321	return 0;
322}
323
324/* Check the addr is within the optimized instructions. */
325int arch_within_optimized_kprobe(struct optimized_kprobe *op,
326				 unsigned long addr)
327{
328	return ((unsigned long)op->kp.addr <= addr &&
329		(unsigned long)op->kp.addr + op->optinsn.size > addr);
330}
331
332/* Free optimized instruction slot */
333static
334void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
335{
336	if (op->optinsn.insn) {
337		free_optinsn_slot(op->optinsn.insn, dirty);
 
 
 
 
 
 
 
338		op->optinsn.insn = NULL;
339		op->optinsn.size = 0;
340	}
341}
342
/* Free the out-of-line slot, marking it dirty so it is synced before reuse. */
void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}
347
/*
 * Copy replacing target instructions
 * Target instructions MUST be relocatable (checked inside)
 * This is called when new aggr(opt)probe is allocated or reused.
 */
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
				  struct kprobe *__unused)
{
	u8 *buf = NULL, *slot;
	int ret, len;
	long rel;

	if (!can_optimize((unsigned long)op->kp.addr))
		return -EILSEQ;

	/* Staging buffer: assembled here, then poked into the RO slot */
	buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	op->optinsn.insn = slot = get_optinsn_slot();
	if (!slot) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Verify if the address gap is in 2GB range, because this uses
	 * a relative jump.
	 */
	rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
	if (abs(rel) > 0x7fffffff) {
		ret = -ERANGE;
		goto err;
	}

	/* Copy arch-dep-instance from template */
	memcpy(buf, optprobe_template_entry, TMPL_END_IDX);

	/* Copy instructions into the out-of-line buffer */
	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
					  slot + TMPL_END_IDX);
	if (ret < 0)
		goto err;
	op->optinsn.size = ret;
	len = TMPL_END_IDX + op->optinsn.size;

	/* Set probe information */
	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

	/* Set probe function call */
	synthesize_relcall(buf + TMPL_CALL_IDX,
			   slot + TMPL_CALL_IDX, optimized_callback);

	/* Set returning jmp instruction at the tail of out-of-line buffer */
	synthesize_reljump(buf + len, slot + len,
			   (u8 *)op->kp.addr + op->optinsn.size);
	len += RELATIVEJUMP_SIZE;

	/* We have to use text_poke() for instruction buffer because it is RO */
	text_poke(slot, buf, len);
	ret = 0;
out:
	kfree(buf);
	return ret;

err:
	/* Release the slot without syncing (nothing executed from it yet) */
	__arch_remove_optimized_kprobe(op, 0);
	goto out;
}
417
/*
 * Replace breakpoints (int3) with relative jumps.
 * Caller must call with locking kprobe_mutex and text_mutex.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;
	u8 insn_buff[RELATIVEJUMP_SIZE];

	list_for_each_entry_safe(op, tmp, oplist, list) {
		/* Displacement from the end of the jump to the trampoline */
		s32 rel = (s32)((long)op->optinsn.insn -
			((long)op->kp.addr + RELATIVEJUMP_SIZE));

		WARN_ON(kprobe_disabled(&op->kp));

		/* Backup instructions which will be replaced by jump address */
		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
		       RELATIVE_ADDR_SIZE);

		insn_buff[0] = RELATIVEJUMP_OPCODE;
		*(s32 *)(&insn_buff[1]) = rel;

		text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
			     op->optinsn.insn);

		list_del_init(&op->list);
	}
}
446
/* Replace a relative jump with a breakpoint (int3).  */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	u8 insn_buff[RELATIVEJUMP_SIZE];

	/* Set int3 to first byte for kprobes */
	insn_buff[0] = BREAKPOINT_INSTRUCTION;
	/* Restore the original bytes that followed the probed opcode */
	memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
	text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
		     op->optinsn.insn);
}
458
459/*
460 * Recover original instructions and breakpoints from relative jumps.
461 * Caller must call with locking kprobe_mutex.
462 */
463extern void arch_unoptimize_kprobes(struct list_head *oplist,
464				    struct list_head *done_list)
465{
466	struct optimized_kprobe *op, *tmp;
467
468	list_for_each_entry_safe(op, tmp, oplist, list) {
469		arch_unoptimize_kprobe(op);
470		list_move(&op->list, done_list);
471	}
472}
473
474int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
475{
476	struct optimized_kprobe *op;
477
478	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
479		/* This kprobe is really able to run optimized path. */
480		op = container_of(p, struct optimized_kprobe, kp);
481		/* Detour through copied instructions */
482		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
483		if (!reenter)
484			reset_current_kprobe();
485		return 1;
486	}
487	return 0;
488}
489NOKPROBE_SYMBOL(setup_detour_execution);
v6.2 — second snapshot of the same file follows.
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 *  Kernel Probes Jump Optimization (Optprobes)
  4 *
  5 * Copyright (C) IBM Corporation, 2002, 2004
  6 * Copyright (C) Hitachi Ltd., 2012
  7 */
  8#include <linux/kprobes.h>
  9#include <linux/perf_event.h>
 10#include <linux/ptrace.h>
 11#include <linux/string.h>
 12#include <linux/slab.h>
 13#include <linux/hardirq.h>
 14#include <linux/preempt.h>
 15#include <linux/extable.h>
 16#include <linux/kdebug.h>
 17#include <linux/kallsyms.h>
 18#include <linux/kgdb.h>
 19#include <linux/ftrace.h>
 20#include <linux/objtool.h>
 21#include <linux/pgtable.h>
 22#include <linux/static_call.h>
 23
 24#include <asm/text-patching.h>
 25#include <asm/cacheflush.h>
 26#include <asm/desc.h>
 
 27#include <linux/uaccess.h>
 28#include <asm/alternative.h>
 29#include <asm/insn.h>
 30#include <asm/debugreg.h>
 31#include <asm/set_memory.h>
 32#include <asm/sections.h>
 33#include <asm/nospec-branch.h>
 34
 35#include "common.h"
 36
/*
 * Recover the original instruction bytes at @addr into @buf when @addr
 * falls inside the JMP32 written by an optimized kprobe.  Returns @addr
 * unchanged when no optimized kprobe covers it, 0UL on a failed kernel
 * read, or the address of @buf holding the recovered bytes.
 */
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct optimized_kprobe *op;
	struct kprobe *kp;
	long offs;
	int i;

	/* Walk back up to the jump size looking for a kprobe covering addr */
	for (i = 0; i < JMP32_INSN_SIZE; i++) {
		kp = get_kprobe((void *)addr - i);
		/* This function only handles jump-optimized kprobe */
		if (kp && kprobe_optimized(kp)) {
			op = container_of(kp, struct optimized_kprobe, kp);
			/* If op->list is not empty, op is under optimizing */
			if (list_empty(&op->list))
				goto found;
		}
	}

	return addr;
found:
	/*
	 * If the kprobe can be optimized, original bytes which can be
	 * overwritten by jump destination address. In this case, original
	 * bytes must be recovered from op->optinsn.copied_insn buffer.
	 */
	if (copy_from_kernel_nofault(buf, (void *)addr,
		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
		return 0UL;

	if (addr == (unsigned long)kp->addr) {
		/* First byte is the saved opcode, rest from the copy buffer */
		buf[0] = kp->opcode;
		memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
	} else {
		/* addr lands inside the displacement; copy the tail of it */
		offs = addr - (unsigned long)kp->addr - 1;
		memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
	}

	return (unsigned long)buf;
}
 76
 77static void synthesize_clac(kprobe_opcode_t *addr)
 78{
 79	/*
 80	 * Can't be static_cpu_has() due to how objtool treats this feature bit.
 81	 * This isn't a fast path anyway.
 82	 */
 83	if (!boot_cpu_has(X86_FEATURE_SMAP))
 84		return;
 85
 86	/* Replace the NOP3 with CLAC */
 87	addr[0] = 0x0f;
 88	addr[1] = 0x01;
 89	addr[2] = 0xca;
 90}
 91
/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
	/* REX.W prefix + MOV rdi, imm64 */
	*addr++ = 0x48;
	*addr++ = 0xbf;
#else
	/* MOV eax, imm32 */
	*addr++ = 0xb8;
#endif
	*(unsigned long *)addr = val;
}
103
/*
 * Optprobe trampoline template, emitted into .rodata and copied into
 * each probe's out-of-line slot.  It builds a pt_regs frame (including
 * a fake ss/flags layout fixed up on exit), optionally executes CLAC,
 * loads the optimized_kprobe pointer and calls optimized_callback().
 * The NOP runs at the _clac/_val/_call labels are patched later by
 * arch_prepare_optimized_kprobe().
 */
asm (
			".pushsection .rodata\n"
			"optprobe_template_func:\n"
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			"       pushq $" __stringify(__KERNEL_DS) "\n"
			/* Save the 'sp - 8', this will be fixed later. */
			"	pushq %rsp\n"
			"	pushfq\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			ASM_NOP3
			SAVE_REGS_STRING
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Copy 'regs->flags' into 'regs->ss'. */
			"	movq 18*8(%rsp), %rdx\n"
			"	movq %rdx, 20*8(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip 'regs->flags' and 'regs->sp'. */
			"	addq $16, %rsp\n"
			/* And pop flags register from 'regs->ss'. */
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushl %ss\n"
			/* Save the 'sp - 4', this will be fixed later. */
			"	pushl %esp\n"
			"	pushfl\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			ASM_NOP3
			SAVE_REGS_STRING
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Copy 'regs->flags' into 'regs->ss'. */
			"	movl 14*4(%esp), %edx\n"
			"	movl %edx, 16*4(%esp)\n"
			RESTORE_REGS_STRING
			/* Skip 'regs->flags' and 'regs->sp'. */
			"	addl $8, %esp\n"
			/* And pop flags register from 'regs->ss'. */
			"	popfl\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n"
			".popsection\n");

/* Declared so objtool can be told not to validate this data-only "function" */
void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);
165
/* Byte offsets of the patchable sites within the trampoline template. */
#define TMPL_CLAC_IDX \
	((long)optprobe_template_clac - (long)optprobe_template_entry)
#define TMPL_MOVE_IDX \
	((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
	((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
	((long)optprobe_template_end - (long)optprobe_template_entry)
174
 
 
/* Optimized kprobe call back function: called from optinsn */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	/* This is possible if op is under delayed unoptimizing */
	if (kprobe_disabled(&op->kp))
		return;

	preempt_disable();
	if (kprobe_running()) {
		/* Nested probe hit: just account the miss */
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
		/* Adjust stack pointer */
		regs->sp += sizeof(long);
		/* Save skipped registers */
		regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
		regs->gs = 0;
#endif
		/* Make regs->ip look like a real int3 trap at the probe */
		regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
		regs->orig_ax = ~0UL;

		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
	preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);
206
/*
 * Copy whole instructions from @src to @dest (relocated to execute at
 * @real) until at least JMP32_INSN_SIZE bytes are covered.  Returns the
 * copied length, -EINVAL if an instruction cannot be copied or boosted,
 * or -EBUSY if the range is reserved by another text-patching subsystem.
 */
static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
	struct insn insn;
	int len = 0, ret;

	while (len < JMP32_INSN_SIZE) {
		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
		if (!ret || !can_boost(&insn, src + len))
			return -EINVAL;
		len += ret;
	}
	/* Check whether the address range is reserved */
	if (ftrace_text_reserved(src, src + len - 1) ||
	    alternatives_text_reserved(src, src + len - 1) ||
	    jump_label_text_reserved(src, src + len - 1) ||
	    static_call_text_reserved(src, src + len - 1))
		return -EBUSY;

	return len;
}
227
/* Check whether insn is indirect jump */
static int __insn_is_indirect_jump(struct insn *insn)
{
	/* 0xff with ModRM.reg 4 or 5 (jmp/ljmp r/m), or 0xea (far jmp) */
	return ((insn->opcode.bytes[0] == 0xff &&
		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
}
235
/* Check whether insn jumps into specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
	unsigned long target = 0;

	switch (insn->opcode.bytes[0]) {
	case 0xe0:	/* loopne */
	case 0xe1:	/* loope */
	case 0xe2:	/* loop */
	case 0xe3:	/* jcxz */
	case 0xe9:	/* near relative jump */
	case 0xeb:	/* short relative jump */
		break;
	case 0x0f:
		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
			break;
		return 0;
	default:
		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
			break;
		return 0;
	}
	/* Branch target = next instruction address + signed displacement */
	target = (unsigned long)insn->next_byte + insn->immediate.value;

	return (start <= target && target <= start + len);
}
262
/* Check whether insn is an indirect jump, including jumps to retpoline thunks */
static int insn_is_indirect_jump(struct insn *insn)
{
	int ret = __insn_is_indirect_jump(insn);

#ifdef CONFIG_RETPOLINE
	/*
	 * Jump to x86_indirect_thunk_* is treated as an indirect jump.
	 * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
	 * older gcc may use indirect jump. So we add this check instead of
	 * replace indirect-jump check.
	 */
	if (!ret)
		ret = insn_jump_into_range(insn,
				(unsigned long)__indirect_thunk_start,
				(unsigned long)__indirect_thunk_end -
				(unsigned long)__indirect_thunk_start);
#endif
	return ret;
}
282
/* Decode whole function to ensure any instructions don't jump into target */
static int can_optimize(unsigned long paddr)
{
	unsigned long addr, size = 0, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* Lookup symbol including addr */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/*
	 * Do not optimize in the entry code due to the unstable
	 * stack handling and registers setup.
	 */
	if (((paddr >= (unsigned long)__entry_text_start) &&
	     (paddr <  (unsigned long)__entry_text_end)))
		return 0;

	/* Check there is enough space for a relative jump. */
	if (size - offset < JMP32_INSN_SIZE)
		return 0;

	/* Decode instructions */
	addr = paddr - offset;	/* start of the enclosing function */
	while (addr < paddr - offset + size) { /* Decode until function end */
		unsigned long recovered_insn;
		int ret;

		if (search_exception_tables(addr))
			/*
			 * Since some fixup code will jumps into this function,
			 * we can't optimize kprobe in this function.
			 */
			return 0;
		recovered_insn = recover_probed_instruction(buf, addr);
		if (!recovered_insn)
			return 0;

		ret = insn_decode_kernel(&insn, (void *)recovered_insn);
		if (ret < 0)
			return 0;
#ifdef CONFIG_KGDB
		/*
		 * If there is a dynamically installed kgdb sw breakpoint,
		 * this function should not be probed.
		 */
		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
		    kgdb_has_hit_break(addr))
			return 0;
#endif
		/* Recover address */
		insn.kaddr = (void *)addr;
		insn.next_byte = (void *)(addr + insn.length);
		/* Check any instructions don't jump into target */
		if (insn_is_indirect_jump(&insn) ||
		    insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
					 DISP32_SIZE))
			return 0;
		addr += insn.length;
	}

	return 1;
}
347
/* Check optimized_kprobe can actually be optimized. */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	int i;
	struct kprobe *p;

	/*
	 * Refuse if another enabled kprobe lives inside the bytes this
	 * probe's instructions cover (byte 0 is this probe itself).
	 */
	for (i = 1; i < op->optinsn.size; i++) {
		p = get_kprobe(op->kp.addr + i);
		if (p && !kprobe_disabled(p))
			return -EEXIST;
	}

	return 0;
}
362
363/* Check the addr is within the optimized instructions. */
364int arch_within_optimized_kprobe(struct optimized_kprobe *op,
365				 kprobe_opcode_t *addr)
366{
367	return (op->kp.addr <= addr &&
368		op->kp.addr + op->optinsn.size > addr);
369}
370
/* Free optimized instruction slot */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	u8 *slot = op->optinsn.insn;
	if (slot) {
		/* Total trampoline size: template + copied insns + tail jump */
		int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;

		/* Record the perf event before freeing the slot */
		if (dirty)
			perf_event_text_poke(slot, slot, len, NULL, 0);

		free_optinsn_slot(slot, dirty);
		op->optinsn.insn = NULL;
		op->optinsn.size = 0;
	}
}
388
/* Free the out-of-line slot, marking it dirty so it is synced before reuse. */
void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}
393
/*
 * Copy replacing target instructions
 * Target instructions MUST be relocatable (checked inside)
 * This is called when new aggr(opt)probe is allocated or reused.
 */
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
				  struct kprobe *__unused)
{
	u8 *buf = NULL, *slot;
	int ret, len;
	long rel;

	if (!can_optimize((unsigned long)op->kp.addr))
		return -EILSEQ;

	/* Staging buffer: assembled here, then poked into the RO slot */
	buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	op->optinsn.insn = slot = get_optinsn_slot();
	if (!slot) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Verify if the address gap is in 2GB range, because this uses
	 * a relative jump.
	 */
	rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;
	if (abs(rel) > 0x7fffffff) {
		ret = -ERANGE;
		goto err;
	}

	/* Copy arch-dep-instance from template */
	memcpy(buf, optprobe_template_entry, TMPL_END_IDX);

	/* Copy instructions into the out-of-line buffer */
	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
					  slot + TMPL_END_IDX);
	if (ret < 0)
		goto err;
	op->optinsn.size = ret;
	len = TMPL_END_IDX + op->optinsn.size;

	synthesize_clac(buf + TMPL_CLAC_IDX);

	/* Set probe information */
	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

	/* Set probe function call */
	synthesize_relcall(buf + TMPL_CALL_IDX,
			   slot + TMPL_CALL_IDX, optimized_callback);

	/* Set returning jmp instruction at the tail of out-of-line buffer */
	synthesize_reljump(buf + len, slot + len,
			   (u8 *)op->kp.addr + op->optinsn.size);
	len += JMP32_INSN_SIZE;

	/*
	 * Note	len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE is also
	 * used in __arch_remove_optimized_kprobe().
	 */

	/* We have to use text_poke() for instruction buffer because it is RO */
	perf_event_text_poke(slot, NULL, 0, buf, len);
	text_poke(slot, buf, len);

	ret = 0;
out:
	kfree(buf);
	return ret;

err:
	/* Release the slot without the perf/text sync (nothing ran from it) */
	__arch_remove_optimized_kprobe(op, 0);
	goto out;
}
472
/*
 * Replace breakpoints (INT3) with relative jumps (JMP.d32).
 * Caller must call with locking kprobe_mutex and text_mutex.
 *
 * The caller will have installed a regular kprobe and after that issued
 * syncrhonize_rcu_tasks(), this ensures that the instruction(s) that live in
 * the 4 bytes after the INT3 are unused and can now be overwritten.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;
	u8 insn_buff[JMP32_INSN_SIZE];

	list_for_each_entry_safe(op, tmp, oplist, list) {
		/* Displacement from the end of the jump to the trampoline */
		s32 rel = (s32)((long)op->optinsn.insn -
			((long)op->kp.addr + JMP32_INSN_SIZE));

		WARN_ON(kprobe_disabled(&op->kp));

		/* Backup instructions which will be replaced by jump address */
		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
		       DISP32_SIZE);

		insn_buff[0] = JMP32_INSN_OPCODE;
		*(s32 *)(&insn_buff[1]) = rel;

		text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);

		list_del_init(&op->list);
	}
}
504
/*
 * Replace a relative jump (JMP.d32) with a breakpoint (INT3).
 *
 * After that, we can restore the 4 bytes after the INT3 to undo what
 * arch_optimize_kprobes() scribbled. This is safe since those bytes will be
 * unused once the INT3 lands.
 */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
	u8 old[JMP32_INSN_SIZE];
	u8 *addr = op->kp.addr;

	/* Keep the previous bytes for the perf event below */
	memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
	memcpy(new + INT3_INSN_SIZE,
	       op->optinsn.copied_insn,
	       JMP32_INSN_SIZE - INT3_INSN_SIZE);

	/* First land the INT3, then restore the displacement bytes */
	text_poke(addr, new, INT3_INSN_SIZE);
	text_poke_sync();
	text_poke(addr + INT3_INSN_SIZE,
		  new + INT3_INSN_SIZE,
		  JMP32_INSN_SIZE - INT3_INSN_SIZE);
	text_poke_sync();

	perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
}
532
/*
 * Recover original instructions and breakpoints from relative jumps.
 * Caller must call with locking kprobe_mutex.
 */
extern void arch_unoptimize_kprobes(struct list_head *oplist,
				    struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		/* Hand the probe over to the done list for later cleanup */
		list_move(&op->list, done_list);
	}
}
547
548int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
549{
550	struct optimized_kprobe *op;
551
552	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
553		/* This kprobe is really able to run optimized path. */
554		op = container_of(p, struct optimized_kprobe, kp);
555		/* Detour through copied instructions */
556		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
557		if (!reenter)
558			reset_current_kprobe();
559		return 1;
560	}
561	return 0;
562}
563NOKPROBE_SYMBOL(setup_detour_execution);