Linux Audio

Check our new training course

Loading...
v5.4
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 *  Kernel Probes Jump Optimization (Optprobes)
  4 *
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  5 * Copyright (C) IBM Corporation, 2002, 2004
  6 * Copyright (C) Hitachi Ltd., 2012
  7 */
  8#include <linux/kprobes.h>
  9#include <linux/ptrace.h>
 10#include <linux/string.h>
 11#include <linux/slab.h>
 12#include <linux/hardirq.h>
 13#include <linux/preempt.h>
 14#include <linux/extable.h>
 15#include <linux/kdebug.h>
 16#include <linux/kallsyms.h>
 17#include <linux/ftrace.h>
 18#include <linux/frame.h>
 19
 20#include <asm/text-patching.h>
 21#include <asm/cacheflush.h>
 22#include <asm/desc.h>
 23#include <asm/pgtable.h>
 24#include <linux/uaccess.h>
 25#include <asm/alternative.h>
 26#include <asm/insn.h>
 27#include <asm/debugreg.h>
 28#include <asm/set_memory.h>
 29#include <asm/sections.h>
 30#include <asm/nospec-branch.h>
 31
 32#include "common.h"
 33
 34unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
 35{
 36	struct optimized_kprobe *op;
 37	struct kprobe *kp;
 38	long offs;
 39	int i;
 40
 41	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
 42		kp = get_kprobe((void *)addr - i);
 43		/* This function only handles jump-optimized kprobe */
 44		if (kp && kprobe_optimized(kp)) {
 45			op = container_of(kp, struct optimized_kprobe, kp);
 46			/* If op->list is not empty, op is under optimizing */
 47			if (list_empty(&op->list))
 48				goto found;
 49		}
 50	}
 51
 52	return addr;
 53found:
 54	/*
 55	 * If the kprobe can be optimized, original bytes which can be
 56	 * overwritten by jump destination address. In this case, original
 57	 * bytes must be recovered from op->optinsn.copied_insn buffer.
 58	 */
 59	if (probe_kernel_read(buf, (void *)addr,
 60		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
 61		return 0UL;
 62
 63	if (addr == (unsigned long)kp->addr) {
 64		buf[0] = kp->opcode;
 65		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
 66	} else {
 67		offs = addr - (unsigned long)kp->addr - 1;
 68		memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
 69	}
 70
 71	return (unsigned long)buf;
 72}
 73
 74/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
 75static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
 76{
 77#ifdef CONFIG_X86_64
 78	*addr++ = 0x48;
 79	*addr++ = 0xbf;
 80#else
 81	*addr++ = 0xb8;
 82#endif
 83	*(unsigned long *)addr = val;
 84}
 85
/*
 * Template for the out-of-line detour code of an optimized kprobe.
 *
 * arch_prepare_optimized_kprobe() copies the bytes between
 * optprobe_template_entry and optprobe_template_end into a buffer and then
 * overwrites the NOP placeholders:
 *   - optprobe_template_val:  instruction written by synthesize_set_arg1()
 *   - optprobe_template_call: relative call to optimized_callback()
 *
 * The template is emitted into .rodata; within this file it is only read as
 * the source of that copy.
 */
asm (
			".pushsection .rodata\n"
			"optprobe_template_func:\n"
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			/* We don't bother saving the ss register */
			"	pushq %rsp\n"
			"	pushfq\n"
			SAVE_REGS_STRING
			/* Pass the current stack pointer in rsi (presumably the 2nd argument, regs) */
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			/* 10 bytes of room: 2 opcode bytes plus an 8-byte immediate */
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			/* 5 bytes of room for the relative call */
			ASM_NOP5
			/*
			 * Copy the saved flags over the saved-rsp slot; the
			 * original flags slot is skipped below and popfq then
			 * pops this copy.
			 */
			"	movq 18*8(%rsp), %rdx\n"
			"	movq %rdx, 19*8(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addq $8, %rsp\n"
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushl %esp\n"
			"	pushfl\n"
			SAVE_REGS_STRING
			/* Pass the current stack pointer in edx (presumably the 2nd argument, regs) */
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			/* 5 bytes of room: 1 opcode byte plus a 4-byte immediate */
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			/* 5 bytes of room for the relative call */
			ASM_NOP5
			/*
			 * Copy the saved flags over the saved-esp slot; the
			 * original flags slot is skipped below and popfl then
			 * pops this copy.
			 */
			"	movl 14*4(%esp), %edx\n"
			"	movl %edx, 15*4(%esp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addl $4, %esp\n"
			"	popfl\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n"
			".popsection\n");
 
133
/*
 * C declaration for the label at the start of the assembly template.
 * NOTE(review): STACK_FRAME_NON_STANDARD comes from <linux/frame.h>;
 * presumably it exempts the hand-written template from stack-frame
 * validation -- confirm.
 */
void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);

/* Byte offset, from the template entry, of the "set first argument" slot. */
#define TMPL_MOVE_IDX \
	((long)optprobe_template_val - (long)optprobe_template_entry)
/* Byte offset, from the template entry, of the call placeholder. */
#define TMPL_CALL_IDX \
	((long)optprobe_template_call - (long)optprobe_template_entry)
/* Length of the template in bytes; copied instructions are placed here. */
#define TMPL_END_IDX \
	((long)optprobe_template_end - (long)optprobe_template_entry)

/* Size of the single opcode unit that sits at the probe address. */
#define INT3_SIZE sizeof(kprobe_opcode_t)
145
146/* Optimized kprobe call back function: called from optinsn */
147static void
148optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
149{
150	/* This is possible if op is under delayed unoptimizing */
151	if (kprobe_disabled(&op->kp))
152		return;
153
154	preempt_disable();
155	if (kprobe_running()) {
156		kprobes_inc_nmissed_count(&op->kp);
157	} else {
158		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
159		/* Save skipped registers */
 
160		regs->cs = __KERNEL_CS;
161#ifdef CONFIG_X86_32
162		regs->cs |= get_kernel_rpl();
163		regs->gs = 0;
164#endif
165		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
166		regs->orig_ax = ~0UL;
167
168		__this_cpu_write(current_kprobe, &op->kp);
169		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
170		opt_pre_handler(&op->kp, regs);
171		__this_cpu_write(current_kprobe, NULL);
172	}
173	preempt_enable();
174}
175NOKPROBE_SYMBOL(optimized_callback);
176
177static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
178{
179	struct insn insn;
180	int len = 0, ret;
181
182	while (len < RELATIVEJUMP_SIZE) {
183		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
184		if (!ret || !can_boost(&insn, src + len))
185			return -EINVAL;
186		len += ret;
187	}
188	/* Check whether the address range is reserved */
189	if (ftrace_text_reserved(src, src + len - 1) ||
190	    alternatives_text_reserved(src, src + len - 1) ||
191	    jump_label_text_reserved(src, src + len - 1))
192		return -EBUSY;
193
194	return len;
195}
196
197/* Check whether insn is indirect jump */
198static int __insn_is_indirect_jump(struct insn *insn)
199{
200	return ((insn->opcode.bytes[0] == 0xff &&
201		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
202		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
203}
204
205/* Check whether insn jumps into specified address range */
206static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
207{
208	unsigned long target = 0;
209
210	switch (insn->opcode.bytes[0]) {
211	case 0xe0:	/* loopne */
212	case 0xe1:	/* loope */
213	case 0xe2:	/* loop */
214	case 0xe3:	/* jcxz */
215	case 0xe9:	/* near relative jump */
216	case 0xeb:	/* short relative jump */
217		break;
218	case 0x0f:
219		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
220			break;
221		return 0;
222	default:
223		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
224			break;
225		return 0;
226	}
227	target = (unsigned long)insn->next_byte + insn->immediate.value;
228
229	return (start <= target && target <= start + len);
230}
231
/*
 * Check whether insn must be treated as an indirect jump.  With
 * CONFIG_RETPOLINE this also covers relative jumps into the indirect thunk
 * area.
 */
static int insn_is_indirect_jump(struct insn *insn)
{
	int indirect = __insn_is_indirect_jump(insn);

	if (indirect)
		return indirect;

#ifdef CONFIG_RETPOLINE
	/*
	 * Jump to x86_indirect_thunk_* is treated as an indirect jump.
	 * Even with CONFIG_RETPOLINE=y, a kernel compiled with an older gcc
	 * may still contain real indirect jumps, so this check is made in
	 * addition to the indirect-jump check above rather than replacing it.
	 */
	indirect = insn_jump_into_range(insn,
			(unsigned long)__indirect_thunk_start,
			(unsigned long)__indirect_thunk_end -
			(unsigned long)__indirect_thunk_start);
#endif
	return indirect;
}
251
252/* Decode whole function to ensure any instructions don't jump into target */
253static int can_optimize(unsigned long paddr)
254{
255	unsigned long addr, size = 0, offset = 0;
256	struct insn insn;
257	kprobe_opcode_t buf[MAX_INSN_SIZE];
258
259	/* Lookup symbol including addr */
260	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
261		return 0;
262
263	/*
264	 * Do not optimize in the entry code due to the unstable
265	 * stack handling and registers setup.
266	 */
267	if (((paddr >= (unsigned long)__entry_text_start) &&
268	     (paddr <  (unsigned long)__entry_text_end)) ||
269	    ((paddr >= (unsigned long)__irqentry_text_start) &&
270	     (paddr <  (unsigned long)__irqentry_text_end)))
271		return 0;
272
273	/* Check there is enough space for a relative jump. */
274	if (size - offset < RELATIVEJUMP_SIZE)
275		return 0;
276
277	/* Decode instructions */
278	addr = paddr - offset;
279	while (addr < paddr - offset + size) { /* Decode until function end */
280		unsigned long recovered_insn;
281		if (search_exception_tables(addr))
282			/*
283			 * Since some fixup code will jumps into this function,
284			 * we can't optimize kprobe in this function.
285			 */
286			return 0;
287		recovered_insn = recover_probed_instruction(buf, addr);
288		if (!recovered_insn)
289			return 0;
290		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
291		insn_get_length(&insn);
292		/* Another subsystem puts a breakpoint */
293		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
294			return 0;
295		/* Recover address */
296		insn.kaddr = (void *)addr;
297		insn.next_byte = (void *)(addr + insn.length);
298		/* Check any instructions don't jump into target */
299		if (insn_is_indirect_jump(&insn) ||
300		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
301					 RELATIVE_ADDR_SIZE))
302			return 0;
303		addr += insn.length;
304	}
305
306	return 1;
307}
308
309/* Check optimized_kprobe can actually be optimized. */
310int arch_check_optimized_kprobe(struct optimized_kprobe *op)
311{
312	int i;
313	struct kprobe *p;
314
315	for (i = 1; i < op->optinsn.size; i++) {
316		p = get_kprobe(op->kp.addr + i);
317		if (p && !kprobe_disabled(p))
318			return -EEXIST;
319	}
320
321	return 0;
322}
323
324/* Check the addr is within the optimized instructions. */
325int arch_within_optimized_kprobe(struct optimized_kprobe *op,
326				 unsigned long addr)
327{
328	return ((unsigned long)op->kp.addr <= addr &&
329		(unsigned long)op->kp.addr + op->optinsn.size > addr);
330}
331
332/* Free optimized instruction slot */
333static
334void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
335{
336	if (op->optinsn.insn) {
337		free_optinsn_slot(op->optinsn.insn, dirty);
338		op->optinsn.insn = NULL;
339		op->optinsn.size = 0;
340	}
341}
342
/* Release the instruction slot of @op, marking it dirty when freed. */
void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	const int dirty = 1;

	__arch_remove_optimized_kprobe(op, dirty);
}
347
348/*
349 * Copy replacing target instructions
350 * Target instructions MUST be relocatable (checked inside)
351 * This is called when new aggr(opt)probe is allocated or reused.
352 */
353int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
354				  struct kprobe *__unused)
355{
356	u8 *buf = NULL, *slot;
357	int ret, len;
358	long rel;
359
360	if (!can_optimize((unsigned long)op->kp.addr))
361		return -EILSEQ;
362
363	buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
364	if (!buf)
365		return -ENOMEM;
366
367	op->optinsn.insn = slot = get_optinsn_slot();
368	if (!slot) {
369		ret = -ENOMEM;
370		goto out;
371	}
372
373	/*
374	 * Verify if the address gap is in 2GB range, because this uses
375	 * a relative jump.
376	 */
377	rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
378	if (abs(rel) > 0x7fffffff) {
379		ret = -ERANGE;
380		goto err;
381	}
382
383	/* Copy arch-dep-instance from template */
384	memcpy(buf, optprobe_template_entry, TMPL_END_IDX);
385
386	/* Copy instructions into the out-of-line buffer */
387	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
388					  slot + TMPL_END_IDX);
389	if (ret < 0)
390		goto err;
391	op->optinsn.size = ret;
392	len = TMPL_END_IDX + op->optinsn.size;
393
394	/* Set probe information */
395	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
396
397	/* Set probe function call */
398	synthesize_relcall(buf + TMPL_CALL_IDX,
399			   slot + TMPL_CALL_IDX, optimized_callback);
400
401	/* Set returning jmp instruction at the tail of out-of-line buffer */
402	synthesize_reljump(buf + len, slot + len,
403			   (u8 *)op->kp.addr + op->optinsn.size);
404	len += RELATIVEJUMP_SIZE;
405
406	/* We have to use text_poke() for instruction buffer because it is RO */
407	text_poke(slot, buf, len);
408	ret = 0;
409out:
410	kfree(buf);
411	return ret;
412
413err:
414	__arch_remove_optimized_kprobe(op, 0);
415	goto out;
416}
417
418/*
419 * Replace breakpoints (int3) with relative jumps.
420 * Caller must call with locking kprobe_mutex and text_mutex.
421 */
422void arch_optimize_kprobes(struct list_head *oplist)
423{
424	struct optimized_kprobe *op, *tmp;
425	u8 insn_buff[RELATIVEJUMP_SIZE];
426
427	list_for_each_entry_safe(op, tmp, oplist, list) {
428		s32 rel = (s32)((long)op->optinsn.insn -
429			((long)op->kp.addr + RELATIVEJUMP_SIZE));
430
431		WARN_ON(kprobe_disabled(&op->kp));
432
433		/* Backup instructions which will be replaced by jump address */
434		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
435		       RELATIVE_ADDR_SIZE);
436
437		insn_buff[0] = RELATIVEJUMP_OPCODE;
438		*(s32 *)(&insn_buff[1]) = rel;
439
440		text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
441			     op->optinsn.insn);
442
443		list_del_init(&op->list);
444	}
445}
446
447/* Replace a relative jump with a breakpoint (int3).  */
448void arch_unoptimize_kprobe(struct optimized_kprobe *op)
449{
450	u8 insn_buff[RELATIVEJUMP_SIZE];
451
452	/* Set int3 to first byte for kprobes */
453	insn_buff[0] = BREAKPOINT_INSTRUCTION;
454	memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
455	text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
456		     op->optinsn.insn);
457}
458
459/*
460 * Recover original instructions and breakpoints from relative jumps.
461 * Caller must call with locking kprobe_mutex.
462 */
463extern void arch_unoptimize_kprobes(struct list_head *oplist,
464				    struct list_head *done_list)
465{
466	struct optimized_kprobe *op, *tmp;
467
468	list_for_each_entry_safe(op, tmp, oplist, list) {
469		arch_unoptimize_kprobe(op);
470		list_move(&op->list, done_list);
471	}
472}
473
474int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
475{
476	struct optimized_kprobe *op;
477
478	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
479		/* This kprobe is really able to run optimized path. */
480		op = container_of(p, struct optimized_kprobe, kp);
481		/* Detour through copied instructions */
482		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
483		if (!reenter)
484			reset_current_kprobe();
 
485		return 1;
486	}
487	return 0;
488}
489NOKPROBE_SYMBOL(setup_detour_execution);
v4.17
 
  1/*
  2 *  Kernel Probes Jump Optimization (Optprobes)
  3 *
  4 * This program is free software; you can redistribute it and/or modify
  5 * it under the terms of the GNU General Public License as published by
  6 * the Free Software Foundation; either version 2 of the License, or
  7 * (at your option) any later version.
  8 *
  9 * This program is distributed in the hope that it will be useful,
 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 * GNU General Public License for more details.
 13 *
 14 * You should have received a copy of the GNU General Public License
 15 * along with this program; if not, write to the Free Software
 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 *
 18 * Copyright (C) IBM Corporation, 2002, 2004
 19 * Copyright (C) Hitachi Ltd., 2012
 20 */
 21#include <linux/kprobes.h>
 22#include <linux/ptrace.h>
 23#include <linux/string.h>
 24#include <linux/slab.h>
 25#include <linux/hardirq.h>
 26#include <linux/preempt.h>
 27#include <linux/extable.h>
 28#include <linux/kdebug.h>
 29#include <linux/kallsyms.h>
 30#include <linux/ftrace.h>
 31#include <linux/frame.h>
 32
 33#include <asm/text-patching.h>
 34#include <asm/cacheflush.h>
 35#include <asm/desc.h>
 36#include <asm/pgtable.h>
 37#include <linux/uaccess.h>
 38#include <asm/alternative.h>
 39#include <asm/insn.h>
 40#include <asm/debugreg.h>
 41#include <asm/set_memory.h>
 42#include <asm/sections.h>
 43#include <asm/nospec-branch.h>
 44
 45#include "common.h"
 46
 47unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
 48{
 49	struct optimized_kprobe *op;
 50	struct kprobe *kp;
 51	long offs;
 52	int i;
 53
 54	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
 55		kp = get_kprobe((void *)addr - i);
 56		/* This function only handles jump-optimized kprobe */
 57		if (kp && kprobe_optimized(kp)) {
 58			op = container_of(kp, struct optimized_kprobe, kp);
 59			/* If op->list is not empty, op is under optimizing */
 60			if (list_empty(&op->list))
 61				goto found;
 62		}
 63	}
 64
 65	return addr;
 66found:
 67	/*
 68	 * If the kprobe can be optimized, original bytes which can be
 69	 * overwritten by jump destination address. In this case, original
 70	 * bytes must be recovered from op->optinsn.copied_insn buffer.
 71	 */
 72	if (probe_kernel_read(buf, (void *)addr,
 73		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
 74		return 0UL;
 75
 76	if (addr == (unsigned long)kp->addr) {
 77		buf[0] = kp->opcode;
 78		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
 79	} else {
 80		offs = addr - (unsigned long)kp->addr - 1;
 81		memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
 82	}
 83
 84	return (unsigned long)buf;
 85}
 86
 87/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
 88static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
 89{
 90#ifdef CONFIG_X86_64
 91	*addr++ = 0x48;
 92	*addr++ = 0xbf;
 93#else
 94	*addr++ = 0xb8;
 95#endif
 96	*(unsigned long *)addr = val;
 97}
 98
/*
 * Template for the out-of-line detour code of an optimized kprobe.
 *
 * arch_prepare_optimized_kprobe() copies the bytes between
 * optprobe_template_entry and optprobe_template_end into a buffer and then
 * overwrites the NOP placeholders:
 *   - optprobe_template_val:  instruction written by synthesize_set_arg1()
 *   - optprobe_template_call: relative call to optimized_callback()
 *
 * The trailing .type/.size directives mark optprobe_template_func as a
 * function symbol spanning the whole template.
 */
asm (
			"optprobe_template_func:\n"
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			/* We don't bother saving the ss register */
			"	pushq %rsp\n"
			"	pushfq\n"
			SAVE_REGS_STRING
			/* Pass the current stack pointer in rsi (presumably the 2nd argument, regs) */
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			/* 10 bytes of room: 2 opcode bytes plus an 8-byte immediate */
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			/* 5 bytes of room for the relative call */
			ASM_NOP5
			/*
			 * Copy the saved flags over the saved-rsp slot; the
			 * original flags slot is skipped below and popfq then
			 * pops this copy.
			 */
			"	movq 144(%rsp), %rdx\n"
			"	movq %rdx, 152(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addq $8, %rsp\n"
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushf\n"
			SAVE_REGS_STRING
			/* Pass the current stack pointer in edx (presumably the 2nd argument, regs) */
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			/* 5 bytes of room: 1 opcode byte plus a 4-byte immediate */
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			/* 5 bytes of room for the relative call */
			ASM_NOP5
			RESTORE_REGS_STRING
			"	addl $4, %esp\n"	/* skip cs */
			"	popf\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n"
			".type optprobe_template_func, @function\n"
			".size optprobe_template_func, .-optprobe_template_func\n");
141
/*
 * C declaration for the label at the start of the assembly template.
 * NOTE(review): STACK_FRAME_NON_STANDARD comes from <linux/frame.h>;
 * presumably it exempts the hand-written template from stack-frame
 * validation -- confirm.
 */
void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);

/* Byte offset, from the template entry, of the "set first argument" slot. */
#define TMPL_MOVE_IDX \
	((long)optprobe_template_val - (long)optprobe_template_entry)
/* Byte offset, from the template entry, of the call placeholder. */
#define TMPL_CALL_IDX \
	((long)optprobe_template_call - (long)optprobe_template_entry)
/* Length of the template in bytes; copied instructions are placed here. */
#define TMPL_END_IDX \
	((long)optprobe_template_end - (long)optprobe_template_entry)

/* Size of the single opcode unit that sits at the probe address. */
#define INT3_SIZE sizeof(kprobe_opcode_t)
153
154/* Optimized kprobe call back function: called from optinsn */
155static void
156optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
157{
158	/* This is possible if op is under delayed unoptimizing */
159	if (kprobe_disabled(&op->kp))
160		return;
161
162	preempt_disable();
163	if (kprobe_running()) {
164		kprobes_inc_nmissed_count(&op->kp);
165	} else {
166		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
167		/* Save skipped registers */
168#ifdef CONFIG_X86_64
169		regs->cs = __KERNEL_CS;
170#else
171		regs->cs = __KERNEL_CS | get_kernel_rpl();
172		regs->gs = 0;
173#endif
174		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
175		regs->orig_ax = ~0UL;
176
177		__this_cpu_write(current_kprobe, &op->kp);
178		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
179		opt_pre_handler(&op->kp, regs);
180		__this_cpu_write(current_kprobe, NULL);
181	}
182	preempt_enable_no_resched();
183}
184NOKPROBE_SYMBOL(optimized_callback);
185
186static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
187{
188	struct insn insn;
189	int len = 0, ret;
190
191	while (len < RELATIVEJUMP_SIZE) {
192		ret = __copy_instruction(dest + len, src + len, real, &insn);
193		if (!ret || !can_boost(&insn, src + len))
194			return -EINVAL;
195		len += ret;
196	}
197	/* Check whether the address range is reserved */
198	if (ftrace_text_reserved(src, src + len - 1) ||
199	    alternatives_text_reserved(src, src + len - 1) ||
200	    jump_label_text_reserved(src, src + len - 1))
201		return -EBUSY;
202
203	return len;
204}
205
206/* Check whether insn is indirect jump */
207static int __insn_is_indirect_jump(struct insn *insn)
208{
209	return ((insn->opcode.bytes[0] == 0xff &&
210		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
211		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
212}
213
214/* Check whether insn jumps into specified address range */
215static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
216{
217	unsigned long target = 0;
218
219	switch (insn->opcode.bytes[0]) {
220	case 0xe0:	/* loopne */
221	case 0xe1:	/* loope */
222	case 0xe2:	/* loop */
223	case 0xe3:	/* jcxz */
224	case 0xe9:	/* near relative jump */
225	case 0xeb:	/* short relative jump */
226		break;
227	case 0x0f:
228		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
229			break;
230		return 0;
231	default:
232		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
233			break;
234		return 0;
235	}
236	target = (unsigned long)insn->next_byte + insn->immediate.value;
237
238	return (start <= target && target <= start + len);
239}
240
/*
 * Check whether insn must be treated as an indirect jump.  With
 * CONFIG_RETPOLINE this also covers relative jumps into the indirect thunk
 * area.
 */
static int insn_is_indirect_jump(struct insn *insn)
{
	int indirect = __insn_is_indirect_jump(insn);

	if (indirect)
		return indirect;

#ifdef CONFIG_RETPOLINE
	/*
	 * Jump to x86_indirect_thunk_* is treated as an indirect jump.
	 * Even with CONFIG_RETPOLINE=y, a kernel compiled with an older gcc
	 * may still contain real indirect jumps, so this check is made in
	 * addition to the indirect-jump check above rather than replacing it.
	 */
	indirect = insn_jump_into_range(insn,
			(unsigned long)__indirect_thunk_start,
			(unsigned long)__indirect_thunk_end -
			(unsigned long)__indirect_thunk_start);
#endif
	return indirect;
}
260
261/* Decode whole function to ensure any instructions don't jump into target */
262static int can_optimize(unsigned long paddr)
263{
264	unsigned long addr, size = 0, offset = 0;
265	struct insn insn;
266	kprobe_opcode_t buf[MAX_INSN_SIZE];
267
268	/* Lookup symbol including addr */
269	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
270		return 0;
271
272	/*
273	 * Do not optimize in the entry code due to the unstable
274	 * stack handling and registers setup.
275	 */
276	if (((paddr >= (unsigned long)__entry_text_start) &&
277	     (paddr <  (unsigned long)__entry_text_end)) ||
278	    ((paddr >= (unsigned long)__irqentry_text_start) &&
279	     (paddr <  (unsigned long)__irqentry_text_end)))
280		return 0;
281
282	/* Check there is enough space for a relative jump. */
283	if (size - offset < RELATIVEJUMP_SIZE)
284		return 0;
285
286	/* Decode instructions */
287	addr = paddr - offset;
288	while (addr < paddr - offset + size) { /* Decode until function end */
289		unsigned long recovered_insn;
290		if (search_exception_tables(addr))
291			/*
292			 * Since some fixup code will jumps into this function,
293			 * we can't optimize kprobe in this function.
294			 */
295			return 0;
296		recovered_insn = recover_probed_instruction(buf, addr);
297		if (!recovered_insn)
298			return 0;
299		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
300		insn_get_length(&insn);
301		/* Another subsystem puts a breakpoint */
302		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
303			return 0;
304		/* Recover address */
305		insn.kaddr = (void *)addr;
306		insn.next_byte = (void *)(addr + insn.length);
307		/* Check any instructions don't jump into target */
308		if (insn_is_indirect_jump(&insn) ||
309		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
310					 RELATIVE_ADDR_SIZE))
311			return 0;
312		addr += insn.length;
313	}
314
315	return 1;
316}
317
318/* Check optimized_kprobe can actually be optimized. */
319int arch_check_optimized_kprobe(struct optimized_kprobe *op)
320{
321	int i;
322	struct kprobe *p;
323
324	for (i = 1; i < op->optinsn.size; i++) {
325		p = get_kprobe(op->kp.addr + i);
326		if (p && !kprobe_disabled(p))
327			return -EEXIST;
328	}
329
330	return 0;
331}
332
333/* Check the addr is within the optimized instructions. */
334int arch_within_optimized_kprobe(struct optimized_kprobe *op,
335				 unsigned long addr)
336{
337	return ((unsigned long)op->kp.addr <= addr &&
338		(unsigned long)op->kp.addr + op->optinsn.size > addr);
339}
340
341/* Free optimized instruction slot */
342static
343void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
344{
345	if (op->optinsn.insn) {
346		free_optinsn_slot(op->optinsn.insn, dirty);
347		op->optinsn.insn = NULL;
348		op->optinsn.size = 0;
349	}
350}
351
/* Release the instruction slot of @op, marking it dirty when freed. */
void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	const int dirty = 1;

	__arch_remove_optimized_kprobe(op, dirty);
}
356
357/*
358 * Copy replacing target instructions
359 * Target instructions MUST be relocatable (checked inside)
360 * This is called when new aggr(opt)probe is allocated or reused.
361 */
362int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
363				  struct kprobe *__unused)
364{
365	u8 *buf = NULL, *slot;
366	int ret, len;
367	long rel;
368
369	if (!can_optimize((unsigned long)op->kp.addr))
370		return -EILSEQ;
371
372	buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
373	if (!buf)
374		return -ENOMEM;
375
376	op->optinsn.insn = slot = get_optinsn_slot();
377	if (!slot) {
378		ret = -ENOMEM;
379		goto out;
380	}
381
382	/*
383	 * Verify if the address gap is in 2GB range, because this uses
384	 * a relative jump.
385	 */
386	rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
387	if (abs(rel) > 0x7fffffff) {
388		ret = -ERANGE;
389		goto err;
390	}
391
392	/* Copy arch-dep-instance from template */
393	memcpy(buf, optprobe_template_entry, TMPL_END_IDX);
394
395	/* Copy instructions into the out-of-line buffer */
396	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
397					  slot + TMPL_END_IDX);
398	if (ret < 0)
399		goto err;
400	op->optinsn.size = ret;
401	len = TMPL_END_IDX + op->optinsn.size;
402
403	/* Set probe information */
404	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
405
406	/* Set probe function call */
407	synthesize_relcall(buf + TMPL_CALL_IDX,
408			   slot + TMPL_CALL_IDX, optimized_callback);
409
410	/* Set returning jmp instruction at the tail of out-of-line buffer */
411	synthesize_reljump(buf + len, slot + len,
412			   (u8 *)op->kp.addr + op->optinsn.size);
413	len += RELATIVEJUMP_SIZE;
414
415	/* We have to use text_poke for instuction buffer because it is RO */
416	text_poke(slot, buf, len);
417	ret = 0;
418out:
419	kfree(buf);
420	return ret;
421
422err:
423	__arch_remove_optimized_kprobe(op, 0);
424	goto out;
425}
426
427/*
428 * Replace breakpoints (int3) with relative jumps.
429 * Caller must call with locking kprobe_mutex and text_mutex.
430 */
431void arch_optimize_kprobes(struct list_head *oplist)
432{
433	struct optimized_kprobe *op, *tmp;
434	u8 insn_buf[RELATIVEJUMP_SIZE];
435
436	list_for_each_entry_safe(op, tmp, oplist, list) {
437		s32 rel = (s32)((long)op->optinsn.insn -
438			((long)op->kp.addr + RELATIVEJUMP_SIZE));
439
440		WARN_ON(kprobe_disabled(&op->kp));
441
442		/* Backup instructions which will be replaced by jump address */
443		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
444		       RELATIVE_ADDR_SIZE);
445
446		insn_buf[0] = RELATIVEJUMP_OPCODE;
447		*(s32 *)(&insn_buf[1]) = rel;
448
449		text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
450			     op->optinsn.insn);
451
452		list_del_init(&op->list);
453	}
454}
455
456/* Replace a relative jump with a breakpoint (int3).  */
457void arch_unoptimize_kprobe(struct optimized_kprobe *op)
458{
459	u8 insn_buf[RELATIVEJUMP_SIZE];
460
461	/* Set int3 to first byte for kprobes */
462	insn_buf[0] = BREAKPOINT_INSTRUCTION;
463	memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
464	text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
465		     op->optinsn.insn);
466}
467
468/*
469 * Recover original instructions and breakpoints from relative jumps.
470 * Caller must call with locking kprobe_mutex.
471 */
472extern void arch_unoptimize_kprobes(struct list_head *oplist,
473				    struct list_head *done_list)
474{
475	struct optimized_kprobe *op, *tmp;
476
477	list_for_each_entry_safe(op, tmp, oplist, list) {
478		arch_unoptimize_kprobe(op);
479		list_move(&op->list, done_list);
480	}
481}
482
483int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
484{
485	struct optimized_kprobe *op;
486
487	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
488		/* This kprobe is really able to run optimized path. */
489		op = container_of(p, struct optimized_kprobe, kp);
490		/* Detour through copied instructions */
491		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
492		if (!reenter)
493			reset_current_kprobe();
494		preempt_enable_no_resched();
495		return 1;
496	}
497	return 0;
498}
499NOKPROBE_SYMBOL(setup_detour_execution);