Linux Audio

Check our new training course

Linux kernel drivers training

May 6-19, 2025
Register
Loading...
Note: File does not exist in v3.5.6.
  1#include <linux/sched.h>
  2#include <asm/ptrace.h>
  3#include <asm/bitops.h>
  4#include <asm/stacktrace.h>
  5#include <asm/unwind.h>
  6
  7#define FRAME_HEADER_SIZE (sizeof(long) * 2)
  8
  9/*
 10 * This disables KASAN checking when reading a value from another task's stack,
 11 * since the other task could be running on another CPU and could have poisoned
 12 * the stack in the meantime.
 13 */
 14#define READ_ONCE_TASK_STACK(task, x)			\
 15({							\
 16	unsigned long val;				\
 17	if (task == current)				\
 18		val = READ_ONCE(x);			\
 19	else						\
 20		val = READ_ONCE_NOCHECK(x);		\
 21	val;						\
 22})
 23
 24static void unwind_dump(struct unwind_state *state, unsigned long *sp)
 25{
 26	static bool dumped_before = false;
 27	bool prev_zero, zero = false;
 28	unsigned long word;
 29
 30	if (dumped_before)
 31		return;
 32
 33	dumped_before = true;
 34
 35	printk_deferred("unwind stack type:%d next_sp:%p mask:%lx graph_idx:%d\n",
 36			state->stack_info.type, state->stack_info.next_sp,
 37			state->stack_mask, state->graph_idx);
 38
 39	for (sp = state->orig_sp; sp < state->stack_info.end; sp++) {
 40		word = READ_ONCE_NOCHECK(*sp);
 41
 42		prev_zero = zero;
 43		zero = word == 0;
 44
 45		if (zero) {
 46			if (!prev_zero)
 47				printk_deferred("%p: %016x ...\n", sp, 0);
 48			continue;
 49		}
 50
 51		printk_deferred("%p: %016lx (%pB)\n", sp, word, (void *)word);
 52	}
 53}
 54
 55unsigned long unwind_get_return_address(struct unwind_state *state)
 56{
 57	unsigned long addr;
 58	unsigned long *addr_p = unwind_get_return_address_ptr(state);
 59
 60	if (unwind_done(state))
 61		return 0;
 62
 63	if (state->regs && user_mode(state->regs))
 64		return 0;
 65
 66	addr = READ_ONCE_TASK_STACK(state->task, *addr_p);
 67	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr,
 68				     addr_p);
 69
 70	return __kernel_text_address(addr) ? addr : 0;
 71}
 72EXPORT_SYMBOL_GPL(unwind_get_return_address);
 73
 74static size_t regs_size(struct pt_regs *regs)
 75{
 76	/* x86_32 regs from kernel mode are two words shorter: */
 77	if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
 78		return sizeof(*regs) - 2*sizeof(long);
 79
 80	return sizeof(*regs);
 81}
 82
 83#ifdef CONFIG_X86_32
 84#define GCC_REALIGN_WORDS 3
 85#else
 86#define GCC_REALIGN_WORDS 1
 87#endif
 88
 89static bool is_last_task_frame(struct unwind_state *state)
 90{
 91	unsigned long *last_bp = (unsigned long *)task_pt_regs(state->task) - 2;
 92	unsigned long *aligned_bp = last_bp - GCC_REALIGN_WORDS;
 93
 94	/*
 95	 * We have to check for the last task frame at two different locations
 96	 * because gcc can occasionally decide to realign the stack pointer and
 97	 * change the offset of the stack frame in the prologue of a function
 98	 * called by head/entry code.  Examples:
 99	 *
100	 * <start_secondary>:
101	 *      push   %edi
102	 *      lea    0x8(%esp),%edi
103	 *      and    $0xfffffff8,%esp
104	 *      pushl  -0x4(%edi)
105	 *      push   %ebp
106	 *      mov    %esp,%ebp
107	 *
108	 * <x86_64_start_kernel>:
109	 *      lea    0x8(%rsp),%r10
110	 *      and    $0xfffffffffffffff0,%rsp
111	 *      pushq  -0x8(%r10)
112	 *      push   %rbp
113	 *      mov    %rsp,%rbp
114	 *
115	 * Note that after aligning the stack, it pushes a duplicate copy of
116	 * the return address before pushing the frame pointer.
117	 */
118	return (state->bp == last_bp ||
119		(state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1)));
120}
121
/*
 * Check whether a frame pointer value is really an encoded pointer to pt_regs
 * on the stack (see ENCODE_FRAME_POINTER).  An encoded value has its lowest
 * bit set; the decoded pointer is the same value with that bit cleared.
 *
 * Returns the decoded pt_regs pointer, or NULL if @bp is not encoded.
 */
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
	unsigned long encoded = (unsigned long)bp;

	return (encoded & 0x1) ? (struct pt_regs *)(encoded & ~0x1UL) : NULL;
}
135
136static bool update_stack_state(struct unwind_state *state, void *addr,
137			       size_t len)
138{
139	struct stack_info *info = &state->stack_info;
140	enum stack_type orig_type = info->type;
141
142	/*
143	 * If addr isn't on the current stack, switch to the next one.
144	 *
145	 * We may have to traverse multiple stacks to deal with the possibility
146	 * that 'info->next_sp' could point to an empty stack and 'addr' could
147	 * be on a subsequent stack.
148	 */
149	while (!on_stack(info, addr, len))
150		if (get_stack_info(info->next_sp, state->task, info,
151				   &state->stack_mask))
152			return false;
153
154	if (!state->orig_sp || info->type != orig_type)
155		state->orig_sp = addr;
156
157	return true;
158}
159
160bool unwind_next_frame(struct unwind_state *state)
161{
162	struct pt_regs *regs;
163	unsigned long *next_bp, *next_frame;
164	size_t next_len;
165	enum stack_type prev_type = state->stack_info.type;
166
167	if (unwind_done(state))
168		return false;
169
170	/* have we reached the end? */
171	if (state->regs && user_mode(state->regs))
172		goto the_end;
173
174	if (is_last_task_frame(state)) {
175		regs = task_pt_regs(state->task);
176
177		/*
178		 * kthreads (other than the boot CPU's idle thread) have some
179		 * partial regs at the end of their stack which were placed
180		 * there by copy_thread_tls().  But the regs don't have any
181		 * useful information, so we can skip them.
182		 *
183		 * This user_mode() check is slightly broader than a PF_KTHREAD
184		 * check because it also catches the awkward situation where a
185		 * newly forked kthread transitions into a user task by calling
186		 * do_execve(), which eventually clears PF_KTHREAD.
187		 */
188		if (!user_mode(regs))
189			goto the_end;
190
191		/*
192		 * We're almost at the end, but not quite: there's still the
193		 * syscall regs frame.  Entry code doesn't encode the regs
194		 * pointer for syscalls, so we have to set it manually.
195		 */
196		state->regs = regs;
197		state->bp = NULL;
198		return true;
199	}
200
201	/* get the next frame pointer */
202	if (state->regs)
203		next_bp = (unsigned long *)state->regs->bp;
204	else
205		next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task,*state->bp);
206
207	/* is the next frame pointer an encoded pointer to pt_regs? */
208	regs = decode_frame_pointer(next_bp);
209	if (regs) {
210		next_frame = (unsigned long *)regs;
211		next_len = sizeof(*regs);
212	} else {
213		next_frame = next_bp;
214		next_len = FRAME_HEADER_SIZE;
215	}
216
217	/* make sure the next frame's data is accessible */
218	if (!update_stack_state(state, next_frame, next_len)) {
219		/*
220		 * Don't warn on bad regs->bp.  An interrupt in entry code
221		 * might cause a false positive warning.
222		 */
223		if (state->regs)
224			goto the_end;
225
226		goto bad_address;
227	}
228
229	/* Make sure it only unwinds up and doesn't overlap the last frame: */
230	if (state->stack_info.type == prev_type) {
231		if (state->regs && (void *)next_frame < (void *)state->regs + regs_size(state->regs))
232			goto bad_address;
233
234		if (state->bp && (void *)next_frame < (void *)state->bp + FRAME_HEADER_SIZE)
235			goto bad_address;
236	}
237
238	/* move to the next frame */
239	if (regs) {
240		state->regs = regs;
241		state->bp = NULL;
242	} else {
243		state->bp = next_bp;
244		state->regs = NULL;
245	}
246
247	return true;
248
249bad_address:
250	/*
251	 * When unwinding a non-current task, the task might actually be
252	 * running on another CPU, in which case it could be modifying its
253	 * stack while we're reading it.  This is generally not a problem and
254	 * can be ignored as long as the caller understands that unwinding
255	 * another task will not always succeed.
256	 */
257	if (state->task != current)
258		goto the_end;
259
260	if (state->regs) {
261		printk_deferred_once(KERN_WARNING
262			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
263			state->regs, state->task->comm,
264			state->task->pid, next_frame);
265		unwind_dump(state, (unsigned long *)state->regs);
266	} else {
267		printk_deferred_once(KERN_WARNING
268			"WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
269			state->bp, state->task->comm,
270			state->task->pid, next_frame);
271		unwind_dump(state, state->bp);
272	}
273the_end:
274	state->stack_info.type = STACK_TYPE_UNKNOWN;
275	return false;
276}
277EXPORT_SYMBOL_GPL(unwind_next_frame);
278
279void __unwind_start(struct unwind_state *state, struct task_struct *task,
280		    struct pt_regs *regs, unsigned long *first_frame)
281{
282	unsigned long *bp, *frame;
283	size_t len;
284
285	memset(state, 0, sizeof(*state));
286	state->task = task;
287
288	/* don't even attempt to start from user mode regs */
289	if (regs && user_mode(regs)) {
290		state->stack_info.type = STACK_TYPE_UNKNOWN;
291		return;
292	}
293
294	/* set up the starting stack frame */
295	bp = get_frame_pointer(task, regs);
296	regs = decode_frame_pointer(bp);
297	if (regs) {
298		state->regs = regs;
299		frame = (unsigned long *)regs;
300		len = sizeof(*regs);
301	} else {
302		state->bp = bp;
303		frame = bp;
304		len = FRAME_HEADER_SIZE;
305	}
306
307	/* initialize stack info and make sure the frame data is accessible */
308	get_stack_info(frame, state->task, &state->stack_info,
309		       &state->stack_mask);
310	update_stack_state(state, frame, len);
311
312	/*
313	 * The caller can provide the address of the first frame directly
314	 * (first_frame) or indirectly (regs->sp) to indicate which stack frame
315	 * to start unwinding at.  Skip ahead until we reach it.
316	 */
317	while (!unwind_done(state) &&
318	       (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
319			state->bp < first_frame))
320		unwind_next_frame(state);
321}
322EXPORT_SYMBOL_GPL(__unwind_start);