Loading...
Note: File does not exist in v3.5.6.
1#include <linux/sched.h>
2#include <asm/ptrace.h>
3#include <asm/bitops.h>
4#include <asm/stacktrace.h>
5#include <asm/unwind.h>
6
7#define FRAME_HEADER_SIZE (sizeof(long) * 2)
8
/*
 * Read one word through 'x' on behalf of 'task'.
 *
 * This disables KASAN checking when reading a value from another task's
 * stack, since the other task could be running on another CPU and could have
 * poisoned the stack in the meantime.  When 'task' is the current task the
 * regular READ_ONCE() is used instead.
 *
 * 'task' is parenthesized so that any expression (for example a conditional
 * that selects a task) can be passed without changing how the comparison
 * against 'current' parses.  Exactly one of the two reads of 'x' is
 * evaluated.  The temporary uses a double-underscore name so that it cannot
 * capture an identifier appearing in 'x'.
 */
#define READ_ONCE_TASK_STACK(task, x)			\
({							\
	unsigned long __val;				\
	if ((task) == current)				\
		__val = READ_ONCE(x);			\
	else						\
		__val = READ_ONCE_NOCHECK(x);		\
	__val;						\
})
23
24static void unwind_dump(struct unwind_state *state, unsigned long *sp)
25{
26 static bool dumped_before = false;
27 bool prev_zero, zero = false;
28 unsigned long word;
29
30 if (dumped_before)
31 return;
32
33 dumped_before = true;
34
35 printk_deferred("unwind stack type:%d next_sp:%p mask:%lx graph_idx:%d\n",
36 state->stack_info.type, state->stack_info.next_sp,
37 state->stack_mask, state->graph_idx);
38
39 for (sp = state->orig_sp; sp < state->stack_info.end; sp++) {
40 word = READ_ONCE_NOCHECK(*sp);
41
42 prev_zero = zero;
43 zero = word == 0;
44
45 if (zero) {
46 if (!prev_zero)
47 printk_deferred("%p: %016x ...\n", sp, 0);
48 continue;
49 }
50
51 printk_deferred("%p: %016lx (%pB)\n", sp, word, (void *)word);
52 }
53}
54
55unsigned long unwind_get_return_address(struct unwind_state *state)
56{
57 unsigned long addr;
58 unsigned long *addr_p = unwind_get_return_address_ptr(state);
59
60 if (unwind_done(state))
61 return 0;
62
63 if (state->regs && user_mode(state->regs))
64 return 0;
65
66 addr = READ_ONCE_TASK_STACK(state->task, *addr_p);
67 addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr,
68 addr_p);
69
70 return __kernel_text_address(addr) ? addr : 0;
71}
72EXPORT_SYMBOL_GPL(unwind_get_return_address);
73
74static size_t regs_size(struct pt_regs *regs)
75{
76 /* x86_32 regs from kernel mode are two words shorter: */
77 if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
78 return sizeof(*regs) - 2*sizeof(long);
79
80 return sizeof(*regs);
81}
82
83#ifdef CONFIG_X86_32
84#define GCC_REALIGN_WORDS 3
85#else
86#define GCC_REALIGN_WORDS 1
87#endif
88
89static bool is_last_task_frame(struct unwind_state *state)
90{
91 unsigned long *last_bp = (unsigned long *)task_pt_regs(state->task) - 2;
92 unsigned long *aligned_bp = last_bp - GCC_REALIGN_WORDS;
93
94 /*
95 * We have to check for the last task frame at two different locations
96 * because gcc can occasionally decide to realign the stack pointer and
97 * change the offset of the stack frame in the prologue of a function
98 * called by head/entry code. Examples:
99 *
100 * <start_secondary>:
101 * push %edi
102 * lea 0x8(%esp),%edi
103 * and $0xfffffff8,%esp
104 * pushl -0x4(%edi)
105 * push %ebp
106 * mov %esp,%ebp
107 *
108 * <x86_64_start_kernel>:
109 * lea 0x8(%rsp),%r10
110 * and $0xfffffffffffffff0,%rsp
111 * pushq -0x8(%r10)
112 * push %rbp
113 * mov %rsp,%rbp
114 *
115 * Note that after aligning the stack, it pushes a duplicate copy of
116 * the return address before pushing the frame pointer.
117 */
118 return (state->bp == last_bp ||
119 (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1)));
120}
121
/*
 * Check whether a frame pointer value is really an encoded pointer to
 * pt_regs on the stack (see ENCODE_FRAME_POINTER).
 *
 * An encoded pointer has its lowest bit set.  Returns the pointer with that
 * bit cleared, or NULL when the bit is not set.
 */
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
	unsigned long encoded = (unsigned long)bp;

	return (encoded & 0x1) ? (struct pt_regs *)(encoded & ~0x1UL) : NULL;
}
135
136static bool update_stack_state(struct unwind_state *state, void *addr,
137 size_t len)
138{
139 struct stack_info *info = &state->stack_info;
140 enum stack_type orig_type = info->type;
141
142 /*
143 * If addr isn't on the current stack, switch to the next one.
144 *
145 * We may have to traverse multiple stacks to deal with the possibility
146 * that 'info->next_sp' could point to an empty stack and 'addr' could
147 * be on a subsequent stack.
148 */
149 while (!on_stack(info, addr, len))
150 if (get_stack_info(info->next_sp, state->task, info,
151 &state->stack_mask))
152 return false;
153
154 if (!state->orig_sp || info->type != orig_type)
155 state->orig_sp = addr;
156
157 return true;
158}
159
160bool unwind_next_frame(struct unwind_state *state)
161{
162 struct pt_regs *regs;
163 unsigned long *next_bp, *next_frame;
164 size_t next_len;
165 enum stack_type prev_type = state->stack_info.type;
166
167 if (unwind_done(state))
168 return false;
169
170 /* have we reached the end? */
171 if (state->regs && user_mode(state->regs))
172 goto the_end;
173
174 if (is_last_task_frame(state)) {
175 regs = task_pt_regs(state->task);
176
177 /*
178 * kthreads (other than the boot CPU's idle thread) have some
179 * partial regs at the end of their stack which were placed
180 * there by copy_thread_tls(). But the regs don't have any
181 * useful information, so we can skip them.
182 *
183 * This user_mode() check is slightly broader than a PF_KTHREAD
184 * check because it also catches the awkward situation where a
185 * newly forked kthread transitions into a user task by calling
186 * do_execve(), which eventually clears PF_KTHREAD.
187 */
188 if (!user_mode(regs))
189 goto the_end;
190
191 /*
192 * We're almost at the end, but not quite: there's still the
193 * syscall regs frame. Entry code doesn't encode the regs
194 * pointer for syscalls, so we have to set it manually.
195 */
196 state->regs = regs;
197 state->bp = NULL;
198 return true;
199 }
200
201 /* get the next frame pointer */
202 if (state->regs)
203 next_bp = (unsigned long *)state->regs->bp;
204 else
205 next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task,*state->bp);
206
207 /* is the next frame pointer an encoded pointer to pt_regs? */
208 regs = decode_frame_pointer(next_bp);
209 if (regs) {
210 next_frame = (unsigned long *)regs;
211 next_len = sizeof(*regs);
212 } else {
213 next_frame = next_bp;
214 next_len = FRAME_HEADER_SIZE;
215 }
216
217 /* make sure the next frame's data is accessible */
218 if (!update_stack_state(state, next_frame, next_len)) {
219 /*
220 * Don't warn on bad regs->bp. An interrupt in entry code
221 * might cause a false positive warning.
222 */
223 if (state->regs)
224 goto the_end;
225
226 goto bad_address;
227 }
228
229 /* Make sure it only unwinds up and doesn't overlap the last frame: */
230 if (state->stack_info.type == prev_type) {
231 if (state->regs && (void *)next_frame < (void *)state->regs + regs_size(state->regs))
232 goto bad_address;
233
234 if (state->bp && (void *)next_frame < (void *)state->bp + FRAME_HEADER_SIZE)
235 goto bad_address;
236 }
237
238 /* move to the next frame */
239 if (regs) {
240 state->regs = regs;
241 state->bp = NULL;
242 } else {
243 state->bp = next_bp;
244 state->regs = NULL;
245 }
246
247 return true;
248
249bad_address:
250 /*
251 * When unwinding a non-current task, the task might actually be
252 * running on another CPU, in which case it could be modifying its
253 * stack while we're reading it. This is generally not a problem and
254 * can be ignored as long as the caller understands that unwinding
255 * another task will not always succeed.
256 */
257 if (state->task != current)
258 goto the_end;
259
260 if (state->regs) {
261 printk_deferred_once(KERN_WARNING
262 "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
263 state->regs, state->task->comm,
264 state->task->pid, next_frame);
265 unwind_dump(state, (unsigned long *)state->regs);
266 } else {
267 printk_deferred_once(KERN_WARNING
268 "WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
269 state->bp, state->task->comm,
270 state->task->pid, next_frame);
271 unwind_dump(state, state->bp);
272 }
273the_end:
274 state->stack_info.type = STACK_TYPE_UNKNOWN;
275 return false;
276}
277EXPORT_SYMBOL_GPL(unwind_next_frame);
278
279void __unwind_start(struct unwind_state *state, struct task_struct *task,
280 struct pt_regs *regs, unsigned long *first_frame)
281{
282 unsigned long *bp, *frame;
283 size_t len;
284
285 memset(state, 0, sizeof(*state));
286 state->task = task;
287
288 /* don't even attempt to start from user mode regs */
289 if (regs && user_mode(regs)) {
290 state->stack_info.type = STACK_TYPE_UNKNOWN;
291 return;
292 }
293
294 /* set up the starting stack frame */
295 bp = get_frame_pointer(task, regs);
296 regs = decode_frame_pointer(bp);
297 if (regs) {
298 state->regs = regs;
299 frame = (unsigned long *)regs;
300 len = sizeof(*regs);
301 } else {
302 state->bp = bp;
303 frame = bp;
304 len = FRAME_HEADER_SIZE;
305 }
306
307 /* initialize stack info and make sure the frame data is accessible */
308 get_stack_info(frame, state->task, &state->stack_info,
309 &state->stack_mask);
310 update_stack_state(state, frame, len);
311
312 /*
313 * The caller can provide the address of the first frame directly
314 * (first_frame) or indirectly (regs->sp) to indicate which stack frame
315 * to start unwinding at. Skip ahead until we reach it.
316 */
317 while (!unwind_done(state) &&
318 (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
319 state->bp < first_frame))
320 unwind_next_frame(state);
321}
322EXPORT_SYMBOL_GPL(__unwind_start);