/**
 * @file cpu_buffer.c
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Robert Richter <robert.richter@amd.com>
 *
 * Each CPU has a local buffer that stores PC value/event
 * pairs. We also log context switches when we notice them.
 * Eventually each CPU's buffer is processed into the global
 * event buffer by sync_buffer().
 *
 * We use a local buffer for two reasons: an NMI or similar
 * interrupt cannot synchronise, and high sampling rates
 * would lead to catastrophic global synchronisation if
 * a global buffer were used.
 */

#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/errno.h>

#include <asm/ptrace.h>

#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprof.h"

#define OP_BUFFER_FLAGS	0

static struct trace_buffer *op_ring_buffer;
DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer);

static void wq_sync_buffer(struct work_struct *work);

#define DEFAULT_TIMER_EXPIRE (HZ / 10)
static int work_enabled;

unsigned long oprofile_get_cpu_buffer_size(void)
{
	return oprofile_cpu_buffer_size;
}

void oprofile_cpu_buffer_inc_smpl_lost(void)
{
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);

	cpu_buf->sample_lost_overflow++;
}

void free_cpu_buffers(void)
{
	if (op_ring_buffer)
		ring_buffer_free(op_ring_buffer);
	op_ring_buffer = NULL;
}

#define RB_EVENT_HDR_SIZE 4

int alloc_cpu_buffers(void)
{
	int i;

	unsigned long buffer_size = oprofile_cpu_buffer_size;
	unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
						 RB_EVENT_HDR_SIZE);

	op_ring_buffer = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
	if (!op_ring_buffer)
		goto fail;

	for_each_possible_cpu(i) {
		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);

		b->last_task = NULL;
		b->last_is_kernel = -1;
		b->tracing = 0;
		b->buffer_size = buffer_size;
		b->sample_received = 0;
		b->sample_lost_overflow = 0;
		b->backtrace_aborted = 0;
		b->sample_invalid_eip = 0;
		b->cpu = i;
		INIT_DELAYED_WORK(&b->work, wq_sync_buffer);
	}
	return 0;

fail:
	free_cpu_buffers();
	return -ENOMEM;
}

void start_cpu_work(void)
{
	int i;

	work_enabled = 1;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);

		/*
		 * Spread the work out by one jiffy per CPU so they
		 * don't all fire at once.
		 */
		schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
	}
}

void end_cpu_work(void)
{
	work_enabled = 0;
}

void flush_cpu_work(void)
{
	int i;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);

		/* these work items are per-CPU, no need for flush_sync */
		flush_delayed_work(&b->work);
	}
}

/*
 * This function prepares the cpu buffer to write a sample.
 *
 * Struct op_entry is used during operations on the ring buffer, while
 * struct op_sample contains the data that is stored in the ring
 * buffer. The entry may be passed in uninitialized. The function
 * reserves a data array of the length given by size. Use
 * op_cpu_buffer_write_commit() after preparing the sample. On error
 * a NULL pointer is returned, otherwise a pointer to the sample. A
 * usage sketch follows op_cpu_buffer_write_commit() below.
 */
struct op_sample
*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size)
{
	entry->event = ring_buffer_lock_reserve
		(op_ring_buffer, sizeof(struct op_sample) +
		 size * sizeof(entry->sample->data[0]));
	if (!entry->event)
		return NULL;
	entry->sample = ring_buffer_event_data(entry->event);
	entry->size = size;
	entry->data = entry->sample->data;

	return entry->sample;
}

int op_cpu_buffer_write_commit(struct op_entry *entry)
{
	return ring_buffer_unlock_commit(op_ring_buffer, entry->event);
}
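
/*
 * Illustrative sketch, not part of the original file: the
 * reserve/fill/commit protocol described above, writing a header
 * sample that carries one extra data word, following the same
 * convention that op_add_code() further down uses to store a task
 * pointer. The function name is hypothetical; the caller is assumed
 * to run while sampling is active.
 */
static int __maybe_unused example_write_escape(unsigned long flags,
					       unsigned long word)
{
	struct op_entry entry;
	struct op_sample *sample;

	/* reserve a sample plus room for one data word */
	sample = op_cpu_buffer_write_reserve(&entry, 1);
	if (!sample)
		return -ENOMEM;

	/* ESCAPE_CODE marks this as a header, not a PC sample */
	sample->eip = ESCAPE_CODE;
	sample->event = flags;

	/* store the data word reserved above */
	op_cpu_buffer_add_data(&entry, word);

	/* publish the entry to the ring buffer */
	return op_cpu_buffer_write_commit(&entry);
}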

struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
{
	struct ring_buffer_event *e;

	e = ring_buffer_consume(op_ring_buffer, cpu, NULL, NULL);
	if (!e)
		return NULL;

	entry->event = e;
	entry->sample = ring_buffer_event_data(e);
	entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample))
		/ sizeof(entry->sample->data[0]);
	entry->data = entry->sample->data;
	return entry->sample;
}

unsigned long op_cpu_buffer_entries(int cpu)
{
	return ring_buffer_entries_cpu(op_ring_buffer, cpu);
}
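
/*
 * Illustrative sketch, not part of the original file: how a consumer
 * might drain one CPU's buffer with the two accessors above. The real
 * consumer is sync_buffer(), as described in the file header; the
 * process_sample() callback used here is hypothetical.
 */
static void __maybe_unused
example_drain_cpu(int cpu, void (*process_sample)(struct op_sample *))
{
	struct op_entry entry;
	struct op_sample *sample;
	unsigned long available = op_cpu_buffer_entries(cpu);
	unsigned long i;

	/* consume at most the entries that were present at the start */
	for (i = 0; i < available; i++) {
		sample = op_cpu_buffer_read_entry(&entry, cpu);
		if (!sample)
			break;
		process_sample(sample);
	}
}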

static int
op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
	    int is_kernel, struct task_struct *task)
{
	struct op_entry entry;
	struct op_sample *sample;
	unsigned long flags;
	int size;

	flags = 0;

	if (backtrace)
		flags |= TRACE_BEGIN;

	/* notice a switch from user->kernel or vice versa */
	is_kernel = !!is_kernel;
	if (cpu_buf->last_is_kernel != is_kernel) {
		cpu_buf->last_is_kernel = is_kernel;
		flags |= KERNEL_CTX_SWITCH;
		if (is_kernel)
			flags |= IS_KERNEL;
	}

	/* notice a task switch */
	if (cpu_buf->last_task != task) {
		cpu_buf->last_task = task;
		flags |= USER_CTX_SWITCH;
	}

	if (!flags)
		/* nothing to do */
		return 0;

	if (flags & USER_CTX_SWITCH)
		size = 1;
	else
		size = 0;

	sample = op_cpu_buffer_write_reserve(&entry, size);
	if (!sample)
		return -ENOMEM;

	sample->eip = ESCAPE_CODE;
	sample->event = flags;

	if (size)
		op_cpu_buffer_add_data(&entry, (unsigned long)task);

	op_cpu_buffer_write_commit(&entry);

	return 0;
}

static inline int
op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
	      unsigned long pc, unsigned long event)
{
	struct op_entry entry;
	struct op_sample *sample;

	sample = op_cpu_buffer_write_reserve(&entry, 0);
	if (!sample)
		return -ENOMEM;

	sample->eip = pc;
	sample->event = event;

	return op_cpu_buffer_write_commit(&entry);
}

/*
 * This must be safe from any context.
 *
 * is_kernel is needed because on some architectures you cannot
 * tell if you are in kernel or user space simply by looking at
 * pc. We tag this in the buffer by generating kernel enter/exit
 * events whenever is_kernel changes.
 */
static int
log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
	   unsigned long backtrace, int is_kernel, unsigned long event,
	   struct task_struct *task)
{
	struct task_struct *tsk = task ? task : current;

	cpu_buf->sample_received++;

	if (pc == ESCAPE_CODE) {
		cpu_buf->sample_invalid_eip++;
		return 0;
	}

	if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
		goto fail;

	if (op_add_sample(cpu_buf, pc, event))
		goto fail;

	return 1;

fail:
	cpu_buf->sample_lost_overflow++;
	return 0;
}

static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
{
	cpu_buf->tracing = 1;
}

static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
{
	cpu_buf->tracing = 0;
}

static inline void
__oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
			  unsigned long event, int is_kernel,
			  struct task_struct *task)
{
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);
	unsigned long backtrace = oprofile_backtrace_depth;

	/*
	 * If log_sample() fails we can't backtrace, since we lost the
	 * source of this event.
	 */
	if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, task))
		/* failed */
		return;

	if (!backtrace)
		return;

	oprofile_begin_trace(cpu_buf);
	oprofile_ops.backtrace(regs, backtrace);
	oprofile_end_trace(cpu_buf);
}

void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs,
				unsigned long event, int is_kernel,
				struct task_struct *task)
{
	__oprofile_add_ext_sample(pc, regs, event, is_kernel, task);
}

void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
			     unsigned long event, int is_kernel)
{
	__oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}

void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
{
	int is_kernel;
	unsigned long pc;

	if (likely(regs)) {
		is_kernel = !user_mode(regs);
		pc = profile_pc(regs);
	} else {
		is_kernel = 0;    /* This value will not be used */
		pc = ESCAPE_CODE; /* as this causes an early return. */
	}

	__oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}
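
/*
 * Illustrative sketch, not part of the original file: the typical call
 * an architecture's counter-overflow or timer handler makes into this
 * file. The handler name and the event/counter number are
 * hypothetical; regs is the register state of the interrupted context.
 */
static void __maybe_unused example_overflow_handler(struct pt_regs *regs)
{
	/* counter 0 overflowed: log one sample at the interrupted PC */
	oprofile_add_sample(regs, 0);
}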

/*
 * Add samples with data to the ring buffer.
 *
 * Use oprofile_add_data(&entry, val) to add data and
 * oprofile_write_commit(&entry) to commit the sample. A usage sketch
 * follows oprofile_write_commit() below.
 */
void
oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs,
		       unsigned long pc, int code, int size)
{
	struct op_sample *sample;
	int is_kernel = !user_mode(regs);
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);

	cpu_buf->sample_received++;

	/* no backtraces for samples with data */
	if (op_add_code(cpu_buf, 0, is_kernel, current))
		goto fail;

	sample = op_cpu_buffer_write_reserve(entry, size + 2);
	if (!sample)
		goto fail;
	sample->eip = ESCAPE_CODE;
	sample->event = 0;		/* no flags */

	op_cpu_buffer_add_data(entry, code);
	op_cpu_buffer_add_data(entry, pc);

	return;

fail:
	entry->event = NULL;
	cpu_buf->sample_lost_overflow++;
}

int oprofile_add_data(struct op_entry *entry, unsigned long val)
{
	if (!entry->event)
		return 0;
	return op_cpu_buffer_add_data(entry, val);
}

int oprofile_add_data64(struct op_entry *entry, u64 val)
{
	if (!entry->event)
		return 0;
	if (op_cpu_buffer_get_size(entry) < 2)
		/*
		 * The function returns 0 to indicate a too-small
		 * buffer, even if there is some space left.
		 */
		return 0;
	if (!op_cpu_buffer_add_data(entry, (u32)val))
		return 0;
	return op_cpu_buffer_add_data(entry, (u32)(val >> 32));
}

int oprofile_write_commit(struct op_entry *entry)
{
	if (!entry->event)
		return -EINVAL;
	return op_cpu_buffer_write_commit(entry);
}
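
/*
 * Illustrative sketch, not part of the original file: the
 * reserve/add/commit sequence described above oprofile_write_reserve().
 * The code value and the 64-bit payload are hypothetical stand-ins for
 * what callers such as the AMD IBS driver pass from hardware sample
 * registers.
 */
static void __maybe_unused example_write_with_data(struct pt_regs *regs,
						   unsigned long pc,
						   int code, u64 hw_val)
{
	struct op_entry entry;

	/* reserve room for two extra data words (one u64) */
	oprofile_write_reserve(&entry, regs, pc, code, 2);

	/* store the 64-bit value as two 32-bit words, low word first */
	oprofile_add_data64(&entry, hw_val);

	/* commit; returns -EINVAL if the reserve above failed */
	oprofile_write_commit(&entry);
}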

void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);

	log_sample(cpu_buf, pc, 0, is_kernel, event, NULL);
}

void oprofile_add_trace(unsigned long pc)
{
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);

	if (!cpu_buf->tracing)
		return;

	/*
	 * A broken frame can give an eip with the same value as an
	 * escape code; abort the trace if we get one.
	 */
	if (pc == ESCAPE_CODE)
		goto fail;

	if (op_add_sample(cpu_buf, pc, 0))
		goto fail;

	return;
fail:
	cpu_buf->tracing = 0;
	cpu_buf->backtrace_aborted++;
	return;
}
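
/*
 * Illustrative sketch, not part of the original file: the general
 * shape of an architecture's oprofile_ops.backtrace() callback, which
 * runs between oprofile_begin_trace() and oprofile_end_trace() in
 * __oprofile_add_ext_sample() above. A real callback walks the stack
 * frames itself; here the recovered return addresses are assumed to
 * already sit in callchain[], which is hypothetical.
 */
static void __maybe_unused example_backtrace(unsigned long *callchain,
					     unsigned int depth)
{
	unsigned int i;

	/*
	 * Feed each recovered return address into the cpu buffer; the
	 * trace ends when oprofile_end_trace() is called, or earlier
	 * if oprofile_add_trace() aborts on an ESCAPE_CODE value.
	 */
	for (i = 0; i < depth; i++)
		oprofile_add_trace(callchain[i]);
}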

/*
 * This serves to avoid cpu buffer overflow, and makes sure
 * the task mortuary progresses.
 *
 * By using schedule_delayed_work_on() and then schedule_delayed_work()
 * we guarantee this will stay on the correct CPU.
 */
static void wq_sync_buffer(struct work_struct *work)
{
	struct oprofile_cpu_buffer *b =
		container_of(work, struct oprofile_cpu_buffer, work.work);

	if (b->cpu != smp_processor_id() && !cpu_online(b->cpu)) {
		cancel_delayed_work(&b->work);
		return;
	}
	sync_buffer(b->cpu);

	/* don't re-add the work if we're shutting down */
	if (work_enabled)
		schedule_delayed_work(&b->work, DEFAULT_TIMER_EXPIRE);
}