Linux Audio

Check our new training course

Loading...
v3.1
  1/*
  2 * builtin-record.c
  3 *
  4 * Builtin record command: Record the profile of a workload
  5 * (or a CPU, or a PID) into the perf.data output file - for
  6 * later analysis via perf report.
  7 */
  8#define _FILE_OFFSET_BITS 64
  9
 10#include "builtin.h"
 11
 12#include "perf.h"
 13
 14#include "util/build-id.h"
 15#include "util/util.h"
 16#include "util/parse-options.h"
 17#include "util/parse-events.h"
 18
 19#include "util/header.h"
 20#include "util/event.h"
 21#include "util/evlist.h"
 22#include "util/evsel.h"
 23#include "util/debug.h"
 24#include "util/session.h"
 
 25#include "util/symbol.h"
 26#include "util/cpumap.h"
 27#include "util/thread_map.h"
 
 28
 29#include <unistd.h>
 30#include <sched.h>
 31#include <sys/mman.h>
 32
 33enum write_mode_t {
 34	WRITE_FORCE,
 35	WRITE_APPEND
 36};
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 37
 38static u64			user_interval			= ULLONG_MAX;
 39static u64			default_interval		=      0;
 
 
 
 
 
 40
 41static unsigned int		page_size;
 42static unsigned int		mmap_pages			= UINT_MAX;
 43static unsigned int		user_freq 			= UINT_MAX;
 44static int			freq				=   1000;
 45static int			output;
 46static int			pipe_output			=      0;
 47static const char		*output_name			= NULL;
 48static bool			group				=  false;
 49static int			realtime_prio			=      0;
 50static bool			nodelay				=  false;
 51static bool			raw_samples			=  false;
 52static bool			sample_id_all_avail		=   true;
 53static bool			system_wide			=  false;
 54static pid_t			target_pid			=     -1;
 55static pid_t			target_tid			=     -1;
 56static pid_t			child_pid			=     -1;
 57static bool			no_inherit			=  false;
 58static enum write_mode_t	write_mode			= WRITE_FORCE;
 59static bool			call_graph			=  false;
 60static bool			inherit_stat			=  false;
 61static bool			no_samples			=  false;
 62static bool			sample_address			=  false;
 63static bool			sample_time			=  false;
 64static bool			no_buildid			=  false;
 65static bool			no_buildid_cache		=  false;
 66static struct perf_evlist	*evsel_list;
 67
 68static long			samples				=      0;
 69static u64			bytes_written			=      0;
 70
 71static int			file_new			=      1;
 72static off_t			post_processing_offset;
 73
 74static struct perf_session	*session;
 75static const char		*cpu_list;
 76
 77static void advance_output(size_t size)
 78{
 79	bytes_written += size;
 80}
 81
 82static void write_output(void *buf, size_t size)
 83{
 84	while (size) {
 85		int ret = write(output, buf, size);
 86
 87		if (ret < 0)
 88			die("failed to write");
 89
 90		size -= ret;
 91		buf += ret;
 92
 93		bytes_written += ret;
 94	}
 95}
 96
 97static int process_synthesized_event(union perf_event *event,
 98				     struct perf_sample *sample __used,
 99				     struct perf_session *self __used)
100{
101	write_output(event, event->header.size);
 
 
 
 
 
102	return 0;
103}
104
105static void mmap_read(struct perf_mmap *md)
 
 
 
 
 
 
 
 
 
106{
107	unsigned int head = perf_mmap__read_head(md);
108	unsigned int old = md->prev;
109	unsigned char *data = md->base + page_size;
110	unsigned long size;
111	void *buf;
 
112
113	if (old == head)
114		return;
115
116	samples++;
117
118	size = head - old;
119
120	if ((old & md->mask) + size != (head & md->mask)) {
121		buf = &data[old & md->mask];
122		size = md->mask + 1 - (old & md->mask);
123		old += size;
124
125		write_output(buf, size);
 
 
 
126	}
127
128	buf = &data[old & md->mask];
129	size = head - old;
130	old += size;
131
132	write_output(buf, size);
 
 
 
133
134	md->prev = old;
135	perf_mmap__write_tail(md, old);
 
 
 
136}
137
/* Becomes 1 once a handled signal has arrived. */
static volatile int done;
/* Number of the last handled signal; -1 while none has arrived. */
static volatile int signr = -1;
 
140
141static void sig_handler(int sig)
142{
 
 
 
143	done = 1;
144	signr = sig;
145}
146
147static void sig_atexit(void)
148{
149	if (child_pid > 0)
150		kill(child_pid, SIGTERM);
 
 
 
 
 
 
 
 
 
151
152	if (signr == -1 || signr == SIGUSR1)
153		return;
154
155	signal(signr, SIG_DFL);
156	kill(getpid(), signr);
157}
158
159static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
160{
161	struct perf_event_attr *attr = &evsel->attr;
162	int track = !evsel->idx; /* only the first counter needs these */
163
164	attr->disabled		= 1;
165	attr->inherit		= !no_inherit;
166	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
167				  PERF_FORMAT_TOTAL_TIME_RUNNING |
168				  PERF_FORMAT_ID;
169
170	attr->sample_type	|= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
171
172	if (evlist->nr_entries > 1)
173		attr->sample_type |= PERF_SAMPLE_ID;
174
175	/*
176	 * We default some events to a 1 default interval. But keep
177	 * it a weak assumption overridable by the user.
178	 */
179	if (!attr->sample_period || (user_freq != UINT_MAX &&
180				     user_interval != ULLONG_MAX)) {
181		if (freq) {
182			attr->sample_type	|= PERF_SAMPLE_PERIOD;
183			attr->freq		= 1;
184			attr->sample_freq	= freq;
185		} else {
186			attr->sample_period = default_interval;
187		}
188	}
189
190	if (no_samples)
191		attr->sample_freq = 0;
192
193	if (inherit_stat)
194		attr->inherit_stat = 1;
195
196	if (sample_address) {
197		attr->sample_type	|= PERF_SAMPLE_ADDR;
198		attr->mmap_data = track;
199	}
200
201	if (call_graph)
202		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
203
204	if (system_wide)
205		attr->sample_type	|= PERF_SAMPLE_CPU;
206
207	if (sample_id_all_avail &&
208	    (sample_time || system_wide || !no_inherit || cpu_list))
209		attr->sample_type	|= PERF_SAMPLE_TIME;
210
211	if (raw_samples) {
212		attr->sample_type	|= PERF_SAMPLE_TIME;
213		attr->sample_type	|= PERF_SAMPLE_RAW;
214		attr->sample_type	|= PERF_SAMPLE_CPU;
215	}
216
217	if (nodelay) {
218		attr->watermark = 0;
219		attr->wakeup_events = 1;
220	}
221
222	attr->mmap		= track;
223	attr->comm		= track;
224
225	if (target_pid == -1 && target_tid == -1 && !system_wide) {
226		attr->disabled = 1;
227		attr->enable_on_exec = 1;
228	}
229}
230
231static bool perf_evlist__equal(struct perf_evlist *evlist,
232			       struct perf_evlist *other)
233{
234	struct perf_evsel *pos, *pair;
235
236	if (evlist->nr_entries != other->nr_entries)
237		return false;
238
239	pair = list_entry(other->entries.next, struct perf_evsel, node);
240
241	list_for_each_entry(pos, &evlist->entries, node) {
242		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
243			return false;
244		pair = list_entry(pair->node.next, struct perf_evsel, node);
245	}
246
247	return true;
248}
249
250static void open_counters(struct perf_evlist *evlist)
251{
 
252	struct perf_evsel *pos;
 
 
 
 
253
254	if (evlist->cpus->map[0] < 0)
255		no_inherit = true;
256
257	list_for_each_entry(pos, &evlist->entries, node) {
258		struct perf_event_attr *attr = &pos->attr;
259		/*
260		 * Check if parse_single_tracepoint_event has already asked for
261		 * PERF_SAMPLE_TIME.
262		 *
263		 * XXX this is kludgy but short term fix for problems introduced by
264		 * eac23d1c that broke 'perf script' by having different sample_types
265		 * when using multiple tracepoint events when we use a perf binary
266		 * that tries to use sample_id_all on an older kernel.
267		 *
268		 * We need to move counter creation to perf_session, support
269		 * different sample_types, etc.
270		 */
271		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
272
273		config_attr(pos, evlist);
274retry_sample_id:
275		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
276try_again:
277		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
278			int err = errno;
279
280			if (err == EPERM || err == EACCES) {
281				ui__warning_paranoid();
282				exit(EXIT_FAILURE);
283			} else if (err ==  ENODEV && cpu_list) {
284				die("No such device - did you specify"
285					" an out-of-range profile CPU?\n");
286			} else if (err == EINVAL && sample_id_all_avail) {
287				/*
288				 * Old kernel, no attr->sample_id_type_all field
289				 */
290				sample_id_all_avail = false;
291				if (!sample_time && !raw_samples && !time_needed)
292					attr->sample_type &= ~PERF_SAMPLE_TIME;
293
294				goto retry_sample_id;
295			}
296
297			/*
298			 * If it's cycles then fall back to hrtimer
299			 * based cpu-clock-tick sw counter, which
300			 * is always available even if no PMU support:
301			 */
302			if (attr->type == PERF_TYPE_HARDWARE
303					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {
304
305				if (verbose)
306					ui__warning("The cycles event is not supported, "
307						    "trying to fall back to cpu-clock-ticks\n");
308				attr->type = PERF_TYPE_SOFTWARE;
309				attr->config = PERF_COUNT_SW_CPU_CLOCK;
310				goto try_again;
311			}
312
313			if (err == ENOENT) {
314				ui__warning("The %s event is not supported.\n",
315					    event_name(pos));
316				exit(EXIT_FAILURE);
317			}
318
319			printf("\n");
320			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
321			      err, strerror(err));
322
323#if defined(__i386__) || defined(__x86_64__)
324			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
325				die("No hardware sampling interrupt available."
326				    " No APIC? If so then you can boot the kernel"
327				    " with the \"lapic\" boot parameter to"
328				    " force-enable it.\n");
329#endif
330
331			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
332		}
333	}
334
335	if (perf_evlist__set_filters(evlist)) {
336		error("failed to set filter with %d (%s)\n", errno,
337			strerror(errno));
338		exit(-1);
 
339	}
340
341	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
342		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
343
344	if (file_new)
345		session->evlist = evlist;
346	else {
347		if (!perf_evlist__equal(session->evlist, evlist)) {
348			fprintf(stderr, "incompatible append\n");
349			exit(-1);
 
 
350		}
351 	}
 
352
353	perf_session__update_sample_type(session);
 
 
 
354}
355
356static int process_buildids(void)
357{
358	u64 size = lseek(output, 0, SEEK_CUR);
 
 
359
 
360	if (size == 0)
361		return 0;
362
363	session->fd = output;
364	return __perf_session__process_events(session, post_processing_offset,
365					      size - post_processing_offset,
366					      size, &build_id__mark_dso_hit_ops);
367}
368
369static void atexit_header(void)
370{
371	if (!pipe_output) {
372		session->header.data_size += bytes_written;
 
 
 
 
 
 
373
374		if (!no_buildid)
375			process_buildids();
376		perf_session__write_header(session, evsel_list, output, true);
377		perf_session__delete(session);
378		perf_evlist__delete(evsel_list);
 
379		symbol__exit();
380	}
381}
382
383static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
384{
385	int err;
386	struct perf_session *psession = data;
387
388	if (machine__is_host(machine))
389		return;
390
391	/*
392	 *As for guest kernel when processing subcommand record&report,
393	 *we arrange module mmap prior to guest kernel mmap and trigger
394	 *a preload dso because default guest module symbols are loaded
395	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
396	 *method is used to avoid symbol missing when the first addr is
397	 *in module instead of in guest kernel.
398	 */
399	err = perf_event__synthesize_modules(process_synthesized_event,
400					     psession, machine);
401	if (err < 0)
402		pr_err("Couldn't record guest kernel [%d]'s reference"
403		       " relocation symbol.\n", machine->pid);
404
405	/*
406	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
407	 * have no _text sometimes.
408	 */
409	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
410						 psession, machine, "_text");
411	if (err < 0)
412		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
413							 psession, machine,
414							 "_stext");
415	if (err < 0)
416		pr_err("Couldn't record guest kernel [%d]'s reference"
417		       " relocation symbol.\n", machine->pid);
418}
419
420static struct perf_event_header finished_round_event = {
421	.size = sizeof(struct perf_event_header),
422	.type = PERF_RECORD_FINISHED_ROUND,
423};
424
425static void mmap_read_all(void)
426{
427	int i;
 
428
429	for (i = 0; i < evsel_list->nr_mmaps; i++) {
430		if (evsel_list->mmap[i].base)
431			mmap_read(&evsel_list->mmap[i]);
 
 
 
 
432	}
433
434	if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
435		write_output(&finished_round_event, sizeof(finished_round_event));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436}
437
438static int __cmd_record(int argc, const char **argv)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439{
440	struct stat st;
441	int flags;
442	int err;
443	unsigned long waking = 0;
444	int child_ready_pipe[2], go_pipe[2];
445	const bool forks = argc > 0;
446	char buf;
447	struct machine *machine;
 
 
 
 
 
448
449	page_size = sysconf(_SC_PAGE_SIZE);
450
451	atexit(sig_atexit);
452	signal(SIGCHLD, sig_handler);
453	signal(SIGINT, sig_handler);
454	signal(SIGUSR1, sig_handler);
455
456	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
457		perror("failed to create pipes");
458		exit(-1);
459	}
460
461	if (!output_name) {
462		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
463			pipe_output = 1;
464		else
465			output_name = "perf.data";
466	}
467	if (output_name) {
468		if (!strcmp(output_name, "-"))
469			pipe_output = 1;
470		else if (!stat(output_name, &st) && st.st_size) {
471			if (write_mode == WRITE_FORCE) {
472				char oldname[PATH_MAX];
473				snprintf(oldname, sizeof(oldname), "%s.old",
474					 output_name);
475				unlink(oldname);
476				rename(output_name, oldname);
477			}
478		} else if (write_mode == WRITE_APPEND) {
479			write_mode = WRITE_FORCE;
480		}
481	}
482
483	flags = O_CREAT|O_RDWR;
484	if (write_mode == WRITE_APPEND)
485		file_new = 0;
486	else
487		flags |= O_TRUNC;
488
489	if (pipe_output)
490		output = STDOUT_FILENO;
491	else
492		output = open(output_name, flags, S_IRUSR | S_IWUSR);
493	if (output < 0) {
494		perror("failed to create output file");
495		exit(-1);
496	}
497
498	session = perf_session__new(output_name, O_WRONLY,
499				    write_mode == WRITE_FORCE, false, NULL);
500	if (session == NULL) {
501		pr_err("Not enough memory for reading perf file header\n");
502		return -1;
503	}
504
505	if (!no_buildid)
506		perf_header__set_feat(&session->header, HEADER_BUILD_ID);
507
508	if (!file_new) {
509		err = perf_session__read_header(session, output);
510		if (err < 0)
511			goto out_delete_session;
512	}
513
514	if (have_tracepoints(&evsel_list->entries))
515		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
516
517	/* 512 kiB: default amount of unprivileged mlocked memory */
518	if (mmap_pages == UINT_MAX)
519		mmap_pages = (512 * 1024) / page_size;
520
521	if (forks) {
522		child_pid = fork();
523		if (child_pid < 0) {
524			perror("failed to fork");
525			exit(-1);
526		}
527
528		if (!child_pid) {
529			if (pipe_output)
530				dup2(2, 1);
531			close(child_ready_pipe[0]);
532			close(go_pipe[1]);
533			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
534
535			/*
536			 * Do a dummy execvp to get the PLT entry resolved,
537			 * so we avoid the resolver overhead on the real
538			 * execvp call.
539			 */
540			execvp("", (char **)argv);
541
542			/*
543			 * Tell the parent we're ready to go
544			 */
545			close(child_ready_pipe[1]);
546
547			/*
548			 * Wait until the parent tells us to go.
549			 */
550			if (read(go_pipe[0], &buf, 1) == -1)
551				perror("unable to read pipe");
552
553			execvp(argv[0], (char **)argv);
554
555			perror(argv[0]);
556			kill(getppid(), SIGUSR1);
557			exit(-1);
558		}
 
559
560		if (!system_wide && target_tid == -1 && target_pid == -1)
561			evsel_list->threads->map[0] = child_pid;
562
563		close(child_ready_pipe[1]);
564		close(go_pipe[0]);
565		/*
566		 * wait for child to settle
567		 */
568		if (read(child_ready_pipe[0], &buf, 1) == -1) {
569			perror("unable to read pipe");
570			exit(-1);
571		}
572		close(child_ready_pipe[0]);
573	}
574
575	open_counters(evsel_list);
 
576
577	/*
578	 * perf_session__delete(session) will be called at atexit_header()
579	 */
580	atexit(atexit_header);
581
582	if (pipe_output) {
583		err = perf_header__write_pipe(output);
584		if (err < 0)
585			return err;
586	} else if (file_new) {
587		err = perf_session__write_header(session, evsel_list,
588						 output, false);
589		if (err < 0)
590			return err;
591	}
592
593	post_processing_offset = lseek(output, 0, SEEK_CUR);
 
 
 
 
 
 
594
595	if (pipe_output) {
596		err = perf_session__synthesize_attrs(session,
597						     process_synthesized_event);
 
 
598		if (err < 0) {
599			pr_err("Couldn't synthesize attrs.\n");
600			return err;
601		}
602
603		err = perf_event__synthesize_event_types(process_synthesized_event,
604							 session);
605		if (err < 0) {
606			pr_err("Couldn't synthesize event_types.\n");
607			return err;
608		}
609
610		if (have_tracepoints(&evsel_list->entries)) {
611			/*
612			 * FIXME err <= 0 here actually means that
613			 * there were no tracepoints so its not really
614			 * an error, just that we don't need to
615			 * synthesize anything.  We really have to
616			 * return this more properly and also
617			 * propagate errors that now are calling die()
618			 */
619			err = perf_event__synthesize_tracing_data(output, evsel_list,
620								  process_synthesized_event,
621								  session);
622			if (err <= 0) {
623				pr_err("Couldn't record tracing data.\n");
624				return err;
625			}
626			advance_output(err);
627		}
628	}
629
630	machine = perf_session__find_host_machine(session);
631	if (!machine) {
632		pr_err("Couldn't find native kernel information.\n");
633		return -1;
634	}
635
636	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
637						 session, machine, "_text");
638	if (err < 0)
639		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
640							 session, machine, "_stext");
641	if (err < 0)
642		pr_err("Couldn't record kernel reference relocation symbol\n"
643		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
644		       "Check /proc/kallsyms permission or run as root.\n");
645
646	err = perf_event__synthesize_modules(process_synthesized_event,
647					     session, machine);
648	if (err < 0)
649		pr_err("Couldn't record kernel module information.\n"
650		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
651		       "Check /proc/modules permission or run as root.\n");
652
653	if (perf_guest)
654		perf_session__process_machines(session,
655					       perf_event__synthesize_guest_os);
656
657	if (!system_wide)
658		perf_event__synthesize_thread_map(evsel_list->threads,
659						  process_synthesized_event,
660						  session);
661	else
662		perf_event__synthesize_threads(process_synthesized_event,
663					       session);
664
665	if (realtime_prio) {
666		struct sched_param param;
667
668		param.sched_priority = realtime_prio;
669		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
670			pr_err("Could not set realtime priority.\n");
671			exit(-1);
 
672		}
673	}
674
675	perf_evlist__enable(evsel_list);
 
 
 
 
 
 
676
677	/*
678	 * Let the child rip
679	 */
680	if (forks)
681		close(go_pipe[1]);
 
 
 
 
 
682
683	for (;;) {
684		int hits = samples;
685
686		mmap_read_all();
 
 
 
687
688		if (hits == samples) {
689			if (done)
690				break;
691			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
692			waking++;
693		}
694
695		if (done)
696			perf_evlist__disable(evsel_list);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697	}
698
699	if (quiet || signr == SIGUSR1)
700		return 0;
701
702	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
703
704	/*
705	 * Approximate RIP event size: 24 bytes.
706	 */
707	fprintf(stderr,
708		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
709		(double)bytes_written / 1024.0 / 1024.0,
710		output_name,
711		bytes_written / 24);
712
713	return 0;
714
715out_delete_session:
716	perf_session__delete(session);
717	return err;
718}
719
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/* Usage lines shown for 'perf record'; the list ends with NULL. */
static const char * const record_usage[] = {
	[0] = "perf record [<options>] [<command>]",
	[1] = "perf record [<options>] -- <command> [<options>]",
	[2] = NULL
};
725
/* Set by -f/--force. */
static bool force;
/* Set by -A/--append. */
static bool append_file;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
727
 
 
 
 
 
 
 
728const struct option record_options[] = {
729	OPT_CALLBACK('e', "event", &evsel_list, "event",
730		     "event selector. use 'perf list' to list available events",
731		     parse_events_option),
732	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
733		     "event filter", parse_filter),
734	OPT_INTEGER('p', "pid", &target_pid,
735		    "record events on existing process id"),
736	OPT_INTEGER('t', "tid", &target_tid,
737		    "record events on existing thread id"),
738	OPT_INTEGER('r', "realtime", &realtime_prio,
739		    "collect data with this RT SCHED_FIFO priority"),
740	OPT_BOOLEAN('D', "no-delay", &nodelay,
741		    "collect data without buffering"),
742	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
743		    "collect raw sample records from all opened counters"),
744	OPT_BOOLEAN('a', "all-cpus", &system_wide,
745			    "system-wide collection from all CPUs"),
746	OPT_BOOLEAN('A', "append", &append_file,
747			    "append to the output file to do incremental profiling"),
748	OPT_STRING('C', "cpu", &cpu_list, "cpu",
749		    "list of cpus to monitor"),
750	OPT_BOOLEAN('f', "force", &force,
751			"overwrite existing data file (deprecated)"),
752	OPT_U64('c', "count", &user_interval, "event period to sample"),
753	OPT_STRING('o', "output", &output_name, "file",
754		    "output file name"),
755	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
756		    "child tasks do not inherit counters"),
757	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
758	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
759	OPT_BOOLEAN(0, "group", &group,
 
 
 
760		    "put the counters into a counter group"),
761	OPT_BOOLEAN('g', "call-graph", &call_graph,
762		    "do call-graph (stack chain/backtrace) recording"),
 
 
 
 
763	OPT_INCR('v', "verbose", &verbose,
764		    "be more verbose (show counter open errors, etc)"),
765	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
766	OPT_BOOLEAN('s', "stat", &inherit_stat,
767		    "per thread counts"),
768	OPT_BOOLEAN('d', "data", &sample_address,
769		    "Sample addresses"),
770	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
771	OPT_BOOLEAN('n', "no-samples", &no_samples,
 
772		    "don't sample"),
773	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
774		    "do not update the buildid cache"),
775	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
776		    "do not collect buildids in perf.data"),
777	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
778		     "monitor event in cgroup name only",
779		     parse_cgroups),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
780	OPT_END()
781};
782
783int cmd_record(int argc, const char **argv, const char *prefix __used)
784{
785	int err = -ENOMEM;
786	struct perf_evsel *pos;
 
787
788	evsel_list = perf_evlist__new(NULL, NULL);
789	if (evsel_list == NULL)
790		return -ENOMEM;
791
 
 
792	argc = parse_options(argc, argv, record_options, record_usage,
793			    PARSE_OPT_STOP_AT_NON_OPTION);
794	if (!argc && target_pid == -1 && target_tid == -1 &&
795		!system_wide && !cpu_list)
796		usage_with_options(record_usage, record_options);
797
798	if (force && append_file) {
799		fprintf(stderr, "Can't overwrite and append at the same time."
800				" You need to choose between -f and -A");
801		usage_with_options(record_usage, record_options);
802	} else if (append_file) {
803		write_mode = WRITE_APPEND;
804	} else {
805		write_mode = WRITE_FORCE;
806	}
807
808	if (nr_cgroups && !system_wide) {
809		fprintf(stderr, "cgroup monitoring only available in"
810			" system-wide mode\n");
811		usage_with_options(record_usage, record_options);
812	}
813
814	symbol__init();
815
816	if (symbol_conf.kptr_restrict)
817		pr_warning(
818"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
819"check /proc/sys/kernel/kptr_restrict.\n\n"
820"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
821"file is not found in the buildid cache or in the vmlinux path.\n\n"
822"Samples in kernel modules won't be resolved at all.\n\n"
823"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
824"even with a suitable vmlinux or kallsyms file.\n\n");
825
826	if (no_buildid_cache || no_buildid)
827		disable_buildid_cache();
828
829	if (evsel_list->nr_entries == 0 &&
830	    perf_evlist__add_default(evsel_list) < 0) {
831		pr_err("Not enough memory for event selector list\n");
832		goto out_symbol_exit;
833	}
834
835	if (target_pid != -1)
836		target_tid = target_pid;
837
838	if (perf_evlist__create_maps(evsel_list, target_pid,
839				     target_tid, cpu_list) < 0)
840		usage_with_options(record_usage, record_options);
 
 
841
842	list_for_each_entry(pos, &evsel_list->entries, node) {
843		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
844					 evsel_list->threads->nr) < 0)
845			goto out_free_fd;
846		if (perf_header__push_event(pos->attr.config, event_name(pos)))
847			goto out_free_fd;
 
 
 
848	}
849
850	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
851		goto out_free_fd;
852
853	if (user_interval != ULLONG_MAX)
854		default_interval = user_interval;
855	if (user_freq != UINT_MAX)
856		freq = user_freq;
857
858	/*
859	 * User specified count overrides default frequency.
860	 */
861	if (default_interval)
862		freq = 0;
863	else if (freq) {
864		default_interval = freq;
865	} else {
866		fprintf(stderr, "frequency and count are zero, aborting\n");
867		err = -EINVAL;
868		goto out_free_fd;
869	}
870
871	err = __cmd_record(argc, argv);
872out_free_fd:
873	perf_evlist__delete_maps(evsel_list);
874out_symbol_exit:
875	symbol__exit();
876	return err;
877}
v3.15
  1/*
  2 * builtin-record.c
  3 *
  4 * Builtin record command: Record the profile of a workload
  5 * (or a CPU, or a PID) into the perf.data output file - for
  6 * later analysis via perf report.
  7 */
 
 
  8#include "builtin.h"
  9
 10#include "perf.h"
 11
 12#include "util/build-id.h"
 13#include "util/util.h"
 14#include "util/parse-options.h"
 15#include "util/parse-events.h"
 16
 17#include "util/header.h"
 18#include "util/event.h"
 19#include "util/evlist.h"
 20#include "util/evsel.h"
 21#include "util/debug.h"
 22#include "util/session.h"
 23#include "util/tool.h"
 24#include "util/symbol.h"
 25#include "util/cpumap.h"
 26#include "util/thread_map.h"
 27#include "util/data.h"
 28
 29#include <unistd.h>
 30#include <sched.h>
 31#include <sys/mman.h>
 32
 33#ifndef HAVE_ON_EXIT_SUPPORT
 34#ifndef ATEXIT_MAX
 35#define ATEXIT_MAX 32
 36#endif
 37static int __on_exit_count = 0;
 38typedef void (*on_exit_func_t) (int, void *);
 39static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
 40static void *__on_exit_args[ATEXIT_MAX];
 41static int __exitcode = 0;
 42static void __handle_on_exit_funcs(void);
 43static int on_exit(on_exit_func_t function, void *arg);
 44#define exit(x) (exit)(__exitcode = (x))
 45
 46static int on_exit(on_exit_func_t function, void *arg)
 47{
 48	if (__on_exit_count == ATEXIT_MAX)
 49		return -ENOMEM;
 50	else if (__on_exit_count == 0)
 51		atexit(__handle_on_exit_funcs);
 52	__on_exit_funcs[__on_exit_count] = function;
 53	__on_exit_args[__on_exit_count++] = arg;
 54	return 0;
 55}
 56
 57static void __handle_on_exit_funcs(void)
 58{
 59	int i;
 60	for (i = 0; i < __on_exit_count; i++)
 61		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
 62}
 63#endif
 64
 65struct record {
 66	struct perf_tool	tool;
 67	struct record_opts	opts;
 68	u64			bytes_written;
 69	struct perf_data_file	file;
 70	struct perf_evlist	*evlist;
 71	struct perf_session	*session;
 72	const char		*progname;
 73	int			realtime_prio;
 74	bool			no_buildid;
 75	bool			no_buildid_cache;
 76	long			samples;
 77};
 78
 79static int record__write(struct record *rec, void *bf, size_t size)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 80{
 81	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
 82		pr_err("failed to write perf data, error: %m\n");
 83		return -1;
 84	}
 85
 86	rec->bytes_written += size;
 87	return 0;
 88}
 89
 90static int process_synthesized_event(struct perf_tool *tool,
 91				     union perf_event *event,
 92				     struct perf_sample *sample __maybe_unused,
 93				     struct machine *machine __maybe_unused)
 94{
 95	struct record *rec = container_of(tool, struct record, tool);
 96	return record__write(rec, event, event->header.size);
 97}
 98
 99static int record__mmap_read(struct record *rec, struct perf_mmap *md)
100{
101	unsigned int head = perf_mmap__read_head(md);
102	unsigned int old = md->prev;
103	unsigned char *data = md->base + page_size;
104	unsigned long size;
105	void *buf;
106	int rc = 0;
107
108	if (old == head)
109		return 0;
110
111	rec->samples++;
112
113	size = head - old;
114
115	if ((old & md->mask) + size != (head & md->mask)) {
116		buf = &data[old & md->mask];
117		size = md->mask + 1 - (old & md->mask);
118		old += size;
119
120		if (record__write(rec, buf, size) < 0) {
121			rc = -1;
122			goto out;
123		}
124	}
125
126	buf = &data[old & md->mask];
127	size = head - old;
128	old += size;
129
130	if (record__write(rec, buf, size) < 0) {
131		rc = -1;
132		goto out;
133	}
134
135	md->prev = old;
136	perf_mmap__write_tail(md, old);
137
138out:
139	return rc;
140}
141
/* Becomes 1 once a handled signal has arrived. */
static volatile int done;
/* Number of the last handled signal; -1 while none has arrived. */
static volatile int signr = -1;
/* Becomes 1 when SIGCHLD has been seen. */
static volatile int child_finished;
145
146static void sig_handler(int sig)
147{
148	if (sig == SIGCHLD)
149		child_finished = 1;
150
151	done = 1;
152	signr = sig;
153}
154
155static void record__sig_exit(int exit_status __maybe_unused, void *arg)
156{
157	struct record *rec = arg;
158	int status;
159
160	if (rec->evlist->workload.pid > 0) {
161		if (!child_finished)
162			kill(rec->evlist->workload.pid, SIGTERM);
163
164		wait(&status);
165		if (WIFSIGNALED(status))
166			psignal(WTERMSIG(status), rec->progname);
167	}
168
169	if (signr == -1 || signr == SIGUSR1)
170		return;
171
172	signal(signr, SIG_DFL);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173}
174
175static int record__open(struct record *rec)
176{
177	char msg[512];
178	struct perf_evsel *pos;
179	struct perf_evlist *evlist = rec->evlist;
180	struct perf_session *session = rec->session;
181	struct record_opts *opts = &rec->opts;
182	int rc = 0;
183
184	perf_evlist__config(evlist, opts);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
186	evlist__for_each(evlist, pos) {
 
 
187try_again:
188		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
189			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190				if (verbose)
191					ui__warning("%s\n", msg);
 
 
 
192				goto try_again;
193			}
194
195			rc = -errno;
196			perf_evsel__open_strerror(pos, &opts->target,
197						  errno, msg, sizeof(msg));
198			ui__error("%s\n", msg);
199			goto out;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200		}
201	}
202
203	if (perf_evlist__apply_filters(evlist)) {
204		error("failed to set filter with %d (%s)\n", errno,
205			strerror(errno));
206		rc = -1;
207		goto out;
208	}
209
210	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
211		if (errno == EPERM) {
212			pr_err("Permission error mapping pages.\n"
213			       "Consider increasing "
214			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
215			       "or try again with a smaller value of -m/--mmap_pages.\n"
216			       "(current value: %u)\n", opts->mmap_pages);
217			rc = -errno;
218		} else {
219			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
220			rc = -errno;
221		}
222		goto out;
223	}
224
225	session->evlist = evlist;
226	perf_session__set_id_hdr_size(session);
227out:
228	return rc;
229}
230
231static int process_buildids(struct record *rec)
232{
233	struct perf_data_file *file  = &rec->file;
234	struct perf_session *session = rec->session;
235	u64 start = session->header.data_offset;
236
237	u64 size = lseek(file->fd, 0, SEEK_CUR);
238	if (size == 0)
239		return 0;
240
241	return __perf_session__process_events(session, start,
242					      size - start,
 
243					      size, &build_id__mark_dso_hit_ops);
244}
245
246static void record__exit(int status, void *arg)
247{
248	struct record *rec = arg;
249	struct perf_data_file *file = &rec->file;
250
251	if (status != 0)
252		return;
253
254	if (!file->is_pipe) {
255		rec->session->header.data_size += rec->bytes_written;
256
257		if (!rec->no_buildid)
258			process_buildids(rec);
259		perf_session__write_header(rec->session, rec->evlist,
260					   file->fd, true);
261		perf_session__delete(rec->session);
262		perf_evlist__delete(rec->evlist);
263		symbol__exit();
264	}
265}
266
267static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
268{
269	int err;
270	struct perf_tool *tool = data;
 
 
 
 
271	/*
272	 *As for guest kernel when processing subcommand record&report,
273	 *we arrange module mmap prior to guest kernel mmap and trigger
274	 *a preload dso because default guest module symbols are loaded
275	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
276	 *method is used to avoid symbol missing when the first addr is
277	 *in module instead of in guest kernel.
278	 */
279	err = perf_event__synthesize_modules(tool, process_synthesized_event,
280					     machine);
281	if (err < 0)
282		pr_err("Couldn't record guest kernel [%d]'s reference"
283		       " relocation symbol.\n", machine->pid);
284
285	/*
286	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
287	 * have no _text sometimes.
288	 */
289	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
290						 machine);
 
 
 
 
291	if (err < 0)
292		pr_err("Couldn't record guest kernel [%d]'s reference"
293		       " relocation symbol.\n", machine->pid);
294}
295
296static struct perf_event_header finished_round_event = {
297	.size = sizeof(struct perf_event_header),
298	.type = PERF_RECORD_FINISHED_ROUND,
299};
300
301static int record__mmap_read_all(struct record *rec)
302{
303	int i;
304	int rc = 0;
305
306	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
307		if (rec->evlist->mmap[i].base) {
308			if (record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
309				rc = -1;
310				goto out;
311			}
312		}
313	}
314
315	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
316		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
317
318out:
319	return rc;
320}
321
322static void record__init_features(struct record *rec)
323{
324	struct perf_session *session = rec->session;
325	int feat;
326
327	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
328		perf_header__set_feat(&session->header, feat);
329
330	if (rec->no_buildid)
331		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
332
333	if (!have_tracepoints(&rec->evlist->entries))
334		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
335
336	if (!rec->opts.branch_stack)
337		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
338}
339
340static volatile int workload_exec_errno;
341
342/*
343 * perf_evlist__prepare_workload will send a SIGUSR1
344 * if the fork fails, since we asked by setting its
345 * want_signal to true.
346 */
347static void workload_exec_failed_signal(int signo, siginfo_t *info,
348					void *ucontext __maybe_unused)
349{
350	workload_exec_errno = info->si_value.sival_int;
351	done = 1;
352	signr = signo;
353	child_finished = 1;
354}
355
356static int __cmd_record(struct record *rec, int argc, const char **argv)
357{
 
 
358	int err;
359	unsigned long waking = 0;
 
360	const bool forks = argc > 0;
 
361	struct machine *machine;
362	struct perf_tool *tool = &rec->tool;
363	struct record_opts *opts = &rec->opts;
364	struct perf_data_file *file = &rec->file;
365	struct perf_session *session;
366	bool disabled = false;
367
368	rec->progname = argv[0];
369
370	on_exit(record__sig_exit, rec);
371	signal(SIGCHLD, sig_handler);
372	signal(SIGINT, sig_handler);
373	signal(SIGTERM, sig_handler);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
375	session = perf_session__new(file, false, NULL);
 
376	if (session == NULL) {
377		pr_err("Perf session creation failed.\n");
378		return -1;
379	}
380
381	rec->session = session;
 
 
 
 
 
 
 
 
 
 
382
383	record__init_features(rec);
 
 
384
385	if (forks) {
386		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
387						    argv, file->is_pipe,
388						    workload_exec_failed_signal);
389		if (err < 0) {
390			pr_err("Couldn't run the workload!\n");
391			goto out_delete_session;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392		}
393	}
394
395	if (record__open(rec) != 0) {
396		err = -1;
397		goto out_delete_session;
 
 
 
 
 
 
 
 
 
 
398	}
399
400	if (!rec->evlist->nr_groups)
401		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
402
403	/*
404	 * perf_session__delete(session) will be called at record__exit()
405	 */
406	on_exit(record__exit, rec);
407
408	if (file->is_pipe) {
409		err = perf_header__write_pipe(file->fd);
410		if (err < 0)
411			goto out_delete_session;
412	} else {
413		err = perf_session__write_header(session, rec->evlist,
414						 file->fd, false);
415		if (err < 0)
416			goto out_delete_session;
417	}
418
419	if (!rec->no_buildid
420	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
421		pr_err("Couldn't generate buildids. "
422		       "Use --no-buildid to profile anyway.\n");
423		err = -1;
424		goto out_delete_session;
425	}
426
427	machine = &session->machines.host;
428
429	if (file->is_pipe) {
430		err = perf_event__synthesize_attrs(tool, session,
431						   process_synthesized_event);
432		if (err < 0) {
433			pr_err("Couldn't synthesize attrs.\n");
434			goto out_delete_session;
 
 
 
 
 
 
 
435		}
436
437		if (have_tracepoints(&rec->evlist->entries)) {
438			/*
439			 * FIXME err <= 0 here actually means that
440			 * there were no tracepoints so its not really
441			 * an error, just that we don't need to
442			 * synthesize anything.  We really have to
443			 * return this more properly and also
444			 * propagate errors that now are calling die()
445			 */
446			err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
447								  process_synthesized_event);
 
448			if (err <= 0) {
449				pr_err("Couldn't record tracing data.\n");
450				goto out_delete_session;
451			}
452			rec->bytes_written += err;
453		}
454	}
455
456	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
457						 machine);
 
 
 
 
 
 
 
 
 
458	if (err < 0)
459		pr_err("Couldn't record kernel reference relocation symbol\n"
460		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
461		       "Check /proc/kallsyms permission or run as root.\n");
462
463	err = perf_event__synthesize_modules(tool, process_synthesized_event,
464					     machine);
465	if (err < 0)
466		pr_err("Couldn't record kernel module information.\n"
467		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
468		       "Check /proc/modules permission or run as root.\n");
469
470	if (perf_guest) {
471		machines__process_guests(&session->machines,
472					 perf_event__synthesize_guest_os, tool);
473	}
474
475	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
476					    process_synthesized_event, opts->sample_address);
477	if (err != 0)
478		goto out_delete_session;
 
 
479
480	if (rec->realtime_prio) {
481		struct sched_param param;
482
483		param.sched_priority = rec->realtime_prio;
484		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
485			pr_err("Could not set realtime priority.\n");
486			err = -1;
487			goto out_delete_session;
488		}
489	}
490
491	/*
492	 * When perf is starting the traced process, all the events
493	 * (apart from group members) have enable_on_exec=1 set,
494	 * so don't spoil it by prematurely enabling them.
495	 */
496	if (!target__none(&opts->target) && !opts->initial_delay)
497		perf_evlist__enable(rec->evlist);
498
499	/*
500	 * Let the child rip
501	 */
502	if (forks)
503		perf_evlist__start_workload(rec->evlist);
504
505	if (opts->initial_delay) {
506		usleep(opts->initial_delay * 1000);
507		perf_evlist__enable(rec->evlist);
508	}
509
510	for (;;) {
511		int hits = rec->samples;
512
513		if (record__mmap_read_all(rec) < 0) {
514			err = -1;
515			goto out_delete_session;
516		}
517
518		if (hits == rec->samples) {
519			if (done)
520				break;
521			err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
522			waking++;
523		}
524
525		/*
526		 * When perf is starting the traced process, at the end events
527		 * die with the process and we wait for that. Thus no need to
528		 * disable events in this case.
529		 */
530		if (done && !disabled && !target__none(&opts->target)) {
531			perf_evlist__disable(rec->evlist);
532			disabled = true;
533		}
534	}
535
536	if (forks && workload_exec_errno) {
537		char msg[512];
538		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
539		pr_err("Workload failed: %s\n", emsg);
540		err = -1;
541		goto out_delete_session;
542	}
543
544	if (quiet || signr == SIGUSR1)
545		return 0;
546
547	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
548
549	/*
550	 * Approximate RIP event size: 24 bytes.
551	 */
552	fprintf(stderr,
553		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
554		(double)rec->bytes_written / 1024.0 / 1024.0,
555		file->path,
556		rec->bytes_written / 24);
557
558	return 0;
559
560out_delete_session:
561	perf_session__delete(session);
562	return err;
563}
564
565#define BRANCH_OPT(n, m) \
566	{ .name = n, .mode = (m) }
567
568#define BRANCH_END { .name = NULL }
569
570struct branch_mode {
571	const char *name;
572	int mode;
573};
574
575static const struct branch_mode branch_modes[] = {
576	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
577	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
578	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
579	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
580	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
581	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
582	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
583	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
584	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
585	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
586	BRANCH_END
587};
588
589static int
590parse_branch_stack(const struct option *opt, const char *str, int unset)
591{
592#define ONLY_PLM \
593	(PERF_SAMPLE_BRANCH_USER	|\
594	 PERF_SAMPLE_BRANCH_KERNEL	|\
595	 PERF_SAMPLE_BRANCH_HV)
596
597	uint64_t *mode = (uint64_t *)opt->value;
598	const struct branch_mode *br;
599	char *s, *os = NULL, *p;
600	int ret = -1;
601
602	if (unset)
603		return 0;
604
605	/*
606	 * cannot set it twice, -b + --branch-filter for instance
607	 */
608	if (*mode)
609		return -1;
610
611	/* str may be NULL in case no arg is passed to -b */
612	if (str) {
613		/* because str is read-only */
614		s = os = strdup(str);
615		if (!s)
616			return -1;
617
618		for (;;) {
619			p = strchr(s, ',');
620			if (p)
621				*p = '\0';
622
623			for (br = branch_modes; br->name; br++) {
624				if (!strcasecmp(s, br->name))
625					break;
626			}
627			if (!br->name) {
628				ui__warning("unknown branch filter %s,"
629					    " check man page\n", s);
630				goto error;
631			}
632
633			*mode |= br->mode;
634
635			if (!p)
636				break;
637
638			s = p + 1;
639		}
640	}
641	ret = 0;
642
643	/* default to any branch */
644	if ((*mode & ~ONLY_PLM) == 0) {
645		*mode = PERF_SAMPLE_BRANCH_ANY;
646	}
647error:
648	free(os);
649	return ret;
650}
651
#ifdef HAVE_DWARF_UNWIND_SUPPORT
/*
 * Parse a stack dump size given as a number (any base strtoul() accepts
 * with base 0).
 *
 * The value is rounded up to a multiple of sizeof(u64) and must be
 * non-zero and no larger than USHRT_MAX rounded down to that multiple.
 *
 * @str:   text to parse; trailing garbage is rejected.
 * @_size: receives the rounded size on success, untouched on failure.
 *
 * Returns 0 on success, -1 (after printing an error) otherwise.
 */
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	/* Accept only when the whole string was consumed. */
	if (*endptr == '\0') {
		size = round_up(size, sizeof(u64));
		if (size && size <= max_size) {
			*_size = size;
			return 0;
		}
	}

	/* max_size is unsigned long, hence %lu. */
	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
679
680int record_parse_callchain(const char *arg, struct record_opts *opts)
681{
682	char *tok, *name, *saveptr = NULL;
683	char *buf;
684	int ret = -1;
685
686	/* We need buffer that we know we can write to. */
687	buf = malloc(strlen(arg) + 1);
688	if (!buf)
689		return -ENOMEM;
690
691	strcpy(buf, arg);
692
693	tok = strtok_r((char *)buf, ",", &saveptr);
694	name = tok ? : (char *)buf;
695
696	do {
697		/* Framepointer style */
698		if (!strncmp(name, "fp", sizeof("fp"))) {
699			if (!strtok_r(NULL, ",", &saveptr)) {
700				opts->call_graph = CALLCHAIN_FP;
701				ret = 0;
702			} else
703				pr_err("callchain: No more arguments "
704				       "needed for -g fp\n");
705			break;
706
707#ifdef HAVE_DWARF_UNWIND_SUPPORT
708		/* Dwarf style */
709		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
710			const unsigned long default_stack_dump_size = 8192;
711
712			ret = 0;
713			opts->call_graph = CALLCHAIN_DWARF;
714			opts->stack_dump_size = default_stack_dump_size;
715
716			tok = strtok_r(NULL, ",", &saveptr);
717			if (tok) {
718				unsigned long size = 0;
719
720				ret = get_stack_size(tok, &size);
721				opts->stack_dump_size = size;
722			}
723#endif /* HAVE_DWARF_UNWIND_SUPPORT */
724		} else {
725			pr_err("callchain: Unknown --call-graph option "
726			       "value: %s\n", arg);
727			break;
728		}
729
730	} while (0);
731
732	free(buf);
733	return ret;
734}
735
736static void callchain_debug(struct record_opts *opts)
737{
738	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };
739
740	pr_debug("callchain: type %s\n", str[opts->call_graph]);
741
742	if (opts->call_graph == CALLCHAIN_DWARF)
743		pr_debug("callchain: stack dump size %d\n",
744			 opts->stack_dump_size);
745}
746
747int record_parse_callchain_opt(const struct option *opt,
748			       const char *arg,
749			       int unset)
750{
751	struct record_opts *opts = opt->value;
752	int ret;
753
754	opts->call_graph_enabled = !unset;
755
756	/* --no-call-graph */
757	if (unset) {
758		opts->call_graph = CALLCHAIN_NONE;
759		pr_debug("callchain: disabled\n");
760		return 0;
761	}
762
763	ret = record_parse_callchain(arg, opts);
764	if (!ret)
765		callchain_debug(opts);
766
767	return ret;
768}
769
770int record_callchain_opt(const struct option *opt,
771			 const char *arg __maybe_unused,
772			 int unset __maybe_unused)
773{
774	struct record_opts *opts = opt->value;
775
776	opts->call_graph_enabled = !unset;
777
778	if (opts->call_graph == CALLCHAIN_NONE)
779		opts->call_graph = CALLCHAIN_FP;
780
781	callchain_debug(opts);
782	return 0;
783}
784
785static int perf_record_config(const char *var, const char *value, void *cb)
786{
787	struct record *rec = cb;
788
789	if (!strcmp(var, "record.call-graph"))
790		return record_parse_callchain(value, &rec->opts);
791
792	return perf_default_config(var, value, cb);
793}
794
/* NULL-terminated usage synopsis lines passed to the option parser. */
static const char *const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
800
801/*
802 * XXX Ideally would be local to cmd_record() and passed to a record__new
803 * because we need to have access to it in record__exit, that is called
804 * after cmd_record() exits, but since record_options need to be accessible to
805 * builtin-script, leave it here.
806 *
807 * At least we don't ouch it in all the other functions here directly.
808 *
809 * Just say no to tons of global variables, sigh.
810 */
811static struct record record = {
812	.opts = {
813		.mmap_pages	     = UINT_MAX,
814		.user_freq	     = UINT_MAX,
815		.user_interval	     = ULLONG_MAX,
816		.freq		     = 4000,
817		.target		     = {
818			.uses_mmap   = true,
819			.default_per_cpu = true,
820		},
821	},
822};
823
/* Common prefix of the --call-graph help text; the supported modes follow. */
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "

/* "dwarf" is only advertised when DWARF unwinding support is built in. */
#ifndef HAVE_DWARF_UNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#endif
831
832/*
833 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
834 * with it and switch to use the library functions in perf_evlist that came
835 * from builtin-record.c, i.e. use record_opts,
836 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
837 * using pipes, etc.
838 */
839const struct option record_options[] = {
840	OPT_CALLBACK('e', "event", &record.evlist, "event",
841		     "event selector. use 'perf list' to list available events",
842		     parse_events_option),
843	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
844		     "event filter", parse_filter),
845	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
846		    "record events on existing process id"),
847	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
848		    "record events on existing thread id"),
849	OPT_INTEGER('r', "realtime", &record.realtime_prio,
850		    "collect data with this RT SCHED_FIFO priority"),
851	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
852		    "collect data without buffering"),
853	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
854		    "collect raw sample records from all opened counters"),
855	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
856			    "system-wide collection from all CPUs"),
857	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
 
 
858		    "list of cpus to monitor"),
859	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
860	OPT_STRING('o', "output", &record.file.path, "file",
 
 
861		    "output file name"),
862	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
863			&record.opts.no_inherit_set,
864			"child tasks do not inherit counters"),
865	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
866	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
867		     "number of mmap data pages",
868		     perf_evlist__parse_mmap_pages),
869	OPT_BOOLEAN(0, "group", &record.opts.group,
870		    "put the counters into a counter group"),
871	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
872			   NULL, "enables call-graph recording" ,
873			   &record_callchain_opt),
874	OPT_CALLBACK(0, "call-graph", &record.opts,
875		     "mode[,dump_size]", record_callchain_help,
876		     &record_parse_callchain_opt),
877	OPT_INCR('v', "verbose", &verbose,
878		    "be more verbose (show counter open errors, etc)"),
879	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
880	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
881		    "per thread counts"),
882	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
883		    "Sample addresses"),
884	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
885	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
886	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
887		    "don't sample"),
888	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
889		    "do not update the buildid cache"),
890	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
891		    "do not collect buildids in perf.data"),
892	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
893		     "monitor event in cgroup name only",
894		     parse_cgroups),
895	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
896		  "ms to wait before starting measurement after program start"),
897	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
898		   "user to profile"),
899
900	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
901		     "branch any", "sample any taken branches",
902		     parse_branch_stack),
903
904	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
905		     "branch filter mask", "branch stack filter modes",
906		     parse_branch_stack),
907	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
908		    "sample by weight (on special events only)"),
909	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
910		    "sample transaction flags (special events only)"),
911	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
912		    "use per-thread mmaps"),
913	OPT_END()
914};
915
916int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
917{
918	int err = -ENOMEM;
919	struct record *rec = &record;
920	char errbuf[BUFSIZ];
921
922	rec->evlist = perf_evlist__new();
923	if (rec->evlist == NULL)
924		return -ENOMEM;
925
926	perf_config(perf_record_config, rec);
927
928	argc = parse_options(argc, argv, record_options, record_usage,
929			    PARSE_OPT_STOP_AT_NON_OPTION);
930	if (!argc && target__none(&rec->opts.target))
 
 
 
 
 
 
931		usage_with_options(record_usage, record_options);
 
 
 
 
 
932
933	if (nr_cgroups && !rec->opts.target.system_wide) {
934		ui__error("cgroup monitoring only available in"
935			  " system-wide mode\n");
936		usage_with_options(record_usage, record_options);
937	}
938
939	symbol__init();
940
941	if (symbol_conf.kptr_restrict)
942		pr_warning(
943"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
944"check /proc/sys/kernel/kptr_restrict.\n\n"
945"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
946"file is not found in the buildid cache or in the vmlinux path.\n\n"
947"Samples in kernel modules won't be resolved at all.\n\n"
948"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
949"even with a suitable vmlinux or kallsyms file.\n\n");
950
951	if (rec->no_buildid_cache || rec->no_buildid)
952		disable_buildid_cache();
953
954	if (rec->evlist->nr_entries == 0 &&
955	    perf_evlist__add_default(rec->evlist) < 0) {
956		pr_err("Not enough memory for event selector list\n");
957		goto out_symbol_exit;
958	}
959
960	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
961		rec->opts.no_inherit = true;
962
963	err = target__validate(&rec->opts.target);
964	if (err) {
965		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
966		ui__warning("%s", errbuf);
967	}
968
969	err = target__parse_uid(&rec->opts.target);
970	if (err) {
971		int saved_errno = errno;
972
973		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
974		ui__error("%s", errbuf);
975
976		err = -saved_errno;
977		goto out_symbol_exit;
978	}
979
980	err = -ENOMEM;
981	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
982		usage_with_options(record_usage, record_options);
 
 
 
 
983
984	if (record_opts__config(&rec->opts)) {
 
 
 
 
 
 
 
 
985		err = -EINVAL;
986		goto out_symbol_exit;
987	}
988
989	err = __cmd_record(&record, argc, argv);
 
 
990out_symbol_exit:
991	symbol__exit();
992	return err;
993}