Loading...
1/*
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
7 */
8#define _FILE_OFFSET_BITS 64
9
10#include "builtin.h"
11
12#include "perf.h"
13
14#include "util/build-id.h"
15#include "util/util.h"
16#include "util/parse-options.h"
17#include "util/parse-events.h"
18
19#include "util/header.h"
20#include "util/event.h"
21#include "util/evlist.h"
22#include "util/evsel.h"
23#include "util/debug.h"
24#include "util/session.h"
25#include "util/symbol.h"
26#include "util/cpumap.h"
27#include "util/thread_map.h"
28
29#include <unistd.h>
30#include <sched.h>
31#include <sys/mman.h>
32
/* How __cmd_record() treats the output file when it opens it. */
enum write_mode_t {
	WRITE_FORCE,	/* start a new file: the open flags get O_TRUNC */
	WRITE_APPEND	/* keep the existing data: file_new is cleared, no O_TRUNC */
};
37
/*
 * File-scope state of the record command. Most of it is filled in by the
 * option table (record_options) and then consumed by config_attr(),
 * open_counters() and __cmd_record().
 */

/* -c value; ULLONG_MAX means "not given on the command line" */
static u64 user_interval = ULLONG_MAX;
/* sample period used by config_attr() when frequency mode is off */
static u64 default_interval = 0;

/* set from sysconf(_SC_PAGE_SIZE) in __cmd_record() */
static unsigned int page_size;
/* -m value; UINT_MAX means "use the 512 kiB default" */
static unsigned int mmap_pages = UINT_MAX;
/* -F value; UINT_MAX means "not given on the command line" */
static unsigned int user_freq = UINT_MAX;
/* sampling frequency; forced to 0 by cmd_record() when a count is set */
static int freq = 1000;
/* file descriptor all recorded data is written to */
static int output;
/* non-zero when writing to stdout instead of a regular file */
static int pipe_output = 0;
static const char *output_name = NULL;
static bool group = false;
static int realtime_prio = 0;
static bool nodelay = false;
static bool raw_samples = false;
/* cleared by open_counters() when the kernel rejects attr.sample_id_all */
static bool sample_id_all_avail = true;
static bool system_wide = false;
static pid_t target_pid = -1;
static pid_t target_tid = -1;
/* pid of the forked workload, -1 when nothing was forked */
static pid_t child_pid = -1;
static bool no_inherit = false;
static enum write_mode_t write_mode = WRITE_FORCE;
static bool call_graph = false;
static bool inherit_stat = false;
static bool no_samples = false;
static bool sample_address = false;
static bool sample_time = false;
static bool no_buildid = false;
static bool no_buildid_cache = false;
static struct perf_evlist *evsel_list;

/* incremented by mmap_read() each time a ring buffer had new data */
static long samples = 0;
/* running total of bytes sent to the output descriptor */
static u64 bytes_written = 0;

/* 1 unless appending to an existing file (see __cmd_record()) */
static int file_new = 1;
/* output offset right after the header; start of the range process_buildids() scans */
static off_t post_processing_offset;

static struct perf_session *session;
/* -C value, NULL when not given */
static const char *cpu_list;
76
77static void advance_output(size_t size)
78{
79 bytes_written += size;
80}
81
82static void write_output(void *buf, size_t size)
83{
84 while (size) {
85 int ret = write(output, buf, size);
86
87 if (ret < 0)
88 die("failed to write");
89
90 size -= ret;
91 buf += ret;
92
93 bytes_written += ret;
94 }
95}
96
97static int process_synthesized_event(union perf_event *event,
98 struct perf_sample *sample __used,
99 struct perf_session *self __used)
100{
101 write_output(event, event->header.size);
102 return 0;
103}
104
105static void mmap_read(struct perf_mmap *md)
106{
107 unsigned int head = perf_mmap__read_head(md);
108 unsigned int old = md->prev;
109 unsigned char *data = md->base + page_size;
110 unsigned long size;
111 void *buf;
112
113 if (old == head)
114 return;
115
116 samples++;
117
118 size = head - old;
119
120 if ((old & md->mask) + size != (head & md->mask)) {
121 buf = &data[old & md->mask];
122 size = md->mask + 1 - (old & md->mask);
123 old += size;
124
125 write_output(buf, size);
126 }
127
128 buf = &data[old & md->mask];
129 size = head - old;
130 old += size;
131
132 write_output(buf, size);
133
134 md->prev = old;
135 perf_mmap__write_tail(md, old);
136}
137
/* Set to 1 by sig_handler(); polled by the main loop in __cmd_record(). */
static volatile int done = 0;
/* Number of the last signal seen by sig_handler(), -1 if none yet. */
static volatile int signr = -1;

/*
 * Handler installed for SIGCHLD, SIGINT and SIGUSR1 in __cmd_record():
 * requests termination of the record loop and remembers which signal
 * asked for it.
 */
static void sig_handler(int sig)
{
	done = 1;
	signr = sig;
}
146
147static void sig_atexit(void)
148{
149 if (child_pid > 0)
150 kill(child_pid, SIGTERM);
151
152 if (signr == -1 || signr == SIGUSR1)
153 return;
154
155 signal(signr, SIG_DFL);
156 kill(getpid(), signr);
157}
158
159static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
160{
161 struct perf_event_attr *attr = &evsel->attr;
162 int track = !evsel->idx; /* only the first counter needs these */
163
164 attr->disabled = 1;
165 attr->inherit = !no_inherit;
166 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
167 PERF_FORMAT_TOTAL_TIME_RUNNING |
168 PERF_FORMAT_ID;
169
170 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
171
172 if (evlist->nr_entries > 1)
173 attr->sample_type |= PERF_SAMPLE_ID;
174
175 /*
176 * We default some events to a 1 default interval. But keep
177 * it a weak assumption overridable by the user.
178 */
179 if (!attr->sample_period || (user_freq != UINT_MAX &&
180 user_interval != ULLONG_MAX)) {
181 if (freq) {
182 attr->sample_type |= PERF_SAMPLE_PERIOD;
183 attr->freq = 1;
184 attr->sample_freq = freq;
185 } else {
186 attr->sample_period = default_interval;
187 }
188 }
189
190 if (no_samples)
191 attr->sample_freq = 0;
192
193 if (inherit_stat)
194 attr->inherit_stat = 1;
195
196 if (sample_address) {
197 attr->sample_type |= PERF_SAMPLE_ADDR;
198 attr->mmap_data = track;
199 }
200
201 if (call_graph)
202 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
203
204 if (system_wide)
205 attr->sample_type |= PERF_SAMPLE_CPU;
206
207 if (sample_id_all_avail &&
208 (sample_time || system_wide || !no_inherit || cpu_list))
209 attr->sample_type |= PERF_SAMPLE_TIME;
210
211 if (raw_samples) {
212 attr->sample_type |= PERF_SAMPLE_TIME;
213 attr->sample_type |= PERF_SAMPLE_RAW;
214 attr->sample_type |= PERF_SAMPLE_CPU;
215 }
216
217 if (nodelay) {
218 attr->watermark = 0;
219 attr->wakeup_events = 1;
220 }
221
222 attr->mmap = track;
223 attr->comm = track;
224
225 if (target_pid == -1 && target_tid == -1 && !system_wide) {
226 attr->disabled = 1;
227 attr->enable_on_exec = 1;
228 }
229}
230
231static bool perf_evlist__equal(struct perf_evlist *evlist,
232 struct perf_evlist *other)
233{
234 struct perf_evsel *pos, *pair;
235
236 if (evlist->nr_entries != other->nr_entries)
237 return false;
238
239 pair = list_entry(other->entries.next, struct perf_evsel, node);
240
241 list_for_each_entry(pos, &evlist->entries, node) {
242 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
243 return false;
244 pair = list_entry(pair->node.next, struct perf_evsel, node);
245 }
246
247 return true;
248}
249
/*
 * Configure and open every counter of @evlist, apply the event filters,
 * mmap the ring buffers and attach the list to the session.
 *
 * Failures that cannot be worked around terminate the process (exit() or
 * die()); the function only returns on success.
 */
static void open_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	if (evlist->cpus->map[0] < 0)
		no_inherit = true;

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		config_attr(pos, evlist);
		/* re-entered after sample_id_all turned out to be unsupported */
retry_sample_id:
		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
		/* re-entered after falling back from cycles to cpu-clock */
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
			/* keep errno: the calls below may overwrite it */
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__warning_paranoid();
				exit(EXIT_FAILURE);
			} else if (err == ENODEV && cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
				sample_id_all_avail = false;
				/* drop the timestamp unless something asked for it explicitly */
				if (!sample_time && !raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

	/*
	 * A new file adopts this event list; when appending, the list must
	 * match the one already attached to the session.
	 */
	if (file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}
355
356static int process_buildids(void)
357{
358 u64 size = lseek(output, 0, SEEK_CUR);
359
360 if (size == 0)
361 return 0;
362
363 session->fd = output;
364 return __perf_session__process_events(session, post_processing_offset,
365 size - post_processing_offset,
366 size, &build_id__mark_dso_hit_ops);
367}
368
369static void atexit_header(void)
370{
371 if (!pipe_output) {
372 session->header.data_size += bytes_written;
373
374 if (!no_buildid)
375 process_buildids();
376 perf_session__write_header(session, evsel_list, output, true);
377 perf_session__delete(session);
378 perf_evlist__delete(evsel_list);
379 symbol__exit();
380 }
381}
382
383static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
384{
385 int err;
386 struct perf_session *psession = data;
387
388 if (machine__is_host(machine))
389 return;
390
391 /*
392 *As for guest kernel when processing subcommand record&report,
393 *we arrange module mmap prior to guest kernel mmap and trigger
394 *a preload dso because default guest module symbols are loaded
395 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
396 *method is used to avoid symbol missing when the first addr is
397 *in module instead of in guest kernel.
398 */
399 err = perf_event__synthesize_modules(process_synthesized_event,
400 psession, machine);
401 if (err < 0)
402 pr_err("Couldn't record guest kernel [%d]'s reference"
403 " relocation symbol.\n", machine->pid);
404
405 /*
406 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
407 * have no _text sometimes.
408 */
409 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
410 psession, machine, "_text");
411 if (err < 0)
412 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
413 psession, machine,
414 "_stext");
415 if (err < 0)
416 pr_err("Couldn't record guest kernel [%d]'s reference"
417 " relocation symbol.\n", machine->pid);
418}
419
/*
 * Header-only PERF_RECORD_FINISHED_ROUND record; mmap_read_all() writes it
 * after each pass over the ring buffers when HEADER_TRACE_INFO is set.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
424
425static void mmap_read_all(void)
426{
427 int i;
428
429 for (i = 0; i < evsel_list->nr_mmaps; i++) {
430 if (evsel_list->mmap[i].base)
431 mmap_read(&evsel_list->mmap[i]);
432 }
433
434 if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
435 write_output(&finished_round_event, sizeof(finished_round_event));
436}
437
438static int __cmd_record(int argc, const char **argv)
439{
440 struct stat st;
441 int flags;
442 int err;
443 unsigned long waking = 0;
444 int child_ready_pipe[2], go_pipe[2];
445 const bool forks = argc > 0;
446 char buf;
447 struct machine *machine;
448
449 page_size = sysconf(_SC_PAGE_SIZE);
450
451 atexit(sig_atexit);
452 signal(SIGCHLD, sig_handler);
453 signal(SIGINT, sig_handler);
454 signal(SIGUSR1, sig_handler);
455
456 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
457 perror("failed to create pipes");
458 exit(-1);
459 }
460
461 if (!output_name) {
462 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
463 pipe_output = 1;
464 else
465 output_name = "perf.data";
466 }
467 if (output_name) {
468 if (!strcmp(output_name, "-"))
469 pipe_output = 1;
470 else if (!stat(output_name, &st) && st.st_size) {
471 if (write_mode == WRITE_FORCE) {
472 char oldname[PATH_MAX];
473 snprintf(oldname, sizeof(oldname), "%s.old",
474 output_name);
475 unlink(oldname);
476 rename(output_name, oldname);
477 }
478 } else if (write_mode == WRITE_APPEND) {
479 write_mode = WRITE_FORCE;
480 }
481 }
482
483 flags = O_CREAT|O_RDWR;
484 if (write_mode == WRITE_APPEND)
485 file_new = 0;
486 else
487 flags |= O_TRUNC;
488
489 if (pipe_output)
490 output = STDOUT_FILENO;
491 else
492 output = open(output_name, flags, S_IRUSR | S_IWUSR);
493 if (output < 0) {
494 perror("failed to create output file");
495 exit(-1);
496 }
497
498 session = perf_session__new(output_name, O_WRONLY,
499 write_mode == WRITE_FORCE, false, NULL);
500 if (session == NULL) {
501 pr_err("Not enough memory for reading perf file header\n");
502 return -1;
503 }
504
505 if (!no_buildid)
506 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
507
508 if (!file_new) {
509 err = perf_session__read_header(session, output);
510 if (err < 0)
511 goto out_delete_session;
512 }
513
514 if (have_tracepoints(&evsel_list->entries))
515 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
516
517 /* 512 kiB: default amount of unprivileged mlocked memory */
518 if (mmap_pages == UINT_MAX)
519 mmap_pages = (512 * 1024) / page_size;
520
521 if (forks) {
522 child_pid = fork();
523 if (child_pid < 0) {
524 perror("failed to fork");
525 exit(-1);
526 }
527
528 if (!child_pid) {
529 if (pipe_output)
530 dup2(2, 1);
531 close(child_ready_pipe[0]);
532 close(go_pipe[1]);
533 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
534
535 /*
536 * Do a dummy execvp to get the PLT entry resolved,
537 * so we avoid the resolver overhead on the real
538 * execvp call.
539 */
540 execvp("", (char **)argv);
541
542 /*
543 * Tell the parent we're ready to go
544 */
545 close(child_ready_pipe[1]);
546
547 /*
548 * Wait until the parent tells us to go.
549 */
550 if (read(go_pipe[0], &buf, 1) == -1)
551 perror("unable to read pipe");
552
553 execvp(argv[0], (char **)argv);
554
555 perror(argv[0]);
556 kill(getppid(), SIGUSR1);
557 exit(-1);
558 }
559
560 if (!system_wide && target_tid == -1 && target_pid == -1)
561 evsel_list->threads->map[0] = child_pid;
562
563 close(child_ready_pipe[1]);
564 close(go_pipe[0]);
565 /*
566 * wait for child to settle
567 */
568 if (read(child_ready_pipe[0], &buf, 1) == -1) {
569 perror("unable to read pipe");
570 exit(-1);
571 }
572 close(child_ready_pipe[0]);
573 }
574
575 open_counters(evsel_list);
576
577 /*
578 * perf_session__delete(session) will be called at atexit_header()
579 */
580 atexit(atexit_header);
581
582 if (pipe_output) {
583 err = perf_header__write_pipe(output);
584 if (err < 0)
585 return err;
586 } else if (file_new) {
587 err = perf_session__write_header(session, evsel_list,
588 output, false);
589 if (err < 0)
590 return err;
591 }
592
593 post_processing_offset = lseek(output, 0, SEEK_CUR);
594
595 if (pipe_output) {
596 err = perf_session__synthesize_attrs(session,
597 process_synthesized_event);
598 if (err < 0) {
599 pr_err("Couldn't synthesize attrs.\n");
600 return err;
601 }
602
603 err = perf_event__synthesize_event_types(process_synthesized_event,
604 session);
605 if (err < 0) {
606 pr_err("Couldn't synthesize event_types.\n");
607 return err;
608 }
609
610 if (have_tracepoints(&evsel_list->entries)) {
611 /*
612 * FIXME err <= 0 here actually means that
613 * there were no tracepoints so its not really
614 * an error, just that we don't need to
615 * synthesize anything. We really have to
616 * return this more properly and also
617 * propagate errors that now are calling die()
618 */
619 err = perf_event__synthesize_tracing_data(output, evsel_list,
620 process_synthesized_event,
621 session);
622 if (err <= 0) {
623 pr_err("Couldn't record tracing data.\n");
624 return err;
625 }
626 advance_output(err);
627 }
628 }
629
630 machine = perf_session__find_host_machine(session);
631 if (!machine) {
632 pr_err("Couldn't find native kernel information.\n");
633 return -1;
634 }
635
636 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
637 session, machine, "_text");
638 if (err < 0)
639 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
640 session, machine, "_stext");
641 if (err < 0)
642 pr_err("Couldn't record kernel reference relocation symbol\n"
643 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
644 "Check /proc/kallsyms permission or run as root.\n");
645
646 err = perf_event__synthesize_modules(process_synthesized_event,
647 session, machine);
648 if (err < 0)
649 pr_err("Couldn't record kernel module information.\n"
650 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
651 "Check /proc/modules permission or run as root.\n");
652
653 if (perf_guest)
654 perf_session__process_machines(session,
655 perf_event__synthesize_guest_os);
656
657 if (!system_wide)
658 perf_event__synthesize_thread_map(evsel_list->threads,
659 process_synthesized_event,
660 session);
661 else
662 perf_event__synthesize_threads(process_synthesized_event,
663 session);
664
665 if (realtime_prio) {
666 struct sched_param param;
667
668 param.sched_priority = realtime_prio;
669 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) {
670 pr_err("Could not set realtime priority.\n");
671 exit(-1);
672 }
673 }
674
675 perf_evlist__enable(evsel_list);
676
677 /*
678 * Let the child rip
679 */
680 if (forks)
681 close(go_pipe[1]);
682
683 for (;;) {
684 int hits = samples;
685
686 mmap_read_all();
687
688 if (hits == samples) {
689 if (done)
690 break;
691 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
692 waking++;
693 }
694
695 if (done)
696 perf_evlist__disable(evsel_list);
697 }
698
699 if (quiet || signr == SIGUSR1)
700 return 0;
701
702 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
703
704 /*
705 * Approximate RIP event size: 24 bytes.
706 */
707 fprintf(stderr,
708 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
709 (double)bytes_written / 1024.0 / 1024.0,
710 output_name,
711 bytes_written / 24);
712
713 return 0;
714
715out_delete_session:
716 perf_session__delete(session);
717 return err;
718}
719
/* NULL-terminated usage lines, passed to parse_options() and usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/* Set by -f / -A; cmd_record() derives write_mode from them. */
static bool force, append_file;
727
/*
 * Command line options of 'perf record'. Each entry binds a switch to one
 * of the file-scope variables above, or to a parser callback operating on
 * evsel_list. The table is terminated by OPT_END().
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &nodelay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &append_file,
		    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &force,
		    "overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &user_interval, "event period to sample"),
	OPT_STRING('o', "output", &output_name, "file",
		   "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
	OPT_BOOLEAN('n', "no-samples", &no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};
782
/*
 * Entry point of 'perf record'.
 *
 * Parses the options, validates their combination, prepares the event
 * list (adding a default event when none was requested), creates the
 * cpu/thread maps, resolves the sampling period/frequency and finally
 * hands over to __cmd_record().
 *
 * Returns the result of __cmd_record(), -ENOMEM when a setup step fails,
 * or -EINVAL when both frequency and count end up zero.
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* neither a workload nor any existing target was given */
	if (!argc && target_pid == -1 && target_tid == -1 &&
		!system_wide && !cpu_list)
		usage_with_options(record_usage, record_options);

	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (append_file) {
		write_mode = WRITE_APPEND;
	} else {
		write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (no_buildid_cache || no_buildid)
		disable_buildid_cache();

	/* no -e given: fall back to the default event */
	if (evsel_list->nr_entries == 0 &&
		perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	/* a pid target is also used as the tid target */
	if (target_pid != -1)
		target_tid = target_pid;

	if (perf_evlist__create_maps(evsel_list, target_pid,
				     target_tid, cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
					 evsel_list->threads->nr) < 0)
			goto out_free_fd;
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
		goto out_free_fd;

	if (user_interval != ULLONG_MAX)
		default_interval = user_interval;
	if (user_freq != UINT_MAX)
		freq = user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		freq = 0;
	else if (freq) {
		default_interval = freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}
1/*
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
7 */
8#include "builtin.h"
9
10#include "perf.h"
11
12#include "util/build-id.h"
13#include "util/util.h"
14#include "util/parse-options.h"
15#include "util/parse-events.h"
16
17#include "util/header.h"
18#include "util/event.h"
19#include "util/evlist.h"
20#include "util/evsel.h"
21#include "util/debug.h"
22#include "util/session.h"
23#include "util/tool.h"
24#include "util/symbol.h"
25#include "util/cpumap.h"
26#include "util/thread_map.h"
27#include "util/data.h"
28
29#include <unistd.h>
30#include <sched.h>
31#include <sys/mman.h>
32
#ifndef HAVE_ON_EXIT_SUPPORT
/*
 * Fallback for C libraries without on_exit(): a fixed-size table of
 * (function, argument) pairs that a single atexit() hook walks, passing
 * each function the exit code captured by the exit() wrapper macro below.
 */
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
/* remember the exit code so the handlers can be given it */
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register @function to be called with the exit code and @arg at process
 * exit. Returns 0 on success, or -ENOMEM when the table is full or the
 * atexit() hook could not be installed.
 */
static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;

	/*
	 * The first registration installs the dispatcher. If that fails,
	 * report it instead of recording a handler that would never run.
	 */
	if (__on_exit_count == 0 && atexit(__handle_on_exit_funcs) != 0)
		return -ENOMEM;

	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

/* atexit() hook: call the registered functions in registration order. */
static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
64
/* State of one 'perf record' run, passed to the helpers below. */
struct record {
	/* embedded so callbacks can get back to the record via container_of() */
	struct perf_tool tool;
	struct record_opts opts;
	/* total payload accepted by record__write() */
	u64 bytes_written;
	struct perf_data_file file;
	struct perf_evlist *evlist;
	struct perf_session *session;
	/* argv[0] of the workload; prefix for psignal() in record__sig_exit() */
	const char *progname;
	/* NOTE(review): not referenced in the visible part of the file */
	int realtime_prio;
	/* when set, the build-id feature and build-id processing are skipped */
	bool no_buildid;
	/* NOTE(review): not referenced in the visible part of the file */
	bool no_buildid_cache;
	/* incremented by record__mmap_read() whenever a buffer had new data */
	long samples;
};
78
79static int record__write(struct record *rec, void *bf, size_t size)
80{
81 if (perf_data_file__write(rec->session->file, bf, size) < 0) {
82 pr_err("failed to write perf data, error: %m\n");
83 return -1;
84 }
85
86 rec->bytes_written += size;
87 return 0;
88}
89
90static int process_synthesized_event(struct perf_tool *tool,
91 union perf_event *event,
92 struct perf_sample *sample __maybe_unused,
93 struct machine *machine __maybe_unused)
94{
95 struct record *rec = container_of(tool, struct record, tool);
96 return record__write(rec, event, event->header.size);
97}
98
99static int record__mmap_read(struct record *rec, struct perf_mmap *md)
100{
101 unsigned int head = perf_mmap__read_head(md);
102 unsigned int old = md->prev;
103 unsigned char *data = md->base + page_size;
104 unsigned long size;
105 void *buf;
106 int rc = 0;
107
108 if (old == head)
109 return 0;
110
111 rec->samples++;
112
113 size = head - old;
114
115 if ((old & md->mask) + size != (head & md->mask)) {
116 buf = &data[old & md->mask];
117 size = md->mask + 1 - (old & md->mask);
118 old += size;
119
120 if (record__write(rec, buf, size) < 0) {
121 rc = -1;
122 goto out;
123 }
124 }
125
126 buf = &data[old & md->mask];
127 size = head - old;
128 old += size;
129
130 if (record__write(rec, buf, size) < 0) {
131 rc = -1;
132 goto out;
133 }
134
135 md->prev = old;
136 perf_mmap__write_tail(md, old);
137
138out:
139 return rc;
140}
141
/* Set to 1 by the signal handlers to request the end of recording. */
static volatile int done = 0;
/* Number of the last signal handled, -1 if none yet. */
static volatile int signr = -1;
/* Set once SIGCHLD was seen; record__sig_exit() then skips the SIGTERM. */
static volatile int child_finished = 0;

/*
 * Handler installed for SIGCHLD, SIGINT and SIGTERM in __cmd_record():
 * requests termination and remembers which signal asked for it.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
154
155static void record__sig_exit(int exit_status __maybe_unused, void *arg)
156{
157 struct record *rec = arg;
158 int status;
159
160 if (rec->evlist->workload.pid > 0) {
161 if (!child_finished)
162 kill(rec->evlist->workload.pid, SIGTERM);
163
164 wait(&status);
165 if (WIFSIGNALED(status))
166 psignal(WTERMSIG(status), rec->progname);
167 }
168
169 if (signr == -1 || signr == SIGUSR1)
170 return;
171
172 signal(signr, SIG_DFL);
173}
174
175static int record__open(struct record *rec)
176{
177 char msg[512];
178 struct perf_evsel *pos;
179 struct perf_evlist *evlist = rec->evlist;
180 struct perf_session *session = rec->session;
181 struct record_opts *opts = &rec->opts;
182 int rc = 0;
183
184 perf_evlist__config(evlist, opts);
185
186 evlist__for_each(evlist, pos) {
187try_again:
188 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
189 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
190 if (verbose)
191 ui__warning("%s\n", msg);
192 goto try_again;
193 }
194
195 rc = -errno;
196 perf_evsel__open_strerror(pos, &opts->target,
197 errno, msg, sizeof(msg));
198 ui__error("%s\n", msg);
199 goto out;
200 }
201 }
202
203 if (perf_evlist__apply_filters(evlist)) {
204 error("failed to set filter with %d (%s)\n", errno,
205 strerror(errno));
206 rc = -1;
207 goto out;
208 }
209
210 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
211 if (errno == EPERM) {
212 pr_err("Permission error mapping pages.\n"
213 "Consider increasing "
214 "/proc/sys/kernel/perf_event_mlock_kb,\n"
215 "or try again with a smaller value of -m/--mmap_pages.\n"
216 "(current value: %u)\n", opts->mmap_pages);
217 rc = -errno;
218 } else {
219 pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
220 rc = -errno;
221 }
222 goto out;
223 }
224
225 session->evlist = evlist;
226 perf_session__set_id_hdr_size(session);
227out:
228 return rc;
229}
230
231static int process_buildids(struct record *rec)
232{
233 struct perf_data_file *file = &rec->file;
234 struct perf_session *session = rec->session;
235 u64 start = session->header.data_offset;
236
237 u64 size = lseek(file->fd, 0, SEEK_CUR);
238 if (size == 0)
239 return 0;
240
241 return __perf_session__process_events(session, start,
242 size - start,
243 size, &build_id__mark_dso_hit_ops);
244}
245
246static void record__exit(int status, void *arg)
247{
248 struct record *rec = arg;
249 struct perf_data_file *file = &rec->file;
250
251 if (status != 0)
252 return;
253
254 if (!file->is_pipe) {
255 rec->session->header.data_size += rec->bytes_written;
256
257 if (!rec->no_buildid)
258 process_buildids(rec);
259 perf_session__write_header(rec->session, rec->evlist,
260 file->fd, true);
261 perf_session__delete(rec->session);
262 perf_evlist__delete(rec->evlist);
263 symbol__exit();
264 }
265}
266
267static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
268{
269 int err;
270 struct perf_tool *tool = data;
271 /*
272 *As for guest kernel when processing subcommand record&report,
273 *we arrange module mmap prior to guest kernel mmap and trigger
274 *a preload dso because default guest module symbols are loaded
275 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
276 *method is used to avoid symbol missing when the first addr is
277 *in module instead of in guest kernel.
278 */
279 err = perf_event__synthesize_modules(tool, process_synthesized_event,
280 machine);
281 if (err < 0)
282 pr_err("Couldn't record guest kernel [%d]'s reference"
283 " relocation symbol.\n", machine->pid);
284
285 /*
286 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
287 * have no _text sometimes.
288 */
289 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
290 machine);
291 if (err < 0)
292 pr_err("Couldn't record guest kernel [%d]'s reference"
293 " relocation symbol.\n", machine->pid);
294}
295
/*
 * Header-only PERF_RECORD_FINISHED_ROUND record; record__mmap_read_all()
 * writes it after each pass when HEADER_TRACING_DATA is set.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
300
301static int record__mmap_read_all(struct record *rec)
302{
303 int i;
304 int rc = 0;
305
306 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
307 if (rec->evlist->mmap[i].base) {
308 if (record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
309 rc = -1;
310 goto out;
311 }
312 }
313 }
314
315 if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
316 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
317
318out:
319 return rc;
320}
321
322static void record__init_features(struct record *rec)
323{
324 struct perf_session *session = rec->session;
325 int feat;
326
327 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
328 perf_header__set_feat(&session->header, feat);
329
330 if (rec->no_buildid)
331 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
332
333 if (!have_tracepoints(&rec->evlist->entries))
334 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
335
336 if (!rec->opts.branch_stack)
337 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
338}
339
340static volatile int workload_exec_errno;
341
342/*
343 * perf_evlist__prepare_workload will send a SIGUSR1
344 * if the fork fails, since we asked by setting its
345 * want_signal to true.
346 */
347static void workload_exec_failed_signal(int signo, siginfo_t *info,
348 void *ucontext __maybe_unused)
349{
350 workload_exec_errno = info->si_value.sival_int;
351 done = 1;
352 signr = signo;
353 child_finished = 1;
354}
355
356static int __cmd_record(struct record *rec, int argc, const char **argv)
357{
358 int err;
359 unsigned long waking = 0;
360 const bool forks = argc > 0;
361 struct machine *machine;
362 struct perf_tool *tool = &rec->tool;
363 struct record_opts *opts = &rec->opts;
364 struct perf_data_file *file = &rec->file;
365 struct perf_session *session;
366 bool disabled = false;
367
368 rec->progname = argv[0];
369
370 on_exit(record__sig_exit, rec);
371 signal(SIGCHLD, sig_handler);
372 signal(SIGINT, sig_handler);
373 signal(SIGTERM, sig_handler);
374
375 session = perf_session__new(file, false, NULL);
376 if (session == NULL) {
377 pr_err("Perf session creation failed.\n");
378 return -1;
379 }
380
381 rec->session = session;
382
383 record__init_features(rec);
384
385 if (forks) {
386 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
387 argv, file->is_pipe,
388 workload_exec_failed_signal);
389 if (err < 0) {
390 pr_err("Couldn't run the workload!\n");
391 goto out_delete_session;
392 }
393 }
394
395 if (record__open(rec) != 0) {
396 err = -1;
397 goto out_delete_session;
398 }
399
400 if (!rec->evlist->nr_groups)
401 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
402
403 /*
404 * perf_session__delete(session) will be called at record__exit()
405 */
406 on_exit(record__exit, rec);
407
408 if (file->is_pipe) {
409 err = perf_header__write_pipe(file->fd);
410 if (err < 0)
411 goto out_delete_session;
412 } else {
413 err = perf_session__write_header(session, rec->evlist,
414 file->fd, false);
415 if (err < 0)
416 goto out_delete_session;
417 }
418
419 if (!rec->no_buildid
420 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
421 pr_err("Couldn't generate buildids. "
422 "Use --no-buildid to profile anyway.\n");
423 err = -1;
424 goto out_delete_session;
425 }
426
427 machine = &session->machines.host;
428
429 if (file->is_pipe) {
430 err = perf_event__synthesize_attrs(tool, session,
431 process_synthesized_event);
432 if (err < 0) {
433 pr_err("Couldn't synthesize attrs.\n");
434 goto out_delete_session;
435 }
436
437 if (have_tracepoints(&rec->evlist->entries)) {
438 /*
439 * FIXME err <= 0 here actually means that
440 * there were no tracepoints so its not really
441 * an error, just that we don't need to
442 * synthesize anything. We really have to
443 * return this more properly and also
444 * propagate errors that now are calling die()
445 */
446 err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
447 process_synthesized_event);
448 if (err <= 0) {
449 pr_err("Couldn't record tracing data.\n");
450 goto out_delete_session;
451 }
452 rec->bytes_written += err;
453 }
454 }
455
456 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
457 machine);
458 if (err < 0)
459 pr_err("Couldn't record kernel reference relocation symbol\n"
460 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
461 "Check /proc/kallsyms permission or run as root.\n");
462
463 err = perf_event__synthesize_modules(tool, process_synthesized_event,
464 machine);
465 if (err < 0)
466 pr_err("Couldn't record kernel module information.\n"
467 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
468 "Check /proc/modules permission or run as root.\n");
469
470 if (perf_guest) {
471 machines__process_guests(&session->machines,
472 perf_event__synthesize_guest_os, tool);
473 }
474
475 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
476 process_synthesized_event, opts->sample_address);
477 if (err != 0)
478 goto out_delete_session;
479
480 if (rec->realtime_prio) {
481 struct sched_param param;
482
483 param.sched_priority = rec->realtime_prio;
484 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) {
485 pr_err("Could not set realtime priority.\n");
486 err = -1;
487 goto out_delete_session;
488 }
489 }
490
491 /*
492 * When perf is starting the traced process, all the events
493 * (apart from group members) have enable_on_exec=1 set,
494 * so don't spoil it by prematurely enabling them.
495 */
496 if (!target__none(&opts->target) && !opts->initial_delay)
497 perf_evlist__enable(rec->evlist);
498
499 /*
500 * Let the child rip
501 */
502 if (forks)
503 perf_evlist__start_workload(rec->evlist);
504
505 if (opts->initial_delay) {
506 usleep(opts->initial_delay * 1000);
507 perf_evlist__enable(rec->evlist);
508 }
509
510 for (;;) {
511 int hits = rec->samples;
512
513 if (record__mmap_read_all(rec) < 0) {
514 err = -1;
515 goto out_delete_session;
516 }
517
518 if (hits == rec->samples) {
519 if (done)
520 break;
521 err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
522 waking++;
523 }
524
525 /*
526 * When perf is starting the traced process, at the end events
527 * die with the process and we wait for that. Thus no need to
528 * disable events in this case.
529 */
530 if (done && !disabled && !target__none(&opts->target)) {
531 perf_evlist__disable(rec->evlist);
532 disabled = true;
533 }
534 }
535
536 if (forks && workload_exec_errno) {
537 char msg[512];
538 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
539 pr_err("Workload failed: %s\n", emsg);
540 err = -1;
541 goto out_delete_session;
542 }
543
544 if (quiet || signr == SIGUSR1)
545 return 0;
546
547 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
548
549 /*
550 * Approximate RIP event size: 24 bytes.
551 */
552 fprintf(stderr,
553 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
554 (double)rec->bytes_written / 1024.0 / 1024.0,
555 file->path,
556 rec->bytes_written / 24);
557
558 return 0;
559
560out_delete_session:
561 perf_session__delete(session);
562 return err;
563}
564
565#define BRANCH_OPT(n, m) \
566 { .name = n, .mode = (m) }
567
568#define BRANCH_END { .name = NULL }
569
570struct branch_mode {
571 const char *name;
572 int mode;
573};
574
575static const struct branch_mode branch_modes[] = {
576 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
577 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
578 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
579 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
580 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
581 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
582 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
583 BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
584 BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
585 BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
586 BRANCH_END
587};
588
589static int
590parse_branch_stack(const struct option *opt, const char *str, int unset)
591{
592#define ONLY_PLM \
593 (PERF_SAMPLE_BRANCH_USER |\
594 PERF_SAMPLE_BRANCH_KERNEL |\
595 PERF_SAMPLE_BRANCH_HV)
596
597 uint64_t *mode = (uint64_t *)opt->value;
598 const struct branch_mode *br;
599 char *s, *os = NULL, *p;
600 int ret = -1;
601
602 if (unset)
603 return 0;
604
605 /*
606 * cannot set it twice, -b + --branch-filter for instance
607 */
608 if (*mode)
609 return -1;
610
611 /* str may be NULL in case no arg is passed to -b */
612 if (str) {
613 /* because str is read-only */
614 s = os = strdup(str);
615 if (!s)
616 return -1;
617
618 for (;;) {
619 p = strchr(s, ',');
620 if (p)
621 *p = '\0';
622
623 for (br = branch_modes; br->name; br++) {
624 if (!strcasecmp(s, br->name))
625 break;
626 }
627 if (!br->name) {
628 ui__warning("unknown branch filter %s,"
629 " check man page\n", s);
630 goto error;
631 }
632
633 *mode |= br->mode;
634
635 if (!p)
636 break;
637
638 s = p + 1;
639 }
640 }
641 ret = 0;
642
643 /* default to any branch */
644 if ((*mode & ~ONLY_PLM) == 0) {
645 *mode = PERF_SAMPLE_BRANCH_ANY;
646 }
647error:
648 free(os);
649 return ret;
650}
651
#ifdef HAVE_DWARF_UNWIND_SUPPORT
/*
 * Parse a user stack dump size.
 *
 * @str:   textual size; base is auto-detected by strtoul() (base 0).
 * @_size: receives the size rounded up to a multiple of sizeof(u64).
 *
 * The whole string must be consumed and the rounded size must be non-zero
 * and no larger than USHRT_MAX rounded down to a multiple of sizeof(u64).
 *
 * Returns 0 on success; otherwise prints an error and returns -1, leaving
 * *_size untouched.
 */
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	/* Anything left after the number makes the value invalid. */
	if (*endptr == '\0') {
		size = round_up(size, sizeof(u64));
		if (size && size <= max_size) {
			*_size = size;
			return 0;
		}
	}

	/* max_size is unsigned long, hence %lu. */
	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
679
680int record_parse_callchain(const char *arg, struct record_opts *opts)
681{
682 char *tok, *name, *saveptr = NULL;
683 char *buf;
684 int ret = -1;
685
686 /* We need buffer that we know we can write to. */
687 buf = malloc(strlen(arg) + 1);
688 if (!buf)
689 return -ENOMEM;
690
691 strcpy(buf, arg);
692
693 tok = strtok_r((char *)buf, ",", &saveptr);
694 name = tok ? : (char *)buf;
695
696 do {
697 /* Framepointer style */
698 if (!strncmp(name, "fp", sizeof("fp"))) {
699 if (!strtok_r(NULL, ",", &saveptr)) {
700 opts->call_graph = CALLCHAIN_FP;
701 ret = 0;
702 } else
703 pr_err("callchain: No more arguments "
704 "needed for -g fp\n");
705 break;
706
707#ifdef HAVE_DWARF_UNWIND_SUPPORT
708 /* Dwarf style */
709 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
710 const unsigned long default_stack_dump_size = 8192;
711
712 ret = 0;
713 opts->call_graph = CALLCHAIN_DWARF;
714 opts->stack_dump_size = default_stack_dump_size;
715
716 tok = strtok_r(NULL, ",", &saveptr);
717 if (tok) {
718 unsigned long size = 0;
719
720 ret = get_stack_size(tok, &size);
721 opts->stack_dump_size = size;
722 }
723#endif /* HAVE_DWARF_UNWIND_SUPPORT */
724 } else {
725 pr_err("callchain: Unknown --call-graph option "
726 "value: %s\n", arg);
727 break;
728 }
729
730 } while (0);
731
732 free(buf);
733 return ret;
734}
735
736static void callchain_debug(struct record_opts *opts)
737{
738 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };
739
740 pr_debug("callchain: type %s\n", str[opts->call_graph]);
741
742 if (opts->call_graph == CALLCHAIN_DWARF)
743 pr_debug("callchain: stack dump size %d\n",
744 opts->stack_dump_size);
745}
746
747int record_parse_callchain_opt(const struct option *opt,
748 const char *arg,
749 int unset)
750{
751 struct record_opts *opts = opt->value;
752 int ret;
753
754 opts->call_graph_enabled = !unset;
755
756 /* --no-call-graph */
757 if (unset) {
758 opts->call_graph = CALLCHAIN_NONE;
759 pr_debug("callchain: disabled\n");
760 return 0;
761 }
762
763 ret = record_parse_callchain(arg, opts);
764 if (!ret)
765 callchain_debug(opts);
766
767 return ret;
768}
769
770int record_callchain_opt(const struct option *opt,
771 const char *arg __maybe_unused,
772 int unset __maybe_unused)
773{
774 struct record_opts *opts = opt->value;
775
776 opts->call_graph_enabled = !unset;
777
778 if (opts->call_graph == CALLCHAIN_NONE)
779 opts->call_graph = CALLCHAIN_FP;
780
781 callchain_debug(opts);
782 return 0;
783}
784
785static int perf_record_config(const char *var, const char *value, void *cb)
786{
787 struct record *rec = cb;
788
789 if (!strcmp(var, "record.call-graph"))
790 return record_parse_callchain(value, &rec->opts);
791
792 return perf_default_config(var, value, cb);
793}
794
/* NULL-terminated usage synopsis lines for 'perf record'. */
static const char * const record_usage[] = {
	/* workload given directly after the options */
	"perf record [<options>] [<command>]",
	/* workload separated from the options by "--" */
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
800
801/*
802 * XXX Ideally would be local to cmd_record() and passed to a record__new
803 * because we need to have access to it in record__exit, that is called
804 * after cmd_record() exits, but since record_options need to be accessible to
805 * builtin-script, leave it here.
806 *
807 * At least we don't ouch it in all the other functions here directly.
808 *
809 * Just say no to tons of global variables, sigh.
810 */
811static struct record record = {
812 .opts = {
813 .mmap_pages = UINT_MAX,
814 .user_freq = UINT_MAX,
815 .user_interval = ULLONG_MAX,
816 .freq = 4000,
817 .target = {
818 .uses_mmap = true,
819 .default_per_cpu = true,
820 },
821 },
822};
823
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "

/*
 * Help text for --call-graph: the common prefix followed by the modes
 * available in this build ("dwarf" only with HAVE_DWARF_UNWIND_SUPPORT).
 */
const char record_callchain_help[] = CALLCHAIN_HELP
#ifdef HAVE_DWARF_UNWIND_SUPPORT
	"fp dwarf"
#else
	"fp"
#endif
	;
831
832/*
833 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
834 * with it and switch to use the library functions in perf_evlist that came
835 * from builtin-record.c, i.e. use record_opts,
836 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
837 * using pipes, etc.
838 */
839const struct option record_options[] = {
840 OPT_CALLBACK('e', "event", &record.evlist, "event",
841 "event selector. use 'perf list' to list available events",
842 parse_events_option),
843 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
844 "event filter", parse_filter),
845 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
846 "record events on existing process id"),
847 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
848 "record events on existing thread id"),
849 OPT_INTEGER('r', "realtime", &record.realtime_prio,
850 "collect data with this RT SCHED_FIFO priority"),
851 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
852 "collect data without buffering"),
853 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
854 "collect raw sample records from all opened counters"),
855 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
856 "system-wide collection from all CPUs"),
857 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
858 "list of cpus to monitor"),
859 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
860 OPT_STRING('o', "output", &record.file.path, "file",
861 "output file name"),
862 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
863 &record.opts.no_inherit_set,
864 "child tasks do not inherit counters"),
865 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
866 OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
867 "number of mmap data pages",
868 perf_evlist__parse_mmap_pages),
869 OPT_BOOLEAN(0, "group", &record.opts.group,
870 "put the counters into a counter group"),
871 OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
872 NULL, "enables call-graph recording" ,
873 &record_callchain_opt),
874 OPT_CALLBACK(0, "call-graph", &record.opts,
875 "mode[,dump_size]", record_callchain_help,
876 &record_parse_callchain_opt),
877 OPT_INCR('v', "verbose", &verbose,
878 "be more verbose (show counter open errors, etc)"),
879 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
880 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
881 "per thread counts"),
882 OPT_BOOLEAN('d', "data", &record.opts.sample_address,
883 "Sample addresses"),
884 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
885 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
886 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
887 "don't sample"),
888 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
889 "do not update the buildid cache"),
890 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
891 "do not collect buildids in perf.data"),
892 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
893 "monitor event in cgroup name only",
894 parse_cgroups),
895 OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
896 "ms to wait before starting measurement after program start"),
897 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
898 "user to profile"),
899
900 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
901 "branch any", "sample any taken branches",
902 parse_branch_stack),
903
904 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
905 "branch filter mask", "branch stack filter modes",
906 parse_branch_stack),
907 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
908 "sample by weight (on special events only)"),
909 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
910 "sample transaction flags (special events only)"),
911 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
912 "use per-thread mmaps"),
913 OPT_END()
914};
915
916int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
917{
918 int err = -ENOMEM;
919 struct record *rec = &record;
920 char errbuf[BUFSIZ];
921
922 rec->evlist = perf_evlist__new();
923 if (rec->evlist == NULL)
924 return -ENOMEM;
925
926 perf_config(perf_record_config, rec);
927
928 argc = parse_options(argc, argv, record_options, record_usage,
929 PARSE_OPT_STOP_AT_NON_OPTION);
930 if (!argc && target__none(&rec->opts.target))
931 usage_with_options(record_usage, record_options);
932
933 if (nr_cgroups && !rec->opts.target.system_wide) {
934 ui__error("cgroup monitoring only available in"
935 " system-wide mode\n");
936 usage_with_options(record_usage, record_options);
937 }
938
939 symbol__init();
940
941 if (symbol_conf.kptr_restrict)
942 pr_warning(
943"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
944"check /proc/sys/kernel/kptr_restrict.\n\n"
945"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
946"file is not found in the buildid cache or in the vmlinux path.\n\n"
947"Samples in kernel modules won't be resolved at all.\n\n"
948"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
949"even with a suitable vmlinux or kallsyms file.\n\n");
950
951 if (rec->no_buildid_cache || rec->no_buildid)
952 disable_buildid_cache();
953
954 if (rec->evlist->nr_entries == 0 &&
955 perf_evlist__add_default(rec->evlist) < 0) {
956 pr_err("Not enough memory for event selector list\n");
957 goto out_symbol_exit;
958 }
959
960 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
961 rec->opts.no_inherit = true;
962
963 err = target__validate(&rec->opts.target);
964 if (err) {
965 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
966 ui__warning("%s", errbuf);
967 }
968
969 err = target__parse_uid(&rec->opts.target);
970 if (err) {
971 int saved_errno = errno;
972
973 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
974 ui__error("%s", errbuf);
975
976 err = -saved_errno;
977 goto out_symbol_exit;
978 }
979
980 err = -ENOMEM;
981 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
982 usage_with_options(record_usage, record_options);
983
984 if (record_opts__config(&rec->opts)) {
985 err = -EINVAL;
986 goto out_symbol_exit;
987 }
988
989 err = __cmd_record(&record, argc, argv);
990out_symbol_exit:
991 symbol__exit();
992 return err;
993}