Loading...
1/*
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
7 */
8#include "builtin.h"
9
10#include "perf.h"
11
12#include "util/build-id.h"
13#include "util/util.h"
14#include "util/parse-options.h"
15#include "util/parse-events.h"
16
17#include "util/header.h"
18#include "util/event.h"
19#include "util/evlist.h"
20#include "util/evsel.h"
21#include "util/debug.h"
22#include "util/session.h"
23#include "util/tool.h"
24#include "util/symbol.h"
25#include "util/cpumap.h"
26#include "util/thread_map.h"
27#include "util/data.h"
28
29#include <unistd.h>
30#include <sched.h>
31#include <sys/mman.h>
32
#ifndef HAVE_ON_EXIT_SUPPORT
/*
 * Minimal on_exit() emulation for C libraries that lack it.
 *
 * Handlers are kept in a fixed-size table and dispatched from a single
 * atexit() hook.  The exit status handed to them is captured by the exit()
 * wrapper macro below, so it is only meaningful when the process terminates
 * through exit(); otherwise handlers see the initial value 0.
 */
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
/* Record the status so that the handlers can receive it later. */
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register @function to be called with the exit status and @arg when the
 * process exits.
 *
 * Returns 0 on success, or -ENOMEM when the table is full or when the
 * atexit() hook that drives the table could not be installed.
 */
static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;

	/*
	 * The dispatcher is installed on first use.  If that fails nothing
	 * would ever run the handlers, so report the failure instead of
	 * silently accepting the registration.
	 */
	if (__on_exit_count == 0 && atexit(__handle_on_exit_funcs) != 0)
		return -ENOMEM;

	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

/* atexit() hook: run every registered handler, in registration order. */
static void __handle_on_exit_funcs(void)
{
	int i;

	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
64
/*
 * State of one 'perf record' run.
 *
 * The perf_tool is embedded so that callbacks which only receive the tool
 * pointer can get back to the enclosing record with container_of().
 */
struct record {
	struct perf_tool	tool;		/* handed to the event synthesis helpers */
	struct record_opts	opts;		/* recording options, filled from record_options[] */
	u64			bytes_written;	/* running total accumulated by record__write() */
	struct perf_data_file	file;		/* output file: path, fd and is_pipe flag */
	struct perf_evlist	*evlist;	/* events being recorded, created in cmd_record() */
	struct perf_session	*session;	/* created in __cmd_record() */
	const char		*progname;	/* argv[0] of the workload, used as psignal() prefix */
	int			realtime_prio;	/* -r: SCHED_FIFO priority, 0 means leave unchanged */
	bool			no_buildid;	/* -B: do not collect build-ids */
	bool			no_buildid_cache; /* -N: do not update the build-id cache */
	long			samples;	/* bumped each time a ring buffer had new data */
};
78
79static int record__write(struct record *rec, void *bf, size_t size)
80{
81 if (perf_data_file__write(rec->session->file, bf, size) < 0) {
82 pr_err("failed to write perf data, error: %m\n");
83 return -1;
84 }
85
86 rec->bytes_written += size;
87 return 0;
88}
89
90static int process_synthesized_event(struct perf_tool *tool,
91 union perf_event *event,
92 struct perf_sample *sample __maybe_unused,
93 struct machine *machine __maybe_unused)
94{
95 struct record *rec = container_of(tool, struct record, tool);
96 return record__write(rec, event, event->header.size);
97}
98
99static int record__mmap_read(struct record *rec, struct perf_mmap *md)
100{
101 unsigned int head = perf_mmap__read_head(md);
102 unsigned int old = md->prev;
103 unsigned char *data = md->base + page_size;
104 unsigned long size;
105 void *buf;
106 int rc = 0;
107
108 if (old == head)
109 return 0;
110
111 rec->samples++;
112
113 size = head - old;
114
115 if ((old & md->mask) + size != (head & md->mask)) {
116 buf = &data[old & md->mask];
117 size = md->mask + 1 - (old & md->mask);
118 old += size;
119
120 if (record__write(rec, buf, size) < 0) {
121 rc = -1;
122 goto out;
123 }
124 }
125
126 buf = &data[old & md->mask];
127 size = head - old;
128 old += size;
129
130 if (record__write(rec, buf, size) < 0) {
131 rc = -1;
132 goto out;
133 }
134
135 md->prev = old;
136 perf_mmap__write_tail(md, old);
137
138out:
139 return rc;
140}
141
/*
 * Flags shared between the signal handlers and the main loop.  They are
 * sig_atomic_t because that is the only object type ISO C guarantees can
 * be safely written from an asynchronous signal handler.
 */
static volatile sig_atomic_t done = 0;		/* set once recording should stop */
static volatile sig_atomic_t signr = -1;	/* last signal seen, -1 if none */
static volatile sig_atomic_t child_finished = 0; /* set when SIGCHLD was seen */

/*
 * Common handler for the signals installed by this file: remember which
 * signal arrived and ask the main loop to stop.  SIGCHLD additionally
 * marks the child as finished.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
154
155static void record__sig_exit(int exit_status __maybe_unused, void *arg)
156{
157 struct record *rec = arg;
158 int status;
159
160 if (rec->evlist->workload.pid > 0) {
161 if (!child_finished)
162 kill(rec->evlist->workload.pid, SIGTERM);
163
164 wait(&status);
165 if (WIFSIGNALED(status))
166 psignal(WTERMSIG(status), rec->progname);
167 }
168
169 if (signr == -1 || signr == SIGUSR1)
170 return;
171
172 signal(signr, SIG_DFL);
173}
174
175static int record__open(struct record *rec)
176{
177 char msg[512];
178 struct perf_evsel *pos;
179 struct perf_evlist *evlist = rec->evlist;
180 struct perf_session *session = rec->session;
181 struct record_opts *opts = &rec->opts;
182 int rc = 0;
183
184 perf_evlist__config(evlist, opts);
185
186 evlist__for_each(evlist, pos) {
187try_again:
188 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
189 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
190 if (verbose)
191 ui__warning("%s\n", msg);
192 goto try_again;
193 }
194
195 rc = -errno;
196 perf_evsel__open_strerror(pos, &opts->target,
197 errno, msg, sizeof(msg));
198 ui__error("%s\n", msg);
199 goto out;
200 }
201 }
202
203 if (perf_evlist__apply_filters(evlist)) {
204 error("failed to set filter with %d (%s)\n", errno,
205 strerror(errno));
206 rc = -1;
207 goto out;
208 }
209
210 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
211 if (errno == EPERM) {
212 pr_err("Permission error mapping pages.\n"
213 "Consider increasing "
214 "/proc/sys/kernel/perf_event_mlock_kb,\n"
215 "or try again with a smaller value of -m/--mmap_pages.\n"
216 "(current value: %u)\n", opts->mmap_pages);
217 rc = -errno;
218 } else {
219 pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
220 rc = -errno;
221 }
222 goto out;
223 }
224
225 session->evlist = evlist;
226 perf_session__set_id_hdr_size(session);
227out:
228 return rc;
229}
230
231static int process_buildids(struct record *rec)
232{
233 struct perf_data_file *file = &rec->file;
234 struct perf_session *session = rec->session;
235 u64 start = session->header.data_offset;
236
237 u64 size = lseek(file->fd, 0, SEEK_CUR);
238 if (size == 0)
239 return 0;
240
241 return __perf_session__process_events(session, start,
242 size - start,
243 size, &build_id__mark_dso_hit_ops);
244}
245
246static void record__exit(int status, void *arg)
247{
248 struct record *rec = arg;
249 struct perf_data_file *file = &rec->file;
250
251 if (status != 0)
252 return;
253
254 if (!file->is_pipe) {
255 rec->session->header.data_size += rec->bytes_written;
256
257 if (!rec->no_buildid)
258 process_buildids(rec);
259 perf_session__write_header(rec->session, rec->evlist,
260 file->fd, true);
261 perf_session__delete(rec->session);
262 perf_evlist__delete(rec->evlist);
263 symbol__exit();
264 }
265}
266
267static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
268{
269 int err;
270 struct perf_tool *tool = data;
271 /*
272 *As for guest kernel when processing subcommand record&report,
273 *we arrange module mmap prior to guest kernel mmap and trigger
274 *a preload dso because default guest module symbols are loaded
275 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
276 *method is used to avoid symbol missing when the first addr is
277 *in module instead of in guest kernel.
278 */
279 err = perf_event__synthesize_modules(tool, process_synthesized_event,
280 machine);
281 if (err < 0)
282 pr_err("Couldn't record guest kernel [%d]'s reference"
283 " relocation symbol.\n", machine->pid);
284
285 /*
286 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
287 * have no _text sometimes.
288 */
289 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
290 machine);
291 if (err < 0)
292 pr_err("Couldn't record guest kernel [%d]'s reference"
293 " relocation symbol.\n", machine->pid);
294}
295
296static struct perf_event_header finished_round_event = {
297 .size = sizeof(struct perf_event_header),
298 .type = PERF_RECORD_FINISHED_ROUND,
299};
300
301static int record__mmap_read_all(struct record *rec)
302{
303 int i;
304 int rc = 0;
305
306 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
307 if (rec->evlist->mmap[i].base) {
308 if (record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
309 rc = -1;
310 goto out;
311 }
312 }
313 }
314
315 if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
316 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
317
318out:
319 return rc;
320}
321
322static void record__init_features(struct record *rec)
323{
324 struct perf_session *session = rec->session;
325 int feat;
326
327 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
328 perf_header__set_feat(&session->header, feat);
329
330 if (rec->no_buildid)
331 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
332
333 if (!have_tracepoints(&rec->evlist->entries))
334 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
335
336 if (!rec->opts.branch_stack)
337 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
338}
339
340static volatile int workload_exec_errno;
341
342/*
343 * perf_evlist__prepare_workload will send a SIGUSR1
344 * if the fork fails, since we asked by setting its
345 * want_signal to true.
346 */
347static void workload_exec_failed_signal(int signo, siginfo_t *info,
348 void *ucontext __maybe_unused)
349{
350 workload_exec_errno = info->si_value.sival_int;
351 done = 1;
352 signr = signo;
353 child_finished = 1;
354}
355
356static int __cmd_record(struct record *rec, int argc, const char **argv)
357{
358 int err;
359 unsigned long waking = 0;
360 const bool forks = argc > 0;
361 struct machine *machine;
362 struct perf_tool *tool = &rec->tool;
363 struct record_opts *opts = &rec->opts;
364 struct perf_data_file *file = &rec->file;
365 struct perf_session *session;
366 bool disabled = false;
367
368 rec->progname = argv[0];
369
370 on_exit(record__sig_exit, rec);
371 signal(SIGCHLD, sig_handler);
372 signal(SIGINT, sig_handler);
373 signal(SIGTERM, sig_handler);
374
375 session = perf_session__new(file, false, NULL);
376 if (session == NULL) {
377 pr_err("Perf session creation failed.\n");
378 return -1;
379 }
380
381 rec->session = session;
382
383 record__init_features(rec);
384
385 if (forks) {
386 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
387 argv, file->is_pipe,
388 workload_exec_failed_signal);
389 if (err < 0) {
390 pr_err("Couldn't run the workload!\n");
391 goto out_delete_session;
392 }
393 }
394
395 if (record__open(rec) != 0) {
396 err = -1;
397 goto out_delete_session;
398 }
399
400 if (!rec->evlist->nr_groups)
401 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
402
403 /*
404 * perf_session__delete(session) will be called at record__exit()
405 */
406 on_exit(record__exit, rec);
407
408 if (file->is_pipe) {
409 err = perf_header__write_pipe(file->fd);
410 if (err < 0)
411 goto out_delete_session;
412 } else {
413 err = perf_session__write_header(session, rec->evlist,
414 file->fd, false);
415 if (err < 0)
416 goto out_delete_session;
417 }
418
419 if (!rec->no_buildid
420 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
421 pr_err("Couldn't generate buildids. "
422 "Use --no-buildid to profile anyway.\n");
423 err = -1;
424 goto out_delete_session;
425 }
426
427 machine = &session->machines.host;
428
429 if (file->is_pipe) {
430 err = perf_event__synthesize_attrs(tool, session,
431 process_synthesized_event);
432 if (err < 0) {
433 pr_err("Couldn't synthesize attrs.\n");
434 goto out_delete_session;
435 }
436
437 if (have_tracepoints(&rec->evlist->entries)) {
438 /*
439 * FIXME err <= 0 here actually means that
440 * there were no tracepoints so its not really
441 * an error, just that we don't need to
442 * synthesize anything. We really have to
443 * return this more properly and also
444 * propagate errors that now are calling die()
445 */
446 err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
447 process_synthesized_event);
448 if (err <= 0) {
449 pr_err("Couldn't record tracing data.\n");
450 goto out_delete_session;
451 }
452 rec->bytes_written += err;
453 }
454 }
455
456 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
457 machine);
458 if (err < 0)
459 pr_err("Couldn't record kernel reference relocation symbol\n"
460 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
461 "Check /proc/kallsyms permission or run as root.\n");
462
463 err = perf_event__synthesize_modules(tool, process_synthesized_event,
464 machine);
465 if (err < 0)
466 pr_err("Couldn't record kernel module information.\n"
467 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
468 "Check /proc/modules permission or run as root.\n");
469
470 if (perf_guest) {
471 machines__process_guests(&session->machines,
472 perf_event__synthesize_guest_os, tool);
473 }
474
475 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
476 process_synthesized_event, opts->sample_address);
477 if (err != 0)
478 goto out_delete_session;
479
480 if (rec->realtime_prio) {
481 struct sched_param param;
482
483 param.sched_priority = rec->realtime_prio;
484 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) {
485 pr_err("Could not set realtime priority.\n");
486 err = -1;
487 goto out_delete_session;
488 }
489 }
490
491 /*
492 * When perf is starting the traced process, all the events
493 * (apart from group members) have enable_on_exec=1 set,
494 * so don't spoil it by prematurely enabling them.
495 */
496 if (!target__none(&opts->target) && !opts->initial_delay)
497 perf_evlist__enable(rec->evlist);
498
499 /*
500 * Let the child rip
501 */
502 if (forks)
503 perf_evlist__start_workload(rec->evlist);
504
505 if (opts->initial_delay) {
506 usleep(opts->initial_delay * 1000);
507 perf_evlist__enable(rec->evlist);
508 }
509
510 for (;;) {
511 int hits = rec->samples;
512
513 if (record__mmap_read_all(rec) < 0) {
514 err = -1;
515 goto out_delete_session;
516 }
517
518 if (hits == rec->samples) {
519 if (done)
520 break;
521 err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
522 waking++;
523 }
524
525 /*
526 * When perf is starting the traced process, at the end events
527 * die with the process and we wait for that. Thus no need to
528 * disable events in this case.
529 */
530 if (done && !disabled && !target__none(&opts->target)) {
531 perf_evlist__disable(rec->evlist);
532 disabled = true;
533 }
534 }
535
536 if (forks && workload_exec_errno) {
537 char msg[512];
538 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
539 pr_err("Workload failed: %s\n", emsg);
540 err = -1;
541 goto out_delete_session;
542 }
543
544 if (quiet || signr == SIGUSR1)
545 return 0;
546
547 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
548
549 /*
550 * Approximate RIP event size: 24 bytes.
551 */
552 fprintf(stderr,
553 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
554 (double)rec->bytes_written / 1024.0 / 1024.0,
555 file->path,
556 rec->bytes_written / 24);
557
558 return 0;
559
560out_delete_session:
561 perf_session__delete(session);
562 return err;
563}
564
/* Build one branch_modes[] entry that maps filter name n to mask m. */
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

/* Sentinel entry: a NULL name terminates branch_modes[]. */
#define BRANCH_END { .name = NULL }

/* Association between a branch filter keyword and its sample mask bit. */
struct branch_mode {
	const char *name;	/* keyword as typed by the user */
	int mode;		/* PERF_SAMPLE_BRANCH_* value OR-ed into the mask */
};

/*
 * Keywords accepted by parse_branch_stack(), which compares them
 * case-insensitively.  The table is terminated by BRANCH_END.
 */
static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
	BRANCH_END
};
588
589static int
590parse_branch_stack(const struct option *opt, const char *str, int unset)
591{
592#define ONLY_PLM \
593 (PERF_SAMPLE_BRANCH_USER |\
594 PERF_SAMPLE_BRANCH_KERNEL |\
595 PERF_SAMPLE_BRANCH_HV)
596
597 uint64_t *mode = (uint64_t *)opt->value;
598 const struct branch_mode *br;
599 char *s, *os = NULL, *p;
600 int ret = -1;
601
602 if (unset)
603 return 0;
604
605 /*
606 * cannot set it twice, -b + --branch-filter for instance
607 */
608 if (*mode)
609 return -1;
610
611 /* str may be NULL in case no arg is passed to -b */
612 if (str) {
613 /* because str is read-only */
614 s = os = strdup(str);
615 if (!s)
616 return -1;
617
618 for (;;) {
619 p = strchr(s, ',');
620 if (p)
621 *p = '\0';
622
623 for (br = branch_modes; br->name; br++) {
624 if (!strcasecmp(s, br->name))
625 break;
626 }
627 if (!br->name) {
628 ui__warning("unknown branch filter %s,"
629 " check man page\n", s);
630 goto error;
631 }
632
633 *mode |= br->mode;
634
635 if (!p)
636 break;
637
638 s = p + 1;
639 }
640 }
641 ret = 0;
642
643 /* default to any branch */
644 if ((*mode & ~ONLY_PLM) == 0) {
645 *mode = PERF_SAMPLE_BRANCH_ANY;
646 }
647error:
648 free(os);
649 return ret;
650}
651
652#ifdef HAVE_DWARF_UNWIND_SUPPORT
653static int get_stack_size(char *str, unsigned long *_size)
654{
655 char *endptr;
656 unsigned long size;
657 unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
658
659 size = strtoul(str, &endptr, 0);
660
661 do {
662 if (*endptr)
663 break;
664
665 size = round_up(size, sizeof(u64));
666 if (!size || size > max_size)
667 break;
668
669 *_size = size;
670 return 0;
671
672 } while (0);
673
674 pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
675 max_size, str);
676 return -1;
677}
678#endif /* HAVE_DWARF_UNWIND_SUPPORT */
679
680int record_parse_callchain(const char *arg, struct record_opts *opts)
681{
682 char *tok, *name, *saveptr = NULL;
683 char *buf;
684 int ret = -1;
685
686 /* We need buffer that we know we can write to. */
687 buf = malloc(strlen(arg) + 1);
688 if (!buf)
689 return -ENOMEM;
690
691 strcpy(buf, arg);
692
693 tok = strtok_r((char *)buf, ",", &saveptr);
694 name = tok ? : (char *)buf;
695
696 do {
697 /* Framepointer style */
698 if (!strncmp(name, "fp", sizeof("fp"))) {
699 if (!strtok_r(NULL, ",", &saveptr)) {
700 opts->call_graph = CALLCHAIN_FP;
701 ret = 0;
702 } else
703 pr_err("callchain: No more arguments "
704 "needed for -g fp\n");
705 break;
706
707#ifdef HAVE_DWARF_UNWIND_SUPPORT
708 /* Dwarf style */
709 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
710 const unsigned long default_stack_dump_size = 8192;
711
712 ret = 0;
713 opts->call_graph = CALLCHAIN_DWARF;
714 opts->stack_dump_size = default_stack_dump_size;
715
716 tok = strtok_r(NULL, ",", &saveptr);
717 if (tok) {
718 unsigned long size = 0;
719
720 ret = get_stack_size(tok, &size);
721 opts->stack_dump_size = size;
722 }
723#endif /* HAVE_DWARF_UNWIND_SUPPORT */
724 } else {
725 pr_err("callchain: Unknown --call-graph option "
726 "value: %s\n", arg);
727 break;
728 }
729
730 } while (0);
731
732 free(buf);
733 return ret;
734}
735
736static void callchain_debug(struct record_opts *opts)
737{
738 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };
739
740 pr_debug("callchain: type %s\n", str[opts->call_graph]);
741
742 if (opts->call_graph == CALLCHAIN_DWARF)
743 pr_debug("callchain: stack dump size %d\n",
744 opts->stack_dump_size);
745}
746
747int record_parse_callchain_opt(const struct option *opt,
748 const char *arg,
749 int unset)
750{
751 struct record_opts *opts = opt->value;
752 int ret;
753
754 opts->call_graph_enabled = !unset;
755
756 /* --no-call-graph */
757 if (unset) {
758 opts->call_graph = CALLCHAIN_NONE;
759 pr_debug("callchain: disabled\n");
760 return 0;
761 }
762
763 ret = record_parse_callchain(arg, opts);
764 if (!ret)
765 callchain_debug(opts);
766
767 return ret;
768}
769
770int record_callchain_opt(const struct option *opt,
771 const char *arg __maybe_unused,
772 int unset __maybe_unused)
773{
774 struct record_opts *opts = opt->value;
775
776 opts->call_graph_enabled = !unset;
777
778 if (opts->call_graph == CALLCHAIN_NONE)
779 opts->call_graph = CALLCHAIN_FP;
780
781 callchain_debug(opts);
782 return 0;
783}
784
785static int perf_record_config(const char *var, const char *value, void *cb)
786{
787 struct record *rec = cb;
788
789 if (!strcmp(var, "record.call-graph"))
790 return record_parse_callchain(value, &rec->opts);
791
792 return perf_default_config(var, value, cb);
793}
794
/* Usage lines handed to the option parser; NULL-terminated. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
800
/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 *
 * The initializer below supplies the option defaults that are in effect
 * before the config file and the command line are parsed.
 */
static struct record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
	},
};
823
/* Common prefix of the --call-graph help text. */
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "

/* The list of modes shown in the help depends on DWARF unwind support. */
#ifdef HAVE_DWARF_UNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif
831
/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 *
 * Command line options of 'perf record'.  Every entry stores into the
 * file-scope 'record' instance (or into the global verbose/quiet flags),
 * and the table is terminated by OPT_END().
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	/* -g takes no argument; --call-graph selects the mode explicitly. */
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	/* -b and -j share one callback and one destination mask. */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_END()
};
915
916int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
917{
918 int err = -ENOMEM;
919 struct record *rec = &record;
920 char errbuf[BUFSIZ];
921
922 rec->evlist = perf_evlist__new();
923 if (rec->evlist == NULL)
924 return -ENOMEM;
925
926 perf_config(perf_record_config, rec);
927
928 argc = parse_options(argc, argv, record_options, record_usage,
929 PARSE_OPT_STOP_AT_NON_OPTION);
930 if (!argc && target__none(&rec->opts.target))
931 usage_with_options(record_usage, record_options);
932
933 if (nr_cgroups && !rec->opts.target.system_wide) {
934 ui__error("cgroup monitoring only available in"
935 " system-wide mode\n");
936 usage_with_options(record_usage, record_options);
937 }
938
939 symbol__init();
940
941 if (symbol_conf.kptr_restrict)
942 pr_warning(
943"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
944"check /proc/sys/kernel/kptr_restrict.\n\n"
945"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
946"file is not found in the buildid cache or in the vmlinux path.\n\n"
947"Samples in kernel modules won't be resolved at all.\n\n"
948"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
949"even with a suitable vmlinux or kallsyms file.\n\n");
950
951 if (rec->no_buildid_cache || rec->no_buildid)
952 disable_buildid_cache();
953
954 if (rec->evlist->nr_entries == 0 &&
955 perf_evlist__add_default(rec->evlist) < 0) {
956 pr_err("Not enough memory for event selector list\n");
957 goto out_symbol_exit;
958 }
959
960 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
961 rec->opts.no_inherit = true;
962
963 err = target__validate(&rec->opts.target);
964 if (err) {
965 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
966 ui__warning("%s", errbuf);
967 }
968
969 err = target__parse_uid(&rec->opts.target);
970 if (err) {
971 int saved_errno = errno;
972
973 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
974 ui__error("%s", errbuf);
975
976 err = -saved_errno;
977 goto out_symbol_exit;
978 }
979
980 err = -ENOMEM;
981 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
982 usage_with_options(record_usage, record_options);
983
984 if (record_opts__config(&rec->opts)) {
985 err = -EINVAL;
986 goto out_symbol_exit;
987 }
988
989 err = __cmd_record(&record, argc, argv);
990out_symbol_exit:
991 symbol__exit();
992 return err;
993}
1/*
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
7 */
8#define _FILE_OFFSET_BITS 64
9
10#include "builtin.h"
11
12#include "perf.h"
13
14#include "util/build-id.h"
15#include "util/util.h"
16#include "util/parse-options.h"
17#include "util/parse-events.h"
18
19#include "util/header.h"
20#include "util/event.h"
21#include "util/evlist.h"
22#include "util/evsel.h"
23#include "util/debug.h"
24#include "util/session.h"
25#include "util/tool.h"
26#include "util/symbol.h"
27#include "util/cpumap.h"
28#include "util/thread_map.h"
29
30#include <unistd.h>
31#include <sched.h>
32#include <sys/mman.h>
33
/*
 * How the output file is opened.
 * NOTE(review): the code that consumes these values is not visible in this
 * part of the file; presumably FORCE overwrites and APPEND extends an
 * existing file - confirm against the user of perf_record::write_mode.
 */
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};
38
/*
 * State of one 'perf record' run.
 *
 * The perf_tool is embedded so that callbacks which only receive the tool
 * pointer can get back to the enclosing perf_record with container_of().
 * Fields without a comment are not used in the part of the file reviewed
 * here.
 */
struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;		/* recording options */
	u64			bytes_written;	/* running total of bytes sent to the output */
	const char		*output_name;
	struct perf_evlist	*evlist;	/* events being recorded; also holds the workload pid */
	struct perf_session	*session;
	const char		*progname;	/* prefix passed to psignal() at exit */
	int			output;		/* file descriptor write_output() writes to */
	unsigned int		page_size;	/* offset of the data area inside each mmap */
	int			realtime_prio;
	enum write_mode_t	write_mode;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			force;
	bool			file_new;
	bool			append_file;
	long			samples;	/* bumped each time a ring buffer had new data */
	off_t			post_processing_offset;
};
59
60static void advance_output(struct perf_record *rec, size_t size)
61{
62 rec->bytes_written += size;
63}
64
65static void write_output(struct perf_record *rec, void *buf, size_t size)
66{
67 while (size) {
68 int ret = write(rec->output, buf, size);
69
70 if (ret < 0)
71 die("failed to write");
72
73 size -= ret;
74 buf += ret;
75
76 rec->bytes_written += ret;
77 }
78}
79
80static int process_synthesized_event(struct perf_tool *tool,
81 union perf_event *event,
82 struct perf_sample *sample __used,
83 struct machine *machine __used)
84{
85 struct perf_record *rec = container_of(tool, struct perf_record, tool);
86 write_output(rec, event, event->header.size);
87 return 0;
88}
89
90static void perf_record__mmap_read(struct perf_record *rec,
91 struct perf_mmap *md)
92{
93 unsigned int head = perf_mmap__read_head(md);
94 unsigned int old = md->prev;
95 unsigned char *data = md->base + rec->page_size;
96 unsigned long size;
97 void *buf;
98
99 if (old == head)
100 return;
101
102 rec->samples++;
103
104 size = head - old;
105
106 if ((old & md->mask) + size != (head & md->mask)) {
107 buf = &data[old & md->mask];
108 size = md->mask + 1 - (old & md->mask);
109 old += size;
110
111 write_output(rec, buf, size);
112 }
113
114 buf = &data[old & md->mask];
115 size = head - old;
116 old += size;
117
118 write_output(rec, buf, size);
119
120 md->prev = old;
121 perf_mmap__write_tail(md, old);
122}
123
/*
 * Flags shared between the signal handler and the rest of the file.  They
 * are sig_atomic_t because that is the only object type ISO C guarantees
 * can be safely written from an asynchronous signal handler.
 */
static volatile sig_atomic_t done = 0;		/* set once recording should stop */
static volatile sig_atomic_t signr = -1;	/* last signal seen, -1 if none */
static volatile sig_atomic_t child_finished = 0; /* set when SIGCHLD was seen */

/*
 * Signal handler: remember which signal arrived and request a stop.
 * SIGCHLD additionally marks the child as finished.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
136
137static void perf_record__sig_exit(int exit_status __used, void *arg)
138{
139 struct perf_record *rec = arg;
140 int status;
141
142 if (rec->evlist->workload.pid > 0) {
143 if (!child_finished)
144 kill(rec->evlist->workload.pid, SIGTERM);
145
146 wait(&status);
147 if (WIFSIGNALED(status))
148 psignal(WTERMSIG(status), rec->progname);
149 }
150
151 if (signr == -1 || signr == SIGUSR1)
152 return;
153
154 signal(signr, SIG_DFL);
155 kill(getpid(), signr);
156}
157
158static bool perf_evlist__equal(struct perf_evlist *evlist,
159 struct perf_evlist *other)
160{
161 struct perf_evsel *pos, *pair;
162
163 if (evlist->nr_entries != other->nr_entries)
164 return false;
165
166 pair = list_entry(other->entries.next, struct perf_evsel, node);
167
168 list_for_each_entry(pos, &evlist->entries, node) {
169 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
170 return false;
171 pair = list_entry(pair->node.next, struct perf_evsel, node);
172 }
173
174 return true;
175}
176
177static void perf_record__open(struct perf_record *rec)
178{
179 struct perf_evsel *pos, *first;
180 struct perf_evlist *evlist = rec->evlist;
181 struct perf_session *session = rec->session;
182 struct perf_record_opts *opts = &rec->opts;
183
184 first = list_entry(evlist->entries.next, struct perf_evsel, node);
185
186 perf_evlist__config_attrs(evlist, opts);
187
188 list_for_each_entry(pos, &evlist->entries, node) {
189 struct perf_event_attr *attr = &pos->attr;
190 struct xyarray *group_fd = NULL;
191 /*
192 * Check if parse_single_tracepoint_event has already asked for
193 * PERF_SAMPLE_TIME.
194 *
195 * XXX this is kludgy but short term fix for problems introduced by
196 * eac23d1c that broke 'perf script' by having different sample_types
197 * when using multiple tracepoint events when we use a perf binary
198 * that tries to use sample_id_all on an older kernel.
199 *
200 * We need to move counter creation to perf_session, support
201 * different sample_types, etc.
202 */
203 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
204
205 if (opts->group && pos != first)
206 group_fd = first->fd;
207fallback_missing_features:
208 if (opts->exclude_guest_missing)
209 attr->exclude_guest = attr->exclude_host = 0;
210retry_sample_id:
211 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
212try_again:
213 if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
214 opts->group, group_fd) < 0) {
215 int err = errno;
216
217 if (err == EPERM || err == EACCES) {
218 ui__error_paranoid();
219 exit(EXIT_FAILURE);
220 } else if (err == ENODEV && opts->target.cpu_list) {
221 die("No such device - did you specify"
222 " an out-of-range profile CPU?\n");
223 } else if (err == EINVAL) {
224 if (!opts->exclude_guest_missing &&
225 (attr->exclude_guest || attr->exclude_host)) {
226 pr_debug("Old kernel, cannot exclude "
227 "guest or host samples.\n");
228 opts->exclude_guest_missing = true;
229 goto fallback_missing_features;
230 } else if (!opts->sample_id_all_missing) {
231 /*
232 * Old kernel, no attr->sample_id_type_all field
233 */
234 opts->sample_id_all_missing = true;
235 if (!opts->sample_time && !opts->raw_samples && !time_needed)
236 attr->sample_type &= ~PERF_SAMPLE_TIME;
237
238 goto retry_sample_id;
239 }
240 }
241
242 /*
243 * If it's cycles then fall back to hrtimer
244 * based cpu-clock-tick sw counter, which
245 * is always available even if no PMU support.
246 *
247 * PPC returns ENXIO until 2.6.37 (behavior changed
248 * with commit b0a873e).
249 */
250 if ((err == ENOENT || err == ENXIO)
251 && attr->type == PERF_TYPE_HARDWARE
252 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
253
254 if (verbose)
255 ui__warning("The cycles event is not supported, "
256 "trying to fall back to cpu-clock-ticks\n");
257 attr->type = PERF_TYPE_SOFTWARE;
258 attr->config = PERF_COUNT_SW_CPU_CLOCK;
259 if (pos->name) {
260 free(pos->name);
261 pos->name = NULL;
262 }
263 goto try_again;
264 }
265
266 if (err == ENOENT) {
267 ui__error("The %s event is not supported.\n",
268 event_name(pos));
269 exit(EXIT_FAILURE);
270 }
271
272 printf("\n");
273 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
274 err, strerror(err));
275
276#if defined(__i386__) || defined(__x86_64__)
277 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
278 die("No hardware sampling interrupt available."
279 " No APIC? If so then you can boot the kernel"
280 " with the \"lapic\" boot parameter to"
281 " force-enable it.\n");
282#endif
283
284 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
285 }
286 }
287
288 if (perf_evlist__set_filters(evlist)) {
289 error("failed to set filter with %d (%s)\n", errno,
290 strerror(errno));
291 exit(-1);
292 }
293
294 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
295 if (errno == EPERM)
296 die("Permission error mapping pages.\n"
297 "Consider increasing "
298 "/proc/sys/kernel/perf_event_mlock_kb,\n"
299 "or try again with a smaller value of -m/--mmap_pages.\n"
300 "(current value: %d)\n", opts->mmap_pages);
301 else if (!is_power_of_2(opts->mmap_pages))
302 die("--mmap_pages/-m value must be a power of two.");
303
304 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
305 }
306
307 if (rec->file_new)
308 session->evlist = evlist;
309 else {
310 if (!perf_evlist__equal(session->evlist, evlist)) {
311 fprintf(stderr, "incompatible append\n");
312 exit(-1);
313 }
314 }
315
316 perf_session__update_sample_type(session);
317}
318
319static int process_buildids(struct perf_record *rec)
320{
321 u64 size = lseek(rec->output, 0, SEEK_CUR);
322
323 if (size == 0)
324 return 0;
325
326 rec->session->fd = rec->output;
327 return __perf_session__process_events(rec->session, rec->post_processing_offset,
328 size - rec->post_processing_offset,
329 size, &build_id__mark_dso_hit_ops);
330}
331
332static void perf_record__exit(int status __used, void *arg)
333{
334 struct perf_record *rec = arg;
335
336 if (!rec->opts.pipe_output) {
337 rec->session->header.data_size += rec->bytes_written;
338
339 if (!rec->no_buildid)
340 process_buildids(rec);
341 perf_session__write_header(rec->session, rec->evlist,
342 rec->output, true);
343 perf_session__delete(rec->session);
344 perf_evlist__delete(rec->evlist);
345 symbol__exit();
346 }
347}
348
349static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
350{
351 int err;
352 struct perf_tool *tool = data;
353
354 if (machine__is_host(machine))
355 return;
356
357 /*
358 *As for guest kernel when processing subcommand record&report,
359 *we arrange module mmap prior to guest kernel mmap and trigger
360 *a preload dso because default guest module symbols are loaded
361 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
362 *method is used to avoid symbol missing when the first addr is
363 *in module instead of in guest kernel.
364 */
365 err = perf_event__synthesize_modules(tool, process_synthesized_event,
366 machine);
367 if (err < 0)
368 pr_err("Couldn't record guest kernel [%d]'s reference"
369 " relocation symbol.\n", machine->pid);
370
371 /*
372 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
373 * have no _text sometimes.
374 */
375 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
376 machine, "_text");
377 if (err < 0)
378 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
379 machine, "_stext");
380 if (err < 0)
381 pr_err("Couldn't record guest kernel [%d]'s reference"
382 " relocation symbol.\n", machine->pid);
383}
384
385static struct perf_event_header finished_round_event = {
386 .size = sizeof(struct perf_event_header),
387 .type = PERF_RECORD_FINISHED_ROUND,
388};
389
390static void perf_record__mmap_read_all(struct perf_record *rec)
391{
392 int i;
393
394 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
395 if (rec->evlist->mmap[i].base)
396 perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
397 }
398
399 if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
400 write_output(rec, &finished_round_event, sizeof(finished_round_event));
401}
402
403static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
404{
405 struct stat st;
406 int flags;
407 int err, output, feat;
408 unsigned long waking = 0;
409 const bool forks = argc > 0;
410 struct machine *machine;
411 struct perf_tool *tool = &rec->tool;
412 struct perf_record_opts *opts = &rec->opts;
413 struct perf_evlist *evsel_list = rec->evlist;
414 const char *output_name = rec->output_name;
415 struct perf_session *session;
416
417 rec->progname = argv[0];
418
419 rec->page_size = sysconf(_SC_PAGE_SIZE);
420
421 on_exit(perf_record__sig_exit, rec);
422 signal(SIGCHLD, sig_handler);
423 signal(SIGINT, sig_handler);
424 signal(SIGUSR1, sig_handler);
425
426 if (!output_name) {
427 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
428 opts->pipe_output = true;
429 else
430 rec->output_name = output_name = "perf.data";
431 }
432 if (output_name) {
433 if (!strcmp(output_name, "-"))
434 opts->pipe_output = true;
435 else if (!stat(output_name, &st) && st.st_size) {
436 if (rec->write_mode == WRITE_FORCE) {
437 char oldname[PATH_MAX];
438 snprintf(oldname, sizeof(oldname), "%s.old",
439 output_name);
440 unlink(oldname);
441 rename(output_name, oldname);
442 }
443 } else if (rec->write_mode == WRITE_APPEND) {
444 rec->write_mode = WRITE_FORCE;
445 }
446 }
447
448 flags = O_CREAT|O_RDWR;
449 if (rec->write_mode == WRITE_APPEND)
450 rec->file_new = 0;
451 else
452 flags |= O_TRUNC;
453
454 if (opts->pipe_output)
455 output = STDOUT_FILENO;
456 else
457 output = open(output_name, flags, S_IRUSR | S_IWUSR);
458 if (output < 0) {
459 perror("failed to create output file");
460 exit(-1);
461 }
462
463 rec->output = output;
464
465 session = perf_session__new(output_name, O_WRONLY,
466 rec->write_mode == WRITE_FORCE, false, NULL);
467 if (session == NULL) {
468 pr_err("Not enough memory for reading perf file header\n");
469 return -1;
470 }
471
472 rec->session = session;
473
474 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
475 perf_header__set_feat(&session->header, feat);
476
477 if (rec->no_buildid)
478 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
479
480 if (!have_tracepoints(&evsel_list->entries))
481 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
482
483 if (!rec->opts.branch_stack)
484 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
485
486 if (!rec->file_new) {
487 err = perf_session__read_header(session, output);
488 if (err < 0)
489 goto out_delete_session;
490 }
491
492 if (forks) {
493 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
494 if (err < 0) {
495 pr_err("Couldn't run the workload!\n");
496 goto out_delete_session;
497 }
498 }
499
500 perf_record__open(rec);
501
502 /*
503 * perf_session__delete(session) will be called at perf_record__exit()
504 */
505 on_exit(perf_record__exit, rec);
506
507 if (opts->pipe_output) {
508 err = perf_header__write_pipe(output);
509 if (err < 0)
510 return err;
511 } else if (rec->file_new) {
512 err = perf_session__write_header(session, evsel_list,
513 output, false);
514 if (err < 0)
515 return err;
516 }
517
518 if (!rec->no_buildid
519 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
520 pr_err("Couldn't generate buildids. "
521 "Use --no-buildid to profile anyway.\n");
522 return -1;
523 }
524
525 rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
526
527 machine = perf_session__find_host_machine(session);
528 if (!machine) {
529 pr_err("Couldn't find native kernel information.\n");
530 return -1;
531 }
532
533 if (opts->pipe_output) {
534 err = perf_event__synthesize_attrs(tool, session,
535 process_synthesized_event);
536 if (err < 0) {
537 pr_err("Couldn't synthesize attrs.\n");
538 return err;
539 }
540
541 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
542 machine);
543 if (err < 0) {
544 pr_err("Couldn't synthesize event_types.\n");
545 return err;
546 }
547
548 if (have_tracepoints(&evsel_list->entries)) {
549 /*
550 * FIXME err <= 0 here actually means that
551 * there were no tracepoints so its not really
552 * an error, just that we don't need to
553 * synthesize anything. We really have to
554 * return this more properly and also
555 * propagate errors that now are calling die()
556 */
557 err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
558 process_synthesized_event);
559 if (err <= 0) {
560 pr_err("Couldn't record tracing data.\n");
561 return err;
562 }
563 advance_output(rec, err);
564 }
565 }
566
567 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
568 machine, "_text");
569 if (err < 0)
570 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
571 machine, "_stext");
572 if (err < 0)
573 pr_err("Couldn't record kernel reference relocation symbol\n"
574 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
575 "Check /proc/kallsyms permission or run as root.\n");
576
577 err = perf_event__synthesize_modules(tool, process_synthesized_event,
578 machine);
579 if (err < 0)
580 pr_err("Couldn't record kernel module information.\n"
581 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
582 "Check /proc/modules permission or run as root.\n");
583
584 if (perf_guest)
585 perf_session__process_machines(session, tool,
586 perf_event__synthesize_guest_os);
587
588 if (!opts->target.system_wide)
589 perf_event__synthesize_thread_map(tool, evsel_list->threads,
590 process_synthesized_event,
591 machine);
592 else
593 perf_event__synthesize_threads(tool, process_synthesized_event,
594 machine);
595
596 if (rec->realtime_prio) {
597 struct sched_param param;
598
599 param.sched_priority = rec->realtime_prio;
600 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) {
601 pr_err("Could not set realtime priority.\n");
602 exit(-1);
603 }
604 }
605
606 perf_evlist__enable(evsel_list);
607
608 /*
609 * Let the child rip
610 */
611 if (forks)
612 perf_evlist__start_workload(evsel_list);
613
614 for (;;) {
615 int hits = rec->samples;
616
617 perf_record__mmap_read_all(rec);
618
619 if (hits == rec->samples) {
620 if (done)
621 break;
622 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
623 waking++;
624 }
625
626 if (done)
627 perf_evlist__disable(evsel_list);
628 }
629
630 if (quiet || signr == SIGUSR1)
631 return 0;
632
633 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
634
635 /*
636 * Approximate RIP event size: 24 bytes.
637 */
638 fprintf(stderr,
639 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
640 (double)rec->bytes_written / 1024.0 / 1024.0,
641 output_name,
642 rec->bytes_written / 24);
643
644 return 0;
645
646out_delete_session:
647 perf_session__delete(session);
648 return err;
649}
650
651#define BRANCH_OPT(n, m) \
652 { .name = n, .mode = (m) }
653
654#define BRANCH_END { .name = NULL }
655
656struct branch_mode {
657 const char *name;
658 int mode;
659};
660
661static const struct branch_mode branch_modes[] = {
662 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
663 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
664 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
665 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
666 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
667 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
668 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
669 BRANCH_END
670};
671
672static int
673parse_branch_stack(const struct option *opt, const char *str, int unset)
674{
675#define ONLY_PLM \
676 (PERF_SAMPLE_BRANCH_USER |\
677 PERF_SAMPLE_BRANCH_KERNEL |\
678 PERF_SAMPLE_BRANCH_HV)
679
680 uint64_t *mode = (uint64_t *)opt->value;
681 const struct branch_mode *br;
682 char *s, *os = NULL, *p;
683 int ret = -1;
684
685 if (unset)
686 return 0;
687
688 /*
689 * cannot set it twice, -b + --branch-filter for instance
690 */
691 if (*mode)
692 return -1;
693
694 /* str may be NULL in case no arg is passed to -b */
695 if (str) {
696 /* because str is read-only */
697 s = os = strdup(str);
698 if (!s)
699 return -1;
700
701 for (;;) {
702 p = strchr(s, ',');
703 if (p)
704 *p = '\0';
705
706 for (br = branch_modes; br->name; br++) {
707 if (!strcasecmp(s, br->name))
708 break;
709 }
710 if (!br->name) {
711 ui__warning("unknown branch filter %s,"
712 " check man page\n", s);
713 goto error;
714 }
715
716 *mode |= br->mode;
717
718 if (!p)
719 break;
720
721 s = p + 1;
722 }
723 }
724 ret = 0;
725
726 /* default to any branch */
727 if ((*mode & ~ONLY_PLM) == 0) {
728 *mode = PERF_SAMPLE_BRANCH_ANY;
729 }
730error:
731 free(os);
732 return ret;
733}
734
/* NULL-terminated usage lines passed to the option parser for 'perf record'. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
740
741/*
742 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
743 * because we need to have access to it in perf_record__exit, that is called
744 * after cmd_record() exits, but since record_options need to be accessible to
745 * builtin-script, leave it here.
746 *
747 * At least we don't ouch it in all the other functions here directly.
748 *
749 * Just say no to tons of global variables, sigh.
750 */
751static struct perf_record record = {
752 .opts = {
753 .mmap_pages = UINT_MAX,
754 .user_freq = UINT_MAX,
755 .user_interval = ULLONG_MAX,
756 .freq = 4000,
757 .target = {
758 .uses_mmap = true,
759 },
760 },
761 .write_mode = WRITE_FORCE,
762 .file_new = true,
763};
764
765/*
766 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
767 * with it and switch to use the library functions in perf_evlist that came
768 * from builtin-record.c, i.e. use perf_record_opts,
769 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
770 * using pipes, etc.
771 */
772const struct option record_options[] = {
773 OPT_CALLBACK('e', "event", &record.evlist, "event",
774 "event selector. use 'perf list' to list available events",
775 parse_events_option),
776 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
777 "event filter", parse_filter),
778 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
779 "record events on existing process id"),
780 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
781 "record events on existing thread id"),
782 OPT_INTEGER('r', "realtime", &record.realtime_prio,
783 "collect data with this RT SCHED_FIFO priority"),
784 OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
785 "collect data without buffering"),
786 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
787 "collect raw sample records from all opened counters"),
788 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
789 "system-wide collection from all CPUs"),
790 OPT_BOOLEAN('A', "append", &record.append_file,
791 "append to the output file to do incremental profiling"),
792 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
793 "list of cpus to monitor"),
794 OPT_BOOLEAN('f', "force", &record.force,
795 "overwrite existing data file (deprecated)"),
796 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
797 OPT_STRING('o', "output", &record.output_name, "file",
798 "output file name"),
799 OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
800 "child tasks do not inherit counters"),
801 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
802 OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
803 "number of mmap data pages"),
804 OPT_BOOLEAN(0, "group", &record.opts.group,
805 "put the counters into a counter group"),
806 OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
807 "do call-graph (stack chain/backtrace) recording"),
808 OPT_INCR('v', "verbose", &verbose,
809 "be more verbose (show counter open errors, etc)"),
810 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
811 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
812 "per thread counts"),
813 OPT_BOOLEAN('d', "data", &record.opts.sample_address,
814 "Sample addresses"),
815 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
816 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
817 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
818 "don't sample"),
819 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
820 "do not update the buildid cache"),
821 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
822 "do not collect buildids in perf.data"),
823 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
824 "monitor event in cgroup name only",
825 parse_cgroups),
826 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
827 "user to profile"),
828
829 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
830 "branch any", "sample any taken branches",
831 parse_branch_stack),
832
833 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
834 "branch filter mask", "branch stack filter modes",
835 parse_branch_stack),
836 OPT_END()
837};
838
839int cmd_record(int argc, const char **argv, const char *prefix __used)
840{
841 int err = -ENOMEM;
842 struct perf_evsel *pos;
843 struct perf_evlist *evsel_list;
844 struct perf_record *rec = &record;
845 char errbuf[BUFSIZ];
846
847 perf_header__set_cmdline(argc, argv);
848
849 evsel_list = perf_evlist__new(NULL, NULL);
850 if (evsel_list == NULL)
851 return -ENOMEM;
852
853 rec->evlist = evsel_list;
854
855 argc = parse_options(argc, argv, record_options, record_usage,
856 PARSE_OPT_STOP_AT_NON_OPTION);
857 if (!argc && perf_target__none(&rec->opts.target))
858 usage_with_options(record_usage, record_options);
859
860 if (rec->force && rec->append_file) {
861 ui__error("Can't overwrite and append at the same time."
862 " You need to choose between -f and -A");
863 usage_with_options(record_usage, record_options);
864 } else if (rec->append_file) {
865 rec->write_mode = WRITE_APPEND;
866 } else {
867 rec->write_mode = WRITE_FORCE;
868 }
869
870 if (nr_cgroups && !rec->opts.target.system_wide) {
871 ui__error("cgroup monitoring only available in"
872 " system-wide mode\n");
873 usage_with_options(record_usage, record_options);
874 }
875
876 symbol__init();
877
878 if (symbol_conf.kptr_restrict)
879 pr_warning(
880"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
881"check /proc/sys/kernel/kptr_restrict.\n\n"
882"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
883"file is not found in the buildid cache or in the vmlinux path.\n\n"
884"Samples in kernel modules won't be resolved at all.\n\n"
885"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
886"even with a suitable vmlinux or kallsyms file.\n\n");
887
888 if (rec->no_buildid_cache || rec->no_buildid)
889 disable_buildid_cache();
890
891 if (evsel_list->nr_entries == 0 &&
892 perf_evlist__add_default(evsel_list) < 0) {
893 pr_err("Not enough memory for event selector list\n");
894 goto out_symbol_exit;
895 }
896
897 err = perf_target__validate(&rec->opts.target);
898 if (err) {
899 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
900 ui__warning("%s", errbuf);
901 }
902
903 err = perf_target__parse_uid(&rec->opts.target);
904 if (err) {
905 int saved_errno = errno;
906
907 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
908 ui__error("%s", errbuf);
909
910 err = -saved_errno;
911 goto out_free_fd;
912 }
913
914 err = -ENOMEM;
915 if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
916 usage_with_options(record_usage, record_options);
917
918 list_for_each_entry(pos, &evsel_list->entries, node) {
919 if (perf_header__push_event(pos->attr.config, event_name(pos)))
920 goto out_free_fd;
921 }
922
923 if (rec->opts.user_interval != ULLONG_MAX)
924 rec->opts.default_interval = rec->opts.user_interval;
925 if (rec->opts.user_freq != UINT_MAX)
926 rec->opts.freq = rec->opts.user_freq;
927
928 /*
929 * User specified count overrides default frequency.
930 */
931 if (rec->opts.default_interval)
932 rec->opts.freq = 0;
933 else if (rec->opts.freq) {
934 rec->opts.default_interval = rec->opts.freq;
935 } else {
936 ui__error("frequency and count are zero, aborting\n");
937 err = -EINVAL;
938 goto out_free_fd;
939 }
940
941 err = __cmd_record(&record, argc, argv);
942out_free_fd:
943 perf_evlist__delete_maps(evsel_list);
944out_symbol_exit:
945 symbol__exit();
946 return err;
947}