Loading...
1/*
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
7 */
8#define _FILE_OFFSET_BITS 64
9
10#include "builtin.h"
11
12#include "perf.h"
13
14#include "util/build-id.h"
15#include "util/util.h"
16#include "util/parse-options.h"
17#include "util/parse-events.h"
18
19#include "util/header.h"
20#include "util/event.h"
21#include "util/evlist.h"
22#include "util/evsel.h"
23#include "util/debug.h"
24#include "util/session.h"
25#include "util/symbol.h"
26#include "util/cpumap.h"
27#include "util/thread_map.h"
28
29#include <unistd.h>
30#include <sched.h>
31#include <sys/mman.h>
32
/*
 * Policy for an output file that already exists: with WRITE_FORCE the old
 * file is moved aside and a fresh one is started, with WRITE_APPEND
 * (-A/--append) new data is added to the existing file.
 */
enum write_mode_t {
	WRITE_FORCE = 0,
	WRITE_APPEND = 1
};
37
38static u64 user_interval = ULLONG_MAX;
39static u64 default_interval = 0;
40
41static unsigned int page_size;
42static unsigned int mmap_pages = UINT_MAX;
43static unsigned int user_freq = UINT_MAX;
44static int freq = 1000;
45static int output;
46static int pipe_output = 0;
47static const char *output_name = NULL;
48static bool group = false;
49static int realtime_prio = 0;
50static bool nodelay = false;
51static bool raw_samples = false;
52static bool sample_id_all_avail = true;
53static bool system_wide = false;
54static pid_t target_pid = -1;
55static pid_t target_tid = -1;
56static pid_t child_pid = -1;
57static bool no_inherit = false;
58static enum write_mode_t write_mode = WRITE_FORCE;
59static bool call_graph = false;
60static bool inherit_stat = false;
61static bool no_samples = false;
62static bool sample_address = false;
63static bool sample_time = false;
64static bool no_buildid = false;
65static bool no_buildid_cache = false;
66static struct perf_evlist *evsel_list;
67
68static long samples = 0;
69static u64 bytes_written = 0;
70
71static int file_new = 1;
72static off_t post_processing_offset;
73
74static struct perf_session *session;
75static const char *cpu_list;
76
77static void advance_output(size_t size)
78{
79 bytes_written += size;
80}
81
82static void write_output(void *buf, size_t size)
83{
84 while (size) {
85 int ret = write(output, buf, size);
86
87 if (ret < 0)
88 die("failed to write");
89
90 size -= ret;
91 buf += ret;
92
93 bytes_written += ret;
94 }
95}
96
97static int process_synthesized_event(union perf_event *event,
98 struct perf_sample *sample __used,
99 struct perf_session *self __used)
100{
101 write_output(event, event->header.size);
102 return 0;
103}
104
105static void mmap_read(struct perf_mmap *md)
106{
107 unsigned int head = perf_mmap__read_head(md);
108 unsigned int old = md->prev;
109 unsigned char *data = md->base + page_size;
110 unsigned long size;
111 void *buf;
112
113 if (old == head)
114 return;
115
116 samples++;
117
118 size = head - old;
119
120 if ((old & md->mask) + size != (head & md->mask)) {
121 buf = &data[old & md->mask];
122 size = md->mask + 1 - (old & md->mask);
123 old += size;
124
125 write_output(buf, size);
126 }
127
128 buf = &data[old & md->mask];
129 size = head - old;
130 old += size;
131
132 write_output(buf, size);
133
134 md->prev = old;
135 perf_mmap__write_tail(md, old);
136}
137
/*
 * Flags shared between the signal handler and the rest of the program:
 * 'done' asks the record loop to stop, 'signr' remembers the last signal
 * received (-1 while none has arrived).
 */
static volatile int done;
static volatile int signr = -1;

/* Handler for SIGCHLD, SIGINT and SIGUSR1: request shutdown, note the signal. */
static void sig_handler(int sig)
{
	done = 1;
	signr = sig;
}
146
147static void sig_atexit(void)
148{
149 if (child_pid > 0)
150 kill(child_pid, SIGTERM);
151
152 if (signr == -1 || signr == SIGUSR1)
153 return;
154
155 signal(signr, SIG_DFL);
156 kill(getpid(), signr);
157}
158
159static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
160{
161 struct perf_event_attr *attr = &evsel->attr;
162 int track = !evsel->idx; /* only the first counter needs these */
163
164 attr->disabled = 1;
165 attr->inherit = !no_inherit;
166 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
167 PERF_FORMAT_TOTAL_TIME_RUNNING |
168 PERF_FORMAT_ID;
169
170 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
171
172 if (evlist->nr_entries > 1)
173 attr->sample_type |= PERF_SAMPLE_ID;
174
175 /*
176 * We default some events to a 1 default interval. But keep
177 * it a weak assumption overridable by the user.
178 */
179 if (!attr->sample_period || (user_freq != UINT_MAX &&
180 user_interval != ULLONG_MAX)) {
181 if (freq) {
182 attr->sample_type |= PERF_SAMPLE_PERIOD;
183 attr->freq = 1;
184 attr->sample_freq = freq;
185 } else {
186 attr->sample_period = default_interval;
187 }
188 }
189
190 if (no_samples)
191 attr->sample_freq = 0;
192
193 if (inherit_stat)
194 attr->inherit_stat = 1;
195
196 if (sample_address) {
197 attr->sample_type |= PERF_SAMPLE_ADDR;
198 attr->mmap_data = track;
199 }
200
201 if (call_graph)
202 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
203
204 if (system_wide)
205 attr->sample_type |= PERF_SAMPLE_CPU;
206
207 if (sample_id_all_avail &&
208 (sample_time || system_wide || !no_inherit || cpu_list))
209 attr->sample_type |= PERF_SAMPLE_TIME;
210
211 if (raw_samples) {
212 attr->sample_type |= PERF_SAMPLE_TIME;
213 attr->sample_type |= PERF_SAMPLE_RAW;
214 attr->sample_type |= PERF_SAMPLE_CPU;
215 }
216
217 if (nodelay) {
218 attr->watermark = 0;
219 attr->wakeup_events = 1;
220 }
221
222 attr->mmap = track;
223 attr->comm = track;
224
225 if (target_pid == -1 && target_tid == -1 && !system_wide) {
226 attr->disabled = 1;
227 attr->enable_on_exec = 1;
228 }
229}
230
231static bool perf_evlist__equal(struct perf_evlist *evlist,
232 struct perf_evlist *other)
233{
234 struct perf_evsel *pos, *pair;
235
236 if (evlist->nr_entries != other->nr_entries)
237 return false;
238
239 pair = list_entry(other->entries.next, struct perf_evsel, node);
240
241 list_for_each_entry(pos, &evlist->entries, node) {
242 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
243 return false;
244 pair = list_entry(pair->node.next, struct perf_evsel, node);
245 }
246
247 return true;
248}
249
250static void open_counters(struct perf_evlist *evlist)
251{
252 struct perf_evsel *pos;
253
254 if (evlist->cpus->map[0] < 0)
255 no_inherit = true;
256
257 list_for_each_entry(pos, &evlist->entries, node) {
258 struct perf_event_attr *attr = &pos->attr;
259 /*
260 * Check if parse_single_tracepoint_event has already asked for
261 * PERF_SAMPLE_TIME.
262 *
263 * XXX this is kludgy but short term fix for problems introduced by
264 * eac23d1c that broke 'perf script' by having different sample_types
265 * when using multiple tracepoint events when we use a perf binary
266 * that tries to use sample_id_all on an older kernel.
267 *
268 * We need to move counter creation to perf_session, support
269 * different sample_types, etc.
270 */
271 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
272
273 config_attr(pos, evlist);
274retry_sample_id:
275 attr->sample_id_all = sample_id_all_avail ? 1 : 0;
276try_again:
277 if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
278 int err = errno;
279
280 if (err == EPERM || err == EACCES) {
281 ui__warning_paranoid();
282 exit(EXIT_FAILURE);
283 } else if (err == ENODEV && cpu_list) {
284 die("No such device - did you specify"
285 " an out-of-range profile CPU?\n");
286 } else if (err == EINVAL && sample_id_all_avail) {
287 /*
288 * Old kernel, no attr->sample_id_type_all field
289 */
290 sample_id_all_avail = false;
291 if (!sample_time && !raw_samples && !time_needed)
292 attr->sample_type &= ~PERF_SAMPLE_TIME;
293
294 goto retry_sample_id;
295 }
296
297 /*
298 * If it's cycles then fall back to hrtimer
299 * based cpu-clock-tick sw counter, which
300 * is always available even if no PMU support:
301 */
302 if (attr->type == PERF_TYPE_HARDWARE
303 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
304
305 if (verbose)
306 ui__warning("The cycles event is not supported, "
307 "trying to fall back to cpu-clock-ticks\n");
308 attr->type = PERF_TYPE_SOFTWARE;
309 attr->config = PERF_COUNT_SW_CPU_CLOCK;
310 goto try_again;
311 }
312
313 if (err == ENOENT) {
314 ui__warning("The %s event is not supported.\n",
315 event_name(pos));
316 exit(EXIT_FAILURE);
317 }
318
319 printf("\n");
320 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
321 err, strerror(err));
322
323#if defined(__i386__) || defined(__x86_64__)
324 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
325 die("No hardware sampling interrupt available."
326 " No APIC? If so then you can boot the kernel"
327 " with the \"lapic\" boot parameter to"
328 " force-enable it.\n");
329#endif
330
331 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
332 }
333 }
334
335 if (perf_evlist__set_filters(evlist)) {
336 error("failed to set filter with %d (%s)\n", errno,
337 strerror(errno));
338 exit(-1);
339 }
340
341 if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
342 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
343
344 if (file_new)
345 session->evlist = evlist;
346 else {
347 if (!perf_evlist__equal(session->evlist, evlist)) {
348 fprintf(stderr, "incompatible append\n");
349 exit(-1);
350 }
351 }
352
353 perf_session__update_sample_type(session);
354}
355
356static int process_buildids(void)
357{
358 u64 size = lseek(output, 0, SEEK_CUR);
359
360 if (size == 0)
361 return 0;
362
363 session->fd = output;
364 return __perf_session__process_events(session, post_processing_offset,
365 size - post_processing_offset,
366 size, &build_id__mark_dso_hit_ops);
367}
368
369static void atexit_header(void)
370{
371 if (!pipe_output) {
372 session->header.data_size += bytes_written;
373
374 if (!no_buildid)
375 process_buildids();
376 perf_session__write_header(session, evsel_list, output, true);
377 perf_session__delete(session);
378 perf_evlist__delete(evsel_list);
379 symbol__exit();
380 }
381}
382
383static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
384{
385 int err;
386 struct perf_session *psession = data;
387
388 if (machine__is_host(machine))
389 return;
390
391 /*
392 *As for guest kernel when processing subcommand record&report,
393 *we arrange module mmap prior to guest kernel mmap and trigger
394 *a preload dso because default guest module symbols are loaded
395 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
396 *method is used to avoid symbol missing when the first addr is
397 *in module instead of in guest kernel.
398 */
399 err = perf_event__synthesize_modules(process_synthesized_event,
400 psession, machine);
401 if (err < 0)
402 pr_err("Couldn't record guest kernel [%d]'s reference"
403 " relocation symbol.\n", machine->pid);
404
405 /*
406 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
407 * have no _text sometimes.
408 */
409 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
410 psession, machine, "_text");
411 if (err < 0)
412 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
413 psession, machine,
414 "_stext");
415 if (err < 0)
416 pr_err("Couldn't record guest kernel [%d]'s reference"
417 " relocation symbol.\n", machine->pid);
418}
419
420static struct perf_event_header finished_round_event = {
421 .size = sizeof(struct perf_event_header),
422 .type = PERF_RECORD_FINISHED_ROUND,
423};
424
425static void mmap_read_all(void)
426{
427 int i;
428
429 for (i = 0; i < evsel_list->nr_mmaps; i++) {
430 if (evsel_list->mmap[i].base)
431 mmap_read(&evsel_list->mmap[i]);
432 }
433
434 if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
435 write_output(&finished_round_event, sizeof(finished_round_event));
436}
437
438static int __cmd_record(int argc, const char **argv)
439{
440 struct stat st;
441 int flags;
442 int err;
443 unsigned long waking = 0;
444 int child_ready_pipe[2], go_pipe[2];
445 const bool forks = argc > 0;
446 char buf;
447 struct machine *machine;
448
449 page_size = sysconf(_SC_PAGE_SIZE);
450
451 atexit(sig_atexit);
452 signal(SIGCHLD, sig_handler);
453 signal(SIGINT, sig_handler);
454 signal(SIGUSR1, sig_handler);
455
456 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
457 perror("failed to create pipes");
458 exit(-1);
459 }
460
461 if (!output_name) {
462 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
463 pipe_output = 1;
464 else
465 output_name = "perf.data";
466 }
467 if (output_name) {
468 if (!strcmp(output_name, "-"))
469 pipe_output = 1;
470 else if (!stat(output_name, &st) && st.st_size) {
471 if (write_mode == WRITE_FORCE) {
472 char oldname[PATH_MAX];
473 snprintf(oldname, sizeof(oldname), "%s.old",
474 output_name);
475 unlink(oldname);
476 rename(output_name, oldname);
477 }
478 } else if (write_mode == WRITE_APPEND) {
479 write_mode = WRITE_FORCE;
480 }
481 }
482
483 flags = O_CREAT|O_RDWR;
484 if (write_mode == WRITE_APPEND)
485 file_new = 0;
486 else
487 flags |= O_TRUNC;
488
489 if (pipe_output)
490 output = STDOUT_FILENO;
491 else
492 output = open(output_name, flags, S_IRUSR | S_IWUSR);
493 if (output < 0) {
494 perror("failed to create output file");
495 exit(-1);
496 }
497
498 session = perf_session__new(output_name, O_WRONLY,
499 write_mode == WRITE_FORCE, false, NULL);
500 if (session == NULL) {
501 pr_err("Not enough memory for reading perf file header\n");
502 return -1;
503 }
504
505 if (!no_buildid)
506 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
507
508 if (!file_new) {
509 err = perf_session__read_header(session, output);
510 if (err < 0)
511 goto out_delete_session;
512 }
513
514 if (have_tracepoints(&evsel_list->entries))
515 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
516
517 /* 512 kiB: default amount of unprivileged mlocked memory */
518 if (mmap_pages == UINT_MAX)
519 mmap_pages = (512 * 1024) / page_size;
520
521 if (forks) {
522 child_pid = fork();
523 if (child_pid < 0) {
524 perror("failed to fork");
525 exit(-1);
526 }
527
528 if (!child_pid) {
529 if (pipe_output)
530 dup2(2, 1);
531 close(child_ready_pipe[0]);
532 close(go_pipe[1]);
533 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
534
535 /*
536 * Do a dummy execvp to get the PLT entry resolved,
537 * so we avoid the resolver overhead on the real
538 * execvp call.
539 */
540 execvp("", (char **)argv);
541
542 /*
543 * Tell the parent we're ready to go
544 */
545 close(child_ready_pipe[1]);
546
547 /*
548 * Wait until the parent tells us to go.
549 */
550 if (read(go_pipe[0], &buf, 1) == -1)
551 perror("unable to read pipe");
552
553 execvp(argv[0], (char **)argv);
554
555 perror(argv[0]);
556 kill(getppid(), SIGUSR1);
557 exit(-1);
558 }
559
560 if (!system_wide && target_tid == -1 && target_pid == -1)
561 evsel_list->threads->map[0] = child_pid;
562
563 close(child_ready_pipe[1]);
564 close(go_pipe[0]);
565 /*
566 * wait for child to settle
567 */
568 if (read(child_ready_pipe[0], &buf, 1) == -1) {
569 perror("unable to read pipe");
570 exit(-1);
571 }
572 close(child_ready_pipe[0]);
573 }
574
575 open_counters(evsel_list);
576
577 /*
578 * perf_session__delete(session) will be called at atexit_header()
579 */
580 atexit(atexit_header);
581
582 if (pipe_output) {
583 err = perf_header__write_pipe(output);
584 if (err < 0)
585 return err;
586 } else if (file_new) {
587 err = perf_session__write_header(session, evsel_list,
588 output, false);
589 if (err < 0)
590 return err;
591 }
592
593 post_processing_offset = lseek(output, 0, SEEK_CUR);
594
595 if (pipe_output) {
596 err = perf_session__synthesize_attrs(session,
597 process_synthesized_event);
598 if (err < 0) {
599 pr_err("Couldn't synthesize attrs.\n");
600 return err;
601 }
602
603 err = perf_event__synthesize_event_types(process_synthesized_event,
604 session);
605 if (err < 0) {
606 pr_err("Couldn't synthesize event_types.\n");
607 return err;
608 }
609
610 if (have_tracepoints(&evsel_list->entries)) {
611 /*
612 * FIXME err <= 0 here actually means that
613 * there were no tracepoints so its not really
614 * an error, just that we don't need to
615 * synthesize anything. We really have to
616 * return this more properly and also
617 * propagate errors that now are calling die()
618 */
619 err = perf_event__synthesize_tracing_data(output, evsel_list,
620 process_synthesized_event,
621 session);
622 if (err <= 0) {
623 pr_err("Couldn't record tracing data.\n");
624 return err;
625 }
626 advance_output(err);
627 }
628 }
629
630 machine = perf_session__find_host_machine(session);
631 if (!machine) {
632 pr_err("Couldn't find native kernel information.\n");
633 return -1;
634 }
635
636 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
637 session, machine, "_text");
638 if (err < 0)
639 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
640 session, machine, "_stext");
641 if (err < 0)
642 pr_err("Couldn't record kernel reference relocation symbol\n"
643 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
644 "Check /proc/kallsyms permission or run as root.\n");
645
646 err = perf_event__synthesize_modules(process_synthesized_event,
647 session, machine);
648 if (err < 0)
649 pr_err("Couldn't record kernel module information.\n"
650 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
651 "Check /proc/modules permission or run as root.\n");
652
653 if (perf_guest)
654 perf_session__process_machines(session,
655 perf_event__synthesize_guest_os);
656
657 if (!system_wide)
658 perf_event__synthesize_thread_map(evsel_list->threads,
659 process_synthesized_event,
660 session);
661 else
662 perf_event__synthesize_threads(process_synthesized_event,
663 session);
664
665 if (realtime_prio) {
666 struct sched_param param;
667
668 param.sched_priority = realtime_prio;
669 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) {
670 pr_err("Could not set realtime priority.\n");
671 exit(-1);
672 }
673 }
674
675 perf_evlist__enable(evsel_list);
676
677 /*
678 * Let the child rip
679 */
680 if (forks)
681 close(go_pipe[1]);
682
683 for (;;) {
684 int hits = samples;
685
686 mmap_read_all();
687
688 if (hits == samples) {
689 if (done)
690 break;
691 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
692 waking++;
693 }
694
695 if (done)
696 perf_evlist__disable(evsel_list);
697 }
698
699 if (quiet || signr == SIGUSR1)
700 return 0;
701
702 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
703
704 /*
705 * Approximate RIP event size: 24 bytes.
706 */
707 fprintf(stderr,
708 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
709 (double)bytes_written / 1024.0 / 1024.0,
710 output_name,
711 bytes_written / 24);
712
713 return 0;
714
715out_delete_session:
716 perf_session__delete(session);
717 return err;
718}
719
/* Usage lines shown by usage_with_options(); the list ends with NULL. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
725
726static bool force, append_file;
727
728const struct option record_options[] = {
729 OPT_CALLBACK('e', "event", &evsel_list, "event",
730 "event selector. use 'perf list' to list available events",
731 parse_events_option),
732 OPT_CALLBACK(0, "filter", &evsel_list, "filter",
733 "event filter", parse_filter),
734 OPT_INTEGER('p', "pid", &target_pid,
735 "record events on existing process id"),
736 OPT_INTEGER('t', "tid", &target_tid,
737 "record events on existing thread id"),
738 OPT_INTEGER('r', "realtime", &realtime_prio,
739 "collect data with this RT SCHED_FIFO priority"),
740 OPT_BOOLEAN('D', "no-delay", &nodelay,
741 "collect data without buffering"),
742 OPT_BOOLEAN('R', "raw-samples", &raw_samples,
743 "collect raw sample records from all opened counters"),
744 OPT_BOOLEAN('a', "all-cpus", &system_wide,
745 "system-wide collection from all CPUs"),
746 OPT_BOOLEAN('A', "append", &append_file,
747 "append to the output file to do incremental profiling"),
748 OPT_STRING('C', "cpu", &cpu_list, "cpu",
749 "list of cpus to monitor"),
750 OPT_BOOLEAN('f', "force", &force,
751 "overwrite existing data file (deprecated)"),
752 OPT_U64('c', "count", &user_interval, "event period to sample"),
753 OPT_STRING('o', "output", &output_name, "file",
754 "output file name"),
755 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
756 "child tasks do not inherit counters"),
757 OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
758 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
759 OPT_BOOLEAN(0, "group", &group,
760 "put the counters into a counter group"),
761 OPT_BOOLEAN('g', "call-graph", &call_graph,
762 "do call-graph (stack chain/backtrace) recording"),
763 OPT_INCR('v', "verbose", &verbose,
764 "be more verbose (show counter open errors, etc)"),
765 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
766 OPT_BOOLEAN('s', "stat", &inherit_stat,
767 "per thread counts"),
768 OPT_BOOLEAN('d', "data", &sample_address,
769 "Sample addresses"),
770 OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
771 OPT_BOOLEAN('n', "no-samples", &no_samples,
772 "don't sample"),
773 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
774 "do not update the buildid cache"),
775 OPT_BOOLEAN('B', "no-buildid", &no_buildid,
776 "do not collect buildids in perf.data"),
777 OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
778 "monitor event in cgroup name only",
779 parse_cgroups),
780 OPT_END()
781};
782
783int cmd_record(int argc, const char **argv, const char *prefix __used)
784{
785 int err = -ENOMEM;
786 struct perf_evsel *pos;
787
788 evsel_list = perf_evlist__new(NULL, NULL);
789 if (evsel_list == NULL)
790 return -ENOMEM;
791
792 argc = parse_options(argc, argv, record_options, record_usage,
793 PARSE_OPT_STOP_AT_NON_OPTION);
794 if (!argc && target_pid == -1 && target_tid == -1 &&
795 !system_wide && !cpu_list)
796 usage_with_options(record_usage, record_options);
797
798 if (force && append_file) {
799 fprintf(stderr, "Can't overwrite and append at the same time."
800 " You need to choose between -f and -A");
801 usage_with_options(record_usage, record_options);
802 } else if (append_file) {
803 write_mode = WRITE_APPEND;
804 } else {
805 write_mode = WRITE_FORCE;
806 }
807
808 if (nr_cgroups && !system_wide) {
809 fprintf(stderr, "cgroup monitoring only available in"
810 " system-wide mode\n");
811 usage_with_options(record_usage, record_options);
812 }
813
814 symbol__init();
815
816 if (symbol_conf.kptr_restrict)
817 pr_warning(
818"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
819"check /proc/sys/kernel/kptr_restrict.\n\n"
820"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
821"file is not found in the buildid cache or in the vmlinux path.\n\n"
822"Samples in kernel modules won't be resolved at all.\n\n"
823"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
824"even with a suitable vmlinux or kallsyms file.\n\n");
825
826 if (no_buildid_cache || no_buildid)
827 disable_buildid_cache();
828
829 if (evsel_list->nr_entries == 0 &&
830 perf_evlist__add_default(evsel_list) < 0) {
831 pr_err("Not enough memory for event selector list\n");
832 goto out_symbol_exit;
833 }
834
835 if (target_pid != -1)
836 target_tid = target_pid;
837
838 if (perf_evlist__create_maps(evsel_list, target_pid,
839 target_tid, cpu_list) < 0)
840 usage_with_options(record_usage, record_options);
841
842 list_for_each_entry(pos, &evsel_list->entries, node) {
843 if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
844 evsel_list->threads->nr) < 0)
845 goto out_free_fd;
846 if (perf_header__push_event(pos->attr.config, event_name(pos)))
847 goto out_free_fd;
848 }
849
850 if (perf_evlist__alloc_pollfd(evsel_list) < 0)
851 goto out_free_fd;
852
853 if (user_interval != ULLONG_MAX)
854 default_interval = user_interval;
855 if (user_freq != UINT_MAX)
856 freq = user_freq;
857
858 /*
859 * User specified count overrides default frequency.
860 */
861 if (default_interval)
862 freq = 0;
863 else if (freq) {
864 default_interval = freq;
865 } else {
866 fprintf(stderr, "frequency and count are zero, aborting\n");
867 err = -EINVAL;
868 goto out_free_fd;
869 }
870
871 err = __cmd_record(argc, argv);
872out_free_fd:
873 perf_evlist__delete_maps(evsel_list);
874out_symbol_exit:
875 symbol__exit();
876 return err;
877}
1/*
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
7 */
8#define _FILE_OFFSET_BITS 64
9
10#include "builtin.h"
11
12#include "perf.h"
13
14#include "util/build-id.h"
15#include "util/util.h"
16#include "util/parse-options.h"
17#include "util/parse-events.h"
18
19#include "util/header.h"
20#include "util/event.h"
21#include "util/evlist.h"
22#include "util/evsel.h"
23#include "util/debug.h"
24#include "util/session.h"
25#include "util/tool.h"
26#include "util/symbol.h"
27#include "util/cpumap.h"
28#include "util/thread_map.h"
29
30#include <unistd.h>
31#include <sched.h>
32#include <sys/mman.h>
33
/* Policy for an existing output file: replace it or append to it. */
enum write_mode_t {
	WRITE_FORCE = 0,
	WRITE_APPEND = 1
};
38
39struct perf_record {
40 struct perf_tool tool;
41 struct perf_record_opts opts;
42 u64 bytes_written;
43 const char *output_name;
44 struct perf_evlist *evlist;
45 struct perf_session *session;
46 const char *progname;
47 int output;
48 unsigned int page_size;
49 int realtime_prio;
50 enum write_mode_t write_mode;
51 bool no_buildid;
52 bool no_buildid_cache;
53 bool force;
54 bool file_new;
55 bool append_file;
56 long samples;
57 off_t post_processing_offset;
58};
59
60static void advance_output(struct perf_record *rec, size_t size)
61{
62 rec->bytes_written += size;
63}
64
65static void write_output(struct perf_record *rec, void *buf, size_t size)
66{
67 while (size) {
68 int ret = write(rec->output, buf, size);
69
70 if (ret < 0)
71 die("failed to write");
72
73 size -= ret;
74 buf += ret;
75
76 rec->bytes_written += ret;
77 }
78}
79
80static int process_synthesized_event(struct perf_tool *tool,
81 union perf_event *event,
82 struct perf_sample *sample __used,
83 struct machine *machine __used)
84{
85 struct perf_record *rec = container_of(tool, struct perf_record, tool);
86 write_output(rec, event, event->header.size);
87 return 0;
88}
89
90static void perf_record__mmap_read(struct perf_record *rec,
91 struct perf_mmap *md)
92{
93 unsigned int head = perf_mmap__read_head(md);
94 unsigned int old = md->prev;
95 unsigned char *data = md->base + rec->page_size;
96 unsigned long size;
97 void *buf;
98
99 if (old == head)
100 return;
101
102 rec->samples++;
103
104 size = head - old;
105
106 if ((old & md->mask) + size != (head & md->mask)) {
107 buf = &data[old & md->mask];
108 size = md->mask + 1 - (old & md->mask);
109 old += size;
110
111 write_output(rec, buf, size);
112 }
113
114 buf = &data[old & md->mask];
115 size = head - old;
116 old += size;
117
118 write_output(rec, buf, size);
119
120 md->prev = old;
121 perf_mmap__write_tail(md, old);
122}
123
/*
 * Flags written by the signal handler: 'done' requests shutdown, 'signr'
 * remembers the last signal received (-1 while none has arrived) and
 * 'child_finished' records that a SIGCHLD was seen.
 */
static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

/* Signal handler: note a finished child, request shutdown, keep the signal. */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
136
137static void perf_record__sig_exit(int exit_status __used, void *arg)
138{
139 struct perf_record *rec = arg;
140 int status;
141
142 if (rec->evlist->workload.pid > 0) {
143 if (!child_finished)
144 kill(rec->evlist->workload.pid, SIGTERM);
145
146 wait(&status);
147 if (WIFSIGNALED(status))
148 psignal(WTERMSIG(status), rec->progname);
149 }
150
151 if (signr == -1 || signr == SIGUSR1)
152 return;
153
154 signal(signr, SIG_DFL);
155 kill(getpid(), signr);
156}
157
158static bool perf_evlist__equal(struct perf_evlist *evlist,
159 struct perf_evlist *other)
160{
161 struct perf_evsel *pos, *pair;
162
163 if (evlist->nr_entries != other->nr_entries)
164 return false;
165
166 pair = list_entry(other->entries.next, struct perf_evsel, node);
167
168 list_for_each_entry(pos, &evlist->entries, node) {
169 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
170 return false;
171 pair = list_entry(pair->node.next, struct perf_evsel, node);
172 }
173
174 return true;
175}
176
177static void perf_record__open(struct perf_record *rec)
178{
179 struct perf_evsel *pos, *first;
180 struct perf_evlist *evlist = rec->evlist;
181 struct perf_session *session = rec->session;
182 struct perf_record_opts *opts = &rec->opts;
183
184 first = list_entry(evlist->entries.next, struct perf_evsel, node);
185
186 perf_evlist__config_attrs(evlist, opts);
187
188 list_for_each_entry(pos, &evlist->entries, node) {
189 struct perf_event_attr *attr = &pos->attr;
190 struct xyarray *group_fd = NULL;
191 /*
192 * Check if parse_single_tracepoint_event has already asked for
193 * PERF_SAMPLE_TIME.
194 *
195 * XXX this is kludgy but short term fix for problems introduced by
196 * eac23d1c that broke 'perf script' by having different sample_types
197 * when using multiple tracepoint events when we use a perf binary
198 * that tries to use sample_id_all on an older kernel.
199 *
200 * We need to move counter creation to perf_session, support
201 * different sample_types, etc.
202 */
203 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
204
205 if (opts->group && pos != first)
206 group_fd = first->fd;
207fallback_missing_features:
208 if (opts->exclude_guest_missing)
209 attr->exclude_guest = attr->exclude_host = 0;
210retry_sample_id:
211 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
212try_again:
213 if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
214 opts->group, group_fd) < 0) {
215 int err = errno;
216
217 if (err == EPERM || err == EACCES) {
218 ui__error_paranoid();
219 exit(EXIT_FAILURE);
220 } else if (err == ENODEV && opts->target.cpu_list) {
221 die("No such device - did you specify"
222 " an out-of-range profile CPU?\n");
223 } else if (err == EINVAL) {
224 if (!opts->exclude_guest_missing &&
225 (attr->exclude_guest || attr->exclude_host)) {
226 pr_debug("Old kernel, cannot exclude "
227 "guest or host samples.\n");
228 opts->exclude_guest_missing = true;
229 goto fallback_missing_features;
230 } else if (!opts->sample_id_all_missing) {
231 /*
232 * Old kernel, no attr->sample_id_type_all field
233 */
234 opts->sample_id_all_missing = true;
235 if (!opts->sample_time && !opts->raw_samples && !time_needed)
236 attr->sample_type &= ~PERF_SAMPLE_TIME;
237
238 goto retry_sample_id;
239 }
240 }
241
242 /*
243 * If it's cycles then fall back to hrtimer
244 * based cpu-clock-tick sw counter, which
245 * is always available even if no PMU support.
246 *
247 * PPC returns ENXIO until 2.6.37 (behavior changed
248 * with commit b0a873e).
249 */
250 if ((err == ENOENT || err == ENXIO)
251 && attr->type == PERF_TYPE_HARDWARE
252 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
253
254 if (verbose)
255 ui__warning("The cycles event is not supported, "
256 "trying to fall back to cpu-clock-ticks\n");
257 attr->type = PERF_TYPE_SOFTWARE;
258 attr->config = PERF_COUNT_SW_CPU_CLOCK;
259 if (pos->name) {
260 free(pos->name);
261 pos->name = NULL;
262 }
263 goto try_again;
264 }
265
266 if (err == ENOENT) {
267 ui__error("The %s event is not supported.\n",
268 event_name(pos));
269 exit(EXIT_FAILURE);
270 }
271
272 printf("\n");
273 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
274 err, strerror(err));
275
276#if defined(__i386__) || defined(__x86_64__)
277 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
278 die("No hardware sampling interrupt available."
279 " No APIC? If so then you can boot the kernel"
280 " with the \"lapic\" boot parameter to"
281 " force-enable it.\n");
282#endif
283
284 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
285 }
286 }
287
288 if (perf_evlist__set_filters(evlist)) {
289 error("failed to set filter with %d (%s)\n", errno,
290 strerror(errno));
291 exit(-1);
292 }
293
294 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
295 if (errno == EPERM)
296 die("Permission error mapping pages.\n"
297 "Consider increasing "
298 "/proc/sys/kernel/perf_event_mlock_kb,\n"
299 "or try again with a smaller value of -m/--mmap_pages.\n"
300 "(current value: %d)\n", opts->mmap_pages);
301 else if (!is_power_of_2(opts->mmap_pages))
302 die("--mmap_pages/-m value must be a power of two.");
303
304 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
305 }
306
307 if (rec->file_new)
308 session->evlist = evlist;
309 else {
310 if (!perf_evlist__equal(session->evlist, evlist)) {
311 fprintf(stderr, "incompatible append\n");
312 exit(-1);
313 }
314 }
315
316 perf_session__update_sample_type(session);
317}
318
319static int process_buildids(struct perf_record *rec)
320{
321 u64 size = lseek(rec->output, 0, SEEK_CUR);
322
323 if (size == 0)
324 return 0;
325
326 rec->session->fd = rec->output;
327 return __perf_session__process_events(rec->session, rec->post_processing_offset,
328 size - rec->post_processing_offset,
329 size, &build_id__mark_dso_hit_ops);
330}
331
332static void perf_record__exit(int status __used, void *arg)
333{
334 struct perf_record *rec = arg;
335
336 if (!rec->opts.pipe_output) {
337 rec->session->header.data_size += rec->bytes_written;
338
339 if (!rec->no_buildid)
340 process_buildids(rec);
341 perf_session__write_header(rec->session, rec->evlist,
342 rec->output, true);
343 perf_session__delete(rec->session);
344 perf_evlist__delete(rec->evlist);
345 symbol__exit();
346 }
347}
348
349static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
350{
351 int err;
352 struct perf_tool *tool = data;
353
354 if (machine__is_host(machine))
355 return;
356
357 /*
358 *As for guest kernel when processing subcommand record&report,
359 *we arrange module mmap prior to guest kernel mmap and trigger
360 *a preload dso because default guest module symbols are loaded
361 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
362 *method is used to avoid symbol missing when the first addr is
363 *in module instead of in guest kernel.
364 */
365 err = perf_event__synthesize_modules(tool, process_synthesized_event,
366 machine);
367 if (err < 0)
368 pr_err("Couldn't record guest kernel [%d]'s reference"
369 " relocation symbol.\n", machine->pid);
370
371 /*
372 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
373 * have no _text sometimes.
374 */
375 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
376 machine, "_text");
377 if (err < 0)
378 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
379 machine, "_stext");
380 if (err < 0)
381 pr_err("Couldn't record guest kernel [%d]'s reference"
382 " relocation symbol.\n", machine->pid);
383}
384
385static struct perf_event_header finished_round_event = {
386 .size = sizeof(struct perf_event_header),
387 .type = PERF_RECORD_FINISHED_ROUND,
388};
389
390static void perf_record__mmap_read_all(struct perf_record *rec)
391{
392 int i;
393
394 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
395 if (rec->evlist->mmap[i].base)
396 perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
397 }
398
399 if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
400 write_output(rec, &finished_round_event, sizeof(finished_round_event));
401}
402
403static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
404{
405 struct stat st;
406 int flags;
407 int err, output, feat;
408 unsigned long waking = 0;
409 const bool forks = argc > 0;
410 struct machine *machine;
411 struct perf_tool *tool = &rec->tool;
412 struct perf_record_opts *opts = &rec->opts;
413 struct perf_evlist *evsel_list = rec->evlist;
414 const char *output_name = rec->output_name;
415 struct perf_session *session;
416
417 rec->progname = argv[0];
418
419 rec->page_size = sysconf(_SC_PAGE_SIZE);
420
421 on_exit(perf_record__sig_exit, rec);
422 signal(SIGCHLD, sig_handler);
423 signal(SIGINT, sig_handler);
424 signal(SIGUSR1, sig_handler);
425
426 if (!output_name) {
427 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
428 opts->pipe_output = true;
429 else
430 rec->output_name = output_name = "perf.data";
431 }
432 if (output_name) {
433 if (!strcmp(output_name, "-"))
434 opts->pipe_output = true;
435 else if (!stat(output_name, &st) && st.st_size) {
436 if (rec->write_mode == WRITE_FORCE) {
437 char oldname[PATH_MAX];
438 snprintf(oldname, sizeof(oldname), "%s.old",
439 output_name);
440 unlink(oldname);
441 rename(output_name, oldname);
442 }
443 } else if (rec->write_mode == WRITE_APPEND) {
444 rec->write_mode = WRITE_FORCE;
445 }
446 }
447
448 flags = O_CREAT|O_RDWR;
449 if (rec->write_mode == WRITE_APPEND)
450 rec->file_new = 0;
451 else
452 flags |= O_TRUNC;
453
454 if (opts->pipe_output)
455 output = STDOUT_FILENO;
456 else
457 output = open(output_name, flags, S_IRUSR | S_IWUSR);
458 if (output < 0) {
459 perror("failed to create output file");
460 exit(-1);
461 }
462
463 rec->output = output;
464
465 session = perf_session__new(output_name, O_WRONLY,
466 rec->write_mode == WRITE_FORCE, false, NULL);
467 if (session == NULL) {
468 pr_err("Not enough memory for reading perf file header\n");
469 return -1;
470 }
471
472 rec->session = session;
473
474 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
475 perf_header__set_feat(&session->header, feat);
476
477 if (rec->no_buildid)
478 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
479
480 if (!have_tracepoints(&evsel_list->entries))
481 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
482
483 if (!rec->opts.branch_stack)
484 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
485
486 if (!rec->file_new) {
487 err = perf_session__read_header(session, output);
488 if (err < 0)
489 goto out_delete_session;
490 }
491
492 if (forks) {
493 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
494 if (err < 0) {
495 pr_err("Couldn't run the workload!\n");
496 goto out_delete_session;
497 }
498 }
499
500 perf_record__open(rec);
501
502 /*
503 * perf_session__delete(session) will be called at perf_record__exit()
504 */
505 on_exit(perf_record__exit, rec);
506
507 if (opts->pipe_output) {
508 err = perf_header__write_pipe(output);
509 if (err < 0)
510 return err;
511 } else if (rec->file_new) {
512 err = perf_session__write_header(session, evsel_list,
513 output, false);
514 if (err < 0)
515 return err;
516 }
517
518 if (!rec->no_buildid
519 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
520 pr_err("Couldn't generate buildids. "
521 "Use --no-buildid to profile anyway.\n");
522 return -1;
523 }
524
525 rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
526
527 machine = perf_session__find_host_machine(session);
528 if (!machine) {
529 pr_err("Couldn't find native kernel information.\n");
530 return -1;
531 }
532
533 if (opts->pipe_output) {
534 err = perf_event__synthesize_attrs(tool, session,
535 process_synthesized_event);
536 if (err < 0) {
537 pr_err("Couldn't synthesize attrs.\n");
538 return err;
539 }
540
541 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
542 machine);
543 if (err < 0) {
544 pr_err("Couldn't synthesize event_types.\n");
545 return err;
546 }
547
548 if (have_tracepoints(&evsel_list->entries)) {
549 /*
550 * FIXME err <= 0 here actually means that
551 * there were no tracepoints so its not really
552 * an error, just that we don't need to
553 * synthesize anything. We really have to
554 * return this more properly and also
555 * propagate errors that now are calling die()
556 */
557 err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
558 process_synthesized_event);
559 if (err <= 0) {
560 pr_err("Couldn't record tracing data.\n");
561 return err;
562 }
563 advance_output(rec, err);
564 }
565 }
566
567 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
568 machine, "_text");
569 if (err < 0)
570 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
571 machine, "_stext");
572 if (err < 0)
573 pr_err("Couldn't record kernel reference relocation symbol\n"
574 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
575 "Check /proc/kallsyms permission or run as root.\n");
576
577 err = perf_event__synthesize_modules(tool, process_synthesized_event,
578 machine);
579 if (err < 0)
580 pr_err("Couldn't record kernel module information.\n"
581 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
582 "Check /proc/modules permission or run as root.\n");
583
584 if (perf_guest)
585 perf_session__process_machines(session, tool,
586 perf_event__synthesize_guest_os);
587
588 if (!opts->target.system_wide)
589 perf_event__synthesize_thread_map(tool, evsel_list->threads,
590 process_synthesized_event,
591 machine);
592 else
593 perf_event__synthesize_threads(tool, process_synthesized_event,
594 machine);
595
596 if (rec->realtime_prio) {
597 struct sched_param param;
598
599 param.sched_priority = rec->realtime_prio;
600 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) {
601 pr_err("Could not set realtime priority.\n");
602 exit(-1);
603 }
604 }
605
606 perf_evlist__enable(evsel_list);
607
608 /*
609 * Let the child rip
610 */
611 if (forks)
612 perf_evlist__start_workload(evsel_list);
613
614 for (;;) {
615 int hits = rec->samples;
616
617 perf_record__mmap_read_all(rec);
618
619 if (hits == rec->samples) {
620 if (done)
621 break;
622 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
623 waking++;
624 }
625
626 if (done)
627 perf_evlist__disable(evsel_list);
628 }
629
630 if (quiet || signr == SIGUSR1)
631 return 0;
632
633 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
634
635 /*
636 * Approximate RIP event size: 24 bytes.
637 */
638 fprintf(stderr,
639 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
640 (double)rec->bytes_written / 1024.0 / 1024.0,
641 output_name,
642 rec->bytes_written / 24);
643
644 return 0;
645
646out_delete_session:
647 perf_session__delete(session);
648 return err;
649}
650
651#define BRANCH_OPT(n, m) \
652 { .name = n, .mode = (m) }
653
654#define BRANCH_END { .name = NULL }
655
656struct branch_mode {
657 const char *name;
658 int mode;
659};
660
661static const struct branch_mode branch_modes[] = {
662 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
663 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
664 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
665 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
666 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
667 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
668 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
669 BRANCH_END
670};
671
672static int
673parse_branch_stack(const struct option *opt, const char *str, int unset)
674{
675#define ONLY_PLM \
676 (PERF_SAMPLE_BRANCH_USER |\
677 PERF_SAMPLE_BRANCH_KERNEL |\
678 PERF_SAMPLE_BRANCH_HV)
679
680 uint64_t *mode = (uint64_t *)opt->value;
681 const struct branch_mode *br;
682 char *s, *os = NULL, *p;
683 int ret = -1;
684
685 if (unset)
686 return 0;
687
688 /*
689 * cannot set it twice, -b + --branch-filter for instance
690 */
691 if (*mode)
692 return -1;
693
694 /* str may be NULL in case no arg is passed to -b */
695 if (str) {
696 /* because str is read-only */
697 s = os = strdup(str);
698 if (!s)
699 return -1;
700
701 for (;;) {
702 p = strchr(s, ',');
703 if (p)
704 *p = '\0';
705
706 for (br = branch_modes; br->name; br++) {
707 if (!strcasecmp(s, br->name))
708 break;
709 }
710 if (!br->name) {
711 ui__warning("unknown branch filter %s,"
712 " check man page\n", s);
713 goto error;
714 }
715
716 *mode |= br->mode;
717
718 if (!p)
719 break;
720
721 s = p + 1;
722 }
723 }
724 ret = 0;
725
726 /* default to any branch */
727 if ((*mode & ~ONLY_PLM) == 0) {
728 *mode = PERF_SAMPLE_BRANCH_ANY;
729 }
730error:
731 free(os);
732 return ret;
733}
734
/* Usage synopsis lines for 'perf record'; the list is NULL-terminated. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
740
741/*
742 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
743 * because we need to have access to it in perf_record__exit, that is called
744 * after cmd_record() exits, but since record_options need to be accessible to
745 * builtin-script, leave it here.
746 *
747 * At least we don't ouch it in all the other functions here directly.
748 *
749 * Just say no to tons of global variables, sigh.
750 */
751static struct perf_record record = {
752 .opts = {
753 .mmap_pages = UINT_MAX,
754 .user_freq = UINT_MAX,
755 .user_interval = ULLONG_MAX,
756 .freq = 4000,
757 .target = {
758 .uses_mmap = true,
759 },
760 },
761 .write_mode = WRITE_FORCE,
762 .file_new = true,
763};
764
765/*
766 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
767 * with it and switch to use the library functions in perf_evlist that came
768 * from builtin-record.c, i.e. use perf_record_opts,
769 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
770 * using pipes, etc.
771 */
772const struct option record_options[] = {
773 OPT_CALLBACK('e', "event", &record.evlist, "event",
774 "event selector. use 'perf list' to list available events",
775 parse_events_option),
776 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
777 "event filter", parse_filter),
778 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
779 "record events on existing process id"),
780 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
781 "record events on existing thread id"),
782 OPT_INTEGER('r', "realtime", &record.realtime_prio,
783 "collect data with this RT SCHED_FIFO priority"),
784 OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
785 "collect data without buffering"),
786 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
787 "collect raw sample records from all opened counters"),
788 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
789 "system-wide collection from all CPUs"),
790 OPT_BOOLEAN('A', "append", &record.append_file,
791 "append to the output file to do incremental profiling"),
792 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
793 "list of cpus to monitor"),
794 OPT_BOOLEAN('f', "force", &record.force,
795 "overwrite existing data file (deprecated)"),
796 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
797 OPT_STRING('o', "output", &record.output_name, "file",
798 "output file name"),
799 OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
800 "child tasks do not inherit counters"),
801 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
802 OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
803 "number of mmap data pages"),
804 OPT_BOOLEAN(0, "group", &record.opts.group,
805 "put the counters into a counter group"),
806 OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
807 "do call-graph (stack chain/backtrace) recording"),
808 OPT_INCR('v', "verbose", &verbose,
809 "be more verbose (show counter open errors, etc)"),
810 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
811 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
812 "per thread counts"),
813 OPT_BOOLEAN('d', "data", &record.opts.sample_address,
814 "Sample addresses"),
815 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
816 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
817 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
818 "don't sample"),
819 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
820 "do not update the buildid cache"),
821 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
822 "do not collect buildids in perf.data"),
823 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
824 "monitor event in cgroup name only",
825 parse_cgroups),
826 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
827 "user to profile"),
828
829 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
830 "branch any", "sample any taken branches",
831 parse_branch_stack),
832
833 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
834 "branch filter mask", "branch stack filter modes",
835 parse_branch_stack),
836 OPT_END()
837};
838
839int cmd_record(int argc, const char **argv, const char *prefix __used)
840{
841 int err = -ENOMEM;
842 struct perf_evsel *pos;
843 struct perf_evlist *evsel_list;
844 struct perf_record *rec = &record;
845 char errbuf[BUFSIZ];
846
847 perf_header__set_cmdline(argc, argv);
848
849 evsel_list = perf_evlist__new(NULL, NULL);
850 if (evsel_list == NULL)
851 return -ENOMEM;
852
853 rec->evlist = evsel_list;
854
855 argc = parse_options(argc, argv, record_options, record_usage,
856 PARSE_OPT_STOP_AT_NON_OPTION);
857 if (!argc && perf_target__none(&rec->opts.target))
858 usage_with_options(record_usage, record_options);
859
860 if (rec->force && rec->append_file) {
861 ui__error("Can't overwrite and append at the same time."
862 " You need to choose between -f and -A");
863 usage_with_options(record_usage, record_options);
864 } else if (rec->append_file) {
865 rec->write_mode = WRITE_APPEND;
866 } else {
867 rec->write_mode = WRITE_FORCE;
868 }
869
870 if (nr_cgroups && !rec->opts.target.system_wide) {
871 ui__error("cgroup monitoring only available in"
872 " system-wide mode\n");
873 usage_with_options(record_usage, record_options);
874 }
875
876 symbol__init();
877
878 if (symbol_conf.kptr_restrict)
879 pr_warning(
880"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
881"check /proc/sys/kernel/kptr_restrict.\n\n"
882"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
883"file is not found in the buildid cache or in the vmlinux path.\n\n"
884"Samples in kernel modules won't be resolved at all.\n\n"
885"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
886"even with a suitable vmlinux or kallsyms file.\n\n");
887
888 if (rec->no_buildid_cache || rec->no_buildid)
889 disable_buildid_cache();
890
891 if (evsel_list->nr_entries == 0 &&
892 perf_evlist__add_default(evsel_list) < 0) {
893 pr_err("Not enough memory for event selector list\n");
894 goto out_symbol_exit;
895 }
896
897 err = perf_target__validate(&rec->opts.target);
898 if (err) {
899 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
900 ui__warning("%s", errbuf);
901 }
902
903 err = perf_target__parse_uid(&rec->opts.target);
904 if (err) {
905 int saved_errno = errno;
906
907 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
908 ui__error("%s", errbuf);
909
910 err = -saved_errno;
911 goto out_free_fd;
912 }
913
914 err = -ENOMEM;
915 if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
916 usage_with_options(record_usage, record_options);
917
918 list_for_each_entry(pos, &evsel_list->entries, node) {
919 if (perf_header__push_event(pos->attr.config, event_name(pos)))
920 goto out_free_fd;
921 }
922
923 if (rec->opts.user_interval != ULLONG_MAX)
924 rec->opts.default_interval = rec->opts.user_interval;
925 if (rec->opts.user_freq != UINT_MAX)
926 rec->opts.freq = rec->opts.user_freq;
927
928 /*
929 * User specified count overrides default frequency.
930 */
931 if (rec->opts.default_interval)
932 rec->opts.freq = 0;
933 else if (rec->opts.freq) {
934 rec->opts.default_interval = rec->opts.freq;
935 } else {
936 ui__error("frequency and count are zero, aborting\n");
937 err = -EINVAL;
938 goto out_free_fd;
939 }
940
941 err = __cmd_record(&record, argc, argv);
942out_free_fd:
943 perf_evlist__delete_maps(evsel_list);
944out_symbol_exit:
945 symbol__exit();
946 return err;
947}