Linux Audio

Check our new training course

Loading...
v3.1
  1/*
  2 * builtin-record.c
  3 *
  4 * Builtin record command: Record the profile of a workload
  5 * (or a CPU, or a PID) into the perf.data output file - for
  6 * later analysis via perf report.
  7 */
  8#define _FILE_OFFSET_BITS 64
  9
 10#include "builtin.h"
 11
 12#include "perf.h"
 13
 14#include "util/build-id.h"
 15#include "util/util.h"
 16#include "util/parse-options.h"
 17#include "util/parse-events.h"
 18
 
 
 19#include "util/header.h"
 20#include "util/event.h"
 21#include "util/evlist.h"
 22#include "util/evsel.h"
 23#include "util/debug.h"
 24#include "util/session.h"
 
 25#include "util/symbol.h"
 26#include "util/cpumap.h"
 27#include "util/thread_map.h"
 
 
 
 
 
 
 
 
 28
 29#include <unistd.h>
 30#include <sched.h>
 31#include <sys/mman.h>
 32
 33enum write_mode_t {
 34	WRITE_FORCE,
 35	WRITE_APPEND
 36};
 37
 38static u64			user_interval			= ULLONG_MAX;
 39static u64			default_interval		=      0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 40
 41static unsigned int		page_size;
 42static unsigned int		mmap_pages			= UINT_MAX;
 43static unsigned int		user_freq 			= UINT_MAX;
 44static int			freq				=   1000;
 45static int			output;
 46static int			pipe_output			=      0;
 47static const char		*output_name			= NULL;
 48static bool			group				=  false;
 49static int			realtime_prio			=      0;
 50static bool			nodelay				=  false;
 51static bool			raw_samples			=  false;
 52static bool			sample_id_all_avail		=   true;
 53static bool			system_wide			=  false;
 54static pid_t			target_pid			=     -1;
 55static pid_t			target_tid			=     -1;
 56static pid_t			child_pid			=     -1;
 57static bool			no_inherit			=  false;
 58static enum write_mode_t	write_mode			= WRITE_FORCE;
 59static bool			call_graph			=  false;
 60static bool			inherit_stat			=  false;
 61static bool			no_samples			=  false;
 62static bool			sample_address			=  false;
 63static bool			sample_time			=  false;
 64static bool			no_buildid			=  false;
 65static bool			no_buildid_cache		=  false;
 66static struct perf_evlist	*evsel_list;
 67
 68static long			samples				=      0;
 69static u64			bytes_written			=      0;
 70
 71static int			file_new			=      1;
 72static off_t			post_processing_offset;
 73
 74static struct perf_session	*session;
 75static const char		*cpu_list;
 76
 77static void advance_output(size_t size)
 78{
 79	bytes_written += size;
 80}
 81
 82static void write_output(void *buf, size_t size)
 83{
 84	while (size) {
 85		int ret = write(output, buf, size);
 86
 87		if (ret < 0)
 88			die("failed to write");
 89
 90		size -= ret;
 91		buf += ret;
 92
 93		bytes_written += ret;
 94	}
 95}
 96
 97static int process_synthesized_event(union perf_event *event,
 98				     struct perf_sample *sample __used,
 99				     struct perf_session *self __used)
100{
101	write_output(event, event->header.size);
 
 
 
 
 
102	return 0;
103}
104
105static void mmap_read(struct perf_mmap *md)
 
 
 
106{
107	unsigned int head = perf_mmap__read_head(md);
108	unsigned int old = md->prev;
 
 
 
 
 
 
 
109	unsigned char *data = md->base + page_size;
110	unsigned long size;
111	void *buf;
 
112
113	if (old == head)
114		return;
115
116	samples++;
117
118	size = head - old;
119
120	if ((old & md->mask) + size != (head & md->mask)) {
121		buf = &data[old & md->mask];
122		size = md->mask + 1 - (old & md->mask);
123		old += size;
124
125		write_output(buf, size);
 
 
 
126	}
127
128	buf = &data[old & md->mask];
129	size = head - old;
130	old += size;
131
132	write_output(buf, size);
 
 
 
133
134	md->prev = old;
135	perf_mmap__write_tail(md, old);
 
 
136}
137
/*
 * Flags shared between the signal handler and the rest of the program.
 * volatile sig_atomic_t is the object type the C standard allows a
 * signal handler to write.
 */
static volatile sig_atomic_t done = 0;
static volatile sig_atomic_t signr = -1;

/*
 * Signal handler: mark the recording as finished and remember which
 * signal (@sig) was delivered.
 */
static void sig_handler(int sig)
{
	done = 1;
	signr = sig;
}
146
147static void sig_atexit(void)
148{
149	if (child_pid > 0)
150		kill(child_pid, SIGTERM);
151
152	if (signr == -1 || signr == SIGUSR1)
153		return;
154
155	signal(signr, SIG_DFL);
156	kill(getpid(), signr);
157}
158
159static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
160{
161	struct perf_event_attr *attr = &evsel->attr;
162	int track = !evsel->idx; /* only the first counter needs these */
163
164	attr->disabled		= 1;
165	attr->inherit		= !no_inherit;
166	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
167				  PERF_FORMAT_TOTAL_TIME_RUNNING |
168				  PERF_FORMAT_ID;
169
170	attr->sample_type	|= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
172	if (evlist->nr_entries > 1)
173		attr->sample_type |= PERF_SAMPLE_ID;
 
 
 
 
 
 
 
 
174
175	/*
176	 * We default some events to a 1 default interval. But keep
177	 * it a weak assumption overridable by the user.
178	 */
179	if (!attr->sample_period || (user_freq != UINT_MAX &&
180				     user_interval != ULLONG_MAX)) {
181		if (freq) {
182			attr->sample_type	|= PERF_SAMPLE_PERIOD;
183			attr->freq		= 1;
184			attr->sample_freq	= freq;
185		} else {
186			attr->sample_period = default_interval;
187		}
188	}
189
190	if (no_samples)
191		attr->sample_freq = 0;
 
 
192
193	if (inherit_stat)
194		attr->inherit_stat = 1;
 
 
195
196	if (sample_address) {
197		attr->sample_type	|= PERF_SAMPLE_ADDR;
198		attr->mmap_data = track;
199	}
200
201	if (call_graph)
202		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
203
204	if (system_wide)
205		attr->sample_type	|= PERF_SAMPLE_CPU;
 
 
206
207	if (sample_id_all_avail &&
208	    (sample_time || system_wide || !no_inherit || cpu_list))
209		attr->sample_type	|= PERF_SAMPLE_TIME;
 
 
210
211	if (raw_samples) {
212		attr->sample_type	|= PERF_SAMPLE_TIME;
213		attr->sample_type	|= PERF_SAMPLE_RAW;
214		attr->sample_type	|= PERF_SAMPLE_CPU;
215	}
216
217	if (nodelay) {
218		attr->watermark = 0;
219		attr->wakeup_events = 1;
220	}
221
222	attr->mmap		= track;
223	attr->comm		= track;
 
 
224
225	if (target_pid == -1 && target_tid == -1 && !system_wide) {
226		attr->disabled = 1;
227		attr->enable_on_exec = 1;
 
 
 
 
 
 
 
 
228	}
 
 
229}
230
231static bool perf_evlist__equal(struct perf_evlist *evlist,
232			       struct perf_evlist *other)
233{
234	struct perf_evsel *pos, *pair;
 
 
 
 
 
 
 
 
235
236	if (evlist->nr_entries != other->nr_entries)
237		return false;
238
239	pair = list_entry(other->entries.next, struct perf_evsel, node);
 
 
 
 
 
240
241	list_for_each_entry(pos, &evlist->entries, node) {
242		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
243			return false;
244		pair = list_entry(pair->node.next, struct perf_evsel, node);
245	}
246
247	return true;
 
 
 
248}
249
/*
 * Configure and open every event in @evlist, set the event filters, mmap
 * the ring buffers and attach the list to the session (or, when
 * appending, check that it matches the list already in the session).
 *
 * Failures that cannot be worked around end the program via exit() or
 * die().
 */
static void open_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	/* A negative first cpu map entry switches inheritance off. */
	if (evlist->cpus->map[0] < 0)
		no_inherit = true;

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		config_attr(pos, evlist);
		/* Re-entered after sample_id_all turned out to be unsupported. */
retry_sample_id:
		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
		/* Re-entered after switching from cycles to cpu-clock. */
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
			/* keep errno before any other call can overwrite it */
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__warning_paranoid();
				exit(EXIT_FAILURE);
			} else if (err ==  ENODEV && cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
				sample_id_all_avail = false;
				/* Drop the timestamp unless something asked for it explicitly. */
				if (!sample_time && !raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

	/*
	 * A new file simply adopts this event list; when appending, the
	 * list must match the one the session already holds.
	 */
	if (file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
 	}

	perf_session__update_sample_type(session);
}
355
356static int process_buildids(void)
357{
358	u64 size = lseek(output, 0, SEEK_CUR);
 
359
360	if (size == 0)
361		return 0;
362
363	session->fd = output;
364	return __perf_session__process_events(session, post_processing_offset,
365					      size - post_processing_offset,
366					      size, &build_id__mark_dso_hit_ops);
367}
368
369static void atexit_header(void)
370{
371	if (!pipe_output) {
372		session->header.data_size += bytes_written;
373
374		if (!no_buildid)
375			process_buildids();
376		perf_session__write_header(session, evsel_list, output, true);
377		perf_session__delete(session);
378		perf_evlist__delete(evsel_list);
379		symbol__exit();
380	}
 
381}
382
383static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
384{
385	int err;
386	struct perf_session *psession = data;
387
388	if (machine__is_host(machine))
389		return;
390
391	/*
392	 *As for guest kernel when processing subcommand record&report,
393	 *we arrange module mmap prior to guest kernel mmap and trigger
394	 *a preload dso because default guest module symbols are loaded
395	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
396	 *method is used to avoid symbol missing when the first addr is
397	 *in module instead of in guest kernel.
398	 */
399	err = perf_event__synthesize_modules(process_synthesized_event,
400					     psession, machine);
401	if (err < 0)
402		pr_err("Couldn't record guest kernel [%d]'s reference"
403		       " relocation symbol.\n", machine->pid);
404
405	/*
406	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
407	 * have no _text sometimes.
408	 */
409	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
410						 psession, machine, "_text");
411	if (err < 0)
412		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
413							 psession, machine,
414							 "_stext");
415	if (err < 0)
416		pr_err("Couldn't record guest kernel [%d]'s reference"
417		       " relocation symbol.\n", machine->pid);
418}
419
420static struct perf_event_header finished_round_event = {
421	.size = sizeof(struct perf_event_header),
422	.type = PERF_RECORD_FINISHED_ROUND,
423};
424
425static void mmap_read_all(void)
426{
 
427	int i;
 
428
429	for (i = 0; i < evsel_list->nr_mmaps; i++) {
430		if (evsel_list->mmap[i].base)
431			mmap_read(&evsel_list->mmap[i]);
432	}
433
434	if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
435		write_output(&finished_round_event, sizeof(finished_round_event));
436}
437
438static int __cmd_record(int argc, const char **argv)
439{
440	struct stat st;
441	int flags;
442	int err;
443	unsigned long waking = 0;
444	int child_ready_pipe[2], go_pipe[2];
445	const bool forks = argc > 0;
446	char buf;
447	struct machine *machine;
448
449	page_size = sysconf(_SC_PAGE_SIZE);
450
451	atexit(sig_atexit);
452	signal(SIGCHLD, sig_handler);
453	signal(SIGINT, sig_handler);
454	signal(SIGUSR1, sig_handler);
455
456	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
457		perror("failed to create pipes");
458		exit(-1);
459	}
460
461	if (!output_name) {
462		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
463			pipe_output = 1;
464		else
465			output_name = "perf.data";
466	}
467	if (output_name) {
468		if (!strcmp(output_name, "-"))
469			pipe_output = 1;
470		else if (!stat(output_name, &st) && st.st_size) {
471			if (write_mode == WRITE_FORCE) {
472				char oldname[PATH_MAX];
473				snprintf(oldname, sizeof(oldname), "%s.old",
474					 output_name);
475				unlink(oldname);
476				rename(output_name, oldname);
477			}
478		} else if (write_mode == WRITE_APPEND) {
479			write_mode = WRITE_FORCE;
480		}
481	}
482
483	flags = O_CREAT|O_RDWR;
484	if (write_mode == WRITE_APPEND)
485		file_new = 0;
486	else
487		flags |= O_TRUNC;
488
489	if (pipe_output)
490		output = STDOUT_FILENO;
491	else
492		output = open(output_name, flags, S_IRUSR | S_IWUSR);
493	if (output < 0) {
494		perror("failed to create output file");
495		exit(-1);
496	}
497
498	session = perf_session__new(output_name, O_WRONLY,
499				    write_mode == WRITE_FORCE, false, NULL);
500	if (session == NULL) {
501		pr_err("Not enough memory for reading perf file header\n");
502		return -1;
503	}
504
505	if (!no_buildid)
506		perf_header__set_feat(&session->header, HEADER_BUILD_ID);
 
507
508	if (!file_new) {
509		err = perf_session__read_header(session, output);
510		if (err < 0)
511			goto out_delete_session;
512	}
513
514	if (have_tracepoints(&evsel_list->entries))
515		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
516
517	/* 512 kiB: default amount of unprivileged mlocked memory */
518	if (mmap_pages == UINT_MAX)
519		mmap_pages = (512 * 1024) / page_size;
520
521	if (forks) {
522		child_pid = fork();
523		if (child_pid < 0) {
524			perror("failed to fork");
525			exit(-1);
526		}
527
528		if (!child_pid) {
529			if (pipe_output)
530				dup2(2, 1);
531			close(child_ready_pipe[0]);
532			close(go_pipe[1]);
533			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
534
535			/*
536			 * Do a dummy execvp to get the PLT entry resolved,
537			 * so we avoid the resolver overhead on the real
538			 * execvp call.
539			 */
540			execvp("", (char **)argv);
541
542			/*
543			 * Tell the parent we're ready to go
544			 */
545			close(child_ready_pipe[1]);
546
547			/*
548			 * Wait until the parent tells us to go.
549			 */
550			if (read(go_pipe[0], &buf, 1) == -1)
551				perror("unable to read pipe");
552
553			execvp(argv[0], (char **)argv);
 
554
555			perror(argv[0]);
556			kill(getppid(), SIGUSR1);
557			exit(-1);
558		}
559
560		if (!system_wide && target_tid == -1 && target_pid == -1)
561			evsel_list->threads->map[0] = child_pid;
562
563		close(child_ready_pipe[1]);
564		close(go_pipe[0]);
565		/*
566		 * wait for child to settle
567		 */
568		if (read(child_ready_pipe[0], &buf, 1) == -1) {
569			perror("unable to read pipe");
570			exit(-1);
571		}
572		close(child_ready_pipe[0]);
573	}
 
574
575	open_counters(evsel_list);
 
576
577	/*
578	 * perf_session__delete(session) will be called at atexit_header()
579	 */
580	atexit(atexit_header);
581
582	if (pipe_output) {
583		err = perf_header__write_pipe(output);
584		if (err < 0)
585			return err;
586	} else if (file_new) {
587		err = perf_session__write_header(session, evsel_list,
588						 output, false);
589		if (err < 0)
590			return err;
591	}
 
 
 
592
593	post_processing_offset = lseek(output, 0, SEEK_CUR);
594
595	if (pipe_output) {
596		err = perf_session__synthesize_attrs(session,
597						     process_synthesized_event);
 
 
 
 
 
 
 
 
 
 
598		if (err < 0) {
599			pr_err("Couldn't synthesize attrs.\n");
600			return err;
601		}
602
603		err = perf_event__synthesize_event_types(process_synthesized_event,
604							 session);
605		if (err < 0) {
606			pr_err("Couldn't synthesize event_types.\n");
607			return err;
608		}
609
610		if (have_tracepoints(&evsel_list->entries)) {
611			/*
612			 * FIXME err <= 0 here actually means that
613			 * there were no tracepoints so its not really
614			 * an error, just that we don't need to
615			 * synthesize anything.  We really have to
616			 * return this more properly and also
617			 * propagate errors that now are calling die()
618			 */
619			err = perf_event__synthesize_tracing_data(output, evsel_list,
620								  process_synthesized_event,
621								  session);
622			if (err <= 0) {
623				pr_err("Couldn't record tracing data.\n");
624				return err;
625			}
626			advance_output(err);
627		}
628	}
629
630	machine = perf_session__find_host_machine(session);
631	if (!machine) {
632		pr_err("Couldn't find native kernel information.\n");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
633		return -1;
634	}
635
636	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
637						 session, machine, "_text");
638	if (err < 0)
639		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
640							 session, machine, "_stext");
641	if (err < 0)
642		pr_err("Couldn't record kernel reference relocation symbol\n"
643		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
644		       "Check /proc/kallsyms permission or run as root.\n");
645
646	err = perf_event__synthesize_modules(process_synthesized_event,
647					     session, machine);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
648	if (err < 0)
649		pr_err("Couldn't record kernel module information.\n"
650		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
651		       "Check /proc/modules permission or run as root.\n");
652
653	if (perf_guest)
654		perf_session__process_machines(session,
655					       perf_event__synthesize_guest_os);
656
657	if (!system_wide)
658		perf_event__synthesize_thread_map(evsel_list->threads,
659						  process_synthesized_event,
660						  session);
661	else
662		perf_event__synthesize_threads(process_synthesized_event,
663					       session);
664
665	if (realtime_prio) {
666		struct sched_param param;
667
668		param.sched_priority = realtime_prio;
669		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
670			pr_err("Could not set realtime priority.\n");
671			exit(-1);
 
672		}
673	}
674
675	perf_evlist__enable(evsel_list);
 
 
 
 
 
 
676
677	/*
678	 * Let the child rip
679	 */
680	if (forks)
681		close(go_pipe[1]);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
682
 
 
 
 
 
 
 
 
 
683	for (;;) {
684		int hits = samples;
 
 
 
 
 
 
685
686		mmap_read_all();
 
 
 
 
 
 
 
 
 
687
688		if (hits == samples) {
689			if (done)
690				break;
691			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
 
 
 
 
 
 
692			waking++;
 
 
 
693		}
694
695		if (done)
696			perf_evlist__disable(evsel_list);
 
 
 
 
 
 
 
 
697	}
 
698
699	if (quiet || signr == SIGUSR1)
700		return 0;
 
 
 
 
 
701
702	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
 
703
704	/*
705	 * Approximate RIP event size: 24 bytes.
706	 */
707	fprintf(stderr,
708		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
709		(double)bytes_written / 1024.0 / 1024.0,
710		output_name,
711		bytes_written / 24);
712
713	return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714
715out_delete_session:
716	perf_session__delete(session);
717	return err;
718}
719
/* Usage synopsis lines of 'perf record'; the list ends with NULL. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
 
725
/* Set by the -f (--force) and -A (--append) command line switches. */
static bool force;
static bool append_file;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
727
728const struct option record_options[] = {
729	OPT_CALLBACK('e', "event", &evsel_list, "event",
 
 
 
 
 
 
 
 
 
 
730		     "event selector. use 'perf list' to list available events",
731		     parse_events_option),
732	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
733		     "event filter", parse_filter),
734	OPT_INTEGER('p', "pid", &target_pid,
 
 
 
735		    "record events on existing process id"),
736	OPT_INTEGER('t', "tid", &target_tid,
737		    "record events on existing thread id"),
738	OPT_INTEGER('r', "realtime", &realtime_prio,
739		    "collect data with this RT SCHED_FIFO priority"),
740	OPT_BOOLEAN('D', "no-delay", &nodelay,
741		    "collect data without buffering"),
742	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
743		    "collect raw sample records from all opened counters"),
744	OPT_BOOLEAN('a', "all-cpus", &system_wide,
745			    "system-wide collection from all CPUs"),
746	OPT_BOOLEAN('A', "append", &append_file,
747			    "append to the output file to do incremental profiling"),
748	OPT_STRING('C', "cpu", &cpu_list, "cpu",
749		    "list of cpus to monitor"),
750	OPT_BOOLEAN('f', "force", &force,
751			"overwrite existing data file (deprecated)"),
752	OPT_U64('c', "count", &user_interval, "event period to sample"),
753	OPT_STRING('o', "output", &output_name, "file",
754		    "output file name"),
755	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
756		    "child tasks do not inherit counters"),
757	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
758	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
759	OPT_BOOLEAN(0, "group", &group,
 
 
 
760		    "put the counters into a counter group"),
761	OPT_BOOLEAN('g', "call-graph", &call_graph,
762		    "do call-graph (stack chain/backtrace) recording"),
 
 
 
 
763	OPT_INCR('v', "verbose", &verbose,
764		    "be more verbose (show counter open errors, etc)"),
765	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
766	OPT_BOOLEAN('s', "stat", &inherit_stat,
767		    "per thread counts"),
768	OPT_BOOLEAN('d', "data", &sample_address,
769		    "Sample addresses"),
770	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
771	OPT_BOOLEAN('n', "no-samples", &no_samples,
 
 
772		    "don't sample"),
773	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
774		    "do not update the buildid cache"),
775	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
776		    "do not collect buildids in perf.data"),
777	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
 
 
778		     "monitor event in cgroup name only",
779		     parse_cgroups),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
780	OPT_END()
781};
782
783int cmd_record(int argc, const char **argv, const char *prefix __used)
 
 
784{
785	int err = -ENOMEM;
786	struct perf_evsel *pos;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
787
788	evsel_list = perf_evlist__new(NULL, NULL);
789	if (evsel_list == NULL)
790		return -ENOMEM;
791
 
 
792	argc = parse_options(argc, argv, record_options, record_usage,
793			    PARSE_OPT_STOP_AT_NON_OPTION);
794	if (!argc && target_pid == -1 && target_tid == -1 &&
795		!system_wide && !cpu_list)
796		usage_with_options(record_usage, record_options);
797
798	if (force && append_file) {
799		fprintf(stderr, "Can't overwrite and append at the same time."
800				" You need to choose between -f and -A");
801		usage_with_options(record_usage, record_options);
802	} else if (append_file) {
803		write_mode = WRITE_APPEND;
804	} else {
805		write_mode = WRITE_FORCE;
 
 
806	}
807
808	if (nr_cgroups && !system_wide) {
809		fprintf(stderr, "cgroup monitoring only available in"
810			" system-wide mode\n");
811		usage_with_options(record_usage, record_options);
812	}
813
814	symbol__init();
 
 
 
 
 
 
 
815
816	if (symbol_conf.kptr_restrict)
817		pr_warning(
818"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
819"check /proc/sys/kernel/kptr_restrict.\n\n"
820"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
821"file is not found in the buildid cache or in the vmlinux path.\n\n"
822"Samples in kernel modules won't be resolved at all.\n\n"
823"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
824"even with a suitable vmlinux or kallsyms file.\n\n");
825
826	if (no_buildid_cache || no_buildid)
827		disable_buildid_cache();
828
829	if (evsel_list->nr_entries == 0 &&
830	    perf_evlist__add_default(evsel_list) < 0) {
831		pr_err("Not enough memory for event selector list\n");
832		goto out_symbol_exit;
833	}
834
835	if (target_pid != -1)
836		target_tid = target_pid;
837
838	if (perf_evlist__create_maps(evsel_list, target_pid,
839				     target_tid, cpu_list) < 0)
840		usage_with_options(record_usage, record_options);
 
 
 
 
 
 
841
842	list_for_each_entry(pos, &evsel_list->entries, node) {
843		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
844					 evsel_list->threads->nr) < 0)
845			goto out_free_fd;
846		if (perf_header__push_event(pos->attr.config, event_name(pos)))
847			goto out_free_fd;
848	}
849
850	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
851		goto out_free_fd;
852
853	if (user_interval != ULLONG_MAX)
854		default_interval = user_interval;
855	if (user_freq != UINT_MAX)
856		freq = user_freq;
857
858	/*
859	 * User specified count overrides default frequency.
 
 
860	 */
861	if (default_interval)
862		freq = 0;
863	else if (freq) {
864		default_interval = freq;
865	} else {
866		fprintf(stderr, "frequency and count are zero, aborting\n");
867		err = -EINVAL;
868		goto out_free_fd;
869	}
870
871	err = __cmd_record(argc, argv);
872out_free_fd:
873	perf_evlist__delete_maps(evsel_list);
874out_symbol_exit:
 
875	symbol__exit();
 
876	return err;
 
 
 
 
 
 
 
 
 
877}
v4.6
   1/*
   2 * builtin-record.c
   3 *
   4 * Builtin record command: Record the profile of a workload
   5 * (or a CPU, or a PID) into the perf.data output file - for
   6 * later analysis via perf report.
   7 */
 
 
   8#include "builtin.h"
   9
  10#include "perf.h"
  11
  12#include "util/build-id.h"
  13#include "util/util.h"
  14#include <subcmd/parse-options.h>
  15#include "util/parse-events.h"
  16
  17#include "util/callchain.h"
  18#include "util/cgroup.h"
  19#include "util/header.h"
  20#include "util/event.h"
  21#include "util/evlist.h"
  22#include "util/evsel.h"
  23#include "util/debug.h"
  24#include "util/session.h"
  25#include "util/tool.h"
  26#include "util/symbol.h"
  27#include "util/cpumap.h"
  28#include "util/thread_map.h"
  29#include "util/data.h"
  30#include "util/perf_regs.h"
  31#include "util/auxtrace.h"
  32#include "util/parse-branch-options.h"
  33#include "util/parse-regs-options.h"
  34#include "util/llvm-utils.h"
  35#include "util/bpf-loader.h"
  36#include "asm/bug.h"
  37
  38#include <unistd.h>
  39#include <sched.h>
  40#include <sys/mman.h>
  41
 
 
 
 
  42
  43struct record {
  44	struct perf_tool	tool;
  45	struct record_opts	opts;
  46	u64			bytes_written;
  47	struct perf_data_file	file;
  48	struct auxtrace_record	*itr;
  49	struct perf_evlist	*evlist;
  50	struct perf_session	*session;
  51	const char		*progname;
  52	int			realtime_prio;
  53	bool			no_buildid;
  54	bool			no_buildid_set;
  55	bool			no_buildid_cache;
  56	bool			no_buildid_cache_set;
  57	bool			buildid_all;
  58	unsigned long long	samples;
  59};
  60
  61static int record__write(struct record *rec, void *bf, size_t size)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  62{
  63	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
  64		pr_err("failed to write perf data, error: %m\n");
  65		return -1;
  66	}
  67
  68	rec->bytes_written += size;
  69	return 0;
  70}
  71
  72static int process_synthesized_event(struct perf_tool *tool,
  73				     union perf_event *event,
  74				     struct perf_sample *sample __maybe_unused,
  75				     struct machine *machine __maybe_unused)
  76{
  77	struct record *rec = container_of(tool, struct record, tool);
  78	return record__write(rec, event, event->header.size);
  79}
  80
  81static int record__mmap_read(struct record *rec, int idx)
  82{
  83	struct perf_mmap *md = &rec->evlist->mmap[idx];
  84	u64 head = perf_mmap__read_head(md);
  85	u64 old = md->prev;
  86	unsigned char *data = md->base + page_size;
  87	unsigned long size;
  88	void *buf;
  89	int rc = 0;
  90
  91	if (old == head)
  92		return 0;
  93
  94	rec->samples++;
  95
  96	size = head - old;
  97
  98	if ((old & md->mask) + size != (head & md->mask)) {
  99		buf = &data[old & md->mask];
 100		size = md->mask + 1 - (old & md->mask);
 101		old += size;
 102
 103		if (record__write(rec, buf, size) < 0) {
 104			rc = -1;
 105			goto out;
 106		}
 107	}
 108
 109	buf = &data[old & md->mask];
 110	size = head - old;
 111	old += size;
 112
 113	if (record__write(rec, buf, size) < 0) {
 114		rc = -1;
 115		goto out;
 116	}
 117
 118	md->prev = old;
 119	perf_evlist__mmap_consume(rec->evlist, idx);
 120out:
 121	return rc;
 122}
 123
 124static volatile int done;
 125static volatile int signr = -1;
 126static volatile int child_finished;
 127static volatile int auxtrace_snapshot_enabled;
 128static volatile int auxtrace_snapshot_err;
 129static volatile int auxtrace_record__snapshot_started;
 130
 131static void sig_handler(int sig)
 132{
 133	if (sig == SIGCHLD)
 134		child_finished = 1;
 135	else
 136		signr = sig;
 137
 138	done = 1;
 
 139}
 140
 141static void record__sig_exit(void)
 142{
 143	if (signr == -1)
 
 
 
 144		return;
 145
 146	signal(signr, SIG_DFL);
 147	raise(signr);
 148}
 149
 150#ifdef HAVE_AUXTRACE_SUPPORT
 
 
 
 
 
 
 
 
 
 151
 152static int record__process_auxtrace(struct perf_tool *tool,
 153				    union perf_event *event, void *data1,
 154				    size_t len1, void *data2, size_t len2)
 155{
 156	struct record *rec = container_of(tool, struct record, tool);
 157	struct perf_data_file *file = &rec->file;
 158	size_t padding;
 159	u8 pad[8] = {0};
 160
 161	if (!perf_data_file__is_pipe(file)) {
 162		off_t file_offset;
 163		int fd = perf_data_file__fd(file);
 164		int err;
 165
 166		file_offset = lseek(fd, 0, SEEK_CUR);
 167		if (file_offset == -1)
 168			return -1;
 169		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
 170						     event, file_offset);
 171		if (err)
 172			return err;
 173	}
 174
 175	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
 176	padding = (len1 + len2) & 7;
 177	if (padding)
 178		padding = 8 - padding;
 179
 180	record__write(rec, event, event->header.size);
 181	record__write(rec, data1, len1);
 182	if (len2)
 183		record__write(rec, data2, len2);
 184	record__write(rec, &pad, padding);
 185
 186	return 0;
 187}
 
 
 
 
 
 
 
 
 
 
 
 
 188
 189static int record__auxtrace_mmap_read(struct record *rec,
 190				      struct auxtrace_mmap *mm)
 191{
 192	int ret;
 193
 194	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
 195				  record__process_auxtrace);
 196	if (ret < 0)
 197		return ret;
 198
 199	if (ret)
 200		rec->samples++;
 
 
 201
 202	return 0;
 203}
 204
 205static int record__auxtrace_mmap_read_snapshot(struct record *rec,
 206					       struct auxtrace_mmap *mm)
 207{
 208	int ret;
 209
 210	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
 211					   record__process_auxtrace,
 212					   rec->opts.auxtrace_snapshot_size);
 213	if (ret < 0)
 214		return ret;
 215
 216	if (ret)
 217		rec->samples++;
 
 
 
 218
 219	return 0;
 220}
 
 
 221
 222static int record__auxtrace_read_snapshot_all(struct record *rec)
 223{
 224	int i;
 225	int rc = 0;
 226
 227	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
 228		struct auxtrace_mmap *mm =
 229				&rec->evlist->mmap[i].auxtrace_mmap;
 230
 231		if (!mm->base)
 232			continue;
 233
 234		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
 235			rc = -1;
 236			goto out;
 237		}
 238	}
 239out:
 240	return rc;
 241}
 242
 243static void record__read_auxtrace_snapshot(struct record *rec)
 
 244{
 245	pr_debug("Recording AUX area tracing snapshot\n");
 246	if (record__auxtrace_read_snapshot_all(rec) < 0) {
 247		auxtrace_snapshot_err = -1;
 248	} else {
 249		auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
 250		if (!auxtrace_snapshot_err)
 251			auxtrace_snapshot_enabled = 1;
 252	}
 253}
 254
 255#else
 
 256
 257static inline
 258int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
 259			       struct auxtrace_mmap *mm __maybe_unused)
 260{
 261	return 0;
 262}
 263
 264static inline
 265void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
 266{
 267}
 
 268
 269static inline
 270int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
 271{
 272	return 0;
 273}
 274
 275#endif
 276
 277static int record__open(struct record *rec)
 278{
 279	char msg[512];
 280	struct perf_evsel *pos;
 281	struct perf_evlist *evlist = rec->evlist;
 282	struct perf_session *session = rec->session;
 283	struct record_opts *opts = &rec->opts;
 284	int rc = 0;
 285
 286	perf_evlist__config(evlist, opts);
 
 287
 288	evlist__for_each(evlist, pos) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 289try_again:
 290		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
 291			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 292				if (verbose)
 293					ui__warning("%s\n", msg);
 
 
 
 294				goto try_again;
 295			}
 296
 297			rc = -errno;
 298			perf_evsel__open_strerror(pos, &opts->target,
 299						  errno, msg, sizeof(msg));
 300			ui__error("%s\n", msg);
 301			goto out;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 302		}
 303	}
 304
 305	if (perf_evlist__apply_filters(evlist, &pos)) {
 306		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
 307			pos->filter, perf_evsel__name(pos), errno,
 308			strerror_r(errno, msg, sizeof(msg)));
 309		rc = -1;
 310		goto out;
 311	}
 312
 313	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
 314				 opts->auxtrace_mmap_pages,
 315				 opts->auxtrace_snapshot_mode) < 0) {
 316		if (errno == EPERM) {
 317			pr_err("Permission error mapping pages.\n"
 318			       "Consider increasing "
 319			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
 320			       "or try again with a smaller value of -m/--mmap_pages.\n"
 321			       "(current value: %u,%u)\n",
 322			       opts->mmap_pages, opts->auxtrace_mmap_pages);
 323			rc = -errno;
 324		} else {
 325			pr_err("failed to mmap with %d (%s)\n", errno,
 326				strerror_r(errno, msg, sizeof(msg)));
 327			if (errno)
 328				rc = -errno;
 329			else
 330				rc = -EINVAL;
 331		}
 332		goto out;
 333	}
 334
 335	session->evlist = evlist;
 336	perf_session__set_id_hdr_size(session);
 337out:
 338	return rc;
 339}
 340
 341static int process_sample_event(struct perf_tool *tool,
 342				union perf_event *event,
 343				struct perf_sample *sample,
 344				struct perf_evsel *evsel,
 345				struct machine *machine)
 346{
 347	struct record *rec = container_of(tool, struct record, tool);
 348
 349	rec->samples++;
 350
 351	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
 352}
 353
 354static int process_buildids(struct record *rec)
 355{
 356	struct perf_data_file *file  = &rec->file;
 357	struct perf_session *session = rec->session;
 358
 359	if (file->size == 0)
 360		return 0;
 361
 362	/*
 363	 * During this process, it'll load kernel map and replace the
 364	 * dso->long_name to a real pathname it found.  In this case
 365	 * we prefer the vmlinux path like
 366	 *   /lib/modules/3.16.4/build/vmlinux
 367	 *
 368	 * rather than build-id path (in debug directory).
 369	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
 370	 */
 371	symbol_conf.ignore_vmlinux_buildid = true;
 372
 373	/*
 374	 * If --buildid-all is given, it marks all DSO regardless of hits,
 375	 * so no need to process samples.
 376	 */
 377	if (rec->buildid_all)
 378		rec->tool.sample = NULL;
 379
 380	return perf_session__process_events(session);
 381}
 382
 383static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
 384{
 385	int err;
 386	struct perf_tool *tool = data;
 
 
 
 
 387	/*
 388	 *As for guest kernel when processing subcommand record&report,
 389	 *we arrange module mmap prior to guest kernel mmap and trigger
 390	 *a preload dso because default guest module symbols are loaded
 391	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
 392	 *method is used to avoid symbol missing when the first addr is
 393	 *in module instead of in guest kernel.
 394	 */
 395	err = perf_event__synthesize_modules(tool, process_synthesized_event,
 396					     machine);
 397	if (err < 0)
 398		pr_err("Couldn't record guest kernel [%d]'s reference"
 399		       " relocation symbol.\n", machine->pid);
 400
 401	/*
 402	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
 403	 * have no _text sometimes.
 404	 */
 405	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 406						 machine);
 
 
 
 
 407	if (err < 0)
 408		pr_err("Couldn't record guest kernel [%d]'s reference"
 409		       " relocation symbol.\n", machine->pid);
 410}
 411
 412static struct perf_event_header finished_round_event = {
 413	.size = sizeof(struct perf_event_header),
 414	.type = PERF_RECORD_FINISHED_ROUND,
 415};
 416
 417static int record__mmap_read_all(struct record *rec)
 418{
 419	u64 bytes_written = rec->bytes_written;
 420	int i;
 421	int rc = 0;
 422
 423	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
 424		struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 425
 426		if (rec->evlist->mmap[i].base) {
 427			if (record__mmap_read(rec, i) != 0) {
 428				rc = -1;
 429				goto out;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 430			}
 
 
 431		}
 
 
 
 
 
 
 
 432
 433		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
 434		    record__auxtrace_mmap_read(rec, mm) != 0) {
 435			rc = -1;
 436			goto out;
 437		}
 
 
 438	}
 439
 440	/*
 441	 * Mark the round finished in case we wrote
 442	 * at least one event.
 443	 */
 444	if (bytes_written != rec->bytes_written)
 445		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
 446
 447out:
 448	return rc;
 449}
 450
 451static void record__init_features(struct record *rec)
 452{
 453	struct perf_session *session = rec->session;
 454	int feat;
 
 455
 456	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
 457		perf_header__set_feat(&session->header, feat);
 458
 459	if (rec->no_buildid)
 460		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
 
 461
 462	if (!have_tracepoints(&rec->evlist->entries))
 463		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
 
 
 
 
 464
 465	if (!rec->opts.branch_stack)
 466		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
 
 
 
 
 467
 468	if (!rec->opts.full_auxtrace)
 469		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
 
 
 
 
 470
 471	perf_header__clear_feat(&session->header, HEADER_STAT);
 472}
 
 
 473
 474static void
 475record__finish_output(struct record *rec)
 476{
 477	struct perf_data_file *file = &rec->file;
 478	int fd = perf_data_file__fd(file);
 479
 480	if (file->is_pipe)
 481		return;
 482
 483	rec->session->header.data_size += rec->bytes_written;
 484	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
 
 
 485
 486	if (!rec->no_buildid) {
 487		process_buildids(rec);
 488
 489		if (rec->buildid_all)
 490			dsos__hit_all(rec->session);
 
 
 
 
 
 
 
 
 491	}
 492	perf_session__write_header(rec->session, rec->evlist, fd, true);
 493
 494	return;
 495}
 496
 497static volatile int workload_exec_errno;
 
 
 
 498
 499/*
 500 * perf_evlist__prepare_workload will send a SIGUSR1
 501 * if the fork fails, since we asked by setting its
 502 * want_signal to true.
 503 */
 504static void workload_exec_failed_signal(int signo __maybe_unused,
 505					siginfo_t *info,
 506					void *ucontext __maybe_unused)
 507{
 508	workload_exec_errno = info->si_value.sival_int;
 509	done = 1;
 510	child_finished = 1;
 511}
 512
 513static void snapshot_sig_handler(int sig);
 514
 515static int record__synthesize(struct record *rec)
 516{
 517	struct perf_session *session = rec->session;
 518	struct machine *machine = &session->machines.host;
 519	struct perf_data_file *file = &rec->file;
 520	struct record_opts *opts = &rec->opts;
 521	struct perf_tool *tool = &rec->tool;
 522	int fd = perf_data_file__fd(file);
 523	int err = 0;
 524
 525	if (file->is_pipe) {
 526		err = perf_event__synthesize_attrs(tool, session,
 527						   process_synthesized_event);
 528		if (err < 0) {
 529			pr_err("Couldn't synthesize attrs.\n");
 530			goto out;
 531		}
 532
 533		if (have_tracepoints(&rec->evlist->entries)) {
 
 
 
 
 
 
 
 534			/*
 535			 * FIXME err <= 0 here actually means that
 536			 * there were no tracepoints so its not really
 537			 * an error, just that we don't need to
 538			 * synthesize anything.  We really have to
 539			 * return this more properly and also
 540			 * propagate errors that now are calling die()
 541			 */
 542			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
 543								  process_synthesized_event);
 
 544			if (err <= 0) {
 545				pr_err("Couldn't record tracing data.\n");
 546				goto out;
 547			}
 548			rec->bytes_written += err;
 549		}
 550	}
 551
 552	if (rec->opts.full_auxtrace) {
 553		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
 554					session, process_synthesized_event);
 555		if (err)
 556			goto out;
 557	}
 558
 559	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 560						 machine);
 561	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
 562			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
 563			   "Check /proc/kallsyms permission or run as root.\n");
 564
 565	err = perf_event__synthesize_modules(tool, process_synthesized_event,
 566					     machine);
 567	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
 568			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
 569			   "Check /proc/modules permission or run as root.\n");
 570
 571	if (perf_guest) {
 572		machines__process_guests(&session->machines,
 573					 perf_event__synthesize_guest_os, tool);
 574	}
 575
 576	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
 577					    process_synthesized_event, opts->sample_address,
 578					    opts->proc_map_timeout);
 579out:
 580	return err;
 581}
 582
 583static int __cmd_record(struct record *rec, int argc, const char **argv)
 584{
 585	int err;
 586	int status = 0;
 587	unsigned long waking = 0;
 588	const bool forks = argc > 0;
 589	struct machine *machine;
 590	struct perf_tool *tool = &rec->tool;
 591	struct record_opts *opts = &rec->opts;
 592	struct perf_data_file *file = &rec->file;
 593	struct perf_session *session;
 594	bool disabled = false, draining = false;
 595	int fd;
 596
 597	rec->progname = argv[0];
 598
 599	atexit(record__sig_exit);
 600	signal(SIGCHLD, sig_handler);
 601	signal(SIGINT, sig_handler);
 602	signal(SIGTERM, sig_handler);
 603	if (rec->opts.auxtrace_snapshot_mode)
 604		signal(SIGUSR2, snapshot_sig_handler);
 605	else
 606		signal(SIGUSR2, SIG_IGN);
 607
 608	session = perf_session__new(file, false, tool);
 609	if (session == NULL) {
 610		pr_err("Perf session creation failed.\n");
 611		return -1;
 612	}
 613
 614	fd = perf_data_file__fd(file);
 615	rec->session = session;
 616
 617	record__init_features(rec);
 
 
 
 
 
 618
 619	if (forks) {
 620		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
 621						    argv, file->is_pipe,
 622						    workload_exec_failed_signal);
 623		if (err < 0) {
 624			pr_err("Couldn't run the workload!\n");
 625			status = err;
 626			goto out_delete_session;
 627		}
 628	}
 629
 630	if (record__open(rec) != 0) {
 631		err = -1;
 632		goto out_child;
 633	}
 634
 635	err = bpf__apply_obj_config();
 636	if (err) {
 637		char errbuf[BUFSIZ];
 638
 639		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
 640		pr_err("ERROR: Apply config to BPF failed: %s\n",
 641			 errbuf);
 642		goto out_child;
 643	}
 644
 645	/*
 646	 * Normally perf_session__new would do this, but it doesn't have the
 647	 * evlist.
 648	 */
 649	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
 650		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
 651		rec->tool.ordered_events = false;
 652	}
 653
 654	if (!rec->evlist->nr_groups)
 655		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
 656
 657	if (file->is_pipe) {
 658		err = perf_header__write_pipe(fd);
 659		if (err < 0)
 660			goto out_child;
 661	} else {
 662		err = perf_session__write_header(session, rec->evlist, fd, false);
 663		if (err < 0)
 664			goto out_child;
 665	}
 666
 667	if (!rec->no_buildid
 668	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
 669		pr_err("Couldn't generate buildids. "
 670		       "Use --no-buildid to profile anyway.\n");
 671		err = -1;
 672		goto out_child;
 673	}
 674
 675	machine = &session->machines.host;
 676
 677	err = record__synthesize(rec);
 678	if (err < 0)
 679		goto out_child;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 680
 681	if (rec->realtime_prio) {
 682		struct sched_param param;
 683
 684		param.sched_priority = rec->realtime_prio;
 685		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
 686			pr_err("Could not set realtime priority.\n");
 687			err = -1;
 688			goto out_child;
 689		}
 690	}
 691
 692	/*
 693	 * When perf is starting the traced process, all the events
 694	 * (apart from group members) have enable_on_exec=1 set,
 695	 * so don't spoil it by prematurely enabling them.
 696	 */
 697	if (!target__none(&opts->target) && !opts->initial_delay)
 698		perf_evlist__enable(rec->evlist);
 699
 700	/*
 701	 * Let the child rip
 702	 */
 703	if (forks) {
 704		union perf_event *event;
 705
 706		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
 707		if (event == NULL) {
 708			err = -ENOMEM;
 709			goto out_child;
 710		}
 711
 712		/*
 713		 * Some H/W events are generated before COMM event
 714		 * which is emitted during exec(), so perf script
 715		 * cannot see a correct process name for those events.
 716		 * Synthesize COMM event to prevent it.
 717		 */
 718		perf_event__synthesize_comm(tool, event,
 719					    rec->evlist->workload.pid,
 720					    process_synthesized_event,
 721					    machine);
 722		free(event);
 723
 724		perf_evlist__start_workload(rec->evlist);
 725	}
 726
 727	if (opts->initial_delay) {
 728		usleep(opts->initial_delay * 1000);
 729		perf_evlist__enable(rec->evlist);
 730	}
 731
 732	auxtrace_snapshot_enabled = 1;
 733	for (;;) {
 734		unsigned long long hits = rec->samples;
 735
 736		if (record__mmap_read_all(rec) < 0) {
 737			auxtrace_snapshot_enabled = 0;
 738			err = -1;
 739			goto out_child;
 740		}
 741
 742		if (auxtrace_record__snapshot_started) {
 743			auxtrace_record__snapshot_started = 0;
 744			if (!auxtrace_snapshot_err)
 745				record__read_auxtrace_snapshot(rec);
 746			if (auxtrace_snapshot_err) {
 747				pr_err("AUX area tracing snapshot failed\n");
 748				err = -1;
 749				goto out_child;
 750			}
 751		}
 752
 753		if (hits == rec->samples) {
 754			if (done || draining)
 755				break;
 756			err = perf_evlist__poll(rec->evlist, -1);
 757			/*
 758			 * Propagate error, only if there's any. Ignore positive
 759			 * number of returned events and interrupt error.
 760			 */
 761			if (err > 0 || (err < 0 && errno == EINTR))
 762				err = 0;
 763			waking++;
 764
 765			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
 766				draining = true;
 767		}
 768
 769		/*
 770		 * When perf is starting the traced process, at the end events
 771		 * die with the process and we wait for that. Thus no need to
 772		 * disable events in this case.
 773		 */
 774		if (done && !disabled && !target__none(&opts->target)) {
 775			auxtrace_snapshot_enabled = 0;
 776			perf_evlist__disable(rec->evlist);
 777			disabled = true;
 778		}
 779	}
 780	auxtrace_snapshot_enabled = 0;
 781
 782	if (forks && workload_exec_errno) {
 783		char msg[STRERR_BUFSIZE];
 784		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
 785		pr_err("Workload failed: %s\n", emsg);
 786		err = -1;
 787		goto out_child;
 788	}
 789
 790	if (!quiet)
 791		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
 792
 793out_child:
 794	if (forks) {
 795		int exit_status;
 
 
 
 
 
 796
 797		if (!child_finished)
 798			kill(rec->evlist->workload.pid, SIGTERM);
 799
 800		wait(&exit_status);
 801
 802		if (err < 0)
 803			status = err;
 804		else if (WIFEXITED(exit_status))
 805			status = WEXITSTATUS(exit_status);
 806		else if (WIFSIGNALED(exit_status))
 807			signr = WTERMSIG(exit_status);
 808	} else
 809		status = err;
 810
 811	/* this will be recalculated during process_buildids() */
 812	rec->samples = 0;
 813
 814	if (!err)
 815		record__finish_output(rec);
 816
 817	if (!err && !quiet) {
 818		char samples[128];
 819
 820		if (rec->samples && !rec->opts.full_auxtrace)
 821			scnprintf(samples, sizeof(samples),
 822				  " (%" PRIu64 " samples)", rec->samples);
 823		else
 824			samples[0] = '\0';
 825
 826		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s ]\n",
 827			perf_data_file__size(file) / 1024.0 / 1024.0,
 828			file->path, samples);
 829	}
 830
 831out_delete_session:
 832	perf_session__delete(session);
 833	return status;
 834}
 835
 836static void callchain_debug(void)
 837{
 838	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
 839
 840	pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
 841
 842	if (callchain_param.record_mode == CALLCHAIN_DWARF)
 843		pr_debug("callchain: stack dump size %d\n",
 844			 callchain_param.dump_size);
 845}
 846
 847int record_parse_callchain_opt(const struct option *opt,
 848			       const char *arg,
 849			       int unset)
 850{
 851	int ret;
 852	struct record_opts *record = (struct record_opts *)opt->value;
 853
 854	record->callgraph_set = true;
 855	callchain_param.enabled = !unset;
 856
 857	/* --no-call-graph */
 858	if (unset) {
 859		callchain_param.record_mode = CALLCHAIN_NONE;
 860		pr_debug("callchain: disabled\n");
 861		return 0;
 862	}
 863
 864	ret = parse_callchain_record_opt(arg, &callchain_param);
 865	if (!ret) {
 866		/* Enable data address sampling for DWARF unwind. */
 867		if (callchain_param.record_mode == CALLCHAIN_DWARF)
 868			record->sample_address = true;
 869		callchain_debug();
 870	}
 871
 872	return ret;
 873}
 874
 875int record_callchain_opt(const struct option *opt,
 876			 const char *arg __maybe_unused,
 877			 int unset __maybe_unused)
 878{
 879	struct record_opts *record = (struct record_opts *)opt->value;
 880
 881	record->callgraph_set = true;
 882	callchain_param.enabled = true;
 883
 884	if (callchain_param.record_mode == CALLCHAIN_NONE)
 885		callchain_param.record_mode = CALLCHAIN_FP;
 886
 887	callchain_debug();
 888	return 0;
 889}
 890
 891static int perf_record_config(const char *var, const char *value, void *cb)
 892{
 893	struct record *rec = cb;
 894
 895	if (!strcmp(var, "record.build-id")) {
 896		if (!strcmp(value, "cache"))
 897			rec->no_buildid_cache = false;
 898		else if (!strcmp(value, "no-cache"))
 899			rec->no_buildid_cache = true;
 900		else if (!strcmp(value, "skip"))
 901			rec->no_buildid = true;
 902		else
 903			return -1;
 904		return 0;
 905	}
 906	if (!strcmp(var, "record.call-graph"))
 907		var = "call-graph.record-mode"; /* fall-through */
 908
 909	return perf_default_config(var, value, cb);
 910}
 911
 912struct clockid_map {
 913	const char *name;
 914	int clockid;
 915};
 916
 917#define CLOCKID_MAP(n, c)	\
 918	{ .name = n, .clockid = (c), }
 919
 920#define CLOCKID_END	{ .name = NULL, }
 921
 922
 923/*
 924 * Add the missing ones, we need to build on many distros...
 925 */
 926#ifndef CLOCK_MONOTONIC_RAW
 927#define CLOCK_MONOTONIC_RAW 4
 928#endif
 929#ifndef CLOCK_BOOTTIME
 930#define CLOCK_BOOTTIME 7
 931#endif
 932#ifndef CLOCK_TAI
 933#define CLOCK_TAI 11
 934#endif
 935
 936static const struct clockid_map clockids[] = {
 937	/* available for all events, NMI safe */
 938	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
 939	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
 940
 941	/* available for some events */
 942	CLOCKID_MAP("realtime", CLOCK_REALTIME),
 943	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
 944	CLOCKID_MAP("tai", CLOCK_TAI),
 945
 946	/* available for the lazy */
 947	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
 948	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
 949	CLOCKID_MAP("real", CLOCK_REALTIME),
 950	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
 951
 952	CLOCKID_END,
 953};
 954
 955static int parse_clockid(const struct option *opt, const char *str, int unset)
 956{
 957	struct record_opts *opts = (struct record_opts *)opt->value;
 958	const struct clockid_map *cm;
 959	const char *ostr = str;
 960
 961	if (unset) {
 962		opts->use_clockid = 0;
 963		return 0;
 964	}
 965
 966	/* no arg passed */
 967	if (!str)
 968		return 0;
 969
 970	/* no setting it twice */
 971	if (opts->use_clockid)
 972		return -1;
 973
 974	opts->use_clockid = true;
 975
 976	/* if its a number, we're done */
 977	if (sscanf(str, "%d", &opts->clockid) == 1)
 978		return 0;
 979
 980	/* allow a "CLOCK_" prefix to the name */
 981	if (!strncasecmp(str, "CLOCK_", 6))
 982		str += 6;
 983
 984	for (cm = clockids; cm->name; cm++) {
 985		if (!strcasecmp(str, cm->name)) {
 986			opts->clockid = cm->clockid;
 987			return 0;
 988		}
 989	}
 990
 991	opts->use_clockid = false;
 992	ui__warning("unknown clockid %s, check man page\n", ostr);
 993	return -1;
 994}
 995
 996static int record__parse_mmap_pages(const struct option *opt,
 997				    const char *str,
 998				    int unset __maybe_unused)
 999{
1000	struct record_opts *opts = opt->value;
1001	char *s, *p;
1002	unsigned int mmap_pages;
1003	int ret;
1004
1005	if (!str)
1006		return -EINVAL;
1007
1008	s = strdup(str);
1009	if (!s)
1010		return -ENOMEM;
1011
1012	p = strchr(s, ',');
1013	if (p)
1014		*p = '\0';
1015
1016	if (*s) {
1017		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1018		if (ret)
1019			goto out_free;
1020		opts->mmap_pages = mmap_pages;
1021	}
1022
1023	if (!p) {
1024		ret = 0;
1025		goto out_free;
1026	}
1027
1028	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1029	if (ret)
1030		goto out_free;
1031
1032	opts->auxtrace_mmap_pages = mmap_pages;
1033
1034out_free:
1035	free(s);
1036	return ret;
1037}
1038
/* Usage lines for "perf record", NULL-terminated. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};

/* Exported pointer to the usage lines. */
const char * const *record_usage = __record_usage;
1045
1046/*
1047 * XXX Ideally would be local to cmd_record() and passed to a record__new
1048 * because we need to have access to it in record__exit, that is called
1049 * after cmd_record() exits, but since record_options need to be accessible to
1050 * builtin-script, leave it here.
1051 *
1052 * At least we don't ouch it in all the other functions here directly.
1053 *
1054 * Just say no to tons of global variables, sigh.
1055 */
1056static struct record record = {
1057	.opts = {
1058		.sample_time	     = true,
1059		.mmap_pages	     = UINT_MAX,
1060		.user_freq	     = UINT_MAX,
1061		.user_interval	     = ULLONG_MAX,
1062		.freq		     = 4000,
1063		.target		     = {
1064			.uses_mmap   = true,
1065			.default_per_cpu = true,
1066		},
1067		.proc_map_timeout     = 500,
1068	},
1069	.tool = {
1070		.sample		= process_sample_event,
1071		.fork		= perf_event__process_fork,
1072		.exit		= perf_event__process_exit,
1073		.comm		= perf_event__process_comm,
1074		.mmap		= perf_event__process_mmap,
1075		.mmap2		= perf_event__process_mmap2,
1076		.ordered_events	= true,
1077	},
1078};
1079
1080const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1081	"\n\t\t\t\tDefault: fp";
1082
1083/*
1084 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1085 * with it and switch to use the library functions in perf_evlist that came
1086 * from builtin-record.c, i.e. use record_opts,
1087 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1088 * using pipes, etc.
1089 */
1090struct option __record_options[] = {
1091	OPT_CALLBACK('e', "event", &record.evlist, "event",
1092		     "event selector. use 'perf list' to list available events",
1093		     parse_events_option),
1094	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1095		     "event filter", parse_filter),
1096	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1097			   NULL, "don't record events from perf itself",
1098			   exclude_perf),
1099	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1100		    "record events on existing process id"),
1101	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1102		    "record events on existing thread id"),
1103	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1104		    "collect data with this RT SCHED_FIFO priority"),
1105	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1106		    "collect data without buffering"),
1107	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1108		    "collect raw sample records from all opened counters"),
1109	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1110			    "system-wide collection from all CPUs"),
1111	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
 
 
1112		    "list of cpus to monitor"),
1113	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1114	OPT_STRING('o', "output", &record.file.path, "file",
 
 
1115		    "output file name"),
1116	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1117			&record.opts.no_inherit_set,
1118			"child tasks do not inherit counters"),
1119	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1120	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1121		     "number of mmap data pages and AUX area tracing mmap pages",
1122		     record__parse_mmap_pages),
1123	OPT_BOOLEAN(0, "group", &record.opts.group,
1124		    "put the counters into a counter group"),
1125	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
1126			   NULL, "enables call-graph recording" ,
1127			   &record_callchain_opt),
1128	OPT_CALLBACK(0, "call-graph", &record.opts,
1129		     "record_mode[,record_size]", record_callchain_help,
1130		     &record_parse_callchain_opt),
1131	OPT_INCR('v', "verbose", &verbose,
1132		    "be more verbose (show counter open errors, etc)"),
1133	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1134	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1135		    "per thread counts"),
1136	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1137	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1138			&record.opts.sample_time_set,
1139			"Record the sample timestamps"),
1140	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1141	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1142		    "don't sample"),
1143	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1144			&record.no_buildid_cache_set,
1145			"do not update the buildid cache"),
1146	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1147			&record.no_buildid_set,
1148			"do not collect buildids in perf.data"),
1149	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1150		     "monitor event in cgroup name only",
1151		     parse_cgroups),
1152	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1153		  "ms to wait before starting measurement after program start"),
1154	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1155		   "user to profile"),
1156
1157	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1158		     "branch any", "sample any taken branches",
1159		     parse_branch_stack),
1160
1161	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1162		     "branch filter mask", "branch stack filter modes",
1163		     parse_branch_stack),
1164	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1165		    "sample by weight (on special events only)"),
1166	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1167		    "sample transaction flags (special events only)"),
1168	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1169		    "use per-thread mmaps"),
1170	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1171		    "sample selected machine registers on interrupt,"
1172		    " use -I ? to list register names", parse_regs),
1173	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1174		    "Record running/enabled time of read (:S) events"),
1175	OPT_CALLBACK('k', "clockid", &record.opts,
1176	"clockid", "clockid to use for events, see clock_gettime()",
1177	parse_clockid),
1178	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1179			  "opts", "AUX area tracing Snapshot Mode", ""),
1180	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1181			"per thread proc mmap processing timeout in ms"),
1182	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1183		    "Record context switch events"),
1184	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1185			 "Configure all used events to run in kernel space.",
1186			 PARSE_OPT_EXCLUSIVE),
1187	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1188			 "Configure all used events to run in user space.",
1189			 PARSE_OPT_EXCLUSIVE),
1190	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1191		   "clang binary to use for compiling BPF scriptlets"),
1192	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1193		   "options passed to clang when compiling BPF scriptlets"),
1194	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1195		   "file", "vmlinux pathname"),
1196	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1197		    "Record build-id of all DSOs regardless of hits"),
1198	OPT_END()
1199};
1200
1201struct option *record_options = __record_options;
1202
1203int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1204{
1205	int err;
1206	struct record *rec = &record;
1207	char errbuf[BUFSIZ];
1208
1209#ifndef HAVE_LIBBPF_SUPPORT
1210# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1211	set_nobuild('\0', "clang-path", true);
1212	set_nobuild('\0', "clang-opt", true);
1213# undef set_nobuild
1214#endif
1215
1216#ifndef HAVE_BPF_PROLOGUE
1217# if !defined (HAVE_DWARF_SUPPORT)
1218#  define REASON  "NO_DWARF=1"
1219# elif !defined (HAVE_LIBBPF_SUPPORT)
1220#  define REASON  "NO_LIBBPF=1"
1221# else
1222#  define REASON  "this architecture doesn't support BPF prologue"
1223# endif
1224# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1225	set_nobuild('\0', "vmlinux", true);
1226# undef set_nobuild
1227# undef REASON
1228#endif
1229
1230	rec->evlist = perf_evlist__new();
1231	if (rec->evlist == NULL)
1232		return -ENOMEM;
1233
1234	perf_config(perf_record_config, rec);
1235
1236	argc = parse_options(argc, argv, record_options, record_usage,
1237			    PARSE_OPT_STOP_AT_NON_OPTION);
1238	if (!argc && target__none(&rec->opts.target))
 
1239		usage_with_options(record_usage, record_options);
1240
1241	if (nr_cgroups && !rec->opts.target.system_wide) {
1242		usage_with_options_msg(record_usage, record_options,
1243			"cgroup monitoring only available in system-wide mode");
1244
1245	}
1246	if (rec->opts.record_switch_events &&
1247	    !perf_can_record_switch_events()) {
1248		ui__error("kernel does not support recording context switch events\n");
1249		parse_options_usage(record_usage, record_options, "switch-events", 0);
1250		return -EINVAL;
1251	}
1252
1253	if (!rec->itr) {
1254		rec->itr = auxtrace_record__init(rec->evlist, &err);
1255		if (err)
1256			return err;
1257	}
1258
1259	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1260					      rec->opts.auxtrace_snapshot_opts);
1261	if (err)
1262		return err;
1263
1264	err = -ENOMEM;
1265
1266	symbol__init(NULL);
1267
1268	if (symbol_conf.kptr_restrict)
1269		pr_warning(
1270"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1271"check /proc/sys/kernel/kptr_restrict.\n\n"
1272"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1273"file is not found in the buildid cache or in the vmlinux path.\n\n"
1274"Samples in kernel modules won't be resolved at all.\n\n"
1275"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1276"even with a suitable vmlinux or kallsyms file.\n\n");
1277
1278	if (rec->no_buildid_cache || rec->no_buildid)
1279		disable_buildid_cache();
1280
1281	if (rec->evlist->nr_entries == 0 &&
1282	    perf_evlist__add_default(rec->evlist) < 0) {
1283		pr_err("Not enough memory for event selector list\n");
1284		goto out_symbol_exit;
1285	}
1286
1287	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1288		rec->opts.no_inherit = true;
1289
1290	err = target__validate(&rec->opts.target);
1291	if (err) {
1292		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1293		ui__warning("%s", errbuf);
1294	}
1295
1296	err = target__parse_uid(&rec->opts.target);
1297	if (err) {
1298		int saved_errno = errno;
1299
1300		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1301		ui__error("%s", errbuf);
1302
1303		err = -saved_errno;
1304		goto out_symbol_exit;
 
1305	}
1306
1307	err = -ENOMEM;
1308	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1309		usage_with_options(record_usage, record_options);
1310
1311	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1312	if (err)
1313		goto out_symbol_exit;
1314
1315	/*
1316	 * We take all buildids when the file contains
1317	 * AUX area tracing data because we do not decode the
1318	 * trace because it would take too long.
1319	 */
1320	if (rec->opts.full_auxtrace)
1321		rec->buildid_all = true;
1322
1323	if (record_opts__config(&rec->opts)) {
 
 
1324		err = -EINVAL;
1325		goto out_symbol_exit;
1326	}
1327
1328	err = __cmd_record(&record, argc, argv);
 
 
1329out_symbol_exit:
1330	perf_evlist__delete(rec->evlist);
1331	symbol__exit();
1332	auxtrace_record__free(rec->itr);
1333	return err;
1334}
1335
1336static void snapshot_sig_handler(int sig __maybe_unused)
1337{
1338	if (!auxtrace_snapshot_enabled)
1339		return;
1340	auxtrace_snapshot_enabled = 0;
1341	auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
1342	auxtrace_record__snapshot_started = 1;
1343}