Linux Audio

Check our new training course

Linux kernel drivers training

Mar 31-Apr 9, 2025, special US time zones
Register
Loading...
v5.4
/*
 * builtin-trace.c
 *
 * Builtin 'trace' command:
 *
 * Display a continuously updated trace of any workload, CPU, specific PID,
 * system wide, etc.  Default format is loosely strace-like, but any other
 * event may be specified using --event.
 *
 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Initially based on the 'trace' prototype by Thomas Gleixner:
 *
 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
 */
  16
  17#include "util/record.h"
  18#include <traceevent/event-parse.h>
  19#include <api/fs/tracing_path.h>
  20#include <bpf/bpf.h>
  21#include "util/bpf_map.h"
  22#include "util/rlimit.h"
  23#include "builtin.h"
  24#include "util/cgroup.h"
  25#include "util/color.h"
  26#include "util/config.h"
  27#include "util/debug.h"
  28#include "util/dso.h"
  29#include "util/env.h"
  30#include "util/event.h"
  31#include "util/evsel.h"
  32#include "util/evsel_fprintf.h"
  33#include "util/synthetic-events.h"
  34#include "util/evlist.h"
  35#include "util/evswitch.h"
  36#include "util/mmap.h"
  37#include <subcmd/pager.h>
  38#include <subcmd/exec-cmd.h>
  39#include "util/machine.h"
  40#include "util/map.h"
  41#include "util/symbol.h"
  42#include "util/path.h"
  43#include "util/session.h"
  44#include "util/thread.h"
  45#include <subcmd/parse-options.h>
  46#include "util/strlist.h"
  47#include "util/intlist.h"
  48#include "util/thread_map.h"
  49#include "util/stat.h"
  50#include "util/tool.h"
  51#include "util/util.h"
  52#include "trace/beauty/beauty.h"
  53#include "trace-event.h"
  54#include "util/parse-events.h"
  55#include "util/bpf-loader.h"
  56#include "callchain.h"
  57#include "print_binary.h"
  58#include "string2.h"
  59#include "syscalltbl.h"
  60#include "rb_resort.h"
  61#include "../perf.h"
  62
  63#include <errno.h>
  64#include <inttypes.h>
  65#include <poll.h>
  66#include <signal.h>
  67#include <stdlib.h>
  68#include <string.h>
 
  69#include <linux/err.h>
  70#include <linux/filter.h>
  71#include <linux/kernel.h>
  72#include <linux/random.h>
  73#include <linux/stringify.h>
  74#include <linux/time64.h>
  75#include <linux/zalloc.h>
  76#include <fcntl.h>
  77#include <sys/sysmacros.h>
  78
  79#include <linux/ctype.h>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  80
  81#ifndef O_CLOEXEC
  82# define O_CLOEXEC		02000000
  83#endif
  84
  85#ifndef F_LINUX_SPECIFIC_BASE
  86# define F_LINUX_SPECIFIC_BASE	1024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  87#endif
  88
  89struct trace {
  90	struct perf_tool	tool;
  91	struct syscalltbl	*sctbl;
  92	struct {
  93		struct syscall  *table;
  94		struct bpf_map  *map;
  95		struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
  96			struct bpf_map  *sys_enter,
  97					*sys_exit;
  98		}		prog_array;
  99		struct {
 100			struct evsel *sys_enter,
 101					  *sys_exit,
 102					  *augmented;
 103		}		events;
 104		struct bpf_program *unaugmented_prog;
 105	} syscalls;
 106	struct {
 107		struct bpf_map *map;
 108	} dump;
 109	struct record_opts	opts;
 110	struct evlist	*evlist;
 111	struct machine		*host;
 112	struct thread		*current;
 113	struct bpf_object	*bpf_obj;
 114	struct cgroup		*cgroup;
 115	u64			base_time;
 116	FILE			*output;
 117	unsigned long		nr_events;
 118	unsigned long		nr_events_printed;
 119	unsigned long		max_events;
 120	struct evswitch		evswitch;
 121	struct strlist		*ev_qualifier;
 122	struct {
 123		size_t		nr;
 124		int		*entries;
 125	}			ev_qualifier_ids;
 126	struct {
 127		size_t		nr;
 128		pid_t		*entries;
 129		struct bpf_map  *map;
 130	}			filter_pids;
 131	double			duration_filter;
 132	double			runtime_ms;
 133	struct {
 134		u64		vfs_getname,
 135				proc_getname;
 136	} stats;
 137	unsigned int		max_stack;
 138	unsigned int		min_stack;
 139	int			raw_augmented_syscalls_args_size;
 140	bool			raw_augmented_syscalls;
 141	bool			fd_path_disabled;
 142	bool			sort_events;
 143	bool			not_ev_qualifier;
 144	bool			live;
 145	bool			full_time;
 146	bool			sched;
 147	bool			multiple_threads;
 148	bool			summary;
 149	bool			summary_only;
 150	bool			failure_only;
 151	bool			show_comm;
 152	bool			print_sample;
 153	bool			show_tool_stats;
 154	bool			trace_syscalls;
 155	bool			kernel_syscallchains;
 156	s16			args_alignment;
 157	bool			show_tstamp;
 158	bool			show_duration;
 159	bool			show_zeros;
 160	bool			show_arg_names;
 161	bool			show_string_prefix;
 162	bool			force;
 163	bool			vfs_getname;
 164	int			trace_pgfaults;
 165	struct {
 166		struct ordered_events	data;
 167		u64			last;
 168	} oe;
 169};
 170
 171struct tp_field {
 172	int offset;
 173	union {
 174		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
 175		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
 176	};
 177};
 178
 179#define TP_UINT_FIELD(bits) \
 180static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
 181{ \
 182	u##bits value; \
 183	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
 184	return value;  \
 185}
 186
 187TP_UINT_FIELD(8);
 188TP_UINT_FIELD(16);
 189TP_UINT_FIELD(32);
 190TP_UINT_FIELD(64);
 191
 192#define TP_UINT_FIELD__SWAPPED(bits) \
 193static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
 194{ \
 195	u##bits value; \
 196	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
 197	return bswap_##bits(value);\
 198}
 199
 200TP_UINT_FIELD__SWAPPED(16);
 201TP_UINT_FIELD__SWAPPED(32);
 202TP_UINT_FIELD__SWAPPED(64);
 203
 204static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
 
 
 205{
 206	field->offset = offset;
 207
 208	switch (size) {
 209	case 1:
 210		field->integer = tp_field__u8;
 211		break;
 212	case 2:
 213		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
 214		break;
 215	case 4:
 216		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
 217		break;
 218	case 8:
 219		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
 220		break;
 221	default:
 222		return -1;
 223	}
 224
 225	return 0;
 226}
 227
 228static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
 229{
 230	return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
 231}
 232
 233static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
 234{
 235	return sample->raw_data + field->offset;
 236}
 237
 238static int __tp_field__init_ptr(struct tp_field *field, int offset)
 239{
 240	field->offset = offset;
 241	field->pointer = tp_field__ptr;
 242	return 0;
 243}
 244
 245static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
 246{
 247	return __tp_field__init_ptr(field, format_field->offset);
 248}
 249
 250struct syscall_tp {
 251	struct tp_field id;
 252	union {
 253		struct tp_field args, ret;
 254	};
 255};
 256
 257static int perf_evsel__init_tp_uint_field(struct evsel *evsel,
 258					  struct tp_field *field,
 259					  const char *name)
 260{
 261	struct tep_format_field *format_field = perf_evsel__field(evsel, name);
 262
 263	if (format_field == NULL)
 264		return -1;
 265
 266	return tp_field__init_uint(field, format_field, evsel->needs_swap);
 267}
 268
 269#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
 270	({ struct syscall_tp *sc = evsel->priv;\
 271	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 272
 273static int perf_evsel__init_tp_ptr_field(struct evsel *evsel,
 274					 struct tp_field *field,
 275					 const char *name)
 276{
 277	struct tep_format_field *format_field = perf_evsel__field(evsel, name);
 278
 279	if (format_field == NULL)
 280		return -1;
 281
 282	return tp_field__init_ptr(field, format_field);
 283}
 284
 285#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
 286	({ struct syscall_tp *sc = evsel->priv;\
 287	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 288
 289static void evsel__delete_priv(struct evsel *evsel)
 290{
 291	zfree(&evsel->priv);
 292	evsel__delete(evsel);
 293}
 294
 295static int perf_evsel__init_syscall_tp(struct evsel *evsel)
 296{
 297	struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
 298
 299	if (evsel->priv != NULL) {
 300		if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
 301		    perf_evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
 302			goto out_delete;
 303		return 0;
 304	}
 305
 306	return -ENOMEM;
 307out_delete:
 308	zfree(&evsel->priv);
 309	return -ENOENT;
 310}
 311
 312static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
 313{
 314	struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
 315
 316	if (evsel->priv != NULL) {
 317		struct tep_format_field *syscall_id = perf_evsel__field(tp, "id");
 318		if (syscall_id == NULL)
 319			syscall_id = perf_evsel__field(tp, "__syscall_nr");
 320		if (syscall_id == NULL)
 321			goto out_delete;
 322		if (__tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
 323			goto out_delete;
 324
 325		return 0;
 326	}
 327
 328	return -ENOMEM;
 329out_delete:
 330	zfree(&evsel->priv);
 331	return -EINVAL;
 332}
 333
 334static int perf_evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
 335{
 336	struct syscall_tp *sc = evsel->priv;
 337
 338	return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
 339}
 340
 341static int perf_evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
 342{
 343	struct syscall_tp *sc = evsel->priv;
 344
 345	return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
 346}
 347
 348static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
 349{
 350	evsel->priv = malloc(sizeof(struct syscall_tp));
 351	if (evsel->priv != NULL) {
 352		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
 353			goto out_delete;
 354
 355		evsel->handler = handler;
 356		return 0;
 357	}
 358
 359	return -ENOMEM;
 360
 361out_delete:
 362	zfree(&evsel->priv);
 363	return -ENOENT;
 364}
 365
 366static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
 367{
 368	struct evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 369
 370	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 371	if (IS_ERR(evsel))
 372		evsel = perf_evsel__newtp("syscalls", direction);
 373
 374	if (IS_ERR(evsel))
 375		return NULL;
 376
 377	if (perf_evsel__init_raw_syscall_tp(evsel, handler))
 378		goto out_delete;
 379
 380	return evsel;
 381
 382out_delete:
 383	evsel__delete_priv(evsel);
 384	return NULL;
 385}
 386
 387#define perf_evsel__sc_tp_uint(evsel, name, sample) \
 388	({ struct syscall_tp *fields = evsel->priv; \
 389	   fields->name.integer(&fields->name, sample); })
 390
 391#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
 392	({ struct syscall_tp *fields = evsel->priv; \
 393	   fields->name.pointer(&fields->name, sample); })
 394
 395size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
 396{
 397	int idx = val - sa->offset;
 
 
 
 
 
 398
 399	if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
 400		size_t printed = scnprintf(bf, size, intfmt, val);
 401		if (show_prefix)
 402			printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
 403		return printed;
 404	}
 
 
 
 
 405
 406	return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
 
 
 
 407}
 408
 409static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
 410						const char *intfmt,
 411					        struct syscall_arg *arg)
 412{
 413	return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->show_string_prefix, arg->val);
 
 
 
 
 
 
 414}
 415
 416static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
 417					      struct syscall_arg *arg)
 418{
 419	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
 420}
 421
 422#define SCA_STRARRAY syscall_arg__scnprintf_strarray
 423
 424size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg)
 
 
 
 
 
 
 425{
 426	return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val);
 427}
 428
 429size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
 430{
 431	size_t printed;
 432	int i;
 433
 434	for (i = 0; i < sas->nr_entries; ++i) {
 435		struct strarray *sa = sas->entries[i];
 436		int idx = val - sa->offset;
 437
 438		if (idx >= 0 && idx < sa->nr_entries) {
 439			if (sa->entries[idx] == NULL)
 440				break;
 441			return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
 442		}
 443	}
 444
 445	printed = scnprintf(bf, size, intfmt, val);
 446	if (show_prefix)
 447		printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sas->entries[0]->prefix);
 448	return printed;
 449}
 450
 451size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
 452					struct syscall_arg *arg)
 453{
 454	return strarrays__scnprintf(arg->parm, bf, size, "%d", arg->show_string_prefix, arg->val);
 455}
 456
 457#ifndef AT_FDCWD
 458#define AT_FDCWD	-100
 459#endif
 460
 461static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
 462					   struct syscall_arg *arg)
 463{
 464	int fd = arg->val;
 465	const char *prefix = "AT_FD";
 466
 467	if (fd == AT_FDCWD)
 468		return scnprintf(bf, size, "%s%s", arg->show_string_prefix ? prefix : "", "CWD");
 469
 470	return syscall_arg__scnprintf_fd(bf, size, arg);
 471}
 472
 473#define SCA_FDAT syscall_arg__scnprintf_fd_at
 474
 475static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
 476					      struct syscall_arg *arg);
 477
 478#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
 479
 480size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
 
 481{
 482	return scnprintf(bf, size, "%#lx", arg->val);
 483}
 484
 485size_t syscall_arg__scnprintf_ptr(char *bf, size_t size, struct syscall_arg *arg)
 
 
 
 486{
 487	if (arg->val == 0)
 488		return scnprintf(bf, size, "NULL");
 489	return syscall_arg__scnprintf_hex(bf, size, arg);
 490}
 491
 492size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
 
 
 
 493{
 494	return scnprintf(bf, size, "%d", arg->val);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 495}
 496
 497size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 498{
 499	return scnprintf(bf, size, "%ld", arg->val);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 500}
 501
 
 
 502static const char *bpf_cmd[] = {
 503	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
 504	"MAP_GET_NEXT_KEY", "PROG_LOAD",
 505};
 506static DEFINE_STRARRAY(bpf_cmd, "BPF_");
 507
 508static const char *fsmount_flags[] = {
 509	[1] = "CLOEXEC",
 510};
 511static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");
 512
 513#include "trace/beauty/generated/fsconfig_arrays.c"
 514
 515static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");
 516
 517static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
 518static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);
 519
 520static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
 521static DEFINE_STRARRAY(itimers, "ITIMER_");
 522
 523static const char *keyctl_options[] = {
 524	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
 525	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
 526	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
 527	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
 528	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
 529};
 530static DEFINE_STRARRAY(keyctl_options, "KEYCTL_");
 531
 532static const char *whences[] = { "SET", "CUR", "END",
 533#ifdef SEEK_DATA
 534"DATA",
 535#endif
 536#ifdef SEEK_HOLE
 537"HOLE",
 538#endif
 539};
 540static DEFINE_STRARRAY(whences, "SEEK_");
 541
 542static const char *fcntl_cmds[] = {
 543	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
 544	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
 545	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
 546	"GETOWNER_UIDS",
 547};
 548static DEFINE_STRARRAY(fcntl_cmds, "F_");
 549
 550static const char *fcntl_linux_specific_cmds[] = {
 551	"SETLEASE", "GETLEASE", "NOTIFY", [5] =	"CANCELLK", "DUPFD_CLOEXEC",
 552	"SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
 553	"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
 554};
 555
 556static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, "F_", F_LINUX_SPECIFIC_BASE);
 557
 558static struct strarray *fcntl_cmds_arrays[] = {
 559	&strarray__fcntl_cmds,
 560	&strarray__fcntl_linux_specific_cmds,
 561};
 562
 563static DEFINE_STRARRAYS(fcntl_cmds_arrays);
 564
 565static const char *rlimit_resources[] = {
 566	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
 567	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
 568	"RTTIME",
 569};
 570static DEFINE_STRARRAY(rlimit_resources, "RLIMIT_");
 571
 572static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
 573static DEFINE_STRARRAY(sighow, "SIG_");
 574
 575static const char *clockid[] = {
 576	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
 577	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
 578	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
 579};
 580static DEFINE_STRARRAY(clockid, "CLOCK_");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 581
 582static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
 583						 struct syscall_arg *arg)
 584{
 585	bool show_prefix = arg->show_string_prefix;
 586	const char *suffix = "_OK";
 587	size_t printed = 0;
 588	int mode = arg->val;
 589
 590	if (mode == F_OK) /* 0 */
 591		return scnprintf(bf, size, "F%s", show_prefix ? suffix : "");
 592#define	P_MODE(n) \
 593	if (mode & n##_OK) { \
 594		printed += scnprintf(bf + printed, size - printed, "%s%s", #n, show_prefix ? suffix : ""); \
 595		mode &= ~n##_OK; \
 596	}
 597
 598	P_MODE(R);
 599	P_MODE(W);
 600	P_MODE(X);
 601#undef P_MODE
 602
 603	if (mode)
 604		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
 605
 606	return printed;
 607}
 608
 609#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
 610
 611static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
 612					      struct syscall_arg *arg);
 613
 614#define SCA_FILENAME syscall_arg__scnprintf_filename
 615
 616static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
 617						struct syscall_arg *arg)
 618{
 619	bool show_prefix = arg->show_string_prefix;
 620	const char *prefix = "O_";
 621	int printed = 0, flags = arg->val;
 622
 
 
 
 
 
 623#define	P_FLAG(n) \
 624	if (flags & O_##n) { \
 625		printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
 626		flags &= ~O_##n; \
 627	}
 628
 
 
 629	P_FLAG(CLOEXEC);
 
 
 
 
 
 
 
 
 630	P_FLAG(NONBLOCK);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 631#undef P_FLAG
 632
 633	if (flags)
 634		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 635
 636	return printed;
 637}
 638
 639#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 640
 641#ifndef GRND_NONBLOCK
 642#define GRND_NONBLOCK	0x0001
 643#endif
 644#ifndef GRND_RANDOM
 645#define GRND_RANDOM	0x0002
 646#endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 647
 648static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
 
 
 649						   struct syscall_arg *arg)
 650{
 651	bool show_prefix = arg->show_string_prefix;
 652	const char *prefix = "GRND_";
 653	int printed = 0, flags = arg->val;
 654
 
 
 655#define	P_FLAG(n) \
 656	if (flags & GRND_##n) { \
 657		printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
 658		flags &= ~GRND_##n; \
 659	}
 660
 661	P_FLAG(RANDOM);
 
 662	P_FLAG(NONBLOCK);
 663#undef P_FLAG
 664
 665	if (flags)
 666		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 667
 668	return printed;
 669}
 670
 671#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
 672
 673#define STRARRAY(name, array) \
 674	  { .scnprintf	= SCA_STRARRAY, \
 675	    .parm	= &strarray__##array, }
 676
 677#define STRARRAY_FLAGS(name, array) \
 678	  { .scnprintf	= SCA_STRARRAY_FLAGS, \
 679	    .parm	= &strarray__##array, }
 680
 681#include "trace/beauty/arch_errno_names.c"
 682#include "trace/beauty/eventfd.c"
 683#include "trace/beauty/futex_op.c"
 684#include "trace/beauty/futex_val3.c"
 685#include "trace/beauty/mmap.c"
 686#include "trace/beauty/mode_t.c"
 687#include "trace/beauty/msg_flags.c"
 688#include "trace/beauty/open_flags.c"
 689#include "trace/beauty/perf_event_open.c"
 690#include "trace/beauty/pid.c"
 691#include "trace/beauty/sched_policy.c"
 692#include "trace/beauty/seccomp.c"
 693#include "trace/beauty/signum.c"
 694#include "trace/beauty/socket_type.c"
 695#include "trace/beauty/waitid_options.c"
 696
 697struct syscall_arg_fmt {
 698	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
 699	unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
 700	void	   *parm;
 701	const char *name;
 702	bool	   show_zero;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 703};
 704
 
 
 
 
 
 
 
 705static struct syscall_fmt {
 706	const char *name;
 707	const char *alias;
 708	struct {
 709		const char *sys_enter,
 710			   *sys_exit;
 711	}	   bpf_prog_name;
 712	struct syscall_arg_fmt arg[6];
 713	u8	   nr_args;
 714	bool	   errpid;
 715	bool	   timeout;
 716	bool	   hexret;
 717} syscall_fmts[] = {
 718	{ .name	    = "access",
 719	  .arg = { [1] = { .scnprintf = SCA_ACCMODE,  /* mode */ }, }, },
 720	{ .name	    = "arch_prctl",
 721	  .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, /* code */ },
 722		   [1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, },
 723	{ .name	    = "bind",
 724	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
 725		   [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ },
 726		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
 727	{ .name	    = "bpf",
 728	  .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
 729	{ .name	    = "brk",	    .hexret = true,
 730	  .arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, },
 731	{ .name     = "clock_gettime",
 732	  .arg = { [0] = STRARRAY(clk_id, clockid), }, },
 733	{ .name	    = "clone",	    .errpid = true, .nr_args = 5,
 734	  .arg = { [0] = { .name = "flags",	    .scnprintf = SCA_CLONE_FLAGS, },
 735		   [1] = { .name = "child_stack",   .scnprintf = SCA_HEX, },
 736		   [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
 737		   [3] = { .name = "child_tidptr",  .scnprintf = SCA_HEX, },
 738		   [4] = { .name = "tls",	    .scnprintf = SCA_HEX, }, }, },
 739	{ .name	    = "close",
 740	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
 741	{ .name	    = "connect",
 742	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
 743		   [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ },
 744		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
 745	{ .name	    = "epoll_ctl",
 746	  .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
 747	{ .name	    = "eventfd2",
 748	  .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
 749	{ .name	    = "fchmodat",
 750	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
 751	{ .name	    = "fchownat",
 752	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
 753	{ .name	    = "fcntl",
 754	  .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
 755			   .parm      = &strarrays__fcntl_cmds_arrays,
 756			   .show_zero = true, },
 757		   [2] = { .scnprintf =  SCA_FCNTL_ARG, /* arg */ }, }, },
 758	{ .name	    = "flock",
 759	  .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
 760	{ .name     = "fsconfig",
 761	  .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
 762	{ .name     = "fsmount",
 763	  .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
 764		   [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, },
 765	{ .name     = "fspick",
 766	  .arg = { [0] = { .scnprintf = SCA_FDAT,	  /* dfd */ },
 767		   [1] = { .scnprintf = SCA_FILENAME,	  /* path */ },
 768		   [2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, },
 769	{ .name	    = "fstat", .alias = "newfstat", },
 770	{ .name	    = "fstatat", .alias = "newfstatat", },
 771	{ .name	    = "futex",
 772	  .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ },
 773		   [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
 774	{ .name	    = "futimesat",
 775	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
 776	{ .name	    = "getitimer",
 777	  .arg = { [0] = STRARRAY(which, itimers), }, },
 778	{ .name	    = "getpid",	    .errpid = true, },
 779	{ .name	    = "getpgid",    .errpid = true, },
 780	{ .name	    = "getppid",    .errpid = true, },
 781	{ .name	    = "getrandom",
 782	  .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
 783	{ .name	    = "getrlimit",
 784	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
 785	{ .name	    = "gettid",	    .errpid = true, },
 786	{ .name	    = "ioctl",
 787	  .arg = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 788#if defined(__i386__) || defined(__x86_64__)
 789/*
 790 * FIXME: Make this available to all arches.
 791 */
 792		   [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
 793		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
 
 794#else
 795		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
 796#endif
 797	{ .name	    = "kcmp",	    .nr_args = 5,
 798	  .arg = { [0] = { .name = "pid1",	.scnprintf = SCA_PID, },
 799		   [1] = { .name = "pid2",	.scnprintf = SCA_PID, },
 800		   [2] = { .name = "type",	.scnprintf = SCA_KCMP_TYPE, },
 801		   [3] = { .name = "idx1",	.scnprintf = SCA_KCMP_IDX, },
 802		   [4] = { .name = "idx2",	.scnprintf = SCA_KCMP_IDX, }, }, },
 803	{ .name	    = "keyctl",
 804	  .arg = { [0] = STRARRAY(option, keyctl_options), }, },
 805	{ .name	    = "kill",
 806	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
 807	{ .name	    = "linkat",
 808	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
 809	{ .name	    = "lseek",
 810	  .arg = { [2] = STRARRAY(whence, whences), }, },
 811	{ .name	    = "lstat", .alias = "newlstat", },
 812	{ .name     = "madvise",
 813	  .arg = { [0] = { .scnprintf = SCA_HEX,      /* start */ },
 814		   [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
 815	{ .name	    = "mkdirat",
 816	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
 817	{ .name	    = "mknodat",
 818	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 819	{ .name	    = "mmap",	    .hexret = true,
 820/* The standard mmap maps to old_mmap on s390x */
 821#if defined(__s390x__)
 822	.alias = "old_mmap",
 823#endif
 824	  .arg = { [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
 825		   [3] = { .scnprintf = SCA_MMAP_FLAGS,	/* flags */ },
 826		   [5] = { .scnprintf = SCA_HEX,	/* offset */ }, }, },
 827	{ .name	    = "mount",
 828	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
 829		   [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
 830			   .mask_val  = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
 831	{ .name	    = "move_mount",
 832	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* from_dfd */ },
 833		   [1] = { .scnprintf = SCA_FILENAME, /* from_pathname */ },
 834		   [2] = { .scnprintf = SCA_FDAT,	/* to_dfd */ },
 835		   [3] = { .scnprintf = SCA_FILENAME, /* to_pathname */ },
 836		   [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, },
 837	{ .name	    = "mprotect",
 838	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
 839		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ }, }, },
 840	{ .name	    = "mq_unlink",
 841	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
 842	{ .name	    = "mremap",	    .hexret = true,
 843	  .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, },
 844	{ .name	    = "name_to_handle_at",
 845	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
 846	{ .name	    = "newfstatat",
 847	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
 848	{ .name	    = "open",
 849	  .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
 850	{ .name	    = "open_by_handle_at",
 851	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
 852		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
 853	{ .name	    = "openat",
 854	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
 855		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
 856	{ .name	    = "perf_event_open",
 857	  .arg = { [2] = { .scnprintf = SCA_INT,	/* cpu */ },
 858		   [3] = { .scnprintf = SCA_FD,		/* group_fd */ },
 859		   [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
 860	{ .name	    = "pipe2",
 861	  .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
 862	{ .name	    = "pkey_alloc",
 863	  .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS,	/* access_rights */ }, }, },
 864	{ .name	    = "pkey_free",
 865	  .arg = { [0] = { .scnprintf = SCA_INT,	/* key */ }, }, },
 866	{ .name	    = "pkey_mprotect",
 867	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
 868		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
 869		   [3] = { .scnprintf = SCA_INT,	/* pkey */ }, }, },
 870	{ .name	    = "poll", .timeout = true, },
 871	{ .name	    = "ppoll", .timeout = true, },
 872	{ .name	    = "prctl",
 873	  .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
 874		   [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
 875		   [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
 876	{ .name	    = "pread", .alias = "pread64", },
 877	{ .name	    = "preadv", .alias = "pread", },
 878	{ .name	    = "prlimit64",
 879	  .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
 880	{ .name	    = "pwrite", .alias = "pwrite64", },
 881	{ .name	    = "readlinkat",
 882	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
 883	{ .name	    = "recvfrom",
 884	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
 885	{ .name	    = "recvmmsg",
 886	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
 887	{ .name	    = "recvmsg",
 888	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
 889	{ .name	    = "renameat",
 890	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
 891		   [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, }, },
 892	{ .name	    = "renameat2",
 893	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
 894		   [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ },
 895		   [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, },
 896	{ .name	    = "rt_sigaction",
 897	  .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
 898	{ .name	    = "rt_sigprocmask",
 899	  .arg = { [0] = STRARRAY(how, sighow), }, },
 900	{ .name	    = "rt_sigqueueinfo",
 901	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
 902	{ .name	    = "rt_tgsigqueueinfo",
 903	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
 904	{ .name	    = "sched_setscheduler",
 905	  .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
 906	{ .name	    = "seccomp",
 907	  .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP,	   /* op */ },
 908		   [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
 909	{ .name	    = "select", .timeout = true, },
 910	{ .name	    = "sendfile", .alias = "sendfile64", },
 911	{ .name	    = "sendmmsg",
 912	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
 913	{ .name	    = "sendmsg",
 914	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
 915	{ .name	    = "sendto",
 916	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ },
 917		   [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, },
 918	{ .name	    = "set_tid_address", .errpid = true, },
 919	{ .name	    = "setitimer",
 920	  .arg = { [0] = STRARRAY(which, itimers), }, },
 921	{ .name	    = "setrlimit",
 922	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
 923	{ .name	    = "socket",
 924	  .arg = { [0] = STRARRAY(family, socket_families),
 925		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
 926		   [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
 927	{ .name	    = "socketpair",
 928	  .arg = { [0] = STRARRAY(family, socket_families),
 929		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
 930		   [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
 931	{ .name	    = "stat", .alias = "newstat", },
 932	{ .name	    = "statx",
 933	  .arg = { [0] = { .scnprintf = SCA_FDAT,	 /* fdat */ },
 934		   [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
 935		   [3] = { .scnprintf = SCA_STATX_MASK,	 /* mask */ }, }, },
 936	{ .name	    = "swapoff",
 937	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
 938	{ .name	    = "swapon",
 939	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
 940	{ .name	    = "symlinkat",
 941	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
 942	{ .name	    = "sync_file_range",
 943	  .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, /* flags */ }, }, },
 944	{ .name	    = "tgkill",
 945	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
 946	{ .name	    = "tkill",
 947	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
 948	{ .name     = "umount2", .alias = "umount",
 949	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
 950	{ .name	    = "uname", .alias = "newuname", },
 951	{ .name	    = "unlinkat",
 952	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
 953	{ .name	    = "utimensat",
 954	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
 955	{ .name	    = "wait4",	    .errpid = true,
 956	  .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
 957	{ .name	    = "waitid",	    .errpid = true,
 958	  .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
 
 
 
 
 
 
 
 
 
 
 
 
 959};
 960
 961static int syscall_fmt__cmp(const void *name, const void *fmtp)
 962{
 963	const struct syscall_fmt *fmt = fmtp;
 964	return strcmp(name, fmt->name);
 965}
 966
 967static struct syscall_fmt *syscall_fmt__find(const char *name)
 968{
 969	const int nmemb = ARRAY_SIZE(syscall_fmts);
 970	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
 971}
 972
 973static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
 974{
 975	int i, nmemb = ARRAY_SIZE(syscall_fmts);
 976
 977	for (i = 0; i < nmemb; ++i) {
 978		if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0)
 979			return &syscall_fmts[i];
 980	}
 981
 982	return NULL;
 983}
 984
 985/*
 986 * is_exit: is this "exit" or "exit_group"?
 987 * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
 988 * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
 989 * nonexistent: Just a hole in the syscall table, syscall id not allocated
 990 */
 991struct syscall {
 992	struct tep_event    *tp_format;
 993	int		    nr_args;
 994	int		    args_size;
 995	struct {
 996		struct bpf_program *sys_enter,
 997				   *sys_exit;
 998	}		    bpf_prog;
 999	bool		    is_exit;
1000	bool		    is_open;
1001	bool		    nonexistent;
1002	struct tep_format_field *args;
1003	const char	    *name;
 
1004	struct syscall_fmt  *fmt;
1005	struct syscall_arg_fmt *arg_fmt;
1006};
1007
/*
 * Value type shared with the BPF side, so the layout (field order, types and
 * array length) must match what is in the BPF program:
 *
 * tools/perf/examples/bpf/augmented_raw_syscalls.c
 *
 * Do not reorder or resize the members here without changing that file too.
 */
struct bpf_map_syscall_entry {
	bool	enabled;
	u16	string_args_len[6];
};
1017
1018/*
1019 * We need to have this 'calculated' boolean because in some cases we really
1020 * don't know what is the duration of a syscall, for instance, when we start
1021 * a session and some threads are waiting for a syscall to finish, say 'poll',
1022 * in which case all we can do is to print "( ? ) for duration and for the
1023 * start timestamp.
1024 */
1025static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
1026{
1027	double duration = (double)t / NSEC_PER_MSEC;
1028	size_t printed = fprintf(fp, "(");
1029
1030	if (!calculated)
1031		printed += fprintf(fp, "         ");
1032	else if (duration >= 1.0)
1033		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1034	else if (duration >= 0.01)
1035		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1036	else
1037		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1038	return printed + fprintf(fp, "): ");
1039}
1040
1041/**
1042 * filename.ptr: The filename char pointer that will be vfs_getname'd
1043 * filename.entry_str_pos: Where to insert the string translated from
1044 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
1045 * ret_scnprintf: syscall args may set this to a different syscall return
1046 *                formatter, for instance, fcntl may return fds, file flags, etc.
1047 */
1048struct thread_trace {
1049	u64		  entry_time;
 
1050	bool		  entry_pending;
1051	unsigned long	  nr_events;
1052	unsigned long	  pfmaj, pfmin;
1053	char		  *entry_str;
1054	double		  runtime_ms;
1055	size_t		  (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1056        struct {
1057		unsigned long ptr;
1058		short int     entry_str_pos;
1059		bool	      pending_open;
1060		unsigned int  namelen;
1061		char	      *name;
1062	} filename;
1063	struct {
1064		int	      max;
1065		struct file   *table;
1066	} files;
1067
1068	struct intlist *syscall_stats;
1069};
1070
1071static struct thread_trace *thread_trace__new(void)
1072{
1073	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1074
1075	if (ttrace) {
1076		ttrace->files.max = -1;
1077		ttrace->syscall_stats = intlist__new(NULL);
1078	}
1079
1080	return ttrace;
1081}
1082
1083static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1084{
1085	struct thread_trace *ttrace;
1086
1087	if (thread == NULL)
1088		goto fail;
1089
1090	if (thread__priv(thread) == NULL)
1091		thread__set_priv(thread, thread_trace__new());
1092
1093	if (thread__priv(thread) == NULL)
1094		goto fail;
1095
1096	ttrace = thread__priv(thread);
1097	++ttrace->nr_events;
1098
1099	return ttrace;
1100fail:
1101	color_fprintf(fp, PERF_COLOR_RED,
1102		      "WARNING: not enough memory, dropping samples!\n");
1103	return NULL;
1104}
1105
1106
1107void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
1108				    size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
1109{
1110	struct thread_trace *ttrace = thread__priv(arg->thread);
1111
1112	ttrace->ret_scnprintf = ret_scnprintf;
1113}
1114
/*
 * Bit flags; NOTE(review): presumably select major/minor page fault tracing,
 * the users are outside this part of the file - confirm.
 */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/*
 * NOTE(review): presumably the size of the per thread entry_str buffer, the
 * allocation is outside this part of the file - confirm.
 */
static const size_t trace__entry_str_size = 2048;
1119
1120static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1121{
1122	if (fd < 0)
1123		return NULL;
1124
1125	if (fd > ttrace->files.max) {
1126		struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
1127
1128		if (nfiles == NULL)
1129			return NULL;
1130
1131		if (ttrace->files.max != -1) {
1132			memset(nfiles + ttrace->files.max + 1, 0,
1133			       (fd - ttrace->files.max) * sizeof(struct file));
1134		} else {
1135			memset(nfiles, 0, (fd + 1) * sizeof(struct file));
1136		}
1137
1138		ttrace->files.table = nfiles;
1139		ttrace->files.max   = fd;
1140	}
1141
1142	return ttrace->files.table + fd;
1143}
1144
/*
 * Same as thread_trace__files_entry(), starting from the thread: looks up (or
 * creates) the fd table slot for @fd in the thread_trace attached to @thread.
 */
struct file *thread__files_entry(struct thread *thread, int fd)
{
	struct thread_trace *ttrace = thread__priv(thread);

	return thread_trace__files_entry(ttrace, fd);
}
1149
1150static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1151{
1152	struct thread_trace *ttrace = thread__priv(thread);
1153	struct file *file = thread_trace__files_entry(ttrace, fd);
1154
1155	if (file != NULL) {
1156		struct stat st;
1157		if (stat(pathname, &st) == 0)
1158			file->dev_maj = major(st.st_rdev);
1159		file->pathname = strdup(pathname);
1160		if (file->pathname)
1161			return 0;
1162	}
1163
1164	return -1;
1165}
1166
1167static int thread__read_fd_path(struct thread *thread, int fd)
1168{
1169	char linkname[PATH_MAX], pathname[PATH_MAX];
1170	struct stat st;
1171	int ret;
1172
1173	if (thread->pid_ == thread->tid) {
1174		scnprintf(linkname, sizeof(linkname),
1175			  "/proc/%d/fd/%d", thread->pid_, fd);
1176	} else {
1177		scnprintf(linkname, sizeof(linkname),
1178			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1179	}
1180
1181	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1182		return -1;
1183
1184	ret = readlink(linkname, pathname, sizeof(pathname));
1185
1186	if (ret < 0 || ret > st.st_size)
1187		return -1;
1188
1189	pathname[ret] = '\0';
1190	return trace__set_fd_pathname(thread, fd, pathname);
1191}
1192
1193static const char *thread__fd_path(struct thread *thread, int fd,
1194				   struct trace *trace)
1195{
1196	struct thread_trace *ttrace = thread__priv(thread);
1197
1198	if (ttrace == NULL || trace->fd_path_disabled)
1199		return NULL;
1200
1201	if (fd < 0)
1202		return NULL;
1203
1204	if ((fd > ttrace->files.max || ttrace->files.table[fd].pathname == NULL)) {
1205		if (!trace->live)
1206			return NULL;
1207		++trace->stats.proc_getname;
1208		if (thread__read_fd_path(thread, fd))
1209			return NULL;
1210	}
1211
1212	return ttrace->files.table[fd].pathname;
1213}
1214
1215size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
 
1216{
1217	int fd = arg->val;
1218	size_t printed = scnprintf(bf, size, "%d", fd);
1219	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1220
1221	if (path)
1222		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1223
1224	return printed;
1225}
1226
1227size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1228{
1229        size_t printed = scnprintf(bf, size, "%d", fd);
1230	struct thread *thread = machine__find_thread(trace->host, pid, pid);
1231
1232	if (thread) {
1233		const char *path = thread__fd_path(thread, fd, trace);
1234
1235		if (path)
1236			printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1237
1238		thread__put(thread);
1239	}
1240
1241        return printed;
1242}
1243
1244static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1245					      struct syscall_arg *arg)
1246{
1247	int fd = arg->val;
1248	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1249	struct thread_trace *ttrace = thread__priv(arg->thread);
1250
1251	if (ttrace && fd >= 0 && fd <= ttrace->files.max)
1252		zfree(&ttrace->files.table[fd].pathname);
1253
1254	return printed;
1255}
1256
1257static void thread__set_filename_pos(struct thread *thread, const char *bf,
1258				     unsigned long ptr)
1259{
1260	struct thread_trace *ttrace = thread__priv(thread);
1261
1262	ttrace->filename.ptr = ptr;
1263	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1264}
1265
1266static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
1267{
1268	struct augmented_arg *augmented_arg = arg->augmented.args;
1269	size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
1270	/*
1271	 * So that the next arg with a payload can consume its augmented arg, i.e. for rename* syscalls
1272	 * we would have two strings, each prefixed by its size.
1273	 */
1274	int consumed = sizeof(*augmented_arg) + augmented_arg->size;
1275
1276	arg->augmented.args = ((void *)arg->augmented.args) + consumed;
1277	arg->augmented.size -= consumed;
1278
1279	return printed;
1280}
1281
1282static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1283					      struct syscall_arg *arg)
1284{
1285	unsigned long ptr = arg->val;
1286
1287	if (arg->augmented.args)
1288		return syscall_arg__scnprintf_augmented_string(arg, bf, size);
1289
1290	if (!arg->trace->vfs_getname)
1291		return scnprintf(bf, size, "%#x", ptr);
1292
1293	thread__set_filename_pos(arg->thread, bf, ptr);
1294	return 0;
1295}
1296
1297static bool trace__filter_duration(struct trace *trace, double t)
1298{
1299	return t < (trace->duration_filter * NSEC_PER_MSEC);
1300}
1301
1302static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1303{
1304	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1305
1306	return fprintf(fp, "%10.3f ", ts);
1307}
1308
1309/*
1310 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1311 * using ttrace->entry_time for a thread that receives a sys_exit without
1312 * first having received a sys_enter ("poll" issued before tracing session
1313 * starts, lost sys_enter exit due to ring buffer overflow).
1314 */
1315static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1316{
1317	if (tstamp > 0)
1318		return __trace__fprintf_tstamp(trace, tstamp, fp);
1319
1320	return fprintf(fp, "         ? ");
1321}
1322
/*
 * Flags written from sig_handler().  They are volatile sig_atomic_t because
 * that is the object type a signal handler may portably write to and the
 * interrupted code may read back; a plain bool gives no such guarantee.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: ask for termination and record whether the request came
 * from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1331
1332static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
 
1333{
1334	size_t printed = 0;
 
1335
1336	if (trace->multiple_threads) {
1337		if (trace->show_comm)
1338			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1339		printed += fprintf(fp, "%d ", thread->tid);
1340	}
1341
1342	return printed;
1343}
1344
1345static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1346					u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1347{
1348	size_t printed = 0;
1349
1350	if (trace->show_tstamp)
1351		printed = trace__fprintf_tstamp(trace, tstamp, fp);
1352	if (trace->show_duration)
1353		printed += fprintf_duration(duration, duration_calculated, fp);
1354	return printed + trace__fprintf_comm_tid(trace, thread, fp);
1355}
1356
1357static int trace__process_event(struct trace *trace, struct machine *machine,
1358				union perf_event *event, struct perf_sample *sample)
1359{
1360	int ret = 0;
1361
1362	switch (event->header.type) {
1363	case PERF_RECORD_LOST:
1364		color_fprintf(trace->output, PERF_COLOR_RED,
1365			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1366		ret = machine__process_lost_event(machine, event, sample);
1367		break;
1368	default:
1369		ret = machine__process_event(machine, event, sample);
1370		break;
1371	}
1372
1373	return ret;
1374}
1375
1376static int trace__tool_process(struct perf_tool *tool,
1377			       union perf_event *event,
1378			       struct perf_sample *sample,
1379			       struct machine *machine)
1380{
1381	struct trace *trace = container_of(tool, struct trace, tool);
1382	return trace__process_event(trace, machine, event, sample);
1383}
1384
1385static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1386{
1387	struct machine *machine = vmachine;
1388
1389	if (machine->kptr_restrict_warned)
1390		return NULL;
1391
1392	if (symbol_conf.kptr_restrict) {
1393		pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1394			   "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1395			   "Kernel samples will not be resolved.\n");
1396		machine->kptr_restrict_warned = true;
1397		return NULL;
1398	}
1399
1400	return machine__resolve_kernel_addr(vmachine, addrp, modp);
1401}
1402
1403static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
1404{
1405	int err = symbol__init(NULL);
1406
1407	if (err)
1408		return err;
1409
1410	trace->host = machine__new_host();
1411	if (trace->host == NULL)
1412		return -ENOMEM;
1413
1414	err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1415	if (err < 0)
1416		goto out;
1417
1418	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1419					    evlist->core.threads, trace__tool_process, false,
1420					    1);
1421out:
1422	if (err)
1423		symbol__exit();
1424
1425	return err;
1426}
1427
1428static void trace__symbols__exit(struct trace *trace)
1429{
1430	machine__exit(trace->host);
1431	trace->host = NULL;
1432
1433	symbol__exit();
1434}
1435
1436static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
1437{
1438	int idx;
1439
1440	if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1441		nr_args = sc->fmt->nr_args;
1442
1443	sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
1444	if (sc->arg_fmt == NULL)
1445		return -1;
1446
1447	for (idx = 0; idx < nr_args; ++idx) {
1448		if (sc->fmt)
1449			sc->arg_fmt[idx] = sc->fmt->arg[idx];
1450	}
1451
1452	sc->nr_args = nr_args;
1453	return 0;
1454}
1455
1456static int syscall__set_arg_fmts(struct syscall *sc)
1457{
1458	struct tep_format_field *field, *last_field = NULL;
1459	int idx = 0, len;
1460
1461	for (field = sc->args; field; field = field->next, ++idx) {
1462		last_field = field;
 
1463
1464		if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1465			continue;
1466
1467		len = strlen(field->name);
1468
1469		if (strcmp(field->type, "const char *") == 0 &&
1470		    ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
1471		     strstr(field->name, "path") != NULL))
1472			sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
1473		else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
1474			sc->arg_fmt[idx].scnprintf = SCA_PTR;
1475		else if (strcmp(field->type, "pid_t") == 0)
1476			sc->arg_fmt[idx].scnprintf = SCA_PID;
1477		else if (strcmp(field->type, "umode_t") == 0)
1478			sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
1479		else if ((strcmp(field->type, "int") == 0 ||
1480			  strcmp(field->type, "unsigned int") == 0 ||
1481			  strcmp(field->type, "long") == 0) &&
1482			 len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
1483			/*
1484			 * /sys/kernel/tracing/events/syscalls/sys_enter*
1485			 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1486			 * 65 int
1487			 * 23 unsigned int
1488			 * 7 unsigned long
1489			 */
1490			sc->arg_fmt[idx].scnprintf = SCA_FD;
1491		}
1492	}
1493
1494	if (last_field)
1495		sc->args_size = last_field->offset + last_field->size;
1496
1497	return 0;
1498}
1499
1500static int trace__read_syscall_info(struct trace *trace, int id)
1501{
1502	char tp_name[128];
1503	struct syscall *sc;
1504	const char *name = syscalltbl__name(trace->sctbl, id);
1505
1506	if (trace->syscalls.table == NULL) {
1507		trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
1508		if (trace->syscalls.table == NULL)
1509			return -ENOMEM;
1510	}
1511
1512	sc = trace->syscalls.table + id;
1513	if (sc->nonexistent)
1514		return 0;
1515
1516	if (name == NULL) {
1517		sc->nonexistent = true;
1518		return 0;
 
 
 
 
 
 
 
 
 
1519	}
1520
 
1521	sc->name = name;
 
1522	sc->fmt  = syscall_fmt__find(sc->name);
1523
1524	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1525	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1526
1527	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1528		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1529		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1530	}
1531
1532	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1533		return -ENOMEM;
1534
1535	if (IS_ERR(sc->tp_format))
1536		return PTR_ERR(sc->tp_format);
1537
1538	sc->args = sc->tp_format->format.fields;
 
1539	/*
1540	 * We need to check and discard the first variable '__syscall_nr'
1541	 * or 'nr' that mean the syscall number. It is needless here.
1542	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1543	 */
1544	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1545		sc->args = sc->args->next;
1546		--sc->nr_args;
1547	}
1548
1549	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1550	sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
1551
1552	return syscall__set_arg_fmts(sc);
1553}
1554
/*
 * qsort()/bsearch() comparator for arrays of int.
 *
 * Returns a negative value, zero or a positive value when *a is less than,
 * equal to or greater than *b.  The result is computed with comparisons
 * rather than "*a - *b", since the subtraction overflows, and can report the
 * wrong order, for operands that are far apart.
 */
static int intcmp(const void *a, const void *b)
{
	const int one = *(const int *)a, another = *(const int *)b;

	return (one > another) - (one < another);
}
1561
1562static int trace__validate_ev_qualifier(struct trace *trace)
1563{
1564	int err = 0;
1565	bool printed_invalid_prefix = false;
1566	struct str_node *pos;
1567	size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier);
1568
1569	trace->ev_qualifier_ids.entries = malloc(nr_allocated *
 
1570						 sizeof(trace->ev_qualifier_ids.entries[0]));
1571
1572	if (trace->ev_qualifier_ids.entries == NULL) {
1573		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1574		       trace->output);
1575		err = -EINVAL;
1576		goto out;
1577	}
1578
1579	strlist__for_each_entry(pos, trace->ev_qualifier) {
 
 
1580		const char *sc = pos->s;
1581		int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
1582
1583		if (id < 0) {
1584			id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1585			if (id >= 0)
1586				goto matches;
1587
1588			if (!printed_invalid_prefix) {
1589				pr_debug("Skipping unknown syscalls: ");
1590				printed_invalid_prefix = true;
1591			} else {
1592				pr_debug(", ");
1593			}
1594
1595			pr_debug("%s", sc);
1596			continue;
1597		}
1598matches:
1599		trace->ev_qualifier_ids.entries[nr_used++] = id;
1600		if (match_next == -1)
1601			continue;
1602
1603		while (1) {
1604			id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1605			if (id < 0)
1606				break;
1607			if (nr_allocated == nr_used) {
1608				void *entries;
1609
1610				nr_allocated += 8;
1611				entries = realloc(trace->ev_qualifier_ids.entries,
1612						  nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1613				if (entries == NULL) {
1614					err = -ENOMEM;
1615					fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1616					goto out_free;
1617				}
1618				trace->ev_qualifier_ids.entries = entries;
1619			}
1620			trace->ev_qualifier_ids.entries[nr_used++] = id;
1621		}
 
 
1622	}
1623
1624	trace->ev_qualifier_ids.nr = nr_used;
1625	qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp);
 
 
 
 
1626out:
1627	if (printed_invalid_prefix)
1628		pr_debug("\n");
1629	return err;
1630out_free:
1631	zfree(&trace->ev_qualifier_ids.entries);
1632	trace->ev_qualifier_ids.nr = 0;
1633	goto out;
1634}
1635
1636static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
1637{
1638	bool in_ev_qualifier;
1639
1640	if (trace->ev_qualifier_ids.nr == 0)
1641		return true;
1642
1643	in_ev_qualifier = bsearch(&id, trace->ev_qualifier_ids.entries,
1644				  trace->ev_qualifier_ids.nr, sizeof(int), intcmp) != NULL;
1645
1646	if (in_ev_qualifier)
1647	       return !trace->not_ev_qualifier;
1648
1649	return trace->not_ev_qualifier;
1650}
1651
1652/*
1653 * args is to be interpreted as a series of longs but we need to handle
1654 * 8-byte unaligned accesses. args points to raw_data within the event
1655 * and raw_data is guaranteed to be 8-byte unaligned because it is
1656 * preceded by raw_size which is a u32. So we need to copy args to a temp
1657 * variable to read it. Most notably this avoids extended load instructions
1658 * on unaligned addresses
1659 */
1660unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
1661{
1662	unsigned long val;
1663	unsigned char *p = arg->args + sizeof(unsigned long) * idx;
1664
1665	memcpy(&val, p, sizeof(val));
1666	return val;
1667}
1668
1669static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1670				      struct syscall_arg *arg)
1671{
1672	if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1673		return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1674
1675	return scnprintf(bf, size, "arg%d: ", arg->idx);
1676}
1677
1678/*
1679 * Check if the value is in fact zero, i.e. mask whatever needs masking, such
1680 * as mount 'flags' argument that needs ignoring some magic flag, see comment
1681 * in tools/perf/trace/beauty/mount_flags.c
1682 */
1683static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val)
1684{
1685	if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val)
1686		return sc->arg_fmt[arg->idx].mask_val(arg, val);
1687
1688	return val;
1689}
1690
1691static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
1692				     struct syscall_arg *arg, unsigned long val)
1693{
1694	if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
1695		arg->val = val;
1696		if (sc->arg_fmt[arg->idx].parm)
1697			arg->parm = sc->arg_fmt[arg->idx].parm;
1698		return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
1699	}
1700	return scnprintf(bf, size, "%ld", val);
1701}
1702
1703static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1704				      unsigned char *args, void *augmented_args, int augmented_args_size,
1705				      struct trace *trace, struct thread *thread)
1706{
1707	size_t printed = 0;
 
1708	unsigned long val;
1709	u8 bit = 1;
1710	struct syscall_arg arg = {
1711		.args	= args,
1712		.augmented = {
1713			.size = augmented_args_size,
1714			.args = augmented_args,
1715		},
1716		.idx	= 0,
1717		.mask	= 0,
1718		.trace  = trace,
1719		.thread = thread,
1720		.show_string_prefix = trace->show_string_prefix,
1721	};
1722	struct thread_trace *ttrace = thread__priv(thread);
1723
1724	/*
1725	 * Things like fcntl will set this in its 'cmd' formatter to pick the
1726	 * right formatter for the return value (an fd? file flags?), which is
1727	 * not needed for syscalls that always return a given type, say an fd.
1728	 */
1729	ttrace->ret_scnprintf = NULL;
1730
1731	if (sc->args != NULL) {
1732		struct tep_format_field *field;
 
 
 
 
 
 
 
1733
1734		for (field = sc->args; field;
1735		     field = field->next, ++arg.idx, bit <<= 1) {
1736			if (arg.mask & bit)
1737				continue;
1738
1739			val = syscall_arg__val(&arg, arg.idx);
1740			/*
1741			 * Some syscall args need some mask, most don't and
1742			 * return val untouched.
1743			 */
1744			val = syscall__mask_val(sc, &arg, val);
1745
1746			/*
1747 			 * Suppress this argument if its value is zero and
1748 			 * and we don't have a string associated in an
1749 			 * strarray for it.
1750 			 */
1751			if (val == 0 &&
1752			    !trace->show_zeros &&
1753			    !(sc->arg_fmt &&
1754			      (sc->arg_fmt[arg.idx].show_zero ||
1755			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
1756			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
1757			      sc->arg_fmt[arg.idx].parm))
1758				continue;
1759
1760			printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
1761
1762			if (trace->show_arg_names)
1763				printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
1764
1765			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
 
 
 
 
 
 
1766		}
1767	} else if (IS_ERR(sc->tp_format)) {
1768		/*
1769		 * If we managed to read the tracepoint /format file, then we
1770		 * may end up not having any args, like with gettid(), so only
1771		 * print the raw args when we didn't manage to read it.
1772		 */
1773		while (arg.idx < sc->nr_args) {
1774			if (arg.mask & bit)
1775				goto next_arg;
1776			val = syscall_arg__val(&arg, arg.idx);
1777			if (printed)
1778				printed += scnprintf(bf + printed, size - printed, ", ");
1779			printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
1780			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
1781next_arg:
1782			++arg.idx;
1783			bit <<= 1;
1784		}
1785	}
1786
1787	return printed;
1788}
1789
/*
 * Signature of a per tracepoint sample handler: gets the trace session, the
 * evsel the sample came from, the raw event and the parsed sample, and
 * returns an int status.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1793
1794static struct syscall *trace__syscall_info(struct trace *trace,
1795					   struct evsel *evsel, int id)
1796{
1797	int err = 0;
1798
1799	if (id < 0) {
1800
1801		/*
1802		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1803		 * before that, leaving at a higher verbosity level till that is
1804		 * explained. Reproduced with plain ftrace with:
1805		 *
1806		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1807		 * grep "NR -1 " /t/trace_pipe
1808		 *
1809		 * After generating some load on the machine.
1810 		 */
1811		if (verbose > 1) {
1812			static u64 n;
1813			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1814				id, perf_evsel__name(evsel), ++n);
1815		}
1816		return NULL;
1817	}
1818
1819	err = -EINVAL;
1820
1821	if (id > trace->sctbl->syscalls.max_id)
1822		goto out_cant_read;
1823
1824	if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
1825	    (err = trace__read_syscall_info(trace, id)) != 0)
1826		goto out_cant_read;
1827
1828	if (trace->syscalls.table[id].name == NULL) {
1829		if (trace->syscalls.table[id].nonexistent)
1830			return NULL;
1831		goto out_cant_read;
1832	}
1833
1834	return &trace->syscalls.table[id];
1835
1836out_cant_read:
1837	if (verbose > 0) {
1838		char sbuf[STRERR_BUFSIZE];
1839		fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
1840		if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
1841			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1842		fputs(" information\n", trace->output);
1843	}
1844	return NULL;
1845}
1846
1847static void thread__update_stats(struct thread_trace *ttrace,
1848				 int id, struct perf_sample *sample)
1849{
1850	struct int_node *inode;
1851	struct stats *stats;
1852	u64 duration = 0;
1853
1854	inode = intlist__findnew(ttrace->syscall_stats, id);
1855	if (inode == NULL)
1856		return;
1857
1858	stats = inode->priv;
1859	if (stats == NULL) {
1860		stats = malloc(sizeof(struct stats));
1861		if (stats == NULL)
1862			return;
1863		init_stats(stats);
1864		inode->priv = stats;
1865	}
1866
1867	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1868		duration = sample->time - ttrace->entry_time;
1869
1870	update_stats(stats, duration);
1871}
1872
1873static int trace__printf_interrupted_entry(struct trace *trace)
1874{
1875	struct thread_trace *ttrace;
 
1876	size_t printed;
1877	int len;
1878
1879	if (trace->failure_only || trace->current == NULL)
1880		return 0;
1881
1882	ttrace = thread__priv(trace->current);
1883
1884	if (!ttrace->entry_pending)
1885		return 0;
1886
1887	printed  = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
1888	printed += len = fprintf(trace->output, "%s)", ttrace->entry_str);
1889
1890	if (len < trace->args_alignment - 4)
1891		printed += fprintf(trace->output, "%-*s", trace->args_alignment - 4 - len, " ");
1892
1893	printed += fprintf(trace->output, " ...\n");
1894
 
 
1895	ttrace->entry_pending = false;
1896	++trace->nr_events_printed;
1897
1898	return printed;
1899}
1900
1901static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
1902				 struct perf_sample *sample, struct thread *thread)
1903{
1904	int printed = 0;
1905
1906	if (trace->print_sample) {
1907		double ts = (double)sample->time / NSEC_PER_MSEC;
1908
1909		printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
1910				   perf_evsel__name(evsel), ts,
1911				   thread__comm_str(thread),
1912				   sample->pid, sample->tid, sample->cpu);
1913	}
1914
1915	return printed;
1916}
1917
1918static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
1919{
1920	void *augmented_args = NULL;
1921	/*
1922	 * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
1923	 * and there we get all 6 syscall args plus the tracepoint common fields
1924	 * that gets calculated at the start and the syscall_nr (another long).
1925	 * So we check if that is the case and if so don't look after the
1926	 * sc->args_size but always after the full raw_syscalls:sys_enter payload,
1927	 * which is fixed.
1928	 *
1929	 * We'll revisit this later to pass s->args_size to the BPF augmenter
1930	 * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
1931	 * copies only what we need for each syscall, like what happens when we
1932	 * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace
1933	 * traffic to just what is needed for each syscall.
1934	 */
1935	int args_size = raw_augmented_args_size ?: sc->args_size;
1936
1937	*augmented_args_size = sample->raw_size - args_size;
1938	if (*augmented_args_size > 0)
1939		augmented_args = sample->raw_data + args_size;
1940
1941	return augmented_args;
1942}
1943
1944static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
1945			    union perf_event *event __maybe_unused,
1946			    struct perf_sample *sample)
1947{
1948	char *msg;
1949	void *args;
1950	int printed = 0;
1951	struct thread *thread;
1952	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1953	int augmented_args_size = 0;
1954	void *augmented_args = NULL;
1955	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1956	struct thread_trace *ttrace;
1957
1958	if (sc == NULL)
1959		return -1;
1960
1961	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1962	ttrace = thread__trace(thread, trace->output);
1963	if (ttrace == NULL)
1964		goto out_put;
1965
1966	trace__fprintf_sample(trace, evsel, sample, thread);
1967
1968	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1969
1970	if (ttrace->entry_str == NULL) {
1971		ttrace->entry_str = malloc(trace__entry_str_size);
1972		if (!ttrace->entry_str)
1973			goto out_put;
1974	}
1975
1976	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
1977		trace__printf_interrupted_entry(trace);
1978	/*
1979	 * If this is raw_syscalls.sys_enter, then it always comes with the 6 possible
1980	 * arguments, even if the syscall being handled, say "openat", uses only 4 arguments
1981	 * this breaks syscall__augmented_args() check for augmented args, as we calculate
1982	 * syscall->args_size using each syscalls:sys_enter_NAME tracefs format file,
1983	 * so when handling, say the openat syscall, we end up getting 6 args for the
1984	 * raw_syscalls:sys_enter event, when we expected just 4, we end up mistakenly
1985	 * thinking that the extra 2 u64 args are the augmented filename, so just check
1986	 * here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
1987	 */
1988	if (evsel != trace->syscalls.events.sys_enter)
1989		augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
1990	ttrace->entry_time = sample->time;
1991	msg = ttrace->entry_str;
1992	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1993
1994	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1995					   args, augmented_args, augmented_args_size, trace, thread);
1996
1997	if (sc->is_exit) {
1998		if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
1999			int alignment = 0;
2000
2001			trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
2002			printed = fprintf(trace->output, "%s)", ttrace->entry_str);
2003			if (trace->args_alignment > printed)
2004				alignment = trace->args_alignment - printed;
2005			fprintf(trace->output, "%*s= ?\n", alignment, " ");
2006		}
2007	} else {
2008		ttrace->entry_pending = true;
2009		/* See trace__vfs_getname & trace__sys_exit */
2010		ttrace->filename.pending_open = false;
2011	}
2012
2013	if (trace->current != thread) {
2014		thread__put(trace->current);
2015		trace->current = thread__get(thread);
2016	}
2017	err = 0;
2018out_put:
2019	thread__put(thread);
2020	return err;
2021}
2022
2023static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
2024				    struct perf_sample *sample)
2025{
2026	struct thread_trace *ttrace;
2027	struct thread *thread;
2028	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2029	struct syscall *sc = trace__syscall_info(trace, evsel, id);
2030	char msg[1024];
2031	void *args, *augmented_args = NULL;
2032	int augmented_args_size;
2033
2034	if (sc == NULL)
2035		return -1;
2036
2037	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2038	ttrace = thread__trace(thread, trace->output);
2039	/*
2040	 * We need to get ttrace just to make sure it is there when syscall__scnprintf_args()
2041	 * and the rest of the beautifiers accessing it via struct syscall_arg touches it.
2042	 */
2043	if (ttrace == NULL)
2044		goto out_put;
2045
2046	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2047	augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
2048	syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
2049	fprintf(trace->output, "%s", msg);
2050	err = 0;
2051out_put:
2052	thread__put(thread);
2053	return err;
2054}
2055
2056static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
2057				    struct perf_sample *sample,
2058				    struct callchain_cursor *cursor)
2059{
2060	struct addr_location al;
2061	int max_stack = evsel->core.attr.sample_max_stack ?
2062			evsel->core.attr.sample_max_stack :
2063			trace->max_stack;
2064	int err;
2065
2066	if (machine__resolve(trace->host, &al, sample) < 0)
2067		return -1;
2068
2069	err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
2070	addr_location__put(&al);
2071	return err;
2072}
2073
2074static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
2075{
2076	/* TODO: user-configurable print_opts */
2077	const unsigned int print_opts = EVSEL__PRINT_SYM |
2078				        EVSEL__PRINT_DSO |
2079				        EVSEL__PRINT_UNKNOWN_AS_ADDR;
2080
2081	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output);
2082}
2083
/*
 * Translate the errno value @err to its name, using the errno table for the
 * architecture recorded in the perf_env associated with @evsel.
 */
static const char *errno_to_name(struct evsel *evsel, int err)
{
	const char *arch = perf_env__arch(perf_evsel__env(evsel));

	return arch_syscalls__strerrno(arch, err);
}
2091
2092static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
2093			   union perf_event *event __maybe_unused,
2094			   struct perf_sample *sample)
2095{
2096	long ret;
2097	u64 duration = 0;
2098	bool duration_calculated = false;
2099	struct thread *thread;
2100	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
2101	int alignment = trace->args_alignment;
2102	struct syscall *sc = trace__syscall_info(trace, evsel, id);
2103	struct thread_trace *ttrace;
2104
2105	if (sc == NULL)
2106		return -1;
2107
2108	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2109	ttrace = thread__trace(thread, trace->output);
2110	if (ttrace == NULL)
2111		goto out_put;
2112
2113	trace__fprintf_sample(trace, evsel, sample, thread);
2114
2115	if (trace->summary)
2116		thread__update_stats(ttrace, id, sample);
2117
2118	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2119
2120	if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
2121		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2122		ttrace->filename.pending_open = false;
2123		++trace->stats.vfs_getname;
2124	}
2125
 
 
2126	if (ttrace->entry_time) {
2127		duration = sample->time - ttrace->entry_time;
2128		if (trace__filter_duration(trace, duration))
2129			goto out;
2130		duration_calculated = true;
2131	} else if (trace->duration_filter)
2132		goto out;
2133
2134	if (sample->callchain) {
2135		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2136		if (callchain_ret == 0) {
2137			if (callchain_cursor.nr < trace->min_stack)
2138				goto out;
2139			callchain_ret = 1;
2140		}
2141	}
2142
2143	if (trace->summary_only || (ret >= 0 && trace->failure_only))
2144		goto out;
2145
2146	trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
2147
2148	if (ttrace->entry_pending) {
2149		printed = fprintf(trace->output, "%s", ttrace->entry_str);
2150	} else {
2151		printed += fprintf(trace->output, " ... [");
2152		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2153		printed += 9;
2154		printed += fprintf(trace->output, "]: %s()", sc->name);
2155	}
2156
2157	printed++; /* the closing ')' */
2158
2159	if (alignment > printed)
2160		alignment -= printed;
2161	else
2162		alignment = 0;
2163
2164	fprintf(trace->output, ")%*s= ", alignment, " ");
2165
2166	if (sc->fmt == NULL) {
2167		if (ret < 0)
2168			goto errno_print;
2169signed_print:
2170		fprintf(trace->output, "%ld", ret);
2171	} else if (ret < 0) {
2172errno_print: {
2173		char bf[STRERR_BUFSIZE];
2174		const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
2175			   *e = errno_to_name(evsel, -ret);
2176
2177		fprintf(trace->output, "-1 %s (%s)", e, emsg);
2178	}
2179	} else if (ret == 0 && sc->fmt->timeout)
2180		fprintf(trace->output, "0 (Timeout)");
2181	else if (ttrace->ret_scnprintf) {
2182		char bf[1024];
2183		struct syscall_arg arg = {
2184			.val	= ret,
2185			.thread	= thread,
2186			.trace	= trace,
2187		};
2188		ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
2189		ttrace->ret_scnprintf = NULL;
2190		fprintf(trace->output, "%s", bf);
2191	} else if (sc->fmt->hexret)
2192		fprintf(trace->output, "%#lx", ret);
2193	else if (sc->fmt->errpid) {
2194		struct thread *child = machine__find_thread(trace->host, ret, ret);
2195
2196		if (child != NULL) {
2197			fprintf(trace->output, "%ld", ret);
2198			if (child->comm_set)
2199				fprintf(trace->output, " (%s)", thread__comm_str(child));
2200			thread__put(child);
2201		}
2202	} else
2203		goto signed_print;
2204
2205	fputc('\n', trace->output);
2206
2207	/*
2208	 * We only consider an 'event' for the sake of --max-events a non-filtered
2209	 * sys_enter + sys_exit and other tracepoint events.
2210	 */
2211	if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
2212		interrupted = true;
2213
2214	if (callchain_ret > 0)
2215		trace__fprintf_callchain(trace, sample);
2216	else if (callchain_ret < 0)
2217		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2218out:
2219	ttrace->entry_pending = false;
2220	err = 0;
2221out_put:
2222	thread__put(thread);
2223	return err;
2224}
2225
2226static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
2227			      union perf_event *event __maybe_unused,
2228			      struct perf_sample *sample)
2229{
2230	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2231	struct thread_trace *ttrace;
2232	size_t filename_len, entry_str_len, to_move;
2233	ssize_t remaining_space;
2234	char *pos;
2235	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2236
2237	if (!thread)
2238		goto out;
2239
2240	ttrace = thread__priv(thread);
2241	if (!ttrace)
2242		goto out_put;
2243
2244	filename_len = strlen(filename);
2245	if (filename_len == 0)
2246		goto out_put;
2247
2248	if (ttrace->filename.namelen < filename_len) {
2249		char *f = realloc(ttrace->filename.name, filename_len + 1);
2250
2251		if (f == NULL)
2252			goto out_put;
2253
2254		ttrace->filename.namelen = filename_len;
2255		ttrace->filename.name = f;
2256	}
2257
2258	strcpy(ttrace->filename.name, filename);
2259	ttrace->filename.pending_open = true;
2260
2261	if (!ttrace->filename.ptr)
2262		goto out_put;
2263
2264	entry_str_len = strlen(ttrace->entry_str);
2265	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2266	if (remaining_space <= 0)
2267		goto out_put;
2268
2269	if (filename_len > (size_t)remaining_space) {
2270		filename += filename_len - remaining_space;
2271		filename_len = remaining_space;
2272	}
2273
2274	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2275	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2276	memmove(pos + filename_len, pos, to_move);
2277	memcpy(pos, filename, filename_len);
2278
2279	ttrace->filename.ptr = 0;
2280	ttrace->filename.entry_str_pos = 0;
2281out_put:
2282	thread__put(thread);
2283out:
2284	return 0;
2285}
2286
2287static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
2288				     union perf_event *event __maybe_unused,
2289				     struct perf_sample *sample)
2290{
2291        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2292	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2293	struct thread *thread = machine__findnew_thread(trace->host,
2294							sample->pid,
2295							sample->tid);
2296	struct thread_trace *ttrace = thread__trace(thread, trace->output);
2297
2298	if (ttrace == NULL)
2299		goto out_dump;
2300
2301	ttrace->runtime_ms += runtime_ms;
2302	trace->runtime_ms += runtime_ms;
2303out_put:
2304	thread__put(thread);
2305	return 0;
2306
2307out_dump:
2308	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2309	       evsel->name,
2310	       perf_evsel__strval(evsel, sample, "comm"),
2311	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2312	       runtime,
2313	       perf_evsel__intval(evsel, sample, "vruntime"));
2314	goto out_put;
 
2315}
2316
2317static int bpf_output__printer(enum binary_printer_ops op,
2318			       unsigned int val, void *extra __maybe_unused, FILE *fp)
2319{
 
2320	unsigned char ch = (unsigned char)val;
2321
2322	switch (op) {
2323	case BINARY_PRINT_CHAR_DATA:
2324		return fprintf(fp, "%c", isprint(ch) ? ch : '.');
 
2325	case BINARY_PRINT_DATA_BEGIN:
2326	case BINARY_PRINT_LINE_BEGIN:
2327	case BINARY_PRINT_ADDR:
2328	case BINARY_PRINT_NUM_DATA:
2329	case BINARY_PRINT_NUM_PAD:
2330	case BINARY_PRINT_SEP:
2331	case BINARY_PRINT_CHAR_PAD:
2332	case BINARY_PRINT_LINE_END:
2333	case BINARY_PRINT_DATA_END:
2334	default:
2335		break;
2336	}
2337
2338	return 0;
2339}
2340
2341static void bpf_output__fprintf(struct trace *trace,
2342				struct perf_sample *sample)
2343{
2344	binary__fprintf(sample->raw_data, sample->raw_size, 8,
2345			bpf_output__printer, NULL, trace->output);
2346	++trace->nr_events_printed;
2347}
2348
2349static int trace__event_handler(struct trace *trace, struct evsel *evsel,
2350				union perf_event *event __maybe_unused,
2351				struct perf_sample *sample)
2352{
2353	struct thread *thread;
2354	int callchain_ret = 0;
2355	/*
2356	 * Check if we called perf_evsel__disable(evsel) due to, for instance,
2357	 * this event's max_events having been hit and this is an entry coming
2358	 * from the ring buffer that we should discard, since the max events
2359	 * have already been considered/printed.
2360	 */
2361	if (evsel->disabled)
2362		return 0;
2363
2364	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2365
2366	if (sample->callchain) {
2367		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2368		if (callchain_ret == 0) {
2369			if (callchain_cursor.nr < trace->min_stack)
2370				goto out;
2371			callchain_ret = 1;
2372		}
2373	}
2374
2375	trace__printf_interrupted_entry(trace);
2376	trace__fprintf_tstamp(trace, sample->time, trace->output);
2377
2378	if (trace->trace_syscalls && trace->show_duration)
2379		fprintf(trace->output, "(         ): ");
2380
2381	if (thread)
2382		trace__fprintf_comm_tid(trace, thread, trace->output);
2383
2384	if (evsel == trace->syscalls.events.augmented) {
2385		int id = perf_evsel__sc_tp_uint(evsel, id, sample);
2386		struct syscall *sc = trace__syscall_info(trace, evsel, id);
2387
2388		if (sc) {
2389			fprintf(trace->output, "%s(", sc->name);
2390			trace__fprintf_sys_enter(trace, evsel, sample);
2391			fputc(')', trace->output);
2392			goto newline;
2393		}
2394
2395		/*
2396		 * XXX: Not having the associated syscall info or not finding/adding
2397		 * 	the thread should never happen, but if it does...
2398		 * 	fall thru and print it as a bpf_output event.
2399		 */
2400	}
2401
2402	fprintf(trace->output, "%s:", evsel->name);
2403
2404	if (perf_evsel__is_bpf_output(evsel)) {
2405		bpf_output__fprintf(trace, sample);
2406	} else if (evsel->tp_format) {
2407		if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
2408		    trace__fprintf_sys_enter(trace, evsel, sample)) {
2409			event_format__fprintf(evsel->tp_format, sample->cpu,
2410					      sample->raw_data, sample->raw_size,
2411					      trace->output);
2412			++trace->nr_events_printed;
2413
2414			if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
2415				evsel__disable(evsel);
2416				evsel__close(evsel);
2417			}
2418		}
2419	}
2420
2421newline:
2422	fprintf(trace->output, "\n");
2423
2424	if (callchain_ret > 0)
2425		trace__fprintf_callchain(trace, sample);
2426	else if (callchain_ret < 0)
2427		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2428out:
2429	thread__put(thread);
2430	return 0;
2431}
2432
2433static void print_location(FILE *f, struct perf_sample *sample,
2434			   struct addr_location *al,
2435			   bool print_dso, bool print_sym)
2436{
2437
2438	if ((verbose > 0 || print_dso) && al->map)
2439		fprintf(f, "%s@", al->map->dso->long_name);
2440
2441	if ((verbose > 0 || print_sym) && al->sym)
2442		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2443			al->addr - al->sym->start);
2444	else if (al->map)
2445		fprintf(f, "0x%" PRIx64, al->addr);
2446	else
2447		fprintf(f, "0x%" PRIx64, sample->addr);
2448}
2449
2450static int trace__pgfault(struct trace *trace,
2451			  struct evsel *evsel,
2452			  union perf_event *event __maybe_unused,
2453			  struct perf_sample *sample)
2454{
2455	struct thread *thread;
2456	struct addr_location al;
2457	char map_type = 'd';
2458	struct thread_trace *ttrace;
2459	int err = -1;
2460	int callchain_ret = 0;
2461
2462	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2463
2464	if (sample->callchain) {
2465		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2466		if (callchain_ret == 0) {
2467			if (callchain_cursor.nr < trace->min_stack)
2468				goto out_put;
2469			callchain_ret = 1;
2470		}
2471	}
2472
2473	ttrace = thread__trace(thread, trace->output);
2474	if (ttrace == NULL)
2475		goto out_put;
2476
2477	if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2478		ttrace->pfmaj++;
2479	else
2480		ttrace->pfmin++;
2481
2482	if (trace->summary_only)
2483		goto out;
2484
2485	thread__find_symbol(thread, sample->cpumode, sample->ip, &al);
 
2486
2487	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
2488
2489	fprintf(trace->output, "%sfault [",
2490		evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2491		"maj" : "min");
2492
2493	print_location(trace->output, sample, &al, false, true);
2494
2495	fprintf(trace->output, "] => ");
2496
2497	thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
 
2498
2499	if (!al.map) {
2500		thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
 
2501
2502		if (al.map)
2503			map_type = 'x';
2504		else
2505			map_type = '?';
2506	}
2507
2508	print_location(trace->output, sample, &al, true, false);
2509
2510	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2511
2512	if (callchain_ret > 0)
2513		trace__fprintf_callchain(trace, sample);
2514	else if (callchain_ret < 0)
2515		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2516
2517	++trace->nr_events_printed;
2518out:
2519	err = 0;
2520out_put:
2521	thread__put(thread);
2522	return err;
2523}
2524
2525static void trace__set_base_time(struct trace *trace,
2526				 struct evsel *evsel,
2527				 struct perf_sample *sample)
2528{
2529	/*
2530	 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2531	 * and don't use sample->time unconditionally, we may end up having
2532	 * some other event in the future without PERF_SAMPLE_TIME for good
2533	 * reason, i.e. we may not be interested in its timestamps, just in
2534	 * it taking place, picking some piece of information when it
2535	 * appears in our event stream (vfs_getname comes to mind).
2536	 */
2537	if (trace->base_time == 0 && !trace->full_time &&
2538	    (evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
2539		trace->base_time = sample->time;
2540}
2541
2542static int trace__process_sample(struct perf_tool *tool,
2543				 union perf_event *event,
2544				 struct perf_sample *sample,
2545				 struct evsel *evsel,
2546				 struct machine *machine __maybe_unused)
2547{
2548	struct trace *trace = container_of(tool, struct trace, tool);
2549	struct thread *thread;
2550	int err = 0;
2551
2552	tracepoint_handler handler = evsel->handler;
2553
2554	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2555	if (thread && thread__is_filtered(thread))
2556		goto out;
2557
2558	trace__set_base_time(trace, evsel, sample);
 
2559
2560	if (handler) {
2561		++trace->nr_events;
2562		handler(trace, evsel, event, sample);
2563	}
2564out:
2565	thread__put(thread);
2566	return err;
2567}
2568
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2569static int trace__record(struct trace *trace, int argc, const char **argv)
2570{
2571	unsigned int rec_argc, i, j;
2572	const char **rec_argv;
2573	const char * const record_args[] = {
2574		"record",
2575		"-R",
2576		"-m", "1024",
2577		"-c", "1",
2578	};
2579
2580	const char * const sc_args[] = { "-e", };
2581	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2582	const char * const majpf_args[] = { "-e", "major-faults" };
2583	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2584	const char * const minpf_args[] = { "-e", "minor-faults" };
2585	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2586
2587	/* +1 is for the event string below */
2588	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2589		majpf_args_nr + minpf_args_nr + argc;
2590	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2591
2592	if (rec_argv == NULL)
2593		return -ENOMEM;
2594
2595	j = 0;
2596	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2597		rec_argv[j++] = record_args[i];
2598
2599	if (trace->trace_syscalls) {
2600		for (i = 0; i < sc_args_nr; i++)
2601			rec_argv[j++] = sc_args[i];
2602
2603		/* event string may be different for older kernels - e.g., RHEL6 */
2604		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2605			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2606		else if (is_valid_tracepoint("syscalls:sys_enter"))
2607			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2608		else {
2609			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2610			free(rec_argv);
2611			return -1;
2612		}
2613	}
2614
2615	if (trace->trace_pgfaults & TRACE_PFMAJ)
2616		for (i = 0; i < majpf_args_nr; i++)
2617			rec_argv[j++] = majpf_args[i];
2618
2619	if (trace->trace_pgfaults & TRACE_PFMIN)
2620		for (i = 0; i < minpf_args_nr; i++)
2621			rec_argv[j++] = minpf_args[i];
2622
2623	for (i = 0; i < (unsigned int)argc; i++)
2624		rec_argv[j++] = argv[i];
2625
2626	return cmd_record(j, rec_argv);
2627}
2628
2629static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2630
2631static bool evlist__add_vfs_getname(struct evlist *evlist)
2632{
2633	bool found = false;
2634	struct evsel *evsel, *tmp;
2635	struct parse_events_error err = { .idx = 0, };
2636	int ret = parse_events(evlist, "probe:vfs_getname*", &err);
2637
2638	if (ret)
2639		return false;
2640
2641	evlist__for_each_entry_safe(evlist, evsel, tmp) {
2642		if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname"))
2643			continue;
2644
2645		if (perf_evsel__field(evsel, "pathname")) {
2646			evsel->handler = trace__vfs_getname;
2647			found = true;
2648			continue;
2649		}
2650
2651		list_del_init(&evsel->core.node);
2652		evsel->evlist = NULL;
2653		evsel__delete(evsel);
2654	}
2655
2656	return found;
 
 
2657}
2658
2659static struct evsel *perf_evsel__new_pgfault(u64 config)
 
2660{
2661	struct evsel *evsel;
2662	struct perf_event_attr attr = {
2663		.type = PERF_TYPE_SOFTWARE,
2664		.mmap_data = 1,
2665	};
2666
2667	attr.config = config;
2668	attr.sample_period = 1;
2669
2670	event_attr_init(&attr);
2671
2672	evsel = evsel__new(&attr);
2673	if (evsel)
2674		evsel->handler = trace__pgfault;
2675
2676	return evsel;
 
 
 
2677}
2678
2679static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2680{
2681	const u32 type = event->header.type;
2682	struct evsel *evsel;
 
 
 
2683
2684	if (type != PERF_RECORD_SAMPLE) {
2685		trace__process_event(trace, trace->host, event, sample);
2686		return;
2687	}
2688
2689	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2690	if (evsel == NULL) {
2691		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2692		return;
2693	}
2694
2695	if (evswitch__discard(&trace->evswitch, evsel))
2696		return;
2697
2698	trace__set_base_time(trace, evsel, sample);
2699
2700	if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
2701	    sample->raw_data == NULL) {
2702		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2703		       perf_evsel__name(evsel), sample->tid,
2704		       sample->cpu, sample->raw_size);
2705	} else {
2706		tracepoint_handler handler = evsel->handler;
2707		handler(trace, evsel, event, sample);
2708	}
2709
2710	if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
2711		interrupted = true;
2712}
2713
2714static int trace__add_syscall_newtp(struct trace *trace)
2715{
2716	int ret = -1;
2717	struct evlist *evlist = trace->evlist;
2718	struct evsel *sys_enter, *sys_exit;
2719
2720	sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
2721	if (sys_enter == NULL)
2722		goto out;
2723
2724	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2725		goto out_delete_sys_enter;
2726
2727	sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
2728	if (sys_exit == NULL)
2729		goto out_delete_sys_enter;
2730
2731	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2732		goto out_delete_sys_exit;
2733
2734	perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
2735	perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
2736
2737	evlist__add(evlist, sys_enter);
2738	evlist__add(evlist, sys_exit);
2739
2740	if (callchain_param.enabled && !trace->kernel_syscallchains) {
2741		/*
2742		 * We're interested only in the user space callchain
2743		 * leading to the syscall, allow overriding that for
2744		 * debugging reasons using --kernel_syscall_callchains
2745		 */
2746		sys_exit->core.attr.exclude_callchain_kernel = 1;
2747	}
2748
2749	trace->syscalls.events.sys_enter = sys_enter;
2750	trace->syscalls.events.sys_exit  = sys_exit;
2751
2752	ret = 0;
2753out:
2754	return ret;
2755
2756out_delete_sys_exit:
2757	evsel__delete_priv(sys_exit);
2758out_delete_sys_enter:
2759	evsel__delete_priv(sys_enter);
2760	goto out;
2761}
2762
2763static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
2764{
2765	int err = -1;
2766	struct evsel *sys_exit;
2767	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2768						trace->ev_qualifier_ids.nr,
2769						trace->ev_qualifier_ids.entries);
2770
2771	if (filter == NULL)
2772		goto out_enomem;
2773
2774	if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2775					  filter)) {
2776		sys_exit = trace->syscalls.events.sys_exit;
2777		err = perf_evsel__append_tp_filter(sys_exit, filter);
2778	}
2779
2780	free(filter);
2781out:
2782	return err;
2783out_enomem:
2784	errno = ENOMEM;
2785	goto out;
2786}
2787
2788#ifdef HAVE_LIBBPF_SUPPORT
2789static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
2790{
2791	if (trace->bpf_obj == NULL)
2792		return NULL;
2793
2794	return bpf_object__find_program_by_title(trace->bpf_obj, name);
2795}
2796
2797static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
2798							const char *prog_name, const char *type)
2799{
2800	struct bpf_program *prog;
2801
2802	if (prog_name == NULL) {
2803		char default_prog_name[256];
2804		scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
2805		prog = trace__find_bpf_program_by_title(trace, default_prog_name);
2806		if (prog != NULL)
2807			goto out_found;
2808		if (sc->fmt && sc->fmt->alias) {
2809			scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
2810			prog = trace__find_bpf_program_by_title(trace, default_prog_name);
2811			if (prog != NULL)
2812				goto out_found;
2813		}
2814		goto out_unaugmented;
2815	}
2816
2817	prog = trace__find_bpf_program_by_title(trace, prog_name);
2818
2819	if (prog != NULL) {
2820out_found:
2821		return prog;
2822	}
2823
2824	pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
2825		 prog_name, type, sc->name);
2826out_unaugmented:
2827	return trace->syscalls.unaugmented_prog;
2828}
2829
2830static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
2831{
2832	struct syscall *sc = trace__syscall_info(trace, NULL, id);
2833
2834	if (sc == NULL)
2835		return;
2836
2837	sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter");
2838	sc->bpf_prog.sys_exit  = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit  : NULL,  "exit");
2839}
2840
2841static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
2842{
2843	struct syscall *sc = trace__syscall_info(trace, NULL, id);
2844	return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
2845}
2846
2847static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
2848{
2849	struct syscall *sc = trace__syscall_info(trace, NULL, id);
2850	return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
2851}
2852
2853static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
2854{
2855	struct syscall *sc = trace__syscall_info(trace, NULL, id);
2856	int arg = 0;
2857
2858	if (sc == NULL)
2859		goto out;
2860
2861	for (; arg < sc->nr_args; ++arg) {
2862		entry->string_args_len[arg] = 0;
2863		if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) {
2864			/* Should be set like strace -s strsize */
2865			entry->string_args_len[arg] = PATH_MAX;
2866		}
2867	}
2868out:
2869	for (; arg < 6; ++arg)
2870		entry->string_args_len[arg] = 0;
2871}
2872static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
2873{
2874	int fd = bpf_map__fd(trace->syscalls.map);
2875	struct bpf_map_syscall_entry value = {
2876		.enabled = !trace->not_ev_qualifier,
2877	};
2878	int err = 0;
2879	size_t i;
2880
2881	for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
2882		int key = trace->ev_qualifier_ids.entries[i];
2883
2884		if (value.enabled) {
2885			trace__init_bpf_map_syscall_args(trace, key, &value);
2886			trace__init_syscall_bpf_progs(trace, key);
2887		}
2888
2889		err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
2890		if (err)
2891			break;
2892	}
2893
2894	return err;
2895}
2896
2897static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
2898{
2899	int fd = bpf_map__fd(trace->syscalls.map);
2900	struct bpf_map_syscall_entry value = {
2901		.enabled = enabled,
2902	};
2903	int err = 0, key;
2904
2905	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
2906		if (enabled)
2907			trace__init_bpf_map_syscall_args(trace, key, &value);
2908
2909		err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
2910		if (err)
2911			break;
2912	}
2913
2914	return err;
2915}
2916
2917static int trace__init_syscalls_bpf_map(struct trace *trace)
2918{
2919	bool enabled = true;
2920
2921	if (trace->ev_qualifier_ids.nr)
2922		enabled = trace->not_ev_qualifier;
2923
2924	return __trace__init_syscalls_bpf_map(trace, enabled);
2925}
2926
/*
 * Look for another syscall whose sys_enter BPF augmenter can be reused for
 * 'sc', based on comparing the two syscalls' argument lists field by field.
 *
 * Returns the BPF program of the first suitable pair found, or NULL when
 * 'sc' has no pointer argument or no other syscall qualifies.
 */
static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
{
	struct tep_format_field *field, *candidate_field;
	int id;

	/*
	 * We're only interested in syscalls that have a pointer:
	 */
	for (field = sc->args; field; field = field->next) {
		if (field->flags & TEP_FIELD_IS_POINTER)
			goto try_to_find_pair;
	}

	return NULL;

try_to_find_pair:
	for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
		struct syscall *pair = trace__syscall_info(trace, NULL, id);
		struct bpf_program *pair_prog;
		bool is_candidate = false;

		/* Skip ourselves and syscalls already known to have no augmenter. */
		if (pair == NULL || pair == sc ||
		    pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
			continue;

		/* Walk both argument lists in lockstep, stopping at the shorter one. */
		for (field = sc->args, candidate_field = pair->args;
		     field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
			bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
			     candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;

			if (is_pointer) {
			       if (!candidate_is_pointer) {
					// The candidate just doesn't copy our pointer arg, might copy other pointers we want.
					continue;
			       }
			} else {
				if (candidate_is_pointer) {
					// The candidate might copy a pointer we don't have, skip it.
					goto next_candidate;
				}
				continue;
			}

			/* Both are pointers at this position: the types must match exactly. */
			if (strcmp(field->type, candidate_field->type))
				goto next_candidate;

			is_candidate = true;
		}

		if (!is_candidate)
			goto next_candidate;

		/*
		 * Check if the tentative pair syscall augmenter has more pointers, if it has,
		 * then it may be collecting that and we then can't use it, as it would collect
		 * more than what is common to the two syscalls.
		 *
		 * NOTE(review): the scan below starts at candidate_field->next, so the
		 * first field of the pair beyond sc's argument list is not itself
		 * checked for TEP_FIELD_IS_POINTER -- confirm this is intended.
		 */
		if (candidate_field) {
			for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
				if (candidate_field->flags & TEP_FIELD_IS_POINTER)
					goto next_candidate;
		}

		pair_prog = pair->bpf_prog.sys_enter;
		/*
		 * If the pair isn't enabled, then its bpf_prog.sys_enter will not
		 * have been searched for, so search it here and if it returns the
		 * unaugmented one, then ignore it, otherwise we'll reuse that BPF
		 * program for a filtered syscall on a non-filtered one.
		 *
		 * For instance, we have "!syscalls:sys_enter_renameat" and that is
		 * useful for "renameat2".
		 */
		if (pair_prog == NULL) {
			pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
			if (pair_prog == trace->syscalls.unaugmented_prog)
				goto next_candidate;
		}

		pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
		return pair_prog;
	next_candidate:
		continue;
	}

	return NULL;
}
3014
3015static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
3016{
3017	int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
3018	    map_exit_fd  = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
3019	int err = 0, key;
3020
3021	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3022		int prog_fd;
3023
3024		if (!trace__syscall_enabled(trace, key))
3025			continue;
3026
3027		trace__init_syscall_bpf_progs(trace, key);
3028
3029		// It'll get at least the "!raw_syscalls:unaugmented"
3030		prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
3031		err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3032		if (err)
3033			break;
3034		prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
3035		err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
3036		if (err)
3037			break;
3038	}
3039
3040	/*
3041	 * Now lets do a second pass looking for enabled syscalls without
3042	 * an augmenter that have a signature that is a superset of another
3043	 * syscall with an augmenter so that we can auto-reuse it.
3044	 *
3045	 * I.e. if we have an augmenter for the "open" syscall that has
3046	 * this signature:
3047	 *
3048	 *   int open(const char *pathname, int flags, mode_t mode);
3049	 *
3050	 * I.e. that will collect just the first string argument, then we
3051	 * can reuse it for the 'creat' syscall, that has this signature:
3052	 *
3053	 *   int creat(const char *pathname, mode_t mode);
3054	 *
3055	 * and for:
3056	 *
3057	 *   int stat(const char *pathname, struct stat *statbuf);
3058	 *   int lstat(const char *pathname, struct stat *statbuf);
3059	 *
3060	 * Because the 'open' augmenter will collect the first arg as a string,
3061	 * and leave alone all the other args, which already helps with
3062	 * beautifying 'stat' and 'lstat''s pathname arg.
3063	 *
3064	 * Then, in time, when 'stat' gets an augmenter that collects both
3065	 * first and second arg (this one on the raw_syscalls:sys_exit prog
3066	 * array tail call, then that one will be used.
3067	 */
3068	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3069		struct syscall *sc = trace__syscall_info(trace, NULL, key);
3070		struct bpf_program *pair_prog;
3071		int prog_fd;
3072
3073		if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
3074			continue;
3075
3076		/*
3077		 * For now we're just reusing the sys_enter prog, and if it
3078		 * already has an augmenter, we don't need to find one.
3079		 */
3080		if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
3081			continue;
3082
3083		/*
3084		 * Look at all the other syscalls for one that has a signature
3085		 * that is close enough that we can share:
3086		 */
3087		pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
3088		if (pair_prog == NULL)
3089			continue;
3090
3091		sc->bpf_prog.sys_enter = pair_prog;
3092
3093		/*
3094		 * Update the BPF_MAP_TYPE_PROG_SHARED for raw_syscalls:sys_enter
3095		 * with the fd for the program we're reusing:
3096		 */
3097		prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
3098		err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3099		if (err)
3100			break;
3101	}
3102
3103
3104	return err;
3105}
3106#else
/*
 * Stubs on the #else side of the HAVE_LIBBPF_SUPPORT conditional: they keep
 * the callers compiling and report "nothing to do" (0 or NULL).
 */

/* No BPF map to filter on: succeed without doing anything. */
static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
{
	return 0;
}

/* No syscalls BPF map to initialize: succeed without doing anything. */
static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
{
	return 0;
}

/* No BPF object to search: never finds a program. */
static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
							    const char *name __maybe_unused)
{
	return NULL;
}

/* No prog array maps to populate: succeed without doing anything. */
static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
{
	return 0;
}
3127#endif // HAVE_LIBBPF_SUPPORT
3128
3129static int trace__set_ev_qualifier_filter(struct trace *trace)
3130{
3131	if (trace->syscalls.map)
3132		return trace__set_ev_qualifier_bpf_filter(trace);
3133	if (trace->syscalls.events.sys_enter)
3134		return trace__set_ev_qualifier_tp_filter(trace);
3135	return 0;
3136}
3137
3138static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused,
3139				    size_t npids __maybe_unused, pid_t *pids __maybe_unused)
3140{
3141	int err = 0;
3142#ifdef HAVE_LIBBPF_SUPPORT
3143	bool value = true;
3144	int map_fd = bpf_map__fd(map);
3145	size_t i;
3146
3147	for (i = 0; i < npids; ++i) {
3148		err = bpf_map_update_elem(map_fd, &pids[i], &value, BPF_ANY);
3149		if (err)
3150			break;
3151	}
3152#endif
3153	return err;
3154}
3155
3156static int trace__set_filter_loop_pids(struct trace *trace)
3157{
3158	unsigned int nr = 1, err;
3159	pid_t pids[32] = {
3160		getpid(),
3161	};
3162	struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
3163
3164	while (thread && nr < ARRAY_SIZE(pids)) {
3165		struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
3166
3167		if (parent == NULL)
3168			break;
3169
3170		if (!strcmp(thread__comm_str(parent), "sshd") ||
3171		    strstarts(thread__comm_str(parent), "gnome-terminal")) {
3172			pids[nr++] = parent->tid;
3173			break;
3174		}
3175		thread = parent;
3176	}
3177
3178	err = perf_evlist__set_tp_filter_pids(trace->evlist, nr, pids);
3179	if (!err && trace->filter_pids.map)
3180		err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
3181
3182	return err;
3183}
3184
3185static int trace__set_filter_pids(struct trace *trace)
3186{
3187	int err = 0;
3188	/*
3189	 * Better not use !target__has_task() here because we need to cover the
3190	 * case where no threads were specified in the command line, but a
3191	 * workload was, and in that case we will fill in the thread_map when
3192	 * we fork the workload in perf_evlist__prepare_workload.
3193	 */
3194	if (trace->filter_pids.nr > 0) {
3195		err = perf_evlist__set_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
3196						      trace->filter_pids.entries);
3197		if (!err && trace->filter_pids.map) {
3198			err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
3199						       trace->filter_pids.entries);
3200		}
3201	} else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) {
3202		err = trace__set_filter_loop_pids(trace);
3203	}
3204
3205	return err;
3206}
3207
3208static int __trace__deliver_event(struct trace *trace, union perf_event *event)
3209{
3210	struct evlist *evlist = trace->evlist;
3211	struct perf_sample sample;
3212	int err;
3213
3214	err = perf_evlist__parse_sample(evlist, event, &sample);
3215	if (err)
3216		fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
3217	else
3218		trace__handle_event(trace, event, &sample);
3219
3220	return 0;
3221}
3222
3223static int __trace__flush_events(struct trace *trace)
3224{
3225	u64 first = ordered_events__first_time(&trace->oe.data);
3226	u64 flush = trace->oe.last - NSEC_PER_SEC;
3227
3228	/* Is there some thing to flush.. */
3229	if (first && first < flush)
3230		return ordered_events__flush_time(&trace->oe.data, flush);
3231
3232	return 0;
3233}
3234
3235static int trace__flush_events(struct trace *trace)
3236{
3237	return !trace->sort_events ? 0 : __trace__flush_events(trace);
3238}
3239
3240static int trace__deliver_event(struct trace *trace, union perf_event *event)
3241{
3242	int err;
3243
3244	if (!trace->sort_events)
3245		return __trace__deliver_event(trace, event);
3246
3247	err = perf_evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
3248	if (err && err != -1)
3249		return err;
3250
3251	err = ordered_events__queue(&trace->oe.data, event, trace->oe.last, 0);
3252	if (err)
3253		return err;
3254
3255	return trace__flush_events(trace);
3256}
3257
3258static int ordered_events__deliver_event(struct ordered_events *oe,
3259					 struct ordered_event *event)
3260{
3261	struct trace *trace = container_of(oe, struct trace, oe.data);
3262
3263	return __trace__deliver_event(trace, event->event);
3264}
3265
/*
 * Live tracing: set up the events in trace->evlist, optionally fork the
 * workload given in argv (when argc > 0), install the pid and syscall
 * filters, then read the ring buffers until done/interrupted, delivering
 * each event.  At the end the summary and tool stats are printed if
 * requested and the evlist is deleted.
 *
 * Returns the last error code, which is 0 on a clean run.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct evlist *evlist = trace->evlist;
	struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;
	bool draining = false;

	trace->live = true;

	if (!trace->raw_augmented_syscalls) {
		if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
			goto out_error_raw_syscalls;

		if (trace->trace_syscalls)
			trace->vfs_getname = evlist__add_vfs_getname(evlist);
	}

	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
		evlist__add(evlist, pgfault_maj);
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
		evlist__add(evlist, pgfault_min);
	}

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	/*
	 * If a global cgroup was set, apply it to all the events without an
	 * explicit cgroup. I.e.:
	 *
	 * 	trace -G A -e sched:*switch
	 *
	 * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
	 * _and_ sched:sched_switch to the 'A' cgroup, while:
	 *
	 * trace -e sched:*switch -G A
	 *
	 * will only set the sched:sched_switch event to the 'A' cgroup, all the
	 * other events (raw_syscalls:sys_{enter,exit}, etc are left "without"
	 * a cgroup (on the root cgroup, sys wide, etc).
	 *
	 * Multiple cgroups:
	 *
	 * trace -G A -e sched:*switch -G B
	 *
	 * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
	 * to the 'B' cgroup.
	 *
	 * evlist__set_default_cgroup() grabs a reference of the passed cgroup
	 * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL.
	 */
	if (trace->cgroup)
		evlist__set_default_cgroup(trace->evlist, trace->cgroup);

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts, &callchain_param);

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		/*
		 * NOTE(review): out_error_open additionally prints the
		 * evlist open error text for the current errno -- confirm
		 * that second message is wanted here.
		 */
		goto out_error_open;
	}

	err = trace__set_filter_pids(trace);
	/*
	 * NOTE(review): any failure to set the pid filters is reported as
	 * "Not enough memory to run!" -- confirm that is the intended message.
	 */
	if (err < 0)
		goto out_error_mem;

	/* The return values of these two BPF map initializers are not checked. */
	if (trace->syscalls.map)
		trace__init_syscalls_bpf_map(trace);

	if (trace->syscalls.prog_array.sys_enter)
		trace__init_syscalls_bpf_prog_array_maps(trace);

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		if (trace->syscalls.events.sys_exit) {
			pr_debug("event qualifier tracepoint filter: %s\n",
				 trace->syscalls.events.sys_exit->filter);
		}
	}

	/*
	 * If the "close" syscall is not traced, then we will not have the
	 * opportunity to, in syscall_arg__scnprintf_close_fd() invalidate the
	 * fd->pathname table and were ending up showing the last value set by
	 * syscalls opening a pathname and associating it with a descriptor or
	 * reading it from /proc/pid/fd/ in cases where that doesn't make
	 * sense.
	 *
	 *  So just disable this beautifier (SCA_FD, SCA_FDAT) when 'close' is
	 *  not in use.
	 */
	trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));

	/* On failure 'evsel' is used by the error path to name the offending event. */
	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	if (trace->dump.map)
		bpf_map__fprintf(trace->dump.map, trace->output);

	err = evlist__mmap(evlist, trace->opts.mmap_pages);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
		evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	/* initial_delay is multiplied by 1000 for usleep(), i.e. it is in milliseconds. */
	if (trace->opts.initial_delay) {
		usleep(trace->opts.initial_delay * 1000);
		evlist__enable(evlist);
	}

	trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
				  evlist->core.threads->nr > 1 ||
				  evlist__first(evlist)->core.attr.inherit;

	/*
	 * Now that we already used evsel->core.attr to ask the kernel to setup the
	 * events, lets reuse evsel->core.attr.sample_max_stack as the limit in
	 * trace__resolve_callchain(), allowing per-event max-stack settings
	 * to override an explicitly set --max-stack global setting.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel__has_callchain(evsel) &&
		    evsel->core.attr.sample_max_stack == 0)
			evsel->core.attr.sample_max_stack = trace->max_stack;
	}
again:
	/* Remember the event count to detect whether this pass read anything. */
	before = trace->nr_events;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		union perf_event *event;
		struct mmap *md;

		md = &evlist->mmap[i];
		if (perf_mmap__read_init(md) < 0)
			continue;

		while ((event = perf_mmap__read_event(md)) != NULL) {
			++trace->nr_events;

			err = trace__deliver_event(trace, event);
			if (err)
				goto out_disable;

			perf_mmap__consume(md);

			if (interrupted)
				goto out_disable;

			/* First time 'done' is seen: stop the events, keep reading what's left. */
			if (done && !draining) {
				evlist__disable(evlist);
				draining = true;
			}
		}
		perf_mmap__read_done(md);
	}

	if (trace->nr_events == before) {
		/* Nothing was read: wait for more, forever unless we're done. */
		int timeout = done ? 100 : -1;

		if (!draining && evlist__poll(evlist, timeout) > 0) {
			if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
				draining = true;

			goto again;
		} else {
			if (trace__flush_events(trace))
				goto out_disable;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	evlist__disable(evlist);

	if (trace->sort_events)
		ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	trace__symbols__exit(trace);

	evlist__delete(evlist);
	cgroup__put(trace->cgroup);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * Error paths that need a scratch buffer for the message live in their own
 * block, only reachable through the labels, so that errbuf is scoped to them.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		str_error_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
3558
3559static int trace__replay(struct trace *trace)
3560{
3561	const struct evsel_str_handler handlers[] = {
3562		{ "probe:vfs_getname",	     trace__vfs_getname, },
3563	};
3564	struct perf_data data = {
3565		.path  = input_name,
3566		.mode  = PERF_DATA_MODE_READ,
3567		.force = trace->force,
3568	};
3569	struct perf_session *session;
3570	struct evsel *evsel;
3571	int err = -1;
3572
3573	trace->tool.sample	  = trace__process_sample;
3574	trace->tool.mmap	  = perf_event__process_mmap;
3575	trace->tool.mmap2	  = perf_event__process_mmap2;
3576	trace->tool.comm	  = perf_event__process_comm;
3577	trace->tool.exit	  = perf_event__process_exit;
3578	trace->tool.fork	  = perf_event__process_fork;
3579	trace->tool.attr	  = perf_event__process_attr;
3580	trace->tool.tracing_data  = perf_event__process_tracing_data;
3581	trace->tool.build_id	  = perf_event__process_build_id;
3582	trace->tool.namespaces	  = perf_event__process_namespaces;
3583
3584	trace->tool.ordered_events = true;
3585	trace->tool.ordering_requires_timestamps = true;
3586
3587	/* add tid to output */
3588	trace->multiple_threads = true;
3589
3590	session = perf_session__new(&data, false, &trace->tool);
3591	if (IS_ERR(session))
3592		return PTR_ERR(session);
3593
3594	if (trace->opts.target.pid)
3595		symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
3596
3597	if (trace->opts.target.tid)
3598		symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
3599
3600	if (symbol__init(&session->header.env) < 0)
3601		goto out;
3602
3603	trace->host = &session->machines.host;
3604
3605	err = perf_session__set_tracepoints_handlers(session, handlers);
3606	if (err)
3607		goto out;
3608
3609	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
3610						     "raw_syscalls:sys_enter");
3611	/* older kernels have syscalls tp versus raw_syscalls */
3612	if (evsel == NULL)
3613		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
3614							     "syscalls:sys_enter");
3615
3616	if (evsel &&
3617	    (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
3618	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
3619		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
3620		goto out;
3621	}
3622
3623	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
3624						     "raw_syscalls:sys_exit");
3625	if (evsel == NULL)
3626		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
3627							     "syscalls:sys_exit");
3628	if (evsel &&
3629	    (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
3630	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
3631		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
3632		goto out;
3633	}
3634
3635	evlist__for_each_entry(session->evlist, evsel) {
3636		if (evsel->core.attr.type == PERF_TYPE_SOFTWARE &&
3637		    (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
3638		     evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
3639		     evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS))
3640			evsel->handler = trace__pgfault;
3641	}
3642
 
 
 
 
3643	setup_pager();
3644
3645	err = perf_session__process_events(session);
3646	if (err)
3647		pr_err("Failed to process events, error %d", err);
3648
3649	else if (trace->summary)
3650		trace__fprintf_thread_summary(trace, trace->output);
3651
3652out:
3653	perf_session__delete(session);
3654
3655	return err;
3656}
3657
/* Print the heading of the per-thread summary; returns what fprintf() returned. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
3666
/*
 * Resorted view of a thread's per-syscall stats, ordered through the
 * 'a->msecs > b->msecs' comparison.  Each entry carries the syscall id, its
 * stats and the 'msecs' value computed below.
 *
 * The body fills one resorted 'entry' from the int_node found at 'nd'.
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct stats 	*stats;
	double		msecs;
	int		syscall;
)
{
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct stats *stats = source->priv;

	entry->syscall = source->i;
	entry->stats   = stats;
	/* Number of samples times the average, scaled down by NSEC_PER_MSEC; 0 without stats. */
	entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
}
3680
3681static size_t thread__dump_stats(struct thread_trace *ttrace,
3682				 struct trace *trace, FILE *fp)
3683{
 
3684	size_t printed = 0;
3685	struct syscall *sc;
3686	struct rb_node *nd;
3687	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
3688
3689	if (syscall_stats == NULL)
3690		return 0;
3691
3692	printed += fprintf(fp, "\n");
3693
3694	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
3695	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
3696	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
3697
3698	resort_rb__for_each_entry(nd, syscall_stats) {
3699		struct stats *stats = syscall_stats_entry->stats;
 
3700		if (stats) {
3701			double min = (double)(stats->min) / NSEC_PER_MSEC;
3702			double max = (double)(stats->max) / NSEC_PER_MSEC;
3703			double avg = avg_stats(stats);
3704			double pct;
3705			u64 n = (u64) stats->n;
3706
3707			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
3708			avg /= NSEC_PER_MSEC;
3709
3710			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
3711			printed += fprintf(fp, "   %-15s", sc->name);
3712			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
3713					   n, syscall_stats_entry->msecs, min, avg);
3714			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
3715		}
 
 
3716	}
3717
3718	resort_rb__delete(syscall_stats);
3719	printed += fprintf(fp, "\n\n");
3720
3721	return printed;
3722}
3723
3724static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
 
 
 
 
 
 
 
3725{
3726	size_t printed = 0;
 
 
 
3727	struct thread_trace *ttrace = thread__priv(thread);
3728	double ratio;
3729
3730	if (ttrace == NULL)
3731		return 0;
3732
3733	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
3734
3735	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
3736	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
3737	printed += fprintf(fp, "%.1f%%", ratio);
3738	if (ttrace->pfmaj)
3739		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
3740	if (ttrace->pfmin)
3741		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
3742	if (trace->sched)
3743		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
3744	else if (fputc('\n', fp) != EOF)
3745		++printed;
3746
3747	printed += thread__dump_stats(ttrace, trace, fp);
3748
3749	return printed;
3750}
3751
3752static unsigned long thread__nr_events(struct thread_trace *ttrace)
3753{
3754	return ttrace ? ttrace->nr_events : 0;
3755}
3756
/*
 * Resorted view of threads, ordered through a comparison of the number of
 * events of each thread's private trace data (0 when there is none).
 *
 * The body fills one resorted 'entry' with the thread embedding 'nd'.
 */
DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
	struct thread *thread;
)
{
	entry->thread = rb_entry(nd, struct thread, rb_node);
}
3763
3764static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
3765{
3766	size_t printed = trace__fprintf_threads_header(fp);
3767	struct rb_node *nd;
3768	int i;
3769
3770	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
3771		DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
3772
3773		if (threads == NULL) {
3774			fprintf(fp, "%s", "Error sorting output by nr_events!\n");
3775			return 0;
3776		}
3777
3778		resort_rb__for_each_entry(nd, threads)
3779			printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
3780
3781		resort_rb__delete(threads);
3782	}
3783	return printed;
3784}
3785
3786static int trace__set_duration(const struct option *opt, const char *str,
3787			       int unset __maybe_unused)
3788{
3789	struct trace *trace = opt->value;
3790
3791	trace->duration_filter = atof(str);
3792	return 0;
3793}
3794
3795static int trace__set_filter_pids_from_option(const struct option *opt, const char *str,
3796					      int unset __maybe_unused)
3797{
3798	int ret = -1;
3799	size_t i;
3800	struct trace *trace = opt->value;
3801	/*
3802	 * FIXME: introduce a intarray class, plain parse csv and create a
3803	 * { int nr, int entries[] } struct...
3804	 */
3805	struct intlist *list = intlist__new(str);
3806
3807	if (list == NULL)
3808		return -1;
3809
3810	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
3811	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
3812
3813	if (trace->filter_pids.entries == NULL)
3814		goto out;
3815
3816	trace->filter_pids.entries[0] = getpid();
3817
3818	for (i = 1; i < trace->filter_pids.nr; ++i)
3819		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
3820
3821	intlist__delete(list);
3822	ret = 0;
3823out:
3824	return ret;
3825}
3826
3827static int trace__open_output(struct trace *trace, const char *filename)
3828{
3829	struct stat st;
3830
3831	if (!stat(filename, &st) && st.st_size) {
3832		char oldname[PATH_MAX];
3833
3834		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
3835		unlink(oldname);
3836		rename(filename, oldname);
3837	}
3838
3839	trace->output = fopen(filename, "w");
3840
3841	return trace->output == NULL ? -errno : 0;
3842}
3843
3844static int parse_pagefaults(const struct option *opt, const char *str,
3845			    int unset __maybe_unused)
3846{
3847	int *trace_pgfaults = opt->value;
3848
3849	if (strcmp(str, "all") == 0)
3850		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3851	else if (strcmp(str, "maj") == 0)
3852		*trace_pgfaults |= TRACE_PFMAJ;
3853	else if (strcmp(str, "min") == 0)
3854		*trace_pgfaults |= TRACE_PFMIN;
3855	else
3856		return -1;
3857
3858	return 0;
3859}
3860
3861static void evlist__set_evsel_handler(struct evlist *evlist, void *handler)
3862{
3863	struct evsel *evsel;
3864
3865	evlist__for_each_entry(evlist, evsel)
3866		evsel->handler = handler;
3867}
3868
3869static int evlist__set_syscall_tp_fields(struct evlist *evlist)
3870{
3871	struct evsel *evsel;
3872
3873	evlist__for_each_entry(evlist, evsel) {
3874		if (evsel->priv || !evsel->tp_format)
3875			continue;
3876
3877		if (strcmp(evsel->tp_format->system, "syscalls"))
3878			continue;
3879
3880		if (perf_evsel__init_syscall_tp(evsel))
3881			return -1;
3882
3883		if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
3884			struct syscall_tp *sc = evsel->priv;
3885
3886			if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
3887				return -1;
3888		} else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
3889			struct syscall_tp *sc = evsel->priv;
3890
3891			if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
3892				return -1;
3893		}
3894	}
3895
3896	return 0;
3897}
3898
3899/*
3900 * XXX: Hackish, just splitting the combined -e+--event (syscalls
3901 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
3902 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
3903 *
3904 * It'd be better to introduce a parse_options() variant that would return a
3905 * list with the terms it didn't match to an event...
3906 */
3907static int trace__parse_events_option(const struct option *opt, const char *str,
3908				      int unset __maybe_unused)
3909{
3910	struct trace *trace = (struct trace *)opt->value;
3911	const char *s = str;
3912	char *sep = NULL, *lists[2] = { NULL, NULL, };
3913	int len = strlen(str) + 1, err = -1, list, idx;
3914	char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
3915	char group_name[PATH_MAX];
3916	struct syscall_fmt *fmt;
3917
3918	if (strace_groups_dir == NULL)
3919		return -1;
3920
3921	if (*s == '!') {
3922		++s;
3923		trace->not_ev_qualifier = true;
3924	}
3925
3926	while (1) {
3927		if ((sep = strchr(s, ',')) != NULL)
3928			*sep = '\0';
3929
3930		list = 0;
3931		if (syscalltbl__id(trace->sctbl, s) >= 0 ||
3932		    syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
3933			list = 1;
3934			goto do_concat;
3935		}
3936
3937		fmt = syscall_fmt__find_by_alias(s);
3938		if (fmt != NULL) {
3939			list = 1;
3940			s = fmt->name;
3941		} else {
3942			path__join(group_name, sizeof(group_name), strace_groups_dir, s);
3943			if (access(group_name, R_OK) == 0)
3944				list = 1;
3945		}
3946do_concat:
3947		if (lists[list]) {
3948			sprintf(lists[list] + strlen(lists[list]), ",%s", s);
3949		} else {
3950			lists[list] = malloc(len);
3951			if (lists[list] == NULL)
3952				goto out;
3953			strcpy(lists[list], s);
3954		}
3955
3956		if (!sep)
3957			break;
3958
3959		*sep = ',';
3960		s = sep + 1;
3961	}
3962
3963	if (lists[1] != NULL) {
3964		struct strlist_config slist_config = {
3965			.dirname = strace_groups_dir,
3966		};
3967
3968		trace->ev_qualifier = strlist__new(lists[1], &slist_config);
3969		if (trace->ev_qualifier == NULL) {
3970			fputs("Not enough memory to parse event qualifier", trace->output);
3971			goto out;
3972		}
3973
3974		if (trace__validate_ev_qualifier(trace))
3975			goto out;
3976		trace->trace_syscalls = true;
3977	}
3978
3979	err = 0;
3980
3981	if (lists[0]) {
3982		struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
3983					       "event selector. use 'perf list' to list available events",
3984					       parse_events_option);
3985		err = parse_events_option(&o, lists[0], 0);
3986	}
3987out:
3988	if (sep)
3989		*sep = ',';
3990
3991	return err;
3992}
3993
3994static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
3995{
3996	struct trace *trace = opt->value;
3997
3998	if (!list_empty(&trace->evlist->core.entries))
3999		return parse_cgroups(opt, str, unset);
4000
4001	trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
4002
4003	return 0;
4004}
4005
4006static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
4007{
4008	if (trace->bpf_obj == NULL)
4009		return NULL;
4010
4011	return bpf_object__find_map_by_name(trace->bpf_obj, name);
4012}
4013
4014static void trace__set_bpf_map_filtered_pids(struct trace *trace)
4015{
4016	trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
4017}
4018
4019static void trace__set_bpf_map_syscalls(struct trace *trace)
4020{
4021	trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
4022	trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
4023	trace->syscalls.prog_array.sys_exit  = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
4024}
4025
4026static int trace__config(const char *var, const char *value, void *arg)
4027{
4028	struct trace *trace = arg;
4029	int err = 0;
4030
4031	if (!strcmp(var, "trace.add_events")) {
4032		struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
4033					       "event selector. use 'perf list' to list available events",
4034					       parse_events_option);
4035		/*
4036		 * We can't propagate parse_event_option() return, as it is 1
4037		 * for failure while perf_config() expects -1.
4038		 */
4039		if (parse_events_option(&o, value, 0))
4040			err = -1;
4041	} else if (!strcmp(var, "trace.show_timestamp")) {
4042		trace->show_tstamp = perf_config_bool(var, value);
4043	} else if (!strcmp(var, "trace.show_duration")) {
4044		trace->show_duration = perf_config_bool(var, value);
4045	} else if (!strcmp(var, "trace.show_arg_names")) {
4046		trace->show_arg_names = perf_config_bool(var, value);
4047		if (!trace->show_arg_names)
4048			trace->show_zeros = true;
4049	} else if (!strcmp(var, "trace.show_zeros")) {
4050		bool new_show_zeros = perf_config_bool(var, value);
4051		if (!trace->show_arg_names && !new_show_zeros) {
4052			pr_warning("trace.show_zeros has to be set when trace.show_arg_names=no\n");
4053			goto out;
4054		}
4055		trace->show_zeros = new_show_zeros;
4056	} else if (!strcmp(var, "trace.show_prefix")) {
4057		trace->show_string_prefix = perf_config_bool(var, value);
4058	} else if (!strcmp(var, "trace.no_inherit")) {
4059		trace->opts.no_inherit = perf_config_bool(var, value);
4060	} else if (!strcmp(var, "trace.args_alignment")) {
4061		int args_alignment = 0;
4062		if (perf_config_int(&args_alignment, var, value) == 0)
4063			trace->args_alignment = args_alignment;
4064	}
4065out:
4066	return err;
4067}
4068
4069int cmd_trace(int argc, const char **argv)
4070{
4071	const char *trace_usage[] = {
4072		"perf trace [<options>] [<command>]",
4073		"perf trace [<options>] -- <command> [<options>]",
4074		"perf trace record [<options>] [<command>]",
4075		"perf trace record [<options>] -- <command> [<options>]",
4076		NULL
4077	};
4078	struct trace trace = {
 
 
 
 
 
 
 
4079		.opts = {
4080			.target = {
4081				.uid	   = UINT_MAX,
4082				.uses_mmap = true,
4083			},
4084			.user_freq     = UINT_MAX,
4085			.user_interval = ULLONG_MAX,
4086			.no_buffering  = true,
4087			.mmap_pages    = UINT_MAX,
 
4088		},
4089		.output = stderr,
4090		.show_comm = true,
4091		.show_tstamp = true,
4092		.show_duration = true,
4093		.show_arg_names = true,
4094		.args_alignment = 70,
4095		.trace_syscalls = false,
4096		.kernel_syscallchains = false,
4097		.max_stack = UINT_MAX,
4098		.max_events = ULONG_MAX,
4099	};
4100	const char *map_dump_str = NULL;
4101	const char *output_name = NULL;
 
4102	const struct option trace_options[] = {
4103	OPT_CALLBACK('e', "event", &trace, "event",
4104		     "event/syscall selector. use 'perf list' to list available events",
4105		     trace__parse_events_option),
4106	OPT_BOOLEAN(0, "comm", &trace.show_comm,
4107		    "show the thread COMM next to its id"),
4108	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
4109	OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
4110		     trace__parse_events_option),
4111	OPT_STRING('o', "output", &output_name, "file", "output file name"),
4112	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
4113	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
4114		    "trace events on existing process id"),
4115	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
4116		    "trace events on existing thread id"),
4117	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
4118		     "pids to filter (by the kernel)", trace__set_filter_pids_from_option),
4119	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
4120		    "system-wide collection from all CPUs"),
4121	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
4122		    "list of cpus to monitor"),
4123	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
4124		    "child tasks do not inherit counters"),
4125	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
4126		     "number of mmap data pages",
4127		     perf_evlist__parse_mmap_pages),
4128	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
4129		   "user to profile"),
4130	OPT_CALLBACK(0, "duration", &trace, "float",
4131		     "show only events with duration > N.M ms",
4132		     trace__set_duration),
4133#ifdef HAVE_LIBBPF_SUPPORT
4134	OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
4135#endif
4136	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
4137	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4138	OPT_BOOLEAN('T', "time", &trace.full_time,
4139		    "Show full timestamp, not time relative to first start"),
4140	OPT_BOOLEAN(0, "failure", &trace.failure_only,
4141		    "Show only syscalls that failed"),
4142	OPT_BOOLEAN('s', "summary", &trace.summary_only,
4143		    "Show only syscall summary with statistics"),
4144	OPT_BOOLEAN('S', "with-summary", &trace.summary,
4145		    "Show all syscalls and summary with statistics"),
4146	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
4147		     "Trace pagefaults", parse_pagefaults, "maj"),
4148	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
4149	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
4150	OPT_CALLBACK(0, "call-graph", &trace.opts,
4151		     "record_mode[,record_size]", record_callchain_help,
4152		     &record_parse_callchain_opt),
4153	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
4154		    "Show the kernel callchains on the syscall exit path"),
4155	OPT_ULONG(0, "max-events", &trace.max_events,
4156		"Set the maximum number of events to print, exit after that is reached. "),
4157	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
4158		     "Set the minimum stack depth when parsing the callchain, "
4159		     "anything below the specified depth will be ignored."),
4160	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
4161		     "Set the maximum stack depth when parsing the callchain, "
4162		     "anything beyond the specified depth will be ignored. "
4163		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
4164	OPT_BOOLEAN(0, "sort-events", &trace.sort_events,
4165			"Sort batch of events before processing, use if getting out of order events"),
4166	OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
4167			"print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
4168	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
4169			"per thread proc mmap processing timeout in ms"),
4170	OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
4171		     trace__parse_cgroups),
4172	OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
4173		     "ms to wait before starting measurement after program "
4174		     "start"),
4175	OPTS_EVSWITCH(&trace.evswitch),
4176	OPT_END()
4177	};
4178	bool __maybe_unused max_stack_user_set = true;
4179	bool mmap_pages_user_set = true;
4180	struct evsel *evsel;
4181	const char * const trace_subcommands[] = { "record", NULL };
4182	int err = -1;
4183	char bf[BUFSIZ];
4184
4185	signal(SIGSEGV, sighandler_dump_stack);
4186	signal(SIGFPE, sighandler_dump_stack);
4187
4188	trace.evlist = evlist__new();
4189	trace.sctbl = syscalltbl__new();
4190
4191	if (trace.evlist == NULL || trace.sctbl == NULL) {
4192		pr_err("Not enough memory to run!\n");
4193		err = -ENOMEM;
4194		goto out;
4195	}
4196
4197	/*
4198	 * Parsing .perfconfig may entail creating a BPF event, that may need
4199	 * to create BPF maps, so bump RLIM_MEMLOCK as the default 64K setting
4200	 * is too small. This affects just this process, not touching the
4201	 * global setting. If it fails we'll get something in 'perf trace -v'
4202	 * to help diagnose the problem.
4203	 */
4204	rlimit__bump_memlock();
4205
4206	err = perf_config(trace__config, &trace);
4207	if (err)
4208		goto out;
4209
4210	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
4211				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
4212
4213	if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
4214		usage_with_options_msg(trace_usage, trace_options,
4215				       "cgroup monitoring only available in system-wide mode");
4216	}
4217
4218	evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
4219	if (IS_ERR(evsel)) {
4220		bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
4221		pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
4222		goto out;
4223	}
4224
4225	if (evsel) {
4226		trace.syscalls.events.augmented = evsel;
4227
4228		evsel = perf_evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
4229		if (evsel == NULL) {
4230			pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
4231			goto out;
4232		}
4233
4234		if (evsel->bpf_obj == NULL) {
4235			pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n");
4236			goto out;
4237		}
4238
4239		trace.bpf_obj = evsel->bpf_obj;
4240
4241		trace__set_bpf_map_filtered_pids(&trace);
4242		trace__set_bpf_map_syscalls(&trace);
4243		trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
4244	}
4245
4246	err = bpf__setup_stdout(trace.evlist);
4247	if (err) {
4248		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
4249		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
4250		goto out;
4251	}
4252
4253	err = -1;
4254
4255	if (map_dump_str) {
4256		trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
4257		if (trace.dump.map == NULL) {
4258			pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
4259			goto out;
4260		}
4261	}
4262
4263	if (trace.trace_pgfaults) {
4264		trace.opts.sample_address = true;
4265		trace.opts.sample_time = true;
4266	}
4267
4268	if (trace.opts.mmap_pages == UINT_MAX)
4269		mmap_pages_user_set = false;
4270
4271	if (trace.max_stack == UINT_MAX) {
4272		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
4273		max_stack_user_set = false;
4274	}
4275
4276#ifdef HAVE_DWARF_UNWIND_SUPPORT
4277	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
4278		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
4279	}
4280#endif
4281
4282	if (callchain_param.enabled) {
4283		if (!mmap_pages_user_set && geteuid() == 0)
4284			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
4285
4286		symbol_conf.use_callchain = true;
4287	}
4288
4289	if (trace.evlist->core.nr_entries > 0) {
4290		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
4291		if (evlist__set_syscall_tp_fields(trace.evlist)) {
4292			perror("failed to set syscalls:* tracepoint fields");
4293			goto out;
4294		}
4295	}
4296
4297	if (trace.sort_events) {
4298		ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace);
4299		ordered_events__set_copy_on_queue(&trace.oe.data, true);
4300	}
4301
4302	/*
4303	 * If we are augmenting syscalls, then combine what we put in the
4304	 * __augmented_syscalls__ BPF map with what is in the
4305	 * syscalls:sys_exit_FOO tracepoints, i.e. just like we do without BPF,
4306	 * combining raw_syscalls:sys_enter with raw_syscalls:sys_exit.
4307	 *
4308	 * We'll switch to look at two BPF maps, one for sys_enter and the
4309	 * other for sys_exit when we start augmenting the sys_exit paths with
4310	 * buffers that are being copied from kernel to userspace, think 'read'
4311	 * syscall.
4312	 */
4313	if (trace.syscalls.events.augmented) {
4314		evlist__for_each_entry(trace.evlist, evsel) {
4315			bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
4316
4317			if (raw_syscalls_sys_exit) {
4318				trace.raw_augmented_syscalls = true;
4319				goto init_augmented_syscall_tp;
4320			}
4321
4322			if (trace.syscalls.events.augmented->priv == NULL &&
4323			    strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) {
4324				struct evsel *augmented = trace.syscalls.events.augmented;
4325				if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) ||
4326				    perf_evsel__init_augmented_syscall_tp_args(augmented))
4327					goto out;
4328				/*
4329				 * Augmented is __augmented_syscalls__ BPF_OUTPUT event
4330				 * Above we made sure we can get from the payload the tp fields
4331				 * that we get from syscalls:sys_enter tracefs format file.
4332				 */
4333				augmented->handler = trace__sys_enter;
4334				/*
4335				 * Now we do the same for the *syscalls:sys_enter event so that
4336				 * if we handle it directly, i.e. if the BPF prog returns 0 so
4337				 * as not to filter it, then we'll handle it just like we would
4338				 * for the BPF_OUTPUT one:
4339				 */
4340				if (perf_evsel__init_augmented_syscall_tp(evsel, evsel) ||
4341				    perf_evsel__init_augmented_syscall_tp_args(evsel))
4342					goto out;
4343				evsel->handler = trace__sys_enter;
4344			}
4345
4346			if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
4347				struct syscall_tp *sc;
4348init_augmented_syscall_tp:
4349				if (perf_evsel__init_augmented_syscall_tp(evsel, evsel))
4350					goto out;
4351				sc = evsel->priv;
4352				/*
4353				 * For now with BPF raw_augmented we hook into
4354				 * raw_syscalls:sys_enter and there we get all
4355				 * 6 syscall args plus the tracepoint common
4356				 * fields and the syscall_nr (another long).
4357				 * So we check if that is the case and if so
4358				 * don't look after the sc->args_size but
4359				 * always after the full raw_syscalls:sys_enter
4360				 * payload, which is fixed.
4361				 *
4362				 * We'll revisit this later to pass
4363				 * s->args_size to the BPF augmenter (now
4364				 * tools/perf/examples/bpf/augmented_raw_syscalls.c,
4365				 * so that it copies only what we need for each
4366				 * syscall, like what happens when we use
4367				 * syscalls:sys_enter_NAME, so that we reduce
4368				 * the kernel/userspace traffic to just what is
4369				 * needed for each syscall.
4370				 */
4371				if (trace.raw_augmented_syscalls)
4372					trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
4373				perf_evsel__init_augmented_syscall_tp_ret(evsel);
4374				evsel->handler = trace__sys_exit;
4375			}
4376		}
4377	}
4378
4379	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
4380		return trace__record(&trace, argc-1, &argv[1]);
4381
4382	/* summary_only implies summary option, but don't overwrite summary if set */
4383	if (trace.summary_only)
4384		trace.summary = trace.summary_only;
4385
4386	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
4387	    trace.evlist->core.nr_entries == 0 /* Was --events used? */) {
4388		trace.trace_syscalls = true;
 
4389	}
4390
4391	if (output_name != NULL) {
4392		err = trace__open_output(&trace, output_name);
4393		if (err < 0) {
4394			perror("failed to create output file");
4395			goto out;
4396		}
4397	}
4398
4399	err = evswitch__init(&trace.evswitch, trace.evlist, stderr);
4400	if (err)
4401		goto out_close;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4402
4403	err = target__validate(&trace.opts.target);
4404	if (err) {
4405		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
4406		fprintf(trace.output, "%s", bf);
4407		goto out_close;
4408	}
4409
4410	err = target__parse_uid(&trace.opts.target);
4411	if (err) {
4412		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
4413		fprintf(trace.output, "%s", bf);
4414		goto out_close;
4415	}
4416
4417	if (!argc && target__none(&trace.opts.target))
4418		trace.opts.target.system_wide = true;
4419
4420	if (input_name)
4421		err = trace__replay(&trace);
4422	else
4423		err = trace__run(&trace, argc, argv);
4424
4425out_close:
4426	if (output_name != NULL)
4427		fclose(trace.output);
4428out:
4429	return err;
4430}
v4.6
   1/*
   2 * builtin-trace.c
   3 *
   4 * Builtin 'trace' command:
   5 *
   6 * Display a continuously updated trace of any workload, CPU, specific PID,
   7 * system wide, etc.  Default format is loosely strace like, but any other
   8 * event may be specified using --event.
   9 *
  10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
  11 *
  12 * Initially based on the 'trace' prototype by Thomas Gleixner:
  13 *
  14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
  15 *
  16 * Released under the GPL v2. (and only v2, not any later version)
  17 */
  18
 
  19#include <traceevent/event-parse.h>
  20#include <api/fs/tracing_path.h>
 
 
 
  21#include "builtin.h"
 
  22#include "util/color.h"
 
  23#include "util/debug.h"
 
 
 
 
 
 
  24#include "util/evlist.h"
 
 
 
  25#include <subcmd/exec-cmd.h>
  26#include "util/machine.h"
 
 
 
  27#include "util/session.h"
  28#include "util/thread.h"
  29#include <subcmd/parse-options.h>
  30#include "util/strlist.h"
  31#include "util/intlist.h"
  32#include "util/thread_map.h"
  33#include "util/stat.h"
 
 
 
  34#include "trace-event.h"
  35#include "util/parse-events.h"
  36#include "util/bpf-loader.h"
  37
  38#include <libaudit.h>
 
 
 
 
 
 
 
 
 
  39#include <stdlib.h>
  40#include <sys/mman.h>
  41#include <linux/futex.h>
  42#include <linux/err.h>
 
 
 
 
 
 
 
 
  43
/*
 * For older distros:
 *
 * Fallback definitions of flag constants, each one only used when the
 * headers included above did not already provide the name.
 */

/* mmap() flag */
#ifndef MAP_STACK
# define MAP_STACK		0x20000
#endif

/* madvise() advice values */
#ifndef MADV_HWPOISON
# define MADV_HWPOISON		100

#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE	13
#endif

/* eventfd flags */
#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE		1
#endif

#ifndef EFD_NONBLOCK
# define EFD_NONBLOCK		00004000
#endif

#ifndef EFD_CLOEXEC
# define EFD_CLOEXEC		02000000
#endif

/* open() flag */
#ifndef O_CLOEXEC
# define O_CLOEXEC		02000000
#endif

/* socket type and socket()/recvmsg() flags */
#ifndef SOCK_DCCP
# define SOCK_DCCP		6
#endif

#ifndef SOCK_CLOEXEC
# define SOCK_CLOEXEC		02000000
#endif

#ifndef SOCK_NONBLOCK
# define SOCK_NONBLOCK		00004000
#endif

#ifndef MSG_CMSG_CLOEXEC
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* PERF_FLAG_* bits */
#ifndef PERF_FLAG_FD_NO_GROUP
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
 109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 110
/*
 * Accessor for one field of a tracepoint's raw payload: the byte offset of
 * the field inside sample->raw_data plus the function used to read it,
 * either as an integer or as a pointer into the payload.
 */
struct tp_field {
	int offset;	/* byte offset into sample->raw_data */
	union {
		/* returns the field value widened to u64 */
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		/* returns the address of the field inside the payload */
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
 118
/*
 * Generate tp_field__u<bits>(): read an unsigned integer of the given width
 * from sample->raw_data at field->offset and return it as u64. memcpy() is
 * used for the read rather than dereferencing a cast pointer.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value;  \
}

/* Readers for 8, 16, 32 and 64 bit fields. */
TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);
 131
/*
 * Generate tp_field__swapped_u<bits>(): same as tp_field__u<bits>() but the
 * value is passed through bswap_<bits>() before being returned.
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

/* No 8 bit variant: a single byte has nothing to swap. */
TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
 143
 144static int tp_field__init_uint(struct tp_field *field,
 145			       struct format_field *format_field,
 146			       bool needs_swap)
 147{
 148	field->offset = format_field->offset;
 149
 150	switch (format_field->size) {
 151	case 1:
 152		field->integer = tp_field__u8;
 153		break;
 154	case 2:
 155		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
 156		break;
 157	case 4:
 158		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
 159		break;
 160	case 8:
 161		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
 162		break;
 163	default:
 164		return -1;
 165	}
 166
 167	return 0;
 168}
 169
 
 
 
 
 
 170static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
 171{
 172	return sample->raw_data + field->offset;
 173}
 174
 175static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
 176{
 177	field->offset = format_field->offset;
 178	field->pointer = tp_field__ptr;
 179	return 0;
 180}
 181
 
 
 
 
 
/*
 * Field accessors for a syscall enter/exit tracepoint, stored in
 * evsel->priv: the 'id' field plus, sharing storage in a union, either the
 * 'args' field or the 'ret' field.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
 188
 189static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 190					  struct tp_field *field,
 191					  const char *name)
 192{
 193	struct format_field *format_field = perf_evsel__field(evsel, name);
 194
 195	if (format_field == NULL)
 196		return -1;
 197
 198	return tp_field__init_uint(field, format_field, evsel->needs_swap);
 199}
 200
/*
 * Initialize the integer accessor for member 'name' of the struct syscall_tp
 * in evsel->priv, from the tracepoint field of the same name (stringified).
 * evsel->priv must already point to a struct syscall_tp.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 204
 205static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 206					 struct tp_field *field,
 207					 const char *name)
 208{
 209	struct format_field *format_field = perf_evsel__field(evsel, name);
 210
 211	if (format_field == NULL)
 212		return -1;
 213
 214	return tp_field__init_ptr(field, format_field);
 215}
 216
/*
 * Initialize the pointer accessor for member 'name' of the struct syscall_tp
 * in evsel->priv, from the tracepoint field of the same name (stringified).
 * evsel->priv must already point to a struct syscall_tp.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 220
 221static void perf_evsel__delete_priv(struct perf_evsel *evsel)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 222{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 223	zfree(&evsel->priv);
 224	perf_evsel__delete(evsel);
 
 
 
 
 
 
 
 225}
 226
 227static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
 
 
 
 
 
 
 
 228{
 229	evsel->priv = malloc(sizeof(struct syscall_tp));
 230	if (evsel->priv != NULL) {
 231		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
 232			goto out_delete;
 233
 234		evsel->handler = handler;
 235		return 0;
 236	}
 237
 238	return -ENOMEM;
 239
 240out_delete:
 241	zfree(&evsel->priv);
 242	return -ENOENT;
 243}
 244
 245static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
 246{
 247	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 248
 249	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 250	if (IS_ERR(evsel))
 251		evsel = perf_evsel__newtp("syscalls", direction);
 252
 253	if (IS_ERR(evsel))
 254		return NULL;
 255
 256	if (perf_evsel__init_syscall_tp(evsel, handler))
 257		goto out_delete;
 258
 259	return evsel;
 260
 261out_delete:
 262	perf_evsel__delete_priv(evsel);
 263	return NULL;
 264}
 265
/*
 * Read member 'name' of the struct syscall_tp in evsel->priv from 'sample'
 * through its integer accessor; evaluates to the u64 that accessor returns.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/*
 * Same, through the pointer accessor; evaluates to the void * that accessor
 * returns.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
 273
/*
 * Context handed to the syscall_arg__scnprintf_*() argument formatters
 * ("beautifiers").
 */
struct syscall_arg {
	unsigned long val;	/* raw value of the argument being formatted */
	struct thread *thread;
	struct trace  *trace;
	void	      *parm;	/* formatter specific data, e.g. a struct strarray */
	u8	      idx;	/* NOTE(review): presumably the argument's position - confirm */
	u8	      mask;	/* NOTE(review): not used by the formatters visible here - confirm meaning */
};
 282
/*
 * Table mapping small integers to names: value 'offset + i' is printed as
 * entries[i], for 0 <= i < nr_entries.
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

/* Define strarray__<array> describing 'array', with values starting at 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Same, but entries[0] corresponds to the value 'off' instead of 0. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
 299
 300static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
 301						const char *intfmt,
 302					        struct syscall_arg *arg)
 303{
 304	struct strarray *sa = arg->parm;
 305	int idx = arg->val - sa->offset;
 306
 307	if (idx < 0 || idx >= sa->nr_entries)
 308		return scnprintf(bf, size, intfmt, arg->val);
 309
 310	return scnprintf(bf, size, "%s", sa->entries[idx]);
 311}
 312
 313static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
 314					      struct syscall_arg *arg)
 315{
 316	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
 317}
 318
 319#define SCA_STRARRAY syscall_arg__scnprintf_strarray
 320
 321#if defined(__i386__) || defined(__x86_64__)
 322/*
 323 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 324 * 	  gets rewritten to support all arches.
 325 */
 326static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
 327						 struct syscall_arg *arg)
 328{
 329	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
 330}
 331
 332#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
 333#endif /* defined(__i386__) || defined(__x86_64__) */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 334
 335static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
 336					struct syscall_arg *arg);
 
 
 
 337
 338#define SCA_FD syscall_arg__scnprintf_fd
 
 
 339
 340static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
 341					   struct syscall_arg *arg)
 342{
 343	int fd = arg->val;
 
 344
 345	if (fd == AT_FDCWD)
 346		return scnprintf(bf, size, "CWD");
 347
 348	return syscall_arg__scnprintf_fd(bf, size, arg);
 349}
 350
 351#define SCA_FDAT syscall_arg__scnprintf_fd_at
 352
 353static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
 354					      struct syscall_arg *arg);
 355
 356#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
 357
 358static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
 359					 struct syscall_arg *arg)
 360{
 361	return scnprintf(bf, size, "%#lx", arg->val);
 362}
 363
 364#define SCA_HEX syscall_arg__scnprintf_hex
 365
 366static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
 367					 struct syscall_arg *arg)
 368{
 369	return scnprintf(bf, size, "%d", arg->val);
 
 
 370}
 371
 372#define SCA_INT syscall_arg__scnprintf_int
 373
 374static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 375					       struct syscall_arg *arg)
 376{
 377	int printed = 0, prot = arg->val;
 378
 379	if (prot == PROT_NONE)
 380		return scnprintf(bf, size, "NONE");
 381#define	P_MMAP_PROT(n) \
 382	if (prot & PROT_##n) { \
 383		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 384		prot &= ~PROT_##n; \
 385	}
 386
 387	P_MMAP_PROT(EXEC);
 388	P_MMAP_PROT(READ);
 389	P_MMAP_PROT(WRITE);
 390#ifdef PROT_SEM
 391	P_MMAP_PROT(SEM);
 392#endif
 393	P_MMAP_PROT(GROWSDOWN);
 394	P_MMAP_PROT(GROWSUP);
 395#undef P_MMAP_PROT
 396
 397	if (prot)
 398		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
 399
 400	return printed;
 401}
 402
 403#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 404
 405static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 406						struct syscall_arg *arg)
 407{
 408	int printed = 0, flags = arg->val;
 409
 410#define	P_MMAP_FLAG(n) \
 411	if (flags & MAP_##n) { \
 412		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 413		flags &= ~MAP_##n; \
 414	}
 415
 416	P_MMAP_FLAG(SHARED);
 417	P_MMAP_FLAG(PRIVATE);
 418#ifdef MAP_32BIT
 419	P_MMAP_FLAG(32BIT);
 420#endif
 421	P_MMAP_FLAG(ANONYMOUS);
 422	P_MMAP_FLAG(DENYWRITE);
 423	P_MMAP_FLAG(EXECUTABLE);
 424	P_MMAP_FLAG(FILE);
 425	P_MMAP_FLAG(FIXED);
 426	P_MMAP_FLAG(GROWSDOWN);
 427#ifdef MAP_HUGETLB
 428	P_MMAP_FLAG(HUGETLB);
 429#endif
 430	P_MMAP_FLAG(LOCKED);
 431	P_MMAP_FLAG(NONBLOCK);
 432	P_MMAP_FLAG(NORESERVE);
 433	P_MMAP_FLAG(POPULATE);
 434	P_MMAP_FLAG(STACK);
 435#ifdef MAP_UNINITIALIZED
 436	P_MMAP_FLAG(UNINITIALIZED);
 437#endif
 438#undef P_MMAP_FLAG
 439
 440	if (flags)
 441		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 442
 443	return printed;
 444}
 445
 446#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
 447
 448static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
 449						  struct syscall_arg *arg)
 450{
 451	int printed = 0, flags = arg->val;
 452
 453#define P_MREMAP_FLAG(n) \
 454	if (flags & MREMAP_##n) { \
 455		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 456		flags &= ~MREMAP_##n; \
 457	}
 458
 459	P_MREMAP_FLAG(MAYMOVE);
 460#ifdef MREMAP_FIXED
 461	P_MREMAP_FLAG(FIXED);
 462#endif
 463#undef P_MREMAP_FLAG
 464
 465	if (flags)
 466		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 467
 468	return printed;
 469}
 470
 471#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
 472
 473static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
 474						      struct syscall_arg *arg)
 475{
 476	int behavior = arg->val;
 477
 478	switch (behavior) {
 479#define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
 480	P_MADV_BHV(NORMAL);
 481	P_MADV_BHV(RANDOM);
 482	P_MADV_BHV(SEQUENTIAL);
 483	P_MADV_BHV(WILLNEED);
 484	P_MADV_BHV(DONTNEED);
 485	P_MADV_BHV(REMOVE);
 486	P_MADV_BHV(DONTFORK);
 487	P_MADV_BHV(DOFORK);
 488	P_MADV_BHV(HWPOISON);
 489#ifdef MADV_SOFT_OFFLINE
 490	P_MADV_BHV(SOFT_OFFLINE);
 491#endif
 492	P_MADV_BHV(MERGEABLE);
 493	P_MADV_BHV(UNMERGEABLE);
 494#ifdef MADV_HUGEPAGE
 495	P_MADV_BHV(HUGEPAGE);
 496#endif
 497#ifdef MADV_NOHUGEPAGE
 498	P_MADV_BHV(NOHUGEPAGE);
 499#endif
 500#ifdef MADV_DONTDUMP
 501	P_MADV_BHV(DONTDUMP);
 502#endif
 503#ifdef MADV_DODUMP
 504	P_MADV_BHV(DODUMP);
 505#endif
 506#undef P_MADV_PHV
 507	default: break;
 508	}
 509
 510	return scnprintf(bf, size, "%#x", behavior);
 511}
 512
 513#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
 514
 515static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
 516					   struct syscall_arg *arg)
 517{
 518	int printed = 0, op = arg->val;
 519
 520	if (op == 0)
 521		return scnprintf(bf, size, "NONE");
 522#define	P_CMD(cmd) \
 523	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
 524		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
 525		op &= ~LOCK_##cmd; \
 526	}
 527
 528	P_CMD(SH);
 529	P_CMD(EX);
 530	P_CMD(NB);
 531	P_CMD(UN);
 532	P_CMD(MAND);
 533	P_CMD(RW);
 534	P_CMD(READ);
 535	P_CMD(WRITE);
 536#undef P_OP
 537
 538	if (op)
 539		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
 540
 541	return printed;
 542}
 543
 544#define SCA_FLOCK syscall_arg__scnprintf_flock
 545
 546static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
 547{
 548	enum syscall_futex_args {
 549		SCF_UADDR   = (1 << 0),
 550		SCF_OP	    = (1 << 1),
 551		SCF_VAL	    = (1 << 2),
 552		SCF_TIMEOUT = (1 << 3),
 553		SCF_UADDR2  = (1 << 4),
 554		SCF_VAL3    = (1 << 5),
 555	};
 556	int op = arg->val;
 557	int cmd = op & FUTEX_CMD_MASK;
 558	size_t printed = 0;
 559
 560	switch (cmd) {
 561#define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
 562	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
 563	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 564	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 565	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
 566	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
 567	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
 568	P_FUTEX_OP(WAKE_OP);							  break;
 569	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 570	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 571	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
 572	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
 573	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
 574	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
 575	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
 576	}
 577
 578	if (op & FUTEX_PRIVATE_FLAG)
 579		printed += scnprintf(bf + printed, size - printed, "|PRIV");
 580
 581	if (op & FUTEX_CLOCK_REALTIME)
 582		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
 583
 584	return printed;
 585}
 586
 587#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
 588
 589static const char *bpf_cmd[] = {
 590	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
 591	"MAP_GET_NEXT_KEY", "PROG_LOAD",
 592};
 593static DEFINE_STRARRAY(bpf_cmd);
 
 
 
 
 
 
 
 
 
 594
 595static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
 596static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
 597
 598static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
 599static DEFINE_STRARRAY(itimers);
 600
 601static const char *keyctl_options[] = {
 602	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
 603	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
 604	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
 605	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
 606	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
 607};
 608static DEFINE_STRARRAY(keyctl_options);
 609
 610static const char *whences[] = { "SET", "CUR", "END",
 611#ifdef SEEK_DATA
 612"DATA",
 613#endif
 614#ifdef SEEK_HOLE
 615"HOLE",
 616#endif
 617};
 618static DEFINE_STRARRAY(whences);
 619
 620static const char *fcntl_cmds[] = {
 621	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
 622	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
 623	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
 624	"F_GETOWNER_UIDS",
 
 
 
 
 
 
 
 625};
 626static DEFINE_STRARRAY(fcntl_cmds);
 
 
 
 
 
 
 
 
 627
 628static const char *rlimit_resources[] = {
 629	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
 630	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
 631	"RTTIME",
 632};
 633static DEFINE_STRARRAY(rlimit_resources);
 634
 635static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
 636static DEFINE_STRARRAY(sighow);
 637
 638static const char *clockid[] = {
 639	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
 640	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
 641	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
 642};
 643static DEFINE_STRARRAY(clockid);
 644
 645static const char *socket_families[] = {
 646	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
 647	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
 648	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
 649	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
 650	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
 651	"ALG", "NFC", "VSOCK",
 652};
 653static DEFINE_STRARRAY(socket_families);
 654
 655#ifndef SOCK_TYPE_MASK
 656#define SOCK_TYPE_MASK 0xf
 657#endif
 658
 659static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
 660						      struct syscall_arg *arg)
 661{
 662	size_t printed;
 663	int type = arg->val,
 664	    flags = type & ~SOCK_TYPE_MASK;
 665
 666	type &= SOCK_TYPE_MASK;
 667	/*
 668 	 * Can't use a strarray, MIPS may override for ABI reasons.
 669 	 */
 670	switch (type) {
 671#define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
 672	P_SK_TYPE(STREAM);
 673	P_SK_TYPE(DGRAM);
 674	P_SK_TYPE(RAW);
 675	P_SK_TYPE(RDM);
 676	P_SK_TYPE(SEQPACKET);
 677	P_SK_TYPE(DCCP);
 678	P_SK_TYPE(PACKET);
 679#undef P_SK_TYPE
 680	default:
 681		printed = scnprintf(bf, size, "%#x", type);
 682	}
 683
 684#define	P_SK_FLAG(n) \
 685	if (flags & SOCK_##n) { \
 686		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
 687		flags &= ~SOCK_##n; \
 688	}
 689
 690	P_SK_FLAG(CLOEXEC);
 691	P_SK_FLAG(NONBLOCK);
 692#undef P_SK_FLAG
 693
 694	if (flags)
 695		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
 696
 697	return printed;
 698}
 699
 700#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
 701
 702#ifndef MSG_PROBE
 703#define MSG_PROBE	     0x10
 704#endif
 705#ifndef MSG_WAITFORONE
 706#define MSG_WAITFORONE	0x10000
 707#endif
 708#ifndef MSG_SENDPAGE_NOTLAST
 709#define MSG_SENDPAGE_NOTLAST 0x20000
 710#endif
 711#ifndef MSG_FASTOPEN
 712#define MSG_FASTOPEN	     0x20000000
 713#endif
 714
 715static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
 716					       struct syscall_arg *arg)
 717{
 718	int printed = 0, flags = arg->val;
 719
 720	if (flags == 0)
 721		return scnprintf(bf, size, "NONE");
 722#define	P_MSG_FLAG(n) \
 723	if (flags & MSG_##n) { \
 724		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 725		flags &= ~MSG_##n; \
 726	}
 727
 728	P_MSG_FLAG(OOB);
 729	P_MSG_FLAG(PEEK);
 730	P_MSG_FLAG(DONTROUTE);
 731	P_MSG_FLAG(TRYHARD);
 732	P_MSG_FLAG(CTRUNC);
 733	P_MSG_FLAG(PROBE);
 734	P_MSG_FLAG(TRUNC);
 735	P_MSG_FLAG(DONTWAIT);
 736	P_MSG_FLAG(EOR);
 737	P_MSG_FLAG(WAITALL);
 738	P_MSG_FLAG(FIN);
 739	P_MSG_FLAG(SYN);
 740	P_MSG_FLAG(CONFIRM);
 741	P_MSG_FLAG(RST);
 742	P_MSG_FLAG(ERRQUEUE);
 743	P_MSG_FLAG(NOSIGNAL);
 744	P_MSG_FLAG(MORE);
 745	P_MSG_FLAG(WAITFORONE);
 746	P_MSG_FLAG(SENDPAGE_NOTLAST);
 747	P_MSG_FLAG(FASTOPEN);
 748	P_MSG_FLAG(CMSG_CLOEXEC);
 749#undef P_MSG_FLAG
 750
 751	if (flags)
 752		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 753
 754	return printed;
 755}
 756
 757#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
 758
 759static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
 760						 struct syscall_arg *arg)
 761{
 
 
 762	size_t printed = 0;
 763	int mode = arg->val;
 764
 765	if (mode == F_OK) /* 0 */
 766		return scnprintf(bf, size, "F");
 767#define	P_MODE(n) \
 768	if (mode & n##_OK) { \
 769		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
 770		mode &= ~n##_OK; \
 771	}
 772
 773	P_MODE(R);
 774	P_MODE(W);
 775	P_MODE(X);
 776#undef P_MODE
 777
 778	if (mode)
 779		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
 780
 781	return printed;
 782}
 783
 784#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
 785
 786static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
 787					      struct syscall_arg *arg);
 788
 789#define SCA_FILENAME syscall_arg__scnprintf_filename
 790
 791static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
 792					       struct syscall_arg *arg)
 793{
 
 
 794	int printed = 0, flags = arg->val;
 795
 796	if (!(flags & O_CREAT))
 797		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
 798
 799	if (flags == 0)
 800		return scnprintf(bf, size, "RDONLY");
 801#define	P_FLAG(n) \
 802	if (flags & O_##n) { \
 803		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 804		flags &= ~O_##n; \
 805	}
 806
 807	P_FLAG(APPEND);
 808	P_FLAG(ASYNC);
 809	P_FLAG(CLOEXEC);
 810	P_FLAG(CREAT);
 811	P_FLAG(DIRECT);
 812	P_FLAG(DIRECTORY);
 813	P_FLAG(EXCL);
 814	P_FLAG(LARGEFILE);
 815	P_FLAG(NOATIME);
 816	P_FLAG(NOCTTY);
 817#ifdef O_NONBLOCK
 818	P_FLAG(NONBLOCK);
 819#elif O_NDELAY
 820	P_FLAG(NDELAY);
 821#endif
 822#ifdef O_PATH
 823	P_FLAG(PATH);
 824#endif
 825	P_FLAG(RDWR);
 826#ifdef O_DSYNC
 827	if ((flags & O_SYNC) == O_SYNC)
 828		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
 829	else {
 830		P_FLAG(DSYNC);
 831	}
 832#else
 833	P_FLAG(SYNC);
 834#endif
 835	P_FLAG(TRUNC);
 836	P_FLAG(WRONLY);
 837#undef P_FLAG
 838
 839	if (flags)
 840		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 841
 842	return printed;
 843}
 844
 845#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
 846
 847static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
 848						struct syscall_arg *arg)
 849{
 850	int printed = 0, flags = arg->val;
 851
 852	if (flags == 0)
 853		return 0;
 854
 855#define	P_FLAG(n) \
 856	if (flags & PERF_FLAG_##n) { \
 857		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 858		flags &= ~PERF_FLAG_##n; \
 859	}
 860
 861	P_FLAG(FD_NO_GROUP);
 862	P_FLAG(FD_OUTPUT);
 863	P_FLAG(PID_CGROUP);
 864	P_FLAG(FD_CLOEXEC);
 865#undef P_FLAG
 866
 867	if (flags)
 868		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 869
 870	return printed;
 871}
 872
 873#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
 874
 875static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
 876						   struct syscall_arg *arg)
 877{
 
 
 878	int printed = 0, flags = arg->val;
 879
 880	if (flags == 0)
 881		return scnprintf(bf, size, "NONE");
 882#define	P_FLAG(n) \
 883	if (flags & EFD_##n) { \
 884		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 885		flags &= ~EFD_##n; \
 886	}
 887
 888	P_FLAG(SEMAPHORE);
 889	P_FLAG(CLOEXEC);
 890	P_FLAG(NONBLOCK);
 891#undef P_FLAG
 892
 893	if (flags)
 894		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 895
 896	return printed;
 897}
 898
 899#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
 900
 901static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
 902						struct syscall_arg *arg)
 903{
 904	int printed = 0, flags = arg->val;
 905
 906#define	P_FLAG(n) \
 907	if (flags & O_##n) { \
 908		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 909		flags &= ~O_##n; \
 910	}
 911
 912	P_FLAG(CLOEXEC);
 913	P_FLAG(NONBLOCK);
 914#undef P_FLAG
 915
 916	if (flags)
 917		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 918
 919	return printed;
 920}
 921
 922#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 923
 924static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
 925{
 926	int sig = arg->val;
 927
 928	switch (sig) {
 929#define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
 930	P_SIGNUM(HUP);
 931	P_SIGNUM(INT);
 932	P_SIGNUM(QUIT);
 933	P_SIGNUM(ILL);
 934	P_SIGNUM(TRAP);
 935	P_SIGNUM(ABRT);
 936	P_SIGNUM(BUS);
 937	P_SIGNUM(FPE);
 938	P_SIGNUM(KILL);
 939	P_SIGNUM(USR1);
 940	P_SIGNUM(SEGV);
 941	P_SIGNUM(USR2);
 942	P_SIGNUM(PIPE);
 943	P_SIGNUM(ALRM);
 944	P_SIGNUM(TERM);
 945	P_SIGNUM(CHLD);
 946	P_SIGNUM(CONT);
 947	P_SIGNUM(STOP);
 948	P_SIGNUM(TSTP);
 949	P_SIGNUM(TTIN);
 950	P_SIGNUM(TTOU);
 951	P_SIGNUM(URG);
 952	P_SIGNUM(XCPU);
 953	P_SIGNUM(XFSZ);
 954	P_SIGNUM(VTALRM);
 955	P_SIGNUM(PROF);
 956	P_SIGNUM(WINCH);
 957	P_SIGNUM(IO);
 958	P_SIGNUM(PWR);
 959	P_SIGNUM(SYS);
 960#ifdef SIGEMT
 961	P_SIGNUM(EMT);
 962#endif
 963#ifdef SIGSTKFLT
 964	P_SIGNUM(STKFLT);
 965#endif
 966#ifdef SIGSWI
 967	P_SIGNUM(SWI);
 968#endif
 969	default: break;
 970	}
 971
 972	return scnprintf(bf, size, "%#x", sig);
 973}
 974
 975#define SCA_SIGNUM syscall_arg__scnprintf_signum
 976
 977#if defined(__i386__) || defined(__x86_64__)
 978/*
 979 * FIXME: Make this available to all arches.
 980 */
 981#define TCGETS		0x5401
 982
 983static const char *tioctls[] = {
 984	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
 985	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
 986	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
 987	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
 988	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
 989	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
 990	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
 991	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
 992	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
 993	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
 994	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
 995	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
 996	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
 997	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
 998	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
 999};
1000
1001static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
1002#endif /* defined(__i386__) || defined(__x86_64__) */
1003
1004#define STRARRAY(arg, name, array) \
1005	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
1006	  .arg_parm	 = { [arg] = &strarray__##array, }
1007
1008static struct syscall_fmt {
1009	const char *name;
1010	const char *alias;
1011	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1012	void	   *arg_parm[6];
1013	bool	   errmsg;
 
 
 
 
1014	bool	   timeout;
1015	bool	   hexret;
1016} syscall_fmts[] = {
1017	{ .name	    = "access",	    .errmsg = true,
1018	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1019			     [1] = SCA_ACCMODE,  /* mode */ }, },
1020	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
1021	{ .name	    = "bpf",	    .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
 
 
 
 
 
 
1022	{ .name	    = "brk",	    .hexret = true,
1023	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1024	{ .name	    = "chdir",	    .errmsg = true,
1025	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1026	{ .name	    = "chmod",	    .errmsg = true,
1027	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1028	{ .name	    = "chroot",	    .errmsg = true,
1029	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1030	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
1031	{ .name	    = "close",	    .errmsg = true,
1032	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1033	{ .name	    = "connect",    .errmsg = true, },
1034	{ .name	    = "creat",	    .errmsg = true,
1035	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1036	{ .name	    = "dup",	    .errmsg = true,
1037	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1038	{ .name	    = "dup2",	    .errmsg = true,
1039	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1040	{ .name	    = "dup3",	    .errmsg = true,
1041	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1042	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1043	{ .name	    = "eventfd2",   .errmsg = true,
1044	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1045	{ .name	    = "faccessat",  .errmsg = true,
1046	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1047			     [1] = SCA_FILENAME, /* filename */ }, },
1048	{ .name	    = "fadvise64",  .errmsg = true,
1049	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1050	{ .name	    = "fallocate",  .errmsg = true,
1051	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1052	{ .name	    = "fchdir",	    .errmsg = true,
1053	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1054	{ .name	    = "fchmod",	    .errmsg = true,
1055	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056	{ .name	    = "fchmodat",   .errmsg = true,
1057	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1058			     [1] = SCA_FILENAME, /* filename */ }, },
1059	{ .name	    = "fchown",	    .errmsg = true,
1060	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061	{ .name	    = "fchownat",   .errmsg = true,
1062	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1063			     [1] = SCA_FILENAME, /* filename */ }, },
1064	{ .name	    = "fcntl",	    .errmsg = true,
1065	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1066			     [1] = SCA_STRARRAY, /* cmd */ },
1067	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1068	{ .name	    = "fdatasync",  .errmsg = true,
1069	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1070	{ .name	    = "flock",	    .errmsg = true,
1071	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1072			     [1] = SCA_FLOCK, /* cmd */ }, },
1073	{ .name	    = "fsetxattr",  .errmsg = true,
1074	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1075	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
1076	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1077	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
1078	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1079			     [1] = SCA_FILENAME, /* filename */ }, },
1080	{ .name	    = "fstatfs",    .errmsg = true,
1081	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1082	{ .name	    = "fsync",    .errmsg = true,
1083	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1084	{ .name	    = "ftruncate", .errmsg = true,
1085	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1086	{ .name	    = "futex",	    .errmsg = true,
1087	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1088	{ .name	    = "futimesat", .errmsg = true,
1089	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1090			     [1] = SCA_FILENAME, /* filename */ }, },
1091	{ .name	    = "getdents",   .errmsg = true,
1092	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1093	{ .name	    = "getdents64", .errmsg = true,
1094	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1095	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1096	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1097	{ .name	    = "getxattr",    .errmsg = true,
1098	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1099	{ .name	    = "inotify_add_watch",	    .errmsg = true,
1100	  .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1101	{ .name	    = "ioctl",	    .errmsg = true,
1102	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1103#if defined(__i386__) || defined(__x86_64__)
1104/*
1105 * FIXME: Make this available to all arches.
1106 */
1107			     [1] = SCA_STRHEXARRAY, /* cmd */
1108			     [2] = SCA_HEX, /* arg */ },
1109	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
1110#else
1111			     [2] = SCA_HEX, /* arg */ }, },
1112#endif
1113	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
1114	{ .name	    = "kill",	    .errmsg = true,
1115	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1116	{ .name	    = "lchown",    .errmsg = true,
1117	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1118	{ .name	    = "lgetxattr",  .errmsg = true,
1119	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1120	{ .name	    = "linkat",	    .errmsg = true,
1121	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1122	{ .name	    = "listxattr",  .errmsg = true,
1123	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1124	{ .name	    = "llistxattr", .errmsg = true,
1125	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1126	{ .name	    = "lremovexattr",  .errmsg = true,
1127	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1128	{ .name	    = "lseek",	    .errmsg = true,
1129	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1130			     [2] = SCA_STRARRAY, /* whence */ },
1131	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
1132	{ .name	    = "lsetxattr",  .errmsg = true,
1133	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1134	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat",
1135	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1136	{ .name	    = "lsxattr",    .errmsg = true,
1137	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1138	{ .name     = "madvise",    .errmsg = true,
1139	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
1140			     [2] = SCA_MADV_BHV, /* behavior */ }, },
1141	{ .name	    = "mkdir",    .errmsg = true,
1142	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1143	{ .name	    = "mkdirat",    .errmsg = true,
1144	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1145			     [1] = SCA_FILENAME, /* pathname */ }, },
1146	{ .name	    = "mknod",      .errmsg = true,
1147	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1148	{ .name	    = "mknodat",    .errmsg = true,
1149	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1150			     [1] = SCA_FILENAME, /* filename */ }, },
1151	{ .name	    = "mlock",	    .errmsg = true,
1152	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1153	{ .name	    = "mlockall",   .errmsg = true,
1154	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1155	{ .name	    = "mmap",	    .hexret = true,
1156	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
1157			     [2] = SCA_MMAP_PROT, /* prot */
1158			     [3] = SCA_MMAP_FLAGS, /* flags */
1159			     [4] = SCA_FD, 	  /* fd */ }, },
1160	{ .name	    = "mprotect",   .errmsg = true,
1161	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
1162			     [2] = SCA_MMAP_PROT, /* prot */ }, },
1163	{ .name	    = "mq_unlink", .errmsg = true,
1164	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
 
 
 
 
 
 
 
 
 
 
 
 
 
1165	{ .name	    = "mremap",	    .hexret = true,
1166	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1167			     [3] = SCA_MREMAP_FLAGS, /* flags */
1168			     [4] = SCA_HEX, /* new_addr */ }, },
1169	{ .name	    = "munlock",    .errmsg = true,
1170	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1171	{ .name	    = "munmap",	    .errmsg = true,
1172	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1173	{ .name	    = "name_to_handle_at", .errmsg = true,
1174	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1175	{ .name	    = "newfstatat", .errmsg = true,
1176	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1177			     [1] = SCA_FILENAME, /* filename */ }, },
1178	{ .name	    = "open",	    .errmsg = true,
1179	  .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1180			     [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1181	{ .name	    = "open_by_handle_at", .errmsg = true,
1182	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1183			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1184	{ .name	    = "openat",	    .errmsg = true,
1185	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1186			     [1] = SCA_FILENAME, /* filename */
1187			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1188	{ .name	    = "perf_event_open", .errmsg = true,
1189	  .arg_scnprintf = { [1] = SCA_INT, /* pid */
1190			     [2] = SCA_INT, /* cpu */
1191			     [3] = SCA_FD,  /* group_fd */
1192			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1193	{ .name	    = "pipe2",	    .errmsg = true,
1194	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1195	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
1196	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
1197	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
1198	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1199	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
1200	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1201	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1202	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1203	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1204	{ .name	    = "pwritev",    .errmsg = true,
1205	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1206	{ .name	    = "read",	    .errmsg = true,
1207	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1208	{ .name	    = "readlink",   .errmsg = true,
1209	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1210	{ .name	    = "readlinkat", .errmsg = true,
1211	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1212			     [1] = SCA_FILENAME, /* pathname */ }, },
1213	{ .name	    = "readv",	    .errmsg = true,
1214	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1215	{ .name	    = "recvfrom",   .errmsg = true,
1216	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1217			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1218	{ .name	    = "recvmmsg",   .errmsg = true,
1219	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1220			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1221	{ .name	    = "recvmsg",    .errmsg = true,
1222	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1223			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1224	{ .name	    = "removexattr", .errmsg = true,
1225	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1226	{ .name	    = "renameat",   .errmsg = true,
1227	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1228	{ .name	    = "rmdir",    .errmsg = true,
1229	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1230	{ .name	    = "rt_sigaction", .errmsg = true,
1231	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1232	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1233	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1234	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1235	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1236	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1237	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1238	{ .name	    = "sendmmsg",    .errmsg = true,
1239	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1240			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1241	{ .name	    = "sendmsg",    .errmsg = true,
1242	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1243			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1244	{ .name	    = "sendto",	    .errmsg = true,
1245	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1246			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1247	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1248	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1249	{ .name	    = "setxattr",   .errmsg = true,
1250	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1251	{ .name	    = "shutdown",   .errmsg = true,
1252	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1253	{ .name	    = "socket",	    .errmsg = true,
1254	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1255			     [1] = SCA_SK_TYPE, /* type */ },
1256	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1257	{ .name	    = "socketpair", .errmsg = true,
1258	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1259			     [1] = SCA_SK_TYPE, /* type */ },
1260	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1261	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat",
1262	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1263	{ .name	    = "statfs",	    .errmsg = true,
1264	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1265	{ .name	    = "swapoff",    .errmsg = true,
1266	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1267	{ .name	    = "swapon",	    .errmsg = true,
1268	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1269	{ .name	    = "symlinkat",  .errmsg = true,
1270	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1271	{ .name	    = "tgkill",	    .errmsg = true,
1272	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1273	{ .name	    = "tkill",	    .errmsg = true,
1274	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1275	{ .name	    = "truncate",   .errmsg = true,
1276	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1277	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1278	{ .name	    = "unlinkat",   .errmsg = true,
1279	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1280			     [1] = SCA_FILENAME, /* pathname */ }, },
1281	{ .name	    = "utime",  .errmsg = true,
1282	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1283	{ .name	    = "utimensat",  .errmsg = true,
1284	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1285			     [1] = SCA_FILENAME, /* filename */ }, },
1286	{ .name	    = "utimes",  .errmsg = true,
1287	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1288	{ .name	    = "vmsplice",  .errmsg = true,
1289	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1290	{ .name	    = "write",	    .errmsg = true,
1291	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1292	{ .name	    = "writev",	    .errmsg = true,
1293	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1294};
1295
1296static int syscall_fmt__cmp(const void *name, const void *fmtp)
1297{
1298	const struct syscall_fmt *fmt = fmtp;
1299	return strcmp(name, fmt->name);
1300}
1301
1302static struct syscall_fmt *syscall_fmt__find(const char *name)
1303{
1304	const int nmemb = ARRAY_SIZE(syscall_fmts);
1305	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1306}
1307
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * What is known about one syscall, stored in trace->syscalls.table[] and
 * indexed by syscall id.
 */
struct syscall {
	/* Format of the matching syscalls:sys_enter_* tracepoint. */
	struct event_format *tp_format;
	/* Number of entries reachable through @args. */
	int nr_args;
	/* First argument field of @tp_format (syscall number field skipped). */
	struct format_field *args;
	/* Syscall name; a NULL name marks a table slot as not yet read. */
	const char *name;
	/* True for "exit" and "exit_group". */
	bool is_exit;
	/* Matching syscall_fmts[] entry, or NULL if there is none. */
	struct syscall_fmt *fmt;
	/* Per-argument formatters; a NULL slot means "print as a plain long". */
	size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* Per-argument extra parameter handed to the formatter via arg->parm. */
	void **arg_parm;
};
1318
1319static size_t fprintf_duration(unsigned long t, FILE *fp)
 
 
 
 
 
 
 
1320{
1321	double duration = (double)t / NSEC_PER_MSEC;
1322	size_t printed = fprintf(fp, "(");
1323
1324	if (duration >= 1.0)
 
 
1325		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1326	else if (duration >= 0.01)
1327		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1328	else
1329		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1330	return printed + fprintf(fp, "): ");
1331}
1332
1333/**
1334 * filename.ptr: The filename char pointer that will be vfs_getname'd
1335 * filename.entry_str_pos: Where to insert the string translated from
1336 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 
 
1337 */
1338struct thread_trace {
1339	u64		  entry_time;
1340	u64		  exit_time;
1341	bool		  entry_pending;
1342	unsigned long	  nr_events;
1343	unsigned long	  pfmaj, pfmin;
1344	char		  *entry_str;
1345	double		  runtime_ms;
 
1346        struct {
1347		unsigned long ptr;
1348		short int     entry_str_pos;
1349		bool	      pending_open;
1350		unsigned int  namelen;
1351		char	      *name;
1352	} filename;
1353	struct {
1354		int	  max;
1355		char	  **table;
1356	} paths;
1357
1358	struct intlist *syscall_stats;
1359};
1360
1361static struct thread_trace *thread_trace__new(void)
1362{
1363	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1364
1365	if (ttrace)
1366		ttrace->paths.max = -1;
1367
1368	ttrace->syscall_stats = intlist__new(NULL);
1369
1370	return ttrace;
1371}
1372
1373static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1374{
1375	struct thread_trace *ttrace;
1376
1377	if (thread == NULL)
1378		goto fail;
1379
1380	if (thread__priv(thread) == NULL)
1381		thread__set_priv(thread, thread_trace__new());
1382
1383	if (thread__priv(thread) == NULL)
1384		goto fail;
1385
1386	ttrace = thread__priv(thread);
1387	++ttrace->nr_events;
1388
1389	return ttrace;
1390fail:
1391	color_fprintf(fp, PERF_COLOR_RED,
1392		      "WARNING: not enough memory, dropping samples!\n");
1393	return NULL;
1394}
1395
 
 
 
 
 
 
 
 
 
1396#define TRACE_PFMAJ		(1 << 0)
1397#define TRACE_PFMIN		(1 << 1)
1398
1399static const size_t trace__entry_str_size = 2048;
1400
1401struct trace {
1402	struct perf_tool	tool;
1403	struct {
1404		int		machine;
1405		int		open_id;
1406	}			audit;
1407	struct {
1408		int		max;
1409		struct syscall  *table;
1410		struct {
1411			struct perf_evsel *sys_enter,
1412					  *sys_exit;
1413		}		events;
1414	} syscalls;
1415	struct record_opts	opts;
1416	struct perf_evlist	*evlist;
1417	struct machine		*host;
1418	struct thread		*current;
1419	u64			base_time;
1420	FILE			*output;
1421	unsigned long		nr_events;
1422	struct strlist		*ev_qualifier;
1423	struct {
1424		size_t		nr;
1425		int		*entries;
1426	}			ev_qualifier_ids;
1427	struct intlist		*tid_list;
1428	struct intlist		*pid_list;
1429	struct {
1430		size_t		nr;
1431		pid_t		*entries;
1432	}			filter_pids;
1433	double			duration_filter;
1434	double			runtime_ms;
1435	struct {
1436		u64		vfs_getname,
1437				proc_getname;
1438	} stats;
1439	bool			not_ev_qualifier;
1440	bool			live;
1441	bool			full_time;
1442	bool			sched;
1443	bool			multiple_threads;
1444	bool			summary;
1445	bool			summary_only;
1446	bool			show_comm;
1447	bool			show_tool_stats;
1448	bool			trace_syscalls;
1449	bool			force;
1450	bool			vfs_getname;
1451	int			trace_pgfaults;
1452};
1453
1454static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1455{
1456	struct thread_trace *ttrace = thread__priv(thread);
 
1457
1458	if (fd > ttrace->paths.max) {
1459		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1460
1461		if (npath == NULL)
1462			return -1;
1463
1464		if (ttrace->paths.max != -1) {
1465			memset(npath + ttrace->paths.max + 1, 0,
1466			       (fd - ttrace->paths.max) * sizeof(char *));
1467		} else {
1468			memset(npath, 0, (fd + 1) * sizeof(char *));
1469		}
1470
1471		ttrace->paths.table = npath;
1472		ttrace->paths.max   = fd;
1473	}
1474
1475	ttrace->paths.table[fd] = strdup(pathname);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1476
1477	return ttrace->paths.table[fd] != NULL ? 0 : -1;
1478}
1479
1480static int thread__read_fd_path(struct thread *thread, int fd)
1481{
1482	char linkname[PATH_MAX], pathname[PATH_MAX];
1483	struct stat st;
1484	int ret;
1485
1486	if (thread->pid_ == thread->tid) {
1487		scnprintf(linkname, sizeof(linkname),
1488			  "/proc/%d/fd/%d", thread->pid_, fd);
1489	} else {
1490		scnprintf(linkname, sizeof(linkname),
1491			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1492	}
1493
1494	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1495		return -1;
1496
1497	ret = readlink(linkname, pathname, sizeof(pathname));
1498
1499	if (ret < 0 || ret > st.st_size)
1500		return -1;
1501
1502	pathname[ret] = '\0';
1503	return trace__set_fd_pathname(thread, fd, pathname);
1504}
1505
1506static const char *thread__fd_path(struct thread *thread, int fd,
1507				   struct trace *trace)
1508{
1509	struct thread_trace *ttrace = thread__priv(thread);
1510
1511	if (ttrace == NULL)
1512		return NULL;
1513
1514	if (fd < 0)
1515		return NULL;
1516
1517	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1518		if (!trace->live)
1519			return NULL;
1520		++trace->stats.proc_getname;
1521		if (thread__read_fd_path(thread, fd))
1522			return NULL;
1523	}
1524
1525	return ttrace->paths.table[fd];
1526}
1527
1528static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1529					struct syscall_arg *arg)
1530{
1531	int fd = arg->val;
1532	size_t printed = scnprintf(bf, size, "%d", fd);
1533	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1534
1535	if (path)
1536		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1537
1538	return printed;
1539}
1540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1541static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1542					      struct syscall_arg *arg)
1543{
1544	int fd = arg->val;
1545	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1546	struct thread_trace *ttrace = thread__priv(arg->thread);
1547
1548	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1549		zfree(&ttrace->paths.table[fd]);
1550
1551	return printed;
1552}
1553
1554static void thread__set_filename_pos(struct thread *thread, const char *bf,
1555				     unsigned long ptr)
1556{
1557	struct thread_trace *ttrace = thread__priv(thread);
1558
1559	ttrace->filename.ptr = ptr;
1560	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1561}
1562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1563static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1564					      struct syscall_arg *arg)
1565{
1566	unsigned long ptr = arg->val;
1567
 
 
 
1568	if (!arg->trace->vfs_getname)
1569		return scnprintf(bf, size, "%#x", ptr);
1570
1571	thread__set_filename_pos(arg->thread, bf, ptr);
1572	return 0;
1573}
1574
1575static bool trace__filter_duration(struct trace *trace, double t)
1576{
1577	return t < (trace->duration_filter * NSEC_PER_MSEC);
1578}
1579
1580static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1581{
1582	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1583
1584	return fprintf(fp, "%10.3f ", ts);
1585}
1586
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Flags written from the signal handler and read by the rest of the tool.
 * They are volatile sig_atomic_t, the type for objects shared with an
 * asynchronous signal handler; plain bool gave no such guarantee.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and remember whether the request came
 * from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1595
1596static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1597					u64 duration, u64 tstamp, FILE *fp)
1598{
1599	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1600	printed += fprintf_duration(duration, fp);
1601
1602	if (trace->multiple_threads) {
1603		if (trace->show_comm)
1604			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1605		printed += fprintf(fp, "%d ", thread->tid);
1606	}
1607
1608	return printed;
1609}
1610
 
 
 
 
 
 
 
 
 
 
 
 
1611static int trace__process_event(struct trace *trace, struct machine *machine,
1612				union perf_event *event, struct perf_sample *sample)
1613{
1614	int ret = 0;
1615
1616	switch (event->header.type) {
1617	case PERF_RECORD_LOST:
1618		color_fprintf(trace->output, PERF_COLOR_RED,
1619			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1620		ret = machine__process_lost_event(machine, event, sample);
 
1621	default:
1622		ret = machine__process_event(machine, event, sample);
1623		break;
1624	}
1625
1626	return ret;
1627}
1628
1629static int trace__tool_process(struct perf_tool *tool,
1630			       union perf_event *event,
1631			       struct perf_sample *sample,
1632			       struct machine *machine)
1633{
1634	struct trace *trace = container_of(tool, struct trace, tool);
1635	return trace__process_event(trace, machine, event, sample);
1636}
1637
1638static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1639{
1640	int err = symbol__init(NULL);
1641
1642	if (err)
1643		return err;
1644
1645	trace->host = machine__new_host();
1646	if (trace->host == NULL)
1647		return -ENOMEM;
1648
1649	if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1650		return -errno;
 
1651
1652	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1653					    evlist->threads, trace__tool_process, false,
1654					    trace->opts.proc_map_timeout);
 
1655	if (err)
1656		symbol__exit();
1657
1658	return err;
1659}
1660
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1661static int syscall__set_arg_fmts(struct syscall *sc)
1662{
1663	struct format_field *field;
1664	int idx = 0;
1665
1666	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1667	if (sc->arg_scnprintf == NULL)
1668		return -1;
1669
1670	if (sc->fmt)
1671		sc->arg_parm = sc->fmt->arg_parm;
1672
1673	for (field = sc->args; field; field = field->next) {
1674		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1675			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1676		else if (field->flags & FIELD_IS_POINTER)
1677			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1678		++idx;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1679	}
1680
 
 
 
1681	return 0;
1682}
1683
1684static int trace__read_syscall_info(struct trace *trace, int id)
1685{
1686	char tp_name[128];
1687	struct syscall *sc;
1688	const char *name = audit_syscall_to_name(id, trace->audit.machine);
1689
1690	if (name == NULL)
1691		return -1;
 
 
 
1692
1693	if (id > trace->syscalls.max) {
1694		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
 
1695
1696		if (nsyscalls == NULL)
1697			return -1;
1698
1699		if (trace->syscalls.max != -1) {
1700			memset(nsyscalls + trace->syscalls.max + 1, 0,
1701			       (id - trace->syscalls.max) * sizeof(*sc));
1702		} else {
1703			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1704		}
1705
1706		trace->syscalls.table = nsyscalls;
1707		trace->syscalls.max   = id;
1708	}
1709
1710	sc = trace->syscalls.table + id;
1711	sc->name = name;
1712
1713	sc->fmt  = syscall_fmt__find(sc->name);
1714
1715	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1716	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1717
1718	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1719		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1720		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1721	}
1722
 
 
 
1723	if (IS_ERR(sc->tp_format))
1724		return -1;
1725
1726	sc->args = sc->tp_format->format.fields;
1727	sc->nr_args = sc->tp_format->format.nr_fields;
1728	/*
1729	 * We need to check and discard the first variable '__syscall_nr'
1730	 * or 'nr' that mean the syscall number. It is needless here.
1731	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1732	 */
1733	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1734		sc->args = sc->args->next;
1735		--sc->nr_args;
1736	}
1737
1738	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
 
1739
1740	return syscall__set_arg_fmts(sc);
1741}
1742
 
 
 
 
 
 
 
1743static int trace__validate_ev_qualifier(struct trace *trace)
1744{
1745	int err = 0, i;
 
1746	struct str_node *pos;
 
1747
1748	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1749	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1750						 sizeof(trace->ev_qualifier_ids.entries[0]));
1751
1752	if (trace->ev_qualifier_ids.entries == NULL) {
1753		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1754		       trace->output);
1755		err = -EINVAL;
1756		goto out;
1757	}
1758
1759	i = 0;
1760
1761	strlist__for_each(pos, trace->ev_qualifier) {
1762		const char *sc = pos->s;
1763		int id = audit_name_to_syscall(sc, trace->audit.machine);
1764
1765		if (id < 0) {
1766			if (err == 0) {
1767				fputs("Error:\tInvalid syscall ", trace->output);
1768				err = -EINVAL;
 
 
 
 
1769			} else {
1770				fputs(", ", trace->output);
1771			}
1772
1773			fputs(sc, trace->output);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1774		}
1775
1776		trace->ev_qualifier_ids.entries[i++] = id;
1777	}
1778
1779	if (err < 0) {
1780		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1781		      "\nHint:\tand: 'man syscalls'\n", trace->output);
1782		zfree(&trace->ev_qualifier_ids.entries);
1783		trace->ev_qualifier_ids.nr = 0;
1784	}
1785out:
 
 
1786	return err;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1787}
1788
1789/*
1790 * args is to be interpreted as a series of longs but we need to handle
1791 * 8-byte unaligned accesses. args points to raw_data within the event
1792 * and raw_data is guaranteed to be 8-byte unaligned because it is
1793 * preceded by raw_size which is a u32. So we need to copy args to a temp
1794 * variable to read it. Most notably this avoids extended load instructions
1795 * on unaligned addresses
1796 */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1797
1798static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1799				      unsigned char *args, struct trace *trace,
1800				      struct thread *thread)
1801{
1802	size_t printed = 0;
1803	unsigned char *p;
1804	unsigned long val;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1805
1806	if (sc->args != NULL) {
1807		struct format_field *field;
1808		u8 bit = 1;
1809		struct syscall_arg arg = {
1810			.idx	= 0,
1811			.mask	= 0,
1812			.trace  = trace,
1813			.thread = thread,
1814		};
1815
1816		for (field = sc->args; field;
1817		     field = field->next, ++arg.idx, bit <<= 1) {
1818			if (arg.mask & bit)
1819				continue;
1820
1821			/* special care for unaligned accesses */
1822			p = args + sizeof(unsigned long) * arg.idx;
1823			memcpy(&val, p, sizeof(val));
 
 
 
1824
1825			/*
1826 			 * Suppress this argument if its value is zero and
1827 			 * and we don't have a string associated in an
1828 			 * strarray for it.
1829 			 */
1830			if (val == 0 &&
1831			    !(sc->arg_scnprintf &&
1832			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1833			      sc->arg_parm[arg.idx]))
 
 
 
1834				continue;
1835
1836			printed += scnprintf(bf + printed, size - printed,
1837					     "%s%s: ", printed ? ", " : "", field->name);
1838			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1839				arg.val = val;
1840				if (sc->arg_parm)
1841					arg.parm = sc->arg_parm[arg.idx];
1842				printed += sc->arg_scnprintf[arg.idx](bf + printed,
1843								      size - printed, &arg);
1844			} else {
1845				printed += scnprintf(bf + printed, size - printed,
1846						     "%ld", val);
1847			}
1848		}
1849	} else {
1850		int i = 0;
1851
1852		while (i < 6) {
1853			/* special care for unaligned accesses */
1854			p = args + sizeof(unsigned long) * i;
1855			memcpy(&val, p, sizeof(val));
1856			printed += scnprintf(bf + printed, size - printed,
1857					     "%sarg%d: %ld",
1858					     printed ? ", " : "", i, val);
1859			++i;
 
 
 
 
 
 
1860		}
1861	}
1862
1863	return printed;
1864}
1865
/*
 * Signature of the per-event sample handlers in this file; the handler
 * stored in an evsel is called through this type when a sample arrives.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1869
1870static struct syscall *trace__syscall_info(struct trace *trace,
1871					   struct perf_evsel *evsel, int id)
1872{
 
1873
1874	if (id < 0) {
1875
1876		/*
1877		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1878		 * before that, leaving at a higher verbosity level till that is
1879		 * explained. Reproduced with plain ftrace with:
1880		 *
1881		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1882		 * grep "NR -1 " /t/trace_pipe
1883		 *
1884		 * After generating some load on the machine.
1885 		 */
1886		if (verbose > 1) {
1887			static u64 n;
1888			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1889				id, perf_evsel__name(evsel), ++n);
1890		}
1891		return NULL;
1892	}
1893
1894	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1895	    trace__read_syscall_info(trace, id))
 
1896		goto out_cant_read;
1897
1898	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
 
1899		goto out_cant_read;
1900
 
 
 
 
 
 
1901	return &trace->syscalls.table[id];
1902
1903out_cant_read:
1904	if (verbose) {
1905		fprintf(trace->output, "Problems reading syscall %d", id);
1906		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
 
1907			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1908		fputs(" information\n", trace->output);
1909	}
1910	return NULL;
1911}
1912
1913static void thread__update_stats(struct thread_trace *ttrace,
1914				 int id, struct perf_sample *sample)
1915{
1916	struct int_node *inode;
1917	struct stats *stats;
1918	u64 duration = 0;
1919
1920	inode = intlist__findnew(ttrace->syscall_stats, id);
1921	if (inode == NULL)
1922		return;
1923
1924	stats = inode->priv;
1925	if (stats == NULL) {
1926		stats = malloc(sizeof(struct stats));
1927		if (stats == NULL)
1928			return;
1929		init_stats(stats);
1930		inode->priv = stats;
1931	}
1932
1933	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1934		duration = sample->time - ttrace->entry_time;
1935
1936	update_stats(stats, duration);
1937}
1938
1939static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1940{
1941	struct thread_trace *ttrace;
1942	u64 duration;
1943	size_t printed;
 
1944
1945	if (trace->current == NULL)
1946		return 0;
1947
1948	ttrace = thread__priv(trace->current);
1949
1950	if (!ttrace->entry_pending)
1951		return 0;
1952
1953	duration = sample->time - ttrace->entry_time;
 
 
 
 
 
 
1954
1955	printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1956	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1957	ttrace->entry_pending = false;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1958
1959	return printed;
1960}
1961
1962static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1963			    union perf_event *event __maybe_unused,
1964			    struct perf_sample *sample)
1965{
1966	char *msg;
1967	void *args;
1968	size_t printed = 0;
1969	struct thread *thread;
1970	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
 
 
1971	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1972	struct thread_trace *ttrace;
1973
1974	if (sc == NULL)
1975		return -1;
1976
1977	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1978	ttrace = thread__trace(thread, trace->output);
1979	if (ttrace == NULL)
1980		goto out_put;
1981
 
 
1982	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1983
1984	if (ttrace->entry_str == NULL) {
1985		ttrace->entry_str = malloc(trace__entry_str_size);
1986		if (!ttrace->entry_str)
1987			goto out_put;
1988	}
1989
1990	if (!trace->summary_only)
1991		trace__printf_interrupted_entry(trace, sample);
1992
 
 
 
 
 
 
 
 
 
 
 
1993	ttrace->entry_time = sample->time;
1994	msg = ttrace->entry_str;
1995	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1996
1997	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1998					   args, trace, thread);
1999
2000	if (sc->is_exit) {
2001		if (!trace->duration_filter && !trace->summary_only) {
2002			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
2003			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
 
 
 
 
 
2004		}
2005	} else {
2006		ttrace->entry_pending = true;
2007		/* See trace__vfs_getname & trace__sys_exit */
2008		ttrace->filename.pending_open = false;
2009	}
2010
2011	if (trace->current != thread) {
2012		thread__put(trace->current);
2013		trace->current = thread__get(thread);
2014	}
2015	err = 0;
2016out_put:
2017	thread__put(thread);
2018	return err;
2019}
2020
2021static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2022			   union perf_event *event __maybe_unused,
2023			   struct perf_sample *sample)
2024{
2025	long ret;
2026	u64 duration = 0;
 
2027	struct thread *thread;
2028	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
 
2029	struct syscall *sc = trace__syscall_info(trace, evsel, id);
2030	struct thread_trace *ttrace;
2031
2032	if (sc == NULL)
2033		return -1;
2034
2035	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2036	ttrace = thread__trace(thread, trace->output);
2037	if (ttrace == NULL)
2038		goto out_put;
2039
 
 
2040	if (trace->summary)
2041		thread__update_stats(ttrace, id, sample);
2042
2043	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2044
2045	if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2046		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2047		ttrace->filename.pending_open = false;
2048		++trace->stats.vfs_getname;
2049	}
2050
2051	ttrace->exit_time = sample->time;
2052
2053	if (ttrace->entry_time) {
2054		duration = sample->time - ttrace->entry_time;
2055		if (trace__filter_duration(trace, duration))
2056			goto out;
 
2057	} else if (trace->duration_filter)
2058		goto out;
2059
2060	if (trace->summary_only)
 
 
 
 
 
 
 
 
 
2061		goto out;
2062
2063	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2064
2065	if (ttrace->entry_pending) {
2066		fprintf(trace->output, "%-70s", ttrace->entry_str);
2067	} else {
2068		fprintf(trace->output, " ... [");
2069		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2070		fprintf(trace->output, "]: %s()", sc->name);
 
2071	}
2072
 
 
 
 
 
 
 
 
 
2073	if (sc->fmt == NULL) {
 
 
2074signed_print:
2075		fprintf(trace->output, ") = %ld", ret);
2076	} else if (ret < 0 && sc->fmt->errmsg) {
 
2077		char bf[STRERR_BUFSIZE];
2078		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2079			   *e = audit_errno_to_name(-ret);
2080
2081		fprintf(trace->output, ") = -1 %s %s", e, emsg);
 
2082	} else if (ret == 0 && sc->fmt->timeout)
2083		fprintf(trace->output, ") = 0 Timeout");
2084	else if (sc->fmt->hexret)
2085		fprintf(trace->output, ") = %#lx", ret);
2086	else
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2087		goto signed_print;
2088
2089	fputc('\n', trace->output);
 
 
 
 
 
 
 
 
 
 
 
 
2090out:
2091	ttrace->entry_pending = false;
2092	err = 0;
2093out_put:
2094	thread__put(thread);
2095	return err;
2096}
2097
2098static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2099			      union perf_event *event __maybe_unused,
2100			      struct perf_sample *sample)
2101{
2102	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2103	struct thread_trace *ttrace;
2104	size_t filename_len, entry_str_len, to_move;
2105	ssize_t remaining_space;
2106	char *pos;
2107	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2108
2109	if (!thread)
2110		goto out;
2111
2112	ttrace = thread__priv(thread);
2113	if (!ttrace)
2114		goto out;
2115
2116	filename_len = strlen(filename);
 
 
2117
2118	if (ttrace->filename.namelen < filename_len) {
2119		char *f = realloc(ttrace->filename.name, filename_len + 1);
2120
2121		if (f == NULL)
2122				goto out;
2123
2124		ttrace->filename.namelen = filename_len;
2125		ttrace->filename.name = f;
2126	}
2127
2128	strcpy(ttrace->filename.name, filename);
2129	ttrace->filename.pending_open = true;
2130
2131	if (!ttrace->filename.ptr)
2132		goto out;
2133
2134	entry_str_len = strlen(ttrace->entry_str);
2135	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2136	if (remaining_space <= 0)
2137		goto out;
2138
2139	if (filename_len > (size_t)remaining_space) {
2140		filename += filename_len - remaining_space;
2141		filename_len = remaining_space;
2142	}
2143
2144	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2145	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2146	memmove(pos + filename_len, pos, to_move);
2147	memcpy(pos, filename, filename_len);
2148
2149	ttrace->filename.ptr = 0;
2150	ttrace->filename.entry_str_pos = 0;
 
 
2151out:
2152	return 0;
2153}
2154
2155static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2156				     union perf_event *event __maybe_unused,
2157				     struct perf_sample *sample)
2158{
2159        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2160	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2161	struct thread *thread = machine__findnew_thread(trace->host,
2162							sample->pid,
2163							sample->tid);
2164	struct thread_trace *ttrace = thread__trace(thread, trace->output);
2165
2166	if (ttrace == NULL)
2167		goto out_dump;
2168
2169	ttrace->runtime_ms += runtime_ms;
2170	trace->runtime_ms += runtime_ms;
 
2171	thread__put(thread);
2172	return 0;
2173
2174out_dump:
2175	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2176	       evsel->name,
2177	       perf_evsel__strval(evsel, sample, "comm"),
2178	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2179	       runtime,
2180	       perf_evsel__intval(evsel, sample, "vruntime"));
2181	thread__put(thread);
2182	return 0;
2183}
2184
2185static void bpf_output__printer(enum binary_printer_ops op,
2186				unsigned int val, void *extra)
2187{
2188	FILE *output = extra;
2189	unsigned char ch = (unsigned char)val;
2190
2191	switch (op) {
2192	case BINARY_PRINT_CHAR_DATA:
2193		fprintf(output, "%c", isprint(ch) ? ch : '.');
2194		break;
2195	case BINARY_PRINT_DATA_BEGIN:
2196	case BINARY_PRINT_LINE_BEGIN:
2197	case BINARY_PRINT_ADDR:
2198	case BINARY_PRINT_NUM_DATA:
2199	case BINARY_PRINT_NUM_PAD:
2200	case BINARY_PRINT_SEP:
2201	case BINARY_PRINT_CHAR_PAD:
2202	case BINARY_PRINT_LINE_END:
2203	case BINARY_PRINT_DATA_END:
2204	default:
2205		break;
2206	}
 
 
2207}
2208
2209static void bpf_output__fprintf(struct trace *trace,
2210				struct perf_sample *sample)
2211{
2212	print_binary(sample->raw_data, sample->raw_size, 8,
2213		     bpf_output__printer, trace->output);
 
2214}
2215
2216static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2217				union perf_event *event __maybe_unused,
2218				struct perf_sample *sample)
2219{
2220	trace__printf_interrupted_entry(trace, sample);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2221	trace__fprintf_tstamp(trace, sample->time, trace->output);
2222
2223	if (trace->trace_syscalls)
2224		fprintf(trace->output, "(         ): ");
2225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2226	fprintf(trace->output, "%s:", evsel->name);
2227
2228	if (perf_evsel__is_bpf_output(evsel)) {
2229		bpf_output__fprintf(trace, sample);
2230	} else if (evsel->tp_format) {
2231		event_format__fprintf(evsel->tp_format, sample->cpu,
2232				      sample->raw_data, sample->raw_size,
2233				      trace->output);
 
 
 
 
 
 
 
 
 
2234	}
2235
2236	fprintf(trace->output, ")\n");
 
 
 
 
 
 
 
 
2237	return 0;
2238}
2239
2240static void print_location(FILE *f, struct perf_sample *sample,
2241			   struct addr_location *al,
2242			   bool print_dso, bool print_sym)
2243{
2244
2245	if ((verbose || print_dso) && al->map)
2246		fprintf(f, "%s@", al->map->dso->long_name);
2247
2248	if ((verbose || print_sym) && al->sym)
2249		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2250			al->addr - al->sym->start);
2251	else if (al->map)
2252		fprintf(f, "0x%" PRIx64, al->addr);
2253	else
2254		fprintf(f, "0x%" PRIx64, sample->addr);
2255}
2256
2257static int trace__pgfault(struct trace *trace,
2258			  struct perf_evsel *evsel,
2259			  union perf_event *event __maybe_unused,
2260			  struct perf_sample *sample)
2261{
2262	struct thread *thread;
2263	struct addr_location al;
2264	char map_type = 'd';
2265	struct thread_trace *ttrace;
2266	int err = -1;
 
2267
2268	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
 
 
 
 
 
 
 
 
 
 
2269	ttrace = thread__trace(thread, trace->output);
2270	if (ttrace == NULL)
2271		goto out_put;
2272
2273	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2274		ttrace->pfmaj++;
2275	else
2276		ttrace->pfmin++;
2277
2278	if (trace->summary_only)
2279		goto out;
2280
2281	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
2282			      sample->ip, &al);
2283
2284	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2285
2286	fprintf(trace->output, "%sfault [",
2287		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2288		"maj" : "min");
2289
2290	print_location(trace->output, sample, &al, false, true);
2291
2292	fprintf(trace->output, "] => ");
2293
2294	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
2295				   sample->addr, &al);
2296
2297	if (!al.map) {
2298		thread__find_addr_location(thread, sample->cpumode,
2299					   MAP__FUNCTION, sample->addr, &al);
2300
2301		if (al.map)
2302			map_type = 'x';
2303		else
2304			map_type = '?';
2305	}
2306
2307	print_location(trace->output, sample, &al, true, false);
2308
2309	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
 
 
 
 
 
 
 
2310out:
2311	err = 0;
2312out_put:
2313	thread__put(thread);
2314	return err;
2315}
2316
2317static bool skip_sample(struct trace *trace, struct perf_sample *sample)
 
 
2318{
2319	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2320	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2321		return false;
2322
2323	if (trace->pid_list || trace->tid_list)
2324		return true;
2325
2326	return false;
 
 
 
2327}
2328
2329static int trace__process_sample(struct perf_tool *tool,
2330				 union perf_event *event,
2331				 struct perf_sample *sample,
2332				 struct perf_evsel *evsel,
2333				 struct machine *machine __maybe_unused)
2334{
2335	struct trace *trace = container_of(tool, struct trace, tool);
 
2336	int err = 0;
2337
2338	tracepoint_handler handler = evsel->handler;
2339
2340	if (skip_sample(trace, sample))
2341		return 0;
 
2342
2343	if (!trace->full_time && trace->base_time == 0)
2344		trace->base_time = sample->time;
2345
2346	if (handler) {
2347		++trace->nr_events;
2348		handler(trace, evsel, event, sample);
2349	}
2350
 
2351	return err;
2352}
2353
2354static int parse_target_str(struct trace *trace)
2355{
2356	if (trace->opts.target.pid) {
2357		trace->pid_list = intlist__new(trace->opts.target.pid);
2358		if (trace->pid_list == NULL) {
2359			pr_err("Error parsing process id string\n");
2360			return -EINVAL;
2361		}
2362	}
2363
2364	if (trace->opts.target.tid) {
2365		trace->tid_list = intlist__new(trace->opts.target.tid);
2366		if (trace->tid_list == NULL) {
2367			pr_err("Error parsing thread id string\n");
2368			return -EINVAL;
2369		}
2370	}
2371
2372	return 0;
2373}
2374
2375static int trace__record(struct trace *trace, int argc, const char **argv)
2376{
2377	unsigned int rec_argc, i, j;
2378	const char **rec_argv;
2379	const char * const record_args[] = {
2380		"record",
2381		"-R",
2382		"-m", "1024",
2383		"-c", "1",
2384	};
2385
2386	const char * const sc_args[] = { "-e", };
2387	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2388	const char * const majpf_args[] = { "-e", "major-faults" };
2389	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2390	const char * const minpf_args[] = { "-e", "minor-faults" };
2391	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2392
2393	/* +1 is for the event string below */
2394	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2395		majpf_args_nr + minpf_args_nr + argc;
2396	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2397
2398	if (rec_argv == NULL)
2399		return -ENOMEM;
2400
2401	j = 0;
2402	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2403		rec_argv[j++] = record_args[i];
2404
2405	if (trace->trace_syscalls) {
2406		for (i = 0; i < sc_args_nr; i++)
2407			rec_argv[j++] = sc_args[i];
2408
2409		/* event string may be different for older kernels - e.g., RHEL6 */
2410		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2411			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2412		else if (is_valid_tracepoint("syscalls:sys_enter"))
2413			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2414		else {
2415			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
 
2416			return -1;
2417		}
2418	}
2419
2420	if (trace->trace_pgfaults & TRACE_PFMAJ)
2421		for (i = 0; i < majpf_args_nr; i++)
2422			rec_argv[j++] = majpf_args[i];
2423
2424	if (trace->trace_pgfaults & TRACE_PFMIN)
2425		for (i = 0; i < minpf_args_nr; i++)
2426			rec_argv[j++] = minpf_args[i];
2427
2428	for (i = 0; i < (unsigned int)argc; i++)
2429		rec_argv[j++] = argv[i];
2430
2431	return cmd_record(j, rec_argv, NULL);
2432}
2433
2434static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2435
2436static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2437{
2438	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
 
 
 
2439
2440	if (IS_ERR(evsel))
2441		return false;
2442
2443	if (perf_evsel__field(evsel, "pathname") == NULL) {
2444		perf_evsel__delete(evsel);
2445		return false;
 
 
 
 
 
 
 
 
 
 
2446	}
2447
2448	evsel->handler = trace__vfs_getname;
2449	perf_evlist__add(evlist, evsel);
2450	return true;
2451}
2452
2453static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2454				    u64 config)
2455{
2456	struct perf_evsel *evsel;
2457	struct perf_event_attr attr = {
2458		.type = PERF_TYPE_SOFTWARE,
2459		.mmap_data = 1,
2460	};
2461
2462	attr.config = config;
2463	attr.sample_period = 1;
2464
2465	event_attr_init(&attr);
2466
2467	evsel = perf_evsel__new(&attr);
2468	if (!evsel)
2469		return -ENOMEM;
2470
2471	evsel->handler = trace__pgfault;
2472	perf_evlist__add(evlist, evsel);
2473
2474	return 0;
2475}
2476
2477static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2478{
2479	const u32 type = event->header.type;
2480	struct perf_evsel *evsel;
2481
2482	if (!trace->full_time && trace->base_time == 0)
2483		trace->base_time = sample->time;
2484
2485	if (type != PERF_RECORD_SAMPLE) {
2486		trace__process_event(trace, trace->host, event, sample);
2487		return;
2488	}
2489
2490	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2491	if (evsel == NULL) {
2492		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2493		return;
2494	}
2495
2496	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
 
 
 
 
 
2497	    sample->raw_data == NULL) {
2498		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2499		       perf_evsel__name(evsel), sample->tid,
2500		       sample->cpu, sample->raw_size);
2501	} else {
2502		tracepoint_handler handler = evsel->handler;
2503		handler(trace, evsel, event, sample);
2504	}
 
 
 
2505}
2506
2507static int trace__add_syscall_newtp(struct trace *trace)
2508{
2509	int ret = -1;
2510	struct perf_evlist *evlist = trace->evlist;
2511	struct perf_evsel *sys_enter, *sys_exit;
2512
2513	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2514	if (sys_enter == NULL)
2515		goto out;
2516
2517	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2518		goto out_delete_sys_enter;
2519
2520	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2521	if (sys_exit == NULL)
2522		goto out_delete_sys_enter;
2523
2524	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2525		goto out_delete_sys_exit;
2526
2527	perf_evlist__add(evlist, sys_enter);
2528	perf_evlist__add(evlist, sys_exit);
 
 
 
 
 
 
 
 
 
 
 
 
2529
2530	trace->syscalls.events.sys_enter = sys_enter;
2531	trace->syscalls.events.sys_exit  = sys_exit;
2532
2533	ret = 0;
2534out:
2535	return ret;
2536
2537out_delete_sys_exit:
2538	perf_evsel__delete_priv(sys_exit);
2539out_delete_sys_enter:
2540	perf_evsel__delete_priv(sys_enter);
2541	goto out;
2542}
2543
2544static int trace__set_ev_qualifier_filter(struct trace *trace)
2545{
2546	int err = -1;
 
2547	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2548						trace->ev_qualifier_ids.nr,
2549						trace->ev_qualifier_ids.entries);
2550
2551	if (filter == NULL)
2552		goto out_enomem;
2553
2554	if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2555		err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
 
 
 
2556
2557	free(filter);
2558out:
2559	return err;
2560out_enomem:
2561	errno = ENOMEM;
2562	goto out;
2563}
2564
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2565static int trace__run(struct trace *trace, int argc, const char **argv)
2566{
2567	struct perf_evlist *evlist = trace->evlist;
2568	struct perf_evsel *evsel;
2569	int err = -1, i;
2570	unsigned long before;
2571	const bool forks = argc > 0;
2572	bool draining = false;
2573
2574	trace->live = true;
2575
2576	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2577		goto out_error_raw_syscalls;
2578
2579	if (trace->trace_syscalls)
2580		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2581
2582	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2583	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2584		goto out_error_mem;
 
 
 
 
 
 
 
 
 
 
 
 
 
2585	}
2586
2587	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2588	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2589		goto out_error_mem;
2590
2591	if (trace->sched &&
2592	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2593				   trace__sched_stat_runtime))
2594		goto out_error_sched_stat_runtime;
2595
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2596	err = perf_evlist__create_maps(evlist, &trace->opts.target);
2597	if (err < 0) {
2598		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2599		goto out_delete_evlist;
2600	}
2601
2602	err = trace__symbols_init(trace, evlist);
2603	if (err < 0) {
2604		fprintf(trace->output, "Problems initializing symbol libraries!\n");
2605		goto out_delete_evlist;
2606	}
2607
2608	perf_evlist__config(evlist, &trace->opts);
2609
2610	signal(SIGCHLD, sig_handler);
2611	signal(SIGINT, sig_handler);
2612
2613	if (forks) {
2614		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2615						    argv, false, NULL);
2616		if (err < 0) {
2617			fprintf(trace->output, "Couldn't run the workload!\n");
2618			goto out_delete_evlist;
2619		}
2620	}
2621
2622	err = perf_evlist__open(evlist);
2623	if (err < 0)
2624		goto out_error_open;
2625
2626	err = bpf__apply_obj_config();
2627	if (err) {
2628		char errbuf[BUFSIZ];
2629
2630		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2631		pr_err("ERROR: Apply config to BPF failed: %s\n",
2632			 errbuf);
2633		goto out_error_open;
2634	}
2635
2636	/*
2637	 * Better not use !target__has_task() here because we need to cover the
2638	 * case where no threads were specified in the command line, but a
2639	 * workload was, and in that case we will fill in the thread_map when
2640	 * we fork the workload in perf_evlist__prepare_workload.
2641	 */
2642	if (trace->filter_pids.nr > 0)
2643		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2644	else if (thread_map__pid(evlist->threads, 0) == -1)
2645		err = perf_evlist__set_filter_pid(evlist, getpid());
2646
2647	if (err < 0)
2648		goto out_error_mem;
2649
 
 
 
 
 
 
2650	if (trace->ev_qualifier_ids.nr > 0) {
2651		err = trace__set_ev_qualifier_filter(trace);
2652		if (err < 0)
2653			goto out_errno;
2654
2655		pr_debug("event qualifier tracepoint filter: %s\n",
2656			 trace->syscalls.events.sys_exit->filter);
 
 
2657	}
2658
 
 
 
 
 
 
 
 
 
 
 
 
 
2659	err = perf_evlist__apply_filters(evlist, &evsel);
2660	if (err < 0)
2661		goto out_error_apply_filters;
2662
2663	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
 
 
 
2664	if (err < 0)
2665		goto out_error_mmap;
2666
2667	if (!target__none(&trace->opts.target))
2668		perf_evlist__enable(evlist);
2669
2670	if (forks)
2671		perf_evlist__start_workload(evlist);
2672
2673	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2674				  evlist->threads->nr > 1 ||
2675				  perf_evlist__first(evlist)->attr.inherit;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2676again:
2677	before = trace->nr_events;
2678
2679	for (i = 0; i < evlist->nr_mmaps; i++) {
2680		union perf_event *event;
 
2681
2682		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2683			struct perf_sample sample;
 
2684
 
2685			++trace->nr_events;
2686
2687			err = perf_evlist__parse_sample(evlist, event, &sample);
2688			if (err) {
2689				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2690				goto next_event;
2691			}
2692
2693			trace__handle_event(trace, event, &sample);
2694next_event:
2695			perf_evlist__mmap_consume(evlist, i);
2696
2697			if (interrupted)
2698				goto out_disable;
2699
2700			if (done && !draining) {
2701				perf_evlist__disable(evlist);
2702				draining = true;
2703			}
2704		}
 
2705	}
2706
2707	if (trace->nr_events == before) {
2708		int timeout = done ? 100 : -1;
2709
2710		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2711			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2712				draining = true;
2713
2714			goto again;
 
 
 
2715		}
2716	} else {
2717		goto again;
2718	}
2719
2720out_disable:
2721	thread__zput(trace->current);
2722
2723	perf_evlist__disable(evlist);
 
 
 
2724
2725	if (!err) {
2726		if (trace->summary)
2727			trace__fprintf_thread_summary(trace, trace->output);
2728
2729		if (trace->show_tool_stats) {
2730			fprintf(trace->output, "Stats:\n "
2731					       " vfs_getname : %" PRIu64 "\n"
2732					       " proc_getname: %" PRIu64 "\n",
2733				trace->stats.vfs_getname,
2734				trace->stats.proc_getname);
2735		}
2736	}
2737
2738out_delete_evlist:
2739	perf_evlist__delete(evlist);
 
 
 
2740	trace->evlist = NULL;
2741	trace->live = false;
2742	return err;
2743{
2744	char errbuf[BUFSIZ];
2745
2746out_error_sched_stat_runtime:
2747	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2748	goto out_error;
2749
2750out_error_raw_syscalls:
2751	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2752	goto out_error;
2753
2754out_error_mmap:
2755	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2756	goto out_error;
2757
2758out_error_open:
2759	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2760
2761out_error:
2762	fprintf(trace->output, "%s\n", errbuf);
2763	goto out_delete_evlist;
2764
2765out_error_apply_filters:
2766	fprintf(trace->output,
2767		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
2768		evsel->filter, perf_evsel__name(evsel), errno,
2769		strerror_r(errno, errbuf, sizeof(errbuf)));
2770	goto out_delete_evlist;
2771}
2772out_error_mem:
2773	fprintf(trace->output, "Not enough memory to run!\n");
2774	goto out_delete_evlist;
2775
2776out_errno:
2777	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2778	goto out_delete_evlist;
2779}
2780
2781static int trace__replay(struct trace *trace)
2782{
2783	const struct perf_evsel_str_handler handlers[] = {
2784		{ "probe:vfs_getname",	     trace__vfs_getname, },
2785	};
2786	struct perf_data_file file = {
2787		.path  = input_name,
2788		.mode  = PERF_DATA_MODE_READ,
2789		.force = trace->force,
2790	};
2791	struct perf_session *session;
2792	struct perf_evsel *evsel;
2793	int err = -1;
2794
2795	trace->tool.sample	  = trace__process_sample;
2796	trace->tool.mmap	  = perf_event__process_mmap;
2797	trace->tool.mmap2	  = perf_event__process_mmap2;
2798	trace->tool.comm	  = perf_event__process_comm;
2799	trace->tool.exit	  = perf_event__process_exit;
2800	trace->tool.fork	  = perf_event__process_fork;
2801	trace->tool.attr	  = perf_event__process_attr;
2802	trace->tool.tracing_data = perf_event__process_tracing_data;
2803	trace->tool.build_id	  = perf_event__process_build_id;
 
2804
2805	trace->tool.ordered_events = true;
2806	trace->tool.ordering_requires_timestamps = true;
2807
2808	/* add tid to output */
2809	trace->multiple_threads = true;
2810
2811	session = perf_session__new(&file, false, &trace->tool);
2812	if (session == NULL)
2813		return -1;
 
 
 
 
 
 
2814
2815	if (symbol__init(&session->header.env) < 0)
2816		goto out;
2817
2818	trace->host = &session->machines.host;
2819
2820	err = perf_session__set_tracepoints_handlers(session, handlers);
2821	if (err)
2822		goto out;
2823
2824	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2825						     "raw_syscalls:sys_enter");
2826	/* older kernels have syscalls tp versus raw_syscalls */
2827	if (evsel == NULL)
2828		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2829							     "syscalls:sys_enter");
2830
2831	if (evsel &&
2832	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2833	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2834		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2835		goto out;
2836	}
2837
2838	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2839						     "raw_syscalls:sys_exit");
2840	if (evsel == NULL)
2841		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2842							     "syscalls:sys_exit");
2843	if (evsel &&
2844	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2845	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2846		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2847		goto out;
2848	}
2849
2850	evlist__for_each(session->evlist, evsel) {
2851		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2852		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2853		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2854		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2855			evsel->handler = trace__pgfault;
2856	}
2857
2858	err = parse_target_str(trace);
2859	if (err != 0)
2860		goto out;
2861
2862	setup_pager();
2863
2864	err = perf_session__process_events(session);
2865	if (err)
2866		pr_err("Failed to process events, error %d", err);
2867
2868	else if (trace->summary)
2869		trace__fprintf_thread_summary(trace, trace->output);
2870
2871out:
2872	perf_session__delete(session);
2873
2874	return err;
2875}
2876
/*
 * Write the heading of the per-thread summary to @fp and return the value
 * reported by fprintf() for it.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2885
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2886static size_t thread__dump_stats(struct thread_trace *ttrace,
2887				 struct trace *trace, FILE *fp)
2888{
2889	struct stats *stats;
2890	size_t printed = 0;
2891	struct syscall *sc;
2892	struct int_node *inode = intlist__first(ttrace->syscall_stats);
 
2893
2894	if (inode == NULL)
2895		return 0;
2896
2897	printed += fprintf(fp, "\n");
2898
2899	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2900	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2901	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2902
2903	/* each int_node is a syscall */
2904	while (inode) {
2905		stats = inode->priv;
2906		if (stats) {
2907			double min = (double)(stats->min) / NSEC_PER_MSEC;
2908			double max = (double)(stats->max) / NSEC_PER_MSEC;
2909			double avg = avg_stats(stats);
2910			double pct;
2911			u64 n = (u64) stats->n;
2912
2913			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2914			avg /= NSEC_PER_MSEC;
2915
2916			sc = &trace->syscalls.table[inode->i];
2917			printed += fprintf(fp, "   %-15s", sc->name);
2918			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2919					   n, avg * n, min, avg);
2920			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2921		}
2922
2923		inode = intlist__next(inode);
2924	}
2925
 
2926	printed += fprintf(fp, "\n\n");
2927
2928	return printed;
2929}
2930
2931/* struct used to pass data to per-thread function */
2932struct summary_data {
2933	FILE *fp;
2934	struct trace *trace;
2935	size_t printed;
2936};
2937
2938static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2939{
2940	struct summary_data *data = priv;
2941	FILE *fp = data->fp;
2942	size_t printed = data->printed;
2943	struct trace *trace = data->trace;
2944	struct thread_trace *ttrace = thread__priv(thread);
2945	double ratio;
2946
2947	if (ttrace == NULL)
2948		return 0;
2949
2950	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2951
2952	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2953	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2954	printed += fprintf(fp, "%.1f%%", ratio);
2955	if (ttrace->pfmaj)
2956		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2957	if (ttrace->pfmin)
2958		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2959	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
 
 
 
 
2960	printed += thread__dump_stats(ttrace, trace, fp);
2961
2962	data->printed += printed;
 
2963
2964	return 0;
 
 
 
 
 
 
 
 
 
2965}
2966
2967static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2968{
2969	struct summary_data data = {
2970		.fp = fp,
2971		.trace = trace
2972	};
2973	data.printed = trace__fprintf_threads_header(fp);
 
 
 
 
 
 
2974
2975	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
 
2976
2977	return data.printed;
 
 
2978}
2979
2980static int trace__set_duration(const struct option *opt, const char *str,
2981			       int unset __maybe_unused)
2982{
2983	struct trace *trace = opt->value;
2984
2985	trace->duration_filter = atof(str);
2986	return 0;
2987}
2988
2989static int trace__set_filter_pids(const struct option *opt, const char *str,
2990				  int unset __maybe_unused)
2991{
2992	int ret = -1;
2993	size_t i;
2994	struct trace *trace = opt->value;
2995	/*
2996	 * FIXME: introduce a intarray class, plain parse csv and create a
2997	 * { int nr, int entries[] } struct...
2998	 */
2999	struct intlist *list = intlist__new(str);
3000
3001	if (list == NULL)
3002		return -1;
3003
3004	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
3005	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
3006
3007	if (trace->filter_pids.entries == NULL)
3008		goto out;
3009
3010	trace->filter_pids.entries[0] = getpid();
3011
3012	for (i = 1; i < trace->filter_pids.nr; ++i)
3013		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
3014
3015	intlist__delete(list);
3016	ret = 0;
3017out:
3018	return ret;
3019}
3020
3021static int trace__open_output(struct trace *trace, const char *filename)
3022{
3023	struct stat st;
3024
3025	if (!stat(filename, &st) && st.st_size) {
3026		char oldname[PATH_MAX];
3027
3028		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
3029		unlink(oldname);
3030		rename(filename, oldname);
3031	}
3032
3033	trace->output = fopen(filename, "w");
3034
3035	return trace->output == NULL ? -errno : 0;
3036}
3037
3038static int parse_pagefaults(const struct option *opt, const char *str,
3039			    int unset __maybe_unused)
3040{
3041	int *trace_pgfaults = opt->value;
3042
3043	if (strcmp(str, "all") == 0)
3044		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3045	else if (strcmp(str, "maj") == 0)
3046		*trace_pgfaults |= TRACE_PFMAJ;
3047	else if (strcmp(str, "min") == 0)
3048		*trace_pgfaults |= TRACE_PFMIN;
3049	else
3050		return -1;
3051
3052	return 0;
3053}
3054
3055static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3056{
3057	struct perf_evsel *evsel;
3058
3059	evlist__for_each(evlist, evsel)
3060		evsel->handler = handler;
3061}
3062
3063int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3064{
3065	const char *trace_usage[] = {
3066		"perf trace [<options>] [<command>]",
3067		"perf trace [<options>] -- <command> [<options>]",
3068		"perf trace record [<options>] [<command>]",
3069		"perf trace record [<options>] -- <command> [<options>]",
3070		NULL
3071	};
3072	struct trace trace = {
3073		.audit = {
3074			.machine = audit_detect_machine(),
3075			.open_id = audit_name_to_syscall("open", trace.audit.machine),
3076		},
3077		.syscalls = {
3078			. max = -1,
3079		},
3080		.opts = {
3081			.target = {
3082				.uid	   = UINT_MAX,
3083				.uses_mmap = true,
3084			},
3085			.user_freq     = UINT_MAX,
3086			.user_interval = ULLONG_MAX,
3087			.no_buffering  = true,
3088			.mmap_pages    = UINT_MAX,
3089			.proc_map_timeout  = 500,
3090		},
3091		.output = stderr,
3092		.show_comm = true,
3093		.trace_syscalls = true,
 
 
 
 
 
 
 
3094	};
 
3095	const char *output_name = NULL;
3096	const char *ev_qualifier_str = NULL;
3097	const struct option trace_options[] = {
3098	OPT_CALLBACK(0, "event", &trace.evlist, "event",
3099		     "event selector. use 'perf list' to list available events",
3100		     parse_events_option),
3101	OPT_BOOLEAN(0, "comm", &trace.show_comm,
3102		    "show the thread COMM next to its id"),
3103	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3104	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
 
3105	OPT_STRING('o', "output", &output_name, "file", "output file name"),
3106	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3107	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3108		    "trace events on existing process id"),
3109	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3110		    "trace events on existing thread id"),
3111	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3112		     "pids to filter (by the kernel)", trace__set_filter_pids),
3113	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3114		    "system-wide collection from all CPUs"),
3115	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3116		    "list of cpus to monitor"),
3117	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3118		    "child tasks do not inherit counters"),
3119	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3120		     "number of mmap data pages",
3121		     perf_evlist__parse_mmap_pages),
3122	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3123		   "user to profile"),
3124	OPT_CALLBACK(0, "duration", &trace, "float",
3125		     "show only events with duration > N.M ms",
3126		     trace__set_duration),
 
 
 
3127	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3128	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3129	OPT_BOOLEAN('T', "time", &trace.full_time,
3130		    "Show full timestamp, not time relative to first start"),
 
 
3131	OPT_BOOLEAN('s', "summary", &trace.summary_only,
3132		    "Show only syscall summary with statistics"),
3133	OPT_BOOLEAN('S', "with-summary", &trace.summary,
3134		    "Show all syscalls and summary with statistics"),
3135	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3136		     "Trace pagefaults", parse_pagefaults, "maj"),
3137	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3138	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3139	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3140			"per thread proc mmap processing timeout in ms"),
 
 
 
 
 
 
3141	OPT_END()
3142	};
 
 
 
3143	const char * const trace_subcommands[] = { "record", NULL };
3144	int err;
3145	char bf[BUFSIZ];
3146
3147	signal(SIGSEGV, sighandler_dump_stack);
3148	signal(SIGFPE, sighandler_dump_stack);
3149
3150	trace.evlist = perf_evlist__new();
 
3151
3152	if (trace.evlist == NULL) {
3153		pr_err("Not enough memory to run!\n");
3154		err = -ENOMEM;
3155		goto out;
3156	}
3157
 
 
 
 
 
 
 
 
 
 
 
 
 
3158	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3159				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3161	if (trace.trace_pgfaults) {
3162		trace.opts.sample_address = true;
3163		trace.opts.sample_time = true;
3164	}
3165
3166	if (trace.evlist->nr_entries > 0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3167		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3168
3169	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3170		return trace__record(&trace, argc-1, &argv[1]);
3171
3172	/* summary_only implies summary option, but don't overwrite summary if set */
3173	if (trace.summary_only)
3174		trace.summary = trace.summary_only;
3175
3176	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3177	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
3178		pr_err("Please specify something to trace.\n");
3179		return -1;
3180	}
3181
3182	if (output_name != NULL) {
3183		err = trace__open_output(&trace, output_name);
3184		if (err < 0) {
3185			perror("failed to create output file");
3186			goto out;
3187		}
3188	}
3189
3190	if (ev_qualifier_str != NULL) {
3191		const char *s = ev_qualifier_str;
3192		struct strlist_config slist_config = {
3193			.dirname = system_path(STRACE_GROUPS_DIR),
3194		};
3195
3196		trace.not_ev_qualifier = *s == '!';
3197		if (trace.not_ev_qualifier)
3198			++s;
3199		trace.ev_qualifier = strlist__new(s, &slist_config);
3200		if (trace.ev_qualifier == NULL) {
3201			fputs("Not enough memory to parse event qualifier",
3202			      trace.output);
3203			err = -ENOMEM;
3204			goto out_close;
3205		}
3206
3207		err = trace__validate_ev_qualifier(&trace);
3208		if (err)
3209			goto out_close;
3210	}
3211
3212	err = target__validate(&trace.opts.target);
3213	if (err) {
3214		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3215		fprintf(trace.output, "%s", bf);
3216		goto out_close;
3217	}
3218
3219	err = target__parse_uid(&trace.opts.target);
3220	if (err) {
3221		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3222		fprintf(trace.output, "%s", bf);
3223		goto out_close;
3224	}
3225
3226	if (!argc && target__none(&trace.opts.target))
3227		trace.opts.target.system_wide = true;
3228
3229	if (input_name)
3230		err = trace__replay(&trace);
3231	else
3232		err = trace__run(&trace, argc, argv);
3233
3234out_close:
3235	if (output_name != NULL)
3236		fclose(trace.output);
3237out:
3238	return err;
3239}