Linux Audio

Check our new training course

Buildroot integration, development and maintenance

Need a Buildroot system for your embedded project?
Loading...
v3.15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"
#include "trace-event.h"
#include "util/parse-events.h"

#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
 
  22
  23/* For older distros: */
  24#ifndef MAP_STACK
  25# define MAP_STACK		0x20000
  26#endif
  27
  28#ifndef MADV_HWPOISON
  29# define MADV_HWPOISON		100
 
  30#endif
  31
  32#ifndef MADV_MERGEABLE
  33# define MADV_MERGEABLE		12
  34#endif
  35
  36#ifndef MADV_UNMERGEABLE
  37# define MADV_UNMERGEABLE	13
  38#endif
  39
  40#ifndef EFD_SEMAPHORE
  41# define EFD_SEMAPHORE		1
  42#endif
  43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  44struct tp_field {
  45	int offset;
  46	union {
  47		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
  48		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
  49	};
  50};
  51
  52#define TP_UINT_FIELD(bits) \
  53static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
  54{ \
  55	return *(u##bits *)(sample->raw_data + field->offset); \
 
 
  56}
  57
  58TP_UINT_FIELD(8);
  59TP_UINT_FIELD(16);
  60TP_UINT_FIELD(32);
  61TP_UINT_FIELD(64);
  62
  63#define TP_UINT_FIELD__SWAPPED(bits) \
  64static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
  65{ \
  66	u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
 
  67	return bswap_##bits(value);\
  68}
  69
  70TP_UINT_FIELD__SWAPPED(16);
  71TP_UINT_FIELD__SWAPPED(32);
  72TP_UINT_FIELD__SWAPPED(64);
  73
  74static int tp_field__init_uint(struct tp_field *field,
  75			       struct format_field *format_field,
  76			       bool needs_swap)
  77{
  78	field->offset = format_field->offset;
  79
  80	switch (format_field->size) {
  81	case 1:
  82		field->integer = tp_field__u8;
  83		break;
  84	case 2:
  85		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
  86		break;
  87	case 4:
  88		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
  89		break;
  90	case 8:
  91		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
  92		break;
  93	default:
  94		return -1;
  95	}
  96
  97	return 0;
  98}
  99
 100static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
 101{
 102	return sample->raw_data + field->offset;
 103}
 104
 105static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
 106{
 107	field->offset = format_field->offset;
 108	field->pointer = tp_field__ptr;
 109	return 0;
 110}
 111
 112struct syscall_tp {
 113	struct tp_field id;
 114	union {
 115		struct tp_field args, ret;
 116	};
 117};
 118
 119static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 120					  struct tp_field *field,
 121					  const char *name)
 122{
 123	struct format_field *format_field = perf_evsel__field(evsel, name);
 124
 125	if (format_field == NULL)
 126		return -1;
 127
 128	return tp_field__init_uint(field, format_field, evsel->needs_swap);
 129}
 130
 131#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
 132	({ struct syscall_tp *sc = evsel->priv;\
 133	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 134
 135static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 136					 struct tp_field *field,
 137					 const char *name)
 138{
 139	struct format_field *format_field = perf_evsel__field(evsel, name);
 140
 141	if (format_field == NULL)
 142		return -1;
 143
 144	return tp_field__init_ptr(field, format_field);
 145}
 146
 147#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
 148	({ struct syscall_tp *sc = evsel->priv;\
 149	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 150
 151static void perf_evsel__delete_priv(struct perf_evsel *evsel)
 152{
 153	zfree(&evsel->priv);
 154	perf_evsel__delete(evsel);
 155}
 156
 157static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
 158{
 159	evsel->priv = malloc(sizeof(struct syscall_tp));
 160	if (evsel->priv != NULL) {
 161		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
 162			goto out_delete;
 163
 164		evsel->handler = handler;
 165		return 0;
 166	}
 167
 168	return -ENOMEM;
 169
 170out_delete:
 171	zfree(&evsel->priv);
 172	return -ENOENT;
 173}
 174
 175static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
 176{
 177	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 178
 179	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 180	if (evsel == NULL)
 181		evsel = perf_evsel__newtp("syscalls", direction);
 182
 183	if (evsel) {
 184		if (perf_evsel__init_syscall_tp(evsel, handler))
 185			goto out_delete;
 186	}
 
 187
 188	return evsel;
 189
 190out_delete:
 191	perf_evsel__delete_priv(evsel);
 192	return NULL;
 193}
 194
 195#define perf_evsel__sc_tp_uint(evsel, name, sample) \
 196	({ struct syscall_tp *fields = evsel->priv; \
 197	   fields->name.integer(&fields->name, sample); })
 198
 199#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
 200	({ struct syscall_tp *fields = evsel->priv; \
 201	   fields->name.pointer(&fields->name, sample); })
 202
 203static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
 204					  void *sys_enter_handler,
 205					  void *sys_exit_handler)
 206{
 207	int ret = -1;
 208	struct perf_evsel *sys_enter, *sys_exit;
 209
 210	sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
 211	if (sys_enter == NULL)
 212		goto out;
 213
 214	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
 215		goto out_delete_sys_enter;
 216
 217	sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
 218	if (sys_exit == NULL)
 219		goto out_delete_sys_enter;
 220
 221	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
 222		goto out_delete_sys_exit;
 223
 224	perf_evlist__add(evlist, sys_enter);
 225	perf_evlist__add(evlist, sys_exit);
 226
 227	ret = 0;
 228out:
 229	return ret;
 230
 231out_delete_sys_exit:
 232	perf_evsel__delete_priv(sys_exit);
 233out_delete_sys_enter:
 234	perf_evsel__delete_priv(sys_enter);
 235	goto out;
 236}
 237
 238
 239struct syscall_arg {
 240	unsigned long val;
 241	struct thread *thread;
 242	struct trace  *trace;
 243	void	      *parm;
 244	u8	      idx;
 245	u8	      mask;
 246};
 247
 248struct strarray {
 249	int	    offset;
 250	int	    nr_entries;
 251	const char **entries;
 252};
 253
 254#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
 255	.nr_entries = ARRAY_SIZE(array), \
 256	.entries = array, \
 257}
 258
 259#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
 260	.offset	    = off, \
 261	.nr_entries = ARRAY_SIZE(array), \
 262	.entries = array, \
 263}
 264
 265static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
 266						const char *intfmt,
 267					        struct syscall_arg *arg)
 268{
 269	struct strarray *sa = arg->parm;
 270	int idx = arg->val - sa->offset;
 271
 272	if (idx < 0 || idx >= sa->nr_entries)
 273		return scnprintf(bf, size, intfmt, arg->val);
 274
 275	return scnprintf(bf, size, "%s", sa->entries[idx]);
 276}
 277
 278static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
 279					      struct syscall_arg *arg)
 280{
 281	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
 282}
 283
 284#define SCA_STRARRAY syscall_arg__scnprintf_strarray
 285
 286#if defined(__i386__) || defined(__x86_64__)
 287/*
 288 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 289 * 	  gets rewritten to support all arches.
 290 */
 291static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
 292						 struct syscall_arg *arg)
 293{
 294	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
 295}
 296
 297#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
 298#endif /* defined(__i386__) || defined(__x86_64__) */
 299
 300static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
 301					struct syscall_arg *arg);
 302
 303#define SCA_FD syscall_arg__scnprintf_fd
 304
 305static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
 306					   struct syscall_arg *arg)
 307{
 308	int fd = arg->val;
 309
 310	if (fd == AT_FDCWD)
 311		return scnprintf(bf, size, "CWD");
 312
 313	return syscall_arg__scnprintf_fd(bf, size, arg);
 314}
 315
 316#define SCA_FDAT syscall_arg__scnprintf_fd_at
 317
 318static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
 319					      struct syscall_arg *arg);
 320
 321#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
 322
 323static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
 324					 struct syscall_arg *arg)
 325{
 326	return scnprintf(bf, size, "%#lx", arg->val);
 327}
 328
 329#define SCA_HEX syscall_arg__scnprintf_hex
 330
 
 
 
 
 
 
 
 
 331static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 332					       struct syscall_arg *arg)
 333{
 334	int printed = 0, prot = arg->val;
 335
 336	if (prot == PROT_NONE)
 337		return scnprintf(bf, size, "NONE");
 338#define	P_MMAP_PROT(n) \
 339	if (prot & PROT_##n) { \
 340		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 341		prot &= ~PROT_##n; \
 342	}
 343
 344	P_MMAP_PROT(EXEC);
 345	P_MMAP_PROT(READ);
 346	P_MMAP_PROT(WRITE);
 347#ifdef PROT_SEM
 348	P_MMAP_PROT(SEM);
 349#endif
 350	P_MMAP_PROT(GROWSDOWN);
 351	P_MMAP_PROT(GROWSUP);
 352#undef P_MMAP_PROT
 353
 354	if (prot)
 355		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
 356
 357	return printed;
 358}
 359
 360#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 361
 362static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 363						struct syscall_arg *arg)
 364{
 365	int printed = 0, flags = arg->val;
 366
 367#define	P_MMAP_FLAG(n) \
 368	if (flags & MAP_##n) { \
 369		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 370		flags &= ~MAP_##n; \
 371	}
 372
 373	P_MMAP_FLAG(SHARED);
 374	P_MMAP_FLAG(PRIVATE);
 375#ifdef MAP_32BIT
 376	P_MMAP_FLAG(32BIT);
 377#endif
 378	P_MMAP_FLAG(ANONYMOUS);
 379	P_MMAP_FLAG(DENYWRITE);
 380	P_MMAP_FLAG(EXECUTABLE);
 381	P_MMAP_FLAG(FILE);
 382	P_MMAP_FLAG(FIXED);
 383	P_MMAP_FLAG(GROWSDOWN);
 384#ifdef MAP_HUGETLB
 385	P_MMAP_FLAG(HUGETLB);
 386#endif
 387	P_MMAP_FLAG(LOCKED);
 388	P_MMAP_FLAG(NONBLOCK);
 389	P_MMAP_FLAG(NORESERVE);
 390	P_MMAP_FLAG(POPULATE);
 391	P_MMAP_FLAG(STACK);
 392#ifdef MAP_UNINITIALIZED
 393	P_MMAP_FLAG(UNINITIALIZED);
 394#endif
 395#undef P_MMAP_FLAG
 396
 397	if (flags)
 398		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 399
 400	return printed;
 401}
 402
 403#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
 404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 405static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
 406						      struct syscall_arg *arg)
 407{
 408	int behavior = arg->val;
 409
 410	switch (behavior) {
 411#define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
 412	P_MADV_BHV(NORMAL);
 413	P_MADV_BHV(RANDOM);
 414	P_MADV_BHV(SEQUENTIAL);
 415	P_MADV_BHV(WILLNEED);
 416	P_MADV_BHV(DONTNEED);
 417	P_MADV_BHV(REMOVE);
 418	P_MADV_BHV(DONTFORK);
 419	P_MADV_BHV(DOFORK);
 420	P_MADV_BHV(HWPOISON);
 421#ifdef MADV_SOFT_OFFLINE
 422	P_MADV_BHV(SOFT_OFFLINE);
 423#endif
 424	P_MADV_BHV(MERGEABLE);
 425	P_MADV_BHV(UNMERGEABLE);
 426#ifdef MADV_HUGEPAGE
 427	P_MADV_BHV(HUGEPAGE);
 428#endif
 429#ifdef MADV_NOHUGEPAGE
 430	P_MADV_BHV(NOHUGEPAGE);
 431#endif
 432#ifdef MADV_DONTDUMP
 433	P_MADV_BHV(DONTDUMP);
 434#endif
 435#ifdef MADV_DODUMP
 436	P_MADV_BHV(DODUMP);
 437#endif
 438#undef P_MADV_PHV
 439	default: break;
 440	}
 441
 442	return scnprintf(bf, size, "%#x", behavior);
 443}
 444
 445#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
 446
 447static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
 448					   struct syscall_arg *arg)
 449{
 450	int printed = 0, op = arg->val;
 451
 452	if (op == 0)
 453		return scnprintf(bf, size, "NONE");
 454#define	P_CMD(cmd) \
 455	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
 456		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
 457		op &= ~LOCK_##cmd; \
 458	}
 459
 460	P_CMD(SH);
 461	P_CMD(EX);
 462	P_CMD(NB);
 463	P_CMD(UN);
 464	P_CMD(MAND);
 465	P_CMD(RW);
 466	P_CMD(READ);
 467	P_CMD(WRITE);
 468#undef P_OP
 469
 470	if (op)
 471		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
 472
 473	return printed;
 474}
 475
 476#define SCA_FLOCK syscall_arg__scnprintf_flock
 477
 478static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
 479{
 480	enum syscall_futex_args {
 481		SCF_UADDR   = (1 << 0),
 482		SCF_OP	    = (1 << 1),
 483		SCF_VAL	    = (1 << 2),
 484		SCF_TIMEOUT = (1 << 3),
 485		SCF_UADDR2  = (1 << 4),
 486		SCF_VAL3    = (1 << 5),
 487	};
 488	int op = arg->val;
 489	int cmd = op & FUTEX_CMD_MASK;
 490	size_t printed = 0;
 491
 492	switch (cmd) {
 493#define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
 494	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
 495	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 496	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 497	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
 498	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
 499	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
 500	P_FUTEX_OP(WAKE_OP);							  break;
 501	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 502	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 503	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
 504	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
 505	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
 506	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
 507	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
 508	}
 509
 510	if (op & FUTEX_PRIVATE_FLAG)
 511		printed += scnprintf(bf + printed, size - printed, "|PRIV");
 512
 513	if (op & FUTEX_CLOCK_REALTIME)
 514		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
 515
 516	return printed;
 517}
 518
 519#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
 520
 
 
 
 
 
 
 521static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
 522static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
 523
 524static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
 525static DEFINE_STRARRAY(itimers);
 526
 
 
 
 
 
 
 
 
 
 527static const char *whences[] = { "SET", "CUR", "END",
 528#ifdef SEEK_DATA
 529"DATA",
 530#endif
 531#ifdef SEEK_HOLE
 532"HOLE",
 533#endif
 534};
 535static DEFINE_STRARRAY(whences);
 536
 537static const char *fcntl_cmds[] = {
 538	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
 539	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
 540	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
 541	"F_GETOWNER_UIDS",
 542};
 543static DEFINE_STRARRAY(fcntl_cmds);
 544
 545static const char *rlimit_resources[] = {
 546	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
 547	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
 548	"RTTIME",
 549};
 550static DEFINE_STRARRAY(rlimit_resources);
 551
 552static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
 553static DEFINE_STRARRAY(sighow);
 554
 555static const char *clockid[] = {
 556	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
 557	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
 
 558};
 559static DEFINE_STRARRAY(clockid);
 560
 561static const char *socket_families[] = {
 562	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
 563	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
 564	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
 565	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
 566	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
 567	"ALG", "NFC", "VSOCK",
 568};
 569static DEFINE_STRARRAY(socket_families);
 570
 571#ifndef SOCK_TYPE_MASK
 572#define SOCK_TYPE_MASK 0xf
 573#endif
 574
 575static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
 576						      struct syscall_arg *arg)
 577{
 578	size_t printed;
 579	int type = arg->val,
 580	    flags = type & ~SOCK_TYPE_MASK;
 581
 582	type &= SOCK_TYPE_MASK;
 583	/*
 584 	 * Can't use a strarray, MIPS may override for ABI reasons.
 585 	 */
 586	switch (type) {
 587#define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
 588	P_SK_TYPE(STREAM);
 589	P_SK_TYPE(DGRAM);
 590	P_SK_TYPE(RAW);
 591	P_SK_TYPE(RDM);
 592	P_SK_TYPE(SEQPACKET);
 593	P_SK_TYPE(DCCP);
 594	P_SK_TYPE(PACKET);
 595#undef P_SK_TYPE
 596	default:
 597		printed = scnprintf(bf, size, "%#x", type);
 598	}
 599
 600#define	P_SK_FLAG(n) \
 601	if (flags & SOCK_##n) { \
 602		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
 603		flags &= ~SOCK_##n; \
 604	}
 605
 606	P_SK_FLAG(CLOEXEC);
 607	P_SK_FLAG(NONBLOCK);
 608#undef P_SK_FLAG
 609
 610	if (flags)
 611		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
 612
 613	return printed;
 614}
 615
 616#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
 617
 618#ifndef MSG_PROBE
 619#define MSG_PROBE	     0x10
 620#endif
 621#ifndef MSG_WAITFORONE
 622#define MSG_WAITFORONE	0x10000
 623#endif
 624#ifndef MSG_SENDPAGE_NOTLAST
 625#define MSG_SENDPAGE_NOTLAST 0x20000
 626#endif
 627#ifndef MSG_FASTOPEN
 628#define MSG_FASTOPEN	     0x20000000
 629#endif
 630
 631static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
 632					       struct syscall_arg *arg)
 633{
 634	int printed = 0, flags = arg->val;
 635
 636	if (flags == 0)
 637		return scnprintf(bf, size, "NONE");
 638#define	P_MSG_FLAG(n) \
 639	if (flags & MSG_##n) { \
 640		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 641		flags &= ~MSG_##n; \
 642	}
 643
 644	P_MSG_FLAG(OOB);
 645	P_MSG_FLAG(PEEK);
 646	P_MSG_FLAG(DONTROUTE);
 647	P_MSG_FLAG(TRYHARD);
 648	P_MSG_FLAG(CTRUNC);
 649	P_MSG_FLAG(PROBE);
 650	P_MSG_FLAG(TRUNC);
 651	P_MSG_FLAG(DONTWAIT);
 652	P_MSG_FLAG(EOR);
 653	P_MSG_FLAG(WAITALL);
 654	P_MSG_FLAG(FIN);
 655	P_MSG_FLAG(SYN);
 656	P_MSG_FLAG(CONFIRM);
 657	P_MSG_FLAG(RST);
 658	P_MSG_FLAG(ERRQUEUE);
 659	P_MSG_FLAG(NOSIGNAL);
 660	P_MSG_FLAG(MORE);
 661	P_MSG_FLAG(WAITFORONE);
 662	P_MSG_FLAG(SENDPAGE_NOTLAST);
 663	P_MSG_FLAG(FASTOPEN);
 664	P_MSG_FLAG(CMSG_CLOEXEC);
 665#undef P_MSG_FLAG
 666
 667	if (flags)
 668		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 669
 670	return printed;
 671}
 672
 673#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
 674
 675static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
 676						 struct syscall_arg *arg)
 677{
 678	size_t printed = 0;
 679	int mode = arg->val;
 680
 681	if (mode == F_OK) /* 0 */
 682		return scnprintf(bf, size, "F");
 683#define	P_MODE(n) \
 684	if (mode & n##_OK) { \
 685		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
 686		mode &= ~n##_OK; \
 687	}
 688
 689	P_MODE(R);
 690	P_MODE(W);
 691	P_MODE(X);
 692#undef P_MODE
 693
 694	if (mode)
 695		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
 696
 697	return printed;
 698}
 699
 700#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
 701
 
 
 
 
 
 702static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
 703					       struct syscall_arg *arg)
 704{
 705	int printed = 0, flags = arg->val;
 706
 707	if (!(flags & O_CREAT))
 708		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
 709
 710	if (flags == 0)
 711		return scnprintf(bf, size, "RDONLY");
 712#define	P_FLAG(n) \
 713	if (flags & O_##n) { \
 714		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 715		flags &= ~O_##n; \
 716	}
 717
 718	P_FLAG(APPEND);
 719	P_FLAG(ASYNC);
 720	P_FLAG(CLOEXEC);
 721	P_FLAG(CREAT);
 722	P_FLAG(DIRECT);
 723	P_FLAG(DIRECTORY);
 724	P_FLAG(EXCL);
 725	P_FLAG(LARGEFILE);
 726	P_FLAG(NOATIME);
 727	P_FLAG(NOCTTY);
 728#ifdef O_NONBLOCK
 729	P_FLAG(NONBLOCK);
 730#elif O_NDELAY
 731	P_FLAG(NDELAY);
 732#endif
 733#ifdef O_PATH
 734	P_FLAG(PATH);
 735#endif
 736	P_FLAG(RDWR);
 737#ifdef O_DSYNC
 738	if ((flags & O_SYNC) == O_SYNC)
 739		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
 740	else {
 741		P_FLAG(DSYNC);
 742	}
 743#else
 744	P_FLAG(SYNC);
 745#endif
 746	P_FLAG(TRUNC);
 747	P_FLAG(WRONLY);
 748#undef P_FLAG
 749
 750	if (flags)
 751		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 752
 753	return printed;
 754}
 755
 756#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
 757
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 758static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
 759						   struct syscall_arg *arg)
 760{
 761	int printed = 0, flags = arg->val;
 762
 763	if (flags == 0)
 764		return scnprintf(bf, size, "NONE");
 765#define	P_FLAG(n) \
 766	if (flags & EFD_##n) { \
 767		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 768		flags &= ~EFD_##n; \
 769	}
 770
 771	P_FLAG(SEMAPHORE);
 772	P_FLAG(CLOEXEC);
 773	P_FLAG(NONBLOCK);
 774#undef P_FLAG
 775
 776	if (flags)
 777		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 778
 779	return printed;
 780}
 781
 782#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
 783
 784static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
 785						struct syscall_arg *arg)
 786{
 787	int printed = 0, flags = arg->val;
 788
 789#define	P_FLAG(n) \
 790	if (flags & O_##n) { \
 791		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 792		flags &= ~O_##n; \
 793	}
 794
 795	P_FLAG(CLOEXEC);
 796	P_FLAG(NONBLOCK);
 797#undef P_FLAG
 798
 799	if (flags)
 800		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 801
 802	return printed;
 803}
 804
 805#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 806
 807static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
 808{
 809	int sig = arg->val;
 810
 811	switch (sig) {
 812#define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
 813	P_SIGNUM(HUP);
 814	P_SIGNUM(INT);
 815	P_SIGNUM(QUIT);
 816	P_SIGNUM(ILL);
 817	P_SIGNUM(TRAP);
 818	P_SIGNUM(ABRT);
 819	P_SIGNUM(BUS);
 820	P_SIGNUM(FPE);
 821	P_SIGNUM(KILL);
 822	P_SIGNUM(USR1);
 823	P_SIGNUM(SEGV);
 824	P_SIGNUM(USR2);
 825	P_SIGNUM(PIPE);
 826	P_SIGNUM(ALRM);
 827	P_SIGNUM(TERM);
 828	P_SIGNUM(CHLD);
 829	P_SIGNUM(CONT);
 830	P_SIGNUM(STOP);
 831	P_SIGNUM(TSTP);
 832	P_SIGNUM(TTIN);
 833	P_SIGNUM(TTOU);
 834	P_SIGNUM(URG);
 835	P_SIGNUM(XCPU);
 836	P_SIGNUM(XFSZ);
 837	P_SIGNUM(VTALRM);
 838	P_SIGNUM(PROF);
 839	P_SIGNUM(WINCH);
 840	P_SIGNUM(IO);
 841	P_SIGNUM(PWR);
 842	P_SIGNUM(SYS);
 843#ifdef SIGEMT
 844	P_SIGNUM(EMT);
 845#endif
 846#ifdef SIGSTKFLT
 847	P_SIGNUM(STKFLT);
 848#endif
 849#ifdef SIGSWI
 850	P_SIGNUM(SWI);
 851#endif
 852	default: break;
 853	}
 854
 855	return scnprintf(bf, size, "%#x", sig);
 856}
 857
 858#define SCA_SIGNUM syscall_arg__scnprintf_signum
 859
 860#if defined(__i386__) || defined(__x86_64__)
 861/*
 862 * FIXME: Make this available to all arches.
 863 */
 864#define TCGETS		0x5401
 865
 866static const char *tioctls[] = {
 867	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
 868	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
 869	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
 870	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
 871	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
 872	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
 873	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
 874	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
 875	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
 876	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
 877	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
 878	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
 879	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
 880	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
 881	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
 882};
 883
 884static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
 885#endif /* defined(__i386__) || defined(__x86_64__) */
 886
 887#define STRARRAY(arg, name, array) \
 888	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
 889	  .arg_parm	 = { [arg] = &strarray__##array, }
 890
 891static struct syscall_fmt {
 892	const char *name;
 893	const char *alias;
 894	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
 895	void	   *arg_parm[6];
 896	bool	   errmsg;
 897	bool	   timeout;
 898	bool	   hexret;
 899} syscall_fmts[] = {
 900	{ .name	    = "access",	    .errmsg = true,
 901	  .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
 
 902	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
 
 903	{ .name	    = "brk",	    .hexret = true,
 904	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
 
 
 
 
 
 
 905	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
 906	{ .name	    = "close",	    .errmsg = true,
 907	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
 908	{ .name	    = "connect",    .errmsg = true, },
 
 
 909	{ .name	    = "dup",	    .errmsg = true,
 910	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 911	{ .name	    = "dup2",	    .errmsg = true,
 912	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 913	{ .name	    = "dup3",	    .errmsg = true,
 914	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 915	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
 916	{ .name	    = "eventfd2",   .errmsg = true,
 917	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
 918	{ .name	    = "faccessat",  .errmsg = true,
 919	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
 
 920	{ .name	    = "fadvise64",  .errmsg = true,
 921	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 922	{ .name	    = "fallocate",  .errmsg = true,
 923	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 924	{ .name	    = "fchdir",	    .errmsg = true,
 925	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 926	{ .name	    = "fchmod",	    .errmsg = true,
 927	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 928	{ .name	    = "fchmodat",   .errmsg = true,
 929	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
 
 930	{ .name	    = "fchown",	    .errmsg = true,
 931	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 932	{ .name	    = "fchownat",   .errmsg = true,
 933	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
 
 934	{ .name	    = "fcntl",	    .errmsg = true,
 935	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
 936			     [1] = SCA_STRARRAY, /* cmd */ },
 937	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
 938	{ .name	    = "fdatasync",  .errmsg = true,
 939	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 940	{ .name	    = "flock",	    .errmsg = true,
 941	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
 942			     [1] = SCA_FLOCK, /* cmd */ }, },
 943	{ .name	    = "fsetxattr",  .errmsg = true,
 944	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 945	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
 946	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 947	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
 948	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
 
 949	{ .name	    = "fstatfs",    .errmsg = true,
 950	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 951	{ .name	    = "fsync",    .errmsg = true,
 952	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 953	{ .name	    = "ftruncate", .errmsg = true,
 954	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 955	{ .name	    = "futex",	    .errmsg = true,
 956	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
 957	{ .name	    = "futimesat", .errmsg = true,
 958	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
 
 959	{ .name	    = "getdents",   .errmsg = true,
 960	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 961	{ .name	    = "getdents64", .errmsg = true,
 962	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 963	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
 964	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
 
 
 
 
 965	{ .name	    = "ioctl",	    .errmsg = true,
 966	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
 967#if defined(__i386__) || defined(__x86_64__)
 968/*
 969 * FIXME: Make this available to all arches.
 970 */
 971			     [1] = SCA_STRHEXARRAY, /* cmd */
 972			     [2] = SCA_HEX, /* arg */ },
 973	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
 974#else
 975			     [2] = SCA_HEX, /* arg */ }, },
 976#endif
 
 977	{ .name	    = "kill",	    .errmsg = true,
 978	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
 
 
 
 
 979	{ .name	    = "linkat",	    .errmsg = true,
 980	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
 
 
 
 
 
 
 981	{ .name	    = "lseek",	    .errmsg = true,
 982	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
 983			     [2] = SCA_STRARRAY, /* whence */ },
 984	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
 985	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
 
 
 
 
 
 986	{ .name     = "madvise",    .errmsg = true,
 987	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
 988			     [2] = SCA_MADV_BHV, /* behavior */ }, },
 
 
 989	{ .name	    = "mkdirat",    .errmsg = true,
 990	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
 
 
 
 991	{ .name	    = "mknodat",    .errmsg = true,
 992	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
 
 993	{ .name	    = "mlock",	    .errmsg = true,
 994	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 995	{ .name	    = "mlockall",   .errmsg = true,
 996	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 997	{ .name	    = "mmap",	    .hexret = true,
 998	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
 999			     [2] = SCA_MMAP_PROT, /* prot */
1000			     [3] = SCA_MMAP_FLAGS, /* flags */
1001			     [4] = SCA_FD, 	  /* fd */ }, },
1002	{ .name	    = "mprotect",   .errmsg = true,
1003	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
1004			     [2] = SCA_MMAP_PROT, /* prot */ }, },
 
 
1005	{ .name	    = "mremap",	    .hexret = true,
1006	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
 
1007			     [4] = SCA_HEX, /* new_addr */ }, },
1008	{ .name	    = "munlock",    .errmsg = true,
1009	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1010	{ .name	    = "munmap",	    .errmsg = true,
1011	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1012	{ .name	    = "name_to_handle_at", .errmsg = true,
1013	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1014	{ .name	    = "newfstatat", .errmsg = true,
1015	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
 
1016	{ .name	    = "open",	    .errmsg = true,
1017	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
 
1018	{ .name	    = "open_by_handle_at", .errmsg = true,
1019	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1020			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1021	{ .name	    = "openat",	    .errmsg = true,
1022	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
 
1023			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
 
 
 
 
 
1024	{ .name	    = "pipe2",	    .errmsg = true,
1025	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1026	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
1027	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
1028	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
1029	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1030	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
1031	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1032	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1033	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1034	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1035	{ .name	    = "pwritev",    .errmsg = true,
1036	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1037	{ .name	    = "read",	    .errmsg = true,
1038	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 
 
1039	{ .name	    = "readlinkat", .errmsg = true,
1040	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
 
1041	{ .name	    = "readv",	    .errmsg = true,
1042	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1043	{ .name	    = "recvfrom",   .errmsg = true,
1044	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
 
1045	{ .name	    = "recvmmsg",   .errmsg = true,
1046	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
 
1047	{ .name	    = "recvmsg",    .errmsg = true,
1048	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
 
 
 
1049	{ .name	    = "renameat",   .errmsg = true,
1050	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
 
 
1051	{ .name	    = "rt_sigaction", .errmsg = true,
1052	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1053	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1054	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1055	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1056	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1057	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1058	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1059	{ .name	    = "sendmmsg",    .errmsg = true,
1060	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
 
1061	{ .name	    = "sendmsg",    .errmsg = true,
1062	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
 
1063	{ .name	    = "sendto",	    .errmsg = true,
1064	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
 
1065	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1066	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
 
 
1067	{ .name	    = "shutdown",   .errmsg = true,
1068	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1069	{ .name	    = "socket",	    .errmsg = true,
1070	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1071			     [1] = SCA_SK_TYPE, /* type */ },
1072	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1073	{ .name	    = "socketpair", .errmsg = true,
1074	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1075			     [1] = SCA_SK_TYPE, /* type */ },
1076	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1077	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
 
 
 
 
 
 
 
1078	{ .name	    = "symlinkat",  .errmsg = true,
1079	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1080	{ .name	    = "tgkill",	    .errmsg = true,
1081	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1082	{ .name	    = "tkill",	    .errmsg = true,
1083	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
 
 
1084	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1085	{ .name	    = "unlinkat",   .errmsg = true,
1086	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
 
 
 
1087	{ .name	    = "utimensat",  .errmsg = true,
1088	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
 
 
 
 
 
1089	{ .name	    = "write",	    .errmsg = true,
1090	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1091	{ .name	    = "writev",	    .errmsg = true,
1092	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1093};
1094
1095static int syscall_fmt__cmp(const void *name, const void *fmtp)
1096{
1097	const struct syscall_fmt *fmt = fmtp;
1098	return strcmp(name, fmt->name);
1099}
1100
1101static struct syscall_fmt *syscall_fmt__find(const char *name)
1102{
1103	const int nmemb = ARRAY_SIZE(syscall_fmts);
1104	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1105}
1106
/*
 * What is known about one syscall number; entries live in
 * trace->syscalls.table, indexed by syscall id.
 */
struct syscall {
	/* Format of the syscalls:sys_enter_<name> tracepoint, NULL if not found. */
	struct event_format *tp_format;
	/* Name returned by audit_syscall_to_name(); NULL marks an unread slot. */
	const char	    *name;
	/* Set when the event qualifier excludes this syscall. */
	bool		    filtered;
	/* Matching syscall_fmts[] entry, or NULL when there is none. */
	struct syscall_fmt  *fmt;
	/* Per-argument pretty printers, allocated by syscall__set_arg_fmts(). */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* Per-argument printer parameters, borrowed from ->fmt. */
	void		    **arg_parm;
};
1115
1116static size_t fprintf_duration(unsigned long t, FILE *fp)
1117{
1118	double duration = (double)t / NSEC_PER_MSEC;
1119	size_t printed = fprintf(fp, "(");
1120
1121	if (duration >= 1.0)
1122		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1123	else if (duration >= 0.01)
1124		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1125	else
1126		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1127	return printed + fprintf(fp, "): ");
1128}
1129
 
 
 
 
 
1130struct thread_trace {
1131	u64		  entry_time;
1132	u64		  exit_time;
1133	bool		  entry_pending;
1134	unsigned long	  nr_events;
 
1135	char		  *entry_str;
1136	double		  runtime_ms;
 
 
 
 
 
 
 
1137	struct {
1138		int	  max;
1139		char	  **table;
1140	} paths;
1141
1142	struct intlist *syscall_stats;
1143};
1144
1145static struct thread_trace *thread_trace__new(void)
1146{
1147	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1148
1149	if (ttrace)
1150		ttrace->paths.max = -1;
1151
1152	ttrace->syscall_stats = intlist__new(NULL);
1153
1154	return ttrace;
1155}
1156
1157static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1158{
1159	struct thread_trace *ttrace;
1160
1161	if (thread == NULL)
1162		goto fail;
1163
1164	if (thread->priv == NULL)
1165		thread->priv = thread_trace__new();
1166		
1167	if (thread->priv == NULL)
1168		goto fail;
1169
1170	ttrace = thread->priv;
1171	++ttrace->nr_events;
1172
1173	return ttrace;
1174fail:
1175	color_fprintf(fp, PERF_COLOR_RED,
1176		      "WARNING: not enough memory, dropping samples!\n");
1177	return NULL;
1178}
1179
 
 
 
 
 
1180struct trace {
1181	struct perf_tool	tool;
1182	struct {
1183		int		machine;
1184		int		open_id;
1185	}			audit;
1186	struct {
1187		int		max;
1188		struct syscall  *table;
 
 
 
 
1189	} syscalls;
1190	struct record_opts	opts;
 
1191	struct machine		*host;
 
1192	u64			base_time;
1193	FILE			*output;
1194	unsigned long		nr_events;
1195	struct strlist		*ev_qualifier;
1196	const char 		*last_vfs_getname;
 
 
 
1197	struct intlist		*tid_list;
1198	struct intlist		*pid_list;
 
 
 
 
1199	double			duration_filter;
1200	double			runtime_ms;
1201	struct {
1202		u64		vfs_getname,
1203				proc_getname;
1204	} stats;
1205	bool			not_ev_qualifier;
1206	bool			live;
1207	bool			full_time;
1208	bool			sched;
1209	bool			multiple_threads;
1210	bool			summary;
1211	bool			summary_only;
1212	bool			show_comm;
1213	bool			show_tool_stats;
 
 
 
 
1214};
1215
1216static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1217{
1218	struct thread_trace *ttrace = thread->priv;
1219
1220	if (fd > ttrace->paths.max) {
1221		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1222
1223		if (npath == NULL)
1224			return -1;
1225
1226		if (ttrace->paths.max != -1) {
1227			memset(npath + ttrace->paths.max + 1, 0,
1228			       (fd - ttrace->paths.max) * sizeof(char *));
1229		} else {
1230			memset(npath, 0, (fd + 1) * sizeof(char *));
1231		}
1232
1233		ttrace->paths.table = npath;
1234		ttrace->paths.max   = fd;
1235	}
1236
1237	ttrace->paths.table[fd] = strdup(pathname);
1238
1239	return ttrace->paths.table[fd] != NULL ? 0 : -1;
1240}
1241
1242static int thread__read_fd_path(struct thread *thread, int fd)
1243{
1244	char linkname[PATH_MAX], pathname[PATH_MAX];
1245	struct stat st;
1246	int ret;
1247
1248	if (thread->pid_ == thread->tid) {
1249		scnprintf(linkname, sizeof(linkname),
1250			  "/proc/%d/fd/%d", thread->pid_, fd);
1251	} else {
1252		scnprintf(linkname, sizeof(linkname),
1253			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1254	}
1255
1256	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1257		return -1;
1258
1259	ret = readlink(linkname, pathname, sizeof(pathname));
1260
1261	if (ret < 0 || ret > st.st_size)
1262		return -1;
1263
1264	pathname[ret] = '\0';
1265	return trace__set_fd_pathname(thread, fd, pathname);
1266}
1267
1268static const char *thread__fd_path(struct thread *thread, int fd,
1269				   struct trace *trace)
1270{
1271	struct thread_trace *ttrace = thread->priv;
1272
1273	if (ttrace == NULL)
1274		return NULL;
1275
1276	if (fd < 0)
1277		return NULL;
1278
1279	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1280		if (!trace->live)
1281			return NULL;
1282		++trace->stats.proc_getname;
1283		if (thread__read_fd_path(thread, fd)) {
1284			return NULL;
1285	}
1286
1287	return ttrace->paths.table[fd];
1288}
1289
1290static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1291					struct syscall_arg *arg)
1292{
1293	int fd = arg->val;
1294	size_t printed = scnprintf(bf, size, "%d", fd);
1295	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1296
1297	if (path)
1298		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1299
1300	return printed;
1301}
1302
1303static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1304					      struct syscall_arg *arg)
1305{
1306	int fd = arg->val;
1307	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1308	struct thread_trace *ttrace = arg->thread->priv;
1309
1310	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1311		zfree(&ttrace->paths.table[fd]);
1312
1313	return printed;
1314}
1315
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1316static bool trace__filter_duration(struct trace *trace, double t)
1317{
1318	return t < (trace->duration_filter * NSEC_PER_MSEC);
1319}
1320
1321static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1322{
1323	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1324
1325	return fprintf(fp, "%10.3f ", ts);
1326}
1327
/*
 * Flags set from the signal handler and polled by the event loop.
 * They are volatile sig_atomic_t because that is the only object type
 * the C standard lets a signal handler write to portably; both are only
 * ever used as booleans.
 */
static volatile sig_atomic_t done = 0;
static volatile sig_atomic_t interrupted = 0;

/*
 * Any handled signal asks the event loop to finish; 'interrupted' records
 * whether the most recent signal was SIGINT.
 */
static void sig_handler(int sig)
{
	done = 1;
	interrupted = sig == SIGINT;
}
1336
1337static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1338					u64 duration, u64 tstamp, FILE *fp)
1339{
1340	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1341	printed += fprintf_duration(duration, fp);
1342
1343	if (trace->multiple_threads) {
1344		if (trace->show_comm)
1345			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1346		printed += fprintf(fp, "%d ", thread->tid);
1347	}
1348
1349	return printed;
1350}
1351
1352static int trace__process_event(struct trace *trace, struct machine *machine,
1353				union perf_event *event, struct perf_sample *sample)
1354{
1355	int ret = 0;
1356
1357	switch (event->header.type) {
1358	case PERF_RECORD_LOST:
1359		color_fprintf(trace->output, PERF_COLOR_RED,
1360			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1361		ret = machine__process_lost_event(machine, event, sample);
1362	default:
1363		ret = machine__process_event(machine, event, sample);
1364		break;
1365	}
1366
1367	return ret;
1368}
1369
1370static int trace__tool_process(struct perf_tool *tool,
1371			       union perf_event *event,
1372			       struct perf_sample *sample,
1373			       struct machine *machine)
1374{
1375	struct trace *trace = container_of(tool, struct trace, tool);
1376	return trace__process_event(trace, machine, event, sample);
1377}
1378
1379static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1380{
1381	int err = symbol__init();
1382
1383	if (err)
1384		return err;
1385
1386	trace->host = machine__new_host();
1387	if (trace->host == NULL)
1388		return -ENOMEM;
1389
 
 
 
1390	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1391					    evlist->threads, trace__tool_process, false);
 
1392	if (err)
1393		symbol__exit();
1394
1395	return err;
1396}
1397
1398static int syscall__set_arg_fmts(struct syscall *sc)
1399{
1400	struct format_field *field;
1401	int idx = 0;
1402
1403	sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1404	if (sc->arg_scnprintf == NULL)
1405		return -1;
1406
1407	if (sc->fmt)
1408		sc->arg_parm = sc->fmt->arg_parm;
1409
1410	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1411		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1412			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1413		else if (field->flags & FIELD_IS_POINTER)
1414			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1415		++idx;
1416	}
1417
1418	return 0;
1419}
1420
1421static int trace__read_syscall_info(struct trace *trace, int id)
1422{
1423	char tp_name[128];
1424	struct syscall *sc;
1425	const char *name = audit_syscall_to_name(id, trace->audit.machine);
1426
1427	if (name == NULL)
1428		return -1;
1429
1430	if (id > trace->syscalls.max) {
1431		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1432
1433		if (nsyscalls == NULL)
1434			return -1;
1435
1436		if (trace->syscalls.max != -1) {
1437			memset(nsyscalls + trace->syscalls.max + 1, 0,
1438			       (id - trace->syscalls.max) * sizeof(*sc));
1439		} else {
1440			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1441		}
1442
1443		trace->syscalls.table = nsyscalls;
1444		trace->syscalls.max   = id;
1445	}
1446
1447	sc = trace->syscalls.table + id;
1448	sc->name = name;
1449
1450	if (trace->ev_qualifier) {
1451		bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1452
1453		if (!(in ^ trace->not_ev_qualifier)) {
1454			sc->filtered = true;
1455			/*
1456			 * No need to do read tracepoint information since this will be
1457			 * filtered out.
1458			 */
1459			return 0;
1460		}
1461	}
1462
1463	sc->fmt  = syscall_fmt__find(sc->name);
1464
1465	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1466	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1467
1468	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1469		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1470		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1471	}
1472
1473	if (sc->tp_format == NULL)
1474		return -1;
1475
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1476	return syscall__set_arg_fmts(sc);
1477}
1478
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1479static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1480				      unsigned long *args, struct trace *trace,
1481				      struct thread *thread)
1482{
1483	size_t printed = 0;
 
 
1484
1485	if (sc->tp_format != NULL) {
1486		struct format_field *field;
1487		u8 bit = 1;
1488		struct syscall_arg arg = {
1489			.idx	= 0,
1490			.mask	= 0,
1491			.trace  = trace,
1492			.thread = thread,
1493		};
1494
1495		for (field = sc->tp_format->format.fields->next; field;
1496		     field = field->next, ++arg.idx, bit <<= 1) {
1497			if (arg.mask & bit)
1498				continue;
 
 
 
 
 
1499			/*
1500 			 * Suppress this argument if its value is zero and
1501 			 * and we don't have a string associated in an
1502 			 * strarray for it.
1503 			 */
1504			if (args[arg.idx] == 0 &&
1505			    !(sc->arg_scnprintf &&
1506			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1507			      sc->arg_parm[arg.idx]))
1508				continue;
1509
1510			printed += scnprintf(bf + printed, size - printed,
1511					     "%s%s: ", printed ? ", " : "", field->name);
1512			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1513				arg.val = args[arg.idx];
1514				if (sc->arg_parm)
1515					arg.parm = sc->arg_parm[arg.idx];
1516				printed += sc->arg_scnprintf[arg.idx](bf + printed,
1517								      size - printed, &arg);
1518			} else {
1519				printed += scnprintf(bf + printed, size - printed,
1520						     "%ld", args[arg.idx]);
1521			}
1522		}
1523	} else {
1524		int i = 0;
1525
1526		while (i < 6) {
 
 
 
1527			printed += scnprintf(bf + printed, size - printed,
1528					     "%sarg%d: %ld",
1529					     printed ? ", " : "", i, args[i]);
1530			++i;
1531		}
1532	}
1533
1534	return printed;
1535}
1536
/*
 * Signature of the per-tracepoint sample handlers stored in
 * evsel->handler and invoked for each sample of that event.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample);
1539
1540static struct syscall *trace__syscall_info(struct trace *trace,
1541					   struct perf_evsel *evsel, int id)
1542{
1543
1544	if (id < 0) {
1545
1546		/*
1547		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1548		 * before that, leaving at a higher verbosity level till that is
1549		 * explained. Reproduced with plain ftrace with:
1550		 *
1551		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1552		 * grep "NR -1 " /t/trace_pipe
1553		 *
1554		 * After generating some load on the machine.
1555 		 */
1556		if (verbose > 1) {
1557			static u64 n;
1558			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1559				id, perf_evsel__name(evsel), ++n);
1560		}
1561		return NULL;
1562	}
1563
1564	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1565	    trace__read_syscall_info(trace, id))
1566		goto out_cant_read;
1567
1568	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1569		goto out_cant_read;
1570
1571	return &trace->syscalls.table[id];
1572
1573out_cant_read:
1574	if (verbose) {
1575		fprintf(trace->output, "Problems reading syscall %d", id);
1576		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1577			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1578		fputs(" information\n", trace->output);
1579	}
1580	return NULL;
1581}
1582
1583static void thread__update_stats(struct thread_trace *ttrace,
1584				 int id, struct perf_sample *sample)
1585{
1586	struct int_node *inode;
1587	struct stats *stats;
1588	u64 duration = 0;
1589
1590	inode = intlist__findnew(ttrace->syscall_stats, id);
1591	if (inode == NULL)
1592		return;
1593
1594	stats = inode->priv;
1595	if (stats == NULL) {
1596		stats = malloc(sizeof(struct stats));
1597		if (stats == NULL)
1598			return;
1599		init_stats(stats);
1600		inode->priv = stats;
1601	}
1602
1603	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1604		duration = sample->time - ttrace->entry_time;
1605
1606	update_stats(stats, duration);
1607}
1608
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1609static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 
1610			    struct perf_sample *sample)
1611{
1612	char *msg;
1613	void *args;
1614	size_t printed = 0;
1615	struct thread *thread;
1616	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1617	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1618	struct thread_trace *ttrace;
1619
1620	if (sc == NULL)
1621		return -1;
1622
1623	if (sc->filtered)
1624		return 0;
1625
1626	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1627	ttrace = thread__trace(thread, trace->output);
1628	if (ttrace == NULL)
1629		return -1;
1630
1631	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1632	ttrace = thread->priv;
1633
1634	if (ttrace->entry_str == NULL) {
1635		ttrace->entry_str = malloc(1024);
1636		if (!ttrace->entry_str)
1637			return -1;
1638	}
1639
 
 
 
1640	ttrace->entry_time = sample->time;
1641	msg = ttrace->entry_str;
1642	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1643
1644	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1645					   args, trace, thread);
1646
1647	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1648		if (!trace->duration_filter && !trace->summary_only) {
1649			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1650			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1651		}
1652	} else
1653		ttrace->entry_pending = true;
 
 
 
1654
1655	return 0;
 
 
 
 
 
 
 
1656}
1657
1658static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
1659			   struct perf_sample *sample)
1660{
1661	int ret;
1662	u64 duration = 0;
1663	struct thread *thread;
1664	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1665	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1666	struct thread_trace *ttrace;
1667
1668	if (sc == NULL)
1669		return -1;
1670
1671	if (sc->filtered)
1672		return 0;
1673
1674	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1675	ttrace = thread__trace(thread, trace->output);
1676	if (ttrace == NULL)
1677		return -1;
1678
1679	if (trace->summary)
1680		thread__update_stats(ttrace, id, sample);
1681
1682	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1683
1684	if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1685		trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1686		trace->last_vfs_getname = NULL;
1687		++trace->stats.vfs_getname;
1688	}
1689
1690	ttrace = thread->priv;
1691
1692	ttrace->exit_time = sample->time;
1693
1694	if (ttrace->entry_time) {
1695		duration = sample->time - ttrace->entry_time;
1696		if (trace__filter_duration(trace, duration))
1697			goto out;
1698	} else if (trace->duration_filter)
1699		goto out;
1700
1701	if (trace->summary_only)
1702		goto out;
1703
1704	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1705
1706	if (ttrace->entry_pending) {
1707		fprintf(trace->output, "%-70s", ttrace->entry_str);
1708	} else {
1709		fprintf(trace->output, " ... [");
1710		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1711		fprintf(trace->output, "]: %s()", sc->name);
1712	}
1713
1714	if (sc->fmt == NULL) {
1715signed_print:
1716		fprintf(trace->output, ") = %d", ret);
1717	} else if (ret < 0 && sc->fmt->errmsg) {
1718		char bf[256];
1719		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1720			   *e = audit_errno_to_name(-ret);
1721
1722		fprintf(trace->output, ") = -1 %s %s", e, emsg);
1723	} else if (ret == 0 && sc->fmt->timeout)
1724		fprintf(trace->output, ") = 0 Timeout");
1725	else if (sc->fmt->hexret)
1726		fprintf(trace->output, ") = %#x", ret);
1727	else
1728		goto signed_print;
1729
1730	fputc('\n', trace->output);
1731out:
1732	ttrace->entry_pending = false;
1733
1734	return 0;
 
 
1735}
1736
1737static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
 
1738			      struct perf_sample *sample)
1739{
1740	trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1741	return 0;
1742}
1743
1744static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
 
1745				     struct perf_sample *sample)
1746{
1747        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1748	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1749	struct thread *thread = machine__findnew_thread(trace->host,
1750							sample->pid,
1751							sample->tid);
1752	struct thread_trace *ttrace = thread__trace(thread, trace->output);
1753
1754	if (ttrace == NULL)
1755		goto out_dump;
1756
1757	ttrace->runtime_ms += runtime_ms;
1758	trace->runtime_ms += runtime_ms;
 
1759	return 0;
1760
1761out_dump:
1762	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1763	       evsel->name,
1764	       perf_evsel__strval(evsel, sample, "comm"),
1765	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1766	       runtime,
1767	       perf_evsel__intval(evsel, sample, "vruntime"));
 
1768	return 0;
1769}
1770
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1771static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1772{
1773	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1774	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1775		return false;
1776
1777	if (trace->pid_list || trace->tid_list)
1778		return true;
1779
1780	return false;
1781}
1782
1783static int trace__process_sample(struct perf_tool *tool,
1784				 union perf_event *event __maybe_unused,
1785				 struct perf_sample *sample,
1786				 struct perf_evsel *evsel,
1787				 struct machine *machine __maybe_unused)
1788{
1789	struct trace *trace = container_of(tool, struct trace, tool);
1790	int err = 0;
1791
1792	tracepoint_handler handler = evsel->handler;
1793
1794	if (skip_sample(trace, sample))
1795		return 0;
1796
1797	if (!trace->full_time && trace->base_time == 0)
1798		trace->base_time = sample->time;
1799
1800	if (handler) {
1801		++trace->nr_events;
1802		handler(trace, evsel, sample);
1803	}
1804
1805	return err;
1806}
1807
1808static int parse_target_str(struct trace *trace)
1809{
1810	if (trace->opts.target.pid) {
1811		trace->pid_list = intlist__new(trace->opts.target.pid);
1812		if (trace->pid_list == NULL) {
1813			pr_err("Error parsing process id string\n");
1814			return -EINVAL;
1815		}
1816	}
1817
1818	if (trace->opts.target.tid) {
1819		trace->tid_list = intlist__new(trace->opts.target.tid);
1820		if (trace->tid_list == NULL) {
1821			pr_err("Error parsing thread id string\n");
1822			return -EINVAL;
1823		}
1824	}
1825
1826	return 0;
1827}
1828
/*
 * Implement 'perf trace record': run cmd_record() with the fixed options
 * needed for syscall tracing, the syscall enter/exit event string and
 * then the caller's own arguments.
 *
 * Returns -ENOMEM when the argument vector cannot be allocated, -1 when
 * neither the raw_syscalls nor the syscalls tracepoints exist, and the
 * result of cmd_record() otherwise.
 */
static int trace__record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e",
	};

	/* +1 is for the event string below */
	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	/* event string may be different for older kernels - e.g., RHEL6 */
	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
	else if (is_valid_tracepoint("syscalls:sys_enter"))
		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
	else {
		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
		/* Nothing will use the vector: do not leak it. */
		free(rec_argv);
		return -1;
	}
	i++;

	for (j = 0; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}
1867
1868static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1869
1870static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1871{
1872	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1873	if (evsel == NULL)
1874		return;
 
1875
1876	if (perf_evsel__field(evsel, "pathname") == NULL) {
1877		perf_evsel__delete(evsel);
1878		return;
1879	}
1880
1881	evsel->handler = trace__vfs_getname;
1882	perf_evlist__add(evlist, evsel);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1883}
1884
1885static int trace__run(struct trace *trace, int argc, const char **argv)
1886{
1887	struct perf_evlist *evlist = perf_evlist__new();
1888	struct perf_evsel *evsel;
1889	int err = -1, i;
1890	unsigned long before;
1891	const bool forks = argc > 0;
 
1892
1893	trace->live = true;
1894
1895	if (evlist == NULL) {
1896		fprintf(trace->output, "Not enough memory to run!\n");
1897		goto out;
1898	}
 
1899
1900	if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1901		goto out_error_tp;
 
 
1902
1903	perf_evlist__add_vfs_getname(evlist);
 
 
1904
1905	if (trace->sched &&
1906		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1907				trace__sched_stat_runtime))
1908		goto out_error_tp;
1909
1910	err = perf_evlist__create_maps(evlist, &trace->opts.target);
1911	if (err < 0) {
1912		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1913		goto out_delete_evlist;
1914	}
1915
1916	err = trace__symbols_init(trace, evlist);
1917	if (err < 0) {
1918		fprintf(trace->output, "Problems initializing symbol libraries!\n");
1919		goto out_delete_evlist;
1920	}
1921
1922	perf_evlist__config(evlist, &trace->opts);
1923
1924	signal(SIGCHLD, sig_handler);
1925	signal(SIGINT, sig_handler);
1926
1927	if (forks) {
1928		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1929						    argv, false, NULL);
1930		if (err < 0) {
1931			fprintf(trace->output, "Couldn't run the workload!\n");
1932			goto out_delete_evlist;
1933		}
1934	}
1935
1936	err = perf_evlist__open(evlist);
1937	if (err < 0)
1938		goto out_error_open;
1939
1940	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1941	if (err < 0) {
1942		fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1943		goto out_delete_evlist;
 
 
 
 
1944	}
1945
1946	perf_evlist__enable(evlist);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1947
1948	if (forks)
1949		perf_evlist__start_workload(evlist);
1950
1951	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
 
 
1952again:
1953	before = trace->nr_events;
1954
1955	for (i = 0; i < evlist->nr_mmaps; i++) {
1956		union perf_event *event;
1957
1958		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1959			const u32 type = event->header.type;
1960			tracepoint_handler handler;
1961			struct perf_sample sample;
1962
1963			++trace->nr_events;
1964
1965			err = perf_evlist__parse_sample(evlist, event, &sample);
1966			if (err) {
1967				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1968				goto next_event;
1969			}
1970
1971			if (!trace->full_time && trace->base_time == 0)
1972				trace->base_time = sample.time;
1973
1974			if (type != PERF_RECORD_SAMPLE) {
1975				trace__process_event(trace, trace->host, event, &sample);
1976				continue;
1977			}
1978
1979			evsel = perf_evlist__id2evsel(evlist, sample.id);
1980			if (evsel == NULL) {
1981				fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1982				goto next_event;
1983			}
1984
1985			if (sample.raw_data == NULL) {
1986				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1987				       perf_evsel__name(evsel), sample.tid,
1988				       sample.cpu, sample.raw_size);
1989				goto next_event;
1990			}
1991
1992			handler = evsel->handler;
1993			handler(trace, evsel, &sample);
1994next_event:
1995			perf_evlist__mmap_consume(evlist, i);
1996
1997			if (interrupted)
1998				goto out_disable;
 
 
 
 
 
1999		}
2000	}
2001
2002	if (trace->nr_events == before) {
2003		int timeout = done ? 100 : -1;
2004
2005		if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
 
 
 
2006			goto again;
 
2007	} else {
2008		goto again;
2009	}
2010
2011out_disable:
 
 
2012	perf_evlist__disable(evlist);
2013
2014	if (!err) {
2015		if (trace->summary)
2016			trace__fprintf_thread_summary(trace, trace->output);
2017
2018		if (trace->show_tool_stats) {
2019			fprintf(trace->output, "Stats:\n "
2020					       " vfs_getname : %" PRIu64 "\n"
2021					       " proc_getname: %" PRIu64 "\n",
2022				trace->stats.vfs_getname,
2023				trace->stats.proc_getname);
2024		}
2025	}
2026
2027out_delete_evlist:
2028	perf_evlist__delete(evlist);
2029out:
2030	trace->live = false;
2031	return err;
2032{
2033	char errbuf[BUFSIZ];
2034
2035out_error_tp:
2036	perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
 
 
 
 
 
 
 
 
2037	goto out_error;
2038
2039out_error_open:
2040	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2041
2042out_error:
2043	fprintf(trace->output, "%s\n", errbuf);
2044	goto out_delete_evlist;
 
 
 
 
 
 
 
2045}
 
 
 
 
 
 
 
2046}
2047
2048static int trace__replay(struct trace *trace)
2049{
2050	const struct perf_evsel_str_handler handlers[] = {
2051		{ "probe:vfs_getname",	     trace__vfs_getname, },
2052	};
2053	struct perf_data_file file = {
2054		.path  = input_name,
2055		.mode  = PERF_DATA_MODE_READ,
 
2056	};
2057	struct perf_session *session;
2058	struct perf_evsel *evsel;
2059	int err = -1;
2060
2061	trace->tool.sample	  = trace__process_sample;
2062	trace->tool.mmap	  = perf_event__process_mmap;
2063	trace->tool.mmap2	  = perf_event__process_mmap2;
2064	trace->tool.comm	  = perf_event__process_comm;
2065	trace->tool.exit	  = perf_event__process_exit;
2066	trace->tool.fork	  = perf_event__process_fork;
2067	trace->tool.attr	  = perf_event__process_attr;
2068	trace->tool.tracing_data = perf_event__process_tracing_data;
2069	trace->tool.build_id	  = perf_event__process_build_id;
2070
2071	trace->tool.ordered_samples = true;
2072	trace->tool.ordering_requires_timestamps = true;
2073
2074	/* add tid to output */
2075	trace->multiple_threads = true;
2076
2077	if (symbol__init() < 0)
2078		return -1;
2079
2080	session = perf_session__new(&file, false, &trace->tool);
2081	if (session == NULL)
2082		return -ENOMEM;
 
 
 
2083
2084	trace->host = &session->machines.host;
2085
2086	err = perf_session__set_tracepoints_handlers(session, handlers);
2087	if (err)
2088		goto out;
2089
2090	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2091						     "raw_syscalls:sys_enter");
2092	/* older kernels have syscalls tp versus raw_syscalls */
2093	if (evsel == NULL)
2094		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2095							     "syscalls:sys_enter");
2096	if (evsel == NULL) {
2097		pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2098		goto out;
2099	}
2100
2101	if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2102	    perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
 
2103		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2104		goto out;
2105	}
2106
2107	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2108						     "raw_syscalls:sys_exit");
2109	if (evsel == NULL)
2110		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2111							     "syscalls:sys_exit");
2112	if (evsel == NULL) {
2113		pr_err("Data file does not have raw_syscalls:sys_exit event\n");
 
 
2114		goto out;
2115	}
2116
2117	if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2118	    perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2119		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2120		goto out;
 
 
2121	}
2122
2123	err = parse_target_str(trace);
2124	if (err != 0)
2125		goto out;
2126
2127	setup_pager();
2128
2129	err = perf_session__process_events(session, &trace->tool);
2130	if (err)
2131		pr_err("Failed to process events, error %d", err);
2132
2133	else if (trace->summary)
2134		trace__fprintf_thread_summary(trace, trace->output);
2135
2136out:
2137	perf_session__delete(session);
2138
2139	return err;
2140}
2141
/* Write the banner that precedes the per-thread summary; returns fprintf()'s result. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2150
2151static size_t thread__dump_stats(struct thread_trace *ttrace,
2152				 struct trace *trace, FILE *fp)
2153{
2154	struct stats *stats;
2155	size_t printed = 0;
2156	struct syscall *sc;
2157	struct int_node *inode = intlist__first(ttrace->syscall_stats);
2158
2159	if (inode == NULL)
2160		return 0;
2161
2162	printed += fprintf(fp, "\n");
2163
2164	printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2165	printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2166	printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2167
2168	/* each int_node is a syscall */
2169	while (inode) {
2170		stats = inode->priv;
2171		if (stats) {
2172			double min = (double)(stats->min) / NSEC_PER_MSEC;
2173			double max = (double)(stats->max) / NSEC_PER_MSEC;
2174			double avg = avg_stats(stats);
2175			double pct;
2176			u64 n = (u64) stats->n;
2177
2178			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2179			avg /= NSEC_PER_MSEC;
2180
2181			sc = &trace->syscalls.table[inode->i];
2182			printed += fprintf(fp, "   %-15s", sc->name);
2183			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2184					   n, min, avg);
2185			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2186		}
2187
2188		inode = intlist__next(inode);
2189	}
2190
2191	printed += fprintf(fp, "\n\n");
2192
2193	return printed;
2194}
2195
/*
 * fp:      stream the per-thread summary is written to
 * trace:   tracer state consulted while printing
 * printed: running count of characters written so far
 */
2196/* struct used to pass data to per-thread function */
2197struct summary_data {
2198	FILE *fp;
2199	struct trace *trace;
2200	size_t printed;
2201};
2202
2203static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2204{
2205	struct summary_data *data = priv;
2206	FILE *fp = data->fp;
2207	size_t printed = data->printed;
2208	struct trace *trace = data->trace;
2209	struct thread_trace *ttrace = thread->priv;
2210	double ratio;
2211
2212	if (ttrace == NULL)
2213		return 0;
2214
2215	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2216
2217	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2218	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2219	printed += fprintf(fp, "%.1f%%", ratio);
 
 
 
 
2220	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2221	printed += thread__dump_stats(ttrace, trace, fp);
2222
2223	data->printed += printed;
2224
2225	return 0;
2226}
2227
2228static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2229{
2230	struct summary_data data = {
2231		.fp = fp,
2232		.trace = trace
2233	};
2234	data.printed = trace__fprintf_threads_header(fp);
2235
2236	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2237
2238	return data.printed;
2239}
2240
2241static int trace__set_duration(const struct option *opt, const char *str,
2242			       int unset __maybe_unused)
2243{
2244	struct trace *trace = opt->value;
2245
2246	trace->duration_filter = atof(str);
2247	return 0;
2248}
2249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2250static int trace__open_output(struct trace *trace, const char *filename)
2251{
2252	struct stat st;
2253
2254	if (!stat(filename, &st) && st.st_size) {
2255		char oldname[PATH_MAX];
2256
2257		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2258		unlink(oldname);
2259		rename(filename, oldname);
2260	}
2261
2262	trace->output = fopen(filename, "w");
2263
2264	return trace->output == NULL ? -errno : 0;
2265}
2266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2267int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2268{
2269	const char * const trace_usage[] = {
2270		"perf trace [<options>] [<command>]",
2271		"perf trace [<options>] -- <command> [<options>]",
2272		"perf trace record [<options>] [<command>]",
2273		"perf trace record [<options>] -- <command> [<options>]",
2274		NULL
2275	};
2276	struct trace trace = {
2277		.audit = {
2278			.machine = audit_detect_machine(),
2279			.open_id = audit_name_to_syscall("open", trace.audit.machine),
2280		},
2281		.syscalls = {
2282			. max = -1,
2283		},
2284		.opts = {
2285			.target = {
2286				.uid	   = UINT_MAX,
2287				.uses_mmap = true,
2288			},
2289			.user_freq     = UINT_MAX,
2290			.user_interval = ULLONG_MAX,
2291			.no_buffering  = true,
2292			.mmap_pages    = 1024,
 
2293		},
2294		.output = stdout,
2295		.show_comm = true,
 
2296	};
2297	const char *output_name = NULL;
2298	const char *ev_qualifier_str = NULL;
2299	const struct option trace_options[] = {
 
 
 
2300	OPT_BOOLEAN(0, "comm", &trace.show_comm,
2301		    "show the thread COMM next to its id"),
2302	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2303	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2304		    "list of events to trace"),
2305	OPT_STRING('o', "output", &output_name, "file", "output file name"),
2306	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2307	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2308		    "trace events on existing process id"),
2309	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2310		    "trace events on existing thread id"),
 
 
2311	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2312		    "system-wide collection from all CPUs"),
2313	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2314		    "list of cpus to monitor"),
2315	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2316		    "child tasks do not inherit counters"),
2317	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2318		     "number of mmap data pages",
2319		     perf_evlist__parse_mmap_pages),
2320	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2321		   "user to profile"),
2322	OPT_CALLBACK(0, "duration", &trace, "float",
2323		     "show only events with duration > N.M ms",
2324		     trace__set_duration),
2325	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2326	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2327	OPT_BOOLEAN('T', "time", &trace.full_time,
2328		    "Show full timestamp, not time relative to first start"),
2329	OPT_BOOLEAN('s', "summary", &trace.summary_only,
2330		    "Show only syscall summary with statistics"),
2331	OPT_BOOLEAN('S', "with-summary", &trace.summary,
2332		    "Show all syscalls and summary with statistics"),
 
 
 
 
 
 
2333	OPT_END()
2334	};
 
2335	int err;
2336	char bf[BUFSIZ];
2337
2338	if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2339		return trace__record(argc-2, &argv[2]);
2340
2341	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2342
2343	/* summary_only implies summary option, but don't overwrite summary if set */
2344	if (trace.summary_only)
2345		trace.summary = trace.summary_only;
2346
 
 
 
 
 
 
2347	if (output_name != NULL) {
2348		err = trace__open_output(&trace, output_name);
2349		if (err < 0) {
2350			perror("failed to create output file");
2351			goto out;
2352		}
2353	}
2354
2355	if (ev_qualifier_str != NULL) {
2356		const char *s = ev_qualifier_str;
 
 
 
2357
2358		trace.not_ev_qualifier = *s == '!';
2359		if (trace.not_ev_qualifier)
2360			++s;
2361		trace.ev_qualifier = strlist__new(true, s);
2362		if (trace.ev_qualifier == NULL) {
2363			fputs("Not enough memory to parse event qualifier",
2364			      trace.output);
2365			err = -ENOMEM;
2366			goto out_close;
2367		}
 
 
 
 
2368	}
2369
2370	err = target__validate(&trace.opts.target);
2371	if (err) {
2372		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2373		fprintf(trace.output, "%s", bf);
2374		goto out_close;
2375	}
2376
2377	err = target__parse_uid(&trace.opts.target);
2378	if (err) {
2379		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2380		fprintf(trace.output, "%s", bf);
2381		goto out_close;
2382	}
2383
2384	if (!argc && target__none(&trace.opts.target))
2385		trace.opts.target.system_wide = true;
2386
2387	if (input_name)
2388		err = trace__replay(&trace);
2389	else
2390		err = trace__run(&trace, argc, argv);
2391
2392out_close:
2393	if (output_name != NULL)
2394		fclose(trace.output);
2395out:
2396	return err;
2397}
v4.6
   1/*
   2 * builtin-trace.c
   3 *
   4 * Builtin 'trace' command:
   5 *
   6 * Display a continuously updated trace of any workload, CPU, specific PID,
   7 * system wide, etc.  Default format is loosely strace like, but any other
   8 * event may be specified using --event.
   9 *
  10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
  11 *
  12 * Initially based on the 'trace' prototype by Thomas Gleixner:
  13 *
  14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
  15 *
  16 * Released under the GPL v2. (and only v2, not any later version)
  17 */
  18
  19#include <traceevent/event-parse.h>
  20#include <api/fs/tracing_path.h>
  21#include "builtin.h"
  22#include "util/color.h"
  23#include "util/debug.h"
  24#include "util/evlist.h"
  25#include <subcmd/exec-cmd.h>
  26#include "util/machine.h"
  27#include "util/session.h"
  28#include "util/thread.h"
  29#include <subcmd/parse-options.h>
  30#include "util/strlist.h"
  31#include "util/intlist.h"
  32#include "util/thread_map.h"
  33#include "util/stat.h"
  34#include "trace-event.h"
  35#include "util/parse-events.h"
  36#include "util/bpf-loader.h"
  37
  38#include <libaudit.h>
  39#include <stdlib.h>
 
  40#include <sys/mman.h>
  41#include <linux/futex.h>
  42#include <linux/err.h>
  43
  44/* For older distros: */
  45#ifndef MAP_STACK
  46# define MAP_STACK		0x20000
  47#endif
  48
  49#ifndef MADV_HWPOISON
  50# define MADV_HWPOISON		100
  51
  52#endif
  53
  54#ifndef MADV_MERGEABLE
  55# define MADV_MERGEABLE		12
  56#endif
  57
  58#ifndef MADV_UNMERGEABLE
  59# define MADV_UNMERGEABLE	13
  60#endif
  61
  62#ifndef EFD_SEMAPHORE
  63# define EFD_SEMAPHORE		1
  64#endif
  65
  66#ifndef EFD_NONBLOCK
  67# define EFD_NONBLOCK		00004000
  68#endif
  69
  70#ifndef EFD_CLOEXEC
  71# define EFD_CLOEXEC		02000000
  72#endif
  73
  74#ifndef O_CLOEXEC
  75# define O_CLOEXEC		02000000
  76#endif
  77
  78#ifndef SOCK_DCCP
  79# define SOCK_DCCP		6
  80#endif
  81
  82#ifndef SOCK_CLOEXEC
  83# define SOCK_CLOEXEC		02000000
  84#endif
  85
  86#ifndef SOCK_NONBLOCK
  87# define SOCK_NONBLOCK		00004000
  88#endif
  89
  90#ifndef MSG_CMSG_CLOEXEC
  91# define MSG_CMSG_CLOEXEC	0x40000000
  92#endif
  93
  94#ifndef PERF_FLAG_FD_NO_GROUP
  95# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
  96#endif
  97
  98#ifndef PERF_FLAG_FD_OUTPUT
  99# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
 100#endif
 101
 102#ifndef PERF_FLAG_PID_CGROUP
 103# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
 104#endif
 105
 106#ifndef PERF_FLAG_FD_CLOEXEC
 107# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
 108#endif
 109
 110
 /*
  * Accessor for one field of a tracepoint's raw payload: the byte offset of
  * the field inside sample->raw_data plus the function used to read it,
  * either as an integer value or as a pointer into the payload.
  */
 111struct tp_field {
 112	int offset;
 113	union {
 114		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
 115		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
 116	};
 117};
 118
 /*
  * Generate tp_field__u<bits>(): copy a u<bits> out of the raw sample
  * payload at field->offset with memcpy() and return it widened to u64.
  * Instantiated below for 8, 16, 32 and 64 bits.
  */
 119#define TP_UINT_FIELD(bits) \
 120static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
 121{ \
 122	u##bits value; \
 123	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
 124	return value;  \
 125}
 126
 127TP_UINT_FIELD(8);
 128TP_UINT_FIELD(16);
 129TP_UINT_FIELD(32);
 130TP_UINT_FIELD(64);
 131
 /*
  * Generate tp_field__swapped_u<bits>(): same read as tp_field__u<bits>()
  * but the value is byte-swapped with bswap_<bits>() before it is returned.
  * Instantiated for 16, 32 and 64 bits only.
  */
 132#define TP_UINT_FIELD__SWAPPED(bits) \
 133static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
 134{ \
 135	u##bits value; \
 136	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
 137	return bswap_##bits(value);\
 138}
 139
 140TP_UINT_FIELD__SWAPPED(16);
 141TP_UINT_FIELD__SWAPPED(32);
 142TP_UINT_FIELD__SWAPPED(64);
 143
 144static int tp_field__init_uint(struct tp_field *field,
 145			       struct format_field *format_field,
 146			       bool needs_swap)
 147{
 148	field->offset = format_field->offset;
 149
 150	switch (format_field->size) {
 151	case 1:
 152		field->integer = tp_field__u8;
 153		break;
 154	case 2:
 155		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
 156		break;
 157	case 4:
 158		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
 159		break;
 160	case 8:
 161		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
 162		break;
 163	default:
 164		return -1;
 165	}
 166
 167	return 0;
 168}
 169
 170static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
 171{
 172	return sample->raw_data + field->offset;
 173}
 174
 175static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
 176{
 177	field->offset = format_field->offset;
 178	field->pointer = tp_field__ptr;
 179	return 0;
 180}
 181
 /*
  * Field accessors attached to a syscall tracepoint evsel through
  * evsel->priv: the syscall "id" plus a second accessor stored under two
  * names, "args" and "ret", which share the same storage.
  */
 182struct syscall_tp {
 183	struct tp_field id;
 184	union {
 185		struct tp_field args, ret;
 186	};
 187};
 188
 189static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 190					  struct tp_field *field,
 191					  const char *name)
 192{
 193	struct format_field *format_field = perf_evsel__field(evsel, name);
 194
 195	if (format_field == NULL)
 196		return -1;
 197
 198	return tp_field__init_uint(field, format_field, evsel->needs_swap);
 199}
 200
 /*
  * Initialize member "name" of the struct syscall_tp stored in evsel->priv
  * as an integer accessor for the tracepoint field with that same name.
  * Evaluates to the result of perf_evsel__init_tp_uint_field().
  */
 201#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
 202	({ struct syscall_tp *sc = evsel->priv;\
 203	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 204
 205static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 206					 struct tp_field *field,
 207					 const char *name)
 208{
 209	struct format_field *format_field = perf_evsel__field(evsel, name);
 210
 211	if (format_field == NULL)
 212		return -1;
 213
 214	return tp_field__init_ptr(field, format_field);
 215}
 216
 /*
  * Initialize member "name" of the struct syscall_tp stored in evsel->priv
  * as a pointer accessor for the tracepoint field with that same name.
  * Evaluates to the result of perf_evsel__init_tp_ptr_field().
  */
 217#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
 218	({ struct syscall_tp *sc = evsel->priv;\
 219	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 220
 /* Release evsel->priv with zfree() and then delete the evsel itself. */
 221static void perf_evsel__delete_priv(struct perf_evsel *evsel)
 222{
 223	zfree(&evsel->priv);
 224	perf_evsel__delete(evsel);
 225}
 226
 227static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
 228{
 229	evsel->priv = malloc(sizeof(struct syscall_tp));
 230	if (evsel->priv != NULL) {
 231		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
 232			goto out_delete;
 233
 234		evsel->handler = handler;
 235		return 0;
 236	}
 237
 238	return -ENOMEM;
 239
 240out_delete:
 241	zfree(&evsel->priv);
 242	return -ENOENT;
 243}
 244
 245static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
 246{
 247	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 248
 249	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 250	if (IS_ERR(evsel))
 251		evsel = perf_evsel__newtp("syscalls", direction);
 252
 253	if (IS_ERR(evsel))
 254		return NULL;
 255
 256	if (perf_evsel__init_syscall_tp(evsel, handler))
 257		goto out_delete;
 258
 259	return evsel;
 260
 261out_delete:
 262	perf_evsel__delete_priv(evsel);
 263	return NULL;
 264}
 265
 /*
  * Read member "name" of the struct syscall_tp stored in evsel->priv from
  * "sample": the first macro calls its integer accessor, the second its
  * pointer accessor.
  */
 266#define perf_evsel__sc_tp_uint(evsel, name, sample) \
 267	({ struct syscall_tp *fields = evsel->priv; \
 268	   fields->name.integer(&fields->name, sample); })
 269
 270#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
 271	({ struct syscall_tp *fields = evsel->priv; \
 272	   fields->name.pointer(&fields->name, sample); })
 273
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 /*
  * Context handed to the syscall_arg__scnprintf_*() formatters.
  *
  * val:  raw argument value to format
  * parm: formatter-specific data (the strarray formatter reads it as a
  *       struct strarray)
  * mask: bit mask a formatter may extend (the futex op formatter ORs
  *       SCF_* bits into it)
  * idx:  presumably the position of the argument - TODO confirm
  */
 274struct syscall_arg {
 275	unsigned long val;
 276	struct thread *thread;
 277	struct trace  *trace;
 278	void	      *parm;
 279	u8	      idx;
 280	u8	      mask;
 281};
 282
 /*
  * Table used to print an integer value by name: "entries" holds
  * "nr_entries" strings and "offset" is subtracted from the value to
  * obtain the index into the table.
  */
 283struct strarray {
 284	int	    offset;
 285	int	    nr_entries;
 286	const char **entries;
 287};
 288
 /*
  * Define "strarray__<array>" describing the string array "array".
  * DEFINE_STRARRAY leaves the offset at zero; DEFINE_STRARRAY_OFFSET sets
  * it to "off".
  */
 289#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
 290	.nr_entries = ARRAY_SIZE(array), \
 291	.entries = array, \
 292}
 293
 294#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
 295	.offset	    = off, \
 296	.nr_entries = ARRAY_SIZE(array), \
 297	.entries = array, \
 298}
 299
 300static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
 301						const char *intfmt,
 302					        struct syscall_arg *arg)
 303{
 304	struct strarray *sa = arg->parm;
 305	int idx = arg->val - sa->offset;
 306
 307	if (idx < 0 || idx >= sa->nr_entries)
 308		return scnprintf(bf, size, intfmt, arg->val);
 309
 310	return scnprintf(bf, size, "%s", sa->entries[idx]);
 311}
 312
 313static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
 314					      struct syscall_arg *arg)
 315{
 316	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
 317}
 318
 319#define SCA_STRARRAY syscall_arg__scnprintf_strarray
 320
 321#if defined(__i386__) || defined(__x86_64__)
 322/*
 323 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 324 * 	  gets rewritten to support all arches.
 325 */
 326static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
 327						 struct syscall_arg *arg)
 328{
 329	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
 330}
 331
 332#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
 333#endif /* defined(__i386__) || defined(__x86_64__) */
 334
 /*
  * Forward declaration of the file descriptor formatter (it is called by
  * syscall_arg__scnprintf_fd_at() before its definition); SCA_FD is its
  * short alias.
  */
 335static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
 336					struct syscall_arg *arg);
 337
 338#define SCA_FD syscall_arg__scnprintf_fd
 339
 340static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
 341					   struct syscall_arg *arg)
 342{
 343	int fd = arg->val;
 344
 345	if (fd == AT_FDCWD)
 346		return scnprintf(bf, size, "CWD");
 347
 348	return syscall_arg__scnprintf_fd(bf, size, arg);
 349}
 350
 351#define SCA_FDAT syscall_arg__scnprintf_fd_at
 352
 /*
  * Forward declaration of the formatter referred to by SCA_CLOSE_FD; its
  * definition is not part of this section.
  */
 353static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
 354					      struct syscall_arg *arg);
 355
 356#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
 357
 358static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
 359					 struct syscall_arg *arg)
 360{
 361	return scnprintf(bf, size, "%#lx", arg->val);
 362}
 363
 364#define SCA_HEX syscall_arg__scnprintf_hex
 365
 366static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
 367					 struct syscall_arg *arg)
 368{
 369	return scnprintf(bf, size, "%d", arg->val);
 370}
 371
 372#define SCA_INT syscall_arg__scnprintf_int
 373
 374static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 375					       struct syscall_arg *arg)
 376{
 377	int printed = 0, prot = arg->val;
 378
 379	if (prot == PROT_NONE)
 380		return scnprintf(bf, size, "NONE");
 381#define	P_MMAP_PROT(n) \
 382	if (prot & PROT_##n) { \
 383		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 384		prot &= ~PROT_##n; \
 385	}
 386
 387	P_MMAP_PROT(EXEC);
 388	P_MMAP_PROT(READ);
 389	P_MMAP_PROT(WRITE);
 390#ifdef PROT_SEM
 391	P_MMAP_PROT(SEM);
 392#endif
 393	P_MMAP_PROT(GROWSDOWN);
 394	P_MMAP_PROT(GROWSUP);
 395#undef P_MMAP_PROT
 396
 397	if (prot)
 398		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
 399
 400	return printed;
 401}
 402
 403#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 404
 405static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 406						struct syscall_arg *arg)
 407{
 408	int printed = 0, flags = arg->val;
 409
 410#define	P_MMAP_FLAG(n) \
 411	if (flags & MAP_##n) { \
 412		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 413		flags &= ~MAP_##n; \
 414	}
 415
 416	P_MMAP_FLAG(SHARED);
 417	P_MMAP_FLAG(PRIVATE);
 418#ifdef MAP_32BIT
 419	P_MMAP_FLAG(32BIT);
 420#endif
 421	P_MMAP_FLAG(ANONYMOUS);
 422	P_MMAP_FLAG(DENYWRITE);
 423	P_MMAP_FLAG(EXECUTABLE);
 424	P_MMAP_FLAG(FILE);
 425	P_MMAP_FLAG(FIXED);
 426	P_MMAP_FLAG(GROWSDOWN);
 427#ifdef MAP_HUGETLB
 428	P_MMAP_FLAG(HUGETLB);
 429#endif
 430	P_MMAP_FLAG(LOCKED);
 431	P_MMAP_FLAG(NONBLOCK);
 432	P_MMAP_FLAG(NORESERVE);
 433	P_MMAP_FLAG(POPULATE);
 434	P_MMAP_FLAG(STACK);
 435#ifdef MAP_UNINITIALIZED
 436	P_MMAP_FLAG(UNINITIALIZED);
 437#endif
 438#undef P_MMAP_FLAG
 439
 440	if (flags)
 441		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 442
 443	return printed;
 444}
 445
 446#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
 447
 448static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
 449						  struct syscall_arg *arg)
 450{
 451	int printed = 0, flags = arg->val;
 452
 453#define P_MREMAP_FLAG(n) \
 454	if (flags & MREMAP_##n) { \
 455		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 456		flags &= ~MREMAP_##n; \
 457	}
 458
 459	P_MREMAP_FLAG(MAYMOVE);
 460#ifdef MREMAP_FIXED
 461	P_MREMAP_FLAG(FIXED);
 462#endif
 463#undef P_MREMAP_FLAG
 464
 465	if (flags)
 466		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 467
 468	return printed;
 469}
 470
 471#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
 472
 473static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
 474						      struct syscall_arg *arg)
 475{
 476	int behavior = arg->val;
 477
 478	switch (behavior) {
 479#define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
 480	P_MADV_BHV(NORMAL);
 481	P_MADV_BHV(RANDOM);
 482	P_MADV_BHV(SEQUENTIAL);
 483	P_MADV_BHV(WILLNEED);
 484	P_MADV_BHV(DONTNEED);
 485	P_MADV_BHV(REMOVE);
 486	P_MADV_BHV(DONTFORK);
 487	P_MADV_BHV(DOFORK);
 488	P_MADV_BHV(HWPOISON);
 489#ifdef MADV_SOFT_OFFLINE
 490	P_MADV_BHV(SOFT_OFFLINE);
 491#endif
 492	P_MADV_BHV(MERGEABLE);
 493	P_MADV_BHV(UNMERGEABLE);
 494#ifdef MADV_HUGEPAGE
 495	P_MADV_BHV(HUGEPAGE);
 496#endif
 497#ifdef MADV_NOHUGEPAGE
 498	P_MADV_BHV(NOHUGEPAGE);
 499#endif
 500#ifdef MADV_DONTDUMP
 501	P_MADV_BHV(DONTDUMP);
 502#endif
 503#ifdef MADV_DODUMP
 504	P_MADV_BHV(DODUMP);
 505#endif
 506#undef P_MADV_PHV
 507	default: break;
 508	}
 509
 510	return scnprintf(bf, size, "%#x", behavior);
 511}
 512
 513#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
 514
 515static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
 516					   struct syscall_arg *arg)
 517{
 518	int printed = 0, op = arg->val;
 519
 520	if (op == 0)
 521		return scnprintf(bf, size, "NONE");
 522#define	P_CMD(cmd) \
 523	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
 524		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
 525		op &= ~LOCK_##cmd; \
 526	}
 527
 528	P_CMD(SH);
 529	P_CMD(EX);
 530	P_CMD(NB);
 531	P_CMD(UN);
 532	P_CMD(MAND);
 533	P_CMD(RW);
 534	P_CMD(READ);
 535	P_CMD(WRITE);
 536#undef P_OP
 537
 538	if (op)
 539		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
 540
 541	return printed;
 542}
 543
 544#define SCA_FLOCK syscall_arg__scnprintf_flock
 545
 546static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
 547{
 548	enum syscall_futex_args {
 549		SCF_UADDR   = (1 << 0),
 550		SCF_OP	    = (1 << 1),
 551		SCF_VAL	    = (1 << 2),
 552		SCF_TIMEOUT = (1 << 3),
 553		SCF_UADDR2  = (1 << 4),
 554		SCF_VAL3    = (1 << 5),
 555	};
 556	int op = arg->val;
 557	int cmd = op & FUTEX_CMD_MASK;
 558	size_t printed = 0;
 559
 560	switch (cmd) {
 561#define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
 562	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
 563	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 564	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 565	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
 566	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
 567	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
 568	P_FUTEX_OP(WAKE_OP);							  break;
 569	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 570	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 571	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
 572	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
 573	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
 574	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
 575	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
 576	}
 577
 578	if (op & FUTEX_PRIVATE_FLAG)
 579		printed += scnprintf(bf + printed, size - printed, "|PRIV");
 580
 581	if (op & FUTEX_CLOCK_REALTIME)
 582		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
 583
 584	return printed;
 585}
 586
 587#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
 588
 /*
  * Name tables for integer syscall arguments. Each array is wrapped in a
  * struct strarray by DEFINE_STRARRAY, or by DEFINE_STRARRAY_OFFSET when
  * the first entry does not correspond to the value 0.
  */
 589static const char *bpf_cmd[] = {
 590	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
 591	"MAP_GET_NEXT_KEY", "PROG_LOAD",
 592};
 593static DEFINE_STRARRAY(bpf_cmd);
 594
 /* the table starts at value 1, hence the offset */
 595static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
 596static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
 597
 598static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
 599static DEFINE_STRARRAY(itimers);
 600
 601static const char *keyctl_options[] = {
 602	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
 603	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
 604	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
 605	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
 606	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
 607};
 608static DEFINE_STRARRAY(keyctl_options);
 609
 /* "DATA" and "HOLE" are present only when SEEK_DATA / SEEK_HOLE are defined */
 610static const char *whences[] = { "SET", "CUR", "END",
 611#ifdef SEEK_DATA
 612"DATA",
 613#endif
 614#ifdef SEEK_HOLE
 615"HOLE",
 616#endif
 617};
 618static DEFINE_STRARRAY(whences);
 619
 620static const char *fcntl_cmds[] = {
 621	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
 622	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
 623	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
 624	"F_GETOWNER_UIDS",
 625};
 626static DEFINE_STRARRAY(fcntl_cmds);
 627
 628static const char *rlimit_resources[] = {
 629	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
 630	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
 631	"RTTIME",
 632};
 633static DEFINE_STRARRAY(rlimit_resources);
 634
 635static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
 636static DEFINE_STRARRAY(sighow);
 637
 638static const char *clockid[] = {
 639	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
 640	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
 641	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
 642};
 643static DEFINE_STRARRAY(clockid);
 644
 645static const char *socket_families[] = {
 646	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
 647	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
 648	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
 649	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
 650	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
 651	"ALG", "NFC", "VSOCK",
 652};
 653static DEFINE_STRARRAY(socket_families);
 654
 655#ifndef SOCK_TYPE_MASK
 656#define SOCK_TYPE_MASK 0xf
 657#endif
 658
 659static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
 660						      struct syscall_arg *arg)
 661{
 662	size_t printed;
 663	int type = arg->val,
 664	    flags = type & ~SOCK_TYPE_MASK;
 665
 666	type &= SOCK_TYPE_MASK;
 667	/*
 668 	 * Can't use a strarray, MIPS may override for ABI reasons.
 669 	 */
 670	switch (type) {
 671#define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
 672	P_SK_TYPE(STREAM);
 673	P_SK_TYPE(DGRAM);
 674	P_SK_TYPE(RAW);
 675	P_SK_TYPE(RDM);
 676	P_SK_TYPE(SEQPACKET);
 677	P_SK_TYPE(DCCP);
 678	P_SK_TYPE(PACKET);
 679#undef P_SK_TYPE
 680	default:
 681		printed = scnprintf(bf, size, "%#x", type);
 682	}
 683
 684#define	P_SK_FLAG(n) \
 685	if (flags & SOCK_##n) { \
 686		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
 687		flags &= ~SOCK_##n; \
 688	}
 689
 690	P_SK_FLAG(CLOEXEC);
 691	P_SK_FLAG(NONBLOCK);
 692#undef P_SK_FLAG
 693
 694	if (flags)
 695		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
 696
 697	return printed;
 698}
 699
 700#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
 701
 702#ifndef MSG_PROBE
 703#define MSG_PROBE	     0x10
 704#endif
 705#ifndef MSG_WAITFORONE
 706#define MSG_WAITFORONE	0x10000
 707#endif
 708#ifndef MSG_SENDPAGE_NOTLAST
 709#define MSG_SENDPAGE_NOTLAST 0x20000
 710#endif
 711#ifndef MSG_FASTOPEN
 712#define MSG_FASTOPEN	     0x20000000
 713#endif
 714
 715static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
 716					       struct syscall_arg *arg)
 717{
 718	int printed = 0, flags = arg->val;
 719
 720	if (flags == 0)
 721		return scnprintf(bf, size, "NONE");
 722#define	P_MSG_FLAG(n) \
 723	if (flags & MSG_##n) { \
 724		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 725		flags &= ~MSG_##n; \
 726	}
 727
 728	P_MSG_FLAG(OOB);
 729	P_MSG_FLAG(PEEK);
 730	P_MSG_FLAG(DONTROUTE);
 731	P_MSG_FLAG(TRYHARD);
 732	P_MSG_FLAG(CTRUNC);
 733	P_MSG_FLAG(PROBE);
 734	P_MSG_FLAG(TRUNC);
 735	P_MSG_FLAG(DONTWAIT);
 736	P_MSG_FLAG(EOR);
 737	P_MSG_FLAG(WAITALL);
 738	P_MSG_FLAG(FIN);
 739	P_MSG_FLAG(SYN);
 740	P_MSG_FLAG(CONFIRM);
 741	P_MSG_FLAG(RST);
 742	P_MSG_FLAG(ERRQUEUE);
 743	P_MSG_FLAG(NOSIGNAL);
 744	P_MSG_FLAG(MORE);
 745	P_MSG_FLAG(WAITFORONE);
 746	P_MSG_FLAG(SENDPAGE_NOTLAST);
 747	P_MSG_FLAG(FASTOPEN);
 748	P_MSG_FLAG(CMSG_CLOEXEC);
 749#undef P_MSG_FLAG
 750
 751	if (flags)
 752		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 753
 754	return printed;
 755}
 756
 757#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
 758
 759static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
 760						 struct syscall_arg *arg)
 761{
 762	size_t printed = 0;
 763	int mode = arg->val;
 764
 765	if (mode == F_OK) /* 0 */
 766		return scnprintf(bf, size, "F");
 767#define	P_MODE(n) \
 768	if (mode & n##_OK) { \
 769		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
 770		mode &= ~n##_OK; \
 771	}
 772
 773	P_MODE(R);
 774	P_MODE(W);
 775	P_MODE(X);
 776#undef P_MODE
 777
 778	if (mode)
 779		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
 780
 781	return printed;
 782}
 783
 784#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
 785
 786static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
 787					      struct syscall_arg *arg);
 788
 789#define SCA_FILENAME syscall_arg__scnprintf_filename
 790
 791static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
 792					       struct syscall_arg *arg)
 793{
 794	int printed = 0, flags = arg->val;
 795
 796	if (!(flags & O_CREAT))
 797		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
 798
 799	if (flags == 0)
 800		return scnprintf(bf, size, "RDONLY");
 801#define	P_FLAG(n) \
 802	if (flags & O_##n) { \
 803		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 804		flags &= ~O_##n; \
 805	}
 806
 807	P_FLAG(APPEND);
 808	P_FLAG(ASYNC);
 809	P_FLAG(CLOEXEC);
 810	P_FLAG(CREAT);
 811	P_FLAG(DIRECT);
 812	P_FLAG(DIRECTORY);
 813	P_FLAG(EXCL);
 814	P_FLAG(LARGEFILE);
 815	P_FLAG(NOATIME);
 816	P_FLAG(NOCTTY);
 817#ifdef O_NONBLOCK
 818	P_FLAG(NONBLOCK);
 819#elif O_NDELAY
 820	P_FLAG(NDELAY);
 821#endif
 822#ifdef O_PATH
 823	P_FLAG(PATH);
 824#endif
 825	P_FLAG(RDWR);
 826#ifdef O_DSYNC
 827	if ((flags & O_SYNC) == O_SYNC)
 828		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
 829	else {
 830		P_FLAG(DSYNC);
 831	}
 832#else
 833	P_FLAG(SYNC);
 834#endif
 835	P_FLAG(TRUNC);
 836	P_FLAG(WRONLY);
 837#undef P_FLAG
 838
 839	if (flags)
 840		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 841
 842	return printed;
 843}
 844
 845#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
 846
 847static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
 848						struct syscall_arg *arg)
 849{
 850	int printed = 0, flags = arg->val;
 851
 852	if (flags == 0)
 853		return 0;
 854
 855#define	P_FLAG(n) \
 856	if (flags & PERF_FLAG_##n) { \
 857		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 858		flags &= ~PERF_FLAG_##n; \
 859	}
 860
 861	P_FLAG(FD_NO_GROUP);
 862	P_FLAG(FD_OUTPUT);
 863	P_FLAG(PID_CGROUP);
 864	P_FLAG(FD_CLOEXEC);
 865#undef P_FLAG
 866
 867	if (flags)
 868		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 869
 870	return printed;
 871}
 872
 873#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
 874
 875static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
 876						   struct syscall_arg *arg)
 877{
 878	int printed = 0, flags = arg->val;
 879
 880	if (flags == 0)
 881		return scnprintf(bf, size, "NONE");
 882#define	P_FLAG(n) \
 883	if (flags & EFD_##n) { \
 884		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 885		flags &= ~EFD_##n; \
 886	}
 887
 888	P_FLAG(SEMAPHORE);
 889	P_FLAG(CLOEXEC);
 890	P_FLAG(NONBLOCK);
 891#undef P_FLAG
 892
 893	if (flags)
 894		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 895
 896	return printed;
 897}
 898
 899#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
 900
 901static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
 902						struct syscall_arg *arg)
 903{
 904	int printed = 0, flags = arg->val;
 905
 906#define	P_FLAG(n) \
 907	if (flags & O_##n) { \
 908		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 909		flags &= ~O_##n; \
 910	}
 911
 912	P_FLAG(CLOEXEC);
 913	P_FLAG(NONBLOCK);
 914#undef P_FLAG
 915
 916	if (flags)
 917		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 918
 919	return printed;
 920}
 921
 922#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 923
 924static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
 925{
 926	int sig = arg->val;
 927
 928	switch (sig) {
 929#define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
 930	P_SIGNUM(HUP);
 931	P_SIGNUM(INT);
 932	P_SIGNUM(QUIT);
 933	P_SIGNUM(ILL);
 934	P_SIGNUM(TRAP);
 935	P_SIGNUM(ABRT);
 936	P_SIGNUM(BUS);
 937	P_SIGNUM(FPE);
 938	P_SIGNUM(KILL);
 939	P_SIGNUM(USR1);
 940	P_SIGNUM(SEGV);
 941	P_SIGNUM(USR2);
 942	P_SIGNUM(PIPE);
 943	P_SIGNUM(ALRM);
 944	P_SIGNUM(TERM);
 945	P_SIGNUM(CHLD);
 946	P_SIGNUM(CONT);
 947	P_SIGNUM(STOP);
 948	P_SIGNUM(TSTP);
 949	P_SIGNUM(TTIN);
 950	P_SIGNUM(TTOU);
 951	P_SIGNUM(URG);
 952	P_SIGNUM(XCPU);
 953	P_SIGNUM(XFSZ);
 954	P_SIGNUM(VTALRM);
 955	P_SIGNUM(PROF);
 956	P_SIGNUM(WINCH);
 957	P_SIGNUM(IO);
 958	P_SIGNUM(PWR);
 959	P_SIGNUM(SYS);
 960#ifdef SIGEMT
 961	P_SIGNUM(EMT);
 962#endif
 963#ifdef SIGSTKFLT
 964	P_SIGNUM(STKFLT);
 965#endif
 966#ifdef SIGSWI
 967	P_SIGNUM(SWI);
 968#endif
 969	default: break;
 970	}
 971
 972	return scnprintf(bf, size, "%#x", sig);
 973}
 974
 975#define SCA_SIGNUM syscall_arg__scnprintf_signum
 976
 977#if defined(__i386__) || defined(__x86_64__)
 978/*
 979 * FIXME: Make this available to all arches.
 980 */
 981#define TCGETS		0x5401
 982
 983static const char *tioctls[] = {
 984	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
 985	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
 986	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
 987	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
 988	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
 989	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
 990	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
 991	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
 992	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
 993	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
 994	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
 995	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
 996	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
 997	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
 998	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
 999};
1000
1001static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
1002#endif /* defined(__i386__) || defined(__x86_64__) */
1003
/*
 * Shorthand for a syscall_fmts[] entry whose argument number @arg is printed
 * through SCA_STRARRAY using the table strarray__<array>.  @name is not used
 * in the expansion; it only labels the argument at the point of use.
 */
#define STRARRAY(arg, name, array)				\
	.arg_scnprintf = { [arg] = SCA_STRARRAY, },		\
	.arg_parm      = { [arg] = &strarray__##array, }
1007
1008static struct syscall_fmt {
1009	const char *name;
1010	const char *alias;
1011	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1012	void	   *arg_parm[6];
1013	bool	   errmsg;
1014	bool	   timeout;
1015	bool	   hexret;
1016} syscall_fmts[] = {
1017	{ .name	    = "access",	    .errmsg = true,
1018	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1019			     [1] = SCA_ACCMODE,  /* mode */ }, },
1020	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
1021	{ .name	    = "bpf",	    .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
1022	{ .name	    = "brk",	    .hexret = true,
1023	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1024	{ .name	    = "chdir",	    .errmsg = true,
1025	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1026	{ .name	    = "chmod",	    .errmsg = true,
1027	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1028	{ .name	    = "chroot",	    .errmsg = true,
1029	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1030	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
1031	{ .name	    = "close",	    .errmsg = true,
1032	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1033	{ .name	    = "connect",    .errmsg = true, },
1034	{ .name	    = "creat",	    .errmsg = true,
1035	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1036	{ .name	    = "dup",	    .errmsg = true,
1037	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1038	{ .name	    = "dup2",	    .errmsg = true,
1039	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1040	{ .name	    = "dup3",	    .errmsg = true,
1041	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1042	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1043	{ .name	    = "eventfd2",   .errmsg = true,
1044	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1045	{ .name	    = "faccessat",  .errmsg = true,
1046	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1047			     [1] = SCA_FILENAME, /* filename */ }, },
1048	{ .name	    = "fadvise64",  .errmsg = true,
1049	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1050	{ .name	    = "fallocate",  .errmsg = true,
1051	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1052	{ .name	    = "fchdir",	    .errmsg = true,
1053	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1054	{ .name	    = "fchmod",	    .errmsg = true,
1055	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056	{ .name	    = "fchmodat",   .errmsg = true,
1057	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1058			     [1] = SCA_FILENAME, /* filename */ }, },
1059	{ .name	    = "fchown",	    .errmsg = true,
1060	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061	{ .name	    = "fchownat",   .errmsg = true,
1062	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1063			     [1] = SCA_FILENAME, /* filename */ }, },
1064	{ .name	    = "fcntl",	    .errmsg = true,
1065	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1066			     [1] = SCA_STRARRAY, /* cmd */ },
1067	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1068	{ .name	    = "fdatasync",  .errmsg = true,
1069	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1070	{ .name	    = "flock",	    .errmsg = true,
1071	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1072			     [1] = SCA_FLOCK, /* cmd */ }, },
1073	{ .name	    = "fsetxattr",  .errmsg = true,
1074	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1075	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
1076	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1077	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
1078	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1079			     [1] = SCA_FILENAME, /* filename */ }, },
1080	{ .name	    = "fstatfs",    .errmsg = true,
1081	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1082	{ .name	    = "fsync",    .errmsg = true,
1083	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1084	{ .name	    = "ftruncate", .errmsg = true,
1085	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1086	{ .name	    = "futex",	    .errmsg = true,
1087	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1088	{ .name	    = "futimesat", .errmsg = true,
1089	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1090			     [1] = SCA_FILENAME, /* filename */ }, },
1091	{ .name	    = "getdents",   .errmsg = true,
1092	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1093	{ .name	    = "getdents64", .errmsg = true,
1094	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1095	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1096	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1097	{ .name	    = "getxattr",    .errmsg = true,
1098	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1099	{ .name	    = "inotify_add_watch",	    .errmsg = true,
1100	  .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1101	{ .name	    = "ioctl",	    .errmsg = true,
1102	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1103#if defined(__i386__) || defined(__x86_64__)
1104/*
1105 * FIXME: Make this available to all arches.
1106 */
1107			     [1] = SCA_STRHEXARRAY, /* cmd */
1108			     [2] = SCA_HEX, /* arg */ },
1109	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
1110#else
1111			     [2] = SCA_HEX, /* arg */ }, },
1112#endif
1113	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
1114	{ .name	    = "kill",	    .errmsg = true,
1115	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1116	{ .name	    = "lchown",    .errmsg = true,
1117	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1118	{ .name	    = "lgetxattr",  .errmsg = true,
1119	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1120	{ .name	    = "linkat",	    .errmsg = true,
1121	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1122	{ .name	    = "listxattr",  .errmsg = true,
1123	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1124	{ .name	    = "llistxattr", .errmsg = true,
1125	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1126	{ .name	    = "lremovexattr",  .errmsg = true,
1127	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1128	{ .name	    = "lseek",	    .errmsg = true,
1129	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1130			     [2] = SCA_STRARRAY, /* whence */ },
1131	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
1132	{ .name	    = "lsetxattr",  .errmsg = true,
1133	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1134	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat",
1135	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1136	{ .name	    = "lsxattr",    .errmsg = true,
1137	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1138	{ .name     = "madvise",    .errmsg = true,
1139	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
1140			     [2] = SCA_MADV_BHV, /* behavior */ }, },
1141	{ .name	    = "mkdir",    .errmsg = true,
1142	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1143	{ .name	    = "mkdirat",    .errmsg = true,
1144	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1145			     [1] = SCA_FILENAME, /* pathname */ }, },
1146	{ .name	    = "mknod",      .errmsg = true,
1147	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1148	{ .name	    = "mknodat",    .errmsg = true,
1149	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1150			     [1] = SCA_FILENAME, /* filename */ }, },
1151	{ .name	    = "mlock",	    .errmsg = true,
1152	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1153	{ .name	    = "mlockall",   .errmsg = true,
1154	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1155	{ .name	    = "mmap",	    .hexret = true,
1156	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
1157			     [2] = SCA_MMAP_PROT, /* prot */
1158			     [3] = SCA_MMAP_FLAGS, /* flags */
1159			     [4] = SCA_FD, 	  /* fd */ }, },
1160	{ .name	    = "mprotect",   .errmsg = true,
1161	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
1162			     [2] = SCA_MMAP_PROT, /* prot */ }, },
1163	{ .name	    = "mq_unlink", .errmsg = true,
1164	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1165	{ .name	    = "mremap",	    .hexret = true,
1166	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1167			     [3] = SCA_MREMAP_FLAGS, /* flags */
1168			     [4] = SCA_HEX, /* new_addr */ }, },
1169	{ .name	    = "munlock",    .errmsg = true,
1170	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1171	{ .name	    = "munmap",	    .errmsg = true,
1172	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1173	{ .name	    = "name_to_handle_at", .errmsg = true,
1174	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1175	{ .name	    = "newfstatat", .errmsg = true,
1176	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1177			     [1] = SCA_FILENAME, /* filename */ }, },
1178	{ .name	    = "open",	    .errmsg = true,
1179	  .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1180			     [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1181	{ .name	    = "open_by_handle_at", .errmsg = true,
1182	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1183			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1184	{ .name	    = "openat",	    .errmsg = true,
1185	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1186			     [1] = SCA_FILENAME, /* filename */
1187			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1188	{ .name	    = "perf_event_open", .errmsg = true,
1189	  .arg_scnprintf = { [1] = SCA_INT, /* pid */
1190			     [2] = SCA_INT, /* cpu */
1191			     [3] = SCA_FD,  /* group_fd */
1192			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1193	{ .name	    = "pipe2",	    .errmsg = true,
1194	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1195	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
1196	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
1197	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
1198	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1199	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
1200	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1201	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1202	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1203	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1204	{ .name	    = "pwritev",    .errmsg = true,
1205	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1206	{ .name	    = "read",	    .errmsg = true,
1207	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1208	{ .name	    = "readlink",   .errmsg = true,
1209	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1210	{ .name	    = "readlinkat", .errmsg = true,
1211	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1212			     [1] = SCA_FILENAME, /* pathname */ }, },
1213	{ .name	    = "readv",	    .errmsg = true,
1214	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1215	{ .name	    = "recvfrom",   .errmsg = true,
1216	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1217			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1218	{ .name	    = "recvmmsg",   .errmsg = true,
1219	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1220			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1221	{ .name	    = "recvmsg",    .errmsg = true,
1222	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1223			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1224	{ .name	    = "removexattr", .errmsg = true,
1225	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1226	{ .name	    = "renameat",   .errmsg = true,
1227	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1228	{ .name	    = "rmdir",    .errmsg = true,
1229	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1230	{ .name	    = "rt_sigaction", .errmsg = true,
1231	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1232	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1233	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1234	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1235	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1236	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1237	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1238	{ .name	    = "sendmmsg",    .errmsg = true,
1239	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1240			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1241	{ .name	    = "sendmsg",    .errmsg = true,
1242	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1243			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1244	{ .name	    = "sendto",	    .errmsg = true,
1245	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1246			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1247	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1248	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1249	{ .name	    = "setxattr",   .errmsg = true,
1250	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1251	{ .name	    = "shutdown",   .errmsg = true,
1252	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1253	{ .name	    = "socket",	    .errmsg = true,
1254	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1255			     [1] = SCA_SK_TYPE, /* type */ },
1256	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1257	{ .name	    = "socketpair", .errmsg = true,
1258	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1259			     [1] = SCA_SK_TYPE, /* type */ },
1260	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1261	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat",
1262	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1263	{ .name	    = "statfs",	    .errmsg = true,
1264	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1265	{ .name	    = "swapoff",    .errmsg = true,
1266	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1267	{ .name	    = "swapon",	    .errmsg = true,
1268	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1269	{ .name	    = "symlinkat",  .errmsg = true,
1270	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1271	{ .name	    = "tgkill",	    .errmsg = true,
1272	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1273	{ .name	    = "tkill",	    .errmsg = true,
1274	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1275	{ .name	    = "truncate",   .errmsg = true,
1276	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1277	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1278	{ .name	    = "unlinkat",   .errmsg = true,
1279	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1280			     [1] = SCA_FILENAME, /* pathname */ }, },
1281	{ .name	    = "utime",  .errmsg = true,
1282	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1283	{ .name	    = "utimensat",  .errmsg = true,
1284	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1285			     [1] = SCA_FILENAME, /* filename */ }, },
1286	{ .name	    = "utimes",  .errmsg = true,
1287	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1288	{ .name	    = "vmsplice",  .errmsg = true,
1289	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1290	{ .name	    = "write",	    .errmsg = true,
1291	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1292	{ .name	    = "writev",	    .errmsg = true,
1293	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1294};
1295
1296static int syscall_fmt__cmp(const void *name, const void *fmtp)
1297{
1298	const struct syscall_fmt *fmt = fmtp;
1299	return strcmp(name, fmt->name);
1300}
1301
1302static struct syscall_fmt *syscall_fmt__find(const char *name)
1303{
1304	const int nmemb = ARRAY_SIZE(syscall_fmts);
1305	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1306}
1307
/* Per-syscall state kept by the tracer. */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;
	/* Formatting hints; same type as the entries of syscall_fmts[]. */
	struct syscall_fmt  *fmt;
	/* Array of per-argument formatter callbacks. */
	size_t		    (**arg_scnprintf)(char *bf, size_t size,
					      struct syscall_arg *arg);
	/* Array of per-argument extra parameters. */
	void		    **arg_parm;
};
1318
1319static size_t fprintf_duration(unsigned long t, FILE *fp)
1320{
1321	double duration = (double)t / NSEC_PER_MSEC;
1322	size_t printed = fprintf(fp, "(");
1323
1324	if (duration >= 1.0)
1325		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1326	else if (duration >= 0.01)
1327		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1328	else
1329		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1330	return printed + fprintf(fp, "): ");
1331}
1332
1333/**
1334 * filename.ptr: The filename char pointer that will be vfs_getname'd
1335 * filename.entry_str_pos: Where to insert the string translated from
1336 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
1337 */
1338struct thread_trace {
1339	u64		  entry_time;
1340	u64		  exit_time;
1341	bool		  entry_pending;
1342	unsigned long	  nr_events;
1343	unsigned long	  pfmaj, pfmin;
1344	char		  *entry_str;
1345	double		  runtime_ms;
1346        struct {
1347		unsigned long ptr;
1348		short int     entry_str_pos;
1349		bool	      pending_open;
1350		unsigned int  namelen;
1351		char	      *name;
1352	} filename;
1353	struct {
1354		int	  max;
1355		char	  **table;
1356	} paths;
1357
1358	struct intlist *syscall_stats;
1359};
1360
1361static struct thread_trace *thread_trace__new(void)
1362{
1363	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1364
1365	if (ttrace)
1366		ttrace->paths.max = -1;
1367
1368	ttrace->syscall_stats = intlist__new(NULL);
1369
1370	return ttrace;
1371}
1372
1373static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1374{
1375	struct thread_trace *ttrace;
1376
1377	if (thread == NULL)
1378		goto fail;
1379
1380	if (thread__priv(thread) == NULL)
1381		thread__set_priv(thread, thread_trace__new());
1382
1383	if (thread__priv(thread) == NULL)
1384		goto fail;
1385
1386	ttrace = thread__priv(thread);
1387	++ttrace->nr_events;
1388
1389	return ttrace;
1390fail:
1391	color_fprintf(fp, PERF_COLOR_RED,
1392		      "WARNING: not enough memory, dropping samples!\n");
1393	return NULL;
1394}
1395
1396#define TRACE_PFMAJ		(1 << 0)
1397#define TRACE_PFMIN		(1 << 1)
1398
1399static const size_t trace__entry_str_size = 2048;
1400
1401struct trace {
1402	struct perf_tool	tool;
1403	struct {
1404		int		machine;
1405		int		open_id;
1406	}			audit;
1407	struct {
1408		int		max;
1409		struct syscall  *table;
1410		struct {
1411			struct perf_evsel *sys_enter,
1412					  *sys_exit;
1413		}		events;
1414	} syscalls;
1415	struct record_opts	opts;
1416	struct perf_evlist	*evlist;
1417	struct machine		*host;
1418	struct thread		*current;
1419	u64			base_time;
1420	FILE			*output;
1421	unsigned long		nr_events;
1422	struct strlist		*ev_qualifier;
1423	struct {
1424		size_t		nr;
1425		int		*entries;
1426	}			ev_qualifier_ids;
1427	struct intlist		*tid_list;
1428	struct intlist		*pid_list;
1429	struct {
1430		size_t		nr;
1431		pid_t		*entries;
1432	}			filter_pids;
1433	double			duration_filter;
1434	double			runtime_ms;
1435	struct {
1436		u64		vfs_getname,
1437				proc_getname;
1438	} stats;
1439	bool			not_ev_qualifier;
1440	bool			live;
1441	bool			full_time;
1442	bool			sched;
1443	bool			multiple_threads;
1444	bool			summary;
1445	bool			summary_only;
1446	bool			show_comm;
1447	bool			show_tool_stats;
1448	bool			trace_syscalls;
1449	bool			force;
1450	bool			vfs_getname;
1451	int			trace_pgfaults;
1452};
1453
1454static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1455{
1456	struct thread_trace *ttrace = thread__priv(thread);
1457
1458	if (fd > ttrace->paths.max) {
1459		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1460
1461		if (npath == NULL)
1462			return -1;
1463
1464		if (ttrace->paths.max != -1) {
1465			memset(npath + ttrace->paths.max + 1, 0,
1466			       (fd - ttrace->paths.max) * sizeof(char *));
1467		} else {
1468			memset(npath, 0, (fd + 1) * sizeof(char *));
1469		}
1470
1471		ttrace->paths.table = npath;
1472		ttrace->paths.max   = fd;
1473	}
1474
1475	ttrace->paths.table[fd] = strdup(pathname);
1476
1477	return ttrace->paths.table[fd] != NULL ? 0 : -1;
1478}
1479
1480static int thread__read_fd_path(struct thread *thread, int fd)
1481{
1482	char linkname[PATH_MAX], pathname[PATH_MAX];
1483	struct stat st;
1484	int ret;
1485
1486	if (thread->pid_ == thread->tid) {
1487		scnprintf(linkname, sizeof(linkname),
1488			  "/proc/%d/fd/%d", thread->pid_, fd);
1489	} else {
1490		scnprintf(linkname, sizeof(linkname),
1491			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1492	}
1493
1494	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1495		return -1;
1496
1497	ret = readlink(linkname, pathname, sizeof(pathname));
1498
1499	if (ret < 0 || ret > st.st_size)
1500		return -1;
1501
1502	pathname[ret] = '\0';
1503	return trace__set_fd_pathname(thread, fd, pathname);
1504}
1505
1506static const char *thread__fd_path(struct thread *thread, int fd,
1507				   struct trace *trace)
1508{
1509	struct thread_trace *ttrace = thread__priv(thread);
1510
1511	if (ttrace == NULL)
1512		return NULL;
1513
1514	if (fd < 0)
1515		return NULL;
1516
1517	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1518		if (!trace->live)
1519			return NULL;
1520		++trace->stats.proc_getname;
1521		if (thread__read_fd_path(thread, fd))
1522			return NULL;
1523	}
1524
1525	return ttrace->paths.table[fd];
1526}
1527
1528static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1529					struct syscall_arg *arg)
1530{
1531	int fd = arg->val;
1532	size_t printed = scnprintf(bf, size, "%d", fd);
1533	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1534
1535	if (path)
1536		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1537
1538	return printed;
1539}
1540
1541static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1542					      struct syscall_arg *arg)
1543{
1544	int fd = arg->val;
1545	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1546	struct thread_trace *ttrace = thread__priv(arg->thread);
1547
1548	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1549		zfree(&ttrace->paths.table[fd]);
1550
1551	return printed;
1552}
1553
1554static void thread__set_filename_pos(struct thread *thread, const char *bf,
1555				     unsigned long ptr)
1556{
1557	struct thread_trace *ttrace = thread__priv(thread);
1558
1559	ttrace->filename.ptr = ptr;
1560	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1561}
1562
1563static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1564					      struct syscall_arg *arg)
1565{
1566	unsigned long ptr = arg->val;
1567
1568	if (!arg->trace->vfs_getname)
1569		return scnprintf(bf, size, "%#x", ptr);
1570
1571	thread__set_filename_pos(arg->thread, bf, ptr);
1572	return 0;
1573}
1574
1575static bool trace__filter_duration(struct trace *trace, double t)
1576{
1577	return t < (trace->duration_filter * NSEC_PER_MSEC);
1578}
1579
1580static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1581{
1582	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1583
1584	return fprintf(fp, "%10.3f ", ts);
1585}
1586
/*
 * Flags written from sig_handler().  They are volatile because they are
 * modified asynchronously from a signal handler and read by regular code,
 * so the compiler must not cache their values.
 */
static volatile bool done = false;
static volatile bool interrupted = false;

/*
 * Signal handler: request termination and record whether the request
 * came from SIGINT (as opposed to any other signal this is installed for).
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1595
1596static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1597					u64 duration, u64 tstamp, FILE *fp)
1598{
1599	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1600	printed += fprintf_duration(duration, fp);
1601
1602	if (trace->multiple_threads) {
1603		if (trace->show_comm)
1604			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1605		printed += fprintf(fp, "%d ", thread->tid);
1606	}
1607
1608	return printed;
1609}
1610
1611static int trace__process_event(struct trace *trace, struct machine *machine,
1612				union perf_event *event, struct perf_sample *sample)
1613{
1614	int ret = 0;
1615
1616	switch (event->header.type) {
1617	case PERF_RECORD_LOST:
1618		color_fprintf(trace->output, PERF_COLOR_RED,
1619			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1620		ret = machine__process_lost_event(machine, event, sample);
1621	default:
1622		ret = machine__process_event(machine, event, sample);
1623		break;
1624	}
1625
1626	return ret;
1627}
1628
1629static int trace__tool_process(struct perf_tool *tool,
1630			       union perf_event *event,
1631			       struct perf_sample *sample,
1632			       struct machine *machine)
1633{
1634	struct trace *trace = container_of(tool, struct trace, tool);
1635	return trace__process_event(trace, machine, event, sample);
1636}
1637
1638static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1639{
1640	int err = symbol__init(NULL);
1641
1642	if (err)
1643		return err;
1644
1645	trace->host = machine__new_host();
1646	if (trace->host == NULL)
1647		return -ENOMEM;
1648
1649	if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1650		return -errno;
1651
1652	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1653					    evlist->threads, trace__tool_process, false,
1654					    trace->opts.proc_map_timeout);
1655	if (err)
1656		symbol__exit();
1657
1658	return err;
1659}
1660
1661static int syscall__set_arg_fmts(struct syscall *sc)
1662{
1663	struct format_field *field;
1664	int idx = 0;
1665
1666	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1667	if (sc->arg_scnprintf == NULL)
1668		return -1;
1669
1670	if (sc->fmt)
1671		sc->arg_parm = sc->fmt->arg_parm;
1672
1673	for (field = sc->args; field; field = field->next) {
1674		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1675			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1676		else if (field->flags & FIELD_IS_POINTER)
1677			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1678		++idx;
1679	}
1680
1681	return 0;
1682}
1683
1684static int trace__read_syscall_info(struct trace *trace, int id)
1685{
1686	char tp_name[128];
1687	struct syscall *sc;
1688	const char *name = audit_syscall_to_name(id, trace->audit.machine);
1689
1690	if (name == NULL)
1691		return -1;
1692
1693	if (id > trace->syscalls.max) {
1694		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1695
1696		if (nsyscalls == NULL)
1697			return -1;
1698
1699		if (trace->syscalls.max != -1) {
1700			memset(nsyscalls + trace->syscalls.max + 1, 0,
1701			       (id - trace->syscalls.max) * sizeof(*sc));
1702		} else {
1703			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1704		}
1705
1706		trace->syscalls.table = nsyscalls;
1707		trace->syscalls.max   = id;
1708	}
1709
1710	sc = trace->syscalls.table + id;
1711	sc->name = name;
1712
 
 
 
 
 
 
 
 
 
 
 
 
 
1713	sc->fmt  = syscall_fmt__find(sc->name);
1714
1715	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1716	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1717
1718	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1719		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1720		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1721	}
1722
1723	if (IS_ERR(sc->tp_format))
1724		return -1;
1725
1726	sc->args = sc->tp_format->format.fields;
1727	sc->nr_args = sc->tp_format->format.nr_fields;
1728	/*
1729	 * We need to check and discard the first variable '__syscall_nr'
1730	 * or 'nr' that mean the syscall number. It is needless here.
1731	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1732	 */
1733	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1734		sc->args = sc->args->next;
1735		--sc->nr_args;
1736	}
1737
1738	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1739
1740	return syscall__set_arg_fmts(sc);
1741}
1742
1743static int trace__validate_ev_qualifier(struct trace *trace)
1744{
1745	int err = 0, i;
1746	struct str_node *pos;
1747
1748	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1749	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1750						 sizeof(trace->ev_qualifier_ids.entries[0]));
1751
1752	if (trace->ev_qualifier_ids.entries == NULL) {
1753		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1754		       trace->output);
1755		err = -EINVAL;
1756		goto out;
1757	}
1758
1759	i = 0;
1760
1761	strlist__for_each(pos, trace->ev_qualifier) {
1762		const char *sc = pos->s;
1763		int id = audit_name_to_syscall(sc, trace->audit.machine);
1764
1765		if (id < 0) {
1766			if (err == 0) {
1767				fputs("Error:\tInvalid syscall ", trace->output);
1768				err = -EINVAL;
1769			} else {
1770				fputs(", ", trace->output);
1771			}
1772
1773			fputs(sc, trace->output);
1774		}
1775
1776		trace->ev_qualifier_ids.entries[i++] = id;
1777	}
1778
1779	if (err < 0) {
1780		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1781		      "\nHint:\tand: 'man syscalls'\n", trace->output);
1782		zfree(&trace->ev_qualifier_ids.entries);
1783		trace->ev_qualifier_ids.nr = 0;
1784	}
1785out:
1786	return err;
1787}
1788
1789/*
1790 * args is to be interpreted as a series of longs but we need to handle
1791 * 8-byte unaligned accesses. args points to raw_data within the event
1792 * and raw_data is guaranteed to be 8-byte unaligned because it is
1793 * preceded by raw_size which is a u32. So we need to copy args to a temp
1794 * variable to read it. Most notably this avoids extended load instructions
1795 * on unaligned addresses
1796 */
1797
1798static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1799				      unsigned char *args, struct trace *trace,
1800				      struct thread *thread)
1801{
1802	size_t printed = 0;
1803	unsigned char *p;
1804	unsigned long val;
1805
1806	if (sc->args != NULL) {
1807		struct format_field *field;
1808		u8 bit = 1;
1809		struct syscall_arg arg = {
1810			.idx	= 0,
1811			.mask	= 0,
1812			.trace  = trace,
1813			.thread = thread,
1814		};
1815
1816		for (field = sc->args; field;
1817		     field = field->next, ++arg.idx, bit <<= 1) {
1818			if (arg.mask & bit)
1819				continue;
1820
1821			/* special care for unaligned accesses */
1822			p = args + sizeof(unsigned long) * arg.idx;
1823			memcpy(&val, p, sizeof(val));
1824
1825			/*
1826 			 * Suppress this argument if its value is zero and
1827 			 * and we don't have a string associated in an
1828 			 * strarray for it.
1829 			 */
1830			if (val == 0 &&
1831			    !(sc->arg_scnprintf &&
1832			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1833			      sc->arg_parm[arg.idx]))
1834				continue;
1835
1836			printed += scnprintf(bf + printed, size - printed,
1837					     "%s%s: ", printed ? ", " : "", field->name);
1838			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1839				arg.val = val;
1840				if (sc->arg_parm)
1841					arg.parm = sc->arg_parm[arg.idx];
1842				printed += sc->arg_scnprintf[arg.idx](bf + printed,
1843								      size - printed, &arg);
1844			} else {
1845				printed += scnprintf(bf + printed, size - printed,
1846						     "%ld", val);
1847			}
1848		}
1849	} else {
1850		int i = 0;
1851
1852		while (i < 6) {
1853			/* special care for unaligned accesses */
1854			p = args + sizeof(unsigned long) * i;
1855			memcpy(&val, p, sizeof(val));
1856			printed += scnprintf(bf + printed, size - printed,
1857					     "%sarg%d: %ld",
1858					     printed ? ", " : "", i, val);
1859			++i;
1860		}
1861	}
1862
1863	return printed;
1864}
1865
/*
 * Signature of the per-event sample handlers stored in evsel->handler and
 * invoked with the owning trace, the evsel, the raw event and its parsed
 * sample.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1869
1870static struct syscall *trace__syscall_info(struct trace *trace,
1871					   struct perf_evsel *evsel, int id)
1872{
1873
1874	if (id < 0) {
1875
1876		/*
1877		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1878		 * before that, leaving at a higher verbosity level till that is
1879		 * explained. Reproduced with plain ftrace with:
1880		 *
1881		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1882		 * grep "NR -1 " /t/trace_pipe
1883		 *
1884		 * After generating some load on the machine.
1885 		 */
1886		if (verbose > 1) {
1887			static u64 n;
1888			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1889				id, perf_evsel__name(evsel), ++n);
1890		}
1891		return NULL;
1892	}
1893
1894	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1895	    trace__read_syscall_info(trace, id))
1896		goto out_cant_read;
1897
1898	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1899		goto out_cant_read;
1900
1901	return &trace->syscalls.table[id];
1902
1903out_cant_read:
1904	if (verbose) {
1905		fprintf(trace->output, "Problems reading syscall %d", id);
1906		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1907			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1908		fputs(" information\n", trace->output);
1909	}
1910	return NULL;
1911}
1912
1913static void thread__update_stats(struct thread_trace *ttrace,
1914				 int id, struct perf_sample *sample)
1915{
1916	struct int_node *inode;
1917	struct stats *stats;
1918	u64 duration = 0;
1919
1920	inode = intlist__findnew(ttrace->syscall_stats, id);
1921	if (inode == NULL)
1922		return;
1923
1924	stats = inode->priv;
1925	if (stats == NULL) {
1926		stats = malloc(sizeof(struct stats));
1927		if (stats == NULL)
1928			return;
1929		init_stats(stats);
1930		inode->priv = stats;
1931	}
1932
1933	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1934		duration = sample->time - ttrace->entry_time;
1935
1936	update_stats(stats, duration);
1937}
1938
1939static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1940{
1941	struct thread_trace *ttrace;
1942	u64 duration;
1943	size_t printed;
1944
1945	if (trace->current == NULL)
1946		return 0;
1947
1948	ttrace = thread__priv(trace->current);
1949
1950	if (!ttrace->entry_pending)
1951		return 0;
1952
1953	duration = sample->time - ttrace->entry_time;
1954
1955	printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1956	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1957	ttrace->entry_pending = false;
1958
1959	return printed;
1960}
1961
1962static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1963			    union perf_event *event __maybe_unused,
1964			    struct perf_sample *sample)
1965{
1966	char *msg;
1967	void *args;
1968	size_t printed = 0;
1969	struct thread *thread;
1970	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1971	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1972	struct thread_trace *ttrace;
1973
1974	if (sc == NULL)
1975		return -1;
1976
 
 
 
1977	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1978	ttrace = thread__trace(thread, trace->output);
1979	if (ttrace == NULL)
1980		goto out_put;
1981
1982	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
 
1983
1984	if (ttrace->entry_str == NULL) {
1985		ttrace->entry_str = malloc(trace__entry_str_size);
1986		if (!ttrace->entry_str)
1987			goto out_put;
1988	}
1989
1990	if (!trace->summary_only)
1991		trace__printf_interrupted_entry(trace, sample);
1992
1993	ttrace->entry_time = sample->time;
1994	msg = ttrace->entry_str;
1995	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1996
1997	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1998					   args, trace, thread);
1999
2000	if (sc->is_exit) {
2001		if (!trace->duration_filter && !trace->summary_only) {
2002			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
2003			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
2004		}
2005	} else {
2006		ttrace->entry_pending = true;
2007		/* See trace__vfs_getname & trace__sys_exit */
2008		ttrace->filename.pending_open = false;
2009	}
2010
2011	if (trace->current != thread) {
2012		thread__put(trace->current);
2013		trace->current = thread__get(thread);
2014	}
2015	err = 0;
2016out_put:
2017	thread__put(thread);
2018	return err;
2019}
2020
2021static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2022			   union perf_event *event __maybe_unused,
2023			   struct perf_sample *sample)
2024{
2025	long ret;
2026	u64 duration = 0;
2027	struct thread *thread;
2028	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2029	struct syscall *sc = trace__syscall_info(trace, evsel, id);
2030	struct thread_trace *ttrace;
2031
2032	if (sc == NULL)
2033		return -1;
2034
 
 
 
2035	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2036	ttrace = thread__trace(thread, trace->output);
2037	if (ttrace == NULL)
2038		goto out_put;
2039
2040	if (trace->summary)
2041		thread__update_stats(ttrace, id, sample);
2042
2043	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2044
2045	if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2046		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2047		ttrace->filename.pending_open = false;
2048		++trace->stats.vfs_getname;
2049	}
2050
 
 
2051	ttrace->exit_time = sample->time;
2052
2053	if (ttrace->entry_time) {
2054		duration = sample->time - ttrace->entry_time;
2055		if (trace__filter_duration(trace, duration))
2056			goto out;
2057	} else if (trace->duration_filter)
2058		goto out;
2059
2060	if (trace->summary_only)
2061		goto out;
2062
2063	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2064
2065	if (ttrace->entry_pending) {
2066		fprintf(trace->output, "%-70s", ttrace->entry_str);
2067	} else {
2068		fprintf(trace->output, " ... [");
2069		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2070		fprintf(trace->output, "]: %s()", sc->name);
2071	}
2072
2073	if (sc->fmt == NULL) {
2074signed_print:
2075		fprintf(trace->output, ") = %ld", ret);
2076	} else if (ret < 0 && sc->fmt->errmsg) {
2077		char bf[STRERR_BUFSIZE];
2078		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2079			   *e = audit_errno_to_name(-ret);
2080
2081		fprintf(trace->output, ") = -1 %s %s", e, emsg);
2082	} else if (ret == 0 && sc->fmt->timeout)
2083		fprintf(trace->output, ") = 0 Timeout");
2084	else if (sc->fmt->hexret)
2085		fprintf(trace->output, ") = %#lx", ret);
2086	else
2087		goto signed_print;
2088
2089	fputc('\n', trace->output);
2090out:
2091	ttrace->entry_pending = false;
2092	err = 0;
2093out_put:
2094	thread__put(thread);
2095	return err;
2096}
2097
2098static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2099			      union perf_event *event __maybe_unused,
2100			      struct perf_sample *sample)
2101{
2102	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2103	struct thread_trace *ttrace;
2104	size_t filename_len, entry_str_len, to_move;
2105	ssize_t remaining_space;
2106	char *pos;
2107	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2108
2109	if (!thread)
2110		goto out;
2111
2112	ttrace = thread__priv(thread);
2113	if (!ttrace)
2114		goto out;
2115
2116	filename_len = strlen(filename);
2117
2118	if (ttrace->filename.namelen < filename_len) {
2119		char *f = realloc(ttrace->filename.name, filename_len + 1);
2120
2121		if (f == NULL)
2122				goto out;
2123
2124		ttrace->filename.namelen = filename_len;
2125		ttrace->filename.name = f;
2126	}
2127
2128	strcpy(ttrace->filename.name, filename);
2129	ttrace->filename.pending_open = true;
2130
2131	if (!ttrace->filename.ptr)
2132		goto out;
2133
2134	entry_str_len = strlen(ttrace->entry_str);
2135	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2136	if (remaining_space <= 0)
2137		goto out;
2138
2139	if (filename_len > (size_t)remaining_space) {
2140		filename += filename_len - remaining_space;
2141		filename_len = remaining_space;
2142	}
2143
2144	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2145	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2146	memmove(pos + filename_len, pos, to_move);
2147	memcpy(pos, filename, filename_len);
2148
2149	ttrace->filename.ptr = 0;
2150	ttrace->filename.entry_str_pos = 0;
2151out:
2152	return 0;
2153}
2154
2155static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2156				     union perf_event *event __maybe_unused,
2157				     struct perf_sample *sample)
2158{
2159        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2160	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2161	struct thread *thread = machine__findnew_thread(trace->host,
2162							sample->pid,
2163							sample->tid);
2164	struct thread_trace *ttrace = thread__trace(thread, trace->output);
2165
2166	if (ttrace == NULL)
2167		goto out_dump;
2168
2169	ttrace->runtime_ms += runtime_ms;
2170	trace->runtime_ms += runtime_ms;
2171	thread__put(thread);
2172	return 0;
2173
2174out_dump:
2175	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2176	       evsel->name,
2177	       perf_evsel__strval(evsel, sample, "comm"),
2178	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2179	       runtime,
2180	       perf_evsel__intval(evsel, sample, "vruntime"));
2181	thread__put(thread);
2182	return 0;
2183}
2184
2185static void bpf_output__printer(enum binary_printer_ops op,
2186				unsigned int val, void *extra)
2187{
2188	FILE *output = extra;
2189	unsigned char ch = (unsigned char)val;
2190
2191	switch (op) {
2192	case BINARY_PRINT_CHAR_DATA:
2193		fprintf(output, "%c", isprint(ch) ? ch : '.');
2194		break;
2195	case BINARY_PRINT_DATA_BEGIN:
2196	case BINARY_PRINT_LINE_BEGIN:
2197	case BINARY_PRINT_ADDR:
2198	case BINARY_PRINT_NUM_DATA:
2199	case BINARY_PRINT_NUM_PAD:
2200	case BINARY_PRINT_SEP:
2201	case BINARY_PRINT_CHAR_PAD:
2202	case BINARY_PRINT_LINE_END:
2203	case BINARY_PRINT_DATA_END:
2204	default:
2205		break;
2206	}
2207}
2208
2209static void bpf_output__fprintf(struct trace *trace,
2210				struct perf_sample *sample)
2211{
2212	print_binary(sample->raw_data, sample->raw_size, 8,
2213		     bpf_output__printer, trace->output);
2214}
2215
2216static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2217				union perf_event *event __maybe_unused,
2218				struct perf_sample *sample)
2219{
2220	trace__printf_interrupted_entry(trace, sample);
2221	trace__fprintf_tstamp(trace, sample->time, trace->output);
2222
2223	if (trace->trace_syscalls)
2224		fprintf(trace->output, "(         ): ");
2225
2226	fprintf(trace->output, "%s:", evsel->name);
2227
2228	if (perf_evsel__is_bpf_output(evsel)) {
2229		bpf_output__fprintf(trace, sample);
2230	} else if (evsel->tp_format) {
2231		event_format__fprintf(evsel->tp_format, sample->cpu,
2232				      sample->raw_data, sample->raw_size,
2233				      trace->output);
2234	}
2235
2236	fprintf(trace->output, ")\n");
2237	return 0;
2238}
2239
2240static void print_location(FILE *f, struct perf_sample *sample,
2241			   struct addr_location *al,
2242			   bool print_dso, bool print_sym)
2243{
2244
2245	if ((verbose || print_dso) && al->map)
2246		fprintf(f, "%s@", al->map->dso->long_name);
2247
2248	if ((verbose || print_sym) && al->sym)
2249		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2250			al->addr - al->sym->start);
2251	else if (al->map)
2252		fprintf(f, "0x%" PRIx64, al->addr);
2253	else
2254		fprintf(f, "0x%" PRIx64, sample->addr);
2255}
2256
2257static int trace__pgfault(struct trace *trace,
2258			  struct perf_evsel *evsel,
2259			  union perf_event *event __maybe_unused,
2260			  struct perf_sample *sample)
2261{
2262	struct thread *thread;
2263	struct addr_location al;
2264	char map_type = 'd';
2265	struct thread_trace *ttrace;
2266	int err = -1;
2267
2268	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2269	ttrace = thread__trace(thread, trace->output);
2270	if (ttrace == NULL)
2271		goto out_put;
2272
2273	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2274		ttrace->pfmaj++;
2275	else
2276		ttrace->pfmin++;
2277
2278	if (trace->summary_only)
2279		goto out;
2280
2281	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
2282			      sample->ip, &al);
2283
2284	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2285
2286	fprintf(trace->output, "%sfault [",
2287		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2288		"maj" : "min");
2289
2290	print_location(trace->output, sample, &al, false, true);
2291
2292	fprintf(trace->output, "] => ");
2293
2294	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
2295				   sample->addr, &al);
2296
2297	if (!al.map) {
2298		thread__find_addr_location(thread, sample->cpumode,
2299					   MAP__FUNCTION, sample->addr, &al);
2300
2301		if (al.map)
2302			map_type = 'x';
2303		else
2304			map_type = '?';
2305	}
2306
2307	print_location(trace->output, sample, &al, true, false);
2308
2309	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2310out:
2311	err = 0;
2312out_put:
2313	thread__put(thread);
2314	return err;
2315}
2316
2317static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2318{
2319	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2320	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2321		return false;
2322
2323	if (trace->pid_list || trace->tid_list)
2324		return true;
2325
2326	return false;
2327}
2328
2329static int trace__process_sample(struct perf_tool *tool,
2330				 union perf_event *event,
2331				 struct perf_sample *sample,
2332				 struct perf_evsel *evsel,
2333				 struct machine *machine __maybe_unused)
2334{
2335	struct trace *trace = container_of(tool, struct trace, tool);
2336	int err = 0;
2337
2338	tracepoint_handler handler = evsel->handler;
2339
2340	if (skip_sample(trace, sample))
2341		return 0;
2342
2343	if (!trace->full_time && trace->base_time == 0)
2344		trace->base_time = sample->time;
2345
2346	if (handler) {
2347		++trace->nr_events;
2348		handler(trace, evsel, event, sample);
2349	}
2350
2351	return err;
2352}
2353
2354static int parse_target_str(struct trace *trace)
2355{
2356	if (trace->opts.target.pid) {
2357		trace->pid_list = intlist__new(trace->opts.target.pid);
2358		if (trace->pid_list == NULL) {
2359			pr_err("Error parsing process id string\n");
2360			return -EINVAL;
2361		}
2362	}
2363
2364	if (trace->opts.target.tid) {
2365		trace->tid_list = intlist__new(trace->opts.target.tid);
2366		if (trace->tid_list == NULL) {
2367			pr_err("Error parsing thread id string\n");
2368			return -EINVAL;
2369		}
2370	}
2371
2372	return 0;
2373}
2374
2375static int trace__record(struct trace *trace, int argc, const char **argv)
2376{
2377	unsigned int rec_argc, i, j;
2378	const char **rec_argv;
2379	const char * const record_args[] = {
2380		"record",
2381		"-R",
2382		"-m", "1024",
2383		"-c", "1",
 
2384	};
2385
2386	const char * const sc_args[] = { "-e", };
2387	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2388	const char * const majpf_args[] = { "-e", "major-faults" };
2389	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2390	const char * const minpf_args[] = { "-e", "minor-faults" };
2391	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2392
2393	/* +1 is for the event string below */
2394	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2395		majpf_args_nr + minpf_args_nr + argc;
2396	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2397
2398	if (rec_argv == NULL)
2399		return -ENOMEM;
2400
2401	j = 0;
2402	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2403		rec_argv[j++] = record_args[i];
2404
2405	if (trace->trace_syscalls) {
2406		for (i = 0; i < sc_args_nr; i++)
2407			rec_argv[j++] = sc_args[i];
2408
2409		/* event string may be different for older kernels - e.g., RHEL6 */
2410		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2411			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2412		else if (is_valid_tracepoint("syscalls:sys_enter"))
2413			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2414		else {
2415			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2416			return -1;
2417		}
2418	}
 
2419
2420	if (trace->trace_pgfaults & TRACE_PFMAJ)
2421		for (i = 0; i < majpf_args_nr; i++)
2422			rec_argv[j++] = majpf_args[i];
2423
2424	if (trace->trace_pgfaults & TRACE_PFMIN)
2425		for (i = 0; i < minpf_args_nr; i++)
2426			rec_argv[j++] = minpf_args[i];
2427
2428	for (i = 0; i < (unsigned int)argc; i++)
2429		rec_argv[j++] = argv[i];
2430
2431	return cmd_record(j, rec_argv, NULL);
2432}
2433
/* Forward declaration: the definition appears later in this file. */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2435
2436static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2437{
2438	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2439
2440	if (IS_ERR(evsel))
2441		return false;
2442
2443	if (perf_evsel__field(evsel, "pathname") == NULL) {
2444		perf_evsel__delete(evsel);
2445		return false;
2446	}
2447
2448	evsel->handler = trace__vfs_getname;
2449	perf_evlist__add(evlist, evsel);
2450	return true;
2451}
2452
2453static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2454				    u64 config)
2455{
2456	struct perf_evsel *evsel;
2457	struct perf_event_attr attr = {
2458		.type = PERF_TYPE_SOFTWARE,
2459		.mmap_data = 1,
2460	};
2461
2462	attr.config = config;
2463	attr.sample_period = 1;
2464
2465	event_attr_init(&attr);
2466
2467	evsel = perf_evsel__new(&attr);
2468	if (!evsel)
2469		return -ENOMEM;
2470
2471	evsel->handler = trace__pgfault;
2472	perf_evlist__add(evlist, evsel);
2473
2474	return 0;
2475}
2476
2477static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2478{
2479	const u32 type = event->header.type;
2480	struct perf_evsel *evsel;
2481
2482	if (!trace->full_time && trace->base_time == 0)
2483		trace->base_time = sample->time;
2484
2485	if (type != PERF_RECORD_SAMPLE) {
2486		trace__process_event(trace, trace->host, event, sample);
2487		return;
2488	}
2489
2490	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2491	if (evsel == NULL) {
2492		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2493		return;
2494	}
2495
2496	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2497	    sample->raw_data == NULL) {
2498		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2499		       perf_evsel__name(evsel), sample->tid,
2500		       sample->cpu, sample->raw_size);
2501	} else {
2502		tracepoint_handler handler = evsel->handler;
2503		handler(trace, evsel, event, sample);
2504	}
2505}
2506
2507static int trace__add_syscall_newtp(struct trace *trace)
2508{
2509	int ret = -1;
2510	struct perf_evlist *evlist = trace->evlist;
2511	struct perf_evsel *sys_enter, *sys_exit;
2512
2513	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2514	if (sys_enter == NULL)
2515		goto out;
2516
2517	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2518		goto out_delete_sys_enter;
2519
2520	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2521	if (sys_exit == NULL)
2522		goto out_delete_sys_enter;
2523
2524	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2525		goto out_delete_sys_exit;
2526
2527	perf_evlist__add(evlist, sys_enter);
2528	perf_evlist__add(evlist, sys_exit);
2529
2530	trace->syscalls.events.sys_enter = sys_enter;
2531	trace->syscalls.events.sys_exit  = sys_exit;
2532
2533	ret = 0;
2534out:
2535	return ret;
2536
2537out_delete_sys_exit:
2538	perf_evsel__delete_priv(sys_exit);
2539out_delete_sys_enter:
2540	perf_evsel__delete_priv(sys_enter);
2541	goto out;
2542}
2543
2544static int trace__set_ev_qualifier_filter(struct trace *trace)
2545{
2546	int err = -1;
2547	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2548						trace->ev_qualifier_ids.nr,
2549						trace->ev_qualifier_ids.entries);
2550
2551	if (filter == NULL)
2552		goto out_enomem;
2553
2554	if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2555		err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2556
2557	free(filter);
2558out:
2559	return err;
2560out_enomem:
2561	errno = ENOMEM;
2562	goto out;
2563}
2564
2565static int trace__run(struct trace *trace, int argc, const char **argv)
2566{
2567	struct perf_evlist *evlist = trace->evlist;
2568	struct perf_evsel *evsel;
2569	int err = -1, i;
2570	unsigned long before;
2571	const bool forks = argc > 0;
2572	bool draining = false;
2573
2574	trace->live = true;
2575
2576	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2577		goto out_error_raw_syscalls;
2578
2579	if (trace->trace_syscalls)
2580		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2581
2582	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2583	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2584		goto out_error_mem;
2585	}
2586
2587	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2588	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2589		goto out_error_mem;
2590
2591	if (trace->sched &&
2592	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2593				   trace__sched_stat_runtime))
2594		goto out_error_sched_stat_runtime;
2595
2596	err = perf_evlist__create_maps(evlist, &trace->opts.target);
2597	if (err < 0) {
2598		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2599		goto out_delete_evlist;
2600	}
2601
2602	err = trace__symbols_init(trace, evlist);
2603	if (err < 0) {
2604		fprintf(trace->output, "Problems initializing symbol libraries!\n");
2605		goto out_delete_evlist;
2606	}
2607
2608	perf_evlist__config(evlist, &trace->opts);
2609
2610	signal(SIGCHLD, sig_handler);
2611	signal(SIGINT, sig_handler);
2612
2613	if (forks) {
2614		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2615						    argv, false, NULL);
2616		if (err < 0) {
2617			fprintf(trace->output, "Couldn't run the workload!\n");
2618			goto out_delete_evlist;
2619		}
2620	}
2621
2622	err = perf_evlist__open(evlist);
2623	if (err < 0)
2624		goto out_error_open;
2625
2626	err = bpf__apply_obj_config();
2627	if (err) {
2628		char errbuf[BUFSIZ];
2629
2630		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2631		pr_err("ERROR: Apply config to BPF failed: %s\n",
2632			 errbuf);
2633		goto out_error_open;
2634	}
2635
2636	/*
2637	 * Better not use !target__has_task() here because we need to cover the
2638	 * case where no threads were specified in the command line, but a
2639	 * workload was, and in that case we will fill in the thread_map when
2640	 * we fork the workload in perf_evlist__prepare_workload.
2641	 */
2642	if (trace->filter_pids.nr > 0)
2643		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2644	else if (thread_map__pid(evlist->threads, 0) == -1)
2645		err = perf_evlist__set_filter_pid(evlist, getpid());
2646
2647	if (err < 0)
2648		goto out_error_mem;
2649
2650	if (trace->ev_qualifier_ids.nr > 0) {
2651		err = trace__set_ev_qualifier_filter(trace);
2652		if (err < 0)
2653			goto out_errno;
2654
2655		pr_debug("event qualifier tracepoint filter: %s\n",
2656			 trace->syscalls.events.sys_exit->filter);
2657	}
2658
2659	err = perf_evlist__apply_filters(evlist, &evsel);
2660	if (err < 0)
2661		goto out_error_apply_filters;
2662
2663	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2664	if (err < 0)
2665		goto out_error_mmap;
2666
2667	if (!target__none(&trace->opts.target))
2668		perf_evlist__enable(evlist);
2669
2670	if (forks)
2671		perf_evlist__start_workload(evlist);
2672
2673	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2674				  evlist->threads->nr > 1 ||
2675				  perf_evlist__first(evlist)->attr.inherit;
2676again:
2677	before = trace->nr_events;
2678
2679	for (i = 0; i < evlist->nr_mmaps; i++) {
2680		union perf_event *event;
2681
2682		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
 
 
2683			struct perf_sample sample;
2684
2685			++trace->nr_events;
2686
2687			err = perf_evlist__parse_sample(evlist, event, &sample);
2688			if (err) {
2689				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2690				goto next_event;
2691			}
2692
2693			trace__handle_event(trace, event, &sample);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2694next_event:
2695			perf_evlist__mmap_consume(evlist, i);
2696
2697			if (interrupted)
2698				goto out_disable;
2699
2700			if (done && !draining) {
2701				perf_evlist__disable(evlist);
2702				draining = true;
2703			}
2704		}
2705	}
2706
2707	if (trace->nr_events == before) {
2708		int timeout = done ? 100 : -1;
2709
2710		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2711			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2712				draining = true;
2713
2714			goto again;
2715		}
2716	} else {
2717		goto again;
2718	}
2719
2720out_disable:
2721	thread__zput(trace->current);
2722
2723	perf_evlist__disable(evlist);
2724
2725	if (!err) {
2726		if (trace->summary)
2727			trace__fprintf_thread_summary(trace, trace->output);
2728
2729		if (trace->show_tool_stats) {
2730			fprintf(trace->output, "Stats:\n "
2731					       " vfs_getname : %" PRIu64 "\n"
2732					       " proc_getname: %" PRIu64 "\n",
2733				trace->stats.vfs_getname,
2734				trace->stats.proc_getname);
2735		}
2736	}
2737
2738out_delete_evlist:
2739	perf_evlist__delete(evlist);
2740	trace->evlist = NULL;
2741	trace->live = false;
2742	return err;
2743{
2744	char errbuf[BUFSIZ];
2745
2746out_error_sched_stat_runtime:
2747	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2748	goto out_error;
2749
2750out_error_raw_syscalls:
2751	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2752	goto out_error;
2753
2754out_error_mmap:
2755	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2756	goto out_error;
2757
2758out_error_open:
2759	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2760
2761out_error:
2762	fprintf(trace->output, "%s\n", errbuf);
2763	goto out_delete_evlist;
2764
2765out_error_apply_filters:
2766	fprintf(trace->output,
2767		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
2768		evsel->filter, perf_evsel__name(evsel), errno,
2769		strerror_r(errno, errbuf, sizeof(errbuf)));
2770	goto out_delete_evlist;
2771}
2772out_error_mem:
2773	fprintf(trace->output, "Not enough memory to run!\n");
2774	goto out_delete_evlist;
2775
2776out_errno:
2777	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2778	goto out_delete_evlist;
2779}
2780
2781static int trace__replay(struct trace *trace)
2782{
2783	const struct perf_evsel_str_handler handlers[] = {
2784		{ "probe:vfs_getname",	     trace__vfs_getname, },
2785	};
2786	struct perf_data_file file = {
2787		.path  = input_name,
2788		.mode  = PERF_DATA_MODE_READ,
2789		.force = trace->force,
2790	};
2791	struct perf_session *session;
2792	struct perf_evsel *evsel;
2793	int err = -1;
2794
2795	trace->tool.sample	  = trace__process_sample;
2796	trace->tool.mmap	  = perf_event__process_mmap;
2797	trace->tool.mmap2	  = perf_event__process_mmap2;
2798	trace->tool.comm	  = perf_event__process_comm;
2799	trace->tool.exit	  = perf_event__process_exit;
2800	trace->tool.fork	  = perf_event__process_fork;
2801	trace->tool.attr	  = perf_event__process_attr;
2802	trace->tool.tracing_data = perf_event__process_tracing_data;
2803	trace->tool.build_id	  = perf_event__process_build_id;
2804
2805	trace->tool.ordered_events = true;
2806	trace->tool.ordering_requires_timestamps = true;
2807
2808	/* add tid to output */
2809	trace->multiple_threads = true;
2810
 
 
 
2811	session = perf_session__new(&file, false, &trace->tool);
2812	if (session == NULL)
2813		return -1;
2814
2815	if (symbol__init(&session->header.env) < 0)
2816		goto out;
2817
2818	trace->host = &session->machines.host;
2819
2820	err = perf_session__set_tracepoints_handlers(session, handlers);
2821	if (err)
2822		goto out;
2823
2824	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2825						     "raw_syscalls:sys_enter");
2826	/* older kernels have syscalls tp versus raw_syscalls */
2827	if (evsel == NULL)
2828		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2829							     "syscalls:sys_enter");
 
 
 
 
2830
2831	if (evsel &&
2832	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2833	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2834		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2835		goto out;
2836	}
2837
2838	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2839						     "raw_syscalls:sys_exit");
2840	if (evsel == NULL)
2841		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2842							     "syscalls:sys_exit");
2843	if (evsel &&
2844	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2845	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2846		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2847		goto out;
2848	}
2849
2850	evlist__for_each(session->evlist, evsel) {
2851		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2852		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2853		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2854		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2855			evsel->handler = trace__pgfault;
2856	}
2857
2858	err = parse_target_str(trace);
2859	if (err != 0)
2860		goto out;
2861
2862	setup_pager();
2863
2864	err = perf_session__process_events(session);
2865	if (err)
2866		pr_err("Failed to process events, error %d", err);
2867
2868	else if (trace->summary)
2869		trace__fprintf_thread_summary(trace, trace->output);
2870
2871out:
2872	perf_session__delete(session);
2873
2874	return err;
2875}
2876
/*
 * trace__fprintf_threads_header - print the heading of the thread summary.
 *
 * @fp: stream to write to.
 *
 * Returns the number of characters written, or 0 if fprintf() reported an
 * error (a negative return must not be folded into a size_t).
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	int printed = fprintf(fp, "\n Summary of events:\n\n");

	return printed < 0 ? 0 : (size_t)printed;
}
2885
2886static size_t thread__dump_stats(struct thread_trace *ttrace,
2887				 struct trace *trace, FILE *fp)
2888{
2889	struct stats *stats;
2890	size_t printed = 0;
2891	struct syscall *sc;
2892	struct int_node *inode = intlist__first(ttrace->syscall_stats);
2893
2894	if (inode == NULL)
2895		return 0;
2896
2897	printed += fprintf(fp, "\n");
2898
2899	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2900	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2901	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2902
2903	/* each int_node is a syscall */
2904	while (inode) {
2905		stats = inode->priv;
2906		if (stats) {
2907			double min = (double)(stats->min) / NSEC_PER_MSEC;
2908			double max = (double)(stats->max) / NSEC_PER_MSEC;
2909			double avg = avg_stats(stats);
2910			double pct;
2911			u64 n = (u64) stats->n;
2912
2913			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2914			avg /= NSEC_PER_MSEC;
2915
2916			sc = &trace->syscalls.table[inode->i];
2917			printed += fprintf(fp, "   %-15s", sc->name);
2918			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2919					   n, avg * n, min, avg);
2920			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2921		}
2922
2923		inode = intlist__next(inode);
2924	}
2925
2926	printed += fprintf(fp, "\n\n");
2927
2928	return printed;
2929}
2930
/* struct used to pass data to per-thread function */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* tool state shared by all threads */
	size_t printed;		/* running count of characters printed */
};
2937
2938static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2939{
2940	struct summary_data *data = priv;
2941	FILE *fp = data->fp;
2942	size_t printed = data->printed;
2943	struct trace *trace = data->trace;
2944	struct thread_trace *ttrace = thread__priv(thread);
2945	double ratio;
2946
2947	if (ttrace == NULL)
2948		return 0;
2949
2950	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2951
2952	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2953	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2954	printed += fprintf(fp, "%.1f%%", ratio);
2955	if (ttrace->pfmaj)
2956		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2957	if (ttrace->pfmin)
2958		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2959	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2960	printed += thread__dump_stats(ttrace, trace, fp);
2961
2962	data->printed += printed;
2963
2964	return 0;
2965}
2966
2967static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2968{
2969	struct summary_data data = {
2970		.fp = fp,
2971		.trace = trace
2972	};
2973	data.printed = trace__fprintf_threads_header(fp);
2974
2975	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2976
2977	return data.printed;
2978}
2979
2980static int trace__set_duration(const struct option *opt, const char *str,
2981			       int unset __maybe_unused)
2982{
2983	struct trace *trace = opt->value;
2984
2985	trace->duration_filter = atof(str);
2986	return 0;
2987}
2988
2989static int trace__set_filter_pids(const struct option *opt, const char *str,
2990				  int unset __maybe_unused)
2991{
2992	int ret = -1;
2993	size_t i;
2994	struct trace *trace = opt->value;
2995	/*
2996	 * FIXME: introduce a intarray class, plain parse csv and create a
2997	 * { int nr, int entries[] } struct...
2998	 */
2999	struct intlist *list = intlist__new(str);
3000
3001	if (list == NULL)
3002		return -1;
3003
3004	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
3005	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
3006
3007	if (trace->filter_pids.entries == NULL)
3008		goto out;
3009
3010	trace->filter_pids.entries[0] = getpid();
3011
3012	for (i = 1; i < trace->filter_pids.nr; ++i)
3013		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
3014
3015	intlist__delete(list);
3016	ret = 0;
3017out:
3018	return ret;
3019}
3020
3021static int trace__open_output(struct trace *trace, const char *filename)
3022{
3023	struct stat st;
3024
3025	if (!stat(filename, &st) && st.st_size) {
3026		char oldname[PATH_MAX];
3027
3028		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
3029		unlink(oldname);
3030		rename(filename, oldname);
3031	}
3032
3033	trace->output = fopen(filename, "w");
3034
3035	return trace->output == NULL ? -errno : 0;
3036}
3037
3038static int parse_pagefaults(const struct option *opt, const char *str,
3039			    int unset __maybe_unused)
3040{
3041	int *trace_pgfaults = opt->value;
3042
3043	if (strcmp(str, "all") == 0)
3044		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3045	else if (strcmp(str, "maj") == 0)
3046		*trace_pgfaults |= TRACE_PFMAJ;
3047	else if (strcmp(str, "min") == 0)
3048		*trace_pgfaults |= TRACE_PFMIN;
3049	else
3050		return -1;
3051
3052	return 0;
3053}
3054
3055static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3056{
3057	struct perf_evsel *evsel;
3058
3059	evlist__for_each(evlist, evsel)
3060		evsel->handler = handler;
3061}
3062
3063int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3064{
3065	const char *trace_usage[] = {
3066		"perf trace [<options>] [<command>]",
3067		"perf trace [<options>] -- <command> [<options>]",
3068		"perf trace record [<options>] [<command>]",
3069		"perf trace record [<options>] -- <command> [<options>]",
3070		NULL
3071	};
3072	struct trace trace = {
3073		.audit = {
3074			.machine = audit_detect_machine(),
3075			.open_id = audit_name_to_syscall("open", trace.audit.machine),
3076		},
3077		.syscalls = {
3078			. max = -1,
3079		},
3080		.opts = {
3081			.target = {
3082				.uid	   = UINT_MAX,
3083				.uses_mmap = true,
3084			},
3085			.user_freq     = UINT_MAX,
3086			.user_interval = ULLONG_MAX,
3087			.no_buffering  = true,
3088			.mmap_pages    = UINT_MAX,
3089			.proc_map_timeout  = 500,
3090		},
3091		.output = stderr,
3092		.show_comm = true,
3093		.trace_syscalls = true,
3094	};
3095	const char *output_name = NULL;
3096	const char *ev_qualifier_str = NULL;
3097	const struct option trace_options[] = {
3098	OPT_CALLBACK(0, "event", &trace.evlist, "event",
3099		     "event selector. use 'perf list' to list available events",
3100		     parse_events_option),
3101	OPT_BOOLEAN(0, "comm", &trace.show_comm,
3102		    "show the thread COMM next to its id"),
3103	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3104	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
 
3105	OPT_STRING('o', "output", &output_name, "file", "output file name"),
3106	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3107	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3108		    "trace events on existing process id"),
3109	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3110		    "trace events on existing thread id"),
3111	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3112		     "pids to filter (by the kernel)", trace__set_filter_pids),
3113	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3114		    "system-wide collection from all CPUs"),
3115	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3116		    "list of cpus to monitor"),
3117	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3118		    "child tasks do not inherit counters"),
3119	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3120		     "number of mmap data pages",
3121		     perf_evlist__parse_mmap_pages),
3122	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3123		   "user to profile"),
3124	OPT_CALLBACK(0, "duration", &trace, "float",
3125		     "show only events with duration > N.M ms",
3126		     trace__set_duration),
3127	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3128	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3129	OPT_BOOLEAN('T', "time", &trace.full_time,
3130		    "Show full timestamp, not time relative to first start"),
3131	OPT_BOOLEAN('s', "summary", &trace.summary_only,
3132		    "Show only syscall summary with statistics"),
3133	OPT_BOOLEAN('S', "with-summary", &trace.summary,
3134		    "Show all syscalls and summary with statistics"),
3135	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3136		     "Trace pagefaults", parse_pagefaults, "maj"),
3137	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3138	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3139	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3140			"per thread proc mmap processing timeout in ms"),
3141	OPT_END()
3142	};
3143	const char * const trace_subcommands[] = { "record", NULL };
3144	int err;
3145	char bf[BUFSIZ];
3146
3147	signal(SIGSEGV, sighandler_dump_stack);
3148	signal(SIGFPE, sighandler_dump_stack);
3149
3150	trace.evlist = perf_evlist__new();
3151
3152	if (trace.evlist == NULL) {
3153		pr_err("Not enough memory to run!\n");
3154		err = -ENOMEM;
3155		goto out;
3156	}
3157
3158	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3159				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3160
3161	if (trace.trace_pgfaults) {
3162		trace.opts.sample_address = true;
3163		trace.opts.sample_time = true;
3164	}
3165
3166	if (trace.evlist->nr_entries > 0)
3167		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3168
3169	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3170		return trace__record(&trace, argc-1, &argv[1]);
3171
3172	/* summary_only implies summary option, but don't overwrite summary if set */
3173	if (trace.summary_only)
3174		trace.summary = trace.summary_only;
3175
3176	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3177	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
3178		pr_err("Please specify something to trace.\n");
3179		return -1;
3180	}
3181
3182	if (output_name != NULL) {
3183		err = trace__open_output(&trace, output_name);
3184		if (err < 0) {
3185			perror("failed to create output file");
3186			goto out;
3187		}
3188	}
3189
3190	if (ev_qualifier_str != NULL) {
3191		const char *s = ev_qualifier_str;
3192		struct strlist_config slist_config = {
3193			.dirname = system_path(STRACE_GROUPS_DIR),
3194		};
3195
3196		trace.not_ev_qualifier = *s == '!';
3197		if (trace.not_ev_qualifier)
3198			++s;
3199		trace.ev_qualifier = strlist__new(s, &slist_config);
3200		if (trace.ev_qualifier == NULL) {
3201			fputs("Not enough memory to parse event qualifier",
3202			      trace.output);
3203			err = -ENOMEM;
3204			goto out_close;
3205		}
3206
3207		err = trace__validate_ev_qualifier(&trace);
3208		if (err)
3209			goto out_close;
3210	}
3211
3212	err = target__validate(&trace.opts.target);
3213	if (err) {
3214		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3215		fprintf(trace.output, "%s", bf);
3216		goto out_close;
3217	}
3218
3219	err = target__parse_uid(&trace.opts.target);
3220	if (err) {
3221		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3222		fprintf(trace.output, "%s", bf);
3223		goto out_close;
3224	}
3225
3226	if (!argc && target__none(&trace.opts.target))
3227		trace.opts.target.system_wide = true;
3228
3229	if (input_name)
3230		err = trace__replay(&trace);
3231	else
3232		err = trace__run(&trace, argc, argv);
3233
3234out_close:
3235	if (output_name != NULL)
3236		fclose(trace.output);
3237out:
3238	return err;
3239}