Linux Audio

Check our new training course

Loading...
v3.15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
   1#include <traceevent/event-parse.h>
 
 
 
 
   2#include "builtin.h"
 
   3#include "util/color.h"
 
   4#include "util/debug.h"
 
 
 
 
 
 
   5#include "util/evlist.h"
 
 
 
 
   6#include "util/machine.h"
 
 
 
   7#include "util/session.h"
   8#include "util/thread.h"
   9#include "util/parse-options.h"
  10#include "util/strlist.h"
  11#include "util/intlist.h"
  12#include "util/thread_map.h"
  13#include "util/stat.h"
 
 
 
  14#include "trace-event.h"
  15#include "util/parse-events.h"
  16
  17#include <libaudit.h>
 
 
 
 
 
 
 
 
 
 
  18#include <stdlib.h>
  19#include <sys/eventfd.h>
  20#include <sys/mman.h>
  21#include <linux/futex.h>
  22
/*
 * For older distros: fallback definitions for constants their system
 * headers lack; values match the Linux UAPI headers.
 */
#ifndef MAP_STACK
# define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE	13
#endif

#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE		1
#endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  43
/*
 * Accessor for one field of a tracepoint sample: the offset is resolved
 * once from the event format, then samples are read through it.  The
 * union holds either an integer reader or a raw-pointer reader,
 * depending on the field's type.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};

/* Generate a fixed-width unsigned integer reader (host byte order). */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	return *(u##bits *)(sample->raw_data + field->offset); \
}

TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);

/* Same, but byte-swapping, for perf.data files from the other endianness. */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
	return bswap_##bits(value);\
}

TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
  73
  74static int tp_field__init_uint(struct tp_field *field,
  75			       struct format_field *format_field,
  76			       bool needs_swap)
  77{
  78	field->offset = format_field->offset;
  79
  80	switch (format_field->size) {
  81	case 1:
  82		field->integer = tp_field__u8;
  83		break;
  84	case 2:
  85		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
  86		break;
  87	case 4:
  88		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
  89		break;
  90	case 8:
  91		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
  92		break;
  93	default:
  94		return -1;
  95	}
  96
  97	return 0;
  98}
  99
 
 
 
 
 
 100static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
 101{
 102	return sample->raw_data + field->offset;
 103}
 104
 105static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
 106{
 107	field->offset = format_field->offset;
 108	field->pointer = tp_field__ptr;
 109	return 0;
 110}
 111
 
 
 
 
 
/*
 * Per-evsel private data for the raw_syscalls tracepoints: the common
 * "id" field plus either the enter "args" block or the exit "ret"
 * value — never both on the same event, hence the union.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
 118
 119static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 120					  struct tp_field *field,
 121					  const char *name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 122{
 123	struct format_field *format_field = perf_evsel__field(evsel, name);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 124
 125	if (format_field == NULL)
 126		return -1;
 127
 128	return tp_field__init_uint(field, format_field, evsel->needs_swap);
 129}
 130
/*
 * Statement-expression helper: resolve field #name inside the evsel's
 * private struct syscall_tp and initialize its integer accessor.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 134
 135static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 136					 struct tp_field *field,
 137					 const char *name)
 138{
 139	struct format_field *format_field = perf_evsel__field(evsel, name);
 140
 141	if (format_field == NULL)
 142		return -1;
 143
 144	return tp_field__init_ptr(field, format_field);
 145}
 146
/* As above, for the pointer-valued fields of struct syscall_tp. */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 150
/*
 * Free the evsel's private syscall_tp (priv must be freed while the
 * evsel is still alive), then delete the evsel itself.
 */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
 156
 157static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
 158{
 159	evsel->priv = malloc(sizeof(struct syscall_tp));
 160	if (evsel->priv != NULL) {
 161		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
 162			goto out_delete;
 163
 164		evsel->handler = handler;
 165		return 0;
 166	}
 167
 168	return -ENOMEM;
 169
 170out_delete:
 171	zfree(&evsel->priv);
 172	return -ENOENT;
 173}
 174
 175static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
 176{
 177	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 178
 179	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 180	if (evsel == NULL)
 181		evsel = perf_evsel__newtp("syscalls", direction);
 182
 183	if (evsel) {
 184		if (perf_evsel__init_syscall_tp(evsel, handler))
 185			goto out_delete;
 186	}
 
 187
 188	return evsel;
 189
 190out_delete:
 191	perf_evsel__delete_priv(evsel);
 192	return NULL;
 193}
 194
/* Read field 'name' of the current sample via the evsel's syscall_tp. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Same, but returns a raw pointer into the sample payload. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
 202
 203static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
 204					  void *sys_enter_handler,
 205					  void *sys_exit_handler)
 206{
 207	int ret = -1;
 208	struct perf_evsel *sys_enter, *sys_exit;
 209
 210	sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
 211	if (sys_enter == NULL)
 212		goto out;
 213
 214	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
 215		goto out_delete_sys_enter;
 216
 217	sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
 218	if (sys_exit == NULL)
 219		goto out_delete_sys_enter;
 220
 221	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
 222		goto out_delete_sys_exit;
 223
 224	perf_evlist__add(evlist, sys_enter);
 225	perf_evlist__add(evlist, sys_exit);
 226
 227	ret = 0;
 228out:
 229	return ret;
 
 
 
 230
 231out_delete_sys_exit:
 232	perf_evsel__delete_priv(sys_exit);
 233out_delete_sys_enter:
 234	perf_evsel__delete_priv(sys_enter);
 235	goto out;
 236}
 237
 
 
 
 238
/*
 * Everything a per-argument pretty-printer receives: the raw value, the
 * thread/trace context, an optional parameter (e.g. a strarray table),
 * the argument index and a bitmask of args to suppress from the output.
 */
struct syscall_arg {
	unsigned long val;
	struct thread *thread;
	struct trace  *trace;
	void	      *parm;
	u8	      idx;
	u8	      mask;
};

/* Maps small integers (value - offset) to their symbolic names. */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Variant for tables whose first entry maps to value 'off', not 0. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
 264
 265static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
 266						const char *intfmt,
 267					        struct syscall_arg *arg)
 268{
 269	struct strarray *sa = arg->parm;
 270	int idx = arg->val - sa->offset;
 271
 272	if (idx < 0 || idx >= sa->nr_entries)
 273		return scnprintf(bf, size, intfmt, arg->val);
 274
 275	return scnprintf(bf, size, "%s", sa->entries[idx]);
 276}
 277
 278static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
 279					      struct syscall_arg *arg)
 280{
 281	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
 282}
 283
 284#define SCA_STRARRAY syscall_arg__scnprintf_strarray
 285
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */
/* Like SCA_STRARRAY but falls back to hex for out-of-table values. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */

/* Defined later; prints an fd with its resolved pathname when known. */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
 
 
 
 304
 305static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
 306					   struct syscall_arg *arg)
 307{
 308	int fd = arg->val;
 
 309
 310	if (fd == AT_FDCWD)
 311		return scnprintf(bf, size, "CWD");
 
 
 
 
 
 
 
 
 312
 313	return syscall_arg__scnprintf_fd(bf, size, arg);
 
 
 
 314}
 315
 316#define SCA_FDAT syscall_arg__scnprintf_fd_at
 
 
 317
 318static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
 319					      struct syscall_arg *arg);
 
 
 
 
 320
 321#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
 
 322
 323static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
 324					 struct syscall_arg *arg)
 325{
 326	return scnprintf(bf, size, "%#lx", arg->val);
 327}
 328
 329#define SCA_HEX syscall_arg__scnprintf_hex
 330
/*
 * Pretty-print mmap(2)/mprotect(2) prot bits as "EXEC|READ|..."; any
 * bits left after stripping the known PROT_* flags are appended in hex.
 */
static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
					       struct syscall_arg *arg)
{
	int printed = 0, prot = arg->val;

	if (prot == PROT_NONE)
		return scnprintf(bf, size, "NONE");
/* Print and clear one PROT_ bit; 'printed' decides the "|" separator. */
#define	P_MMAP_PROT(n) \
	if (prot & PROT_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		prot &= ~PROT_##n; \
	}

	P_MMAP_PROT(EXEC);
	P_MMAP_PROT(READ);
	P_MMAP_PROT(WRITE);
#ifdef PROT_SEM
	P_MMAP_PROT(SEM);
#endif
	P_MMAP_PROT(GROWSDOWN);
	P_MMAP_PROT(GROWSUP);
#undef P_MMAP_PROT

	if (prot)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);

	return printed;
}

#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot

/*
 * Pretty-print mmap(2) flags the same way; MAP_* flags missing from
 * older headers are handled by the compat #defines at the top of the
 * file or guarded with #ifdef.
 */
static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
						struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

#define	P_MMAP_FLAG(n) \
	if (flags & MAP_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~MAP_##n; \
	}

	P_MMAP_FLAG(SHARED);
	P_MMAP_FLAG(PRIVATE);
#ifdef MAP_32BIT
	P_MMAP_FLAG(32BIT);
#endif
	P_MMAP_FLAG(ANONYMOUS);
	P_MMAP_FLAG(DENYWRITE);
	P_MMAP_FLAG(EXECUTABLE);
	P_MMAP_FLAG(FILE);
	P_MMAP_FLAG(FIXED);
	P_MMAP_FLAG(GROWSDOWN);
#ifdef MAP_HUGETLB
	P_MMAP_FLAG(HUGETLB);
#endif
	P_MMAP_FLAG(LOCKED);
	P_MMAP_FLAG(NONBLOCK);
	P_MMAP_FLAG(NORESERVE);
	P_MMAP_FLAG(POPULATE);
	P_MMAP_FLAG(STACK);
#ifdef MAP_UNINITIALIZED
	P_MMAP_FLAG(UNINITIALIZED);
#endif
#undef P_MMAP_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
 404
 405static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
 406						      struct syscall_arg *arg)
 407{
 408	int behavior = arg->val;
 409
 410	switch (behavior) {
 411#define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
 412	P_MADV_BHV(NORMAL);
 413	P_MADV_BHV(RANDOM);
 414	P_MADV_BHV(SEQUENTIAL);
 415	P_MADV_BHV(WILLNEED);
 416	P_MADV_BHV(DONTNEED);
 417	P_MADV_BHV(REMOVE);
 418	P_MADV_BHV(DONTFORK);
 419	P_MADV_BHV(DOFORK);
 420	P_MADV_BHV(HWPOISON);
 421#ifdef MADV_SOFT_OFFLINE
 422	P_MADV_BHV(SOFT_OFFLINE);
 423#endif
 424	P_MADV_BHV(MERGEABLE);
 425	P_MADV_BHV(UNMERGEABLE);
 426#ifdef MADV_HUGEPAGE
 427	P_MADV_BHV(HUGEPAGE);
 428#endif
 429#ifdef MADV_NOHUGEPAGE
 430	P_MADV_BHV(NOHUGEPAGE);
 431#endif
 432#ifdef MADV_DONTDUMP
 433	P_MADV_BHV(DONTDUMP);
 434#endif
 435#ifdef MADV_DODUMP
 436	P_MADV_BHV(DODUMP);
 437#endif
 438#undef P_MADV_PHV
 439	default: break;
 440	}
 441
 442	return scnprintf(bf, size, "%#x", behavior);
 
 
 
 
 
 
 443}
 444
 445#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
 
 
 446
 447static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
 448					   struct syscall_arg *arg)
 449{
 450	int printed = 0, op = arg->val;
 451
 452	if (op == 0)
 453		return scnprintf(bf, size, "NONE");
 454#define	P_CMD(cmd) \
 455	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
 456		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
 457		op &= ~LOCK_##cmd; \
 458	}
 459
 460	P_CMD(SH);
 461	P_CMD(EX);
 462	P_CMD(NB);
 463	P_CMD(UN);
 464	P_CMD(MAND);
 465	P_CMD(RW);
 466	P_CMD(READ);
 467	P_CMD(WRITE);
 468#undef P_OP
 469
 470	if (op)
 471		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
 472
 473	return printed;
 474}
 475
 476#define SCA_FLOCK syscall_arg__scnprintf_flock
 
 
 
 
 
 477
/*
 * Pretty-print the futex(2) 'op' argument.  Besides naming the command,
 * each case records in arg->mask which of the remaining syscall args
 * are meaningless for that command (uaddr2, val3, timeout, ...) so the
 * caller skips printing them.
 */
static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
{
	/* bit positions here mirror the futex syscall argument order */
	enum syscall_futex_args {
		SCF_UADDR   = (1 << 0),
		SCF_OP	    = (1 << 1),
		SCF_VAL	    = (1 << 2),
		SCF_TIMEOUT = (1 << 3),
		SCF_UADDR2  = (1 << 4),
		SCF_VAL3    = (1 << 5),
	};
	int op = arg->val;
	int cmd = op & FUTEX_CMD_MASK;	/* strip PRIVATE/CLOCK_REALTIME bits */
	size_t printed = 0;

	switch (cmd) {
#define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
	P_FUTEX_OP(WAKE_OP);							  break;
	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
	}

	if (op & FUTEX_PRIVATE_FLAG)
		printed += scnprintf(bf + printed, size - printed, "|PRIV");

	if (op & FUTEX_CLOCK_REALTIME)
		printed += scnprintf(bf + printed, size - printed, "|CLKRT");

	return printed;
}

#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 520
/*
 * Symbolic-name tables consumed via SCA_STRARRAY by the syscall_fmts
 * entries below.  Order in each array follows the kernel constants.
 */

/* EPOLL_CTL_ADD starts at 1, hence the offset. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);

static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);

/* Low nibble of the socket type carries SOCK_*; the rest are flag bits. */
#ifndef SOCK_TYPE_MASK
#define SOCK_TYPE_MASK 0xf
#endif
 574
/*
 * Pretty-print the socket(2) type argument: the SOCK_* type in the low
 * nibble plus the SOCK_CLOEXEC/SOCK_NONBLOCK flag bits above it.
 */
static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
						      struct syscall_arg *arg)
{
	size_t printed;
	int type = arg->val,
	    flags = type & ~SOCK_TYPE_MASK;

	type &= SOCK_TYPE_MASK;
	/*
	 * Can't use a strarray, MIPS may override for ABI reasons.
	 */
	switch (type) {
#define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
	P_SK_TYPE(STREAM);
	P_SK_TYPE(DGRAM);
	P_SK_TYPE(RAW);
	P_SK_TYPE(RDM);
	P_SK_TYPE(SEQPACKET);
	P_SK_TYPE(DCCP);
	P_SK_TYPE(PACKET);
#undef P_SK_TYPE
	default:
		printed = scnprintf(bf, size, "%#x", type);
	}

#define	P_SK_FLAG(n) \
	if (flags & SOCK_##n) { \
		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
		flags &= ~SOCK_##n; \
	}

	P_SK_FLAG(CLOEXEC);
	P_SK_FLAG(NONBLOCK);
#undef P_SK_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);

	return printed;
}

#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
 617
/* Compat fallbacks for MSG_* flags missing from older system headers. */
#ifndef MSG_PROBE
#define MSG_PROBE	     0x10
#endif
#ifndef MSG_WAITFORONE
#define MSG_WAITFORONE	0x10000
#endif
#ifndef MSG_SENDPAGE_NOTLAST
#define MSG_SENDPAGE_NOTLAST 0x20000
#endif
#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN	     0x20000000
#endif

/*
 * Pretty-print send/recv MSG_* flags; leftover unknown bits are
 * appended in hex.
 */
static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
					       struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

	if (flags == 0)
		return scnprintf(bf, size, "NONE");
#define	P_MSG_FLAG(n) \
	if (flags & MSG_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~MSG_##n; \
	}

	P_MSG_FLAG(OOB);
	P_MSG_FLAG(PEEK);
	P_MSG_FLAG(DONTROUTE);
	P_MSG_FLAG(TRYHARD);
	P_MSG_FLAG(CTRUNC);
	P_MSG_FLAG(PROBE);
	P_MSG_FLAG(TRUNC);
	P_MSG_FLAG(DONTWAIT);
	P_MSG_FLAG(EOR);
	P_MSG_FLAG(WAITALL);
	P_MSG_FLAG(FIN);
	P_MSG_FLAG(SYN);
	P_MSG_FLAG(CONFIRM);
	P_MSG_FLAG(RST);
	P_MSG_FLAG(ERRQUEUE);
	P_MSG_FLAG(NOSIGNAL);
	P_MSG_FLAG(MORE);
	P_MSG_FLAG(WAITFORONE);
	P_MSG_FLAG(SENDPAGE_NOTLAST);
	P_MSG_FLAG(FASTOPEN);
	P_MSG_FLAG(CMSG_CLOEXEC);
#undef P_MSG_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
 674
/*
 * Pretty-print the access(2) mode argument: F for the 0 (F_OK) case,
 * otherwise a concatenation of R/W/X plus any unknown bits in hex.
 */
static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	size_t printed = 0;
	int mode = arg->val;

	if (mode == F_OK) /* 0 */
		return scnprintf(bf, size, "F");
#define	P_MODE(n) \
	if (mode & n##_OK) { \
		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
		mode &= ~n##_OK; \
	}

	P_MODE(R);
	P_MODE(W);
	P_MODE(X);
#undef P_MODE

	if (mode)
		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);

	return printed;
}

#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
 701
/*
 * Pretty-print open(2) flags.  When O_CREAT is absent the next (mode)
 * argument is meaningless, so it gets masked from the output.  A zero
 * flags value is O_RDONLY.
 */
static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
					       struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

	if (!(flags & O_CREAT))
		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */

	if (flags == 0)
		return scnprintf(bf, size, "RDONLY");
#define	P_FLAG(n) \
	if (flags & O_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~O_##n; \
	}

	P_FLAG(APPEND);
	P_FLAG(ASYNC);
	P_FLAG(CLOEXEC);
	P_FLAG(CREAT);
	P_FLAG(DIRECT);
	P_FLAG(DIRECTORY);
	P_FLAG(EXCL);
	P_FLAG(LARGEFILE);
	P_FLAG(NOATIME);
	P_FLAG(NOCTTY);
#ifdef O_NONBLOCK
	P_FLAG(NONBLOCK);
#elif O_NDELAY /* NOTE(review): '#elif defined(O_NDELAY)' is likely intended — verify */
	P_FLAG(NDELAY);
#endif
#ifdef O_PATH
	P_FLAG(PATH);
#endif
	P_FLAG(RDWR);
#ifdef O_DSYNC
	/* O_SYNC contains the O_DSYNC bit; print the stronger name once */
	if ((flags & O_SYNC) == O_SYNC)
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
	else {
		P_FLAG(DSYNC);
	}
#else
	P_FLAG(SYNC);
#endif
	P_FLAG(TRUNC);
	P_FLAG(WRONLY);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
 757
/* Pretty-print eventfd2(2) EFD_* flags; unknown leftover bits in hex. */
static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
						   struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

	if (flags == 0)
		return scnprintf(bf, size, "NONE");
#define	P_FLAG(n) \
	if (flags & EFD_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~EFD_##n; \
	}

	P_FLAG(SEMAPHORE);
	P_FLAG(CLOEXEC);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
 
 
 
 
 
 783
/* Pretty-print pipe2(2) flags (O_CLOEXEC/O_NONBLOCK); rest in hex. */
static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
						struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

#define	P_FLAG(n) \
	if (flags & O_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~O_##n; \
	}

	P_FLAG(CLOEXEC);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 806
/*
 * Pretty-print a signal-number argument (kill, tgkill, ...) by its
 * SIG* name; unknown or real-time signals fall back to hex.
 */
static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
{
	int sig = arg->val;

	switch (sig) {
#define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
	P_SIGNUM(HUP);
	P_SIGNUM(INT);
	P_SIGNUM(QUIT);
	P_SIGNUM(ILL);
	P_SIGNUM(TRAP);
	P_SIGNUM(ABRT);
	P_SIGNUM(BUS);
	P_SIGNUM(FPE);
	P_SIGNUM(KILL);
	P_SIGNUM(USR1);
	P_SIGNUM(SEGV);
	P_SIGNUM(USR2);
	P_SIGNUM(PIPE);
	P_SIGNUM(ALRM);
	P_SIGNUM(TERM);
	P_SIGNUM(CHLD);
	P_SIGNUM(CONT);
	P_SIGNUM(STOP);
	P_SIGNUM(TSTP);
	P_SIGNUM(TTIN);
	P_SIGNUM(TTOU);
	P_SIGNUM(URG);
	P_SIGNUM(XCPU);
	P_SIGNUM(XFSZ);
	P_SIGNUM(VTALRM);
	P_SIGNUM(PROF);
	P_SIGNUM(WINCH);
	P_SIGNUM(IO);
	P_SIGNUM(PWR);
	P_SIGNUM(SYS);
#ifdef SIGEMT
	P_SIGNUM(EMT);
#endif
#ifdef SIGSTKFLT
	P_SIGNUM(STKFLT);
#endif
#ifdef SIGSWI
	P_SIGNUM(SWI);
#endif
	default: break;
	}

	return scnprintf(bf, size, "%#x", sig);
}

#define SCA_SIGNUM syscall_arg__scnprintf_signum
 859
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
/* Base ioctl request number for the tty ioctl table below. */
#define TCGETS		0x5401

/*
 * Terminal ioctl request names, indexed from TCGETS; the designated
 * initializers ([0x27], [0x50], [0x60]) skip gaps in the number space.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
};

static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
#endif /* defined(__i386__) || defined(__x86_64__) */

/* Shorthand for wiring a strarray to argument slot 'arg' of a syscall_fmt. */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
 890
 891static struct syscall_fmt {
 892	const char *name;
 893	const char *alias;
 894	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
 895	void	   *arg_parm[6];
 896	bool	   errmsg;
 897	bool	   timeout;
 898	bool	   hexret;
 899} syscall_fmts[] = {
 900	{ .name	    = "access",	    .errmsg = true,
 901	  .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
 902	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 903	{ .name	    = "brk",	    .hexret = true,
 904	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
 905	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
 906	{ .name	    = "close",	    .errmsg = true,
 907	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
 908	{ .name	    = "connect",    .errmsg = true, },
 909	{ .name	    = "dup",	    .errmsg = true,
 910	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 911	{ .name	    = "dup2",	    .errmsg = true,
 912	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 913	{ .name	    = "dup3",	    .errmsg = true,
 914	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 915	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
 916	{ .name	    = "eventfd2",   .errmsg = true,
 917	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
 918	{ .name	    = "faccessat",  .errmsg = true,
 919	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
 920	{ .name	    = "fadvise64",  .errmsg = true,
 921	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 922	{ .name	    = "fallocate",  .errmsg = true,
 923	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 924	{ .name	    = "fchdir",	    .errmsg = true,
 925	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 926	{ .name	    = "fchmod",	    .errmsg = true,
 927	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 928	{ .name	    = "fchmodat",   .errmsg = true,
 929	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
 930	{ .name	    = "fchown",	    .errmsg = true,
 931	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 932	{ .name	    = "fchownat",   .errmsg = true,
 933	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
 934	{ .name	    = "fcntl",	    .errmsg = true,
 935	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
 936			     [1] = SCA_STRARRAY, /* cmd */ },
 937	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
 938	{ .name	    = "fdatasync",  .errmsg = true,
 939	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 940	{ .name	    = "flock",	    .errmsg = true,
 941	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
 942			     [1] = SCA_FLOCK, /* cmd */ }, },
 943	{ .name	    = "fsetxattr",  .errmsg = true,
 944	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 945	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
 946	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 947	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
 948	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
 949	{ .name	    = "fstatfs",    .errmsg = true,
 950	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 951	{ .name	    = "fsync",    .errmsg = true,
 952	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 953	{ .name	    = "ftruncate", .errmsg = true,
 954	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 955	{ .name	    = "futex",	    .errmsg = true,
 956	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
 957	{ .name	    = "futimesat", .errmsg = true,
 958	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
 959	{ .name	    = "getdents",   .errmsg = true,
 960	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 961	{ .name	    = "getdents64", .errmsg = true,
 962	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 963	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
 964	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
 965	{ .name	    = "ioctl",	    .errmsg = true,
 966	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
 967#if defined(__i386__) || defined(__x86_64__)
 968/*
 969 * FIXME: Make this available to all arches.
 970 */
 971			     [1] = SCA_STRHEXARRAY, /* cmd */
 972			     [2] = SCA_HEX, /* arg */ },
 973	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
 974#else
 975			     [2] = SCA_HEX, /* arg */ }, },
 976#endif
 977	{ .name	    = "kill",	    .errmsg = true,
 978	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
 979	{ .name	    = "linkat",	    .errmsg = true,
 980	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
 981	{ .name	    = "lseek",	    .errmsg = true,
 982	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
 983			     [2] = SCA_STRARRAY, /* whence */ },
 984	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
 985	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
 986	{ .name     = "madvise",    .errmsg = true,
 987	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
 988			     [2] = SCA_MADV_BHV, /* behavior */ }, },
 989	{ .name	    = "mkdirat",    .errmsg = true,
 990	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
 991	{ .name	    = "mknodat",    .errmsg = true,
 992	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
 993	{ .name	    = "mlock",	    .errmsg = true,
 994	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 995	{ .name	    = "mlockall",   .errmsg = true,
 996	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 
 
 997	{ .name	    = "mmap",	    .hexret = true,
 998	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
 999			     [2] = SCA_MMAP_PROT, /* prot */
1000			     [3] = SCA_MMAP_FLAGS, /* flags */
1001			     [4] = SCA_FD, 	  /* fd */ }, },
1002	{ .name	    = "mprotect",   .errmsg = true,
1003	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
1004			     [2] = SCA_MMAP_PROT, /* prot */ }, },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1005	{ .name	    = "mremap",	    .hexret = true,
1006	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1007			     [4] = SCA_HEX, /* new_addr */ }, },
1008	{ .name	    = "munlock",    .errmsg = true,
1009	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1010	{ .name	    = "munmap",	    .errmsg = true,
1011	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1012	{ .name	    = "name_to_handle_at", .errmsg = true,
1013	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1014	{ .name	    = "newfstatat", .errmsg = true,
1015	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1016	{ .name	    = "open",	    .errmsg = true,
1017	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1018	{ .name	    = "open_by_handle_at", .errmsg = true,
1019	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1020			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1021	{ .name	    = "openat",	    .errmsg = true,
1022	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1023			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1024	{ .name	    = "pipe2",	    .errmsg = true,
1025	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1026	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
1027	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
1028	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
1029	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1030	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
1031	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1032	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1033	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1034	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1035	{ .name	    = "pwritev",    .errmsg = true,
1036	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1037	{ .name	    = "read",	    .errmsg = true,
1038	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1039	{ .name	    = "readlinkat", .errmsg = true,
1040	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1041	{ .name	    = "readv",	    .errmsg = true,
1042	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1043	{ .name	    = "recvfrom",   .errmsg = true,
1044	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1045	{ .name	    = "recvmmsg",   .errmsg = true,
1046	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1047	{ .name	    = "recvmsg",    .errmsg = true,
1048	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1049	{ .name	    = "renameat",   .errmsg = true,
1050	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1051	{ .name	    = "rt_sigaction", .errmsg = true,
1052	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1053	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1054	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1055	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1056	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1057	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1058	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1059	{ .name	    = "sendmmsg",    .errmsg = true,
1060	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1061	{ .name	    = "sendmsg",    .errmsg = true,
1062	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1063	{ .name	    = "sendto",	    .errmsg = true,
1064	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1065	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1066	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1067	{ .name	    = "shutdown",   .errmsg = true,
1068	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1069	{ .name	    = "socket",	    .errmsg = true,
1070	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1071			     [1] = SCA_SK_TYPE, /* type */ },
1072	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1073	{ .name	    = "socketpair", .errmsg = true,
1074	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1075			     [1] = SCA_SK_TYPE, /* type */ },
1076	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1077	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
1078	{ .name	    = "symlinkat",  .errmsg = true,
1079	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1080	{ .name	    = "tgkill",	    .errmsg = true,
1081	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1082	{ .name	    = "tkill",	    .errmsg = true,
1083	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1084	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1085	{ .name	    = "unlinkat",   .errmsg = true,
1086	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1087	{ .name	    = "utimensat",  .errmsg = true,
1088	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1089	{ .name	    = "write",	    .errmsg = true,
1090	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1091	{ .name	    = "writev",	    .errmsg = true,
1092	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1093};
1094
1095static int syscall_fmt__cmp(const void *name, const void *fmtp)
1096{
1097	const struct syscall_fmt *fmt = fmtp;
1098	return strcmp(name, fmt->name);
1099}
1100
 
 
 
 
 
1101static struct syscall_fmt *syscall_fmt__find(const char *name)
1102{
1103	const int nmemb = ARRAY_SIZE(syscall_fmts);
1104	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1105}
1106
 
 
 
 
 
 
/*
 * Per-syscall state, lazily filled in by trace__read_syscall_info():
 * the tracepoint format plus the pretty-printing hooks for its arguments.
 */
struct syscall {
	struct event_format *tp_format;	/* syscalls:sys_enter_<name> format, NULL if unavailable */
	const char	    *name;	/* from audit_syscall_to_name() */
	bool		    filtered;	/* excluded by the -e event qualifier list */
	struct syscall_fmt  *fmt;	/* entry from syscall_fmts[], may be NULL */
	/* per-argument printers, indexed by argument position; NULL slot => print as %ld */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;	/* per-argument opaque parm (e.g. strarray tables) */
};
1115
1116static size_t fprintf_duration(unsigned long t, FILE *fp)
 
 
 
 
 
 
 
1117{
1118	double duration = (double)t / NSEC_PER_MSEC;
1119	size_t printed = fprintf(fp, "(");
1120
1121	if (duration >= 1.0)
 
 
1122		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1123	else if (duration >= 0.01)
1124		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1125	else
1126		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1127	return printed + fprintf(fp, "): ");
1128}
1129
 
 
 
 
 
 
 
/*
 * Per-thread tracing state, hung off thread->priv and allocated on demand
 * by thread__trace().
 */
struct thread_trace {
	u64		  entry_time;	/* timestamp of the last sys_enter */
	u64		  exit_time;	/* timestamp of the last sys_exit */
	bool		  entry_pending; /* sys_enter buffered, waiting for its sys_exit */
	unsigned long	  nr_events;	/* samples attributed to this thread */
	char		  *entry_str;	/* formatted sys_enter line, lazily malloc'ed (1024 bytes) */
	double		  runtime_ms;	/* accumulated sched_stat_runtime for this thread */
	struct {
		int	  max;		/* highest fd with a cached path, -1 when table is empty */
		char	  **table;	/* fd -> strdup()ed pathname */
	} paths;

	struct intlist *syscall_stats;	/* syscall id -> struct stats, for --summary */
};
1144
1145static struct thread_trace *thread_trace__new(void)
1146{
1147	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1148
1149	if (ttrace)
1150		ttrace->paths.max = -1;
1151
1152	ttrace->syscall_stats = intlist__new(NULL);
1153
1154	return ttrace;
1155}
1156
1157static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1158{
1159	struct thread_trace *ttrace;
1160
1161	if (thread == NULL)
1162		goto fail;
1163
1164	if (thread->priv == NULL)
1165		thread->priv = thread_trace__new();
1166		
1167	if (thread->priv == NULL)
1168		goto fail;
1169
1170	ttrace = thread->priv;
1171	++ttrace->nr_events;
1172
1173	return ttrace;
1174fail:
1175	color_fprintf(fp, PERF_COLOR_RED,
1176		      "WARNING: not enough memory, dropping samples!\n");
1177	return NULL;
1178}
1179
/*
 * Global state for one 'perf trace' session.
 */
struct trace {
	struct perf_tool	tool;
	struct {
		int		machine;	/* audit machine type for id -> name lookups */
		int		open_id;	/* syscall id of open(2), matched against vfs_getname */
	}			audit;
	struct {
		int		max;		/* highest syscall id in table, -1 when empty */
		struct syscall  *table;		/* indexed by syscall id, filled lazily */
	} syscalls;
	struct record_opts	opts;
	struct machine		*host;
	u64			base_time;	/* first timestamp seen; baseline for printing */
	FILE			*output;
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;	/* -e syscall name list */
	const char 		*last_vfs_getname;	/* pathname from the latest probe:vfs_getname hit */
	struct intlist		*tid_list;	/* -t thread id filter */
	struct intlist		*pid_list;	/* -p process id filter */
	double			duration_filter;	/* --duration threshold, in ms */
	double			runtime_ms;	/* accumulated sched_stat_runtime, all threads */
	struct {
		u64		vfs_getname,	/* fds resolved via the vfs_getname probe */
				proc_getname;	/* fds resolved by reading /proc/<pid>/fd */
	} stats;
	bool			not_ev_qualifier;	/* -e list was negated with '!' */
	bool			live;		/* live session (may read /proc) vs. perf.data replay */
	bool			full_time;	/* print absolute timestamps (--time) */
	bool			sched;		/* also hook sched:sched_stat_runtime */
	bool			multiple_threads;	/* prefix lines with tid (and comm) */
	bool			summary;
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
};
1215
1216static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
 
1217{
1218	struct thread_trace *ttrace = thread->priv;
1219
1220	if (fd > ttrace->paths.max) {
1221		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1222
1223		if (npath == NULL)
1224			return -1;
 
 
1225
1226		if (ttrace->paths.max != -1) {
1227			memset(npath + ttrace->paths.max + 1, 0,
1228			       (fd - ttrace->paths.max) * sizeof(char *));
 
 
 
 
 
 
 
 
 
 
 
1229		} else {
1230			memset(npath, 0, (fd + 1) * sizeof(char *));
1231		}
1232
1233		ttrace->paths.table = npath;
1234		ttrace->paths.max   = fd;
1235	}
1236
1237	ttrace->paths.table[fd] = strdup(pathname);
 
1238
1239	return ttrace->paths.table[fd] != NULL ? 0 : -1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1240}
1241
1242static int thread__read_fd_path(struct thread *thread, int fd)
1243{
1244	char linkname[PATH_MAX], pathname[PATH_MAX];
1245	struct stat st;
1246	int ret;
1247
1248	if (thread->pid_ == thread->tid) {
1249		scnprintf(linkname, sizeof(linkname),
1250			  "/proc/%d/fd/%d", thread->pid_, fd);
1251	} else {
1252		scnprintf(linkname, sizeof(linkname),
1253			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1254	}
1255
1256	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1257		return -1;
1258
1259	ret = readlink(linkname, pathname, sizeof(pathname));
1260
1261	if (ret < 0 || ret > st.st_size)
1262		return -1;
1263
1264	pathname[ret] = '\0';
1265	return trace__set_fd_pathname(thread, fd, pathname);
1266}
1267
1268static const char *thread__fd_path(struct thread *thread, int fd,
1269				   struct trace *trace)
1270{
1271	struct thread_trace *ttrace = thread->priv;
1272
1273	if (ttrace == NULL)
1274		return NULL;
1275
1276	if (fd < 0)
1277		return NULL;
1278
1279	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1280		if (!trace->live)
1281			return NULL;
1282		++trace->stats.proc_getname;
1283		if (thread__read_fd_path(thread, fd)) {
1284			return NULL;
1285	}
1286
1287	return ttrace->paths.table[fd];
1288}
1289
1290static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1291					struct syscall_arg *arg)
1292{
1293	int fd = arg->val;
1294	size_t printed = scnprintf(bf, size, "%d", fd);
1295	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1296
1297	if (path)
1298		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1299
1300	return printed;
1301}
1302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1303static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1304					      struct syscall_arg *arg)
1305{
1306	int fd = arg->val;
1307	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1308	struct thread_trace *ttrace = arg->thread->priv;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1309
1310	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1311		zfree(&ttrace->paths.table[fd]);
1312
1313	return printed;
1314}
1315
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1316static bool trace__filter_duration(struct trace *trace, double t)
1317{
1318	return t < (trace->duration_filter * NSEC_PER_MSEC);
1319}
1320
1321static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1322{
1323	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1324
1325	return fprintf(fp, "%10.3f ", ts);
1326}
1327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Written asynchronously from sig_handler() and polled by the main loop.
 * ISO C only guarantees that a signal handler may safely write objects of
 * type volatile sig_atomic_t, so use that instead of plain bool.
 */
static volatile sig_atomic_t done;
static volatile sig_atomic_t interrupted;

static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;	/* SIGINT vs. SIGCHLD distinguishes ^C from workload exit */
}
1336
1337static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1338					u64 duration, u64 tstamp, FILE *fp)
1339{
1340	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1341	printed += fprintf_duration(duration, fp);
1342
1343	if (trace->multiple_threads) {
1344		if (trace->show_comm)
1345			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1346		printed += fprintf(fp, "%d ", thread->tid);
1347	}
1348
1349	return printed;
1350}
1351
 
 
 
 
 
 
 
 
 
 
 
 
1352static int trace__process_event(struct trace *trace, struct machine *machine,
1353				union perf_event *event, struct perf_sample *sample)
1354{
1355	int ret = 0;
1356
1357	switch (event->header.type) {
1358	case PERF_RECORD_LOST:
1359		color_fprintf(trace->output, PERF_COLOR_RED,
1360			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1361		ret = machine__process_lost_event(machine, event, sample);
 
1362	default:
1363		ret = machine__process_event(machine, event, sample);
1364		break;
1365	}
1366
1367	return ret;
1368}
1369
1370static int trace__tool_process(struct perf_tool *tool,
1371			       union perf_event *event,
1372			       struct perf_sample *sample,
1373			       struct machine *machine)
1374{
1375	struct trace *trace = container_of(tool, struct trace, tool);
1376	return trace__process_event(trace, machine, event, sample);
1377}
1378
1379static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1380{
1381	int err = symbol__init();
1382
1383	if (err)
1384		return err;
1385
1386	trace->host = machine__new_host();
1387	if (trace->host == NULL)
1388		return -ENOMEM;
1389
 
 
 
 
1390	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1391					    evlist->threads, trace__tool_process, false);
 
 
1392	if (err)
1393		symbol__exit();
1394
1395	return err;
1396}
1397
1398static int syscall__set_arg_fmts(struct syscall *sc)
 
 
 
 
 
 
 
 
1399{
1400	struct format_field *field;
1401	int idx = 0;
1402
1403	sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1404	if (sc->arg_scnprintf == NULL)
 
 
 
1405		return -1;
1406
1407	if (sc->fmt)
1408		sc->arg_parm = sc->fmt->arg_parm;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1409
1410	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1411		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1412			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1413		else if (field->flags & FIELD_IS_POINTER)
1414			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1415		++idx;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1416	}
1417
 
 
 
 
 
 
 
 
 
 
1418	return 0;
1419}
1420
/*
 * Lazily populate trace->syscalls.table[id]: resolve the syscall name via
 * audit, grow the table if needed, apply the -e qualifier filter, then
 * attach the syscall_fmts[] entry and the tracepoint format.
 *
 * Returns 0 on success (including "filtered out"), -1 on failure.
 */
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = audit_syscall_to_name(id, trace->audit.machine);

	if (name == NULL)
		return -1;

	if (id > trace->syscalls.max) {
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		if (trace->syscalls.max != -1) {
			/* zero only the newly appended entries */
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			/* first allocation: zero the whole table */
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max   = id;
	}

	sc = trace->syscalls.table + id;
	sc->name = name;

	if (trace->ev_qualifier) {
		bool in = strlist__find(trace->ev_qualifier, name) != NULL;

		/* with '!' (not_ev_qualifier) the list is an exclusion list */
		if (!(in ^ trace->not_ev_qualifier)) {
			sc->filtered = true;
			/*
			 * No need to read the tracepoint information since this
			 * syscall will be filtered out anyway.
			 */
			return 0;
		}
	}

	sc->fmt  = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	/* some syscalls are known under an alias, e.g. stat -> newstat */
	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	if (sc->tp_format == NULL)
		return -1;

	return syscall__set_arg_fmts(sc);
}
1478
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Format a syscall's arguments into bf as "name: value, name: value, ...",
 * using the per-argument printers when available.  Falls back to "argN: %ld"
 * for all six possible args when the tracepoint format is unknown.
 *
 * Returns the number of characters printed.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned long *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;

	if (sc->tp_format != NULL) {
		struct format_field *field;
		u8 bit = 1;	/* one bit per argument; printers may mask out args they consumed */
		struct syscall_arg arg = {
			.idx	= 0,
			.mask	= 0,
			.trace  = trace,
			.thread = thread,
		};

		for (field = sc->tp_format->format.fields->next; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			if (arg.mask & bit)
				continue;
			/*
			 * Suppress this argument if its value is zero and
			 * we don't have a string associated in a strarray
			 * for it.
			 */
			if (args[arg.idx] == 0 &&
			    !(sc->arg_scnprintf &&
			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
			      sc->arg_parm[arg.idx]))
				continue;

			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
				arg.val = args[arg.idx];
				if (sc->arg_parm)
					arg.parm = sc->arg_parm[arg.idx];
				printed += sc->arg_scnprintf[arg.idx](bf + printed,
								      size - printed, &arg);
			} else {
				/* no special printer: plain decimal */
				printed += scnprintf(bf + printed, size - printed,
						     "%ld", args[arg.idx]);
			}
		}
	} else {
		int i = 0;

		/* unknown format: dump all six potential arguments */
		while (i < 6) {
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, args[i]);
			++i;
		}
	}

	return printed;
}
1536
/* Signature shared by all tracepoint sample handlers stored in evsel->handler. */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  struct perf_sample *sample);
1539
1540static struct syscall *trace__syscall_info(struct trace *trace,
1541					   struct perf_evsel *evsel, int id)
1542{
 
1543
1544	if (id < 0) {
1545
1546		/*
1547		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1548		 * before that, leaving at a higher verbosity level till that is
1549		 * explained. Reproduced with plain ftrace with:
1550		 *
1551		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1552		 * grep "NR -1 " /t/trace_pipe
1553		 *
1554		 * After generating some load on the machine.
1555 		 */
1556		if (verbose > 1) {
1557			static u64 n;
1558			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1559				id, perf_evsel__name(evsel), ++n);
1560		}
1561		return NULL;
1562	}
1563
1564	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1565	    trace__read_syscall_info(trace, id))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1566		goto out_cant_read;
1567
1568	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
 
 
1569		goto out_cant_read;
 
1570
1571	return &trace->syscalls.table[id];
1572
1573out_cant_read:
1574	if (verbose) {
1575		fprintf(trace->output, "Problems reading syscall %d", id);
1576		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
 
1577			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1578		fputs(" information\n", trace->output);
1579	}
1580	return NULL;
1581}
1582
1583static void thread__update_stats(struct thread_trace *ttrace,
1584				 int id, struct perf_sample *sample)
 
 
 
 
 
 
 
1585{
1586	struct int_node *inode;
1587	struct stats *stats;
1588	u64 duration = 0;
1589
1590	inode = intlist__findnew(ttrace->syscall_stats, id);
1591	if (inode == NULL)
1592		return;
1593
1594	stats = inode->priv;
1595	if (stats == NULL) {
1596		stats = malloc(sizeof(struct stats));
1597		if (stats == NULL)
1598			return;
1599		init_stats(stats);
 
 
 
 
1600		inode->priv = stats;
1601	}
1602
1603	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1604		duration = sample->time - ttrace->entry_time;
1605
1606	update_stats(stats, duration);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1607}
1608
/*
 * raw_syscalls:sys_enter handler: format the syscall name and arguments
 * into the per-thread entry_str buffer.  The line is normally held back
 * (entry_pending) until the matching sys_exit supplies the return value;
 * exit/exit_group never return, so those are printed immediately.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	if (sc->filtered)
		return 0;	/* excluded by the -e qualifier */

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		return -1;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
	ttrace = thread->priv;

	/* entry_str is allocated once per thread and reused for every syscall */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(1024);
		if (!ttrace->entry_str)
			return -1;
	}

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
					   args, trace, thread);

	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
		/* no sys_exit will come: print now, unless filtering suppresses it */
		if (!trace->duration_filter && !trace->summary_only) {
			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
		}
	} else
		ttrace->entry_pending = true;

	return 0;
}
1657
/*
 * raw_syscalls:sys_exit handler: complete the line buffered by
 * trace__sys_enter() with the return value (decoded as errno, timeout,
 * hex or plain signed int), apply the --duration filter, update summary
 * stats, and associate open(2) return fds with the vfs_getname pathname.
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   struct perf_sample *sample)
{
	int ret;
	u64 duration = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	if (sc->filtered)
		return 0;	/* excluded by the -e qualifier */

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		return -1;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/* a successful open(2): remember the pathname for this new fd */
	if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
		trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
		trace->last_vfs_getname = NULL;
		++trace->stats.vfs_getname;
	}

	ttrace = thread->priv;

	ttrace->exit_time = sample->time;

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;	/* faster than the --duration threshold */
	} else if (trace->duration_filter)
		goto out;	/* no entry seen, duration unknown: filter it */

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		/* the enter line was already flushed (e.g. by another thread's output) */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	if (sc->fmt == NULL) {
signed_print:
		fprintf(trace->output, ") = %d", ret);
	} else if (ret < 0 && sc->fmt->errmsg) {
		char bf[256];
		/* GNU strerror_r: may return a static string instead of filling bf */
		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#x", ret);
	else
		goto signed_print;

	fputc('\n', trace->output);
out:
	ttrace->entry_pending = false;

	return 0;
}
1736
1737static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
 
1738			      struct perf_sample *sample)
1739{
1740	trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1741	return 0;
1742}
1743
1744static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
 
1745				     struct perf_sample *sample)
1746{
1747        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1748	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1749	struct thread *thread = machine__findnew_thread(trace->host,
1750							sample->pid,
1751							sample->tid);
1752	struct thread_trace *ttrace = thread__trace(thread, trace->output);
1753
1754	if (ttrace == NULL)
1755		goto out_dump;
1756
1757	ttrace->runtime_ms += runtime_ms;
1758	trace->runtime_ms += runtime_ms;
 
 
1759	return 0;
1760
1761out_dump:
1762	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1763	       evsel->name,
1764	       perf_evsel__strval(evsel, sample, "comm"),
1765	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1766	       runtime,
1767	       perf_evsel__intval(evsel, sample, "vruntime"));
1768	return 0;
1769}
1770
1771static bool skip_sample(struct trace *trace, struct perf_sample *sample)
 
1772{
1773	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1774	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1775		return false;
1776
1777	if (trace->pid_list || trace->tid_list)
1778		return true;
 
 
 
 
 
 
 
 
 
 
 
 
 
1779
1780	return false;
1781}
1782
1783static int trace__process_sample(struct perf_tool *tool,
1784				 union perf_event *event __maybe_unused,
1785				 struct perf_sample *sample,
1786				 struct perf_evsel *evsel,
1787				 struct machine *machine __maybe_unused)
1788{
1789	struct trace *trace = container_of(tool, struct trace, tool);
1790	int err = 0;
 
 
1791
1792	tracepoint_handler handler = evsel->handler;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1793
1794	if (skip_sample(trace, sample))
1795		return 0;
 
 
 
 
 
 
 
 
 
 
 
 
1796
1797	if (!trace->full_time && trace->base_time == 0)
1798		trace->base_time = sample->time;
 
 
 
 
 
 
1799
1800	if (handler) {
1801		++trace->nr_events;
1802		handler(trace, evsel, sample);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1803	}
1804
1805	return err;
1806}
1807
1808static int parse_target_str(struct trace *trace)
 
 
1809{
1810	if (trace->opts.target.pid) {
1811		trace->pid_list = intlist__new(trace->opts.target.pid);
1812		if (trace->pid_list == NULL) {
1813			pr_err("Error parsing process id string\n");
1814			return -EINVAL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1815		}
1816	}
1817
1818	if (trace->opts.target.tid) {
1819		trace->tid_list = intlist__new(trace->opts.target.tid);
1820		if (trace->tid_list == NULL) {
1821			pr_err("Error parsing thread id string\n");
1822			return -EINVAL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1823		}
1824	}
1825
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1826	return 0;
1827}
1828
/*
 * 'perf trace record': build an argv for 'perf record' that adds the raw
 * syscall tracepoints (falling back to the older syscalls:* names) to the
 * user-supplied arguments, then hand off to cmd_record().
 */
static int trace__record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e",
	};

	/* +1 is for the event string below */
	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	/* event string may be different for older kernels - e.g., RHEL6 */
	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
	else if (is_valid_tracepoint("syscalls:sys_enter"))
		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
	else {
		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
		free(rec_argv);	/* was leaked on this error path */
		return -1;
	}
	i++;

	for (j = 0; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}
1867
1868static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1869
1870static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1871{
1872	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1873	if (evsel == NULL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1874		return;
 
1875
1876	if (perf_evsel__field(evsel, "pathname") == NULL) {
1877		perf_evsel__delete(evsel);
1878		return;
 
 
 
 
 
 
 
 
 
 
 
1879	}
1880
1881	evsel->handler = trace__vfs_getname;
1882	perf_evlist__add(evlist, evsel);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1883}
1884
/*
 * Live tracing mode: set up the syscall (plus optional vfs_getname and
 * sched_stat_runtime) tracepoints, start/attach to the target, then
 * consume the mmap ring buffers until interrupted or idle after the
 * workload exits.  Returns 0 on success, negative on error.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = perf_evlist__new();
	struct perf_evsel *evsel;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0; /* remaining argv is a workload to fork */

	trace->live = true;

	if (evlist == NULL) {
		fprintf(trace->output, "Not enough memory to run!\n");
		goto out;
	}

	if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
		goto out_error_tp;

	/* Best effort: only present when the user added the probe. */
	perf_evlist__add_vfs_getname(evlist);

	if (trace->sched &&
		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				trace__sched_stat_runtime))
		goto out_error_tp;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts);

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		/* Fork now; the child is started only after events are enabled. */
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
	if (err < 0) {
		fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
		goto out_delete_evlist;
	}

	perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	/* tid -1 means "all threads"; either way, tids go into the output. */
	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
again:
	before = trace->nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			const u32 type = event->header.type;
			tracepoint_handler handler;
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			/* Timestamps are printed relative to the first sample. */
			if (!trace->full_time && trace->base_time == 0)
				trace->base_time = sample.time;

			if (type != PERF_RECORD_SAMPLE) {
				trace__process_event(trace, trace->host, event, &sample);
				continue;
			}

			evsel = perf_evlist__id2evsel(evlist, sample.id);
			if (evsel == NULL) {
				fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
				goto next_event;
			}

			if (sample.raw_data == NULL) {
				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
				       perf_evsel__name(evsel), sample.tid,
				       sample.cpu, sample.raw_size);
				goto next_event;
			}

			handler = evsel->handler;
			handler(trace, evsel, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;
		}
	}

	if (trace->nr_events == before) {
		/* Nothing new; once the workload is done, drain for at most 100ms. */
		int timeout = done ? 100 : -1;

		if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
			goto again;
	} else {
		goto again;
	}

out_disable:
	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	perf_evlist__delete(evlist);
out:
	trace->live = false;
	return err;
/*
 * Error reporting paths, placed after the normal return and only
 * reachable via the gotos above; the braces scope errbuf to them.
 */
{
	char errbuf[BUFSIZ];

out_error_tp:
	perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;
}
}
2047
/*
 * Replay mode (-i): run a previously recorded perf.data file through the
 * same sys_enter/sys_exit handlers used for live tracing.  Returns 0 on
 * success, negative on error.
 */
static int trace__replay(struct trace *trace)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "probe:vfs_getname",	     trace__vfs_getname, },
	};
	struct perf_data_file file = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
	};
	struct perf_session *session;
	struct perf_evsel *evsel;
	int err = -1;

	trace->tool.sample	  = trace__process_sample;
	trace->tool.mmap	  = perf_event__process_mmap;
	trace->tool.mmap2	  = perf_event__process_mmap2;
	trace->tool.comm	  = perf_event__process_comm;
	trace->tool.exit	  = perf_event__process_exit;
	trace->tool.fork	  = perf_event__process_fork;
	trace->tool.attr	  = perf_event__process_attr;
	trace->tool.tracing_data = perf_event__process_tracing_data;
	trace->tool.build_id	  = perf_event__process_build_id;

	/* Events in the file may be out of order; sort by timestamp. */
	trace->tool.ordered_samples = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	if (symbol__init() < 0)
		return -1;

	session = perf_session__new(&file, false, &trace->tool);
	if (session == NULL)
		return -ENOMEM;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");
	if (evsel == NULL) {
		pr_err("Data file does not have raw_syscalls:sys_enter event\n");
		goto out;
	}

	if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
	    perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	if (evsel == NULL) {
		pr_err("Data file does not have raw_syscalls:sys_exit event\n");
		goto out;
	}

	if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
	    perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
		goto out;
	}

	err = parse_target_str(trace);
	if (err != 0)
		goto out;

	setup_pager();

	err = perf_session__process_events(session, &trace->tool);
	if (err)
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}
2141
/* Print the summary banner; returns the number of characters written. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Print the per-syscall statistics table for one traced thread: calls,
 * min/avg/max latency in msec and stddev as a percentage of the mean.
 * Returns the number of characters written (0 if no stats recorded).
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	struct stats *stats;
	size_t printed = 0;
	struct syscall *sc;
	struct int_node *inode = intlist__first(ttrace->syscall_stats);

	if (inode == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
	printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
	printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");

	/* each int_node is a syscall */
	while (inode) {
		stats = inode->priv;
		if (stats) {
			/* stats are kept in nanoseconds; display in msec */
			double min = (double)(stats->min) / NSEC_PER_MSEC;
			double max = (double)(stats->max) / NSEC_PER_MSEC;
			double avg = avg_stats(stats);
			double pct;
			u64 n = (u64) stats->n;

			/* avoid dividing by zero when the mean is 0 */
			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
			avg /= NSEC_PER_MSEC;

			/* inode->i is the syscall number, an index into the table */
			sc = &trace->syscalls.table[inode->i];
			printed += fprintf(fp, "   %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
					   n, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
		}

		inode = intlist__next(inode);
	}

	printed += fprintf(fp, "\n\n");

	return printed;
}
2195
/* struct used to pass data to per-thread function */
struct summary_data {
	FILE *fp;		/* where the summary is written */
	struct trace *trace;	/* for total nr_events and the syscall table */
	size_t printed;		/* running count of characters written */
};
2202
2203static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2204{
2205	struct summary_data *data = priv;
2206	FILE *fp = data->fp;
2207	size_t printed = data->printed;
2208	struct trace *trace = data->trace;
2209	struct thread_trace *ttrace = thread->priv;
2210	double ratio;
2211
2212	if (ttrace == NULL)
2213		return 0;
2214
2215	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2216
2217	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2218	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2219	printed += fprintf(fp, "%.1f%%", ratio);
2220	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
 
 
 
 
 
 
 
 
2221	printed += thread__dump_stats(ttrace, trace, fp);
2222
2223	data->printed += printed;
 
 
 
 
 
 
2224
2225	return 0;
 
 
 
 
2226}
2227
2228static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2229{
2230	struct summary_data data = {
2231		.fp = fp,
2232		.trace = trace
2233	};
2234	data.printed = trace__fprintf_threads_header(fp);
 
 
 
 
 
 
2235
2236	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
 
2237
2238	return data.printed;
 
 
2239}
2240
2241static int trace__set_duration(const struct option *opt, const char *str,
2242			       int unset __maybe_unused)
2243{
2244	struct trace *trace = opt->value;
2245
2246	trace->duration_filter = atof(str);
2247	return 0;
2248}
2249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Open 'filename' for the trace output (-o).  A pre-existing non-empty
 * file is first rotated to "<filename>.old" instead of being clobbered.
 * Returns 0 on success or -errno from the failed fopen().
 */
static int trace__open_output(struct trace *trace, const char *filename)
{
	struct stat st;

	if (!stat(filename, &st) && st.st_size) {
		char oldname[PATH_MAX];

		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
		unlink(oldname);
		rename(filename, oldname);
	}

	trace->output = fopen(filename, "w");

	return trace->output == NULL ? -errno : 0;
}
2266
2267int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2268{
2269	const char * const trace_usage[] = {
2270		"perf trace [<options>] [<command>]",
2271		"perf trace [<options>] -- <command> [<options>]",
2272		"perf trace record [<options>] [<command>]",
2273		"perf trace record [<options>] -- <command> [<options>]",
2274		NULL
2275	};
2276	struct trace trace = {
2277		.audit = {
2278			.machine = audit_detect_machine(),
2279			.open_id = audit_name_to_syscall("open", trace.audit.machine),
2280		},
2281		.syscalls = {
2282			. max = -1,
2283		},
2284		.opts = {
2285			.target = {
2286				.uid	   = UINT_MAX,
2287				.uses_mmap = true,
2288			},
2289			.user_freq     = UINT_MAX,
2290			.user_interval = ULLONG_MAX,
2291			.no_buffering  = true,
2292			.mmap_pages    = 1024,
2293		},
2294		.output = stdout,
2295		.show_comm = true,
 
 
 
 
 
 
 
 
2296	};
 
2297	const char *output_name = NULL;
2298	const char *ev_qualifier_str = NULL;
2299	const struct option trace_options[] = {
 
 
 
 
 
2300	OPT_BOOLEAN(0, "comm", &trace.show_comm,
2301		    "show the thread COMM next to its id"),
2302	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2303	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2304		    "list of events to trace"),
2305	OPT_STRING('o', "output", &output_name, "file", "output file name"),
2306	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2307	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2308		    "trace events on existing process id"),
2309	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2310		    "trace events on existing thread id"),
 
 
2311	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2312		    "system-wide collection from all CPUs"),
2313	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2314		    "list of cpus to monitor"),
2315	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2316		    "child tasks do not inherit counters"),
2317	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2318		     "number of mmap data pages",
2319		     perf_evlist__parse_mmap_pages),
2320	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2321		   "user to profile"),
2322	OPT_CALLBACK(0, "duration", &trace, "float",
2323		     "show only events with duration > N.M ms",
2324		     trace__set_duration),
 
 
 
2325	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2326	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2327	OPT_BOOLEAN('T', "time", &trace.full_time,
2328		    "Show full timestamp, not time relative to first start"),
 
 
2329	OPT_BOOLEAN('s', "summary", &trace.summary_only,
2330		    "Show only syscall summary with statistics"),
2331	OPT_BOOLEAN('S', "with-summary", &trace.summary,
2332		    "Show all syscalls and summary with statistics"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2333	OPT_END()
2334	};
2335	int err;
 
 
 
 
2336	char bf[BUFSIZ];
2337
2338	if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2339		return trace__record(argc-2, &argv[2]);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2340
2341	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
 
 
2342
2343	/* summary_only implies summary option, but don't overwrite summary if set */
2344	if (trace.summary_only)
2345		trace.summary = trace.summary_only;
2346
2347	if (output_name != NULL) {
2348		err = trace__open_output(&trace, output_name);
2349		if (err < 0) {
2350			perror("failed to create output file");
2351			goto out;
2352		}
2353	}
2354
2355	if (ev_qualifier_str != NULL) {
2356		const char *s = ev_qualifier_str;
2357
2358		trace.not_ev_qualifier = *s == '!';
2359		if (trace.not_ev_qualifier)
2360			++s;
2361		trace.ev_qualifier = strlist__new(true, s);
2362		if (trace.ev_qualifier == NULL) {
2363			fputs("Not enough memory to parse event qualifier",
2364			      trace.output);
2365			err = -ENOMEM;
2366			goto out_close;
2367		}
2368	}
2369
2370	err = target__validate(&trace.opts.target);
2371	if (err) {
2372		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2373		fprintf(trace.output, "%s", bf);
2374		goto out_close;
2375	}
2376
2377	err = target__parse_uid(&trace.opts.target);
2378	if (err) {
2379		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2380		fprintf(trace.output, "%s", bf);
2381		goto out_close;
2382	}
2383
2384	if (!argc && target__none(&trace.opts.target))
2385		trace.opts.target.system_wide = true;
2386
2387	if (input_name)
2388		err = trace__replay(&trace);
2389	else
2390		err = trace__run(&trace, argc, argv);
2391
2392out_close:
2393	if (output_name != NULL)
2394		fclose(trace.output);
2395out:
 
2396	return err;
2397}
v5.9
   1/*
   2 * builtin-trace.c
   3 *
   4 * Builtin 'trace' command:
   5 *
   6 * Display a continuously updated trace of any workload, CPU, specific PID,
   7 * system wide, etc.  Default format is loosely strace like, but any other
   8 * event may be specified using --event.
   9 *
  10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
  11 *
  12 * Initially based on the 'trace' prototype by Thomas Gleixner:
  13 *
  14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
  15 */
  16
  17#include "util/record.h"
  18#include <traceevent/event-parse.h>
  19#include <api/fs/tracing_path.h>
  20#include <bpf/bpf.h>
  21#include "util/bpf_map.h"
  22#include "util/rlimit.h"
  23#include "builtin.h"
  24#include "util/cgroup.h"
  25#include "util/color.h"
  26#include "util/config.h"
  27#include "util/debug.h"
  28#include "util/dso.h"
  29#include "util/env.h"
  30#include "util/event.h"
  31#include "util/evsel.h"
  32#include "util/evsel_fprintf.h"
  33#include "util/synthetic-events.h"
  34#include "util/evlist.h"
  35#include "util/evswitch.h"
  36#include "util/mmap.h"
  37#include <subcmd/pager.h>
  38#include <subcmd/exec-cmd.h>
  39#include "util/machine.h"
  40#include "util/map.h"
  41#include "util/symbol.h"
  42#include "util/path.h"
  43#include "util/session.h"
  44#include "util/thread.h"
  45#include <subcmd/parse-options.h>
  46#include "util/strlist.h"
  47#include "util/intlist.h"
  48#include "util/thread_map.h"
  49#include "util/stat.h"
  50#include "util/tool.h"
  51#include "util/util.h"
  52#include "trace/beauty/beauty.h"
  53#include "trace-event.h"
  54#include "util/parse-events.h"
  55#include "util/bpf-loader.h"
  56#include "callchain.h"
  57#include "print_binary.h"
  58#include "string2.h"
  59#include "syscalltbl.h"
  60#include "rb_resort.h"
  61#include "../perf.h"
  62
  63#include <errno.h>
  64#include <inttypes.h>
  65#include <poll.h>
  66#include <signal.h>
  67#include <stdlib.h>
  68#include <string.h>
  69#include <linux/err.h>
  70#include <linux/filter.h>
  71#include <linux/kernel.h>
  72#include <linux/random.h>
  73#include <linux/stringify.h>
  74#include <linux/time64.h>
  75#include <linux/zalloc.h>
  76#include <fcntl.h>
  77#include <sys/sysmacros.h>
  78
  79#include <linux/ctype.h>
  80#include <perf/mmap.h>
 
  81
/* Fallback definitions for toolchains whose headers lack these values. */
#ifndef O_CLOEXEC
# define O_CLOEXEC		02000000
#endif

#ifndef F_LINUX_SPECIFIC_BASE
# define F_LINUX_SPECIFIC_BASE	1024
#endif
  89
  90/*
  91 * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100
  92 */
/* How to format (and parse back) one tracepoint/syscall argument. */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	bool	   (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val);
	unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
	void	   *parm;	/* formatter-private data, e.g. a struct strarray */
	const char *name;
	u16	   nr_entries; // for arrays
	bool	   show_zero;
};
 102
/*
 * Per-syscall formatting overrides: display name/alias, BPF augmentation
 * program names, per-argument formatters and return-value rendering flags.
 */
struct syscall_fmt {
	const char *name;
	const char *alias;
	struct {
		const char *sys_enter,
			   *sys_exit;
	}	   bpf_prog_name;
	struct syscall_arg_fmt arg[6];	/* one formatter slot per argument */
	u8	   nr_args;
	bool	   errpid;
	bool	   timeout;
	bool	   hexret;
};
 116
/*
 * Global state for one 'perf trace' session: the event lists and BPF maps
 * used for collection, output formatting knobs and accumulated stats.
 */
struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;
	struct {
		struct syscall  *table;
		struct bpf_map  *map;
		struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
			struct bpf_map  *sys_enter,
					*sys_exit;
		}		prog_array;
		struct {
			struct evsel *sys_enter,
					  *sys_exit,
					  *augmented;
		}		events;
		struct bpf_program *unaugmented_prog;
	} syscalls;
	struct {
		struct bpf_map *map;
	} dump;
	struct record_opts	opts;
	struct evlist	*evlist;
	struct machine		*host;
	struct thread		*current;
	struct bpf_object	*bpf_obj;
	struct cgroup		*cgroup;
	u64			base_time;
	FILE			*output;
	unsigned long		nr_events;
	unsigned long		nr_events_printed;
	unsigned long		max_events;
	struct evswitch		evswitch;
	struct strlist		*ev_qualifier;
	struct {
		size_t		nr;
		int		*entries;
	}			ev_qualifier_ids;
	struct {
		size_t		nr;
		pid_t		*entries;
		struct bpf_map  *map;
	}			filter_pids;
	double			duration_filter;
	double			runtime_ms;
	struct {
		u64		vfs_getname,
				proc_getname;
	} stats;
	unsigned int		max_stack;
	unsigned int		min_stack;
	int			raw_augmented_syscalls_args_size;
	bool			raw_augmented_syscalls;
	bool			fd_path_disabled;
	bool			sort_events;
	bool			not_ev_qualifier;
	bool			live;
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			errno_summary;
	bool			failure_only;
	bool			show_comm;
	bool			print_sample;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			libtraceevent_print;
	bool			kernel_syscallchains;
	s16			args_alignment;
	bool			show_tstamp;
	bool			show_duration;
	bool			show_zeros;
	bool			show_arg_names;
	bool			show_string_prefix;
	bool			force;
	bool			vfs_getname;
	int			trace_pgfaults;
	char			*perfconfig_events;
	struct {
		struct ordered_events	data;
		u64			last;
	} oe;
};
 201
/*
 * Accessor for one field of a tracepoint's raw payload: the field's
 * offset plus a reader returning it either as an integer or as a
 * pointer into the payload.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
 209
/*
 * Readers for unsigned integer fields in the raw payload.  memcpy is used
 * instead of a direct dereference since the field may not be naturally
 * aligned within the sample's raw data.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value;  \
}

TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);

/* Same, but byte-swapping for samples of the opposite endianness. */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
 234
 235static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
 
 
 236{
 237	field->offset = offset;
 238
 239	switch (size) {
 240	case 1:
 241		field->integer = tp_field__u8;
 242		break;
 243	case 2:
 244		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
 245		break;
 246	case 4:
 247		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
 248		break;
 249	case 8:
 250		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
 251		break;
 252	default:
 253		return -1;
 254	}
 255
 256	return 0;
 257}
 258
 259static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
 260{
 261	return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
 262}
 263
/* Return a pointer into the raw payload at the field's offset. */
static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
{
	return sample->raw_data + field->offset;
}

/* Initialize a tp_field that is accessed as a pointer into the payload. */
static int __tp_field__init_ptr(struct tp_field *field, int offset)
{
	field->offset = offset;
	field->pointer = tp_field__ptr;
	return 0;
}

/* Same, taking the offset from a libtraceevent format field. */
static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
{
	return __tp_field__init_ptr(field, format_field->offset);
}
 280
/*
 * Field accessors for the syscall tracepoints: the syscall id plus either
 * the arguments (sys_enter) or the return value (sys_exit).
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
 287
 288/*
 289 * The evsel->priv as used by 'perf trace'
 290 * sc:	for raw_syscalls:sys_{enter,exit} and syscalls:sys_{enter,exit}_SYSCALLNAME
 291 * fmt: for all the other tracepoints
 292 */
struct evsel_trace {
	struct syscall_tp	sc;	/* for the syscall tracepoints */
	struct syscall_arg_fmt  *fmt;	/* per-field formatters, other tracepoints */
};
 297
 298static struct evsel_trace *evsel_trace__new(void)
 299{
 300	return zalloc(sizeof(struct evsel_trace));
 301}
 302
 303static void evsel_trace__delete(struct evsel_trace *et)
 304{
 305	if (et == NULL)
 306		return;
 307
 308	zfree(&et->fmt);
 309	free(et);
 310}
 311
 312/*
 313 * Used with raw_syscalls:sys_{enter,exit} and with the
 314 * syscalls:sys_{enter,exit}_SYSCALL tracepoints
 315 */
 316static inline struct syscall_tp *__evsel__syscall_tp(struct evsel *evsel)
 317{
 318	struct evsel_trace *et = evsel->priv;
 319
 320	return &et->sc;
 321}
 322
 323static struct syscall_tp *evsel__syscall_tp(struct evsel *evsel)
 324{
 325	if (evsel->priv == NULL) {
 326		evsel->priv = evsel_trace__new();
 327		if (evsel->priv == NULL)
 328			return NULL;
 329	}
 330
 331	return __evsel__syscall_tp(evsel);
 332}
 333
 334/*
 335 * Used with all the other tracepoints.
 336 */
 337static inline struct syscall_arg_fmt *__evsel__syscall_arg_fmt(struct evsel *evsel)
 338{
 339	struct evsel_trace *et = evsel->priv;
 340
 341	return et->fmt;
 342}
 343
/*
 * Lazily allocate the evsel_trace and its per-field formatter array for a
 * non-syscall tracepoint.  Returns NULL (resetting evsel->priv) when any
 * allocation fails.
 */
static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel)
{
	struct evsel_trace *et = evsel->priv;

	if (evsel->priv == NULL) {
		et = evsel->priv = evsel_trace__new();

		if (et == NULL)
			return NULL;
	}

	if (et->fmt == NULL) {
		/* one syscall_arg_fmt slot per tracepoint format field */
		et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
		if (et->fmt == NULL)
			goto out_delete;
	}

	return __evsel__syscall_arg_fmt(evsel);

out_delete:
	evsel_trace__delete(evsel->priv);
	evsel->priv = NULL;
	return NULL;
}
 368
 369static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name)
 370{
 371	struct tep_format_field *format_field = evsel__field(evsel, name);
 372
 373	if (format_field == NULL)
 374		return -1;
 375
 376	return tp_field__init_uint(field, format_field, evsel->needs_swap);
 377}
 378
/* Init sc->'name' as a uint field, stringifying 'name' for the lookup. */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
	   evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 382
 383static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name)
 
 
 384{
 385	struct tep_format_field *format_field = evsel__field(evsel, name);
 386
 387	if (format_field == NULL)
 388		return -1;
 389
 390	return tp_field__init_ptr(field, format_field);
 391}
 392
/* Init sc->'name' as a pointer field, stringifying 'name' for the lookup. */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
	   evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 396
/*
 * Free the trace-private state before deleting the evsel itself; zfree()
 * also clears the pointer.  The order of the two calls matters.
 */
static void evsel__delete_priv(struct evsel *evsel)
{
	zfree(&evsel->priv);
	evsel__delete(evsel);
}
 402
 403static int evsel__init_syscall_tp(struct evsel *evsel)
 404{
 405	struct syscall_tp *sc = evsel__syscall_tp(evsel);
 406
 407	if (sc != NULL) {
 408		if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
 409		    evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
 410			return -ENOENT;
 411		return 0;
 412	}
 413
 414	return -ENOMEM;
 415}
 416
 417static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
 418{
 419	struct syscall_tp *sc = evsel__syscall_tp(evsel);
 420
 421	if (sc != NULL) {
 422		struct tep_format_field *syscall_id = evsel__field(tp, "id");
 423		if (syscall_id == NULL)
 424			syscall_id = evsel__field(tp, "__syscall_nr");
 425		if (syscall_id == NULL ||
 426		    __tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
 427			return -EINVAL;
 428
 429		return 0;
 430	}
 431
 432	return -ENOMEM;
 433}
 434
/* Augmented payload: the args follow the 64-bit slot after the id field. */
static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
{
	struct syscall_tp *sc = __evsel__syscall_tp(evsel);

	return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
}

/* Likewise for sys_exit: the return value sits after the id field. */
static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
{
	struct syscall_tp *sc = __evsel__syscall_tp(evsel);

	return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
}
 448
 449static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
 450{
 451	if (evsel__syscall_tp(evsel) != NULL) {
 
 452		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
 453			return -ENOENT;
 454
 455		evsel->handler = handler;
 456		return 0;
 457	}
 458
 459	return -ENOMEM;
 
 
 
 
 460}
 461
/*
 * Create an evsel for raw_syscalls:<direction> (falling back to the
 * pre-raw_syscalls event group on old kernels), with the syscall id field
 * and handler initialized.  Returns NULL on failure.
 */
static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
	struct evsel *evsel = evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (evsel__init_raw_syscall_tp(evsel, handler))
		goto out_delete;

	return evsel;

out_delete:
	evsel__delete_priv(evsel);
	return NULL;
}
 482
/* Read the integer field 'name' (e.g. id, ret) from a syscall tp sample. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = __evsel__syscall_tp(evsel); \
	   fields->name.integer(&fields->name, sample); })

/* Get a pointer to the field 'name' (e.g. args) inside the raw payload. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = __evsel__syscall_tp(evsel); \
	   fields->name.pointer(&fields->name, sample); })
 490
 491size_t strarray__scnprintf_suffix(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_suffix, int val)
 
 
 492{
 493	int idx = val - sa->offset;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 494
 495	if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
 496		size_t printed = scnprintf(bf, size, intfmt, val);
 497		if (show_suffix)
 498			printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
 499		return printed;
 500	}
 501
 502	return scnprintf(bf, size, "%s%s", sa->entries[idx], show_suffix ? sa->prefix : "");
 
 
 
 
 503}
 504
 505size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
 506{
 507	int idx = val - sa->offset;
 508
 509	if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
 510		size_t printed = scnprintf(bf, size, intfmt, val);
 511		if (show_prefix)
 512			printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
 513		return printed;
 514	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 515
 516	return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
 
 
 
 517}
 518
 519static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
 520						const char *intfmt,
 521					        struct syscall_arg *arg)
 522{
 523	return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->show_string_prefix, arg->val);
 
 
 
 
 
 
 524}
 525
/* Default strarray beautifier: unknown values are printed as plain "%d". */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
 533
/*
 * The syscall_arg strtoul/scnprintf adaptors below just forward to the
 * str(array|arrays) helpers, passing the table stashed in arg->parm.
 * They are used for the --filter expression parsing (strtoul) and for
 * pretty-printing (scnprintf) of enum/flags style syscall arguments.
 */
bool syscall_arg__strtoul_strarray(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
{
	return strarray__strtoul(arg->parm, bf, size, ret);
}

bool syscall_arg__strtoul_strarray_flags(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
{
	return strarray__strtoul_flags(arg->parm, bf, size, ret);
}

bool syscall_arg__strtoul_strarrays(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
{
	return strarrays__strtoul(arg->parm, bf, size, ret);
}

size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg)
{
	return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val);
}
 553
 554size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
 
 555{
 556	size_t printed;
 557	int i;
 558
 559	for (i = 0; i < sas->nr_entries; ++i) {
 560		struct strarray *sa = sas->entries[i];
 561		int idx = val - sa->offset;
 562
 563		if (idx >= 0 && idx < sa->nr_entries) {
 564			if (sa->entries[idx] == NULL)
 565				break;
 566			return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
 567		}
 568	}
 569
 570	printed = scnprintf(bf, size, intfmt, val);
 571	if (show_prefix)
 572		printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sas->entries[0]->prefix);
 573	return printed;
 574}
 575
 576bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret)
 577{
 578	int i;
 579
 580	for (i = 0; i < sa->nr_entries; ++i) {
 581		if (sa->entries[i] && strncmp(sa->entries[i], bf, size) == 0 && sa->entries[i][size] == '\0') {
 582			*ret = sa->offset + i;
 583			return true;
 584		}
 585	}
 586
 587	return false;
 588}
 589
 590bool strarray__strtoul_flags(struct strarray *sa, char *bf, size_t size, u64 *ret)
 
 591{
 592	u64 val = 0;
 593	char *tok = bf, *sep, *end;
 594
 595	*ret = 0;
 596
 597	while (size != 0) {
 598		int toklen = size;
 
 
 599
 600		sep = memchr(tok, '|', size);
 601		if (sep != NULL) {
 602			size -= sep - tok + 1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 603
 604			end = sep - 1;
 605			while (end > tok && isspace(*end))
 606				--end;
 607
 608			toklen = end - tok + 1;
 609		}
 610
 611		while (isspace(*tok))
 612			++tok;
 613
 614		if (isalpha(*tok) || *tok == '_') {
 615			if (!strarray__strtoul(sa, tok, toklen, &val))
 616				return false;
 617		} else {
 618			bool is_hexa = tok[0] == 0 && (tok[1] = 'x' || tok[1] == 'X');
 619
 620			val = strtoul(tok, NULL, is_hexa ? 16 : 0);
 621		}
 
 
 
 622
 623		*ret |= (1 << (val - 1));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 624
 625		if (sep == NULL)
 626			break;
 627		tok = sep + 1;
 628	}
 629
 630	return true;
 631}
 632
 633bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret)
 
 
 
 634{
 635	int i;
 636
 637	for (i = 0; i < sas->nr_entries; ++i) {
 638		struct strarray *sa = sas->entries[i];
 639
 640		if (strarray__strtoul(sa, bf, size, ret))
 641			return true;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 642	}
 643
 644	return false;
 645}
 646
/* Multi-strarray beautifier: unknown values are printed as plain "%d". */
size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
					struct syscall_arg *arg)
{
	return strarrays__scnprintf(arg->parm, bf, size, "%d", arg->show_string_prefix, arg->val);
}
 652
/* AT_FDCWD may be missing from older system headers. */
#ifndef AT_FDCWD
#define AT_FDCWD	-100
#endif
 656
 657static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
 658					   struct syscall_arg *arg)
 659{
 660	int fd = arg->val;
 661	const char *prefix = "AT_FD";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 662
 663	if (fd == AT_FDCWD)
 664		return scnprintf(bf, size, "%s%s", arg->show_string_prefix ? prefix : "", "CWD");
 665
 666	return syscall_arg__scnprintf_fd(bf, size, arg);
 667}
 668
#define SCA_FDAT syscall_arg__scnprintf_fd_at

/* Defined further down: needs thread state to track fds being closed. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd

/* Print an argument as hexadecimal ("0x..."). */
size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}
 
 
 
 
 
 
 
 
 
 680
 681size_t syscall_arg__scnprintf_ptr(char *bf, size_t size, struct syscall_arg *arg)
 682{
 683	if (arg->val == 0)
 684		return scnprintf(bf, size, "NULL");
 685	return syscall_arg__scnprintf_hex(bf, size, arg);
 686}
 
 
 
 
 
 
 
 
 
 
 
 687
/* Print an argument as a signed int. */
size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%d", arg->val);
}

/* Print an argument as a signed long. */
size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%ld", arg->val);
}

/* Print a fixed-size char array argument (e.g. comm fields) as a quoted string. */
static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg)
{
	// XXX Hey, maybe for sched:sched_switch prev/next comm fields we can
	//     fill missing comms using thread__set_comm()...
	//     here or in a special syscall_arg__scnprintf_pid_sched_tp...
	return scnprintf(bf, size, "\"%-.*s\"", arg->fmt->nr_entries ?: arg->len, arg->val);
}

#define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array
 707
/*
 * String tables fed to the STRARRAY()/STRARRAY_FLAGS() beautifiers below;
 * index (plus the DEFINE_STRARRAY_OFFSET offset, where used) maps to the
 * numeric value of the corresponding kernel constant.
 */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd, "BPF_");

static const char *fsmount_flags[] = {
	[1] = "CLOEXEC",
};
static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");

#include "trace/beauty/generated/fsconfig_arrays.c"

static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");

static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);

static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers, "ITIMER_");

static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options, "KEYCTL_");

static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences, "SEEK_");

static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds, "F_");

static const char *fcntl_linux_specific_cmds[] = {
	"SETLEASE", "GETLEASE", "NOTIFY", [5] =	"CANCELLK", "DUPFD_CLOEXEC",
	"SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
	"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
};

static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, "F_", F_LINUX_SPECIFIC_BASE);

static struct strarray *fcntl_cmds_arrays[] = {
	&strarray__fcntl_cmds,
	&strarray__fcntl_linux_specific_cmds,
};

static DEFINE_STRARRAYS(fcntl_cmds_arrays);

static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources, "RLIMIT_");

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow, "SIG_");

static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid, "CLOCK_");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 787
/*
 * Print an access(2) mode argument: F_OK (0) is special-cased, then the R/W/X
 * bits are printed back to back (e.g. "RWX", or "R_OKW_OK" with the suffix
 * shown); any leftover unknown bits are appended as "|0x...".
 */
static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	bool show_prefix = arg->show_string_prefix;
	const char *suffix = "_OK";
	size_t printed = 0;
	int mode = arg->val;

	if (mode == F_OK) /* 0 */
		return scnprintf(bf, size, "F%s", show_prefix ? suffix : "");
#define	P_MODE(n) \
	if (mode & n##_OK) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", #n, show_prefix ? suffix : ""); \
		mode &= ~n##_OK; \
	}

	P_MODE(R);
	P_MODE(W);
	P_MODE(X);
#undef P_MODE

	if (mode)
		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);

	return printed;
}

#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
 816
/* Defined further down: resolves the pathname via the vfs_getname machinery. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
 821
/*
 * Print a pipe2(2) flags argument as a '|'-separated list of the O_* flag
 * names it accepts (CLOEXEC, NONBLOCK); leftover bits are appended in hex.
 */
static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
						struct syscall_arg *arg)
{
	bool show_prefix = arg->show_string_prefix;
	const char *prefix = "O_";
	int printed = 0, flags = arg->val;

#define	P_FLAG(n) \
	if (flags & O_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
		flags &= ~O_##n; \
	}

	P_FLAG(CLOEXEC);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 846
/* GRND_* may be missing from older system headers. */
#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK	0x0001
#endif
#ifndef GRND_RANDOM
#define GRND_RANDOM	0x0002
#endif

/*
 * Print a getrandom(2) flags argument as a '|'-separated list of GRND_*
 * names; leftover unknown bits are appended in hex.
 */
static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
						   struct syscall_arg *arg)
{
	bool show_prefix = arg->show_string_prefix;
	const char *prefix = "GRND_";
	int printed = 0, flags = arg->val;

#define	P_FLAG(n) \
	if (flags & GRND_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
		flags &= ~GRND_##n; \
	}

	P_FLAG(RANDOM);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 878
/*
 * Argument-format initializers wiring a strarray into both the pretty
 * printer (.scnprintf) and the string->value parser (.strtoul).
 * Note: the 'name' parameter is documentation only, it is not expanded.
 */
#define STRARRAY(name, array) \
	  { .scnprintf	= SCA_STRARRAY, \
	    .strtoul	= STUL_STRARRAY, \
	    .parm	= &strarray__##array, }

#define STRARRAY_FLAGS(name, array) \
	  { .scnprintf	= SCA_STRARRAY_FLAGS, \
	    .strtoul	= STUL_STRARRAY_FLAGS, \
	    .parm	= &strarray__##array, }
 888
 889#include "trace/beauty/arch_errno_names.c"
 890#include "trace/beauty/eventfd.c"
 891#include "trace/beauty/futex_op.c"
 892#include "trace/beauty/futex_val3.c"
 893#include "trace/beauty/mmap.c"
 894#include "trace/beauty/mode_t.c"
 895#include "trace/beauty/msg_flags.c"
 896#include "trace/beauty/open_flags.c"
 897#include "trace/beauty/perf_event_open.c"
 898#include "trace/beauty/pid.c"
 899#include "trace/beauty/sched_policy.c"
 900#include "trace/beauty/seccomp.c"
 901#include "trace/beauty/signum.c"
 902#include "trace/beauty/socket_type.c"
 903#include "trace/beauty/waitid_options.c"
 904
/*
 * Per-syscall pretty-printing formats.  This table MUST be kept sorted by
 * ->name: syscall_fmt__find() does a bsearch() on it using
 * syscall_fmt__cmp().
 */
static struct syscall_fmt syscall_fmts[] = {
	{ .name	    = "access",
	  .arg = { [1] = { .scnprintf = SCA_ACCMODE,  /* mode */ }, }, },
	{ .name	    = "arch_prctl",
	  .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, /* code */ },
		   [1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, },
	{ .name	    = "bind",
	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
		   [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ },
		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
	{ .name	    = "bpf",
	  .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
	{ .name	    = "brk",	    .hexret = true,
	  .arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, },
	{ .name     = "clock_gettime",
	  .arg = { [0] = STRARRAY(clk_id, clockid), }, },
	{ .name	    = "clone",	    .errpid = true, .nr_args = 5,
	  .arg = { [0] = { .name = "flags",	    .scnprintf = SCA_CLONE_FLAGS, },
		   [1] = { .name = "child_stack",   .scnprintf = SCA_HEX, },
		   [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
		   [3] = { .name = "child_tidptr",  .scnprintf = SCA_HEX, },
		   [4] = { .name = "tls",	    .scnprintf = SCA_HEX, }, }, },
	{ .name	    = "close",
	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
	{ .name	    = "connect",
	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
		   [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ },
		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
	{ .name	    = "epoll_ctl",
	  .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
	{ .name	    = "eventfd2",
	  .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
	{ .name	    = "fchmodat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "fchownat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "fcntl",
	  .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD,  /* cmd */
			   .strtoul   = STUL_STRARRAYS,
			   .parm      = &strarrays__fcntl_cmds_arrays,
			   .show_zero = true, },
		   [2] = { .scnprintf =  SCA_FCNTL_ARG, /* arg */ }, }, },
	{ .name	    = "flock",
	  .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
	{ .name     = "fsconfig",
	  .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
	{ .name     = "fsmount",
	  .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
		   [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, },
	{ .name     = "fspick",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	  /* dfd */ },
		   [1] = { .scnprintf = SCA_FILENAME,	  /* path */ },
		   [2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, },
	{ .name	    = "fstat", .alias = "newfstat", },
	{ .name	    = "fstatat", .alias = "newfstatat", },
	{ .name	    = "futex",
	  .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ },
		   [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
	{ .name	    = "futimesat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "getitimer",
	  .arg = { [0] = STRARRAY(which, itimers), }, },
	{ .name	    = "getpid",	    .errpid = true, },
	{ .name	    = "getpgid",    .errpid = true, },
	{ .name	    = "getppid",    .errpid = true, },
	{ .name	    = "getrandom",
	  .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
	{ .name	    = "getrlimit",
	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	    = "gettid",	    .errpid = true, },
	{ .name	    = "ioctl",
	  .arg = {
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
		   [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
#else
		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
#endif
	{ .name	    = "kcmp",	    .nr_args = 5,
	  .arg = { [0] = { .name = "pid1",	.scnprintf = SCA_PID, },
		   [1] = { .name = "pid2",	.scnprintf = SCA_PID, },
		   [2] = { .name = "type",	.scnprintf = SCA_KCMP_TYPE, },
		   [3] = { .name = "idx1",	.scnprintf = SCA_KCMP_IDX, },
		   [4] = { .name = "idx2",	.scnprintf = SCA_KCMP_IDX, }, }, },
	{ .name	    = "keyctl",
	  .arg = { [0] = STRARRAY(option, keyctl_options), }, },
	{ .name	    = "kill",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "linkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "lseek",
	  .arg = { [2] = STRARRAY(whence, whences), }, },
	{ .name	    = "lstat", .alias = "newlstat", },
	{ .name     = "madvise",
	  .arg = { [0] = { .scnprintf = SCA_HEX,      /* start */ },
		   [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
	{ .name	    = "mkdirat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "mknodat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "mmap",	    .hexret = true,
/* The standard mmap maps to old_mmap on s390x */
#if defined(__s390x__)
	.alias = "old_mmap",
#endif
	  .arg = { [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
		   [3] = { .scnprintf = SCA_MMAP_FLAGS,	/* flags */
			   .strtoul   = STUL_STRARRAY_FLAGS,
			   .parm      = &strarray__mmap_flags, },
		   [5] = { .scnprintf = SCA_HEX,	/* offset */ }, }, },
	{ .name	    = "mount",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
		   [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
			   .mask_val  = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
	{ .name	    = "move_mount",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* from_dfd */ },
		   [1] = { .scnprintf = SCA_FILENAME, /* from_pathname */ },
		   [2] = { .scnprintf = SCA_FDAT,	/* to_dfd */ },
		   [3] = { .scnprintf = SCA_FILENAME, /* to_pathname */ },
		   [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, },
	{ .name	    = "mprotect",
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ }, }, },
	{ .name	    = "mq_unlink",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
	{ .name	    = "mremap",	    .hexret = true,
	  .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, },
	{ .name	    = "name_to_handle_at",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "newfstatat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "open",
	  .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "open_by_handle_at",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "openat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "perf_event_open",
	  .arg = { [2] = { .scnprintf = SCA_INT,	/* cpu */ },
		   [3] = { .scnprintf = SCA_FD,		/* group_fd */ },
		   [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
	{ .name	    = "pipe2",
	  .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
	{ .name	    = "pkey_alloc",
	  .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS,	/* access_rights */ }, }, },
	{ .name	    = "pkey_free",
	  .arg = { [0] = { .scnprintf = SCA_INT,	/* key */ }, }, },
	{ .name	    = "pkey_mprotect",
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
		   [3] = { .scnprintf = SCA_INT,	/* pkey */ }, }, },
	{ .name	    = "poll", .timeout = true, },
	{ .name	    = "ppoll", .timeout = true, },
	{ .name	    = "prctl",
	  .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */
			   .strtoul   = STUL_STRARRAY,
			   .parm      = &strarray__prctl_options, },
		   [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
		   [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
	{ .name	    = "pread", .alias = "pread64", },
	{ .name	    = "preadv", .alias = "pread", },
	{ .name	    = "prlimit64",
	  .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	    = "pwrite", .alias = "pwrite64", },
	{ .name	    = "readlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "recvfrom",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "recvmmsg",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "recvmsg",
	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "renameat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
		   [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, }, },
	{ .name	    = "renameat2",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
		   [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ },
		   [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, },
	{ .name	    = "rt_sigaction",
	  .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "rt_sigprocmask",
	  .arg = { [0] = STRARRAY(how, sighow), }, },
	{ .name	    = "rt_sigqueueinfo",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "rt_tgsigqueueinfo",
	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "sched_setscheduler",
	  .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
	{ .name	    = "seccomp",
	  .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP,	   /* op */ },
		   [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
	{ .name	    = "select", .timeout = true, },
	{ .name	    = "sendfile", .alias = "sendfile64", },
	{ .name	    = "sendmmsg",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "sendmsg",
	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "sendto",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ },
		   [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, },
	{ .name	    = "set_tid_address", .errpid = true, },
	{ .name	    = "setitimer",
	  .arg = { [0] = STRARRAY(which, itimers), }, },
	{ .name	    = "setrlimit",
	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	    = "socket",
	  .arg = { [0] = STRARRAY(family, socket_families),
		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
		   [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
	{ .name	    = "socketpair",
	  .arg = { [0] = STRARRAY(family, socket_families),
		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
		   [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
	{ .name	    = "stat", .alias = "newstat", },
	{ .name	    = "statx",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	 /* fdat */ },
		   [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
		   [3] = { .scnprintf = SCA_STATX_MASK,	 /* mask */ }, }, },
	{ .name	    = "swapoff",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
	{ .name	    = "swapon",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
	{ .name	    = "symlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "sync_file_range",
	  .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, /* flags */ }, }, },
	{ .name	    = "tgkill",
	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "tkill",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name     = "umount2", .alias = "umount",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
	{ .name	    = "uname", .alias = "newuname", },
	{ .name	    = "unlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "utimensat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
	{ .name	    = "wait4",	    .errpid = true,
	  .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
	{ .name	    = "waitid",	    .errpid = true,
	  .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
};
1153
/* bsearch() comparator for syscall_fmts[]: key is the syscall name string. */
static int syscall_fmt__cmp(const void *name, const void *fmtp)
{
	const struct syscall_fmt *fmt = fmtp;
	return strcmp(name, fmt->name);
}
1159
/* Binary search a (sorted by ->name) syscall_fmt table; NULL if not found. */
static struct syscall_fmt *__syscall_fmt__find(struct syscall_fmt *fmts, const int nmemb, const char *name)
{
	return bsearch(name, fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
}

/* Look up 'name' in the global syscall_fmts[] table. */
static struct syscall_fmt *syscall_fmt__find(const char *name)
{
	const int nmemb = ARRAY_SIZE(syscall_fmts);
	return __syscall_fmt__find(syscall_fmts, nmemb, name);
}
1170
1171static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts, const int nmemb, const char *alias)
1172{
1173	int i;
1174
1175	for (i = 0; i < nmemb; ++i) {
1176		if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0)
1177			return &fmts[i];
1178	}
1179
1180	return NULL;
1181}
1182
/* Look up 'alias' in the global syscall_fmts[] table. */
static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
{
	const int nmemb = ARRAY_SIZE(syscall_fmts);
	return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias);
}
1188
1189/*
1190 * is_exit: is this "exit" or "exit_group"?
1191 * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
1192 * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
1193 * nonexistent: Just a hole in the syscall table, syscall id not allocated
1194 */
1195struct syscall {
1196	struct tep_event    *tp_format;
1197	int		    nr_args;
1198	int		    args_size;
1199	struct {
1200		struct bpf_program *sys_enter,
1201				   *sys_exit;
1202	}		    bpf_prog;
1203	bool		    is_exit;
1204	bool		    is_open;
1205	bool		    nonexistent;
1206	struct tep_format_field *args;
1207	const char	    *name;
 
1208	struct syscall_fmt  *fmt;
1209	struct syscall_arg_fmt *arg_fmt;
1210};
1211
1212/*
1213 * Must match what is in the BPF program:
1214 *
1215 * tools/perf/examples/bpf/augmented_raw_syscalls.c
1216 */
1217struct bpf_map_syscall_entry {
1218	bool	enabled;
1219	u16	string_args_len[6];
1220};
1221
1222/*
1223 * We need to have this 'calculated' boolean because in some cases we really
1224 * don't know what is the duration of a syscall, for instance, when we start
1225 * a session and some threads are waiting for a syscall to finish, say 'poll',
1226 * in which case all we can do is to print "( ? ) for duration and for the
1227 * start timestamp.
1228 */
1229static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
1230{
1231	double duration = (double)t / NSEC_PER_MSEC;
1232	size_t printed = fprintf(fp, "(");
1233
1234	if (!calculated)
1235		printed += fprintf(fp, "         ");
1236	else if (duration >= 1.0)
1237		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1238	else if (duration >= 0.01)
1239		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1240	else
1241		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1242	return printed + fprintf(fp, "): ");
1243}
1244
1245/**
1246 * filename.ptr: The filename char pointer that will be vfs_getname'd
1247 * filename.entry_str_pos: Where to insert the string translated from
1248 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
1249 * ret_scnprintf: syscall args may set this to a different syscall return
1250 *                formatter, for instance, fcntl may return fds, file flags, etc.
1251 */
1252struct thread_trace {
1253	u64		  entry_time;
 
1254	bool		  entry_pending;
1255	unsigned long	  nr_events;
1256	unsigned long	  pfmaj, pfmin;
1257	char		  *entry_str;
1258	double		  runtime_ms;
1259	size_t		  (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1260        struct {
1261		unsigned long ptr;
1262		short int     entry_str_pos;
1263		bool	      pending_open;
1264		unsigned int  namelen;
1265		char	      *name;
1266	} filename;
1267	struct {
1268		int	      max;
1269		struct file   *table;
1270	} files;
1271
1272	struct intlist *syscall_stats;
1273};
1274
1275static struct thread_trace *thread_trace__new(void)
1276{
1277	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1278
1279	if (ttrace) {
1280		ttrace->files.max = -1;
1281		ttrace->syscall_stats = intlist__new(NULL);
1282	}
1283
1284	return ttrace;
1285}
1286
/*
 * Get (lazily allocating) the thread's thread_trace state and bump its event
 * count.  On any failure a warning is printed to 'fp' and NULL is returned,
 * i.e. the caller is expected to drop the sample.
 */
static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
{
	struct thread_trace *ttrace;

	if (thread == NULL)
		goto fail;

	if (thread__priv(thread) == NULL)
		thread__set_priv(thread, thread_trace__new());

	/* still NULL here means thread_trace__new() failed */
	if (thread__priv(thread) == NULL)
		goto fail;

	ttrace = thread__priv(thread);
	++ttrace->nr_events;

	return ttrace;
fail:
	color_fprintf(fp, PERF_COLOR_RED,
		      "WARNING: not enough memory, dropping samples!\n");
	return NULL;
}
1309
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1310
/*
 * Let an argument beautifier override how this syscall's return value will
 * be formatted (e.g. fcntl(F_DUPFD) returns an fd), stored per thread.
 */
void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
				    size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
{
	struct thread_trace *ttrace = thread__priv(arg->thread);

	ttrace->ret_scnprintf = ret_scnprintf;
}
1318
/* Bits for the --pf (page fault) tracing option. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size of the per-thread formatted sys_enter line buffer. */
static const size_t trace__entry_str_size = 2048;
1323
/*
 * Return the struct file slot for 'fd', growing the table on demand; newly
 * added slots are zeroed.  Returns NULL for negative fds or on allocation
 * failure (the old table stays valid in that case).
 */
static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
{
	if (fd < 0)
		return NULL;

	if (fd > ttrace->files.max) {
		struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));

		if (nfiles == NULL)
			return NULL;

		if (ttrace->files.max != -1) {
			/* zero only the newly grown tail */
			memset(nfiles + ttrace->files.max + 1, 0,
			       (fd - ttrace->files.max) * sizeof(struct file));
		} else {
			/* first allocation: zero the whole table */
			memset(nfiles, 0, (fd + 1) * sizeof(struct file));
		}

		ttrace->files.table = nfiles;
		ttrace->files.max   = fd;
	}

	return ttrace->files.table + fd;
}
1348
/* Public wrapper: file slot for 'fd' in the thread's private trace state. */
struct file *thread__files_entry(struct thread *thread, int fd)
{
	return thread_trace__files_entry(thread__priv(thread), fd);
}
1353
/*
 * Record 'pathname' (and, when stat() succeeds, the device major number) for
 * 'fd' in the thread's file table.  Returns 0 on success, -1 on failure.
 */
static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
{
	struct thread_trace *ttrace = thread__priv(thread);
	struct file *file = thread_trace__files_entry(ttrace, fd);

	if (file != NULL) {
		struct stat st;
		if (stat(pathname, &st) == 0)
			file->dev_maj = major(st.st_rdev);
		file->pathname = strdup(pathname);
		if (file->pathname)
			return 0;
	}

	return -1;
}
1370
1371static int thread__read_fd_path(struct thread *thread, int fd)
1372{
1373	char linkname[PATH_MAX], pathname[PATH_MAX];
1374	struct stat st;
1375	int ret;
1376
1377	if (thread->pid_ == thread->tid) {
1378		scnprintf(linkname, sizeof(linkname),
1379			  "/proc/%d/fd/%d", thread->pid_, fd);
1380	} else {
1381		scnprintf(linkname, sizeof(linkname),
1382			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1383	}
1384
1385	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1386		return -1;
1387
1388	ret = readlink(linkname, pathname, sizeof(pathname));
1389
1390	if (ret < 0 || ret > st.st_size)
1391		return -1;
1392
1393	pathname[ret] = '\0';
1394	return trace__set_fd_pathname(thread, fd, pathname);
1395}
1396
1397static const char *thread__fd_path(struct thread *thread, int fd,
1398				   struct trace *trace)
1399{
1400	struct thread_trace *ttrace = thread__priv(thread);
1401
1402	if (ttrace == NULL || trace->fd_path_disabled)
1403		return NULL;
1404
1405	if (fd < 0)
1406		return NULL;
1407
1408	if ((fd > ttrace->files.max || ttrace->files.table[fd].pathname == NULL)) {
1409		if (!trace->live)
1410			return NULL;
1411		++trace->stats.proc_getname;
1412		if (thread__read_fd_path(thread, fd))
1413			return NULL;
1414	}
1415
1416	return ttrace->files.table[fd].pathname;
1417}
1418
1419size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
 
1420{
1421	int fd = arg->val;
1422	size_t printed = scnprintf(bf, size, "%d", fd);
1423	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1424
1425	if (path)
1426		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1427
1428	return printed;
1429}
1430
1431size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1432{
1433        size_t printed = scnprintf(bf, size, "%d", fd);
1434	struct thread *thread = machine__find_thread(trace->host, pid, pid);
1435
1436	if (thread) {
1437		const char *path = thread__fd_path(thread, fd, trace);
1438
1439		if (path)
1440			printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1441
1442		thread__put(thread);
1443	}
1444
1445        return printed;
1446}
1447
1448static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1449					      struct syscall_arg *arg)
1450{
1451	int fd = arg->val;
1452	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1453	struct thread_trace *ttrace = thread__priv(arg->thread);
1454
1455	if (ttrace && fd >= 0 && fd <= ttrace->files.max)
1456		zfree(&ttrace->files.table[fd].pathname);
1457
1458	return printed;
1459}
1460
1461static void thread__set_filename_pos(struct thread *thread, const char *bf,
1462				     unsigned long ptr)
1463{
1464	struct thread_trace *ttrace = thread__priv(thread);
1465
1466	ttrace->filename.ptr = ptr;
1467	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1468}
1469
1470static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
1471{
1472	struct augmented_arg *augmented_arg = arg->augmented.args;
1473	size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
1474	/*
1475	 * So that the next arg with a payload can consume its augmented arg, i.e. for rename* syscalls
1476	 * we would have two strings, each prefixed by its size.
1477	 */
1478	int consumed = sizeof(*augmented_arg) + augmented_arg->size;
1479
1480	arg->augmented.args = ((void *)arg->augmented.args) + consumed;
1481	arg->augmented.size -= consumed;
1482
1483	return printed;
1484}
1485
1486static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1487					      struct syscall_arg *arg)
1488{
1489	unsigned long ptr = arg->val;
1490
1491	if (arg->augmented.args)
1492		return syscall_arg__scnprintf_augmented_string(arg, bf, size);
1493
1494	if (!arg->trace->vfs_getname)
1495		return scnprintf(bf, size, "%#x", ptr);
1496
1497	thread__set_filename_pos(arg->thread, bf, ptr);
1498	return 0;
1499}
1500
1501static bool trace__filter_duration(struct trace *trace, double t)
1502{
1503	return t < (trace->duration_filter * NSEC_PER_MSEC);
1504}
1505
1506static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1507{
1508	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1509
1510	return fprintf(fp, "%10.3f ", ts);
1511}
1512
1513/*
1514 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1515 * using ttrace->entry_time for a thread that receives a sys_exit without
1516 * first having received a sys_enter ("poll" issued before tracing session
1517 * starts, lost sys_enter exit due to ring buffer overflow).
1518 */
1519static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1520{
1521	if (tstamp > 0)
1522		return __trace__fprintf_tstamp(trace, tstamp, fp);
1523
1524	return fprintf(fp, "         ? ");
1525}
1526
/*
 * Written from signal handlers, read by the main loop. volatile
 * sig_atomic_t is the only object type the C standard guarantees can be
 * safely stored from a handler and observed by the interrupted code;
 * plain bool has neither the volatility nor the atomicity guarantee.
 */
static volatile sig_atomic_t done;
static volatile sig_atomic_t interrupted;

static void sig_handler(int sig)
{
	done = 1;
	interrupted = sig == SIGINT;
}
1535
1536static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
 
1537{
1538	size_t printed = 0;
 
1539
1540	if (trace->multiple_threads) {
1541		if (trace->show_comm)
1542			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1543		printed += fprintf(fp, "%d ", thread->tid);
1544	}
1545
1546	return printed;
1547}
1548
1549static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1550					u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1551{
1552	size_t printed = 0;
1553
1554	if (trace->show_tstamp)
1555		printed = trace__fprintf_tstamp(trace, tstamp, fp);
1556	if (trace->show_duration)
1557		printed += fprintf_duration(duration, duration_calculated, fp);
1558	return printed + trace__fprintf_comm_tid(trace, thread, fp);
1559}
1560
1561static int trace__process_event(struct trace *trace, struct machine *machine,
1562				union perf_event *event, struct perf_sample *sample)
1563{
1564	int ret = 0;
1565
1566	switch (event->header.type) {
1567	case PERF_RECORD_LOST:
1568		color_fprintf(trace->output, PERF_COLOR_RED,
1569			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1570		ret = machine__process_lost_event(machine, event, sample);
1571		break;
1572	default:
1573		ret = machine__process_event(machine, event, sample);
1574		break;
1575	}
1576
1577	return ret;
1578}
1579
1580static int trace__tool_process(struct perf_tool *tool,
1581			       union perf_event *event,
1582			       struct perf_sample *sample,
1583			       struct machine *machine)
1584{
1585	struct trace *trace = container_of(tool, struct trace, tool);
1586	return trace__process_event(trace, machine, event, sample);
1587}
1588
1589static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1590{
1591	struct machine *machine = vmachine;
1592
1593	if (machine->kptr_restrict_warned)
1594		return NULL;
1595
1596	if (symbol_conf.kptr_restrict) {
1597		pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1598			   "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1599			   "Kernel samples will not be resolved.\n");
1600		machine->kptr_restrict_warned = true;
1601		return NULL;
1602	}
1603
1604	return machine__resolve_kernel_addr(vmachine, addrp, modp);
1605}
1606
1607static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
1608{
1609	int err = symbol__init(NULL);
1610
1611	if (err)
1612		return err;
1613
1614	trace->host = machine__new_host();
1615	if (trace->host == NULL)
1616		return -ENOMEM;
1617
1618	err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1619	if (err < 0)
1620		goto out;
1621
1622	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1623					    evlist->core.threads, trace__tool_process, false,
1624					    1);
1625out:
1626	if (err)
1627		symbol__exit();
1628
1629	return err;
1630}
1631
1632static void trace__symbols__exit(struct trace *trace)
1633{
1634	machine__exit(trace->host);
1635	trace->host = NULL;
1636
1637	symbol__exit();
1638}
1639
1640static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
1641{
1642	int idx;
 
1643
1644	if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1645		nr_args = sc->fmt->nr_args;
1646
1647	sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
1648	if (sc->arg_fmt == NULL)
1649		return -1;
1650
1651	for (idx = 0; idx < nr_args; ++idx) {
1652		if (sc->fmt)
1653			sc->arg_fmt[idx] = sc->fmt->arg[idx];
1654	}
1655
1656	sc->nr_args = nr_args;
1657	return 0;
1658}
1659
/*
 * Formatters looked up purely by tracepoint field name, for fields whose
 * name alone identifies how to pretty-print them (see
 * syscall_arg_fmt__init_array()). NB: bsearch()ed by
 * __syscall_arg_fmt__find_by_name(), so keep this array sorted by ->name.
 */
static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = {
	{ .name = "msr",	.scnprintf = SCA_X86_MSR,	  .strtoul = STUL_X86_MSR,	   },
	{ .name = "vector",	.scnprintf = SCA_X86_IRQ_VECTORS, .strtoul = STUL_X86_IRQ_VECTORS, },
};
1664
1665static int syscall_arg_fmt__cmp(const void *name, const void *fmtp)
1666{
1667       const struct syscall_arg_fmt *fmt = fmtp;
1668       return strcmp(name, fmt->name);
1669}
1670
1671static struct syscall_arg_fmt *
1672__syscall_arg_fmt__find_by_name(struct syscall_arg_fmt *fmts, const int nmemb, const char *name)
1673{
1674       return bsearch(name, fmts, nmemb, sizeof(struct syscall_arg_fmt), syscall_arg_fmt__cmp);
1675}
1676
1677static struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name)
1678{
1679       const int nmemb = ARRAY_SIZE(syscall_arg_fmts__by_name);
1680       return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
1681}
1682
/*
 * Walk the tracepoint format fields and pick a default beautifier for
 * each argument that doesn't already have one, using type/name heuristics
 * (filenames, pointers, pids, modes, fds, char arrays) and, failing all
 * of those, the sorted by-name table. Returns the last field visited so
 * the caller can compute the total raw args payload size.
 */
static struct tep_format_field *
syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field)
{
	struct tep_format_field *last_field = NULL;
	int len;

	for (; field; field = field->next, ++arg) {
		last_field = field;

		/* Respect a formatter already set from the hand-written tables. */
		if (arg->scnprintf)
			continue;

		len = strlen(field->name);

		if (strcmp(field->type, "const char *") == 0 &&
		    ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
		     strstr(field->name, "path") != NULL))
			arg->scnprintf = SCA_FILENAME;
		else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
			arg->scnprintf = SCA_PTR;
		else if (strcmp(field->type, "pid_t") == 0)
			arg->scnprintf = SCA_PID;
		else if (strcmp(field->type, "umode_t") == 0)
			arg->scnprintf = SCA_MODE_T;
		else if ((field->flags & TEP_FIELD_IS_ARRAY) && strstr(field->type, "char")) {
			arg->scnprintf = SCA_CHAR_ARRAY;
			arg->nr_entries = field->arraylen;
		} else if ((strcmp(field->type, "int") == 0 ||
			  strcmp(field->type, "unsigned int") == 0 ||
			  strcmp(field->type, "long") == 0) &&
			 len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
			/*
			 * /sys/kernel/tracing/events/syscalls/sys_enter*
			 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
			 * 65 int
			 * 23 unsigned int
			 * 7 unsigned long
			 */
			arg->scnprintf = SCA_FD;
               } else {
			struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name);

			if (fmt) {
				arg->scnprintf = fmt->scnprintf;
				arg->strtoul   = fmt->strtoul;
			}
		}
	}

	return last_field;
}
1734
1735static int syscall__set_arg_fmts(struct syscall *sc)
1736{
1737	struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args);
1738
1739	if (last_field)
1740		sc->args_size = last_field->offset + last_field->size;
1741
1742	return 0;
1743}
1744
1745static int trace__read_syscall_info(struct trace *trace, int id)
1746{
1747	char tp_name[128];
1748	struct syscall *sc;
1749	const char *name = syscalltbl__name(trace->sctbl, id);
 
 
 
1750
1751#ifdef HAVE_SYSCALL_TABLE_SUPPORT
1752	if (trace->syscalls.table == NULL) {
1753		trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
1754		if (trace->syscalls.table == NULL)
1755			return -ENOMEM;
1756	}
1757#else
1758	if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) {
1759		// When using libaudit we don't know beforehand what is the max syscall id
1760		struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1761
1762		if (table == NULL)
1763			return -ENOMEM;
1764
1765		memset(table + trace->sctbl->syscalls.max_id, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));
 
 
 
 
 
1766
1767		trace->syscalls.table	      = table;
1768		trace->sctbl->syscalls.max_id = id;
1769	}
1770#endif
1771	sc = trace->syscalls.table + id;
1772	if (sc->nonexistent)
1773		return 0;
 
 
1774
1775	if (name == NULL) {
1776		sc->nonexistent = true;
1777		return 0;
 
 
 
 
 
1778	}
1779
1780	sc->name = name;
1781	sc->fmt  = syscall_fmt__find(sc->name);
1782
1783	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1784	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1785
1786	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1787		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1788		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1789	}
1790
1791	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1792		return -ENOMEM;
1793
1794	if (IS_ERR(sc->tp_format))
1795		return PTR_ERR(sc->tp_format);
1796
1797	sc->args = sc->tp_format->format.fields;
1798	/*
1799	 * We need to check and discard the first variable '__syscall_nr'
1800	 * or 'nr' that mean the syscall number. It is needless here.
1801	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1802	 */
1803	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1804		sc->args = sc->args->next;
1805		--sc->nr_args;
1806	}
1807
1808	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1809	sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
1810
1811	return syscall__set_arg_fmts(sc);
1812}
1813
1814static int evsel__init_tp_arg_scnprintf(struct evsel *evsel)
1815{
1816	struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
1817
1818	if (fmt != NULL) {
1819		syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields);
1820		return 0;
1821	}
1822
1823	return -ENOMEM;
1824}
1825
/*
 * qsort()/bsearch() comparator for ints. Uses comparisons instead of
 * subtraction: "*one - *another" overflows (undefined behavior) for
 * operands of opposite sign further than INT_MAX apart, e.g.
 * INT_MIN vs INT_MAX.
 */
static int intcmp(const void *a, const void *b)
{
	const int *one = a, *another = b;

	return (*one > *another) - (*one < *another);
}
1832
/*
 * Turn the -e/--expr strlist of syscall names (possibly globs) into the
 * sorted id array in trace->ev_qualifier_ids. Unknown names are reported
 * at debug verbosity and skipped rather than being fatal. Returns 0 on
 * success, -EINVAL/-ENOMEM when the id array can't be (re)allocated.
 */
static int trace__validate_ev_qualifier(struct trace *trace)
{
	int err = 0;
	bool printed_invalid_prefix = false;
	struct str_node *pos;
	/* One id per name to start with; glob matches may force growth below. */
	size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier);

	trace->ev_qualifier_ids.entries = malloc(nr_allocated *
						 sizeof(trace->ev_qualifier_ids.entries[0]));

	if (trace->ev_qualifier_ids.entries == NULL) {
		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
		       trace->output);
		err = -EINVAL;
		goto out;
	}

	strlist__for_each_entry(pos, trace->ev_qualifier) {
		const char *sc = pos->s;
		int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;

		if (id < 0) {
			/* Not an exact syscall name: try it as a glob. */
			id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
			if (id >= 0)
				goto matches;

			if (!printed_invalid_prefix) {
				pr_debug("Skipping unknown syscalls: ");
				printed_invalid_prefix = true;
			} else {
				pr_debug(", ");
			}

			pr_debug("%s", sc);
			continue;
		}
matches:
		trace->ev_qualifier_ids.entries[nr_used++] = id;
		if (match_next == -1)
			continue;

		/* Collect every further syscall matching the glob. */
		while (1) {
			id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
			if (id < 0)
				break;
			if (nr_allocated == nr_used) {
				void *entries;

				nr_allocated += 8;
				entries = realloc(trace->ev_qualifier_ids.entries,
						  nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
				if (entries == NULL) {
					err = -ENOMEM;
					fputs("\nError:\t Not enough memory for parsing\n", trace->output);
					goto out_free;
				}
				trace->ev_qualifier_ids.entries = entries;
			}
			trace->ev_qualifier_ids.entries[nr_used++] = id;
		}
	}

	trace->ev_qualifier_ids.nr = nr_used;
	/* Keep sorted so trace__syscall_enabled() can bsearch(). */
	qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp);
out:
	if (printed_invalid_prefix)
		pr_debug("\n");
	return err;
out_free:
	zfree(&trace->ev_qualifier_ids.entries);
	trace->ev_qualifier_ids.nr = 0;
	goto out;
}
1906
1907static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
1908{
1909	bool in_ev_qualifier;
1910
1911	if (trace->ev_qualifier_ids.nr == 0)
1912		return true;
1913
1914	in_ev_qualifier = bsearch(&id, trace->ev_qualifier_ids.entries,
1915				  trace->ev_qualifier_ids.nr, sizeof(int), intcmp) != NULL;
1916
1917	if (in_ev_qualifier)
1918	       return !trace->not_ev_qualifier;
1919
1920	return trace->not_ev_qualifier;
1921}
1922
1923/*
1924 * args is to be interpreted as a series of longs but we need to handle
1925 * 8-byte unaligned accesses. args points to raw_data within the event
1926 * and raw_data is guaranteed to be 8-byte unaligned because it is
1927 * preceded by raw_size which is a u32. So we need to copy args to a temp
1928 * variable to read it. Most notably this avoids extended load instructions
1929 * on unaligned addresses
1930 */
1931unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
1932{
1933	unsigned long val;
1934	unsigned char *p = arg->args + sizeof(unsigned long) * idx;
1935
1936	memcpy(&val, p, sizeof(val));
1937	return val;
1938}
1939
1940static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1941				      struct syscall_arg *arg)
1942{
1943	if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1944		return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1945
1946	return scnprintf(bf, size, "arg%d: ", arg->idx);
1947}
1948
1949/*
1950 * Check if the value is in fact zero, i.e. mask whatever needs masking, such
1951 * as mount 'flags' argument that needs ignoring some magic flag, see comment
1952 * in tools/perf/trace/beauty/mount_flags.c
1953 */
1954static unsigned long syscall_arg_fmt__mask_val(struct syscall_arg_fmt *fmt, struct syscall_arg *arg, unsigned long val)
1955{
1956	if (fmt && fmt->mask_val)
1957		return fmt->mask_val(arg, val);
1958
1959	return val;
1960}
1961
1962static size_t syscall_arg_fmt__scnprintf_val(struct syscall_arg_fmt *fmt, char *bf, size_t size,
1963					     struct syscall_arg *arg, unsigned long val)
1964{
1965	if (fmt && fmt->scnprintf) {
1966		arg->val = val;
1967		if (fmt->parm)
1968			arg->parm = fmt->parm;
1969		return fmt->scnprintf(bf, size, arg);
1970	}
1971	return scnprintf(bf, size, "%ld", val);
1972}
1973
/*
 * Format all arguments of a syscall into 'bf'. Uses the tracepoint format
 * fields when available; falls back to raw "argN: val" printing when the
 * /format file couldn't be read. Augmented (BPF-copied) payloads, when
 * present, are consumed by the per-argument beautifiers as they go.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, void *augmented_args, int augmented_args_size,
				      struct trace *trace, struct thread *thread)
{
	size_t printed = 0;
	unsigned long val;
	u8 bit = 1;
	struct syscall_arg arg = {
		.args	= args,
		.augmented = {
			.size = augmented_args_size,
			.args = augmented_args,
		},
		.idx	= 0,
		.mask	= 0,
		.trace  = trace,
		.thread = thread,
		.show_string_prefix = trace->show_string_prefix,
	};
	struct thread_trace *ttrace = thread__priv(thread);

	/*
	 * Things like fcntl will set this in its 'cmd' formatter to pick the
	 * right formatter for the return value (an fd? file flags?), which is
	 * not needed for syscalls that always return a given type, say an fd.
	 */
	ttrace->ret_scnprintf = NULL;

	if (sc->args != NULL) {
		struct tep_format_field *field;

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			/* Already consumed by an earlier beautifier (masked out). */
			if (arg.mask & bit)
				continue;

			arg.fmt = &sc->arg_fmt[arg.idx];
			val = syscall_arg__val(&arg, arg.idx);
			/*
			 * Some syscall args need some mask, most don't and
			 * return val untouched.
			 */
			val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val);

			/*
			 * Suppress this argument if its value is zero and
			 * we don't have a string associated in an
			 * strarray for it.
			 */
			if (val == 0 &&
			    !trace->show_zeros &&
			    !(sc->arg_fmt &&
			      (sc->arg_fmt[arg.idx].show_zero ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
			      sc->arg_fmt[arg.idx].parm))
				continue;

			printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");

			if (trace->show_arg_names)
				printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);

			printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx],
								  bf + printed, size - printed, &arg, val);
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * If we managed to read the tracepoint /format file, then we
		 * may end up not having any args, like with gettid(), so only
		 * print the raw args when we didn't manage to read it.
		 */
		while (arg.idx < sc->nr_args) {
			if (arg.mask & bit)
				goto next_arg;
			val = syscall_arg__val(&arg, arg.idx);
			if (printed)
				printed += scnprintf(bf + printed, size - printed, ", ");
			printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
			printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], bf + printed, size - printed, &arg, val);
next_arg:
			++arg.idx;
			bit <<= 1;
		}
	}

	return printed;
}
2062
/* Signature of the per-tracepoint sample handlers (sys_enter, sys_exit, ...). */
typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
2066
/*
 * Return the syscall descriptor for 'id', reading it in lazily on first
 * use. Returns NULL (with diagnostics at sufficient verbosity) for
 * invalid ids, ids that don't exist on this system, or when the info
 * can't be read.
 */
static struct syscall *trace__syscall_info(struct trace *trace,
					   struct evsel *evsel, int id)
{
	int err = 0;

	if (id < 0) {

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
 		 */
		if (verbose > 1) {
			static u64 n;
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, evsel__name(evsel), ++n);
		}
		return NULL;
	}

	err = -EINVAL;

	/*
	 * NB: both #ifdef arms fall through to the shared "goto out_cant_read":
	 * with a built-in syscall table an out-of-range id is simply invalid,
	 * while with libaudit the table grows on demand inside
	 * trace__read_syscall_info().
	 */
#ifdef HAVE_SYSCALL_TABLE_SUPPORT
	if (id > trace->sctbl->syscalls.max_id) {
#else
	if (id >= trace->sctbl->syscalls.max_id) {
		/*
		 * With libaudit we don't know beforehand what is the max_id,
		 * so we let trace__read_syscall_info() figure that out as we
		 * go on reading syscalls.
		 */
		err = trace__read_syscall_info(trace, id);
		if (err)
#endif
		goto out_cant_read;
	}

	if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
	    (err = trace__read_syscall_info(trace, id)) != 0)
		goto out_cant_read;

	if (trace->syscalls.table[id].name == NULL) {
		/* Known-missing on this system: silently skip. */
		if (trace->syscalls.table[id].nonexistent)
			return NULL;
		goto out_cant_read;
	}

	return &trace->syscalls.table[id];

out_cant_read:
	if (verbose > 0) {
		char sbuf[STRERR_BUFSIZE];
		fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
		if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
	return NULL;
}
2131
/*
 * Per-syscall accounting for the summary: call duration statistics plus,
 * with --errno-summary, a histogram of the errnos seen on failure.
 */
struct syscall_stats {
	struct stats stats;		/* duration min/max/avg/stddev */
	u64	     nr_failures;	/* calls that returned an error */
	int	     max_errno;		/* number of slots in errnos[] */
	u32	     *errnos;		/* errnos[e - 1]: count of returns with errno 'e' */
};
2138
/*
 * Fold one completed syscall into this thread's summary stats for syscall
 * 'id': duration distribution, failure count and -- when errno_summary is
 * on -- a per-errno histogram. Allocation failures degrade gracefully to
 * incomplete stats instead of aborting.
 */
static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
				 int id, struct perf_sample *sample, long err, bool errno_summary)
{
	struct int_node *inode;
	struct syscall_stats *stats;
	u64 duration = 0;

	inode = intlist__findnew(ttrace->syscall_stats, id);
	if (inode == NULL)
		return;

	stats = inode->priv;
	if (stats == NULL) {
		/* First occurrence of this syscall for the thread. */
		stats = malloc(sizeof(*stats));
		if (stats == NULL)
			return;

		stats->nr_failures = 0;
		stats->max_errno   = 0;
		stats->errnos	   = NULL;
		init_stats(&stats->stats);
		inode->priv = stats;
	}

	/* entry_time == 0: sys_exit without a seen sys_enter, skip the duration. */
	if (ttrace->entry_time && sample->time > ttrace->entry_time)
		duration = sample->time - ttrace->entry_time;

	update_stats(&stats->stats, duration);

	if (err < 0) {
		++stats->nr_failures;

		if (!errno_summary)
			return;

		err = -err;
		/* Grow the histogram to cover this errno; new slots start at zero. */
		if (err > stats->max_errno) {
			u32 *new_errnos = realloc(stats->errnos, err * sizeof(u32));

			if (new_errnos) {
				memset(new_errnos + stats->max_errno, 0, (err - stats->max_errno) * sizeof(u32));
			} else {
				pr_debug("Not enough memory for errno stats for thread \"%s\"(%d/%d), results will be incomplete\n",
					 thread__comm_str(thread), thread->pid_, thread->tid);
				return;
			}

			stats->errnos = new_errnos;
			stats->max_errno = err;
		}

		++stats->errnos[err - 1];
	}
}
2193
2194static int trace__printf_interrupted_entry(struct trace *trace)
2195{
2196	struct thread_trace *ttrace;
2197	size_t printed;
2198	int len;
2199
2200	if (trace->failure_only || trace->current == NULL)
2201		return 0;
2202
2203	ttrace = thread__priv(trace->current);
2204
2205	if (!ttrace->entry_pending)
2206		return 0;
2207
2208	printed  = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
2209	printed += len = fprintf(trace->output, "%s)", ttrace->entry_str);
2210
2211	if (len < trace->args_alignment - 4)
2212		printed += fprintf(trace->output, "%-*s", trace->args_alignment - 4 - len, " ");
2213
2214	printed += fprintf(trace->output, " ...\n");
2215
2216	ttrace->entry_pending = false;
2217	++trace->nr_events_printed;
2218
2219	return printed;
2220}
2221
2222static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
2223				 struct perf_sample *sample, struct thread *thread)
2224{
2225	int printed = 0;
2226
2227	if (trace->print_sample) {
2228		double ts = (double)sample->time / NSEC_PER_MSEC;
2229
2230		printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
2231				   evsel__name(evsel), ts,
2232				   thread__comm_str(thread),
2233				   sample->pid, sample->tid, sample->cpu);
2234	}
2235
2236	return printed;
2237}
2238
2239static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
2240{
2241	void *augmented_args = NULL;
2242	/*
2243	 * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
2244	 * and there we get all 6 syscall args plus the tracepoint common fields
2245	 * that gets calculated at the start and the syscall_nr (another long).
2246	 * So we check if that is the case and if so don't look after the
2247	 * sc->args_size but always after the full raw_syscalls:sys_enter payload,
2248	 * which is fixed.
2249	 *
2250	 * We'll revisit this later to pass s->args_size to the BPF augmenter
2251	 * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
2252	 * copies only what we need for each syscall, like what happens when we
2253	 * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace
2254	 * traffic to just what is needed for each syscall.
2255	 */
2256	int args_size = raw_augmented_args_size ?: sc->args_size;
2257
2258	*augmented_args_size = sample->raw_size - args_size;
2259	if (*augmented_args_size > 0)
2260		augmented_args = sample->raw_data + args_size;
2261
2262	return augmented_args;
2263}
2264
/*
 * Handle a sys_enter sample: format the syscall name and arguments into
 * the thread's entry_str. The line is printed right away only for
 * syscalls that never return (exit, exit_group); otherwise it is left
 * pending and completed when the matching sys_exit arrives.
 */
static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	int printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	int augmented_args_size = 0;
	void *augmented_args = NULL;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	trace__fprintf_sample(trace, evsel, sample, thread);

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* Lazily allocate the per-thread line buffer. */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
		trace__printf_interrupted_entry(trace);
	/*
	 * If this is raw_syscalls.sys_enter, then it always comes with the 6 possible
	 * arguments, even if the syscall being handled, say "openat", uses only 4 arguments
	 * this breaks syscall__augmented_args() check for augmented args, as we calculate
	 * syscall->args_size using each syscalls:sys_enter_NAME tracefs format file,
	 * so when handling, say the openat syscall, we end up getting 6 args for the
	 * raw_syscalls:sys_enter event, when we expected just 4, we end up mistakenly
	 * thinking that the extra 2 u64 args are the augmented filename, so just check
	 * here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
	 */
	if (evsel != trace->syscalls.events.sys_enter)
		augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, augmented_args, augmented_args_size, trace, thread);

	if (sc->is_exit) {
		/* Never returns: print the entry line right away with a '?' result. */
		if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
			int alignment = 0;

			trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
			printed = fprintf(trace->output, "%s)", ttrace->entry_str);
			if (trace->args_alignment > printed)
				alignment = trace->args_alignment - printed;
			fprintf(trace->output, "%*s= ?\n", alignment, " ");
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	/* Track the current thread for trace__printf_interrupted_entry(). */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2343
2344static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
2345				    struct perf_sample *sample)
2346{
2347	struct thread_trace *ttrace;
2348	struct thread *thread;
2349	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2350	struct syscall *sc = trace__syscall_info(trace, evsel, id);
2351	char msg[1024];
2352	void *args, *augmented_args = NULL;
2353	int augmented_args_size;
2354
2355	if (sc == NULL)
2356		return -1;
2357
2358	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2359	ttrace = thread__trace(thread, trace->output);
2360	/*
2361	 * We need to get ttrace just to make sure it is there when syscall__scnprintf_args()
2362	 * and the rest of the beautifiers accessing it via struct syscall_arg touches it.
2363	 */
2364	if (ttrace == NULL)
2365		goto out_put;
2366
2367	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2368	augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
2369	syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
2370	fprintf(trace->output, "%s", msg);
2371	err = 0;
2372out_put:
2373	thread__put(thread);
2374	return err;
2375}
2376
2377static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
2378				    struct perf_sample *sample,
2379				    struct callchain_cursor *cursor)
2380{
2381	struct addr_location al;
2382	int max_stack = evsel->core.attr.sample_max_stack ?
2383			evsel->core.attr.sample_max_stack :
2384			trace->max_stack;
2385	int err;
2386
2387	if (machine__resolve(trace->host, &al, sample) < 0)
2388		return -1;
2389
2390	err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
2391	addr_location__put(&al);
2392	return err;
2393}
2394
2395static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
2396{
2397	/* TODO: user-configurable print_opts */
2398	const unsigned int print_opts = EVSEL__PRINT_SYM |
2399				        EVSEL__PRINT_DSO |
2400				        EVSEL__PRINT_UNKNOWN_AS_ADDR;
2401
2402	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output);
2403}
2404
/*
 * Map an errno value to its name ("ENOENT", ...) using the table for the
 * architecture the session was recorded on, not the host's.
 */
static const char *errno_to_name(struct evsel *evsel, int err)
{
	return arch_syscalls__strerrno(perf_env__arch(evsel__env(evsel)), err);
}
2412
/*
 * Handler for raw_syscalls:sys_exit: pairs with the thread's pending
 * sys_enter, computes the syscall duration and pretty-prints the return
 * value, honouring the --summary/--duration/--failure filters.
 *
 * Returns 0 on success, -1 if the syscall id or per-thread state cannot be
 * resolved.
 */
static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	bool duration_calculated = false;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
	int alignment = trace->args_alignment;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	trace__fprintf_sample(trace, evsel, sample, thread);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	if (trace->summary)
		thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary);

	/*
	 * A successful open-like syscall with a vfs_getname still pending
	 * gives us the fd -> pathname mapping for later beautification.
	 */
	if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
		duration_calculated = true;
	} else if (trace->duration_filter)
		goto out;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Fewer frames than --min-stack: suppress this event. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	if (trace->summary_only || (ret >= 0 && trace->failure_only))
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);

	if (ttrace->entry_pending) {
		printed = fprintf(trace->output, "%s", ttrace->entry_str);
	} else {
		/* Enter side already flushed by trace__printf_interrupted_entry(). */
		printed += fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		printed += 9;
		printed += fprintf(trace->output, "]: %s()", sc->name);
	}

	printed++; /* the closing ')' */

	if (alignment > printed)
		alignment -= printed;
	else
		alignment = 0;

	fprintf(trace->output, ")%*s= ", alignment, " ");

	/* Pick a return-value formatter: errno, timeout, custom, hex, pid or plain. */
	if (sc->fmt == NULL) {
		if (ret < 0)
			goto errno_print;
signed_print:
		fprintf(trace->output, "%ld", ret);
	} else if (ret < 0) {
errno_print: {
		char bf[STRERR_BUFSIZE];
		const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
			   *e = errno_to_name(evsel, -ret);

		fprintf(trace->output, "-1 %s (%s)", e, emsg);
	}
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, "0 (Timeout)");
	else if (ttrace->ret_scnprintf) {
		/* One-shot formatter installed by an arg beautifier at enter time. */
		char bf[1024];
		struct syscall_arg arg = {
			.val	= ret,
			.thread	= thread,
			.trace	= trace,
		};
		ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
		ttrace->ret_scnprintf = NULL;
		fprintf(trace->output, "%s", bf);
	} else if (sc->fmt->hexret)
		fprintf(trace->output, "%#lx", ret);
	else if (sc->fmt->errpid) {
		/* Return value is a pid (fork/clone-like): show its comm too. */
		struct thread *child = machine__find_thread(trace->host, ret, ret);

		if (child != NULL) {
			fprintf(trace->output, "%ld", ret);
			if (child->comm_set)
				fprintf(trace->output, " (%s)", thread__comm_str(child));
			thread__put(child);
		}
	} else
		goto signed_print;

	fputc('\n', trace->output);

	/*
	 * We only consider an 'event' for the sake of --max-events a non-filtered
	 * sys_enter + sys_exit and other tracepoint events.
	 */
	if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
		interrupted = true;

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2546
/*
 * Handler for the probe:vfs_getname probe: captures the pathname being
 * resolved and, if a syscall entry line is pending, splices it into that
 * line's buffer at the position the pointer arg was printed at.
 *
 * Always returns 0; failures just mean the pathname isn't beautified.
 */
static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len, entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out_put;

	filename_len = strlen(filename);
	if (filename_len == 0)
		goto out_put;

	/* Grow the cached name buffer if this pathname is longer than before. */
	if (ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
			goto out_put;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	/* Consumed at sys_exit time to map fd -> pathname, see trace__sys_exit. */
	ttrace->filename.pending_open = true;

	/* No pointer arg recorded at sys_enter: nothing to splice into. */
	if (!ttrace->filename.ptr)
		goto out_put;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out_put;

	/* Keep the tail of the pathname if it doesn't fit whole. */
	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	/* Open a gap at entry_str_pos and drop the pathname into it. */
	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out_put:
	thread__put(thread);
out:
	return 0;
}
2607
2608static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
2609				     union perf_event *event __maybe_unused,
2610				     struct perf_sample *sample)
2611{
2612        u64 runtime = evsel__intval(evsel, sample, "runtime");
2613	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2614	struct thread *thread = machine__findnew_thread(trace->host,
2615							sample->pid,
2616							sample->tid);
2617	struct thread_trace *ttrace = thread__trace(thread, trace->output);
2618
2619	if (ttrace == NULL)
2620		goto out_dump;
2621
2622	ttrace->runtime_ms += runtime_ms;
2623	trace->runtime_ms += runtime_ms;
2624out_put:
2625	thread__put(thread);
2626	return 0;
2627
2628out_dump:
2629	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2630	       evsel->name,
2631	       evsel__strval(evsel, sample, "comm"),
2632	       (pid_t)evsel__intval(evsel, sample, "pid"),
2633	       runtime,
2634	       evsel__intval(evsel, sample, "vruntime"));
2635	goto out_put;
2636}
2637
2638static int bpf_output__printer(enum binary_printer_ops op,
2639			       unsigned int val, void *extra __maybe_unused, FILE *fp)
2640{
2641	unsigned char ch = (unsigned char)val;
 
 
2642
2643	switch (op) {
2644	case BINARY_PRINT_CHAR_DATA:
2645		return fprintf(fp, "%c", isprint(ch) ? ch : '.');
2646	case BINARY_PRINT_DATA_BEGIN:
2647	case BINARY_PRINT_LINE_BEGIN:
2648	case BINARY_PRINT_ADDR:
2649	case BINARY_PRINT_NUM_DATA:
2650	case BINARY_PRINT_NUM_PAD:
2651	case BINARY_PRINT_SEP:
2652	case BINARY_PRINT_CHAR_PAD:
2653	case BINARY_PRINT_LINE_END:
2654	case BINARY_PRINT_DATA_END:
2655	default:
2656		break;
2657	}
2658
2659	return 0;
2660}
2661
/* Hex+ASCII dump of a PERF_COUNT_SW_BPF_OUTPUT sample's raw payload, 8 bytes per line. */
static void bpf_output__fprintf(struct trace *trace,
				struct perf_sample *sample)
{
	binary__fprintf(sample->raw_data, sample->raw_size, 8,
			bpf_output__printer, NULL, trace->output);
	/* Counts towards --max-events, like any other printed event. */
	++trace->nr_events_printed;
}
2669
2670static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, struct perf_sample *sample,
2671				       struct thread *thread, void *augmented_args, int augmented_args_size)
2672{
2673	char bf[2048];
2674	size_t size = sizeof(bf);
2675	struct tep_format_field *field = evsel->tp_format->format.fields;
2676	struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel);
2677	size_t printed = 0;
2678	unsigned long val;
2679	u8 bit = 1;
2680	struct syscall_arg syscall_arg = {
2681		.augmented = {
2682			.size = augmented_args_size,
2683			.args = augmented_args,
2684		},
2685		.idx	= 0,
2686		.mask	= 0,
2687		.trace  = trace,
2688		.thread = thread,
2689		.show_string_prefix = trace->show_string_prefix,
2690	};
2691
2692	for (; field && arg; field = field->next, ++syscall_arg.idx, bit <<= 1, ++arg) {
2693		if (syscall_arg.mask & bit)
2694			continue;
2695
2696		syscall_arg.len = 0;
2697		syscall_arg.fmt = arg;
2698		if (field->flags & TEP_FIELD_IS_ARRAY) {
2699			int offset = field->offset;
2700
2701			if (field->flags & TEP_FIELD_IS_DYNAMIC) {
2702				offset = format_field__intval(field, sample, evsel->needs_swap);
2703				syscall_arg.len = offset >> 16;
2704				offset &= 0xffff;
2705			}
2706
2707			val = (uintptr_t)(sample->raw_data + offset);
2708		} else
2709			val = format_field__intval(field, sample, evsel->needs_swap);
2710		/*
2711		 * Some syscall args need some mask, most don't and
2712		 * return val untouched.
2713		 */
2714		val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val);
2715
2716		/*
2717		 * Suppress this argument if its value is zero and
2718		 * and we don't have a string associated in an
2719		 * strarray for it.
2720		 */
2721		if (val == 0 &&
2722		    !trace->show_zeros &&
2723		    !((arg->show_zero ||
2724		       arg->scnprintf == SCA_STRARRAY ||
2725		       arg->scnprintf == SCA_STRARRAYS) &&
2726		      arg->parm))
2727			continue;
2728
2729		printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
2730
2731		/*
2732		 * XXX Perhaps we should have a show_tp_arg_names,
2733		 * leaving show_arg_names just for syscalls?
2734		 */
2735		if (1 || trace->show_arg_names)
2736			printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
2737
2738		printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val);
2739	}
2740
2741	return printed + fprintf(trace->output, "%s", bf);
2742}
2743
/*
 * Generic handler for non-raw-syscall events (tracepoints, bpf-output,
 * augmented syscalls): prints a timestamped "name(args)" line, optionally
 * followed by a callchain, and enforces the per-event --max-events limit.
 */
static int trace__event_handler(struct trace *trace, struct evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	struct thread *thread;
	int callchain_ret = 0;
	/*
	 * Check if we called perf_evsel__disable(evsel) due to, for instance,
	 * this event's max_events having been hit and this is an entry coming
	 * from the ring buffer that we should discard, since the max events
	 * have already been considered/printed.
	 */
	if (evsel->disabled)
		return 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Fewer frames than --min-stack: suppress this event. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	trace__printf_interrupted_entry(trace);
	trace__fprintf_tstamp(trace, sample->time, trace->output);

	/* Pad where the duration column would be, to keep alignment with syscalls. */
	if (trace->trace_syscalls && trace->show_duration)
		fprintf(trace->output, "(         ): ");

	if (thread)
		trace__fprintf_comm_tid(trace, thread, trace->output);

	/* Augmented (BPF-copied) syscall: print it like a regular sys_enter. */
	if (evsel == trace->syscalls.events.augmented) {
		int id = perf_evsel__sc_tp_uint(evsel, id, sample);
		struct syscall *sc = trace__syscall_info(trace, evsel, id);

		if (sc) {
			fprintf(trace->output, "%s(", sc->name);
			trace__fprintf_sys_enter(trace, evsel, sample);
			fputc(')', trace->output);
			goto newline;
		}

		/*
		 * XXX: Not having the associated syscall info or not finding/adding
		 * 	the thread should never happen, but if it does...
		 * 	fall thru and print it as a bpf_output event.
		 */
	}

	fprintf(trace->output, "%s(", evsel->name);

	if (evsel__is_bpf_output(evsel)) {
		bpf_output__fprintf(trace, sample);
	} else if (evsel->tp_format) {
		/* syscalls:sys_enter_NAME events get the syscall-style printer first. */
		if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
		    trace__fprintf_sys_enter(trace, evsel, sample)) {
			if (trace->libtraceevent_print) {
				event_format__fprintf(evsel->tp_format, sample->cpu,
						      sample->raw_data, sample->raw_size,
						      trace->output);
			} else {
				trace__fprintf_tp_fields(trace, evsel, sample, thread, NULL, 0);
			}
		}
	}

newline:
	fprintf(trace->output, ")\n");

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));

	++trace->nr_events_printed;

	/* Per-event --max-events limit: stop this evsel without stopping the session. */
	if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
		evsel__disable(evsel);
		evsel__close(evsel);
	}
out:
	thread__put(thread);
	return 0;
}
2832
2833static void print_location(FILE *f, struct perf_sample *sample,
2834			   struct addr_location *al,
2835			   bool print_dso, bool print_sym)
2836{
2837
2838	if ((verbose > 0 || print_dso) && al->map)
2839		fprintf(f, "%s@", al->map->dso->long_name);
2840
2841	if ((verbose > 0 || print_sym) && al->sym)
2842		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2843			al->addr - al->sym->start);
2844	else if (al->map)
2845		fprintf(f, "0x%" PRIx64, al->addr);
2846	else
2847		fprintf(f, "0x%" PRIx64, sample->addr);
2848}
2849
/*
 * Handler for page-fault software events: bumps the per-thread maj/min
 * fault counters and, unless --summary-only, prints
 * "majfault [ip_location] => addr_location (type)".
 *
 * Returns 0 on success, -1 when the per-thread state can't be set up.
 */
static int trace__pgfault(struct trace *trace,
			  struct evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';	/* 'd'ata; becomes 'x' (executable) or '?' below */
	struct thread_trace *ttrace;
	int err = -1;
	int callchain_ret = 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Fewer frames than --min-stack: suppress this event. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out_put;
			callchain_ret = 1;
		}
	}

	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	/* Where the faulting instruction is. */
	thread__find_symbol(thread, sample->cpumode, sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	/* Where the faulting data address lands. */
	thread__find_symbol(thread, sample->cpumode, sample->addr, &al);

	if (!al.map) {
		/*
		 * NOTE(review): this repeats the thread__find_symbol() call
		 * just above with identical arguments — looks redundant;
		 * confirm against history (the two lookups used to differ
		 * by map type before the function/variable maps merge).
		 */
		thread__find_symbol(thread, sample->cpumode, sample->addr, &al);

		if (al.map)
			map_type = 'x';
		else
			map_type = '?';
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));

	++trace->nr_events_printed;
out:
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2924
2925static void trace__set_base_time(struct trace *trace,
2926				 struct evsel *evsel,
2927				 struct perf_sample *sample)
2928{
2929	/*
2930	 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2931	 * and don't use sample->time unconditionally, we may end up having
2932	 * some other event in the future without PERF_SAMPLE_TIME for good
2933	 * reason, i.e. we may not be interested in its timestamps, just in
2934	 * it taking place, picking some piece of information when it
2935	 * appears in our event stream (vfs_getname comes to mind).
2936	 */
2937	if (trace->base_time == 0 && !trace->full_time &&
2938	    (evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
2939		trace->base_time = sample->time;
2940}
2941
2942static int trace__process_sample(struct perf_tool *tool,
2943				 union perf_event *event,
2944				 struct perf_sample *sample,
2945				 struct evsel *evsel,
2946				 struct machine *machine __maybe_unused)
2947{
2948	struct trace *trace = container_of(tool, struct trace, tool);
2949	struct thread *thread;
2950	int err = 0;
2951
2952	tracepoint_handler handler = evsel->handler;
2953
2954	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2955	if (thread && thread__is_filtered(thread))
2956		goto out;
2957
2958	trace__set_base_time(trace, evsel, sample);
2959
2960	if (handler) {
2961		++trace->nr_events;
2962		handler(trace, evsel, event, sample);
2963	}
2964out:
2965	thread__put(thread);
2966	return err;
2967}
2968
/*
 * Implement 'perf trace record': build an argv for 'perf record' with the
 * raw_syscalls (or legacy syscalls) tracepoints, a filter excluding our own
 * pid, optional page-fault events, and the user's extra args, then run it.
 *
 * Returns cmd_record()'s result, or -1 on allocation failure or when no
 * syscall tracepoints exist.
 */
static int trace__record(struct trace *trace, int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
	};
	pid_t pid = getpid();
	/* Filter out events from the 'perf trace' process itself. */
	char *filter = asprintf__tp_filter_pids(1, &pid);
	const char * const sc_args[] = { "-e", };
	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
	const char * const majpf_args[] = { "-e", "major-faults" };
	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
	const char * const minpf_args[] = { "-e", "minor-faults" };
	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
	int err = -1;

	/* +3 is for the event string below and the pid filter */
	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 3 +
		majpf_args_nr + minpf_args_nr + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL || filter == NULL)
		goto out_free;

	j = 0;
	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[j++] = record_args[i];

	if (trace->trace_syscalls) {
		for (i = 0; i < sc_args_nr; i++)
			rec_argv[j++] = sc_args[i];

		/* event string may be different for older kernels - e.g., RHEL6 */
		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
		else if (is_valid_tracepoint("syscalls:sys_enter"))
			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
		else {
			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
			goto out_free;
		}
	}

	rec_argv[j++] = "--filter";
	rec_argv[j++] = filter;

	if (trace->trace_pgfaults & TRACE_PFMAJ)
		for (i = 0; i < majpf_args_nr; i++)
			rec_argv[j++] = majpf_args[i];

	if (trace->trace_pgfaults & TRACE_PFMIN)
		for (i = 0; i < minpf_args_nr; i++)
			rec_argv[j++] = minpf_args[i];

	/* Append the user-supplied 'perf trace record' arguments verbatim. */
	for (i = 0; i < (unsigned int)argc; i++)
		rec_argv[j++] = argv[i];

	err = cmd_record(j, rec_argv);
out_free:
	free(filter);
	free(rec_argv);
	return err;
}
3036
3037static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
3038
3039static bool evlist__add_vfs_getname(struct evlist *evlist)
3040{
3041	bool found = false;
3042	struct evsel *evsel, *tmp;
3043	struct parse_events_error err;
3044	int ret;
3045
3046	bzero(&err, sizeof(err));
3047	ret = parse_events(evlist, "probe:vfs_getname*", &err);
3048	if (ret) {
3049		free(err.str);
3050		free(err.help);
3051		free(err.first_str);
3052		free(err.first_help);
3053		return false;
3054	}
3055
3056	evlist__for_each_entry_safe(evlist, evsel, tmp) {
3057		if (!strstarts(evsel__name(evsel), "probe:vfs_getname"))
3058			continue;
3059
3060		if (evsel__field(evsel, "pathname")) {
3061			evsel->handler = trace__vfs_getname;
3062			found = true;
3063			continue;
3064		}
3065
3066		list_del_init(&evsel->core.node);
3067		evsel->evlist = NULL;
3068		evsel__delete(evsel);
3069	}
3070
3071	return found;
3072}
3073
3074static struct evsel *evsel__new_pgfault(u64 config)
3075{
3076	struct evsel *evsel;
3077	struct perf_event_attr attr = {
3078		.type = PERF_TYPE_SOFTWARE,
3079		.mmap_data = 1,
3080	};
3081
3082	attr.config = config;
3083	attr.sample_period = 1;
3084
3085	event_attr_init(&attr);
3086
3087	evsel = evsel__new(&attr);
3088	if (evsel)
3089		evsel->handler = trace__pgfault;
3090
3091	return evsel;
3092}
3093
3094static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
3095{
3096	const u32 type = event->header.type;
3097	struct evsel *evsel;
3098
3099	if (type != PERF_RECORD_SAMPLE) {
3100		trace__process_event(trace, trace->host, event, sample);
3101		return;
3102	}
3103
3104	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
3105	if (evsel == NULL) {
3106		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
3107		return;
3108	}
3109
3110	if (evswitch__discard(&trace->evswitch, evsel))
 
3111		return;
3112
3113	trace__set_base_time(trace, evsel, sample);
3114
3115	if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
3116	    sample->raw_data == NULL) {
3117		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
3118		       evsel__name(evsel), sample->tid,
3119		       sample->cpu, sample->raw_size);
3120	} else {
3121		tracepoint_handler handler = evsel->handler;
3122		handler(trace, evsel, event, sample);
3123	}
3124
3125	if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
3126		interrupted = true;
3127}
3128
/*
 * Create the raw_syscalls:sys_enter/sys_exit tracepoint evsels, set up their
 * payload accessors and callchain config, and add them to the evlist.
 *
 * Returns 0 on success, -1 on failure (partially created evsels are freed).
 */
static int trace__add_syscall_newtp(struct trace *trace)
{
	int ret = -1;
	struct evlist *evlist = trace->evlist;
	struct evsel *sys_enter, *sys_exit;

	sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
	if (sys_enter == NULL)
		goto out;

	/* Cache the offset/accessor of the "args" payload field. */
	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
		goto out_delete_sys_enter;

	sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
	if (sys_exit == NULL)
		goto out_delete_sys_enter;

	/* Cache the offset/accessor of the "ret" payload field. */
	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
		goto out_delete_sys_exit;

	evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
	evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);

	evlist__add(evlist, sys_enter);
	evlist__add(evlist, sys_exit);

	if (callchain_param.enabled && !trace->kernel_syscallchains) {
		/*
		 * We're interested only in the user space callchain
		 * leading to the syscall, allow overriding that for
		 * debugging reasons using --kernel_syscall_callchains
		 */
		sys_exit->core.attr.exclude_callchain_kernel = 1;
	}

	trace->syscalls.events.sys_enter = sys_enter;
	trace->syscalls.events.sys_exit  = sys_exit;

	ret = 0;
out:
	return ret;

out_delete_sys_exit:
	evsel__delete_priv(sys_exit);
out_delete_sys_enter:
	evsel__delete_priv(sys_enter);
	goto out;
}
3177
3178static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
3179{
3180	int err = -1;
3181	struct evsel *sys_exit;
3182	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
3183						trace->ev_qualifier_ids.nr,
3184						trace->ev_qualifier_ids.entries);
3185
3186	if (filter == NULL)
3187		goto out_enomem;
3188
3189	if (!evsel__append_tp_filter(trace->syscalls.events.sys_enter, filter)) {
3190		sys_exit = trace->syscalls.events.sys_exit;
3191		err = evsel__append_tp_filter(sys_exit, filter);
3192	}
3193
3194	free(filter);
3195out:
3196	return err;
3197out_enomem:
3198	errno = ENOMEM;
3199	goto out;
3200}
3201
3202#ifdef HAVE_LIBBPF_SUPPORT
3203static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
3204{
3205	if (trace->bpf_obj == NULL)
3206		return NULL;
3207
3208	return bpf_object__find_map_by_name(trace->bpf_obj, name);
3209}
3210
/* Resolve the BPF map used to filter out samples from uninteresting pids. */
static void trace__set_bpf_map_filtered_pids(struct trace *trace)
{
	trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
}
3215
/*
 * Resolve the BPF maps for per-syscall state and the sys_enter/sys_exit
 * tail-call program arrays (indexed by syscall id).
 */
static void trace__set_bpf_map_syscalls(struct trace *trace)
{
	trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
	trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
	trace->syscalls.prog_array.sys_exit  = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
}
3222
3223static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
3224{
3225	if (trace->bpf_obj == NULL)
3226		return NULL;
3227
3228	return bpf_object__find_program_by_title(trace->bpf_obj, name);
3229}
3230
/*
 * Find the BPF augmenter program for one side (enter/exit) of a syscall:
 * an explicitly named program if prog_name is given, otherwise the default
 * "!syscalls:sys_<type>_<name>" section (also trying the syscall's alias).
 * Falls back to the "unaugmented" program when nothing matches.
 */
static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
							const char *prog_name, const char *type)
{
	struct bpf_program *prog;

	if (prog_name == NULL) {
		char default_prog_name[256];
		scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
		prog = trace__find_bpf_program_by_title(trace, default_prog_name);
		if (prog != NULL)
			goto out_found;
		if (sc->fmt && sc->fmt->alias) {
			scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
			prog = trace__find_bpf_program_by_title(trace, default_prog_name);
			if (prog != NULL)
				goto out_found;
		}
		goto out_unaugmented;
	}

	prog = trace__find_bpf_program_by_title(trace, prog_name);

	if (prog != NULL) {
out_found:
		return prog;
	}

	/* An explicitly requested program that doesn't exist is worth reporting. */
	pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
		 prog_name, type, sc->name);
out_unaugmented:
	return trace->syscalls.unaugmented_prog;
}
3263
3264static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
3265{
3266	struct syscall *sc = trace__syscall_info(trace, NULL, id);
3267
3268	if (sc == NULL)
3269		return;
3270
3271	sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter");
3272	sc->bpf_prog.sys_exit  = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit  : NULL,  "exit");
3273}
3274
/*
 * fd of the BPF program to tail-call for this syscall's sys_enter, falling
 * back to the "unaugmented" program when the id has no table entry.
 */
static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
{
	struct syscall *sc = trace__syscall_info(trace, NULL, id);
	return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
}
3280
/*
 * fd of the BPF program to tail-call for this syscall's sys_exit, falling
 * back to the "unaugmented" program when the id has no table entry.
 */
static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
{
	struct syscall *sc = trace__syscall_info(trace, NULL, id);
	return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
}
3286
3287static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
3288{
3289	struct syscall *sc = trace__syscall_info(trace, NULL, id);
3290	int arg = 0;
3291
3292	if (sc == NULL)
3293		goto out;
3294
3295	for (; arg < sc->nr_args; ++arg) {
3296		entry->string_args_len[arg] = 0;
3297		if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) {
3298			/* Should be set like strace -s strsize */
3299			entry->string_args_len[arg] = PATH_MAX;
3300		}
3301	}
3302out:
3303	for (; arg < 6; ++arg)
3304		entry->string_args_len[arg] = 0;
3305}
3306static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
3307{
3308	int fd = bpf_map__fd(trace->syscalls.map);
3309	struct bpf_map_syscall_entry value = {
3310		.enabled = !trace->not_ev_qualifier,
3311	};
3312	int err = 0;
3313	size_t i;
3314
3315	for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
3316		int key = trace->ev_qualifier_ids.entries[i];
3317
3318		if (value.enabled) {
3319			trace__init_bpf_map_syscall_args(trace, key, &value);
3320			trace__init_syscall_bpf_progs(trace, key);
3321		}
3322
3323		err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
3324		if (err)
3325			break;
3326	}
3327
3328	return err;
3329}
3330
3331static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
3332{
3333	int fd = bpf_map__fd(trace->syscalls.map);
3334	struct bpf_map_syscall_entry value = {
3335		.enabled = enabled,
3336	};
3337	int err = 0, key;
3338
3339	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3340		if (enabled)
3341			trace__init_bpf_map_syscall_args(trace, key, &value);
3342
3343		err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
3344		if (err)
3345			break;
3346	}
3347
3348	return err;
3349}
3350
3351static int trace__init_syscalls_bpf_map(struct trace *trace)
3352{
3353	bool enabled = true;
3354
3355	if (trace->ev_qualifier_ids.nr)
3356		enabled = trace->not_ev_qualifier;
3357
3358	return __trace__init_syscalls_bpf_map(trace, enabled);
3359}
3360
/*
 * Look for another syscall whose sys_enter augmenter can be reused for 'sc':
 * the candidate must have the same pointer/non-pointer layout and identical
 * types for the pointer args they share, and must not copy extra pointers
 * past what 'sc' has. Returns the reusable program or NULL.
 */
static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
{
	struct tep_format_field *field, *candidate_field;
	int id;

	/*
	 * We're only interested in syscalls that have a pointer:
	 */
	for (field = sc->args; field; field = field->next) {
		if (field->flags & TEP_FIELD_IS_POINTER)
			goto try_to_find_pair;
	}

	return NULL;

try_to_find_pair:
	for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
		struct syscall *pair = trace__syscall_info(trace, NULL, id);
		struct bpf_program *pair_prog;
		bool is_candidate = false;

		/* Skip self and candidates without a real augmenter */
		if (pair == NULL || pair == sc ||
		    pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
			continue;

		/* Walk both arg lists in lockstep comparing shapes */
		for (field = sc->args, candidate_field = pair->args;
		     field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
			bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
			     candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;

			if (is_pointer) {
			       if (!candidate_is_pointer) {
					// The candidate just doesn't copy our pointer arg here, it might copy other pointers we want.
					continue;
			       }
			} else {
				if (candidate_is_pointer) {
					// The candidate might copy a pointer we don't have, skip it.
					goto next_candidate;
				}
				continue;
			}

			/* Both are pointers: the pointed-to types must match */
			if (strcmp(field->type, candidate_field->type))
				goto next_candidate;

			is_candidate = true;
		}

		if (!is_candidate)
			goto next_candidate;

		/*
		 * Check if the tentative pair syscall augmenter has more pointers, if it has,
		 * then it may be collecting that and we then can't use it, as it would collect
		 * more than what is common to the two syscalls.
		 */
		if (candidate_field) {
			for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
				if (candidate_field->flags & TEP_FIELD_IS_POINTER)
					goto next_candidate;
		}

		pair_prog = pair->bpf_prog.sys_enter;
		/*
		 * If the pair isn't enabled, then its bpf_prog.sys_enter will not
		 * have been searched for, so search it here and if it returns the
		 * unaugmented one, then ignore it, otherwise we'll reuse that BPF
		 * program for a filtered syscall on a non-filtered one.
		 *
		 * For instance, we have "!syscalls:sys_enter_renameat" and that is
		 * useful for "renameat2".
		 */
		if (pair_prog == NULL) {
			pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
			if (pair_prog == trace->syscalls.unaugmented_prog)
				goto next_candidate;
		}

		pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
		return pair_prog;
	next_candidate:
		continue;
	}

	return NULL;
}
3448
/*
 * Fill the sys_enter/sys_exit BPF tail-call prog arrays: first with each
 * enabled syscall's own augmenter (or the unaugmented fallback), then do a
 * reuse pass hooking up compatible augmenters for syscalls without one.
 */
static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
{
	int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
	    map_exit_fd  = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
	int err = 0, key;

	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
		int prog_fd;

		if (!trace__syscall_enabled(trace, key))
			continue;

		trace__init_syscall_bpf_progs(trace, key);

		// It'll get at least the "!raw_syscalls:unaugmented"
		prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
		err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
		if (err)
			break;
		prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
		err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
		if (err)
			break;
	}

	/*
	 * Now lets do a second pass looking for enabled syscalls without
	 * an augmenter that have a signature that is a superset of another
	 * syscall with an augmenter so that we can auto-reuse it.
	 *
	 * I.e. if we have an augmenter for the "open" syscall that has
	 * this signature:
	 *
	 *   int open(const char *pathname, int flags, mode_t mode);
	 *
	 * I.e. that will collect just the first string argument, then we
	 * can reuse it for the 'creat' syscall, that has this signature:
	 *
	 *   int creat(const char *pathname, mode_t mode);
	 *
	 * and for:
	 *
	 *   int stat(const char *pathname, struct stat *statbuf);
	 *   int lstat(const char *pathname, struct stat *statbuf);
	 *
	 * Because the 'open' augmenter will collect the first arg as a string,
	 * and leave alone all the other args, which already helps with
	 * beautifying 'stat' and 'lstat''s pathname arg.
	 *
	 * Then, in time, when 'stat' gets an augmenter that collects both
	 * first and second arg (this one on the raw_syscalls:sys_exit prog
	 * array tail call, then that one will be used.
	 */
	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
		struct syscall *sc = trace__syscall_info(trace, NULL, key);
		struct bpf_program *pair_prog;
		int prog_fd;

		if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
			continue;

		/*
		 * For now we're just reusing the sys_enter prog, and if it
		 * already has an augmenter, we don't need to find one.
		 */
		if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
			continue;

		/*
		 * Look at all the other syscalls for one that has a signature
		 * that is close enough that we can share:
		 */
		pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
		if (pair_prog == NULL)
			continue;

		sc->bpf_prog.sys_enter = pair_prog;

		/*
		 * Update the BPF_MAP_TYPE_PROG_SHARED for raw_syscalls:sys_enter
		 * with the fd for the program we're reusing:
		 */
		prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
		err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
		if (err)
			break;
	}


	return err;
}
3540
/*
 * Tear down everything related to the augmented_raw_syscalls BPF object:
 * the main augmented event, any other evsel that came from the same BPF
 * object, and finally the object itself.
 */
static void trace__delete_augmented_syscalls(struct trace *trace)
{
	struct evsel *evsel, *tmp;

	evlist__remove(trace->evlist, trace->syscalls.events.augmented);
	evsel__delete(trace->syscalls.events.augmented);
	trace->syscalls.events.augmented = NULL;

	/* _safe variant: we delete evsels while iterating */
	evlist__for_each_entry_safe(trace->evlist, tmp, evsel) {
		if (evsel->bpf_obj == trace->bpf_obj) {
			evlist__remove(trace->evlist, evsel);
			evsel__delete(evsel);
		}

	}

	/* Only close the object after no evsel references it anymore */
	bpf_object__close(trace->bpf_obj);
	trace->bpf_obj = NULL;
}
3560#else // HAVE_LIBBPF_SUPPORT
/*
 * Stubs for builds without libbpf: all BPF-based syscall filtering and
 * augmentation degrades to a no-op and the tracepoint-only code paths in
 * the callers take over.
 */
static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused,
						   const char *name __maybe_unused)
{
	return NULL;
}

static void trace__set_bpf_map_filtered_pids(struct trace *trace __maybe_unused)
{
}

static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused)
{
}

static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
{
	return 0;
}

static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
{
	return 0;
}

static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
							    const char *name __maybe_unused)
{
	return NULL;
}

static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
{
	return 0;
}

static void trace__delete_augmented_syscalls(struct trace *trace __maybe_unused)
{
}
3599#endif // HAVE_LIBBPF_SUPPORT
3600
3601static bool trace__only_augmented_syscalls_evsels(struct trace *trace)
3602{
3603	struct evsel *evsel;
3604
3605	evlist__for_each_entry(trace->evlist, evsel) {
3606		if (evsel == trace->syscalls.events.augmented ||
3607		    evsel->bpf_obj == trace->bpf_obj)
3608			continue;
3609
3610		return false;
3611	}
3612
3613	return true;
3614}
3615
3616static int trace__set_ev_qualifier_filter(struct trace *trace)
3617{
3618	if (trace->syscalls.map)
3619		return trace__set_ev_qualifier_bpf_filter(trace);
3620	if (trace->syscalls.events.sys_enter)
3621		return trace__set_ev_qualifier_tp_filter(trace);
3622	return 0;
3623}
3624
/*
 * Mark each pid in 'pids' as filtered in the given BPF map (value 'true'
 * keyed by pid). Compiles to a successful no-op without libbpf.
 *
 * Returns 0 on success or the first bpf_map_update_elem() error.
 */
static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused,
				    size_t npids __maybe_unused, pid_t *pids __maybe_unused)
{
	int err = 0;
#ifdef HAVE_LIBBPF_SUPPORT
	bool value = true;
	int map_fd = bpf_map__fd(map);
	size_t i;

	for (i = 0; i < npids; ++i) {
		err = bpf_map_update_elem(map_fd, &pids[i], &value, BPF_ANY);
		if (err)
			break;
	}
#endif
	return err;
}
3642
3643static int trace__set_filter_loop_pids(struct trace *trace)
3644{
3645	unsigned int nr = 1, err;
3646	pid_t pids[32] = {
3647		getpid(),
3648	};
3649	struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
3650
3651	while (thread && nr < ARRAY_SIZE(pids)) {
3652		struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
3653
3654		if (parent == NULL)
3655			break;
3656
3657		if (!strcmp(thread__comm_str(parent), "sshd") ||
3658		    strstarts(thread__comm_str(parent), "gnome-terminal")) {
3659			pids[nr++] = parent->tid;
3660			break;
3661		}
3662		thread = parent;
3663	}
3664
3665	err = perf_evlist__append_tp_filter_pids(trace->evlist, nr, pids);
3666	if (!err && trace->filter_pids.map)
3667		err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
3668
3669	return err;
3670}
3671
/*
 * Install the pid filters: either the user-supplied --filter-pids list
 * (both as a tracepoint filter and, when present, in the BPF map) or,
 * for system-wide sessions, the automatic self/terminal loop filter.
 */
static int trace__set_filter_pids(struct trace *trace)
{
	int err = 0;
	/*
	 * Better not use !target__has_task() here because we need to cover the
	 * case where no threads were specified in the command line, but a
	 * workload was, and in that case we will fill in the thread_map when
	 * we fork the workload in perf_evlist__prepare_workload.
	 */
	if (trace->filter_pids.nr > 0) {
		err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
							 trace->filter_pids.entries);
		if (!err && trace->filter_pids.map) {
			err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
						       trace->filter_pids.entries);
		}
	} else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) {
		/* pid == -1 at index 0: system wide, filter our own loop out */
		err = trace__set_filter_loop_pids(trace);
	}

	return err;
}
3694
3695static int __trace__deliver_event(struct trace *trace, union perf_event *event)
3696{
3697	struct evlist *evlist = trace->evlist;
3698	struct perf_sample sample;
3699	int err;
3700
3701	err = perf_evlist__parse_sample(evlist, event, &sample);
3702	if (err)
3703		fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
3704	else
3705		trace__handle_event(trace, event, &sample);
3706
3707	return 0;
3708}
3709
3710static int __trace__flush_events(struct trace *trace)
3711{
3712	u64 first = ordered_events__first_time(&trace->oe.data);
3713	u64 flush = trace->oe.last - NSEC_PER_SEC;
3714
3715	/* Is there some thing to flush.. */
3716	if (first && first < flush)
3717		return ordered_events__flush_time(&trace->oe.data, flush);
3718
3719	return 0;
3720}
3721
3722static int trace__flush_events(struct trace *trace)
3723{
3724	return !trace->sort_events ? 0 : __trace__flush_events(trace);
3725}
3726
/*
 * Deliver one event: straight to the handler normally, or through the
 * ordered_events sorter when --sort-events is in use, flushing anything
 * old enough afterwards.
 */
static int trace__deliver_event(struct trace *trace, union perf_event *event)
{
	int err;

	if (!trace->sort_events)
		return __trace__deliver_event(trace, event);

	err = perf_evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
	/* NOTE(review): -1 is tolerated here, presumably "no timestamp on this
	 * event", queueing it with the last timestamp seen — confirm against
	 * perf_evlist__parse_sample_timestamp()'s contract. */
	if (err && err != -1)
		return err;

	err = ordered_events__queue(&trace->oe.data, event, trace->oe.last, 0);
	if (err)
		return err;

	return trace__flush_events(trace);
}
3744
/*
 * ordered_events callback: events come out of the sorter in timestamp order
 * and are pushed through the regular delivery path.
 */
static int ordered_events__deliver_event(struct ordered_events *oe,
					 struct ordered_event *event)
{
	/* oe is embedded in struct trace at oe.data, recover the container */
	struct trace *trace = container_of(oe, struct trace, oe.data);

	return __trace__deliver_event(trace, event->event);
}
3752
3753static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg)
3754{
3755	struct tep_format_field *field;
3756	struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
3757
3758	if (evsel->tp_format == NULL || fmt == NULL)
3759		return NULL;
3760
3761	for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt)
3762		if (strcmp(field->name, arg) == 0)
3763			return fmt;
3764
3765	return NULL;
3766}
3767
/*
 * Rewrite symbolic values in an evsel filter into numbers the kernel can
 * evaluate, e.g. "flags==O_RDONLY|O_CLOEXEC" becomes "flags==0x80000" by
 * running each right-hand side through the arg formatter's strtoul()
 * resolver. The filter string is rebuilt piecewise via asprintf().
 *
 * Returns 0 on success (possibly having replaced evsel->filter), -1 when a
 * name can't be resolved or memory runs out.
 */
static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel)
{
	char *tok, *left = evsel->filter, *new_filter = evsel->filter;

	/* Scan for comparison operators; each one splits arg-name / value */
	while ((tok = strpbrk(left, "=<>!")) != NULL) {
		char *right = tok + 1, *right_end;

		if (*right == '=')	/* two-char operators: ==, <=, >=, != */
			++right;

		while (isspace(*right))
			++right;

		if (*right == '\0')
			break;

		/* Back up to the start of the argument name left of the operator */
		while (!isalpha(*left))
			if (++left == tok) {
				/*
				 * Bail out, can't find the name of the argument that is being
				 * used in the filter, let it try to set this filter, will fail later.
				 */
				return 0;
			}

		right_end = right + 1;
		while (isalnum(*right_end) || *right_end == '_' || *right_end == '|')
			++right_end;

		if (isalpha(*right)) {	/* symbolic value, needs expansion */
			struct syscall_arg_fmt *fmt;
			int left_size = tok - left,
			    right_size = right_end - right;
			char arg[128];

			while (isspace(left[left_size - 1]))
				--left_size;

			scnprintf(arg, sizeof(arg), "%.*s", left_size, left);

			fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg);
			if (fmt == NULL) {
				pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
				       arg, evsel->name, evsel->filter);
				return -1;
			}

			pr_debug2("trying to expand \"%s\" \"%.*s\" \"%.*s\" -> ",
				 arg, (int)(right - tok), tok, right_size, right);

			if (fmt->strtoul) {
				u64 val;
				struct syscall_arg syscall_arg = {
					.parm = fmt->parm,
				};

				if (fmt->strtoul(right, right_size, &syscall_arg, &val)) {
					char *n, expansion[19];
					int expansion_lenght = scnprintf(expansion, sizeof(expansion), "%#" PRIx64, val);
					int expansion_offset = right - new_filter;

					pr_debug("%s", expansion);

					/* Splice: prefix + hex expansion + remainder */
					if (asprintf(&n, "%.*s%s%s", expansion_offset, new_filter, expansion, right_end) < 0) {
						pr_debug(" out of memory!\n");
						/* NOTE(review): on the first iteration new_filter still
						 * aliases evsel->filter, so this free() leaves
						 * evsel->filter dangling — confirm callers don't reuse
						 * it after a -1 return. */
						free(new_filter);
						return -1;
					}
					if (new_filter != evsel->filter)
						free(new_filter);
					/* Resume scanning right after the spliced-in number */
					left = n + expansion_offset + expansion_lenght;
					new_filter = n;
				} else {
					pr_err("\"%.*s\" not found for \"%s\" in \"%s\", can't set filter \"%s\"\n",
					       right_size, right, arg, evsel->name, evsel->filter);
					return -1;
				}
			} else {
				pr_err("No resolver (strtoul) for \"%s\" in \"%s\", can't set filter \"%s\"\n",
				       arg, evsel->name, evsel->filter);
				return -1;
			}

			pr_debug("\n");
		} else {
			left = right_end;	/* already numeric, move on */
		}
	}

	if (new_filter != evsel->filter) {
		pr_debug("New filter for %s: %s\n", evsel->name, new_filter);
		/* evsel__set_filter() copies, so our working buffer can go */
		evsel__set_filter(evsel, new_filter);
		free(new_filter);
	}

	return 0;
}
3865
3866static int trace__expand_filters(struct trace *trace, struct evsel **err_evsel)
3867{
3868	struct evlist *evlist = trace->evlist;
3869	struct evsel *evsel;
3870
3871	evlist__for_each_entry(evlist, evsel) {
3872		if (evsel->filter == NULL)
3873			continue;
3874
3875		if (trace__expand_filter(trace, evsel)) {
3876			*err_evsel = evsel;
3877			return -1;
3878		}
3879	}
3880
3881	return 0;
3882}
3883
/*
 * The live-tracing main path: build the event list (syscall tracepoints or
 * BPF-augmented events, pgfaults, sched_stat_runtime), create maps, apply
 * pid/event filters, mmap the ring buffers, optionally fork a workload, then
 * consume events until interrupted or the workload exits, finally printing
 * the summary. Returns 0 on success, negative on error.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct evlist *evlist = trace->evlist;
	struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;	/* leftover argv == workload to fork */
	bool draining = false;

	trace->live = true;

	/*
	 * With raw_augmented_syscalls the BPF object already supplies the
	 * sys_enter/sys_exit events, so only add the plain tracepoints (and
	 * the vfs_getname probe) otherwise.
	 */
	if (!trace->raw_augmented_syscalls) {
		if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
			goto out_error_raw_syscalls;

		if (trace->trace_syscalls)
			trace->vfs_getname = evlist__add_vfs_getname(evlist);
	}

	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
		evlist__add(evlist, pgfault_maj);
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
		evlist__add(evlist, pgfault_min);
	}

	/* Needed for the per-thread runtime figures in the summary */
	if (trace->sched &&
	    evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;
	/*
	 * If a global cgroup was set, apply it to all the events without an
	 * explicit cgroup. I.e.:
	 *
	 * 	trace -G A -e sched:*switch
	 *
	 * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
	 * _and_ sched:sched_switch to the 'A' cgroup, while:
	 *
	 * trace -e sched:*switch -G A
	 *
	 * will only set the sched:sched_switch event to the 'A' cgroup, all the
	 * other events (raw_syscalls:sys_{enter,exit}, etc are left "without"
	 * a cgroup (on the root cgroup, sys wide, etc).
	 *
	 * Multiple cgroups:
	 *
	 * trace -G A -e sched:*switch -G B
	 *
	 * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
	 * to the 'B' cgroup.
	 *
	 * evlist__set_default_cgroup() grabs a reference of the passed cgroup
	 * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL.
	 */
	if (trace->cgroup)
		evlist__set_default_cgroup(trace->evlist, trace->cgroup);

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts, &callchain_param);

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	/* Wire up any BPF map/prog config from the command line events */
	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_error_open;
	}

	err = trace__set_filter_pids(trace);
	if (err < 0)
		goto out_error_mem;

	if (trace->syscalls.map)
		trace__init_syscalls_bpf_map(trace);

	if (trace->syscalls.prog_array.sys_enter)
		trace__init_syscalls_bpf_prog_array_maps(trace);

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		if (trace->syscalls.events.sys_exit) {
			pr_debug("event qualifier tracepoint filter: %s\n",
				 trace->syscalls.events.sys_exit->filter);
		}
	}

	/*
	 * If the "close" syscall is not traced, then we will not have the
	 * opportunity to, in syscall_arg__scnprintf_close_fd() invalidate the
	 * fd->pathname table and were ending up showing the last value set by
	 * syscalls opening a pathname and associating it with a descriptor or
	 * reading it from /proc/pid/fd/ in cases where that doesn't make
	 * sense.
	 *
	 *  So just disable this beautifier (SCA_FD, SCA_FDAT) when 'close' is
	 *  not in use.
	 */
	trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));

	err = trace__expand_filters(trace, &evsel);
	if (err)
		goto out_delete_evlist;
	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	if (trace->dump.map)
		bpf_map__fprintf(trace->dump.map, trace->output);

	err = evlist__mmap(evlist, trace->opts.mmap_pages);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
		evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	/* --delay: let the workload warm up before counting */
	if (trace->opts.initial_delay) {
		usleep(trace->opts.initial_delay * 1000);
		evlist__enable(evlist);
	}

	trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
				  evlist->core.threads->nr > 1 ||
				  evlist__first(evlist)->core.attr.inherit;

	/*
	 * Now that we already used evsel->core.attr to ask the kernel to setup the
	 * events, lets reuse evsel->core.attr.sample_max_stack as the limit in
	 * trace__resolve_callchain(), allowing per-event max-stack settings
	 * to override an explicitly set --max-stack global setting.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel__has_callchain(evsel) &&
		    evsel->core.attr.sample_max_stack == 0)
			evsel->core.attr.sample_max_stack = trace->max_stack;
	}
again:
	before = trace->nr_events;

	/* Drain every mmap'ed ring buffer once */
	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		union perf_event *event;
		struct mmap *md;

		md = &evlist->mmap[i];
		if (perf_mmap__read_init(&md->core) < 0)
			continue;

		while ((event = perf_mmap__read_event(&md->core)) != NULL) {
			++trace->nr_events;

			err = trace__deliver_event(trace, event);
			if (err)
				goto out_disable;

			perf_mmap__consume(&md->core);

			if (interrupted)
				goto out_disable;

			/* Workload exited: stop producing, finish consuming */
			if (done && !draining) {
				evlist__disable(evlist);
				draining = true;
			}
		}
		perf_mmap__read_done(&md->core);
	}

	if (trace->nr_events == before) {
		/* Nothing new: poll (with a bounded wait once 'done') */
		int timeout = done ? 100 : -1;

		if (!draining && evlist__poll(evlist, timeout) > 0) {
			if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
				draining = true;

			goto again;
		} else {
			if (trace__flush_events(trace))
				goto out_disable;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	evlist__disable(evlist);

	if (trace->sort_events)
		ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	trace__symbols__exit(trace);

	evlist__delete(evlist);
	cgroup__put(trace->cgroup);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * Error labels below the return: the braced block lets them share one
 * errbuf; control only reaches here via the gotos above.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, evsel__name(evsel), errno,
		str_error_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
4177
/*
 * Replay mode (-i perf.data): open a recorded session, hook the syscall
 * enter/exit and pgfault handlers onto the recorded events, and process
 * them through the same beautifiers as live mode. Returns 0 on success.
 */
static int trace__replay(struct trace *trace)
{
	const struct evsel_str_handler handlers[] = {
		{ "probe:vfs_getname",	     trace__vfs_getname, },
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = trace->force,
	};
	struct perf_session *session;
	struct evsel *evsel;
	int err = -1;

	trace->tool.sample	  = trace__process_sample;
	trace->tool.mmap	  = perf_event__process_mmap;
	trace->tool.mmap2	  = perf_event__process_mmap2;
	trace->tool.comm	  = perf_event__process_comm;
	trace->tool.exit	  = perf_event__process_exit;
	trace->tool.fork	  = perf_event__process_fork;
	trace->tool.attr	  = perf_event__process_attr;
	trace->tool.tracing_data  = perf_event__process_tracing_data;
	trace->tool.build_id	  = perf_event__process_build_id;
	trace->tool.namespaces	  = perf_event__process_namespaces;

	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&data, false, &trace->tool);
	if (IS_ERR(session))
		return PTR_ERR(session);

	if (trace->opts.target.pid)
		symbol_conf.pid_list_str = strdup(trace->opts.target.pid);

	if (trace->opts.target.tid)
		symbol_conf.tid_list_str = strdup(trace->opts.target.tid);

	if (symbol__init(&session->header.env) < 0)
		goto out;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");

	if (evsel &&
	    (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	if (evsel &&
	    (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
		goto out;
	}

	/* Any recorded page-fault software events get the pgfault handler */
	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->core.attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;
	}

	setup_pager();

	err = perf_session__process_events(session);
	if (err)
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}
4276
/* Print the banner preceding the per-thread summaries, return chars written. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
4285
/*
 * Re-sort the per-thread syscall stats intlist by total time spent
 * (msecs, descending); each resorted entry caches the syscall id, its
 * stats and the precomputed total.
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct syscall_stats *stats;
	double		     msecs;
	int		     syscall;
)
{
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct syscall_stats *stats = source->priv;

	entry->syscall = source->i;
	entry->stats   = stats;
	/* total = sample count * average duration, converted ns -> msec */
	entry->msecs   = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0;
}
4299
/*
 * Print one thread's per-syscall statistics table (calls, errors, total/
 * min/avg/max duration, stddev), sorted by total time, optionally followed
 * by a per-errno breakdown (--errno-summary). Returns chars printed.
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	size_t printed = 0;
	struct syscall *sc;
	struct rb_node *nd;
	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);

	if (syscall_stats == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, "   syscall            calls  errors  total       min       avg       max       stddev\n");
	printed += fprintf(fp, "                                     (msec)    (msec)    (msec)    (msec)        (%%)\n");
	printed += fprintf(fp, "   --------------- --------  ------ -------- --------- --------- ---------     ------\n");

	resort_rb__for_each_entry(nd, syscall_stats) {
		struct syscall_stats *stats = syscall_stats_entry->stats;
		if (stats) {
			/* Durations are kept in ns, displayed in msec */
			double min = (double)(stats->stats.min) / NSEC_PER_MSEC;
			double max = (double)(stats->stats.max) / NSEC_PER_MSEC;
			double avg = avg_stats(&stats->stats);
			double pct;
			u64 n = (u64)stats->stats.n;

			/* Relative stddev as a percentage of the average */
			pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
			avg /= NSEC_PER_MSEC;

			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
			printed += fprintf(fp, "   %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f",
					   n, stats->nr_failures, syscall_stats_entry->msecs, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);

			if (trace->errno_summary && stats->nr_failures) {
				const char *arch_name = perf_env__arch(trace->host->env);
				int e;

				/* errnos[] is 0-based, errno values start at 1 */
				for (e = 0; e < stats->max_errno; ++e) {
					if (stats->errnos[e] != 0)
						fprintf(fp, "\t\t\t\t%s: %d\n", arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]);
				}
			}
		}
	}

	resort_rb__delete(syscall_stats);
	printed += fprintf(fp, "\n\n");

	return printed;
}
4352
/*
 * Print one thread's summary line (comm, tid, event count and share of the
 * total, page faults, runtime with --sched) followed by its per-syscall
 * stats table. Returns chars printed, 0 for threads never traced.
 */
static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
{
	size_t printed = 0;
	struct thread_trace *ttrace = thread__priv(thread);
	double ratio;

	if (ttrace == NULL)
		return 0;

	/* This thread's share of all events seen, in percent */
	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;

	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
	printed += fprintf(fp, "%.1f%%", ratio);
	if (ttrace->pfmaj)
		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
	if (ttrace->pfmin)
		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
	if (trace->sched)
		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
	else if (fputc('\n', fp) != EOF)
		++printed;

	printed += thread__dump_stats(ttrace, trace, fp);

	return printed;
}
4380
4381static unsigned long thread__nr_events(struct thread_trace *ttrace)
4382{
4383	return ttrace ? ttrace->nr_events : 0;
4384}
4385
/*
 * Define a resorted rb-tree of machine threads, ordered by descending
 * per-thread event count, so the summary lists the busiest threads first.
 * thread->priv holds the struct thread_trace (may be NULL for threads
 * that saw no events — thread__nr_events() handles that).
 */
DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
	struct thread *thread;
)
{
	entry->thread = rb_entry(nd, struct thread, rb_node);
}
4392
/*
 * Print the whole per-thread summary: header, then one block per thread,
 * busiest threads first. Returns the number of characters printed, or 0
 * when sorting a bucket fails.
 */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
	size_t printed = trace__fprintf_threads_header(fp);
	struct rb_node *nd;
	int i;

	/* Threads live in THREADS__TABLE_SIZE hash buckets; resort each one. */
	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
		/* Declares and fills a local 'threads' resorted rb-tree. */
		DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);

		if (threads == NULL) {
			fprintf(fp, "%s", "Error sorting output by nr_events!\n");
			return 0;
		}

		resort_rb__for_each_entry(nd, threads)
			printed += trace__fprintf_thread(fp, threads_entry->thread, trace);

		resort_rb__delete(threads);
	}
	return printed;
}
4414
4415static int trace__set_duration(const struct option *opt, const char *str,
4416			       int unset __maybe_unused)
4417{
4418	struct trace *trace = opt->value;
4419
4420	trace->duration_filter = atof(str);
4421	return 0;
4422}
4423
4424static int trace__set_filter_pids_from_option(const struct option *opt, const char *str,
4425					      int unset __maybe_unused)
4426{
4427	int ret = -1;
4428	size_t i;
4429	struct trace *trace = opt->value;
4430	/*
4431	 * FIXME: introduce a intarray class, plain parse csv and create a
4432	 * { int nr, int entries[] } struct...
4433	 */
4434	struct intlist *list = intlist__new(str);
4435
4436	if (list == NULL)
4437		return -1;
4438
4439	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
4440	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
4441
4442	if (trace->filter_pids.entries == NULL)
4443		goto out;
4444
4445	trace->filter_pids.entries[0] = getpid();
4446
4447	for (i = 1; i < trace->filter_pids.nr; ++i)
4448		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
4449
4450	intlist__delete(list);
4451	ret = 0;
4452out:
4453	return ret;
4454}
4455
4456static int trace__open_output(struct trace *trace, const char *filename)
4457{
4458	struct stat st;
4459
4460	if (!stat(filename, &st) && st.st_size) {
4461		char oldname[PATH_MAX];
4462
4463		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
4464		unlink(oldname);
4465		rename(filename, oldname);
4466	}
4467
4468	trace->output = fopen(filename, "w");
4469
4470	return trace->output == NULL ? -errno : 0;
4471}
4472
4473static int parse_pagefaults(const struct option *opt, const char *str,
4474			    int unset __maybe_unused)
4475{
4476	int *trace_pgfaults = opt->value;
4477
4478	if (strcmp(str, "all") == 0)
4479		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
4480	else if (strcmp(str, "maj") == 0)
4481		*trace_pgfaults |= TRACE_PFMAJ;
4482	else if (strcmp(str, "min") == 0)
4483		*trace_pgfaults |= TRACE_PFMIN;
4484	else
4485		return -1;
4486
4487	return 0;
4488}
4489
/*
 * Give every evsel in the list that doesn't already have a sample handler
 * the provided default one; handlers set earlier are left untouched.
 */
static void evlist__set_default_evsel_handler(struct evlist *evlist, void *handler)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->handler == NULL)
			evsel->handler = handler;
	}
}
4499
4500static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name)
4501{
4502	struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
4503
4504	if (fmt) {
4505		struct syscall_fmt *scfmt = syscall_fmt__find(name);
4506
4507		if (scfmt) {
4508			int skip = 0;
4509
4510			if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 ||
4511			    strcmp(evsel->tp_format->format.fields->name, "nr") == 0)
4512				++skip;
4513
4514			memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt));
4515		}
4516	}
4517}
4518
/*
 * Set up tracepoint field accessors for every evsel in the list:
 * non-syscall tracepoints get the generic arg scnprintf beautifiers,
 * while syscalls:sys_{enter,exit}_* events get their id/args/ret fields
 * located and their per-arg formatters installed.
 * Returns 0 on success, -1 on failure.
 */
static int evlist__set_syscall_tp_fields(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		/* Skip if already initialized or no tracefs format is available. */
		if (evsel->priv || !evsel->tp_format)
			continue;

		if (strcmp(evsel->tp_format->system, "syscalls")) {
			evsel__init_tp_arg_scnprintf(evsel);
			continue;
		}

		if (evsel__init_syscall_tp(evsel))
			return -1;

		if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
			struct syscall_tp *sc = __evsel__syscall_tp(evsel);

			/* The syscall args start right after the u64 syscall id. */
			if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
				return -1;

			/* sizeof("...") - 1 == strlen of the prefix to strip. */
			evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1);
		} else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
			struct syscall_tp *sc = __evsel__syscall_tp(evsel);

			/* The return value is a u64 right after the syscall id. */
			if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
				return -1;

			evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1);
		}
	}

	return 0;
}
4554
4555/*
4556 * XXX: Hackish, just splitting the combined -e+--event (syscalls
4557 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
4558 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
4559 *
4560 * It'd be better to introduce a parse_options() variant that would return a
4561 * list with the terms it didn't match to an event...
4562 */
/*
 * -e/--event/--expr option callback: split the CSV string into two lists —
 * lists[1] collects syscall names/globs/aliases and strace group files
 * (becoming trace->ev_qualifier), lists[0] collects everything else, which
 * is handed to the regular parse_events_option(). Returns 0 on success,
 * negative on error.
 */
static int trace__parse_events_option(const struct option *opt, const char *str,
				      int unset __maybe_unused)
{
	struct trace *trace = (struct trace *)opt->value;
	const char *s = str;
	char *sep = NULL, *lists[2] = { NULL, NULL, };
	/* len covers the whole original string, so each list always fits. */
	int len = strlen(str) + 1, err = -1, list, idx;
	char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
	char group_name[PATH_MAX];
	struct syscall_fmt *fmt;

	if (strace_groups_dir == NULL)
		return -1;

	/* A leading '!' negates the whole syscall qualifier list. */
	if (*s == '!') {
		++s;
		trace->not_ev_qualifier = true;
	}

	while (1) {
		/* NUL-terminate the current term in place; restored below. */
		if ((sep = strchr(s, ',')) != NULL)
			*sep = '\0';

		/* list == 1: syscall-ish term, list == 0: other event. */
		list = 0;
		if (syscalltbl__id(trace->sctbl, s) >= 0 ||
		    syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
			list = 1;
			goto do_concat;
		}

		fmt = syscall_fmt__find_by_alias(s);
		if (fmt != NULL) {
			list = 1;
			s = fmt->name;
		} else {
			/* A readable file in the strace groups dir is a group name. */
			path__join(group_name, sizeof(group_name), strace_groups_dir, s);
			if (access(group_name, R_OK) == 0)
				list = 1;
		}
do_concat:
		if (lists[list]) {
			sprintf(lists[list] + strlen(lists[list]), ",%s", s);
		} else {
			lists[list] = malloc(len);
			if (lists[list] == NULL)
				goto out;
			strcpy(lists[list], s);
		}

		if (!sep)
			break;

		/* Put the ',' back and move on to the next term. */
		*sep = ',';
		s = sep + 1;
	}

	if (lists[1] != NULL) {
		struct strlist_config slist_config = {
			.dirname = strace_groups_dir,
		};

		trace->ev_qualifier = strlist__new(lists[1], &slist_config);
		if (trace->ev_qualifier == NULL) {
			fputs("Not enough memory to parse event qualifier", trace->output);
			goto out;
		}

		if (trace__validate_ev_qualifier(trace))
			goto out;
		trace->trace_syscalls = true;
	}

	err = 0;

	if (lists[0]) {
		/* Hand the non-syscall terms to the regular -e/--event parser. */
		struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
					       "event selector. use 'perf list' to list available events",
					       parse_events_option);
		err = parse_events_option(&o, lists[0], 0);
	}
out:
	if (sep)
		*sep = ',';

	return err;
}
4649
4650static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
4651{
4652	struct trace *trace = opt->value;
4653
4654	if (!list_empty(&trace->evlist->core.entries))
4655		return parse_cgroups(opt, str, unset);
4656
4657	trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
4658
4659	return 0;
4660}
4661
/*
 * perf_config() callback: apply the trace.* variables from .perfconfig
 * to the struct trace passed via 'arg'. Returns 0 on success (including
 * unknown variables, which are ignored), -1 only when duplicating
 * trace.add_events fails.
 */
static int trace__config(const char *var, const char *value, void *arg)
{
	struct trace *trace = arg;
	int err = 0;

	if (!strcmp(var, "trace.add_events")) {
		/* Parsed later, once --verbose is known; see cmd_trace(). */
		trace->perfconfig_events = strdup(value);
		if (trace->perfconfig_events == NULL) {
			pr_err("Not enough memory for %s\n", "trace.add_events");
			return -1;
		}
	} else if (!strcmp(var, "trace.show_timestamp")) {
		trace->show_tstamp = perf_config_bool(var, value);
	} else if (!strcmp(var, "trace.show_duration")) {
		trace->show_duration = perf_config_bool(var, value);
	} else if (!strcmp(var, "trace.show_arg_names")) {
		trace->show_arg_names = perf_config_bool(var, value);
		/* Without arg names, zero args must be shown to keep positions clear. */
		if (!trace->show_arg_names)
			trace->show_zeros = true;
	} else if (!strcmp(var, "trace.show_zeros")) {
		bool new_show_zeros = perf_config_bool(var, value);
		if (!trace->show_arg_names && !new_show_zeros) {
			pr_warning("trace.show_zeros has to be set when trace.show_arg_names=no\n");
			goto out;
		}
		trace->show_zeros = new_show_zeros;
	} else if (!strcmp(var, "trace.show_prefix")) {
		trace->show_string_prefix = perf_config_bool(var, value);
	} else if (!strcmp(var, "trace.no_inherit")) {
		trace->opts.no_inherit = perf_config_bool(var, value);
	} else if (!strcmp(var, "trace.args_alignment")) {
		int args_alignment = 0;
		if (perf_config_int(&args_alignment, var, value) == 0)
			trace->args_alignment = args_alignment;
	} else if (!strcmp(var, "trace.tracepoint_beautifiers")) {
		if (strcasecmp(value, "libtraceevent") == 0)
			trace->libtraceevent_print = true;
		else if (strcasecmp(value, "libbeauty") == 0)
			trace->libtraceevent_print = false;
	}
out:
	/* err stays 0 here; 'out' only short-circuits the show_zeros branch. */
	return err;
}
4705
/*
 * Main entry point for 'perf trace': parses options and .perfconfig, sets
 * up the evlist, the syscall table and the BPF based syscall payload
 * augmenters, then either records ('perf trace record'), replays (-i) or
 * runs live tracing. Returns 0 on success, negative on error.
 */
int cmd_trace(int argc, const char **argv)
{
	const char *trace_usage[] = {
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
		"perf trace record [<options>] [<command>]",
		"perf trace record [<options>] -- <command> [<options>]",
		NULL
	};
	struct trace trace = {
		.opts = {
			.target = {
				.uid	   = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq     = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_buffering  = true,
			.mmap_pages    = UINT_MAX,
		},
		.output = stderr,
		.show_comm = true,
		.show_tstamp = true,
		.show_duration = true,
		.show_arg_names = true,
		.args_alignment = 70,
		.trace_syscalls = false,
		.kernel_syscallchains = false,
		.max_stack = UINT_MAX,
		.max_events = ULONG_MAX,
	};
	const char *map_dump_str = NULL;
	const char *output_name = NULL;
	const struct option trace_options[] = {
	OPT_CALLBACK('e', "event", &trace, "event",
		     "event/syscall selector. use 'perf list' to list available events",
		     trace__parse_events_option),
	OPT_CALLBACK(0, "filter", &trace.evlist, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN(0, "comm", &trace.show_comm,
		    "show the thread COMM next to its id"),
	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
	OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
		     trace__parse_events_option),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		    "trace events on existing process id"),
	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
		    "trace events on existing thread id"),
	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
		     "pids to filter (by the kernel)", trace__set_filter_pids_from_option),
	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
		   "user to profile"),
	OPT_CALLBACK(0, "duration", &trace, "float",
		     "show only events with duration > N.M ms",
		     trace__set_duration),
#ifdef HAVE_LIBBPF_SUPPORT
	OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
#endif
	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
	OPT_BOOLEAN('T', "time", &trace.full_time,
		    "Show full timestamp, not time relative to first start"),
	OPT_BOOLEAN(0, "failure", &trace.failure_only,
		    "Show only syscalls that failed"),
	OPT_BOOLEAN('s', "summary", &trace.summary_only,
		    "Show only syscall summary with statistics"),
	OPT_BOOLEAN('S', "with-summary", &trace.summary,
		    "Show all syscalls and summary with statistics"),
	OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary,
		    "Show errno stats per syscall, use with -s or -S"),
	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
		     "Trace pagefaults", parse_pagefaults, "maj"),
	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
	OPT_CALLBACK(0, "call-graph", &trace.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print,
		    "Use libtraceevent to print the tracepoint arguments."),
	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
		    "Show the kernel callchains on the syscall exit path"),
	OPT_ULONG(0, "max-events", &trace.max_events,
		"Set the maximum number of events to print, exit after that is reached. "),
	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
		     "Set the minimum stack depth when parsing the callchain, "
		     "anything below the specified depth will be ignored."),
	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
		     "Set the maximum stack depth when parsing the callchain, "
		     "anything beyond the specified depth will be ignored. "
		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
	OPT_BOOLEAN(0, "sort-events", &trace.sort_events,
			"Sort batch of events before processing, use if getting out of order events"),
	OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
			"print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
		     trace__parse_cgroups),
	OPT_INTEGER('D', "delay", &trace.opts.initial_delay,
		     "ms to wait before starting measurement after program "
		     "start"),
	OPTS_EVSWITCH(&trace.evswitch),
	OPT_END()
	};
	bool __maybe_unused max_stack_user_set = true;
	bool mmap_pages_user_set = true;
	struct evsel *evsel;
	const char * const trace_subcommands[] = { "record", NULL };
	int err = -1;
	char bf[BUFSIZ];

	signal(SIGSEGV, sighandler_dump_stack);
	signal(SIGFPE, sighandler_dump_stack);

	trace.evlist = evlist__new();
	trace.sctbl = syscalltbl__new();

	if (trace.evlist == NULL || trace.sctbl == NULL) {
		pr_err("Not enough memory to run!\n");
		err = -ENOMEM;
		goto out;
	}

	/*
	 * Parsing .perfconfig may entail creating a BPF event, that may need
	 * to create BPF maps, so bump RLIM_MEMLOCK as the default 64K setting
	 * is too small. This affects just this process, not touching the
	 * global setting. If it fails we'll get something in 'perf trace -v'
	 * to help diagnose the problem.
	 */
	rlimit__bump_memlock();

	err = perf_config(trace__config, &trace);
	if (err)
		goto out;

	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);

	/*
	 * Here we already passed thru trace__parse_events_option() and it has
	 * already figured out if -e syscall_name, if not but if --event
	 * foo:bar was used, the user is interested _just_ in those, say,
	 * tracepoint events, not in the strace-like syscall-name-based mode.
	 *
	 * This is important because we need to check if strace-like mode is
	 * needed to decide if we should filter out the eBPF
	 * __augmented_syscalls__ code, if it is in the mix, say, via
	 * .perfconfig trace.add_events, and filter those out.
	 */
	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
	    trace.evlist->core.nr_entries == 0 /* Was --events used? */) {
		trace.trace_syscalls = true;
	}
	/*
	 * Now that we have --verbose figured out, lets see if we need to parse
	 * events from .perfconfig, so that if those events fail parsing, say some
	 * BPF program fails, then we'll be able to use --verbose to see what went
	 * wrong in more detail.
	 */
	if (trace.perfconfig_events != NULL) {
		struct parse_events_error parse_err;

		bzero(&parse_err, sizeof(parse_err));
		err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err);
		if (err) {
			parse_events_print_error(&parse_err, trace.perfconfig_events);
			goto out;
		}
	}

	if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
		usage_with_options_msg(trace_usage, trace_options,
				       "cgroup monitoring only available in system-wide mode");
	}

	/* Hook up the BPF __augmented_syscalls__ output event, if present. */
	evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
	if (IS_ERR(evsel)) {
		bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
		pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
		goto out;
	}

	if (evsel) {
		trace.syscalls.events.augmented = evsel;

		evsel = perf_evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
		if (evsel == NULL) {
			pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
			goto out;
		}

		if (evsel->bpf_obj == NULL) {
			pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n");
			goto out;
		}

		trace.bpf_obj = evsel->bpf_obj;

		/*
		 * If we have _just_ the augmenter event but don't have a
		 * explicit --syscalls, then assume we want all strace-like
		 * syscalls:
		 */
		if (!trace.trace_syscalls && trace__only_augmented_syscalls_evsels(&trace))
			trace.trace_syscalls = true;
		/*
		 * So, if we have a syscall augmenter, but trace_syscalls, aka
		 * strace-like syscall tracing is not set, then we need to throw
		 * away the augmenter, i.e. all the events that were created
		 * from that BPF object file.
		 *
		 * This is more to fix the current .perfconfig trace.add_events
		 * style of setting up the strace-like eBPF based syscall point
		 * payload augmenter.
		 *
		 * All this complexity will be avoided by adding an alternative
		 * to trace.add_events in the form of
		 * trace.bpf_augmented_syscalls, that will be only parsed if we
		 * need it.
		 *
		 * .perfconfig trace.add_events is still useful if we want, for
		 * instance, have msr_write.msr in some .perfconfig profile based
		 * 'perf trace --config determinism.profile' mode, where for some
		 * particular goal/workload type we want a set of events and
		 * output mode (with timings, etc) instead of having to add
		 * all via the command line.
		 *
		 * Also --config to specify an alternate .perfconfig file needs
		 * to be implemented.
		 */
		if (!trace.trace_syscalls) {
			trace__delete_augmented_syscalls(&trace);
		} else {
			trace__set_bpf_map_filtered_pids(&trace);
			trace__set_bpf_map_syscalls(&trace);
			trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
		}
	}

	err = bpf__setup_stdout(trace.evlist);
	if (err) {
		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
		goto out;
	}

	err = -1;

	if (map_dump_str) {
		trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
		if (trace.dump.map == NULL) {
			pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
			goto out;
		}
	}

	/* Page fault tracing needs the address and timestamp in each sample. */
	if (trace.trace_pgfaults) {
		trace.opts.sample_address = true;
		trace.opts.sample_time = true;
	}

	if (trace.opts.mmap_pages == UINT_MAX)
		mmap_pages_user_set = false;

	if (trace.max_stack == UINT_MAX) {
		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
		max_stack_user_set = false;
	}

#ifdef HAVE_DWARF_UNWIND_SUPPORT
	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
	}
#endif

	if (callchain_param.enabled) {
		if (!mmap_pages_user_set && geteuid() == 0)
			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;

		symbol_conf.use_callchain = true;
	}

	if (trace.evlist->core.nr_entries > 0) {
		evlist__set_default_evsel_handler(trace.evlist, trace__event_handler);
		if (evlist__set_syscall_tp_fields(trace.evlist)) {
			perror("failed to set syscalls:* tracepoint fields");
			goto out;
		}
	}

	if (trace.sort_events) {
		ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace);
		ordered_events__set_copy_on_queue(&trace.oe.data, true);
	}

	/*
	 * If we are augmenting syscalls, then combine what we put in the
	 * __augmented_syscalls__ BPF map with what is in the
	 * syscalls:sys_exit_FOO tracepoints, i.e. just like we do without BPF,
	 * combining raw_syscalls:sys_enter with raw_syscalls:sys_exit.
	 *
	 * We'll switch to look at two BPF maps, one for sys_enter and the
	 * other for sys_exit when we start augmenting the sys_exit paths with
	 * buffers that are being copied from kernel to userspace, think 'read'
	 * syscall.
	 */
	if (trace.syscalls.events.augmented) {
		evlist__for_each_entry(trace.evlist, evsel) {
			bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;

			if (raw_syscalls_sys_exit) {
				trace.raw_augmented_syscalls = true;
				goto init_augmented_syscall_tp;
			}

			if (trace.syscalls.events.augmented->priv == NULL &&
			    strstr(evsel__name(evsel), "syscalls:sys_enter")) {
				struct evsel *augmented = trace.syscalls.events.augmented;
				if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
				    evsel__init_augmented_syscall_tp_args(augmented))
					goto out;
				/*
				 * Augmented is __augmented_syscalls__ BPF_OUTPUT event
				 * Above we made sure we can get from the payload the tp fields
				 * that we get from syscalls:sys_enter tracefs format file.
				 */
				augmented->handler = trace__sys_enter;
				/*
				 * Now we do the same for the *syscalls:sys_enter event so that
				 * if we handle it directly, i.e. if the BPF prog returns 0 so
				 * as not to filter it, then we'll handle it just like we would
				 * for the BPF_OUTPUT one:
				 */
				if (evsel__init_augmented_syscall_tp(evsel, evsel) ||
				    evsel__init_augmented_syscall_tp_args(evsel))
					goto out;
				evsel->handler = trace__sys_enter;
			}

			if (strstarts(evsel__name(evsel), "syscalls:sys_exit_")) {
				struct syscall_tp *sc;
init_augmented_syscall_tp:
				if (evsel__init_augmented_syscall_tp(evsel, evsel))
					goto out;
				sc = __evsel__syscall_tp(evsel);
				/*
				 * For now with BPF raw_augmented we hook into
				 * raw_syscalls:sys_enter and there we get all
				 * 6 syscall args plus the tracepoint common
				 * fields and the syscall_nr (another long).
				 * So we check if that is the case and if so
				 * don't look after the sc->args_size but
				 * always after the full raw_syscalls:sys_enter
				 * payload, which is fixed.
				 *
				 * We'll revisit this later to pass
				 * s->args_size to the BPF augmenter (now
				 * tools/perf/examples/bpf/augmented_raw_syscalls.c,
				 * so that it copies only what we need for each
				 * syscall, like what happens when we use
				 * syscalls:sys_enter_NAME, so that we reduce
				 * the kernel/userspace traffic to just what is
				 * needed for each syscall.
				 */
				if (trace.raw_augmented_syscalls)
					trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
				evsel__init_augmented_syscall_tp_ret(evsel);
				evsel->handler = trace__sys_exit;
			}
		}
	}

	/* 'perf trace record' delegates to 'perf record' with extra events. */
	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
		return trace__record(&trace, argc-1, &argv[1]);

	/* Using just --errno-summary will trigger --summary */
	if (trace.errno_summary && !trace.summary && !trace.summary_only)
		trace.summary_only = true;

	/* summary_only implies summary option, but don't overwrite summary if set */
	if (trace.summary_only)
		trace.summary = trace.summary_only;

	if (output_name != NULL) {
		err = trace__open_output(&trace, output_name);
		if (err < 0) {
			perror("failed to create output file");
			goto out;
		}
	}

	err = evswitch__init(&trace.evswitch, trace.evlist, stderr);
	if (err)
		goto out_close;

	err = target__validate(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	err = target__parse_uid(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	/* No workload command and no target specified: trace the whole system. */
	if (!argc && target__none(&trace.opts.target))
		trace.opts.target.system_wide = true;

	if (input_name)
		err = trace__replay(&trace);
	else
		err = trace__run(&trace, argc, argv);

out_close:
	if (output_name != NULL)
		fclose(trace.output);
out:
	zfree(&trace.perfconfig_events);
	return err;
}