Linux Audio

Check our new training course

Loading...
v4.6
   1/*
   2 * builtin-trace.c
   3 *
   4 * Builtin 'trace' command:
   5 *
   6 * Display a continuously updated trace of any workload, CPU, specific PID,
   7 * system wide, etc.  Default format is loosely strace like, but any other
   8 * event may be specified using --event.
   9 *
  10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
  11 *
  12 * Initially based on the 'trace' prototype by Thomas Gleixner:
  13 *
  14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
  15 *
  16 * Released under the GPL v2. (and only v2, not any later version)
  17 */
  18
  19#include <traceevent/event-parse.h>
  20#include <api/fs/tracing_path.h>
 
 
 
 
 
 
 
 
 
 
  21#include "builtin.h"
 
  22#include "util/color.h"
 
  23#include "util/debug.h"
 
 
 
 
 
 
  24#include "util/evlist.h"
 
 
 
  25#include <subcmd/exec-cmd.h>
  26#include "util/machine.h"
 
 
 
  27#include "util/session.h"
  28#include "util/thread.h"
  29#include <subcmd/parse-options.h>
  30#include "util/strlist.h"
  31#include "util/intlist.h"
  32#include "util/thread_map.h"
  33#include "util/stat.h"
 
 
 
  34#include "trace-event.h"
  35#include "util/parse-events.h"
  36#include "util/bpf-loader.h"
  37
  38#include <libaudit.h>
 
 
 
 
 
 
 
 
 
 
  39#include <stdlib.h>
  40#include <sys/mman.h>
  41#include <linux/futex.h>
  42#include <linux/err.h>
 
 
 
 
 
 
 
 
 
  43
/*
 * Compatibility fallbacks for older distros whose headers predate
 * these constants; values match the current Linux ABI.
 */
/* For older distros: */
#ifndef MAP_STACK
# define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON		100

#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE	13
#endif

/* eventfd2(2) flags */
#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE		1
#endif

#ifndef EFD_NONBLOCK
# define EFD_NONBLOCK		00004000
#endif

#ifndef EFD_CLOEXEC
# define EFD_CLOEXEC		02000000
#endif

#ifndef O_CLOEXEC
# define O_CLOEXEC		02000000
#endif

/* socket(2) type and flag bits */
#ifndef SOCK_DCCP
# define SOCK_DCCP		6
#endif

#ifndef SOCK_CLOEXEC
# define SOCK_CLOEXEC		02000000
#endif

#ifndef SOCK_NONBLOCK
# define SOCK_NONBLOCK		00004000
#endif

#ifndef MSG_CMSG_CLOEXEC
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* perf_event_open(2) flags */
#ifndef PERF_FLAG_FD_NO_GROUP
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
 109
 110
/*
 * Accessor for one tracepoint format field: its offset into the raw
 * sample payload plus a type-appropriate reader (integer or pointer —
 * only one is ever used per field, hence the union).
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
 118
/*
 * TP_UINT_FIELD(bits) emits a reader that fetches an unsigned integer
 * of the given width from the raw sample payload at the field offset.
 * memcpy() is used because the payload is not necessarily aligned.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value;  \
}

TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);

/*
 * Same as TP_UINT_FIELD, but byte-swaps the value for perf.data files
 * recorded on a host of the opposite endianness.
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
 143
 144static int tp_field__init_uint(struct tp_field *field,
 145			       struct format_field *format_field,
 146			       bool needs_swap)
 147{
 148	field->offset = format_field->offset;
 149
 150	switch (format_field->size) {
 151	case 1:
 152		field->integer = tp_field__u8;
 153		break;
 154	case 2:
 155		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
 156		break;
 157	case 4:
 158		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
 159		break;
 160	case 8:
 161		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
 162		break;
 163	default:
 164		return -1;
 165	}
 166
 167	return 0;
 168}
 169
 
 
 
 
 
 170static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
 171{
 172	return sample->raw_data + field->offset;
 173}
 174
 175static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
 176{
 177	field->offset = format_field->offset;
 178	field->pointer = tp_field__ptr;
 179	return 0;
 180}
 181
 
 
 
 
 
/*
 * Per-evsel private data for the raw_syscalls:sys_{enter,exit}
 * tracepoints: the common 'id' field plus either the enter 'args'
 * payload or the exit 'ret' value (never both, hence the union).
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
 188
 189static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 190					  struct tp_field *field,
 191					  const char *name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 192{
 193	struct format_field *format_field = perf_evsel__field(evsel, name);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 194
 195	if (format_field == NULL)
 196		return -1;
 197
 198	return tp_field__init_uint(field, format_field, evsel->needs_swap);
 199}
 200
/*
 * Initialize the syscall_tp member called 'name' (id, args or ret) in
 * evsel->priv from the tracepoint format field of the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 204
 205static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 206					 struct tp_field *field,
 207					 const char *name)
 208{
 209	struct format_field *format_field = perf_evsel__field(evsel, name);
 210
 211	if (format_field == NULL)
 212		return -1;
 213
 214	return tp_field__init_ptr(field, format_field);
 215}
 216
/*
 * Pointer-field counterpart of perf_evsel__init_sc_tp_uint_field():
 * initialize the syscall_tp member called 'name' in evsel->priv.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 220
/*
 * Free the evsel's private syscall_tp and then the evsel itself.
 * The priv must be freed first: perf_evsel__delete() releases the
 * evsel storage that priv hangs off of.
 */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
 226
 227static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
 228{
 229	evsel->priv = malloc(sizeof(struct syscall_tp));
 230	if (evsel->priv != NULL) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 231		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
 232			goto out_delete;
 233
 234		evsel->handler = handler;
 235		return 0;
 236	}
 237
 238	return -ENOMEM;
 239
 240out_delete:
 241	zfree(&evsel->priv);
 242	return -ENOENT;
 243}
 244
 245static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
 246{
 247	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 248
 249	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 250	if (IS_ERR(evsel))
 251		evsel = perf_evsel__newtp("syscalls", direction);
 252
 253	if (IS_ERR(evsel))
 254		return NULL;
 255
 256	if (perf_evsel__init_syscall_tp(evsel, handler))
 257		goto out_delete;
 258
 259	return evsel;
 260
 261out_delete:
 262	perf_evsel__delete_priv(evsel);
 263	return NULL;
 264}
 265
/*
 * Fetch the syscall_tp member called 'name' (id/args/ret) from a
 * sample, using the reader installed at tracepoint-setup time.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
 273
/*
 * One syscall argument as handed to a beautifier: the raw value plus
 * the context needed to pretty-print it.
 */
struct syscall_arg {
	unsigned long val;	/* raw argument value */
	struct thread *thread;
	struct trace  *trace;
	void	      *parm;	/* beautifier-private data, e.g. a strarray */
	u8	      idx;	/* 0-based argument index */
	u8	      mask;	/* bitmask of args to suppress in the output */
};

/* Table mapping small integer values to symbolic names. */
struct strarray {
	int	    offset;	/* value that entries[0] corresponds to */
	int	    nr_entries;
	const char **entries;
};

#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Like DEFINE_STRARRAY, for tables whose first entry maps to 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
 299
 300static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
 301						const char *intfmt,
 302					        struct syscall_arg *arg)
 303{
 304	struct strarray *sa = arg->parm;
 305	int idx = arg->val - sa->offset;
 306
 307	if (idx < 0 || idx >= sa->nr_entries)
 308		return scnprintf(bf, size, intfmt, arg->val);
 309
 310	return scnprintf(bf, size, "%s", sa->entries[idx]);
 311}
 312
/* Strarray beautifier with decimal fallback for out-of-table values. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
 320
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */
/* Strarray beautifier with hex fallback for out-of-table values. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
 334
/* Forward declaration: the fd beautifier needs the thread fd->path map. */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
 339
 340static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
 341					   struct syscall_arg *arg)
 342{
 343	int fd = arg->val;
 344
 345	if (fd == AT_FDCWD)
 346		return scnprintf(bf, size, "CWD");
 347
 348	return syscall_arg__scnprintf_fd(bf, size, arg);
 349}
 350
 351#define SCA_FDAT syscall_arg__scnprintf_fd_at
 352
/* Forward declaration: defined later, also drops the fd->path cache entry. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
 357
/* Print a syscall argument as a hex number (pointers, opaque values). */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
					 struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}

#define SCA_HEX syscall_arg__scnprintf_hex
 365
 366static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
 367					 struct syscall_arg *arg)
 368{
 369	return scnprintf(bf, size, "%d", arg->val);
 370}
 371
 372#define SCA_INT syscall_arg__scnprintf_int
 373
 374static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 375					       struct syscall_arg *arg)
 376{
 377	int printed = 0, prot = arg->val;
 
 378
 379	if (prot == PROT_NONE)
 380		return scnprintf(bf, size, "NONE");
 381#define	P_MMAP_PROT(n) \
 382	if (prot & PROT_##n) { \
 383		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 384		prot &= ~PROT_##n; \
 
 
 
 385	}
 386
 387	P_MMAP_PROT(EXEC);
 388	P_MMAP_PROT(READ);
 389	P_MMAP_PROT(WRITE);
 390#ifdef PROT_SEM
 391	P_MMAP_PROT(SEM);
 392#endif
 393	P_MMAP_PROT(GROWSDOWN);
 394	P_MMAP_PROT(GROWSUP);
 395#undef P_MMAP_PROT
 396
 397	if (prot)
 398		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
 399
 400	return printed;
 401}
 402
 403#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 404
 405static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 406						struct syscall_arg *arg)
 407{
 408	int printed = 0, flags = arg->val;
 409
 410#define	P_MMAP_FLAG(n) \
 411	if (flags & MAP_##n) { \
 412		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 413		flags &= ~MAP_##n; \
 414	}
 415
 416	P_MMAP_FLAG(SHARED);
 417	P_MMAP_FLAG(PRIVATE);
 418#ifdef MAP_32BIT
 419	P_MMAP_FLAG(32BIT);
 420#endif
 421	P_MMAP_FLAG(ANONYMOUS);
 422	P_MMAP_FLAG(DENYWRITE);
 423	P_MMAP_FLAG(EXECUTABLE);
 424	P_MMAP_FLAG(FILE);
 425	P_MMAP_FLAG(FIXED);
 426	P_MMAP_FLAG(GROWSDOWN);
 427#ifdef MAP_HUGETLB
 428	P_MMAP_FLAG(HUGETLB);
 429#endif
 430	P_MMAP_FLAG(LOCKED);
 431	P_MMAP_FLAG(NONBLOCK);
 432	P_MMAP_FLAG(NORESERVE);
 433	P_MMAP_FLAG(POPULATE);
 434	P_MMAP_FLAG(STACK);
 435#ifdef MAP_UNINITIALIZED
 436	P_MMAP_FLAG(UNINITIALIZED);
 437#endif
 438#undef P_MMAP_FLAG
 439
 440	if (flags)
 441		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 
 
 
 
 442
 443	return printed;
 444}
 445
 446#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
 447
 448static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
 449						  struct syscall_arg *arg)
 450{
 451	int printed = 0, flags = arg->val;
 
 452
 453#define P_MREMAP_FLAG(n) \
 454	if (flags & MREMAP_##n) { \
 455		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 456		flags &= ~MREMAP_##n; \
 457	}
 458
 459	P_MREMAP_FLAG(MAYMOVE);
 460#ifdef MREMAP_FIXED
 461	P_MREMAP_FLAG(FIXED);
 462#endif
 463#undef P_MREMAP_FLAG
 464
 465	if (flags)
 466		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 
 467
 468	return printed;
 469}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 470
 471#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
 
 472
 473static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
 474						      struct syscall_arg *arg)
 475{
 476	int behavior = arg->val;
 477
 478	switch (behavior) {
 479#define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
 480	P_MADV_BHV(NORMAL);
 481	P_MADV_BHV(RANDOM);
 482	P_MADV_BHV(SEQUENTIAL);
 483	P_MADV_BHV(WILLNEED);
 484	P_MADV_BHV(DONTNEED);
 485	P_MADV_BHV(REMOVE);
 486	P_MADV_BHV(DONTFORK);
 487	P_MADV_BHV(DOFORK);
 488	P_MADV_BHV(HWPOISON);
 489#ifdef MADV_SOFT_OFFLINE
 490	P_MADV_BHV(SOFT_OFFLINE);
 491#endif
 492	P_MADV_BHV(MERGEABLE);
 493	P_MADV_BHV(UNMERGEABLE);
 494#ifdef MADV_HUGEPAGE
 495	P_MADV_BHV(HUGEPAGE);
 496#endif
 497#ifdef MADV_NOHUGEPAGE
 498	P_MADV_BHV(NOHUGEPAGE);
 499#endif
 500#ifdef MADV_DONTDUMP
 501	P_MADV_BHV(DONTDUMP);
 502#endif
 503#ifdef MADV_DODUMP
 504	P_MADV_BHV(DODUMP);
 505#endif
 506#undef P_MADV_PHV
 507	default: break;
 508	}
 509
 510	return scnprintf(bf, size, "%#x", behavior);
 511}
 512
 513#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
 
 
 
 
 
 
 
 
 514
 515static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
 516					   struct syscall_arg *arg)
 517{
 518	int printed = 0, op = arg->val;
 519
 520	if (op == 0)
 521		return scnprintf(bf, size, "NONE");
 522#define	P_CMD(cmd) \
 523	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
 524		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
 525		op &= ~LOCK_##cmd; \
 526	}
 527
 528	P_CMD(SH);
 529	P_CMD(EX);
 530	P_CMD(NB);
 531	P_CMD(UN);
 532	P_CMD(MAND);
 533	P_CMD(RW);
 534	P_CMD(READ);
 535	P_CMD(WRITE);
 536#undef P_OP
 537
 538	if (op)
 539		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
 540
 541	return printed;
 542}
 543
 544#define SCA_FLOCK syscall_arg__scnprintf_flock
 545
/*
 * Beautify the futex(2) 'op' argument: print the command name plus the
 * PRIV/CLKRT modifier flags, and set bits in arg->mask for the trailing
 * arguments (val/timeout/uaddr2/val3) the command does not use, so
 * they are suppressed from the output.
 */
static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
{
	/* bit positions of the futex syscall's arguments, for arg->mask */
	enum syscall_futex_args {
		SCF_UADDR   = (1 << 0),
		SCF_OP	    = (1 << 1),
		SCF_VAL	    = (1 << 2),
		SCF_TIMEOUT = (1 << 3),
		SCF_UADDR2  = (1 << 4),
		SCF_VAL3    = (1 << 5),
	};
	int op = arg->val;
	int cmd = op & FUTEX_CMD_MASK;
	size_t printed = 0;

	switch (cmd) {
#define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
	P_FUTEX_OP(WAKE_OP);							  break;
	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
	}

	/* modifier bits outside FUTEX_CMD_MASK */
	if (op & FUTEX_PRIVATE_FLAG)
		printed += scnprintf(bf + printed, size - printed, "|PRIV");

	if (op & FUTEX_CLOCK_REALTIME)
		printed += scnprintf(bf + printed, size - printed, "|CLKRT");

	return printed;
}

#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
 588
/* bpf(2) cmd argument names, indexed by enum bpf_cmd value. */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);

/* epoll_ctl(2) op names; EPOLL_CTL_ADD is 1, hence the offset. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

/* {get,set}itimer(2) which-timer names. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/* keyctl(2) operation names, indexed by KEYCTL_* value. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);

/* lseek(2) whence names; DATA/HOLE only where the headers define them. */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

/* fcntl(2) cmd names, indexed by F_* value. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* {get,set}rlimit(2) resource names, indexed by RLIMIT_* value. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* sigprocmask(2) how names, indexed by SIG_* value. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

/* clock_gettime(2) et al. clk_id names, indexed by CLOCK_* value. */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);

/* socket(2) address family names, indexed by AF_* value. */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);

/* Low nibble of the socket type argument; the rest are SOCK_* flags. */
#ifndef SOCK_TYPE_MASK
#define SOCK_TYPE_MASK 0xf
#endif
 658
 659static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
 660						      struct syscall_arg *arg)
 661{
 662	size_t printed;
 663	int type = arg->val,
 664	    flags = type & ~SOCK_TYPE_MASK;
 665
 666	type &= SOCK_TYPE_MASK;
 667	/*
 668 	 * Can't use a strarray, MIPS may override for ABI reasons.
 669 	 */
 670	switch (type) {
 671#define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
 672	P_SK_TYPE(STREAM);
 673	P_SK_TYPE(DGRAM);
 674	P_SK_TYPE(RAW);
 675	P_SK_TYPE(RDM);
 676	P_SK_TYPE(SEQPACKET);
 677	P_SK_TYPE(DCCP);
 678	P_SK_TYPE(PACKET);
 679#undef P_SK_TYPE
 680	default:
 681		printed = scnprintf(bf, size, "%#x", type);
 682	}
 683
 684#define	P_SK_FLAG(n) \
 685	if (flags & SOCK_##n) { \
 686		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
 687		flags &= ~SOCK_##n; \
 688	}
 689
 690	P_SK_FLAG(CLOEXEC);
 691	P_SK_FLAG(NONBLOCK);
 692#undef P_SK_FLAG
 693
 694	if (flags)
 695		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
 696
 697	return printed;
 698}
 699
 700#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
 701
/* MSG_* flag fallbacks for older userspace headers. */
#ifndef MSG_PROBE
#define MSG_PROBE	     0x10
#endif
#ifndef MSG_WAITFORONE
#define MSG_WAITFORONE	0x10000
#endif
#ifndef MSG_SENDPAGE_NOTLAST
#define MSG_SENDPAGE_NOTLAST 0x20000
#endif
#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN	     0x20000000
#endif
 714
 715static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
 716					       struct syscall_arg *arg)
 717{
 718	int printed = 0, flags = arg->val;
 719
 720	if (flags == 0)
 721		return scnprintf(bf, size, "NONE");
 722#define	P_MSG_FLAG(n) \
 723	if (flags & MSG_##n) { \
 724		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 725		flags &= ~MSG_##n; \
 726	}
 727
 728	P_MSG_FLAG(OOB);
 729	P_MSG_FLAG(PEEK);
 730	P_MSG_FLAG(DONTROUTE);
 731	P_MSG_FLAG(TRYHARD);
 732	P_MSG_FLAG(CTRUNC);
 733	P_MSG_FLAG(PROBE);
 734	P_MSG_FLAG(TRUNC);
 735	P_MSG_FLAG(DONTWAIT);
 736	P_MSG_FLAG(EOR);
 737	P_MSG_FLAG(WAITALL);
 738	P_MSG_FLAG(FIN);
 739	P_MSG_FLAG(SYN);
 740	P_MSG_FLAG(CONFIRM);
 741	P_MSG_FLAG(RST);
 742	P_MSG_FLAG(ERRQUEUE);
 743	P_MSG_FLAG(NOSIGNAL);
 744	P_MSG_FLAG(MORE);
 745	P_MSG_FLAG(WAITFORONE);
 746	P_MSG_FLAG(SENDPAGE_NOTLAST);
 747	P_MSG_FLAG(FASTOPEN);
 748	P_MSG_FLAG(CMSG_CLOEXEC);
 749#undef P_MSG_FLAG
 750
 751	if (flags)
 752		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 753
 754	return printed;
 755}
 756
 757#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
 758
 759static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
 760						 struct syscall_arg *arg)
 761{
 
 
 762	size_t printed = 0;
 763	int mode = arg->val;
 764
 765	if (mode == F_OK) /* 0 */
 766		return scnprintf(bf, size, "F");
 767#define	P_MODE(n) \
 768	if (mode & n##_OK) { \
 769		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
 770		mode &= ~n##_OK; \
 771	}
 772
 773	P_MODE(R);
 774	P_MODE(W);
 775	P_MODE(X);
 776#undef P_MODE
 777
 778	if (mode)
 779		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
 780
 781	return printed;
 782}
 783
 784#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
 785
/* Forward declaration: the filename beautifier needs per-thread state. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
 790
 791static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
 792					       struct syscall_arg *arg)
 
 
 
 
 
 
 
 
 
 793{
 
 
 794	int printed = 0, flags = arg->val;
 795
 796	if (!(flags & O_CREAT))
 797		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
 798
 799	if (flags == 0)
 800		return scnprintf(bf, size, "RDONLY");
 801#define	P_FLAG(n) \
 802	if (flags & O_##n) { \
 803		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 804		flags &= ~O_##n; \
 805	}
 806
 807	P_FLAG(APPEND);
 808	P_FLAG(ASYNC);
 809	P_FLAG(CLOEXEC);
 810	P_FLAG(CREAT);
 811	P_FLAG(DIRECT);
 812	P_FLAG(DIRECTORY);
 813	P_FLAG(EXCL);
 814	P_FLAG(LARGEFILE);
 815	P_FLAG(NOATIME);
 816	P_FLAG(NOCTTY);
 817#ifdef O_NONBLOCK
 818	P_FLAG(NONBLOCK);
 819#elif O_NDELAY
 820	P_FLAG(NDELAY);
 821#endif
 822#ifdef O_PATH
 823	P_FLAG(PATH);
 824#endif
 825	P_FLAG(RDWR);
 826#ifdef O_DSYNC
 827	if ((flags & O_SYNC) == O_SYNC)
 828		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
 829	else {
 830		P_FLAG(DSYNC);
 831	}
 832#else
 833	P_FLAG(SYNC);
 834#endif
 835	P_FLAG(TRUNC);
 836	P_FLAG(WRONLY);
 837#undef P_FLAG
 838
 839	if (flags)
 840		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 841
 842	return printed;
 843}
 844
 845#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
 846
 847static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
 848						struct syscall_arg *arg)
 
 
 
 
 
 
 
 849{
 
 
 850	int printed = 0, flags = arg->val;
 851
 852	if (flags == 0)
 853		return 0;
 854
 855#define	P_FLAG(n) \
 856	if (flags & PERF_FLAG_##n) { \
 857		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 858		flags &= ~PERF_FLAG_##n; \
 859	}
 860
 861	P_FLAG(FD_NO_GROUP);
 862	P_FLAG(FD_OUTPUT);
 863	P_FLAG(PID_CGROUP);
 864	P_FLAG(FD_CLOEXEC);
 865#undef P_FLAG
 866
 867	if (flags)
 868		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 869
 870	return printed;
 871}
 872
 873#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
 874
 875static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
 876						   struct syscall_arg *arg)
 877{
 878	int printed = 0, flags = arg->val;
 879
 880	if (flags == 0)
 881		return scnprintf(bf, size, "NONE");
 882#define	P_FLAG(n) \
 883	if (flags & EFD_##n) { \
 884		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 885		flags &= ~EFD_##n; \
 886	}
 887
 888	P_FLAG(SEMAPHORE);
 889	P_FLAG(CLOEXEC);
 890	P_FLAG(NONBLOCK);
 891#undef P_FLAG
 892
 893	if (flags)
 894		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 
 895
 896	return printed;
 897}
 898
 899#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
 
 
 
 
 900
 901static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
 902						struct syscall_arg *arg)
 
 
 
 
 
 
 
 
 
 
 
 
 903{
 904	int printed = 0, flags = arg->val;
 
 
 905
 906#define	P_FLAG(n) \
 907	if (flags & O_##n) { \
 908		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 909		flags &= ~O_##n; \
 
 
 
 
 
 910	}
 911
 912	P_FLAG(CLOEXEC);
 913	P_FLAG(NONBLOCK);
 914#undef P_FLAG
 
 915
 916	if (flags)
 917		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 
 918
 919	return printed;
 920}
 921
 922#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 
 
 
 923
 924static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 925{
 926	int sig = arg->val;
 927
 928	switch (sig) {
 929#define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
 930	P_SIGNUM(HUP);
 931	P_SIGNUM(INT);
 932	P_SIGNUM(QUIT);
 933	P_SIGNUM(ILL);
 934	P_SIGNUM(TRAP);
 935	P_SIGNUM(ABRT);
 936	P_SIGNUM(BUS);
 937	P_SIGNUM(FPE);
 938	P_SIGNUM(KILL);
 939	P_SIGNUM(USR1);
 940	P_SIGNUM(SEGV);
 941	P_SIGNUM(USR2);
 942	P_SIGNUM(PIPE);
 943	P_SIGNUM(ALRM);
 944	P_SIGNUM(TERM);
 945	P_SIGNUM(CHLD);
 946	P_SIGNUM(CONT);
 947	P_SIGNUM(STOP);
 948	P_SIGNUM(TSTP);
 949	P_SIGNUM(TTIN);
 950	P_SIGNUM(TTOU);
 951	P_SIGNUM(URG);
 952	P_SIGNUM(XCPU);
 953	P_SIGNUM(XFSZ);
 954	P_SIGNUM(VTALRM);
 955	P_SIGNUM(PROF);
 956	P_SIGNUM(WINCH);
 957	P_SIGNUM(IO);
 958	P_SIGNUM(PWR);
 959	P_SIGNUM(SYS);
 960#ifdef SIGEMT
 961	P_SIGNUM(EMT);
 962#endif
 963#ifdef SIGSTKFLT
 964	P_SIGNUM(STKFLT);
 965#endif
 966#ifdef SIGSWI
 967	P_SIGNUM(SWI);
 968#endif
 969	default: break;
 
 
 
 
 
 
 
 
 970	}
 971
 972	return scnprintf(bf, size, "%#x", sig);
 
 
 
 
 
 
 
 
 
 973}
 974
 975#define SCA_SIGNUM syscall_arg__scnprintf_signum
 
 
 
 
 
 
 976
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
#define TCGETS		0x5401

/*
 * tty ioctl(2) request names, indexed from TCGETS (0x5401); the
 * designated initializers skip holes in the request number space.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
};

static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
#endif /* defined(__i386__) || defined(__x86_64__) */
1003
/*
 * Convenience initializer for a syscall_fmt entry: hook up the
 * strarray beautifier and its table for syscall argument 'arg'.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
1007
1008static struct syscall_fmt {
1009	const char *name;
1010	const char *alias;
1011	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1012	void	   *arg_parm[6];
1013	bool	   errmsg;
1014	bool	   timeout;
1015	bool	   hexret;
1016} syscall_fmts[] = {
1017	{ .name	    = "access",	    .errmsg = true,
1018	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1019			     [1] = SCA_ACCMODE,  /* mode */ }, },
1020	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
1021	{ .name	    = "bpf",	    .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1022	{ .name	    = "brk",	    .hexret = true,
1023	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1024	{ .name	    = "chdir",	    .errmsg = true,
1025	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1026	{ .name	    = "chmod",	    .errmsg = true,
1027	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1028	{ .name	    = "chroot",	    .errmsg = true,
1029	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1030	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
1031	{ .name	    = "close",	    .errmsg = true,
1032	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1033	{ .name	    = "connect",    .errmsg = true, },
1034	{ .name	    = "creat",	    .errmsg = true,
1035	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1036	{ .name	    = "dup",	    .errmsg = true,
1037	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1038	{ .name	    = "dup2",	    .errmsg = true,
1039	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1040	{ .name	    = "dup3",	    .errmsg = true,
1041	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1042	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1043	{ .name	    = "eventfd2",   .errmsg = true,
1044	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1045	{ .name	    = "faccessat",  .errmsg = true,
1046	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1047			     [1] = SCA_FILENAME, /* filename */ }, },
1048	{ .name	    = "fadvise64",  .errmsg = true,
1049	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1050	{ .name	    = "fallocate",  .errmsg = true,
1051	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1052	{ .name	    = "fchdir",	    .errmsg = true,
1053	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1054	{ .name	    = "fchmod",	    .errmsg = true,
1055	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056	{ .name	    = "fchmodat",   .errmsg = true,
1057	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1058			     [1] = SCA_FILENAME, /* filename */ }, },
1059	{ .name	    = "fchown",	    .errmsg = true,
1060	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061	{ .name	    = "fchownat",   .errmsg = true,
1062	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1063			     [1] = SCA_FILENAME, /* filename */ }, },
1064	{ .name	    = "fcntl",	    .errmsg = true,
1065	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1066			     [1] = SCA_STRARRAY, /* cmd */ },
1067	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1068	{ .name	    = "fdatasync",  .errmsg = true,
1069	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1070	{ .name	    = "flock",	    .errmsg = true,
1071	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1072			     [1] = SCA_FLOCK, /* cmd */ }, },
1073	{ .name	    = "fsetxattr",  .errmsg = true,
1074	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1075	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
1076	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1077	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
1078	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1079			     [1] = SCA_FILENAME, /* filename */ }, },
1080	{ .name	    = "fstatfs",    .errmsg = true,
1081	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1082	{ .name	    = "fsync",    .errmsg = true,
1083	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1084	{ .name	    = "ftruncate", .errmsg = true,
1085	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1086	{ .name	    = "futex",	    .errmsg = true,
1087	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1088	{ .name	    = "futimesat", .errmsg = true,
1089	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1090			     [1] = SCA_FILENAME, /* filename */ }, },
1091	{ .name	    = "getdents",   .errmsg = true,
1092	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1093	{ .name	    = "getdents64", .errmsg = true,
1094	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1095	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1096	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1097	{ .name	    = "getxattr",    .errmsg = true,
1098	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1099	{ .name	    = "inotify_add_watch",	    .errmsg = true,
1100	  .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1101	{ .name	    = "ioctl",	    .errmsg = true,
1102	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1103#if defined(__i386__) || defined(__x86_64__)
1104/*
1105 * FIXME: Make this available to all arches.
1106 */
1107			     [1] = SCA_STRHEXARRAY, /* cmd */
1108			     [2] = SCA_HEX, /* arg */ },
1109	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
1110#else
1111			     [2] = SCA_HEX, /* arg */ }, },
1112#endif
1113	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
1114	{ .name	    = "kill",	    .errmsg = true,
1115	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1116	{ .name	    = "lchown",    .errmsg = true,
1117	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1118	{ .name	    = "lgetxattr",  .errmsg = true,
1119	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1120	{ .name	    = "linkat",	    .errmsg = true,
1121	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1122	{ .name	    = "listxattr",  .errmsg = true,
1123	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1124	{ .name	    = "llistxattr", .errmsg = true,
1125	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1126	{ .name	    = "lremovexattr",  .errmsg = true,
1127	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1128	{ .name	    = "lseek",	    .errmsg = true,
1129	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1130			     [2] = SCA_STRARRAY, /* whence */ },
1131	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
1132	{ .name	    = "lsetxattr",  .errmsg = true,
1133	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1134	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat",
1135	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1136	{ .name	    = "lsxattr",    .errmsg = true,
1137	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1138	{ .name     = "madvise",    .errmsg = true,
1139	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
1140			     [2] = SCA_MADV_BHV, /* behavior */ }, },
1141	{ .name	    = "mkdir",    .errmsg = true,
1142	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1143	{ .name	    = "mkdirat",    .errmsg = true,
1144	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1145			     [1] = SCA_FILENAME, /* pathname */ }, },
1146	{ .name	    = "mknod",      .errmsg = true,
1147	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1148	{ .name	    = "mknodat",    .errmsg = true,
1149	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1150			     [1] = SCA_FILENAME, /* filename */ }, },
1151	{ .name	    = "mlock",	    .errmsg = true,
1152	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1153	{ .name	    = "mlockall",   .errmsg = true,
1154	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1155	{ .name	    = "mmap",	    .hexret = true,
1156	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
1157			     [2] = SCA_MMAP_PROT, /* prot */
1158			     [3] = SCA_MMAP_FLAGS, /* flags */
1159			     [4] = SCA_FD, 	  /* fd */ }, },
1160	{ .name	    = "mprotect",   .errmsg = true,
1161	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
1162			     [2] = SCA_MMAP_PROT, /* prot */ }, },
1163	{ .name	    = "mq_unlink", .errmsg = true,
1164	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1165	{ .name	    = "mremap",	    .hexret = true,
1166	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1167			     [3] = SCA_MREMAP_FLAGS, /* flags */
1168			     [4] = SCA_HEX, /* new_addr */ }, },
1169	{ .name	    = "munlock",    .errmsg = true,
1170	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1171	{ .name	    = "munmap",	    .errmsg = true,
1172	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1173	{ .name	    = "name_to_handle_at", .errmsg = true,
1174	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1175	{ .name	    = "newfstatat", .errmsg = true,
1176	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1177			     [1] = SCA_FILENAME, /* filename */ }, },
1178	{ .name	    = "open",	    .errmsg = true,
1179	  .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1180			     [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1181	{ .name	    = "open_by_handle_at", .errmsg = true,
1182	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1183			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1184	{ .name	    = "openat",	    .errmsg = true,
1185	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1186			     [1] = SCA_FILENAME, /* filename */
1187			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1188	{ .name	    = "perf_event_open", .errmsg = true,
1189	  .arg_scnprintf = { [1] = SCA_INT, /* pid */
1190			     [2] = SCA_INT, /* cpu */
1191			     [3] = SCA_FD,  /* group_fd */
1192			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1193	{ .name	    = "pipe2",	    .errmsg = true,
1194	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1195	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
1196	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
1197	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
1198	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1199	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
1200	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1201	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1202	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1203	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1204	{ .name	    = "pwritev",    .errmsg = true,
1205	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1206	{ .name	    = "read",	    .errmsg = true,
1207	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1208	{ .name	    = "readlink",   .errmsg = true,
1209	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1210	{ .name	    = "readlinkat", .errmsg = true,
1211	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1212			     [1] = SCA_FILENAME, /* pathname */ }, },
1213	{ .name	    = "readv",	    .errmsg = true,
1214	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1215	{ .name	    = "recvfrom",   .errmsg = true,
1216	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1217			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1218	{ .name	    = "recvmmsg",   .errmsg = true,
1219	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1220			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1221	{ .name	    = "recvmsg",    .errmsg = true,
1222	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1223			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1224	{ .name	    = "removexattr", .errmsg = true,
1225	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1226	{ .name	    = "renameat",   .errmsg = true,
1227	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1228	{ .name	    = "rmdir",    .errmsg = true,
1229	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1230	{ .name	    = "rt_sigaction", .errmsg = true,
1231	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1232	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1233	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1234	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1235	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1236	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1237	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1238	{ .name	    = "sendmmsg",    .errmsg = true,
1239	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1240			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1241	{ .name	    = "sendmsg",    .errmsg = true,
1242	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1243			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1244	{ .name	    = "sendto",	    .errmsg = true,
1245	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1246			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1247	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1248	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1249	{ .name	    = "setxattr",   .errmsg = true,
1250	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1251	{ .name	    = "shutdown",   .errmsg = true,
1252	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1253	{ .name	    = "socket",	    .errmsg = true,
1254	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1255			     [1] = SCA_SK_TYPE, /* type */ },
1256	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1257	{ .name	    = "socketpair", .errmsg = true,
1258	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1259			     [1] = SCA_SK_TYPE, /* type */ },
1260	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1261	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat",
1262	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1263	{ .name	    = "statfs",	    .errmsg = true,
1264	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1265	{ .name	    = "swapoff",    .errmsg = true,
1266	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1267	{ .name	    = "swapon",	    .errmsg = true,
1268	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1269	{ .name	    = "symlinkat",  .errmsg = true,
1270	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1271	{ .name	    = "tgkill",	    .errmsg = true,
1272	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1273	{ .name	    = "tkill",	    .errmsg = true,
1274	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1275	{ .name	    = "truncate",   .errmsg = true,
1276	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1277	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1278	{ .name	    = "unlinkat",   .errmsg = true,
1279	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1280			     [1] = SCA_FILENAME, /* pathname */ }, },
1281	{ .name	    = "utime",  .errmsg = true,
1282	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1283	{ .name	    = "utimensat",  .errmsg = true,
1284	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1285			     [1] = SCA_FILENAME, /* filename */ }, },
1286	{ .name	    = "utimes",  .errmsg = true,
1287	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1288	{ .name	    = "vmsplice",  .errmsg = true,
1289	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1290	{ .name	    = "write",	    .errmsg = true,
1291	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1292	{ .name	    = "writev",	    .errmsg = true,
1293	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
 
 
 
 
 
 
 
1294};
1295
1296static int syscall_fmt__cmp(const void *name, const void *fmtp)
1297{
1298	const struct syscall_fmt *fmt = fmtp;
1299	return strcmp(name, fmt->name);
1300}
1301
1302static struct syscall_fmt *syscall_fmt__find(const char *name)
 
 
 
 
 
 
 
1303{
1304	const int nmemb = ARRAY_SIZE(syscall_fmts);
1305	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1306}
1307
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Runtime description of one syscall: its tracepoint format, argument
 * metadata and the hooks used to pretty-print each argument.  Filled
 * lazily, one slot per syscall id, by trace__read_syscall_info().
 */
struct syscall {
	struct event_format *tp_format;	/* "syscalls:sys_enter_<name>" format */
	int		    nr_args;	/* arg count, minus the leading syscall-nr field */
	struct format_field *args;	/* first real argument field of tp_format */
	const char	    *name;	/* canonical name from audit_syscall_to_name() */
	bool		    is_exit;	/* true for exit/exit_group: no sys_exit follows */
	struct syscall_fmt  *fmt;	/* optional syscall_fmts[] entry, may be NULL */
	/* per-argument pretty printer, indexed by arg position; NULL => plain %ld */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;	/* per-argument opaque parm handed to the printer */
};
1318
1319static size_t fprintf_duration(unsigned long t, FILE *fp)
 
 
 
 
 
 
 
1320{
1321	double duration = (double)t / NSEC_PER_MSEC;
1322	size_t printed = fprintf(fp, "(");
1323
1324	if (duration >= 1.0)
 
 
1325		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1326	else if (duration >= 0.01)
1327		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1328	else
1329		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1330	return printed + fprintf(fp, "): ");
1331}
1332
1333/**
1334 * filename.ptr: The filename char pointer that will be vfs_getname'd
1335 * filename.entry_str_pos: Where to insert the string translated from
1336 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 
 
1337 */
1338struct thread_trace {
1339	u64		  entry_time;
1340	u64		  exit_time;
1341	bool		  entry_pending;
1342	unsigned long	  nr_events;
1343	unsigned long	  pfmaj, pfmin;
1344	char		  *entry_str;
1345	double		  runtime_ms;
 
1346        struct {
1347		unsigned long ptr;
1348		short int     entry_str_pos;
1349		bool	      pending_open;
1350		unsigned int  namelen;
1351		char	      *name;
1352	} filename;
1353	struct {
1354		int	  max;
1355		char	  **table;
1356	} paths;
1357
1358	struct intlist *syscall_stats;
1359};
1360
1361static struct thread_trace *thread_trace__new(void)
1362{
1363	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1364
1365	if (ttrace)
1366		ttrace->paths.max = -1;
1367
1368	ttrace->syscall_stats = intlist__new(NULL);
1369
1370	return ttrace;
1371}
1372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1373static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1374{
1375	struct thread_trace *ttrace;
1376
1377	if (thread == NULL)
1378		goto fail;
1379
1380	if (thread__priv(thread) == NULL)
1381		thread__set_priv(thread, thread_trace__new());
1382
1383	if (thread__priv(thread) == NULL)
1384		goto fail;
1385
1386	ttrace = thread__priv(thread);
1387	++ttrace->nr_events;
1388
1389	return ttrace;
1390fail:
1391	color_fprintf(fp, PERF_COLOR_RED,
1392		      "WARNING: not enough memory, dropping samples!\n");
1393	return NULL;
1394}
1395
 
 
 
 
 
 
 
 
 
/* Bits for trace->trace_pgfaults — presumably set from the command line; confirm against option parsing */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size of the per-thread buffer used to format the sys_enter line */
static const size_t trace__entry_str_size = 2048;

/* Global state of one 'perf trace' run. */
struct trace {
	struct perf_tool	tool;
	struct {
		int		machine;	/* audit machine type for name <-> id lookups */
		int		open_id;
	}			audit;
	struct {
		int		max;		/* highest syscall id in table, -1 when empty */
		struct syscall  *table;		/* filled lazily by trace__read_syscall_info() */
		struct {
			struct perf_evsel *sys_enter,
					  *sys_exit;
		}		events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	struct machine		*host;		/* set up in trace__symbols_init() */
	struct thread		*current;	/* thread whose sys_enter may still be pending */
	u64			base_time;	/* origin for relative timestamps */
	FILE			*output;
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;	/* syscall names from -e, see trace__validate_ev_qualifier() */
	struct {
		size_t		nr;
		int		*entries;	/* ev_qualifier translated to syscall ids */
	}			ev_qualifier_ids;
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	struct {
		size_t		nr;
		pid_t		*entries;
	}			filter_pids;
	double			duration_filter;	/* ms threshold, see trace__filter_duration() */
	double			runtime_ms;
	struct {
		u64		vfs_getname,	/* tool self-stats */
				proc_getname;	/* bumped when falling back to /proc fd readlink */
	} stats;
	bool			not_ev_qualifier;
	bool			live;		/* live session: allows /proc fd path lookups */
	bool			full_time;
	bool			sched;
	bool			multiple_threads;	/* prefix events with comm/tid */
	bool			summary;
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			force;
	bool			vfs_getname;	/* vfs_getname probe available for filenames */
	int			trace_pgfaults;
};
1453
/*
 * Cache @pathname for @fd in the per-thread fd -> path table, growing
 * the table when @fd is beyond its current size.
 *
 * NOTE(review): assumes thread__priv() is non-NULL, i.e. thread__trace()
 * already ran for this thread — confirm for all callers.
 *
 * Returns 0 on success, -1 when the table or the path copy cannot be
 * allocated.
 */
static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
{
	struct thread_trace *ttrace = thread__priv(thread);

	if (fd > ttrace->paths.max) {
		/* grow to fd + 1 entries, preserving the existing ones */
		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));

		if (npath == NULL)
			return -1;

		if (ttrace->paths.max != -1) {
			/* zero only the newly appended slots */
			memset(npath + ttrace->paths.max + 1, 0,
			       (fd - ttrace->paths.max) * sizeof(char *));
		} else {
			/* first allocation: zero the whole table */
			memset(npath, 0, (fd + 1) * sizeof(char *));
		}

		ttrace->paths.table = npath;
		ttrace->paths.max   = fd;
	}

	ttrace->paths.table[fd] = strdup(pathname);

	return ttrace->paths.table[fd] != NULL ? 0 : -1;
}
1479
1480static int thread__read_fd_path(struct thread *thread, int fd)
1481{
1482	char linkname[PATH_MAX], pathname[PATH_MAX];
1483	struct stat st;
1484	int ret;
1485
1486	if (thread->pid_ == thread->tid) {
1487		scnprintf(linkname, sizeof(linkname),
1488			  "/proc/%d/fd/%d", thread->pid_, fd);
1489	} else {
1490		scnprintf(linkname, sizeof(linkname),
1491			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
 
1492	}
1493
1494	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1495		return -1;
1496
1497	ret = readlink(linkname, pathname, sizeof(pathname));
1498
1499	if (ret < 0 || ret > st.st_size)
1500		return -1;
1501
1502	pathname[ret] = '\0';
1503	return trace__set_fd_pathname(thread, fd, pathname);
1504}
1505
1506static const char *thread__fd_path(struct thread *thread, int fd,
1507				   struct trace *trace)
1508{
1509	struct thread_trace *ttrace = thread__priv(thread);
1510
1511	if (ttrace == NULL)
1512		return NULL;
1513
1514	if (fd < 0)
1515		return NULL;
1516
1517	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1518		if (!trace->live)
1519			return NULL;
1520		++trace->stats.proc_getname;
1521		if (thread__read_fd_path(thread, fd))
1522			return NULL;
1523	}
1524
1525	return ttrace->paths.table[fd];
1526}
1527
1528static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1529					struct syscall_arg *arg)
1530{
1531	int fd = arg->val;
1532	size_t printed = scnprintf(bf, size, "%d", fd);
1533	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1534
1535	if (path)
1536		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1537
1538	return printed;
1539}
1540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1541static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1542					      struct syscall_arg *arg)
1543{
1544	int fd = arg->val;
1545	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1546	struct thread_trace *ttrace = thread__priv(arg->thread);
1547
1548	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1549		zfree(&ttrace->paths.table[fd]);
1550
1551	return printed;
1552}
1553
1554static void thread__set_filename_pos(struct thread *thread, const char *bf,
1555				     unsigned long ptr)
1556{
1557	struct thread_trace *ttrace = thread__priv(thread);
1558
1559	ttrace->filename.ptr = ptr;
1560	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1561}
1562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1563static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1564					      struct syscall_arg *arg)
1565{
1566	unsigned long ptr = arg->val;
1567
 
 
 
1568	if (!arg->trace->vfs_getname)
1569		return scnprintf(bf, size, "%#x", ptr);
1570
1571	thread__set_filename_pos(arg->thread, bf, ptr);
1572	return 0;
1573}
1574
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1575static bool trace__filter_duration(struct trace *trace, double t)
1576{
1577	return t < (trace->duration_filter * NSEC_PER_MSEC);
1578}
1579
1580static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1581{
1582	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1583
1584	return fprintf(fp, "%10.3f ", ts);
1585}
1586
/* Set asynchronously by sig_handler() to make the main loop wind down. */
static bool done = false;
/* true when the terminating signal was SIGINT */
static bool interrupted = false;

/*
 * Signal handler: flag the main loop to stop and record whether the
 * signal was an interactive interrupt (SIGINT).
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1595
1596static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1597					u64 duration, u64 tstamp, FILE *fp)
1598{
1599	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1600	printed += fprintf_duration(duration, fp);
 
 
 
 
 
1601
1602	if (trace->multiple_threads) {
1603		if (trace->show_comm)
1604			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1605		printed += fprintf(fp, "%d ", thread->tid);
1606	}
1607
1608	return printed;
1609}
1610
 
 
 
 
 
 
 
 
 
 
 
 
1611static int trace__process_event(struct trace *trace, struct machine *machine,
1612				union perf_event *event, struct perf_sample *sample)
1613{
1614	int ret = 0;
1615
1616	switch (event->header.type) {
1617	case PERF_RECORD_LOST:
1618		color_fprintf(trace->output, PERF_COLOR_RED,
1619			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1620		ret = machine__process_lost_event(machine, event, sample);
 
1621	default:
1622		ret = machine__process_event(machine, event, sample);
1623		break;
1624	}
1625
1626	return ret;
1627}
1628
1629static int trace__tool_process(struct perf_tool *tool,
1630			       union perf_event *event,
1631			       struct perf_sample *sample,
1632			       struct machine *machine)
1633{
1634	struct trace *trace = container_of(tool, struct trace, tool);
1635	return trace__process_event(trace, machine, event, sample);
1636}
1637
1638static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1639{
1640	int err = symbol__init(NULL);
1641
1642	if (err)
1643		return err;
1644
1645	trace->host = machine__new_host();
1646	if (trace->host == NULL)
1647		return -ENOMEM;
1648
1649	if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1650		return -errno;
 
 
 
1651
1652	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1653					    evlist->threads, trace__tool_process, false,
1654					    trace->opts.proc_map_timeout);
 
1655	if (err)
1656		symbol__exit();
1657
1658	return err;
1659}
1660
1661static int syscall__set_arg_fmts(struct syscall *sc)
1662{
1663	struct format_field *field;
1664	int idx = 0;
1665
1666	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1667	if (sc->arg_scnprintf == NULL)
1668		return -1;
 
 
 
1669
1670	if (sc->fmt)
1671		sc->arg_parm = sc->fmt->arg_parm;
1672
1673	for (field = sc->args; field; field = field->next) {
1674		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1675			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1676		else if (field->flags & FIELD_IS_POINTER)
1677			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1678		++idx;
 
1679	}
1680
 
1681	return 0;
1682}
1683
/*
 * Lazily fill trace->syscalls.table[id]: resolve the syscall name via
 * audit, grow the table as needed, look up the sys_enter tracepoint
 * format (retrying with the alias from syscall_fmts[] when the primary
 * name has no tracepoint) and set up the per-argument formatters.
 *
 * Returns 0 on success, -1 on failure (unknown id, allocation failure,
 * or no usable tracepoint format).
 */
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = audit_syscall_to_name(id, trace->audit.machine);

	if (name == NULL)
		return -1;

	if (id > trace->syscalls.max) {
		/* grow to id + 1 entries, zeroing only the new slots */
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		if (trace->syscalls.max != -1) {
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			/* first allocation: zero the whole table */
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max   = id;
	}

	sc = trace->syscalls.table + id;
	sc->name = name;

	sc->fmt  = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	/* some syscalls have tracepoints only under an alias (e.g. newstat) */
	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	if (IS_ERR(sc->tp_format))
		return -1;

	sc->args = sc->tp_format->format.fields;
	sc->nr_args = sc->tp_format->format.nr_fields;
	/*
	 * We need to check and discard the first variable '__syscall_nr'
	 * or 'nr' that mean the syscall number. It is needless here.
	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
	 */
	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
		sc->args = sc->args->next;
		--sc->nr_args;
	}

	/* exit/exit_group never return, so no sys_exit will pair with them */
	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");

	return syscall__set_arg_fmts(sc);
}
1742
/*
 * Translate the -e syscall name list (trace->ev_qualifier) into syscall
 * ids (trace->ev_qualifier_ids) using the audit library.
 *
 * All invalid names are reported in one pass; on any failure the ids
 * array is freed and -EINVAL returned, 0 on success.
 */
static int trace__validate_ev_qualifier(struct trace *trace)
{
	int err = 0, i;
	struct str_node *pos;

	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
						 sizeof(trace->ev_qualifier_ids.entries[0]));

	if (trace->ev_qualifier_ids.entries == NULL) {
		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
		       trace->output);
		err = -EINVAL;
		goto out;
	}

	i = 0;

	strlist__for_each(pos, trace->ev_qualifier) {
		const char *sc = pos->s;
		int id = audit_name_to_syscall(sc, trace->audit.machine);

		if (id < 0) {
			/* keep iterating so every bad name is listed at once */
			if (err == 0) {
				fputs("Error:\tInvalid syscall ", trace->output);
				err = -EINVAL;
			} else {
				fputs(", ", trace->output);
			}

			fputs(sc, trace->output);
		}

		/*
		 * NOTE(review): a -1 id is stored here too; harmless since the
		 * whole array is discarded below whenever err < 0.
		 */
		trace->ev_qualifier_ids.entries[i++] = id;
	}

	if (err < 0) {
		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
		      "\nHint:\tand: 'man syscalls'\n", trace->output);
		zfree(&trace->ev_qualifier_ids.entries);
		trace->ev_qualifier_ids.nr = 0;
	}
out:
	return err;
}
1788
1789/*
1790 * args is to be interpreted as a series of longs but we need to handle
1791 * 8-byte unaligned accesses. args points to raw_data within the event
1792 * and raw_data is guaranteed to be 8-byte unaligned because it is
1793 * preceded by raw_size which is a u32. So we need to copy args to a temp
1794 * variable to read it. Most notably this avoids extended load instructions
1795 * on unaligned addresses
1796 */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1797
/*
 * Format the syscall arguments found in @args into @bf.
 *
 * With tracepoint format info (sc->args) each argument is printed as
 * "name: value", using the per-argument formatter when one was set up by
 * syscall__set_arg_fmts().  Without format info, all six potential args
 * are dumped as raw longs.  Returns the number of characters written.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;
	unsigned char *p;
	unsigned long val;

	if (sc->args != NULL) {
		struct format_field *field;
		u8 bit = 1;	/* one bit per argument index, tracked in arg.mask */
		struct syscall_arg arg = {
			.idx	= 0,
			.mask	= 0,
			.trace  = trace,
			.thread = thread,
		};

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			/* a formatter may mark further args as already consumed */
			if (arg.mask & bit)
				continue;

			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * arg.idx;
			memcpy(&val, p, sizeof(val));

			/*
			 * Suppress this argument if its value is zero and
			 * we don't have a string associated in an
			 * strarray for it.
			 */
			if (val == 0 &&
			    !(sc->arg_scnprintf &&
			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
			      sc->arg_parm[arg.idx]))
				continue;

			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
				arg.val = val;
				if (sc->arg_parm)
					arg.parm = sc->arg_parm[arg.idx];
				printed += sc->arg_scnprintf[arg.idx](bf + printed,
								      size - printed, &arg);
			} else {
				/* no formatter: print the raw long value */
				printed += scnprintf(bf + printed, size - printed,
						     "%ld", val);
			}
		}
	} else {
		int i = 0;

		/* no tracepoint format info: dump all six potential args */
		while (i < 6) {
			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * i;
			memcpy(&val, p, sizeof(val));
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, val);
			++i;
		}
	}

	return printed;
}
1865
/* Signature of the per-evsel sample handlers dispatched by the trace tool. */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1869
/*
 * Return the struct syscall for @id, reading its info on first use via
 * trace__read_syscall_info().  Returns NULL (with a diagnostic at the
 * right verbosity level) for invalid ids or when the info can't be read.
 */
static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel, int id)
{

	if (id < 0) {

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
 		 */
		if (verbose > 1) {
			static u64 n;	/* running count of skipped bogus ids */
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, perf_evsel__name(evsel), ++n);
		}
		return NULL;
	}

	/* first encounter of this id: fill its table slot */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	/* re-check: the read above may have failed to populate the slot */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
	if (verbose) {
		fprintf(trace->output, "Problems reading syscall %d", id);
		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
	return NULL;
}
1912
1913static void thread__update_stats(struct thread_trace *ttrace,
1914				 int id, struct perf_sample *sample)
 
 
 
 
 
 
 
1915{
1916	struct int_node *inode;
1917	struct stats *stats;
1918	u64 duration = 0;
1919
1920	inode = intlist__findnew(ttrace->syscall_stats, id);
1921	if (inode == NULL)
1922		return;
1923
1924	stats = inode->priv;
1925	if (stats == NULL) {
1926		stats = malloc(sizeof(struct stats));
1927		if (stats == NULL)
1928			return;
1929		init_stats(stats);
 
1930		inode->priv = stats;
1931	}
1932
1933	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1934		duration = sample->time - ttrace->entry_time;
1935
1936	update_stats(stats, duration);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1937}
1938
1939static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1940{
1941	struct thread_trace *ttrace;
1942	u64 duration;
1943	size_t printed;
 
1944
1945	if (trace->current == NULL)
1946		return 0;
1947
1948	ttrace = thread__priv(trace->current);
1949
1950	if (!ttrace->entry_pending)
1951		return 0;
1952
1953	duration = sample->time - ttrace->entry_time;
 
 
 
 
 
 
1954
1955	printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1956	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1957	ttrace->entry_pending = false;
 
1958
1959	return printed;
1960}
1961
/*
 * raw_syscalls:sys_enter handler: format "name(args" into the per-thread
 * entry_str buffer.  The closing ") = ret" half of the line is emitted by
 * trace__sys_exit(), unless the syscall never returns (sc->is_exit), in
 * which case the line is printed here immediately.
 *
 * Returns 0 on success, -1 when the syscall id or per-thread state can't
 * be resolved.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	/* findnew grabs a thread reference; dropped at out_put. */
	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* Lazily allocate the per-thread line buffer on first syscall. */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	/* Flush any still-pending entry line before starting a new one. */
	if (!trace->summary_only)
		trace__printf_interrupted_entry(trace, sample);

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, trace, thread);

	if (sc->is_exit) {
		/* No sys_exit will come (e.g. exit_group): print the line now. */
		if (!trace->duration_filter && !trace->summary_only) {
			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	/* Track the last thread seen so interrupted entries can be flushed. */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2020
/*
 * raw_syscalls:sys_exit handler: complete the line started by
 * trace__sys_enter() with ") = <ret>", applying the duration filter and
 * pretty-printing negative returns as errno names where a format is known.
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/* open() returned an fd: remember the path vfs_getname captured. */
	if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	ttrace->exit_time = sample->time;

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
	} else if (trace->duration_filter)
		goto out;

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		/* Entry already flushed as interrupted: mark as continued. */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	if (sc->fmt == NULL) {
signed_print:
		fprintf(trace->output, ") = %ld", ret);
	} else if (ret < 0 && sc->fmt->errmsg) {
		/* Decode negative returns as "-1 ERRNAME (strerror text)". */
		char bf[STRERR_BUFSIZE];
		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#lx", ret);
	else
		goto signed_print;

	fputc('\n', trace->output);
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2097
2098static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2099			      union perf_event *event __maybe_unused,
2100			      struct perf_sample *sample)
2101{
2102	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2103	struct thread_trace *ttrace;
2104	size_t filename_len, entry_str_len, to_move;
2105	ssize_t remaining_space;
2106	char *pos;
2107	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2108
2109	if (!thread)
2110		goto out;
2111
2112	ttrace = thread__priv(thread);
2113	if (!ttrace)
2114		goto out;
2115
2116	filename_len = strlen(filename);
 
 
2117
2118	if (ttrace->filename.namelen < filename_len) {
2119		char *f = realloc(ttrace->filename.name, filename_len + 1);
2120
2121		if (f == NULL)
2122				goto out;
2123
2124		ttrace->filename.namelen = filename_len;
2125		ttrace->filename.name = f;
2126	}
2127
2128	strcpy(ttrace->filename.name, filename);
2129	ttrace->filename.pending_open = true;
2130
2131	if (!ttrace->filename.ptr)
2132		goto out;
2133
2134	entry_str_len = strlen(ttrace->entry_str);
2135	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2136	if (remaining_space <= 0)
2137		goto out;
2138
2139	if (filename_len > (size_t)remaining_space) {
2140		filename += filename_len - remaining_space;
2141		filename_len = remaining_space;
2142	}
2143
2144	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2145	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2146	memmove(pos + filename_len, pos, to_move);
2147	memcpy(pos, filename, filename_len);
2148
2149	ttrace->filename.ptr = 0;
2150	ttrace->filename.entry_str_pos = 0;
 
 
2151out:
2152	return 0;
2153}
2154
2155static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2156				     union perf_event *event __maybe_unused,
2157				     struct perf_sample *sample)
2158{
2159        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2160	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2161	struct thread *thread = machine__findnew_thread(trace->host,
2162							sample->pid,
2163							sample->tid);
2164	struct thread_trace *ttrace = thread__trace(thread, trace->output);
2165
2166	if (ttrace == NULL)
2167		goto out_dump;
2168
2169	ttrace->runtime_ms += runtime_ms;
2170	trace->runtime_ms += runtime_ms;
 
2171	thread__put(thread);
2172	return 0;
2173
2174out_dump:
2175	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2176	       evsel->name,
2177	       perf_evsel__strval(evsel, sample, "comm"),
2178	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2179	       runtime,
2180	       perf_evsel__intval(evsel, sample, "vruntime"));
2181	thread__put(thread);
2182	return 0;
2183}
2184
2185static void bpf_output__printer(enum binary_printer_ops op,
2186				unsigned int val, void *extra)
2187{
2188	FILE *output = extra;
2189	unsigned char ch = (unsigned char)val;
2190
2191	switch (op) {
2192	case BINARY_PRINT_CHAR_DATA:
2193		fprintf(output, "%c", isprint(ch) ? ch : '.');
2194		break;
2195	case BINARY_PRINT_DATA_BEGIN:
2196	case BINARY_PRINT_LINE_BEGIN:
2197	case BINARY_PRINT_ADDR:
2198	case BINARY_PRINT_NUM_DATA:
2199	case BINARY_PRINT_NUM_PAD:
2200	case BINARY_PRINT_SEP:
2201	case BINARY_PRINT_CHAR_PAD:
2202	case BINARY_PRINT_LINE_END:
2203	case BINARY_PRINT_DATA_END:
2204	default:
2205		break;
2206	}
 
 
2207}
2208
2209static void bpf_output__fprintf(struct trace *trace,
2210				struct perf_sample *sample)
2211{
2212	print_binary(sample->raw_data, sample->raw_size, 8,
2213		     bpf_output__printer, trace->output);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2214}
2215
2216static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2217				union perf_event *event __maybe_unused,
2218				struct perf_sample *sample)
2219{
2220	trace__printf_interrupted_entry(trace, sample);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2221	trace__fprintf_tstamp(trace, sample->time, trace->output);
2222
2223	if (trace->trace_syscalls)
2224		fprintf(trace->output, "(         ): ");
2225
2226	fprintf(trace->output, "%s:", evsel->name);
 
 
 
 
 
 
 
 
 
 
 
 
2227
2228	if (perf_evsel__is_bpf_output(evsel)) {
 
 
 
 
 
 
 
 
 
2229		bpf_output__fprintf(trace, sample);
2230	} else if (evsel->tp_format) {
2231		event_format__fprintf(evsel->tp_format, sample->cpu,
2232				      sample->raw_data, sample->raw_size,
2233				      trace->output);
 
 
 
 
 
 
 
2234	}
2235
 
2236	fprintf(trace->output, ")\n");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2237	return 0;
2238}
2239
2240static void print_location(FILE *f, struct perf_sample *sample,
2241			   struct addr_location *al,
2242			   bool print_dso, bool print_sym)
2243{
2244
2245	if ((verbose || print_dso) && al->map)
2246		fprintf(f, "%s@", al->map->dso->long_name);
2247
2248	if ((verbose || print_sym) && al->sym)
2249		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2250			al->addr - al->sym->start);
2251	else if (al->map)
2252		fprintf(f, "0x%" PRIx64, al->addr);
2253	else
2254		fprintf(f, "0x%" PRIx64, sample->addr);
2255}
2256
/*
 * Software page fault (major or minor) handler: bump the per-thread
 * counters and, unless in summary-only mode, print the faulting IP and
 * the accessed address, both resolved to symbols where possible.
 */
static int trace__pgfault(struct trace *trace,
			  struct perf_evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';	/* 'd'ata; may become e'x'ecutable or '?' below */
	struct thread_trace *ttrace;
	int err = -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	/* Resolve the faulting instruction pointer. */
	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
			      sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	/* Resolve the address being accessed, first in the data maps... */
	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
				   sample->addr, &al);

	if (!al.map) {
		/* ...then in the code maps, before giving up with '?'. */
		thread__find_addr_location(thread, sample->cpumode,
					   MAP__FUNCTION, sample->addr, &al);

		if (al.map)
			map_type = 'x';
		else
			map_type = '?';
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
out:
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2316
2317static bool skip_sample(struct trace *trace, struct perf_sample *sample)
 
 
2318{
2319	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2320	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2321		return false;
2322
2323	if (trace->pid_list || trace->tid_list)
2324		return true;
2325
2326	return false;
 
 
 
2327}
2328
2329static int trace__process_sample(struct perf_tool *tool,
2330				 union perf_event *event,
2331				 struct perf_sample *sample,
2332				 struct perf_evsel *evsel,
2333				 struct machine *machine __maybe_unused)
2334{
2335	struct trace *trace = container_of(tool, struct trace, tool);
 
2336	int err = 0;
2337
2338	tracepoint_handler handler = evsel->handler;
2339
2340	if (skip_sample(trace, sample))
2341		return 0;
 
2342
2343	if (!trace->full_time && trace->base_time == 0)
2344		trace->base_time = sample->time;
2345
2346	if (handler) {
2347		++trace->nr_events;
2348		handler(trace, evsel, event, sample);
2349	}
2350
 
2351	return err;
2352}
2353
2354static int parse_target_str(struct trace *trace)
2355{
2356	if (trace->opts.target.pid) {
2357		trace->pid_list = intlist__new(trace->opts.target.pid);
2358		if (trace->pid_list == NULL) {
2359			pr_err("Error parsing process id string\n");
2360			return -EINVAL;
2361		}
2362	}
2363
2364	if (trace->opts.target.tid) {
2365		trace->tid_list = intlist__new(trace->opts.target.tid);
2366		if (trace->tid_list == NULL) {
2367			pr_err("Error parsing thread id string\n");
2368			return -EINVAL;
2369		}
2370	}
2371
2372	return 0;
2373}
2374
2375static int trace__record(struct trace *trace, int argc, const char **argv)
2376{
2377	unsigned int rec_argc, i, j;
2378	const char **rec_argv;
2379	const char * const record_args[] = {
2380		"record",
2381		"-R",
2382		"-m", "1024",
2383		"-c", "1",
2384	};
2385
 
2386	const char * const sc_args[] = { "-e", };
2387	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2388	const char * const majpf_args[] = { "-e", "major-faults" };
2389	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2390	const char * const minpf_args[] = { "-e", "minor-faults" };
2391	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
 
2392
2393	/* +1 is for the event string below */
2394	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2395		majpf_args_nr + minpf_args_nr + argc;
2396	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2397
2398	if (rec_argv == NULL)
2399		return -ENOMEM;
2400
2401	j = 0;
2402	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2403		rec_argv[j++] = record_args[i];
2404
2405	if (trace->trace_syscalls) {
2406		for (i = 0; i < sc_args_nr; i++)
2407			rec_argv[j++] = sc_args[i];
2408
2409		/* event string may be different for older kernels - e.g., RHEL6 */
2410		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2411			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2412		else if (is_valid_tracepoint("syscalls:sys_enter"))
2413			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2414		else {
2415			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2416			return -1;
2417		}
2418	}
2419
 
 
 
2420	if (trace->trace_pgfaults & TRACE_PFMAJ)
2421		for (i = 0; i < majpf_args_nr; i++)
2422			rec_argv[j++] = majpf_args[i];
2423
2424	if (trace->trace_pgfaults & TRACE_PFMIN)
2425		for (i = 0; i < minpf_args_nr; i++)
2426			rec_argv[j++] = minpf_args[i];
2427
2428	for (i = 0; i < (unsigned int)argc; i++)
2429		rec_argv[j++] = argv[i];
2430
2431	return cmd_record(j, rec_argv, NULL);
 
 
 
 
2432}
2433
2434static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2435
2436static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2437{
2438	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
 
 
 
2439
2440	if (IS_ERR(evsel))
 
 
 
2441		return false;
2442
2443	if (perf_evsel__field(evsel, "pathname") == NULL) {
2444		perf_evsel__delete(evsel);
2445		return false;
 
 
 
 
 
 
 
 
 
 
2446	}
2447
2448	evsel->handler = trace__vfs_getname;
2449	perf_evlist__add(evlist, evsel);
2450	return true;
2451}
2452
2453static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2454				    u64 config)
2455{
2456	struct perf_evsel *evsel;
2457	struct perf_event_attr attr = {
2458		.type = PERF_TYPE_SOFTWARE,
2459		.mmap_data = 1,
2460	};
2461
2462	attr.config = config;
2463	attr.sample_period = 1;
2464
2465	event_attr_init(&attr);
2466
2467	evsel = perf_evsel__new(&attr);
2468	if (!evsel)
2469		return -ENOMEM;
 
 
 
2470
2471	evsel->handler = trace__pgfault;
2472	perf_evlist__add(evlist, evsel);
 
2473
2474	return 0;
 
 
 
2475}
2476
2477static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2478{
2479	const u32 type = event->header.type;
2480	struct perf_evsel *evsel;
2481
2482	if (!trace->full_time && trace->base_time == 0)
2483		trace->base_time = sample->time;
2484
2485	if (type != PERF_RECORD_SAMPLE) {
2486		trace__process_event(trace, trace->host, event, sample);
2487		return;
2488	}
2489
2490	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2491	if (evsel == NULL) {
2492		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2493		return;
2494	}
2495
2496	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
 
 
 
 
 
2497	    sample->raw_data == NULL) {
2498		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2499		       perf_evsel__name(evsel), sample->tid,
2500		       sample->cpu, sample->raw_size);
2501	} else {
2502		tracepoint_handler handler = evsel->handler;
2503		handler(trace, evsel, event, sample);
2504	}
 
 
 
2505}
2506
/*
 * Create and add the raw_syscalls:sys_enter/sys_exit tracepoint events,
 * initializing the cached field accessors (id/args on enter, id/ret on
 * exit).  On failure the partially created evsels are deleted in reverse
 * creation order.  Returns 0 on success, -1 otherwise.
 */
static int trace__add_syscall_newtp(struct trace *trace)
{
	int ret = -1;
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *sys_enter, *sys_exit;

	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
	if (sys_enter == NULL)
		goto out;

	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
		goto out_delete_sys_enter;

	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
	if (sys_exit == NULL)
		goto out_delete_sys_enter;

	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
		goto out_delete_sys_exit;

	/* Both succeeded: hand ownership to the evlist. */
	perf_evlist__add(evlist, sys_enter);
	perf_evlist__add(evlist, sys_exit);

	trace->syscalls.events.sys_enter = sys_enter;
	trace->syscalls.events.sys_exit  = sys_exit;

	ret = 0;
out:
	return ret;

out_delete_sys_exit:
	perf_evsel__delete_priv(sys_exit);
out_delete_sys_enter:
	perf_evsel__delete_priv(sys_enter);
	goto out;
}
2543
2544static int trace__set_ev_qualifier_filter(struct trace *trace)
2545{
2546	int err = -1;
 
2547	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2548						trace->ev_qualifier_ids.nr,
2549						trace->ev_qualifier_ids.entries);
2550
2551	if (filter == NULL)
2552		goto out_enomem;
2553
2554	if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2555		err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
 
 
2556
2557	free(filter);
2558out:
2559	return err;
2560out_enomem:
2561	errno = ENOMEM;
2562	goto out;
2563}
2564
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Live 'perf trace': set up the requested events, optionally fork the
 * workload, then loop reading samples from the mmap ring buffers until
 * interrupted or the workload finishes.
 *
 * NOTE: the error labels near the bottom sit in an unreachable block
 * after 'return err' so they can share one stack 'errbuf'; they all
 * funnel into out_delete_evlist.  Keep that structure intact.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *evsel;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;
	bool draining = false;

	trace->live = true;

	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
		goto out_error_raw_syscalls;

	if (trace->trace_syscalls)
		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);

	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
		goto out_error_mem;
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
		goto out_error_mem;

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts);

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		/* Fork the workload now; it is released further down, after
		 * the events are open and enabled. */
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_error_open;
	}

	/*
	 * Better not use !target__has_task() here because we need to cover the
	 * case where no threads were specified in the command line, but a
	 * workload was, and in that case we will fill in the thread_map when
	 * we fork the workload in perf_evlist__prepare_workload.
	 */
	if (trace->filter_pids.nr > 0)
		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
	else if (thread_map__pid(evlist->threads, 0) == -1)
		err = perf_evlist__set_filter_pid(evlist, getpid());

	if (err < 0)
		goto out_error_mem;

	/* -e syscall list: restrict the sys_enter/sys_exit tracepoints. */
	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		pr_debug("event qualifier tracepoint filter: %s\n",
			 trace->syscalls.events.sys_exit->filter);
	}

	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target))
		perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
				  evlist->threads->nr > 1 ||
				  perf_evlist__first(evlist)->attr.inherit;
again:
	before = trace->nr_events;

	/* Drain every ring buffer once per pass. */
	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			trace__handle_event(trace, event, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;

			/* Stop producing but keep draining what's buffered. */
			if (done && !draining) {
				perf_evlist__disable(evlist);
				draining = true;
			}
		}
	}

	/* Nothing new this pass: poll (with a timeout once 'done'). */
	if (trace->nr_events == before) {
		int timeout = done ? 100 : -1;

		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
				draining = true;

			goto again;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	perf_evlist__delete(evlist);
	trace->evlist = NULL;
	trace->live = false;
	return err;
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		strerror_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
2780
/*
 * 'perf trace' on a recorded perf.data file: build a perf_session with the
 * syscall/page-fault/vfs_getname handlers wired in, then replay all the
 * recorded events through it.
 */
static int trace__replay(struct trace *trace)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "probe:vfs_getname",	     trace__vfs_getname, },
	};
	struct perf_data_file file = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = trace->force,
	};
	struct perf_session *session;
	struct perf_evsel *evsel;
	int err = -1;

	trace->tool.sample	  = trace__process_sample;
	trace->tool.mmap	  = perf_event__process_mmap;
	trace->tool.mmap2	  = perf_event__process_mmap2;
	trace->tool.comm	  = perf_event__process_comm;
	trace->tool.exit	  = perf_event__process_exit;
	trace->tool.fork	  = perf_event__process_fork;
	trace->tool.attr	  = perf_event__process_attr;
	trace->tool.tracing_data = perf_event__process_tracing_data;
	trace->tool.build_id	  = perf_event__process_build_id;

	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&file, false, &trace->tool);
	if (session == NULL)
		return -1;

	if (symbol__init(&session->header.env) < 0)
		goto out;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");

	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
		goto out;
	}

	/* Route any recorded software page-fault events to trace__pgfault. */
	evlist__for_each(session->evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;
	}

	err = parse_target_str(trace);
	if (err != 0)
		goto out;

	setup_pager();

	err = perf_session__process_events(session);
	if (err)
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}
2876
/* Banner printed once before the per-thread summaries. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return (size_t)fprintf(fp, "\n Summary of events:\n\n");
}
2885
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Print the per-syscall statistics table for one thread: call count and
 * total/min/avg/max times in milliseconds plus relative stddev.
 * Returns the number of characters printed (0 when the thread has no
 * syscall stats).
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	struct stats *stats;
	size_t printed = 0;
	struct syscall *sc;
	struct int_node *inode = intlist__first(ttrace->syscall_stats);

	if (inode == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");

	/* each int_node is a syscall */
	while (inode) {
		stats = inode->priv;
		if (stats) {
			/* min/max are accumulated in nanoseconds; show msec */
			double min = (double)(stats->min) / NSEC_PER_MSEC;
			double max = (double)(stats->max) / NSEC_PER_MSEC;
			double avg = avg_stats(stats);
			double pct;
			u64 n = (u64) stats->n;

			/* stddev as a percentage of the mean */
			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
			avg /= NSEC_PER_MSEC;

			/* inode->i is the syscall number, indexing the table */
			sc = &trace->syscalls.table[inode->i];
			printed += fprintf(fp, "   %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
					   n, avg * n, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
		}

		inode = intlist__next(inode);
	}

	printed += fprintf(fp, "\n\n");

	return printed;
}
2930
/* struct used to pass data to per-thread function */
struct summary_data {
	FILE *fp;		/* where the summary is written */
	struct trace *trace;	/* session state (totals, syscall table) */
	size_t printed;		/* running count of characters printed */
};
2937
2938static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2939{
2940	struct summary_data *data = priv;
2941	FILE *fp = data->fp;
2942	size_t printed = data->printed;
2943	struct trace *trace = data->trace;
2944	struct thread_trace *ttrace = thread__priv(thread);
2945	double ratio;
2946
2947	if (ttrace == NULL)
2948		return 0;
2949
2950	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2951
2952	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2953	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2954	printed += fprintf(fp, "%.1f%%", ratio);
2955	if (ttrace->pfmaj)
2956		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2957	if (ttrace->pfmin)
2958		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2959	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
 
 
 
 
2960	printed += thread__dump_stats(ttrace, trace, fp);
2961
2962	data->printed += printed;
 
2963
2964	return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2965}
2966
2967static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2968{
2969	struct summary_data data = {
2970		.fp = fp,
2971		.trace = trace
2972	};
2973	data.printed = trace__fprintf_threads_header(fp);
2974
2975	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2976
2977	return data.printed;
 
 
 
 
2978}
2979
2980static int trace__set_duration(const struct option *opt, const char *str,
2981			       int unset __maybe_unused)
2982{
2983	struct trace *trace = opt->value;
2984
2985	trace->duration_filter = atof(str);
2986	return 0;
2987}
2988
2989static int trace__set_filter_pids(const struct option *opt, const char *str,
2990				  int unset __maybe_unused)
2991{
2992	int ret = -1;
2993	size_t i;
2994	struct trace *trace = opt->value;
2995	/*
2996	 * FIXME: introduce a intarray class, plain parse csv and create a
2997	 * { int nr, int entries[] } struct...
2998	 */
2999	struct intlist *list = intlist__new(str);
3000
3001	if (list == NULL)
3002		return -1;
3003
3004	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
3005	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
3006
3007	if (trace->filter_pids.entries == NULL)
3008		goto out;
3009
3010	trace->filter_pids.entries[0] = getpid();
3011
3012	for (i = 1; i < trace->filter_pids.nr; ++i)
3013		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
3014
3015	intlist__delete(list);
3016	ret = 0;
3017out:
3018	return ret;
3019}
3020
/*
 * Open 'filename' for writing as trace->output, first rotating any
 * existing non-empty file of that name to "<filename>.old".
 * Returns 0 on success or -errno when the file cannot be opened.
 */
static int trace__open_output(struct trace *trace, const char *filename)
{
	struct stat st;

	/* keep one backup of a previous, non-empty output file */
	if (!stat(filename, &st) && st.st_size) {
		char oldname[PATH_MAX];

		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
		unlink(oldname);
		rename(filename, oldname);
	}

	trace->output = fopen(filename, "w");

	return trace->output == NULL ? -errno : 0;
}
3037
3038static int parse_pagefaults(const struct option *opt, const char *str,
3039			    int unset __maybe_unused)
3040{
3041	int *trace_pgfaults = opt->value;
3042
3043	if (strcmp(str, "all") == 0)
3044		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3045	else if (strcmp(str, "maj") == 0)
3046		*trace_pgfaults |= TRACE_PFMAJ;
3047	else if (strcmp(str, "min") == 0)
3048		*trace_pgfaults |= TRACE_PFMIN;
3049	else
3050		return -1;
3051
3052	return 0;
3053}
3054
/* Set 'handler' as the sample handler on every event in 'evlist'. */
static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		evsel->handler = handler;
}
 
3062
/*
 * Entry point for 'perf trace': set up defaults, parse the command line
 * and then either hand off to 'perf trace record', replay a perf.data
 * file (-i) or trace live (run a workload and/or attach to pids/tids).
 */
int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char *trace_usage[] = {
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
		"perf trace record [<options>] [<command>]",
		"perf trace record [<options>] -- <command> [<options>]",
		NULL
	};
	struct trace trace = {
		/* syscall id mapping is done via libaudit in this version */
		.audit = {
			.machine = audit_detect_machine(),
			.open_id = audit_name_to_syscall("open", trace.audit.machine),
		},
		.syscalls = {
			. max = -1,	/* syscall table not yet allocated */
		},
		.opts = {
			.target = {
				.uid	   = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq     = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_buffering  = true,
			.mmap_pages    = UINT_MAX,
			.proc_map_timeout  = 500,
		},
		.output = stderr,	/* until -o is processed */
		.show_comm = true,
		.trace_syscalls = true,
	};
	const char *output_name = NULL;
	const char *ev_qualifier_str = NULL;
	const struct option trace_options[] = {
	OPT_CALLBACK(0, "event", &trace.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_BOOLEAN(0, "comm", &trace.show_comm,
		    "show the thread COMM next to its id"),
	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		    "trace events on existing process id"),
	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
		    "trace events on existing thread id"),
	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
		     "pids to filter (by the kernel)", trace__set_filter_pids),
	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
		   "user to profile"),
	OPT_CALLBACK(0, "duration", &trace, "float",
		     "show only events with duration > N.M ms",
		     trace__set_duration),
	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
	OPT_BOOLEAN('T', "time", &trace.full_time,
		    "Show full timestamp, not time relative to first start"),
	OPT_BOOLEAN('s', "summary", &trace.summary_only,
		    "Show only syscall summary with statistics"),
	OPT_BOOLEAN('S', "with-summary", &trace.summary,
		    "Show all syscalls and summary with statistics"),
	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
		     "Trace pagefaults", parse_pagefaults, "maj"),
	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_END()
	};
	const char * const trace_subcommands[] = { "record", NULL };
	int err;
	char bf[BUFSIZ];

	/* get a stack trace if we crash while tracing */
	signal(SIGSEGV, sighandler_dump_stack);
	signal(SIGFPE, sighandler_dump_stack);

	trace.evlist = perf_evlist__new();

	if (trace.evlist == NULL) {
		pr_err("Not enough memory to run!\n");
		err = -ENOMEM;
		goto out;
	}

	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);

	/* page fault samples need address + timestamp to be useful */
	if (trace.trace_pgfaults) {
		trace.opts.sample_address = true;
		trace.opts.sample_time = true;
	}

	/* events added with --event all share the generic handler */
	if (trace.evlist->nr_entries > 0)
		evlist__set_evsel_handler(trace.evlist, trace__event_handler);

	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
		return trace__record(&trace, argc-1, &argv[1]);

	/* summary_only implies summary option, but don't overwrite summary if set */
	if (trace.summary_only)
		trace.summary = trace.summary_only;

	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
		pr_err("Please specify something to trace.\n");
		return -1;
	}

	if (output_name != NULL) {
		err = trace__open_output(&trace, output_name);
		if (err < 0) {
			perror("failed to create output file");
			goto out;
		}
	}

	/* -e list of syscalls, possibly negated with a leading '!' */
	if (ev_qualifier_str != NULL) {
		const char *s = ev_qualifier_str;
		struct strlist_config slist_config = {
			.dirname = system_path(STRACE_GROUPS_DIR),
		};

		trace.not_ev_qualifier = *s == '!';
		if (trace.not_ev_qualifier)
			++s;
		trace.ev_qualifier = strlist__new(s, &slist_config);
		if (trace.ev_qualifier == NULL) {
			fputs("Not enough memory to parse event qualifier",
			      trace.output);
			err = -ENOMEM;
			goto out_close;
		}

		err = trace__validate_ev_qualifier(&trace);
		if (err)
			goto out_close;
	}

	err = target__validate(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	err = target__parse_uid(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	/* no workload and no target: trace the whole system */
	if (!argc && target__none(&trace.opts.target))
		trace.opts.target.system_wide = true;

	if (input_name)
		err = trace__replay(&trace);
	else
		err = trace__run(&trace, argc, argv);

out_close:
	if (output_name != NULL)
		fclose(trace.output);
out:
	return err;
}
v6.13.7
   1/*
   2 * builtin-trace.c
   3 *
   4 * Builtin 'trace' command:
   5 *
   6 * Display a continuously updated trace of any workload, CPU, specific PID,
   7 * system wide, etc.  Default format is loosely strace like, but any other
   8 * event may be specified using --event.
   9 *
  10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
  11 *
  12 * Initially based on the 'trace' prototype by Thomas Gleixner:
  13 *
  14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
 
 
  15 */
  16
  17#include "util/record.h"
  18#include <api/fs/tracing_path.h>
  19#ifdef HAVE_LIBBPF_SUPPORT
  20#include <bpf/bpf.h>
  21#include <bpf/libbpf.h>
  22#include <bpf/btf.h>
  23#ifdef HAVE_BPF_SKEL
  24#include "bpf_skel/augmented_raw_syscalls.skel.h"
  25#endif
  26#endif
  27#include "util/bpf_map.h"
  28#include "util/rlimit.h"
  29#include "builtin.h"
  30#include "util/cgroup.h"
  31#include "util/color.h"
  32#include "util/config.h"
  33#include "util/debug.h"
  34#include "util/dso.h"
  35#include "util/env.h"
  36#include "util/event.h"
  37#include "util/evsel.h"
  38#include "util/evsel_fprintf.h"
  39#include "util/synthetic-events.h"
  40#include "util/evlist.h"
  41#include "util/evswitch.h"
  42#include "util/mmap.h"
  43#include <subcmd/pager.h>
  44#include <subcmd/exec-cmd.h>
  45#include "util/machine.h"
  46#include "util/map.h"
  47#include "util/symbol.h"
  48#include "util/path.h"
  49#include "util/session.h"
  50#include "util/thread.h"
  51#include <subcmd/parse-options.h>
  52#include "util/strlist.h"
  53#include "util/intlist.h"
  54#include "util/thread_map.h"
  55#include "util/stat.h"
  56#include "util/tool.h"
  57#include "util/util.h"
  58#include "trace/beauty/beauty.h"
  59#include "trace-event.h"
  60#include "util/parse-events.h"
  61#include "util/tracepoint.h"
  62#include "callchain.h"
  63#include "print_binary.h"
  64#include "string2.h"
  65#include "syscalltbl.h"
  66#include "rb_resort.h"
  67#include "../perf.h"
  68#include "trace_augment.h"
  69
  70#include <errno.h>
  71#include <inttypes.h>
  72#include <poll.h>
  73#include <signal.h>
  74#include <stdlib.h>
  75#include <string.h>
 
  76#include <linux/err.h>
  77#include <linux/filter.h>
  78#include <linux/kernel.h>
  79#include <linux/list_sort.h>
  80#include <linux/random.h>
  81#include <linux/stringify.h>
  82#include <linux/time64.h>
  83#include <linux/zalloc.h>
  84#include <fcntl.h>
  85#include <sys/sysmacros.h>
  86
  87#include <linux/ctype.h>
  88#include <perf/mmap.h>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  89
  90#ifdef HAVE_LIBTRACEEVENT
  91#include <event-parse.h>
 
 
 
 
 
 
 
 
  92#endif
  93
  94#ifndef O_CLOEXEC
  95# define O_CLOEXEC		02000000
  96#endif
  97
  98#ifndef F_LINUX_SPECIFIC_BASE
  99# define F_LINUX_SPECIFIC_BASE	1024
 100#endif
 101
 102#define RAW_SYSCALL_ARGS_NUM	6
 
 
 103
/*
 * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100
 *
 * We have to explicitly mark the direction of the flow of data, if from the
 * kernel to user space or the other way around, since the BPF collector we
 * have so far copies only from user to kernel space, mark the arguments that
 * go that direction, so that we don't end up collecting the previous contents
 * for syscall args that goes from kernel to user space.
 */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	bool	   (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val);
	unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
	void	   *parm;	/* opaque formatter parameter, e.g. a strarray */
	const char *name;
	u16	   nr_entries; // for arrays
	bool	   from_user;	/* data flows user -> kernel, see above */
	bool	   show_zero;
#ifdef HAVE_LIBBPF_SUPPORT
	const struct btf_type *type;
	int	   type_id; /* used in btf_dump */
#endif
};
 127
/* Per-syscall formatting overrides, looked up by syscall name or alias. */
struct syscall_fmt {
	const char *name;
	const char *alias;
	struct {
		const char *sys_enter,
			   *sys_exit;
	}	   bpf_prog_name;
	struct syscall_arg_fmt arg[RAW_SYSCALL_ARGS_NUM];
	u8	   nr_args;
	bool	   errpid;
	bool	   timeout;
	bool	   hexret;
};
 141
/* All the state of one 'perf trace' session. */
struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;
	struct {
		struct syscall  *table;	/* indexed by syscall id */
		struct {
			struct evsel *sys_enter,
				*sys_exit,
				*bpf_output;
		}		events;
	} syscalls;
#ifdef HAVE_BPF_SKEL
	struct augmented_raw_syscalls_bpf *skel;
#endif
#ifdef HAVE_LIBBPF_SUPPORT
	struct btf		*btf;	/* lazily loaded, see trace__load_vmlinux_btf() */
#endif
	struct record_opts	opts;
	struct evlist	*evlist;
	struct machine		*host;
	struct thread		*current;
	struct cgroup		*cgroup;
	u64			base_time;
	FILE			*output;
	unsigned long		nr_events;
	unsigned long		nr_events_printed;
	unsigned long		max_events;
	struct evswitch		evswitch;
	struct strlist		*ev_qualifier;
	struct {
		size_t		nr;
		int		*entries;
	}			ev_qualifier_ids;
	struct {
		size_t		nr;
		pid_t		*entries;
		struct bpf_map  *map;
	}			filter_pids;
	double			duration_filter;
	double			runtime_ms;
	struct {
		u64		vfs_getname,
				proc_getname;
	} stats;
	unsigned int		max_stack;
	unsigned int		min_stack;
	int			raw_augmented_syscalls_args_size;
	bool			raw_augmented_syscalls;
	bool			fd_path_disabled;
	bool			sort_events;
	bool			not_ev_qualifier;
	bool			live;
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			errno_summary;
	bool			failure_only;
	bool			show_comm;
	bool			print_sample;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			libtraceevent_print;
	bool			kernel_syscallchains;
	s16			args_alignment;
	bool			show_tstamp;
	bool			show_duration;
	bool			show_zeros;
	bool			show_arg_names;
	bool			show_string_prefix;
	bool			force;
	bool			vfs_getname;
	bool			force_btf;
	int			trace_pgfaults;
	char			*perfconfig_events;
	struct {
		struct ordered_events	data;
		u64			last;
	} oe;
};
 223
/*
 * Lazily load the kernel's vmlinux BTF into trace->btf; a no-op when
 * libbpf support is compiled out or the BTF was already loaded.
 */
static void trace__load_vmlinux_btf(struct trace *trace __maybe_unused)
{
#ifdef HAVE_LIBBPF_SUPPORT
	if (trace->btf != NULL)
		return;

	trace->btf = btf__load_vmlinux_btf();
	if (verbose > 0) {
		fprintf(trace->output, trace->btf ? "vmlinux BTF loaded\n" :
						    "Failed to load vmlinux BTF\n");
	}
#endif
}
 237
/*
 * Accessor for one tracepoint field: its offset into the raw sample plus
 * a callback reading it either as an integer or as a pointer.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
 245
/* Generate tp_field__u{8,16,32,64}(): read a native-endian uint field. */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value;  \
}

TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);

/* Same, but byte-swapping for samples recorded with the other endianness. */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
 270
 271static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
 
 
 272{
 273	field->offset = offset;
 274
 275	switch (size) {
 276	case 1:
 277		field->integer = tp_field__u8;
 278		break;
 279	case 2:
 280		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
 281		break;
 282	case 4:
 283		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
 284		break;
 285	case 8:
 286		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
 287		break;
 288	default:
 289		return -1;
 290	}
 291
 292	return 0;
 293}
 294
/* Init an integer field accessor from a tracepoint format description. */
static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
{
	return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
}

/* Return the address of the field inside the raw sample payload. */
static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
{
	return sample->raw_data + field->offset;
}

static int __tp_field__init_ptr(struct tp_field *field, int offset)
{
	field->offset = offset;
	field->pointer = tp_field__ptr;
	return 0;
}

/* Init a pointer field accessor from a tracepoint format description. */
static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
{
	return __tp_field__init_ptr(field, format_field->offset);
}
 316
/*
 * Accessors for the syscall id plus either the enter args or the exit
 * return value, which occupy the same slot in the two tracepoints.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};

/*
 * The evsel->priv as used by 'perf trace'
 * sc:	for raw_syscalls:sys_{enter,exit} and syscalls:sys_{enter,exit}_SYSCALLNAME
 * fmt: for all the other tracepoints
 */
struct evsel_trace {
	struct syscall_tp	sc;
	struct syscall_arg_fmt  *fmt;
};
 333
/* Allocate a zeroed per-evsel private area. */
static struct evsel_trace *evsel_trace__new(void)
{
	return zalloc(sizeof(struct evsel_trace));
}

/* Release the private area along with its argument format table. */
static void evsel_trace__delete(struct evsel_trace *et)
{
	if (et == NULL)
		return;

	zfree(&et->fmt);
	free(et);
}
 347
/*
 * Used with raw_syscalls:sys_{enter,exit} and with the
 * syscalls:sys_{enter,exit}_SYSCALL tracepoints
 */
static inline struct syscall_tp *__evsel__syscall_tp(struct evsel *evsel)
{
	/* caller guarantees evsel->priv has already been allocated */
	struct evsel_trace *et = evsel->priv;

	return &et->sc;
}

/* As above, but allocates evsel->priv on first use; NULL on OOM. */
static struct syscall_tp *evsel__syscall_tp(struct evsel *evsel)
{
	if (evsel->priv == NULL) {
		evsel->priv = evsel_trace__new();
		if (evsel->priv == NULL)
			return NULL;
	}

	return __evsel__syscall_tp(evsel);
}
 369
/*
 * Used with all the other tracepoints.
 */
static inline struct syscall_arg_fmt *__evsel__syscall_arg_fmt(struct evsel *evsel)
{
	struct evsel_trace *et = evsel->priv;

	return et->fmt;
}

/*
 * Return the per-field argument format table for 'evsel', allocating both
 * the private area and the table (one entry per tracepoint format field)
 * on first use.  On allocation failure the private area is torn down and
 * NULL is returned.
 */
static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel)
{
	struct evsel_trace *et = evsel->priv;

	if (evsel->priv == NULL) {
		et = evsel->priv = evsel_trace__new();

		if (et == NULL)
			return NULL;
	}

	if (et->fmt == NULL) {
		et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
		if (et->fmt == NULL)
			goto out_delete;
	}

	return __evsel__syscall_arg_fmt(evsel);

out_delete:
	evsel_trace__delete(evsel->priv);
	evsel->priv = NULL;
	return NULL;
}
 404
/* Bind 'field' to the tracepoint field called 'name', read as an integer. */
static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name)
{
	struct tep_format_field *format_field = evsel__field(evsel, name);

	if (format_field == NULL)
		return -1;

	return tp_field__init_uint(field, format_field, evsel->needs_swap);
}

/* Init the syscall_tp member 'name' from the tracepoint field of the same name. */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
	   evsel__init_tp_uint_field(evsel, &sc->name, #name); })

/* Bind 'field' to the tracepoint field called 'name', read as a pointer. */
static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name)
{
	struct tep_format_field *format_field = evsel__field(evsel, name);

	if (format_field == NULL)
		return -1;

	return tp_field__init_ptr(field, format_field);
}

/* Pointer flavour of perf_evsel__init_sc_tp_uint_field(). */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
	   evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 432
/* Free the per-evsel private area, then the evsel itself. */
static void evsel__delete_priv(struct evsel *evsel)
{
	zfree(&evsel->priv);
	evsel__delete(evsel);
}
 438
 439static int evsel__init_syscall_tp(struct evsel *evsel)
 440{
 441	struct syscall_tp *sc = evsel__syscall_tp(evsel);
 442
 443	if (sc != NULL) {
 444		if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
 445		    evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
 446			return -ENOENT;
 447
 448		return 0;
 449	}
 450
 451	return -ENOMEM;
 452}
 453
 454static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
 455{
 456	struct syscall_tp *sc = evsel__syscall_tp(evsel);
 457
 458	if (sc != NULL) {
 459		struct tep_format_field *syscall_id = evsel__field(tp, "id");
 460		if (syscall_id == NULL)
 461			syscall_id = evsel__field(tp, "__syscall_nr");
 462		if (syscall_id == NULL ||
 463		    __tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
 464			return -EINVAL;
 465
 466		return 0;
 467	}
 468
 469	return -ENOMEM;
 470}
 471
/* The augmented payload carries the args right after the u64 syscall id. */
static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
{
	struct syscall_tp *sc = __evsel__syscall_tp(evsel);

	return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
}

/* Ditto for the return value on the sys_exit side. */
static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
{
	struct syscall_tp *sc = __evsel__syscall_tp(evsel);

	return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
}
 485
 486static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
 487{
 488	if (evsel__syscall_tp(evsel) != NULL) {
 489		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
 490			return -ENOENT;
 491
 492		evsel->handler = handler;
 493		return 0;
 494	}
 495
 496	return -ENOMEM;
 
 
 
 
 497}
 498
 499static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
 500{
 501	struct evsel *evsel = evsel__newtp("raw_syscalls", direction);
 502
 503	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 504	if (IS_ERR(evsel))
 505		evsel = evsel__newtp("syscalls", direction);
 506
 507	if (IS_ERR(evsel))
 508		return NULL;
 509
 510	if (evsel__init_raw_syscall_tp(evsel, handler))
 511		goto out_delete;
 512
 513	return evsel;
 514
 515out_delete:
 516	evsel__delete_priv(evsel);
 517	return NULL;
 518}
 519
/* Read the integer syscall_tp member 'name' out of a sample. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = __evsel__syscall_tp(evsel); \
	   fields->name.integer(&fields->name, sample); })

/* Ditto, for members read as pointers into the raw payload. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = __evsel__syscall_tp(evsel); \
	   fields->name.pointer(&fields->name, sample); })
 527
 528size_t strarray__scnprintf_suffix(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_suffix, int val)
 529{
 530	int idx = val - sa->offset;
 
 
 
 
 
 531
 532	if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
 533		size_t printed = scnprintf(bf, size, intfmt, val);
 534		if (show_suffix)
 535			printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
 536		return printed;
 537	}
 538
 539	return scnprintf(bf, size, "%s%s", sa->entries[idx], show_suffix ? sa->prefix : "");
 
 
 540}
 541
 542size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
 543{
 544	int idx = val - sa->offset;
 545
 546	if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
 547		size_t printed = scnprintf(bf, size, intfmt, val);
 548		if (show_prefix)
 549			printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
 550		return printed;
 551	}
 552
 553	return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
 554}
 555
/* Print a strarray-described argument, with 'intfmt' as numeric fallback. */
static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
						const char *intfmt,
					        struct syscall_arg *arg)
{
	return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->show_string_prefix, arg->val);
}

/* Common case: decimal fallback. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
 570
/* String -> value lookups for strarray-described arguments. */
bool syscall_arg__strtoul_strarray(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
{
	return strarray__strtoul(arg->parm, bf, size, ret);
}

/* As above, for '|'-separated flag name lists. */
bool syscall_arg__strtoul_strarray_flags(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
{
	return strarray__strtoul_flags(arg->parm, bf, size, ret);
}

/* As above, searching across several strarrays. */
bool syscall_arg__strtoul_strarrays(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
{
	return strarrays__strtoul(arg->parm, bf, size, ret);
}
 585
/* Print a bitmask argument using the strarray flag names. */
size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg)
{
	return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val);
}
 590
 591size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
 
 
 
 592{
 593	size_t printed;
 594	int i;
 595
 596	for (i = 0; i < sas->nr_entries; ++i) {
 597		struct strarray *sa = sas->entries[i];
 598		int idx = val - sa->offset;
 599
 600		if (idx >= 0 && idx < sa->nr_entries) {
 601			if (sa->entries[idx] == NULL)
 602				break;
 603			return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
 604		}
 605	}
 606
 607	printed = scnprintf(bf, size, intfmt, val);
 608	if (show_prefix)
 609		printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sas->entries[0]->prefix);
 
 
 
 
 
 
 
 
 
 
 610	return printed;
 611}
 612
/*
 * Reverse lookup: find the entry whose name matches exactly the first
 * 'size' bytes of 'bf' and store sa->offset + index in '*ret'.  The
 * entry must also end right at 'size' characters for a match.
 */
bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret)
{
	int i;

	for (i = 0; i < sa->nr_entries; ++i) {
		if (sa->entries[i] && strncmp(sa->entries[i], bf, size) == 0 && sa->entries[i][size] == '\0') {
			*ret = sa->offset + i;
			return true;
		}
	}

	return false;
}
 626
 627bool strarray__strtoul_flags(struct strarray *sa, char *bf, size_t size, u64 *ret)
 
 
 
 628{
 629	u64 val = 0;
 630	char *tok = bf, *sep, *end;
 631
 632	*ret = 0;
 
 
 
 
 633
 634	while (size != 0) {
 635		int toklen = size;
 
 
 
 636
 637		sep = memchr(tok, '|', size);
 638		if (sep != NULL) {
 639			size -= sep - tok + 1;
 640
 641			end = sep - 1;
 642			while (end > tok && isspace(*end))
 643				--end;
 644
 645			toklen = end - tok + 1;
 646		}
 647
 648		while (isspace(*tok))
 649			++tok;
 650
 651		if (isalpha(*tok) || *tok == '_') {
 652			if (!strarray__strtoul(sa, tok, toklen, &val))
 653				return false;
 654		} else
 655			val = strtoul(tok, NULL, 0);
 656
 657		*ret |= (1 << (val - 1));
 658
 659		if (sep == NULL)
 660			break;
 661		tok = sep + 1;
 662	}
 663
 664	return true;
 665}
 666
 667bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret)
 
 668{
 669	int i;
 670
 671	for (i = 0; i < sas->nr_entries; ++i) {
 672		struct strarray *sa = sas->entries[i];
 673
 674		if (strarray__strtoul(sa, bf, size, ret))
 675			return true;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 676	}
 677
 678	return false;
 679}
 680
/* Print an argument looked up across several strarrays. */
size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
					struct syscall_arg *arg)
{
	return strarrays__scnprintf(arg->parm, bf, size, "%d", arg->show_string_prefix, arg->val);
}
 686
 687#ifndef AT_FDCWD
 688#define AT_FDCWD	-100
 689#endif
 690
/*
 * Pretty print a dirfd argument of the *at() syscall family: AT_FDCWD is
 * printed symbolically ("AT_FDCWD" or just "CWD" without prefixes), any
 * other value as a regular file descriptor.
 */
static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
					   struct syscall_arg *arg)
{
	int fd = arg->val;
	const char *prefix = "AT_FD";

	if (fd == AT_FDCWD)
		return scnprintf(bf, size, "%s%s", arg->show_string_prefix ? prefix : "", "CWD");

	return syscall_arg__scnprintf_fd(bf, size, arg);
}
 702
 703#define SCA_FDAT syscall_arg__scnprintf_fd_at
 704
 705static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
 706					      struct syscall_arg *arg);
 707
 708#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
 709
/* Print the argument value in hex, e.g. for addresses and raw flags. */
size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}
 
 
 
 
 
 
 
 
 
 714
 715size_t syscall_arg__scnprintf_ptr(char *bf, size_t size, struct syscall_arg *arg)
 716{
 717	if (arg->val == 0)
 718		return scnprintf(bf, size, "NULL");
 719	return syscall_arg__scnprintf_hex(bf, size, arg);
 720}
 
 
 
 
 
 
 
 
 
 
 
 721
/* Print the argument as a signed decimal int. */
size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%d", arg->val);
}
 726
/* Print the argument as a signed decimal long. */
size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%ld", arg->val);
}
 731
/*
 * Print a fixed-size char array tracepoint field (e.g. comm) as a quoted,
 * length-bounded string; uses the field's nr_entries when set, otherwise
 * the argument length.
 */
static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg)
{
	// XXX Hey, maybe for sched:sched_switch prev/next comm fields we can
	//     fill missing comms using thread__set_comm()...
	//     here or in a special syscall_arg__scnprintf_pid_sched_tp...
	return scnprintf(bf, size, "\"%-.*s\"", arg->fmt->nr_entries ?: arg->len, arg->val);
}
 739
 740#define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array
 741
/*
 * String tables used to pretty print integer syscall arguments, wrapped
 * into 'struct strarray' instances by the DEFINE_STRARRAY*() macros.
 */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD", "OBJ_PIN", "OBJ_GET", "PROG_ATTACH",
	"PROG_DETACH", "PROG_TEST_RUN", "PROG_GET_NEXT_ID", "MAP_GET_NEXT_ID",
	"PROG_GET_FD_BY_ID", "MAP_GET_FD_BY_ID", "OBJ_GET_INFO_BY_FD",
	"PROG_QUERY", "RAW_TRACEPOINT_OPEN", "BTF_LOAD", "BTF_GET_FD_BY_ID",
	"TASK_FD_QUERY", "MAP_LOOKUP_AND_DELETE_ELEM", "MAP_FREEZE",
	"BTF_GET_NEXT_ID", "MAP_LOOKUP_BATCH", "MAP_LOOKUP_AND_DELETE_BATCH",
	"MAP_UPDATE_BATCH", "MAP_DELETE_BATCH", "LINK_CREATE", "LINK_UPDATE",
	"LINK_GET_FD_BY_ID", "LINK_GET_NEXT_ID", "ENABLE_STATS", "ITER_CREATE",
	"LINK_DETACH", "PROG_BIND_MAP",
};
static DEFINE_STRARRAY(bpf_cmd, "BPF_");

/* fsmount(2) flags: only FSMOUNT_CLOEXEC (bit 0, value 1) so far */
static const char *fsmount_flags[] = {
	[1] = "CLOEXEC",
};
static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");

/* fsconfig_cmds[] is generated from the uapi headers at build time */
#include "trace/beauty/generated/fsconfig_arrays.c"

static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");

/* epoll_ctl(2) ops are 1-based, hence the offset of 1 */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);

static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers, "ITIMER_");

static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options, "KEYCTL_");

/* SEEK_DATA/SEEK_HOLE only exist on systems whose headers define them */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences, "SEEK_");

static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds, "F_");

/* Linux-specific fcntl cmds, numbered from F_LINUX_SPECIFIC_BASE */
static const char *fcntl_linux_specific_cmds[] = {
	"SETLEASE", "GETLEASE", "NOTIFY", "DUPFD_QUERY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
	"SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
	"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
};

static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, "F_", F_LINUX_SPECIFIC_BASE);

/* Both fcntl tables, tried in order when beautifying the cmd argument */
static struct strarray *fcntl_cmds_arrays[] = {
	&strarray__fcntl_cmds,
	&strarray__fcntl_linux_specific_cmds,
};

static DEFINE_STRARRAYS(fcntl_cmds_arrays);

static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources, "RLIMIT_");

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow, "SIG_");

static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid, "CLOCK_");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 829
/*
 * Pretty print an access(2) mode argument: F_OK (0) on its own, otherwise
 * the set R/W/X "_OK" bits, with any leftover unknown bits printed in hex.
 */
static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	bool show_prefix = arg->show_string_prefix;
	const char *suffix = "_OK";
	size_t printed = 0;
	int mode = arg->val;

	if (mode == F_OK) /* 0 */
		return scnprintf(bf, size, "F%s", show_prefix ? suffix : "");
/* Print the bit's name and clear it from 'mode', so leftovers show in hex */
#define	P_MODE(n) \
	if (mode & n##_OK) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", #n, show_prefix ? suffix : ""); \
		mode &= ~n##_OK; \
	}

	P_MODE(R);
	P_MODE(W);
	P_MODE(X);
#undef P_MODE

	if (mode)
		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);

	return printed;
}
 856
 857#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
 858
 859static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
 860					      struct syscall_arg *arg);
 861
 862#define SCA_FILENAME syscall_arg__scnprintf_filename
 863
 864// 'argname' is just documentational at this point, to remove the previous comment with that info
 865#define SCA_FILENAME_FROM_USER(argname) \
 866	  { .scnprintf	= SCA_FILENAME, \
 867	    .from_user	= true, }
 868
 869static size_t syscall_arg__scnprintf_buf(char *bf, size_t size, struct syscall_arg *arg);
 870
 871#define SCA_BUF syscall_arg__scnprintf_buf
 872
/*
 * Pretty print the pipe2(2) flags argument: O_CLOEXEC and O_NONBLOCK by
 * name, any leftover unknown bits in hex.
 */
static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
						struct syscall_arg *arg)
{
	bool show_prefix = arg->show_string_prefix;
	const char *prefix = "O_";
	int printed = 0, flags = arg->val;

/* Print the flag's name, '|'-separated, and clear it from 'flags' */
#define	P_FLAG(n) \
	if (flags & O_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
		flags &= ~O_##n; \
	}

	P_FLAG(CLOEXEC);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}
 895
 896#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 897
 898#ifndef GRND_NONBLOCK
 899#define GRND_NONBLOCK	0x0001
 900#endif
 901#ifndef GRND_RANDOM
 902#define GRND_RANDOM	0x0002
 903#endif
 904
/*
 * Pretty print the getrandom(2) flags argument: GRND_RANDOM and
 * GRND_NONBLOCK by name, any leftover unknown bits in hex.
 */
static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
						   struct syscall_arg *arg)
{
	bool show_prefix = arg->show_string_prefix;
	const char *prefix = "GRND_";
	int printed = 0, flags = arg->val;

/* Print the flag's name, '|'-separated, and clear it from 'flags' */
#define	P_FLAG(n) \
	if (flags & GRND_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
		flags &= ~GRND_##n; \
	}

	P_FLAG(RANDOM);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}
 927
 928#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
 929
 930#ifdef HAVE_LIBBPF_SUPPORT
 931static void syscall_arg_fmt__cache_btf_enum(struct syscall_arg_fmt *arg_fmt, struct btf *btf, char *type)
 932{
 933	int id;
 934
 935	type = strstr(type, "enum ");
 936	if (type == NULL)
 937		return;
 
 
 
 
 938
 939	type += 5; // skip "enum " to get the enumeration name
 
 
 
 940
 941	id = btf__find_by_name(btf, type);
 942	if (id < 0)
 943		return;
 944
 945	arg_fmt->type = btf__type_by_id(btf, id);
 946}
 947
/*
 * Resolve an enumerator name in 'bf' (length 'size') to its value using the
 * BTF enum type cached in arg->fmt->type.  Returns true and sets *val on a
 * match.
 */
static bool syscall_arg__strtoul_btf_enum(char *bf, size_t size, struct syscall_arg *arg, u64 *val)
{
	const struct btf_type *bt = arg->fmt->type;
	struct btf *btf = arg->trace->btf;
	struct btf_enum *be = btf_enum(bt);

	for (int i = 0; i < btf_vlen(bt); ++i, ++be) {
		const char *name = btf__name_by_offset(btf, be->name_off);
		/*
		 * NOTE(review): comparing over max(size, strlen(name))
		 * assumes 'bf' is readable (and terminated) past 'size'
		 * when the enumerator name is longer — confirm with callers.
		 */
		int max_len = max(size, strlen(name));

		if (strncmp(name, bf, max_len) == 0) {
			*val = be->val;
			return true;
		}
	}

	return false;
}
 966
 967static bool syscall_arg__strtoul_btf_type(char *bf, size_t size, struct syscall_arg *arg, u64 *val)
 968{
 969	const struct btf_type *bt;
 970	char *type = arg->type_name;
 971	struct btf *btf;
 972
 973	trace__load_vmlinux_btf(arg->trace);
 974
 975	btf = arg->trace->btf;
 976	if (btf == NULL)
 977		return false;
 978
 979	if (arg->fmt->type == NULL) {
 980		// See if this is an enum
 981		syscall_arg_fmt__cache_btf_enum(arg->fmt, btf, type);
 982	}
 983
 984	// Now let's see if we have a BTF type resolved
 985	bt = arg->fmt->type;
 986	if (bt == NULL)
 987		return false;
 988
 989	// If it is an enum:
 990	if (btf_is_enum(arg->fmt->type))
 991		return syscall_arg__strtoul_btf_enum(bf, size, arg, val);
 992
 993	return false;
 994}
 995
 996static size_t btf_enum_scnprintf(const struct btf_type *type, struct btf *btf, char *bf, size_t size, int val)
 997{
 998	struct btf_enum *be = btf_enum(type);
 999	const int nr_entries = btf_vlen(type);
1000
1001	for (int i = 0; i < nr_entries; ++i, ++be) {
1002		if (be->val == val) {
1003			return scnprintf(bf, size, "%s",
1004					 btf__name_by_offset(btf, be->name_off));
1005		}
1006	}
1007
1008	return 0;
1009}
1010
/* Accumulator state for the trace__btf_dump_snprintf() libbpf callback. */
struct trace_btf_dump_snprintf_ctx {
	char   *bf;		/* destination buffer */
	size_t printed, size;	/* bytes written so far / buffer capacity */
};
1015
/* btf_dump printf callback: append the formatted output to ctx->bf. */
static void trace__btf_dump_snprintf(void *vctx, const char *fmt, va_list args)
{
	struct trace_btf_dump_snprintf_ctx *ctx = vctx;

	ctx->printed += vscnprintf(ctx->bf + ctx->printed, ctx->size - ctx->printed, fmt, args);
}
1022
1023static size_t btf_struct_scnprintf(const struct btf_type *type, struct btf *btf, char *bf, size_t size, struct syscall_arg *arg)
1024{
1025	struct trace_btf_dump_snprintf_ctx ctx = {
1026		.bf   = bf,
1027		.size = size,
1028	};
1029	struct augmented_arg *augmented_arg = arg->augmented.args;
1030	int type_id = arg->fmt->type_id, consumed;
1031	struct btf_dump *btf_dump;
1032
1033	LIBBPF_OPTS(btf_dump_opts, dump_opts);
1034	LIBBPF_OPTS(btf_dump_type_data_opts, dump_data_opts);
1035
1036	if (arg == NULL || arg->augmented.args == NULL)
1037		return 0;
1038
1039	dump_data_opts.compact	  = true;
1040	dump_data_opts.skip_names = !arg->trace->show_arg_names;
1041
1042	btf_dump = btf_dump__new(btf, trace__btf_dump_snprintf, &ctx, &dump_opts);
1043	if (btf_dump == NULL)
1044		return 0;
1045
1046	/* pretty print the struct data here */
1047	if (btf_dump__dump_type_data(btf_dump, type_id, arg->augmented.args->value, type->size, &dump_data_opts) == 0)
1048		return 0;
1049
1050	consumed = sizeof(*augmented_arg) + augmented_arg->size;
1051	arg->augmented.args = ((void *)arg->augmented.args) + consumed;
1052	arg->augmented.size -= consumed;
1053
1054	btf_dump__free(btf_dump);
1055
1056	return ctx.printed;
1057}
1058
1059static size_t trace__btf_scnprintf(struct trace *trace, struct syscall_arg *arg, char *bf,
1060				   size_t size, int val, char *type)
1061{
1062	struct syscall_arg_fmt *arg_fmt = arg->fmt;
1063
1064	if (trace->btf == NULL)
1065		return 0;
1066
1067	if (arg_fmt->type == NULL) {
1068		// Check if this is an enum and if we have the BTF type for it.
1069		syscall_arg_fmt__cache_btf_enum(arg_fmt, trace->btf, type);
1070	}
1071
1072	// Did we manage to find a BTF type for the syscall/tracepoint argument?
1073	if (arg_fmt->type == NULL)
1074		return 0;
1075
1076	if (btf_is_enum(arg_fmt->type))
1077		return btf_enum_scnprintf(arg_fmt->type, trace->btf, bf, size, val);
1078	else if (btf_is_struct(arg_fmt->type) || btf_is_union(arg_fmt->type))
1079		return btf_struct_scnprintf(arg_fmt->type, trace->btf, bf, size, arg);
1080
1081	return 0;
1082}
1083
1084#else // HAVE_LIBBPF_SUPPORT
/* Stub when built without libbpf: no BTF-based pretty printing. */
static size_t trace__btf_scnprintf(struct trace *trace __maybe_unused, struct syscall_arg *arg __maybe_unused,
				   char *bf __maybe_unused, size_t size __maybe_unused, int val __maybe_unused,
				   char *type __maybe_unused)
{
	return 0;
}
1091
/* Stub when built without libbpf: BTF name->value resolution unavailable. */
static bool syscall_arg__strtoul_btf_type(char *bf __maybe_unused, size_t size __maybe_unused,
					  struct syscall_arg *arg __maybe_unused, u64 *val __maybe_unused)
{
	return false;
}
1097#endif // HAVE_LIBBPF_SUPPORT
1098
1099#define STUL_BTF_TYPE syscall_arg__strtoul_btf_type
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1100
1101#define STRARRAY(name, array) \
1102	  { .scnprintf	= SCA_STRARRAY, \
1103	    .strtoul	= STUL_STRARRAY, \
1104	    .parm	= &strarray__##array, }
1105
1106#define STRARRAY_FLAGS(name, array) \
1107	  { .scnprintf	= SCA_STRARRAY_FLAGS, \
1108	    .strtoul	= STUL_STRARRAY_FLAGS, \
1109	    .parm	= &strarray__##array, }
1110
1111#include "trace/beauty/arch_errno_names.c"
1112#include "trace/beauty/eventfd.c"
1113#include "trace/beauty/futex_op.c"
1114#include "trace/beauty/futex_val3.c"
1115#include "trace/beauty/mmap.c"
1116#include "trace/beauty/mode_t.c"
1117#include "trace/beauty/msg_flags.c"
1118#include "trace/beauty/open_flags.c"
1119#include "trace/beauty/perf_event_open.c"
1120#include "trace/beauty/pid.c"
1121#include "trace/beauty/sched_policy.c"
1122#include "trace/beauty/seccomp.c"
1123#include "trace/beauty/signum.c"
1124#include "trace/beauty/socket_type.c"
1125#include "trace/beauty/waitid_options.c"
1126
/*
 * Per-syscall formatting overrides: argument beautifiers (.scnprintf et al),
 * aliases, and special return handling (.hexret/.errpid/.timeout).
 * MUST be kept sorted by ->name: __syscall_fmt__find() bsearch()es it.
 */
static const struct syscall_fmt syscall_fmts[] = {
	{ .name	    = "access",
	  .arg = { [1] = { .scnprintf = SCA_ACCMODE,  /* mode */ }, }, },
	{ .name	    = "arch_prctl",
	  .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, /* code */ },
		   [1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, },
	{ .name	    = "bind",
	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
		   [1] = SCA_SOCKADDR_FROM_USER(umyaddr),
		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
	{ .name	    = "bpf",
	  .arg = { [0] = STRARRAY(cmd, bpf_cmd),
		   [1] = { .from_user = true /* attr */, }, } },
	{ .name	    = "brk",	    .hexret = true,
	  .arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, },
	{ .name     = "clock_gettime",
	  .arg = { [0] = STRARRAY(clk_id, clockid), }, },
	{ .name	    = "clock_nanosleep",
	  .arg = { [2] = SCA_TIMESPEC_FROM_USER(req), }, },
	{ .name	    = "clone",	    .errpid = true, .nr_args = 5,
	  .arg = { [0] = { .name = "flags",	    .scnprintf = SCA_CLONE_FLAGS, },
		   [1] = { .name = "child_stack",   .scnprintf = SCA_HEX, },
		   [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
		   [3] = { .name = "child_tidptr",  .scnprintf = SCA_HEX, },
		   [4] = { .name = "tls",	    .scnprintf = SCA_HEX, }, }, },
	{ .name	    = "close",
	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
	{ .name	    = "connect",
	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
		   [1] = SCA_SOCKADDR_FROM_USER(servaddr),
		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
	{ .name	    = "epoll_ctl",
	  .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
	{ .name	    = "eventfd2",
	  .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
	{ .name     = "faccessat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	  /* dirfd */ },
		   [1] = SCA_FILENAME_FROM_USER(pathname),
		   [2] = { .scnprintf = SCA_ACCMODE,	  /* mode */ }, }, },
	{ .name     = "faccessat2",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	  /* dirfd */ },
		   [1] = SCA_FILENAME_FROM_USER(pathname),
		   [2] = { .scnprintf = SCA_ACCMODE,	  /* mode */ },
		   [3] = { .scnprintf = SCA_FACCESSAT2_FLAGS, /* flags */ }, }, },
	{ .name	    = "fchmodat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "fchownat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "fcntl",
	  .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD,  /* cmd */
			   .strtoul   = STUL_STRARRAYS,
			   .parm      = &strarrays__fcntl_cmds_arrays,
			   .show_zero = true, },
		   [2] = { .scnprintf =  SCA_FCNTL_ARG, /* arg */ }, }, },
	{ .name	    = "flock",
	  .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
	{ .name     = "fsconfig",
	  .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
	{ .name     = "fsmount",
	  .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
		   [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, },
	{ .name     = "fspick",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	  /* dfd */ },
		   [1] = SCA_FILENAME_FROM_USER(path),
		   [2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, },
	{ .name	    = "fstat", .alias = "newfstat", },
	{ .name	    = "futex",
	  .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ },
		   [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
	{ .name	    = "futimesat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "getitimer",
	  .arg = { [0] = STRARRAY(which, itimers), }, },
	{ .name	    = "getpid",	    .errpid = true, },
	{ .name	    = "getpgid",    .errpid = true, },
	{ .name	    = "getppid",    .errpid = true, },
	{ .name	    = "getrandom",
	  .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
	{ .name	    = "getrlimit",
	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	    = "getsockopt",
	  .arg = { [1] = STRARRAY(level, socket_level), }, },
	{ .name	    = "gettid",	    .errpid = true, },
	{ .name	    = "ioctl",
	  .arg = {
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
		   [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
#else
		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
#endif
	{ .name	    = "kcmp",	    .nr_args = 5,
	  .arg = { [0] = { .name = "pid1",	.scnprintf = SCA_PID, },
		   [1] = { .name = "pid2",	.scnprintf = SCA_PID, },
		   [2] = { .name = "type",	.scnprintf = SCA_KCMP_TYPE, },
		   [3] = { .name = "idx1",	.scnprintf = SCA_KCMP_IDX, },
		   [4] = { .name = "idx2",	.scnprintf = SCA_KCMP_IDX, }, }, },
	{ .name	    = "keyctl",
	  .arg = { [0] = STRARRAY(option, keyctl_options), }, },
	{ .name	    = "kill",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "linkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "lseek",
	  .arg = { [2] = STRARRAY(whence, whences), }, },
	{ .name	    = "lstat", .alias = "newlstat", },
	{ .name     = "madvise",
	  .arg = { [0] = { .scnprintf = SCA_HEX,      /* start */ },
		   [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
	{ .name	    = "mkdirat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "mknodat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "mmap",	    .hexret = true,
/* The standard mmap maps to old_mmap on s390x */
#if defined(__s390x__)
	.alias = "old_mmap",
#endif
	  .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, .show_zero = true, /* prot */ },
		   [3] = { .scnprintf = SCA_MMAP_FLAGS,	/* flags */
			   .strtoul   = STUL_STRARRAY_FLAGS,
			   .parm      = &strarray__mmap_flags, },
		   [5] = { .scnprintf = SCA_HEX,	/* offset */ }, }, },
	{ .name	    = "mount",
	  .arg = { [0] = SCA_FILENAME_FROM_USER(devname),
		   [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
			   .mask_val  = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
	{ .name	    = "move_mount",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* from_dfd */ },
		   [1] = SCA_FILENAME_FROM_USER(pathname),
		   [2] = { .scnprintf = SCA_FDAT,	/* to_dfd */ },
		   [3] = SCA_FILENAME_FROM_USER(pathname),
		   [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, },
	{ .name	    = "mprotect",
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT, .show_zero = true, /* prot */ }, }, },
	{ .name	    = "mq_unlink",
	  .arg = { [0] = SCA_FILENAME_FROM_USER(u_name), }, },
	{ .name	    = "mremap",	    .hexret = true,
	  .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, },
	{ .name	    = "name_to_handle_at",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "nanosleep",
	  .arg = { [0] = SCA_TIMESPEC_FROM_USER(req), }, },
	{ .name	    = "newfstatat", .alias = "fstatat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	  /* dirfd */ },
		   [1] = SCA_FILENAME_FROM_USER(pathname),
		   [3] = { .scnprintf = SCA_FS_AT_FLAGS, /* flags */ }, }, },
	{ .name	    = "open",
	  .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "open_by_handle_at",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "openat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "perf_event_open",
	  .arg = { [0] = SCA_PERF_ATTR_FROM_USER(attr),
		   [2] = { .scnprintf = SCA_INT,	/* cpu */ },
		   [3] = { .scnprintf = SCA_FD,		/* group_fd */ },
		   [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
	{ .name	    = "pipe2",
	  .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
	{ .name	    = "pkey_alloc",
	  .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS,	/* access_rights */ }, }, },
	{ .name	    = "pkey_free",
	  .arg = { [0] = { .scnprintf = SCA_INT,	/* key */ }, }, },
	{ .name	    = "pkey_mprotect",
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT, .show_zero = true, /* prot */ },
		   [3] = { .scnprintf = SCA_INT,	/* pkey */ }, }, },
	{ .name	    = "poll", .timeout = true, },
	{ .name	    = "ppoll", .timeout = true, },
	{ .name	    = "prctl",
	  .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */
			   .strtoul   = STUL_STRARRAY,
			   .parm      = &strarray__prctl_options, },
		   [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
		   [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
	{ .name	    = "pread", .alias = "pread64", },
	{ .name	    = "preadv", .alias = "pread", },
	{ .name	    = "prlimit64",
	  .arg = { [1] = STRARRAY(resource, rlimit_resources),
		   [2] = { .from_user = true /* new_rlim */, }, }, },
	{ .name	    = "pwrite", .alias = "pwrite64", },
	{ .name	    = "readlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "recvfrom",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "recvmmsg",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "recvmsg",
	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "renameat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
		   [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, }, },
	{ .name	    = "renameat2",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
		   [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ },
		   [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, },
	{ .name	    = "rseq",	    .errpid = true,
	  .arg = { [0] = { .from_user = true /* rseq */, }, }, },
	{ .name	    = "rt_sigaction",
	  .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "rt_sigprocmask",
	  .arg = { [0] = STRARRAY(how, sighow), }, },
	{ .name	    = "rt_sigqueueinfo",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "rt_tgsigqueueinfo",
	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "sched_setscheduler",
	  .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
	{ .name	    = "seccomp",
	  .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP,	   /* op */ },
		   [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
	{ .name	    = "select", .timeout = true, },
	{ .name	    = "sendfile", .alias = "sendfile64", },
	{ .name	    = "sendmmsg",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "sendmsg",
	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "sendto",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ },
		   [4] = SCA_SOCKADDR_FROM_USER(addr), }, },
	{ .name	    = "set_robust_list",	    .errpid = true,
	  .arg = { [0] = { .from_user = true /* head */, }, }, },
	{ .name	    = "set_tid_address", .errpid = true, },
	{ .name	    = "setitimer",
	  .arg = { [0] = STRARRAY(which, itimers), }, },
	{ .name	    = "setrlimit",
	  .arg = { [0] = STRARRAY(resource, rlimit_resources),
		   [1] = { .from_user = true /* rlim */, }, }, },
	{ .name	    = "setsockopt",
	  .arg = { [1] = STRARRAY(level, socket_level), }, },
	{ .name	    = "socket",
	  .arg = { [0] = STRARRAY(family, socket_families),
		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
		   [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
	{ .name	    = "socketpair",
	  .arg = { [0] = STRARRAY(family, socket_families),
		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
		   [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
	{ .name	    = "stat", .alias = "newstat", },
	{ .name	    = "statx",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	 /* fdat */ },
		   [2] = { .scnprintf = SCA_FS_AT_FLAGS, /* flags */ } ,
		   [3] = { .scnprintf = SCA_STATX_MASK,	 /* mask */ }, }, },
	{ .name	    = "swapoff",
	  .arg = { [0] = SCA_FILENAME_FROM_USER(specialfile), }, },
	{ .name	    = "swapon",
	  .arg = { [0] = SCA_FILENAME_FROM_USER(specialfile), }, },
	{ .name	    = "symlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "sync_file_range",
	  .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, /* flags */ }, }, },
	{ .name	    = "tgkill",
	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "tkill",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name     = "umount2", .alias = "umount",
	  .arg = { [0] = SCA_FILENAME_FROM_USER(name), }, },
	{ .name	    = "uname", .alias = "newuname", },
	{ .name	    = "unlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	  /* dfd */ },
		   [1] = SCA_FILENAME_FROM_USER(pathname),
		   [2] = { .scnprintf = SCA_FS_AT_FLAGS,  /* flags */ }, }, },
	{ .name	    = "utimensat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
	{ .name	    = "wait4",	    .errpid = true,
	  .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
	{ .name	    = "waitid",	    .errpid = true,
	  .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
	{ .name	    = "write",
	  .arg = { [1] = { .scnprintf = SCA_BUF /* buf */, .from_user = true, }, }, },
};
1405
1406static int syscall_fmt__cmp(const void *name, const void *fmtp)
1407{
1408	const struct syscall_fmt *fmt = fmtp;
1409	return strcmp(name, fmt->name);
1410}
1411
/* Binary search 'fmts' (sorted by ->name) for 'name'; NULL if absent. */
static const struct syscall_fmt *__syscall_fmt__find(const struct syscall_fmt *fmts,
						     const int nmemb,
						     const char *name)
{
	return bsearch(name, fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
}
1418
/* Find a syscall's formatting entry in syscall_fmts[] by name. */
static const struct syscall_fmt *syscall_fmt__find(const char *name)
{
	const int nmemb = ARRAY_SIZE(syscall_fmts);
	return __syscall_fmt__find(syscall_fmts, nmemb, name);
}
1424
1425static const struct syscall_fmt *__syscall_fmt__find_by_alias(const struct syscall_fmt *fmts,
1426							      const int nmemb, const char *alias)
1427{
1428	int i;
1429
1430	for (i = 0; i < nmemb; ++i) {
1431		if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0)
1432			return &fmts[i];
1433	}
1434
1435	return NULL;
1436}
1437
/* Find a syscall's formatting entry in syscall_fmts[] by its alias. */
static const struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
{
	const int nmemb = ARRAY_SIZE(syscall_fmts);
	return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias);
}
1443
1444/*
1445 * is_exit: is this "exit" or "exit_group"?
1446 * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
1447 * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
1448 * nonexistent: Just a hole in the syscall table, syscall id not allocated
1449 */
1450struct syscall {
1451	struct tep_event    *tp_format;
1452	int		    nr_args;
1453	int		    args_size;
1454	struct {
1455		struct bpf_program *sys_enter,
1456				   *sys_exit;
1457	}		    bpf_prog;
1458	bool		    is_exit;
1459	bool		    is_open;
1460	bool		    nonexistent;
1461	bool		    use_btf;
1462	struct tep_format_field *args;
1463	const char	    *name;
1464	const struct syscall_fmt  *fmt;
1465	struct syscall_arg_fmt *arg_fmt;
1466};
1467
1468/*
1469 * We need to have this 'calculated' boolean because in some cases we really
1470 * don't know what is the duration of a syscall, for instance, when we start
1471 * a session and some threads are waiting for a syscall to finish, say 'poll',
1472 * in which case all we can do is to print "( ? ) for duration and for the
1473 * start timestamp.
1474 */
1475static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
1476{
1477	double duration = (double)t / NSEC_PER_MSEC;
1478	size_t printed = fprintf(fp, "(");
1479
1480	if (!calculated)
1481		printed += fprintf(fp, "         ");
1482	else if (duration >= 1.0)
1483		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1484	else if (duration >= 0.01)
1485		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1486	else
1487		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1488	return printed + fprintf(fp, "): ");
1489}
1490
1491/**
1492 * filename.ptr: The filename char pointer that will be vfs_getname'd
1493 * filename.entry_str_pos: Where to insert the string translated from
1494 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
1495 * ret_scnprintf: syscall args may set this to a different syscall return
1496 *                formatter, for instance, fcntl may return fds, file flags, etc.
1497 */
1498struct thread_trace {
1499	u64		  entry_time;
 
1500	bool		  entry_pending;
1501	unsigned long	  nr_events;
1502	unsigned long	  pfmaj, pfmin;
1503	char		  *entry_str;
1504	double		  runtime_ms;
1505	size_t		  (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1506        struct {
1507		unsigned long ptr;
1508		short int     entry_str_pos;
1509		bool	      pending_open;
1510		unsigned int  namelen;
1511		char	      *name;
1512	} filename;
1513	struct {
1514		int	      max;
1515		struct file   *table;
1516	} files;
1517
1518	struct intlist *syscall_stats;
1519};
1520
1521static struct thread_trace *thread_trace__new(void)
1522{
1523	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1524
1525	if (ttrace) {
1526		ttrace->files.max = -1;
1527		ttrace->syscall_stats = intlist__new(NULL);
1528	}
1529
1530	return ttrace;
1531}
1532
1533static void thread_trace__free_files(struct thread_trace *ttrace);
1534
1535static void thread_trace__delete(void *pttrace)
1536{
1537	struct thread_trace *ttrace = pttrace;
1538
1539	if (!ttrace)
1540		return;
1541
1542	intlist__delete(ttrace->syscall_stats);
1543	ttrace->syscall_stats = NULL;
1544	thread_trace__free_files(ttrace);
1545	zfree(&ttrace->entry_str);
1546	free(ttrace);
1547}
1548
1549static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1550{
1551	struct thread_trace *ttrace;
1552
1553	if (thread == NULL)
1554		goto fail;
1555
1556	if (thread__priv(thread) == NULL)
1557		thread__set_priv(thread, thread_trace__new());
1558
1559	if (thread__priv(thread) == NULL)
1560		goto fail;
1561
1562	ttrace = thread__priv(thread);
1563	++ttrace->nr_events;
1564
1565	return ttrace;
1566fail:
1567	color_fprintf(fp, PERF_COLOR_RED,
1568		      "WARNING: not enough memory, dropping samples!\n");
1569	return NULL;
1570}
1571
1572
1573void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
1574				    size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
1575{
1576	struct thread_trace *ttrace = thread__priv(arg->thread);
1577
1578	ttrace->ret_scnprintf = ret_scnprintf;
1579}
1580
1581#define TRACE_PFMAJ		(1 << 0)
1582#define TRACE_PFMIN		(1 << 1)
1583
1584static const size_t trace__entry_str_size = 2048;
1585
1586static void thread_trace__free_files(struct thread_trace *ttrace)
1587{
1588	for (int i = 0; i < ttrace->files.max; ++i) {
1589		struct file *file = ttrace->files.table + i;
1590		zfree(&file->pathname);
1591	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1592
1593	zfree(&ttrace->files.table);
1594	ttrace->files.max  = -1;
1595}
1596
1597static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
1598{
1599	if (fd < 0)
1600		return NULL;
1601
1602	if (fd > ttrace->files.max) {
1603		struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
1604
1605		if (nfiles == NULL)
1606			return NULL;
1607
1608		if (ttrace->files.max != -1) {
1609			memset(nfiles + ttrace->files.max + 1, 0,
1610			       (fd - ttrace->files.max) * sizeof(struct file));
1611		} else {
1612			memset(nfiles, 0, (fd + 1) * sizeof(struct file));
1613		}
1614
1615		ttrace->files.table = nfiles;
1616		ttrace->files.max   = fd;
1617	}
1618
1619	return ttrace->files.table + fd;
1620}
1621
/* Convenience wrapper: fd table slot for a thread's private trace state. */
struct file *thread__files_entry(struct thread *thread, int fd)
{
	struct thread_trace *ttrace = thread__priv(thread);

	return thread_trace__files_entry(ttrace, fd);
}
1626
1627static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1628{
1629	struct thread_trace *ttrace = thread__priv(thread);
1630	struct file *file = thread_trace__files_entry(ttrace, fd);
1631
1632	if (file != NULL) {
1633		struct stat st;
1634		if (stat(pathname, &st) == 0)
1635			file->dev_maj = major(st.st_rdev);
1636		file->pathname = strdup(pathname);
1637		if (file->pathname)
1638			return 0;
1639	}
1640
1641	return -1;
1642}
1643
1644static int thread__read_fd_path(struct thread *thread, int fd)
1645{
1646	char linkname[PATH_MAX], pathname[PATH_MAX];
1647	struct stat st;
1648	int ret;
1649
1650	if (thread__pid(thread) == thread__tid(thread)) {
1651		scnprintf(linkname, sizeof(linkname),
1652			  "/proc/%d/fd/%d", thread__pid(thread), fd);
1653	} else {
1654		scnprintf(linkname, sizeof(linkname),
1655			  "/proc/%d/task/%d/fd/%d",
1656			  thread__pid(thread), thread__tid(thread), fd);
1657	}
1658
1659	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1660		return -1;
1661
1662	ret = readlink(linkname, pathname, sizeof(pathname));
1663
1664	if (ret < 0 || ret > st.st_size)
1665		return -1;
1666
1667	pathname[ret] = '\0';
1668	return trace__set_fd_pathname(thread, fd, pathname);
1669}
1670
1671static const char *thread__fd_path(struct thread *thread, int fd,
1672				   struct trace *trace)
1673{
1674	struct thread_trace *ttrace = thread__priv(thread);
1675
1676	if (ttrace == NULL || trace->fd_path_disabled)
1677		return NULL;
1678
1679	if (fd < 0)
1680		return NULL;
1681
1682	if ((fd > ttrace->files.max || ttrace->files.table[fd].pathname == NULL)) {
1683		if (!trace->live)
1684			return NULL;
1685		++trace->stats.proc_getname;
1686		if (thread__read_fd_path(thread, fd))
1687			return NULL;
1688	}
1689
1690	return ttrace->files.table[fd].pathname;
1691}
1692
1693size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
 
1694{
1695	int fd = arg->val;
1696	size_t printed = scnprintf(bf, size, "%d", fd);
1697	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1698
1699	if (path)
1700		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1701
1702	return printed;
1703}
1704
1705size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1706{
1707        size_t printed = scnprintf(bf, size, "%d", fd);
1708	struct thread *thread = machine__find_thread(trace->host, pid, pid);
1709
1710	if (thread) {
1711		const char *path = thread__fd_path(thread, fd, trace);
1712
1713		if (path)
1714			printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1715
1716		thread__put(thread);
1717	}
1718
1719        return printed;
1720}
1721
1722static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1723					      struct syscall_arg *arg)
1724{
1725	int fd = arg->val;
1726	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1727	struct thread_trace *ttrace = thread__priv(arg->thread);
1728
1729	if (ttrace && fd >= 0 && fd <= ttrace->files.max)
1730		zfree(&ttrace->files.table[fd].pathname);
1731
1732	return printed;
1733}
1734
1735static void thread__set_filename_pos(struct thread *thread, const char *bf,
1736				     unsigned long ptr)
1737{
1738	struct thread_trace *ttrace = thread__priv(thread);
1739
1740	ttrace->filename.ptr = ptr;
1741	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1742}
1743
1744static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
1745{
1746	struct augmented_arg *augmented_arg = arg->augmented.args;
1747	size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
1748	/*
1749	 * So that the next arg with a payload can consume its augmented arg, i.e. for rename* syscalls
1750	 * we would have two strings, each prefixed by its size.
1751	 */
1752	int consumed = sizeof(*augmented_arg) + augmented_arg->size;
1753
1754	arg->augmented.args = ((void *)arg->augmented.args) + consumed;
1755	arg->augmented.size -= consumed;
1756
1757	return printed;
1758}
1759
1760static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1761					      struct syscall_arg *arg)
1762{
1763	unsigned long ptr = arg->val;
1764
1765	if (arg->augmented.args)
1766		return syscall_arg__scnprintf_augmented_string(arg, bf, size);
1767
1768	if (!arg->trace->vfs_getname)
1769		return scnprintf(bf, size, "%#x", ptr);
1770
1771	thread__set_filename_pos(arg->thread, bf, ptr);
1772	return 0;
1773}
1774
1775#define MAX_CONTROL_CHAR 31
1776#define MAX_ASCII 127
1777
/*
 * Print a byte buffer carried in the augmented payload: printable ASCII
 * as-is, control characters (0~31 and 127) and non-ASCII as \(digits).
 */
static size_t syscall_arg__scnprintf_buf(char *bf, size_t size, struct syscall_arg *arg)
{
	struct augmented_arg *augmented_arg = arg->augmented.args;
	unsigned char *orig;
	size_t printed = 0;
	int consumed;

	/*
	 * Check for NULL *before* touching the payload: the old code read
	 * augmented_arg->value first, dereferencing a NULL pointer when no
	 * augmented data was present.
	 */
	if (augmented_arg == NULL)
		return 0;

	orig = (unsigned char *)augmented_arg->value;

	for (int j = 0; j < augmented_arg->size; ++j) {
		bool control_char = orig[j] <= MAX_CONTROL_CHAR || orig[j] >= MAX_ASCII;

		printed += scnprintf(bf + printed, size - printed, control_char ? "\\%d" : "%c", (int)orig[j]);
	}

	/* Consume this payload so the next augmented arg lines up. */
	consumed = sizeof(*augmented_arg) + augmented_arg->size;
	arg->augmented.args = ((void *)arg->augmented.args) + consumed;
	arg->augmented.size -= consumed;

	return printed;
}
1800
1801static bool trace__filter_duration(struct trace *trace, double t)
1802{
1803	return t < (trace->duration_filter * NSEC_PER_MSEC);
1804}
1805
1806static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1807{
1808	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1809
1810	return fprintf(fp, "%10.3f ", ts);
1811}
1812
1813/*
1814 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1815 * using ttrace->entry_time for a thread that receives a sys_exit without
1816 * first having received a sys_enter ("poll" issued before tracing session
1817 * starts, lost sys_enter exit due to ring buffer overflow).
1818 */
1819static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1820{
1821	if (tstamp > 0)
1822		return __trace__fprintf_tstamp(trace, tstamp, fp);
1823
1824	return fprintf(fp, "         ? ");
1825}
1826
static pid_t workload_pid = -1;	/* pid of the forked workload; compared against SIGCHLD si_pid */
static volatile sig_atomic_t done = false;	/* set by the signal handlers below to request shutdown */
static volatile sig_atomic_t interrupted = false;	/* true when stopped by a signal rather than workload exit */
1830
1831static void sighandler_interrupt(int sig __maybe_unused)
1832{
1833	done = interrupted = true;
 
1834}
1835
1836static void sighandler_chld(int sig __maybe_unused, siginfo_t *info,
1837			    void *context __maybe_unused)
1838{
1839	if (info->si_pid == workload_pid)
1840		done = true;
1841}
1842
1843static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
1844{
1845	size_t printed = 0;
1846
1847	if (trace->multiple_threads) {
1848		if (trace->show_comm)
1849			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1850		printed += fprintf(fp, "%d ", thread__tid(thread));
1851	}
1852
1853	return printed;
1854}
1855
1856static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1857					u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1858{
1859	size_t printed = 0;
1860
1861	if (trace->show_tstamp)
1862		printed = trace__fprintf_tstamp(trace, tstamp, fp);
1863	if (trace->show_duration)
1864		printed += fprintf_duration(duration, duration_calculated, fp);
1865	return printed + trace__fprintf_comm_tid(trace, thread, fp);
1866}
1867
1868static int trace__process_event(struct trace *trace, struct machine *machine,
1869				union perf_event *event, struct perf_sample *sample)
1870{
1871	int ret = 0;
1872
1873	switch (event->header.type) {
1874	case PERF_RECORD_LOST:
1875		color_fprintf(trace->output, PERF_COLOR_RED,
1876			      "LOST %" PRIu64 " events!\n", (u64)event->lost.lost);
1877		ret = machine__process_lost_event(machine, event, sample);
1878		break;
1879	default:
1880		ret = machine__process_event(machine, event, sample);
1881		break;
1882	}
1883
1884	return ret;
1885}
1886
1887static int trace__tool_process(const struct perf_tool *tool,
1888			       union perf_event *event,
1889			       struct perf_sample *sample,
1890			       struct machine *machine)
1891{
1892	struct trace *trace = container_of(tool, struct trace, tool);
1893	return trace__process_event(trace, machine, event, sample);
1894}
1895
1896static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1897{
1898	struct machine *machine = vmachine;
1899
1900	if (machine->kptr_restrict_warned)
1901		return NULL;
1902
1903	if (symbol_conf.kptr_restrict) {
1904		pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1905			   "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1906			   "Kernel samples will not be resolved.\n");
1907		machine->kptr_restrict_warned = true;
1908		return NULL;
1909	}
1910
1911	return machine__resolve_kernel_addr(vmachine, addrp, modp);
1912}
1913
1914static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
1915{
1916	int err = symbol__init(NULL);
1917
1918	if (err)
1919		return err;
1920
1921	trace->host = machine__new_host();
1922	if (trace->host == NULL)
1923		return -ENOMEM;
1924
1925	thread__set_priv_destructor(thread_trace__delete);
1926
1927	err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1928	if (err < 0)
1929		goto out;
1930
1931	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1932					    evlist->core.threads, trace__tool_process,
1933					    true, false, 1);
1934out:
1935	if (err)
1936		symbol__exit();
1937
1938	return err;
1939}
1940
1941static void trace__symbols__exit(struct trace *trace)
1942{
1943	machine__exit(trace->host);
1944	trace->host = NULL;
1945
1946	symbol__exit();
1947}
1948
1949static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
1950{
1951	int idx;
1952
1953	if (nr_args == RAW_SYSCALL_ARGS_NUM && sc->fmt && sc->fmt->nr_args != 0)
1954		nr_args = sc->fmt->nr_args;
1955
1956	sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
1957	if (sc->arg_fmt == NULL)
1958		return -1;
1959
1960	for (idx = 0; idx < nr_args; ++idx) {
1961		if (sc->fmt)
1962			sc->arg_fmt[idx] = sc->fmt->arg[idx];
1963	}
1964
1965	sc->nr_args = nr_args;
1966	return 0;
1967}
1968
/* Arg formatters looked up by field name; must stay sorted by .name for bsearch(). */
static const struct syscall_arg_fmt syscall_arg_fmts__by_name[] = {
	{ .name = "msr",	.scnprintf = SCA_X86_MSR,	  .strtoul = STUL_X86_MSR,	   },
	{ .name = "vector",	.scnprintf = SCA_X86_IRQ_VECTORS, .strtoul = STUL_X86_IRQ_VECTORS, },
};
1973
1974static int syscall_arg_fmt__cmp(const void *name, const void *fmtp)
1975{
1976       const struct syscall_arg_fmt *fmt = fmtp;
1977       return strcmp(name, fmt->name);
1978}
1979
1980static const struct syscall_arg_fmt *
1981__syscall_arg_fmt__find_by_name(const struct syscall_arg_fmt *fmts, const int nmemb,
1982				const char *name)
1983{
1984       return bsearch(name, fmts, nmemb, sizeof(struct syscall_arg_fmt), syscall_arg_fmt__cmp);
1985}
1986
1987static const struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name)
1988{
1989       const int nmemb = ARRAY_SIZE(syscall_arg_fmts__by_name);
1990       return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
1991}
1992
/*
 * Pick a default beautifier for every tracepoint arg that doesn't already
 * have one, based on field type/name heuristics.  Sets *use_btf when an
 * enum arg can be pretty-printed via BTF.  Returns the last field seen so
 * the caller can compute the size of the raw argument payload.
 */
static struct tep_format_field *
syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field,
			    bool *use_btf)
{
	struct tep_format_field *last_field = NULL;
	int len;

	for (; field; field = field->next, ++arg) {
		last_field = field;

		/* Already set, e.g. from the syscall_fmts overrides. */
		if (arg->scnprintf)
			continue;

		len = strlen(field->name);

		// As far as heuristics (or intention) goes this seems to hold true, and makes sense!
		if ((field->flags & TEP_FIELD_IS_POINTER) && strstarts(field->type, "const "))
			arg->from_user = true;

		/* "...name" or "*path*" strings get the filename beautifier. */
		if (strcmp(field->type, "const char *") == 0 &&
		    ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
		     strstr(field->name, "path") != NULL)) {
			arg->scnprintf = SCA_FILENAME;
		} else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
			arg->scnprintf = SCA_PTR;
		else if (strcmp(field->type, "pid_t") == 0)
			arg->scnprintf = SCA_PID;
		else if (strcmp(field->type, "umode_t") == 0)
			arg->scnprintf = SCA_MODE_T;
		else if ((field->flags & TEP_FIELD_IS_ARRAY) && strstr(field->type, "char")) {
			arg->scnprintf = SCA_CHAR_ARRAY;
			arg->nr_entries = field->arraylen;
		} else if ((strcmp(field->type, "int") == 0 ||
			  strcmp(field->type, "unsigned int") == 0 ||
			  strcmp(field->type, "long") == 0) &&
			 len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
			/*
			 * /sys/kernel/tracing/events/syscalls/sys_enter*
			 * grep -E 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
			 * 65 int
			 * 23 unsigned int
			 * 7 unsigned long
			 */
			arg->scnprintf = SCA_FD;
		} else if (strstr(field->type, "enum") && use_btf != NULL) {
			*use_btf = true;
			arg->strtoul = STUL_BTF_TYPE;
		} else {
			/* Last resort: the by-name table ("msr", "vector", ...). */
			const struct syscall_arg_fmt *fmt =
				syscall_arg_fmt__find_by_name(field->name);

			if (fmt) {
				arg->scnprintf = fmt->scnprintf;
				arg->strtoul   = fmt->strtoul;
			}
		}
	}

	return last_field;
}
2053
2054static int syscall__set_arg_fmts(struct syscall *sc)
2055{
2056	struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args,
2057									  &sc->use_btf);
2058
2059	if (last_field)
2060		sc->args_size = last_field->offset + last_field->size;
2061
2062	return 0;
2063}
2064
/*
 * Lazily fill trace->syscalls.table[id]: resolve the syscall's name, its
 * formatting overrides and its sys_enter tracepoint format.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EEXIST when the
 * id is a known hole in the syscall table, or PTR_ERR() of the failed
 * tracepoint lookup.
 */
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = syscalltbl__name(trace->sctbl, id);
	int err;

#ifdef HAVE_SYSCALL_TABLE_SUPPORT
	/* With a generated syscall table, max_id is known up front. */
	if (trace->syscalls.table == NULL) {
		trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
		if (trace->syscalls.table == NULL)
			return -ENOMEM;
	}
#else
	if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) {
		// When using libaudit we don't know beforehand what is the max syscall id
		struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (table == NULL)
			return -ENOMEM;

		// Need to memset from offset 0 and +1 members if brand new
		if (trace->syscalls.table == NULL)
			memset(table, 0, (id + 1) * sizeof(*sc));
		else
			memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));

		trace->syscalls.table	      = table;
		trace->sctbl->syscalls.max_id = id;
	}
#endif
	sc = trace->syscalls.table + id;
	if (sc->nonexistent)
		return -EEXIST;

	if (name == NULL) {
		sc->nonexistent = true;
		return -EEXIST;
	}

	sc->name = name;
	sc->fmt  = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	/* Retry the lookup under the syscall's alias name, if it has one. */
	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	/*
	 * Fails to read trace point format via sysfs node, so the trace point
	 * doesn't exist.  Set the 'nonexistent' flag as true.
	 */
	if (IS_ERR(sc->tp_format)) {
		sc->nonexistent = true;
		return PTR_ERR(sc->tp_format);
	}

	/*
	 * The tracepoint format contains __syscall_nr field, so it's one more
	 * than the actual number of syscall arguments.
	 */
	/* NOTE(review): IS_ERR(sc->tp_format) is always false here — we returned above. */
	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ?
					RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields - 1))
		return -ENOMEM;

	sc->args = sc->tp_format->format.fields;

	/*
	 * We need to check and discard the first variable '__syscall_nr'
	 * or 'nr' that mean the syscall number. It is needless here.
	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
	 */
	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
		sc->args = sc->args->next;
		--sc->nr_args;
	}

	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
	sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");

	err = syscall__set_arg_fmts(sc);

	/* after calling syscall__set_arg_fmts() we'll know whether use_btf is true */
	if (sc->use_btf)
		trace__load_vmlinux_btf(trace);

	return err;
}
2155
2156static int evsel__init_tp_arg_scnprintf(struct evsel *evsel, bool *use_btf)
2157{
2158	struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
2159
2160	if (fmt != NULL) {
2161		syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields, use_btf);
2162		return 0;
2163	}
2164
2165	return -ENOMEM;
2166}
2167
/*
 * qsort()/bsearch() comparator for ints.  Uses two comparisons instead
 * of '*one - *another': the subtraction can overflow (undefined
 * behavior) for operands of opposite sign and large magnitude, while
 * this form is safe over the whole int range.
 */
static int intcmp(const void *a, const void *b)
{
	const int *one = a, *another = b;

	return (*one > *another) - (*one < *another);
}
2174
/*
 * Translate the user-supplied list of syscall names/globs in
 * trace->ev_qualifier into a sorted array of syscall ids in
 * trace->ev_qualifier_ids, so trace__syscall_enabled() can bsearch() it.
 * Unknown names are skipped with a debug message, not treated as fatal.
 */
static int trace__validate_ev_qualifier(struct trace *trace)
{
	int err = 0;
	bool printed_invalid_prefix = false;
	struct str_node *pos;
	size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier);

	trace->ev_qualifier_ids.entries = malloc(nr_allocated *
						 sizeof(trace->ev_qualifier_ids.entries[0]));

	if (trace->ev_qualifier_ids.entries == NULL) {
		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
		       trace->output);
		err = -EINVAL;
		goto out;
	}

	strlist__for_each_entry(pos, trace->ev_qualifier) {
		const char *sc = pos->s;
		int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;

		if (id < 0) {
			/* Not an exact name: try it as a glob pattern. */
			id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
			if (id >= 0)
				goto matches;

			if (!printed_invalid_prefix) {
				pr_debug("Skipping unknown syscalls: ");
				printed_invalid_prefix = true;
			} else {
				pr_debug(", ");
			}

			pr_debug("%s", sc);
			continue;
		}
matches:
		trace->ev_qualifier_ids.entries[nr_used++] = id;
		if (match_next == -1)	/* exact match, nothing more to expand */
			continue;

		/* Collect the remaining ids the glob matches, growing as needed. */
		while (1) {
			id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
			if (id < 0)
				break;
			if (nr_allocated == nr_used) {
				void *entries;

				nr_allocated += 8;
				entries = realloc(trace->ev_qualifier_ids.entries,
						  nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
				if (entries == NULL) {
					err = -ENOMEM;
					fputs("\nError:\t Not enough memory for parsing\n", trace->output);
					goto out_free;
				}
				trace->ev_qualifier_ids.entries = entries;
			}
			trace->ev_qualifier_ids.entries[nr_used++] = id;
		}
	}

	trace->ev_qualifier_ids.nr = nr_used;
	/* Sorted so trace__syscall_enabled() can bsearch() with intcmp(). */
	qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp);
out:
	if (printed_invalid_prefix)
		pr_debug("\n");
	return err;
out_free:
	zfree(&trace->ev_qualifier_ids.entries);
	trace->ev_qualifier_ids.nr = 0;
	goto out;
}
2248
2249static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
2250{
2251	bool in_ev_qualifier;
2252
2253	if (trace->ev_qualifier_ids.nr == 0)
2254		return true;
2255
2256	in_ev_qualifier = bsearch(&id, trace->ev_qualifier_ids.entries,
2257				  trace->ev_qualifier_ids.nr, sizeof(int), intcmp) != NULL;
2258
2259	if (in_ev_qualifier)
2260	       return !trace->not_ev_qualifier;
2261
2262	return trace->not_ev_qualifier;
2263}
2264
2265/*
2266 * args is to be interpreted as a series of longs but we need to handle
2267 * 8-byte unaligned accesses. args points to raw_data within the event
2268 * and raw_data is guaranteed to be 8-byte unaligned because it is
2269 * preceded by raw_size which is a u32. So we need to copy args to a temp
2270 * variable to read it. Most notably this avoids extended load instructions
2271 * on unaligned addresses
2272 */
2273unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
2274{
2275	unsigned long val;
2276	unsigned char *p = arg->args + sizeof(unsigned long) * idx;
2277
2278	memcpy(&val, p, sizeof(val));
2279	return val;
2280}
2281
2282static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
2283				      struct syscall_arg *arg)
2284{
2285	if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
2286		return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
2287
2288	return scnprintf(bf, size, "arg%d: ", arg->idx);
2289}
2290
2291/*
2292 * Check if the value is in fact zero, i.e. mask whatever needs masking, such
2293 * as mount 'flags' argument that needs ignoring some magic flag, see comment
2294 * in tools/perf/trace/beauty/mount_flags.c
2295 */
2296static unsigned long syscall_arg_fmt__mask_val(struct syscall_arg_fmt *fmt, struct syscall_arg *arg, unsigned long val)
2297{
2298	if (fmt && fmt->mask_val)
2299		return fmt->mask_val(arg, val);
2300
2301	return val;
2302}
2303
2304static size_t syscall_arg_fmt__scnprintf_val(struct syscall_arg_fmt *fmt, char *bf, size_t size,
2305					     struct syscall_arg *arg, unsigned long val)
2306{
2307	if (fmt && fmt->scnprintf) {
2308		arg->val = val;
2309		if (fmt->parm)
2310			arg->parm = fmt->parm;
2311		return fmt->scnprintf(bf, size, arg);
2312	}
2313	return scnprintf(bf, size, "%ld", val);
2314}
2315
/*
 * Format all of syscall 'sc's arguments into 'bf', using the per-arg
 * beautifiers in sc->arg_fmt and, when present, the augmented payload
 * (copied strings/buffers) that follows the raw args.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, void *augmented_args, int augmented_args_size,
				      struct trace *trace, struct thread *thread)
{
	size_t printed = 0, btf_printed;
	unsigned long val;
	u8 bit = 1;	/* walks arg.mask in lockstep with arg.idx */
	struct syscall_arg arg = {
		.args	= args,
		.augmented = {
			.size = augmented_args_size,
			.args = augmented_args,
		},
		.idx	= 0,
		.mask	= 0,
		.trace  = trace,
		.thread = thread,
		.show_string_prefix = trace->show_string_prefix,
	};
	struct thread_trace *ttrace = thread__priv(thread);
	void *default_scnprintf;

	/*
	 * Things like fcntl will set this in its 'cmd' formatter to pick the
	 * right formatter for the return value (an fd? file flags?), which is
	 * not needed for syscalls that always return a given type, say an fd.
	 */
	ttrace->ret_scnprintf = NULL;

	if (sc->args != NULL) {
		struct tep_format_field *field;

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			if (arg.mask & bit)	/* suppressed by a beautifier */
				continue;

			arg.fmt = &sc->arg_fmt[arg.idx];
			val = syscall_arg__val(&arg, arg.idx);
			/*
			 * Some syscall args need some mask, most don't and
			 * return val untouched.
			 */
			val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val);

			/*
			 * Suppress this argument if its value is zero and show_zero
			 * property isn't set.
			 *
			 * If it has a BTF type, then override the zero suppression knob
			 * as the common case is for zero in an enum to have an associated entry.
			 */
			if (val == 0 && !trace->show_zeros &&
			    !(sc->arg_fmt && sc->arg_fmt[arg.idx].show_zero) &&
			    !(sc->arg_fmt && sc->arg_fmt[arg.idx].strtoul == STUL_BTF_TYPE))
				continue;

			printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");

			if (trace->show_arg_names)
				printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);

			default_scnprintf = sc->arg_fmt[arg.idx].scnprintf;

			/* Try BTF for pointers/unset formatters first; fall through on failure. */
			if (trace->force_btf || default_scnprintf == NULL || default_scnprintf == SCA_PTR) {
				btf_printed = trace__btf_scnprintf(trace, &arg, bf + printed,
								   size - printed, val, field->type);
				if (btf_printed) {
					printed += btf_printed;
					continue;
				}
			}

			printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx],
								  bf + printed, size - printed, &arg, val);
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * If we managed to read the tracepoint /format file, then we
		 * may end up not having any args, like with gettid(), so only
		 * print the raw args when we didn't manage to read it.
		 */
		while (arg.idx < sc->nr_args) {
			if (arg.mask & bit)
				goto next_arg;
			val = syscall_arg__val(&arg, arg.idx);
			if (printed)
				printed += scnprintf(bf + printed, size - printed, ", ");
			printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
			printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], bf + printed, size - printed, &arg, val);
next_arg:
			++arg.idx;
			bit <<= 1;
		}
	}

	return printed;
}
2414
/* Signature of the per-event handlers dispatched for tracepoint samples. */
typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
2418
/*
 * Return the struct syscall for 'id', lazily reading its info on first
 * use.  Returns NULL (with diagnostics at higher verbosity levels) for
 * invalid ids, table holes, or when the info could not be read.
 */
static struct syscall *trace__syscall_info(struct trace *trace,
					   struct evsel *evsel, int id)
{
	int err = 0;

	if (id < 0) {

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
 		 */
		if (verbose > 1) {
			static u64 n;
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, evsel__name(evsel), ++n);
		}
		return NULL;
	}

	err = -EINVAL;

	/*
	 * Note: the two #ifdef branches below share the closing brace; with
	 * libaudit the bounds check is combined with reading the info, as
	 * max_id grows as new ids are seen.
	 */
#ifdef HAVE_SYSCALL_TABLE_SUPPORT
	if (id > trace->sctbl->syscalls.max_id) {
#else
	if (id >= trace->sctbl->syscalls.max_id) {
		/*
		 * With libaudit we don't know beforehand what is the max_id,
		 * so we let trace__read_syscall_info() figure that out as we
		 * go on reading syscalls.
		 */
		err = trace__read_syscall_info(trace, id);
		if (err)
#endif
		goto out_cant_read;
	}

	if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
	    (err = trace__read_syscall_info(trace, id)) != 0)
		goto out_cant_read;

	if (trace->syscalls.table && trace->syscalls.table[id].nonexistent)
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
	if (verbose > 0) {
		char sbuf[STRERR_BUFSIZE];
		fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
		if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
	return NULL;
}
2480
/* Per-syscall accounting, kept in ttrace->syscall_stats keyed by syscall id. */
struct syscall_stats {
	struct stats stats;	/* duration statistics, fed by thread__update_stats() */
	u64	     nr_failures;	/* calls that returned an error */
	int	     max_errno;	/* number of entries in errnos[] */
	u32	     *errnos;	/* per-errno hit counts, indexed by errno - 1 */
};
2487
/*
 * Account one syscall exit into the thread's summary stats: the duration
 * since entry goes into the per-syscall 'struct stats'; failures bump
 * nr_failures and, when errno_summary is enabled, a per-errno histogram
 * that is grown on demand.  All failures here are silent or pr_debug-only:
 * the summary just ends up incomplete.
 */
static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
				 int id, struct perf_sample *sample, long err, bool errno_summary)
{
	struct int_node *inode;
	struct syscall_stats *stats;
	u64 duration = 0;

	/* One intlist node per syscall id, created on first use. */
	inode = intlist__findnew(ttrace->syscall_stats, id);
	if (inode == NULL)
		return;

	stats = inode->priv;
	if (stats == NULL) {
		stats = zalloc(sizeof(*stats));
		if (stats == NULL)
			return;

		init_stats(&stats->stats);
		inode->priv = stats;
	}

	/* Only compute a duration when we saw the entry and time moved forward. */
	if (ttrace->entry_time && sample->time > ttrace->entry_time)
		duration = sample->time - ttrace->entry_time;

	update_stats(&stats->stats, duration);

	if (err < 0) {
		++stats->nr_failures;

		if (!errno_summary)
			return;

		err = -err;
		if (err > stats->max_errno) {
			/* Grow the histogram to cover this errno, zeroing the new tail. */
			u32 *new_errnos = realloc(stats->errnos, err * sizeof(u32));

			if (new_errnos) {
				memset(new_errnos + stats->max_errno, 0, (err - stats->max_errno) * sizeof(u32));
			} else {
				pr_debug("Not enough memory for errno stats for thread \"%s\"(%d/%d), results will be incomplete\n",
					 thread__comm_str(thread), thread__pid(thread),
					 thread__tid(thread));
				return;
			}

			stats->errnos = new_errnos;
			stats->max_errno = err;
		}

		/* Histogram slot for errno E lives at errnos[E - 1]. */
		++stats->errnos[err - 1];
	}
}
2540
/*
 * If a sys_enter line is still pending (its matching sys_exit hasn't been
 * seen yet) when some other event must be printed, flush the partial line
 * terminated with " ..." so the two don't get interleaved.
 *
 * Returns the number of characters printed, 0 when nothing was pending or
 * printing is suppressed (--failure-only, no current thread).
 */
static int trace__printf_interrupted_entry(struct trace *trace)
{
	struct thread_trace *ttrace;
	size_t printed;
	int len;

	if (trace->failure_only || trace->current == NULL)
		return 0;

	ttrace = thread__priv(trace->current);

	if (!ttrace->entry_pending)
		return 0;

	printed  = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
	printed += len = fprintf(trace->output, "%s)", ttrace->entry_str);

	/* Pad to the return-value column; the -4 presumably accounts for the " ..." suffix — TODO confirm. */
	if (len < trace->args_alignment - 4)
		printed += fprintf(trace->output, "%-*s", trace->args_alignment - 4 - len, " ");

	printed += fprintf(trace->output, " ...\n");

	/* The entry was consumed here; sys_exit must not print it again. */
	ttrace->entry_pending = false;
	++trace->nr_events_printed;

	return printed;
}
2568
2569static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
2570				 struct perf_sample *sample, struct thread *thread)
2571{
2572	int printed = 0;
2573
2574	if (trace->print_sample) {
2575		double ts = (double)sample->time / NSEC_PER_MSEC;
2576
2577		printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
2578				   evsel__name(evsel), ts,
2579				   thread__comm_str(thread),
2580				   sample->pid, sample->tid, sample->cpu);
2581	}
2582
2583	return printed;
2584}
2585
2586static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
2587{
2588	void *augmented_args = NULL;
2589	/*
2590	 * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
2591	 * and there we get all 6 syscall args plus the tracepoint common fields
2592	 * that gets calculated at the start and the syscall_nr (another long).
2593	 * So we check if that is the case and if so don't look after the
2594	 * sc->args_size but always after the full raw_syscalls:sys_enter payload,
2595	 * which is fixed.
2596	 *
2597	 * We'll revisit this later to pass s->args_size to the BPF augmenter
2598	 * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
2599	 * copies only what we need for each syscall, like what happens when we
2600	 * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace
2601	 * traffic to just what is needed for each syscall.
2602	 */
2603	int args_size = raw_augmented_args_size ?: sc->args_size;
2604
2605	*augmented_args_size = sample->raw_size - args_size;
2606	if (*augmented_args_size > 0)
2607		augmented_args = sample->raw_data + args_size;
2608
2609	return augmented_args;
2610}
2611
2612static void syscall__exit(struct syscall *sc)
2613{
2614	if (!sc)
2615		return;
2616
2617	zfree(&sc->arg_fmt);
2618}
2619
/*
 * sys_enter handler: format "name(args" into the per-thread entry_str
 * buffer.  For syscalls flagged is_exit (no matching sys_exit will arrive)
 * the line is printed immediately with "= ?"; otherwise it is left pending
 * for trace__sys_exit() to complete with the return value.
 *
 * Returns 0 on success, -1 when the syscall info or per-thread state can't
 * be set up.
 */
static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	int printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	int augmented_args_size = 0;
	void *augmented_args = NULL;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	trace__fprintf_sample(trace, evsel, sample, thread);

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* Lazily allocate the buffer where the entry line is composed. */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	/* Flush a previous thread's half-printed entry before starting ours. */
	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
		trace__printf_interrupted_entry(trace);
	/*
	 * If this is raw_syscalls.sys_enter, then it always comes with the 6 possible
	 * arguments, even if the syscall being handled, say "openat", uses only 4 arguments
	 * this breaks syscall__augmented_args() check for augmented args, as we calculate
	 * syscall->args_size using each syscalls:sys_enter_NAME tracefs format file,
	 * so when handling, say the openat syscall, we end up getting 6 args for the
	 * raw_syscalls:sys_enter event, when we expected just 4, we end up mistakenly
	 * thinking that the extra 2 u64 args are the augmented filename, so just check
	 * here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
	 */
	if (evsel != trace->syscalls.events.sys_enter)
		augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, augmented_args, augmented_args_size, trace, thread);

	if (sc->is_exit) {
		/* No sys_exit will come: print now, with an unknown return value. */
		if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
			int alignment = 0;

			trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
			printed = fprintf(trace->output, "%s)", ttrace->entry_str);
			if (trace->args_alignment > printed)
				alignment = trace->args_alignment - printed;
			fprintf(trace->output, "%*s= ?\n", alignment, " ");
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	/* Remember the last thread seen, for trace__printf_interrupted_entry(). */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2698
/*
 * Print only the beautified argument list of a sys_enter-style payload to
 * trace->output, with no pending-entry bookkeeping.  Used by
 * trace__event_handler() for bpf_output and syscalls:sys_enter_* events.
 *
 * Returns 0 on success, -1 when syscall or per-thread info can't be set up.
 */
static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
				    struct perf_sample *sample)
{
	struct thread_trace *ttrace;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	char msg[1024];
	void *args, *augmented_args = NULL;
	int augmented_args_size;
	size_t printed = 0;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	/*
	 * We need to get ttrace just to make sure it is there when syscall__scnprintf_args()
	 * and the rest of the beautifiers accessing it via struct syscall_arg touches it.
	 */
	if (ttrace == NULL)
		goto out_put;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
	augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
	printed += syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
	fprintf(trace->output, "%.*s", (int)printed, msg);
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2732
2733static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
2734				    struct perf_sample *sample,
2735				    struct callchain_cursor *cursor)
2736{
2737	struct addr_location al;
2738	int max_stack = evsel->core.attr.sample_max_stack ?
2739			evsel->core.attr.sample_max_stack :
2740			trace->max_stack;
2741	int err = -1;
2742
2743	addr_location__init(&al);
2744	if (machine__resolve(trace->host, &al, sample) < 0)
2745		goto out;
2746
2747	err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
2748out:
2749	addr_location__exit(&al);
2750	return err;
2751}
2752
2753static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
2754{
2755	/* TODO: user-configurable print_opts */
2756	const unsigned int print_opts = EVSEL__PRINT_SYM |
2757				        EVSEL__PRINT_DSO |
2758				        EVSEL__PRINT_UNKNOWN_AS_ADDR;
2759
2760	return sample__fprintf_callchain(sample, 38, print_opts, get_tls_callchain_cursor(), symbol_conf.bt_stop_list, trace->output);
2761}
2762
/* Translate an errno value to its symbolic name for the evsel's arch. */
static const char *errno_to_name(struct evsel *evsel, int err)
{
	return perf_env__arch_strerrno(evsel__env(evsel), err);
}
2769
/*
 * sys_exit handler: complete the pending entry line (or synthesize a
 * "... [continued]" one if the entry wasn't seen), compute the duration,
 * update summary stats, resolve vfs_getname-captured fd paths, and print
 * the return value using the syscall's format hints (errno names, hexret,
 * timeout, errpid, or a beautifier installed in ttrace->ret_scnprintf).
 *
 * Note the unusual control flow: 'errno_print' is a label on a block inside
 * the else-if chain, entered via goto from the sc->fmt == NULL branch, and
 * 'signed_print' is entered from the final else — keep that in mind before
 * reshaping any of this.
 *
 * Returns 0 on success, -1 when syscall or per-thread info can't be set up.
 */
static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	bool duration_calculated = false;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
	int alignment = trace->args_alignment;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	trace__fprintf_sample(trace, evsel, sample, thread);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	if (trace->summary)
		thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary);

	/* A successful open-like syscall: associate the captured path with the new fd. */
	if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
		duration_calculated = true;
	} else if (trace->duration_filter)
		goto out;

	if (sample->callchain) {
		struct callchain_cursor *cursor = get_tls_callchain_cursor();

		callchain_ret = trace__resolve_callchain(trace, evsel, sample, cursor);
		if (callchain_ret == 0) {
			/* Too shallow for --min-stack: suppress this event entirely. */
			if (cursor->nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	if (trace->summary_only || (ret >= 0 && trace->failure_only))
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);

	if (ttrace->entry_pending) {
		printed = fprintf(trace->output, "%s", ttrace->entry_str);
	} else {
		/* Entry was printed earlier (interrupted) or never seen. */
		printed += fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		printed += 9;
		printed += fprintf(trace->output, "]: %s()", sc->name);
	}

	printed++; /* the closing ')' */

	if (alignment > printed)
		alignment -= printed;
	else
		alignment = 0;

	fprintf(trace->output, ")%*s= ", alignment, " ");

	if (sc->fmt == NULL) {
		if (ret < 0)
			goto errno_print;
signed_print:
		fprintf(trace->output, "%ld", ret);
	} else if (ret < 0) {
errno_print: {
		char bf[STRERR_BUFSIZE];
		const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
			   *e = errno_to_name(evsel, -ret);

		fprintf(trace->output, "-1 %s (%s)", e, emsg);
	}
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, "0 (Timeout)");
	else if (ttrace->ret_scnprintf) {
		/* One-shot return-value beautifier installed by an arg beautifier. */
		char bf[1024];
		struct syscall_arg arg = {
			.val	= ret,
			.thread	= thread,
			.trace	= trace,
		};
		ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
		ttrace->ret_scnprintf = NULL;
		fprintf(trace->output, "%s", bf);
	} else if (sc->fmt->hexret)
		fprintf(trace->output, "%#lx", ret);
	else if (sc->fmt->errpid) {
		/* Return value is a pid (e.g. from fork-like syscalls): show its comm. */
		struct thread *child = machine__find_thread(trace->host, ret, ret);

		if (child != NULL) {
			fprintf(trace->output, "%ld", ret);
			if (thread__comm_set(child))
				fprintf(trace->output, " (%s)", thread__comm_str(child));
			thread__put(child);
		}
	} else
		goto signed_print;

	fputc('\n', trace->output);

	/*
	 * We only consider an 'event' for the sake of --max-events a non-filtered
	 * sys_enter + sys_exit and other tracepoint events.
	 */
	if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
		interrupted = true;

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2905
/*
 * probe:vfs_getname handler: capture the pathname the kernel is resolving.
 * The name is stashed in ttrace->filename (with pending_open set, consumed
 * by trace__sys_exit() to map fd -> path) and, when a sys_enter line is
 * pending with a placeholder at filename.entry_str_pos, spliced directly
 * into that pending entry string.
 */
static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len, entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out_put;

	filename_len = strlen(filename);
	if (filename_len == 0)
		goto out_put;

	/* Grow the per-thread copy buffer if this name is the longest yet. */
	if (ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
			goto out_put;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	ttrace->filename.pending_open = true;

	/* No placeholder pointer recorded by the arg beautifier: nothing to splice. */
	if (!ttrace->filename.ptr)
		goto out_put;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out_put;

	/* If it doesn't fit, keep the tail of the path (the most specific part). */
	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	/* Open a gap at entry_str_pos and drop the filename into it. */
	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out_put:
	thread__put(thread);
out:
	return 0;
}
2966
2967static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
2968				     union perf_event *event __maybe_unused,
2969				     struct perf_sample *sample)
2970{
2971        u64 runtime = evsel__intval(evsel, sample, "runtime");
2972	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2973	struct thread *thread = machine__findnew_thread(trace->host,
2974							sample->pid,
2975							sample->tid);
2976	struct thread_trace *ttrace = thread__trace(thread, trace->output);
2977
2978	if (ttrace == NULL)
2979		goto out_dump;
2980
2981	ttrace->runtime_ms += runtime_ms;
2982	trace->runtime_ms += runtime_ms;
2983out_put:
2984	thread__put(thread);
2985	return 0;
2986
2987out_dump:
2988	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2989	       evsel->name,
2990	       evsel__strval(evsel, sample, "comm"),
2991	       (pid_t)evsel__intval(evsel, sample, "pid"),
2992	       runtime,
2993	       evsel__intval(evsel, sample, "vruntime"));
2994	goto out_put;
 
2995}
2996
2997static int bpf_output__printer(enum binary_printer_ops op,
2998			       unsigned int val, void *extra __maybe_unused, FILE *fp)
2999{
 
3000	unsigned char ch = (unsigned char)val;
3001
3002	switch (op) {
3003	case BINARY_PRINT_CHAR_DATA:
3004		return fprintf(fp, "%c", isprint(ch) ? ch : '.');
 
3005	case BINARY_PRINT_DATA_BEGIN:
3006	case BINARY_PRINT_LINE_BEGIN:
3007	case BINARY_PRINT_ADDR:
3008	case BINARY_PRINT_NUM_DATA:
3009	case BINARY_PRINT_NUM_PAD:
3010	case BINARY_PRINT_SEP:
3011	case BINARY_PRINT_CHAR_PAD:
3012	case BINARY_PRINT_LINE_END:
3013	case BINARY_PRINT_DATA_END:
3014	default:
3015		break;
3016	}
3017
3018	return 0;
3019}
3020
3021static void bpf_output__fprintf(struct trace *trace,
3022				struct perf_sample *sample)
3023{
3024	binary__fprintf(sample->raw_data, sample->raw_size, 8,
3025			bpf_output__printer, NULL, trace->output);
3026	++trace->nr_events_printed;
3027}
3028
/*
 * Generic tracepoint payload printer: walk the event's libtraceevent format
 * fields in step with the per-evsel syscall_arg_fmt array, beautify each
 * value like a syscall arg (masking, BTF pretty-printing, zero suppression)
 * and print the result as comma-separated "name: value" pairs.
 */
static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, struct perf_sample *sample,
				       struct thread *thread, void *augmented_args, int augmented_args_size)
{
	char bf[2048];
	size_t size = sizeof(bf);
	struct tep_format_field *field = evsel->tp_format->format.fields;
	struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel);
	size_t printed = 0, btf_printed;
	unsigned long val;
	u8 bit = 1;
	struct syscall_arg syscall_arg = {
		.augmented = {
			.size = augmented_args_size,
			.args = augmented_args,
		},
		.idx	= 0,
		.mask	= 0,
		.trace  = trace,
		.thread = thread,
		.show_string_prefix = trace->show_string_prefix,
	};

	for (; field && arg; field = field->next, ++syscall_arg.idx, bit <<= 1, ++arg) {
		/* A previous beautifier may have consumed this field already. */
		if (syscall_arg.mask & bit)
			continue;

		syscall_arg.len = 0;
		syscall_arg.fmt = arg;
		if (field->flags & TEP_FIELD_IS_ARRAY) {
			int offset = field->offset;

			/*
			 * Dynamic arrays store a descriptor in the payload:
			 * length in the high 16 bits, offset in the low 16,
			 * possibly relative to the end of this field.
			 */
			if (field->flags & TEP_FIELD_IS_DYNAMIC) {
				offset = format_field__intval(field, sample, evsel->needs_swap);
				syscall_arg.len = offset >> 16;
				offset &= 0xffff;
				if (tep_field_is_relative(field->flags))
					offset += field->offset + field->size;
			}

			val = (uintptr_t)(sample->raw_data + offset);
		} else
			val = format_field__intval(field, sample, evsel->needs_swap);
		/*
		 * Some syscall args need some mask, most don't and
		 * return val untouched.
		 */
		val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val);

		/* Suppress this argument if its value is zero and show_zero property isn't set. */
		if (val == 0 && !trace->show_zeros && !arg->show_zero && arg->strtoul != STUL_BTF_TYPE)
			continue;

		printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");

		if (trace->show_arg_names)
			printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);

		/* BTF-based pretty-printing wins when it produces output. */
		btf_printed = trace__btf_scnprintf(trace, &syscall_arg, bf + printed, size - printed, val, field->type);
		if (btf_printed) {
			printed += btf_printed;
			continue;
		}

		printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val);
	}

	return printed + fprintf(trace->output, "%.*s", (int)printed, bf);
}
3097
/*
 * Handler for non-syscall tracepoints and bpf_output events: print a
 * timestamped "name(fields)" line, trying in order (1) the syscall-style
 * formatter for bpf_output events carrying syscall payloads, (2) the raw
 * bpf_output hexdump, (3) syscalls:sys_enter_* argument formatting,
 * (4) libtraceevent or the generic tp-field printer.  Honors per-event
 * --max-events by disabling+closing the evsel once its quota is reached.
 */
static int trace__event_handler(struct trace *trace, struct evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	struct thread *thread;
	int callchain_ret = 0;

	/* Per-event --max-events quota already exhausted. */
	if (evsel->nr_events_printed >= evsel->max_events)
		return 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		struct callchain_cursor *cursor = get_tls_callchain_cursor();

		callchain_ret = trace__resolve_callchain(trace, evsel, sample, cursor);
		if (callchain_ret == 0) {
			if (cursor->nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	trace__printf_interrupted_entry(trace);
	trace__fprintf_tstamp(trace, sample->time, trace->output);

	/* Keep columns aligned with syscall lines, which carry a duration. */
	if (trace->trace_syscalls && trace->show_duration)
		fprintf(trace->output, "(         ): ");

	if (thread)
		trace__fprintf_comm_tid(trace, thread, trace->output);

	if (evsel == trace->syscalls.events.bpf_output) {
		int id = perf_evsel__sc_tp_uint(evsel, id, sample);
		struct syscall *sc = trace__syscall_info(trace, evsel, id);

		if (sc) {
			fprintf(trace->output, "%s(", sc->name);
			trace__fprintf_sys_enter(trace, evsel, sample);
			fputc(')', trace->output);
			goto newline;
		}

		/*
		 * XXX: Not having the associated syscall info or not finding/adding
		 * 	the thread should never happen, but if it does...
		 * 	fall thru and print it as a bpf_output event.
		 */
	}

	fprintf(trace->output, "%s(", evsel->name);

	if (evsel__is_bpf_output(evsel)) {
		bpf_output__fprintf(trace, sample);
	} else if (evsel->tp_format) {
		/* For sys_enter_* tracepoints, reuse the syscall arg beautifiers. */
		if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
		    trace__fprintf_sys_enter(trace, evsel, sample)) {
			if (trace->libtraceevent_print) {
				event_format__fprintf(evsel->tp_format, sample->cpu,
						      sample->raw_data, sample->raw_size,
						      trace->output);
			} else {
				trace__fprintf_tp_fields(trace, evsel, sample, thread, NULL, 0);
			}
		}
	}

newline:
	fprintf(trace->output, ")\n");

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));

	++trace->nr_events_printed;

	/* Quota just reached: stop the kernel from producing more of these. */
	if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
		evsel__disable(evsel);
		evsel__close(evsel);
	}
out:
	thread__put(thread);
	return 0;
}
3183
3184static void print_location(FILE *f, struct perf_sample *sample,
3185			   struct addr_location *al,
3186			   bool print_dso, bool print_sym)
3187{
3188
3189	if ((verbose > 0 || print_dso) && al->map)
3190		fprintf(f, "%s@", dso__long_name(map__dso(al->map)));
3191
3192	if ((verbose > 0 || print_sym) && al->sym)
3193		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
3194			al->addr - al->sym->start);
3195	else if (al->map)
3196		fprintf(f, "0x%" PRIx64, al->addr);
3197	else
3198		fprintf(f, "0x%" PRIx64, sample->addr);
3199}
3200
/*
 * Page-fault software event handler: bump the thread's major/minor fault
 * counters and, unless --summary-only, print a line showing the faulting
 * instruction location and the faulted address.
 */
static int trace__pgfault(struct trace *trace,
			  struct evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';	/* 'd'ata by default; see fallback below */
	struct thread_trace *ttrace;
	int err = -1;
	int callchain_ret = 0;

	addr_location__init(&al);
	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		struct callchain_cursor *cursor = get_tls_callchain_cursor();

		callchain_ret = trace__resolve_callchain(trace, evsel, sample, cursor);
		if (callchain_ret == 0) {
			if (cursor->nr < trace->min_stack)
				goto out_put;
			callchain_ret = 1;
		}
	}

	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	/* Where the faulting instruction is. */
	thread__find_symbol(thread, sample->cpumode, sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	/* The address that faulted. */
	thread__find_symbol(thread, sample->cpumode, sample->addr, &al);

	if (!al.map) {
		/*
		 * NOTE(review): this repeats the exact same lookup as the line
		 * above, so al.map cannot have changed in between — upstream
		 * uses a fallback variant here; verify whether this was meant
		 * to be a different (e.g. map-only vs symbol) lookup.
		 */
		thread__find_symbol(thread, sample->cpumode, sample->addr, &al);

		if (al.map)
			map_type = 'x';	/* executable mapping */
		else
			map_type = '?';	/* unresolved */
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));

	++trace->nr_events_printed;
out:
	err = 0;
out_put:
	thread__put(thread);
	addr_location__exit(&al);
	return err;
}
3279
3280static void trace__set_base_time(struct trace *trace,
3281				 struct evsel *evsel,
3282				 struct perf_sample *sample)
3283{
3284	/*
3285	 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
3286	 * and don't use sample->time unconditionally, we may end up having
3287	 * some other event in the future without PERF_SAMPLE_TIME for good
3288	 * reason, i.e. we may not be interested in its timestamps, just in
3289	 * it taking place, picking some piece of information when it
3290	 * appears in our event stream (vfs_getname comes to mind).
3291	 */
3292	if (trace->base_time == 0 && !trace->full_time &&
3293	    (evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
3294		trace->base_time = sample->time;
3295}
3296
3297static int trace__process_sample(const struct perf_tool *tool,
3298				 union perf_event *event,
3299				 struct perf_sample *sample,
3300				 struct evsel *evsel,
3301				 struct machine *machine __maybe_unused)
3302{
3303	struct trace *trace = container_of(tool, struct trace, tool);
3304	struct thread *thread;
3305	int err = 0;
3306
3307	tracepoint_handler handler = evsel->handler;
3308
3309	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
3310	if (thread && thread__is_filtered(thread))
3311		goto out;
3312
3313	trace__set_base_time(trace, evsel, sample);
 
3314
3315	if (handler) {
3316		++trace->nr_events;
3317		handler(trace, evsel, event, sample);
3318	}
3319out:
3320	thread__put(thread);
3321	return err;
3322}
3323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * 'perf trace record': build an argv for cmd_record() that adds the
 * raw_syscalls (or legacy syscalls) tracepoints, a filter excluding our own
 * pid, and optional major/minor page-fault events, then appends the user's
 * own arguments.
 */
static int trace__record(struct trace *trace, int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
	};
	pid_t pid = getpid();
	char *filter = asprintf__tp_filter_pids(1, &pid);
	const char * const sc_args[] = { "-e", };
	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
	const char * const majpf_args[] = { "-e", "major-faults" };
	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
	const char * const minpf_args[] = { "-e", "minor-faults" };
	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
	int err = -1;

	/* +3 is for the event string below and the pid filter */
	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 3 +
		majpf_args_nr + minpf_args_nr + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL || filter == NULL)
		goto out_free;

	j = 0;
	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[j++] = record_args[i];

	if (trace->trace_syscalls) {
		for (i = 0; i < sc_args_nr; i++)
			rec_argv[j++] = sc_args[i];

		/* event string may be different for older kernels - e.g., RHEL6 */
		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
		else if (is_valid_tracepoint("syscalls:sys_enter"))
			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
		else {
			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
			goto out_free;
		}
	}

	/* Don't trace the tracer itself. */
	rec_argv[j++] = "--filter";
	rec_argv[j++] = filter;

	if (trace->trace_pgfaults & TRACE_PFMAJ)
		for (i = 0; i < majpf_args_nr; i++)
			rec_argv[j++] = majpf_args[i];

	if (trace->trace_pgfaults & TRACE_PFMIN)
		for (i = 0; i < minpf_args_nr; i++)
			rec_argv[j++] = minpf_args[i];

	for (i = 0; i < (unsigned int)argc; i++)
		rec_argv[j++] = argv[i];

	err = cmd_record(j, rec_argv);
out_free:
	free(filter);
	free(rec_argv);
	return err;
}
3391
3392static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
3393
/*
 * Try to add the probe:vfs_getname* kprobe events (pre-installed with
 * 'perf probe') to the evlist and hook trace__vfs_getname on the variants
 * that actually expose a "pathname" field; the rest are removed again.
 * Returns true when at least one usable variant was added.
 */
static bool evlist__add_vfs_getname(struct evlist *evlist)
{
	bool found = false;
	struct evsel *evsel, *tmp;
	struct parse_events_error err;
	int ret;

	parse_events_error__init(&err);
	ret = parse_events(evlist, "probe:vfs_getname*", &err);
	parse_events_error__exit(&err);
	if (ret)
		return false;

	evlist__for_each_entry_safe(evlist, evsel, tmp) {
		if (!strstarts(evsel__name(evsel), "probe:vfs_getname"))
			continue;

		if (evsel__field(evsel, "pathname")) {
			evsel->handler = trace__vfs_getname;
			found = true;
			continue;
		}

		/* Variant without the field we need: drop it from the list. */
		list_del_init(&evsel->core.node);
		evsel->evlist = NULL;
		evsel__delete(evsel);
	}

	return found;
}
3424
3425static struct evsel *evsel__new_pgfault(u64 config)
 
3426{
3427	struct evsel *evsel;
3428	struct perf_event_attr attr = {
3429		.type = PERF_TYPE_SOFTWARE,
3430		.mmap_data = 1,
3431	};
3432
3433	attr.config = config;
3434	attr.sample_period = 1;
3435
3436	event_attr_init(&attr);
3437
3438	evsel = evsel__new(&attr);
3439	if (evsel)
3440		evsel->handler = trace__pgfault;
3441
3442	return evsel;
3443}
3444
3445static void evlist__free_syscall_tp_fields(struct evlist *evlist)
3446{
3447	struct evsel *evsel;
3448
3449	evlist__for_each_entry(evlist, evsel) {
3450		evsel_trace__delete(evsel->priv);
3451		evsel->priv = NULL;
3452	}
3453}
3454
/*
 * Live-mode event dispatcher: non-sample records go to
 * trace__process_event(); samples are routed to the handler installed on
 * their evsel, after evswitch filtering and base-time setup.  Sets the
 * global 'interrupted' flag once --max-events is reached.
 */
static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
{
	const u32 type = event->header.type;
	struct evsel *evsel;

	if (type != PERF_RECORD_SAMPLE) {
		trace__process_event(trace, trace->host, event, sample);
		return;
	}

	evsel = evlist__id2evsel(trace->evlist, sample->id);
	if (evsel == NULL) {
		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
		return;
	}

	if (evswitch__discard(&trace->evswitch, evsel))
		return;

	trace__set_base_time(trace, evsel, sample);

	/* A tracepoint sample must carry its raw payload to be printable. */
	if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
	    sample->raw_data == NULL) {
		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
		       evsel__name(evsel), sample->tid,
		       sample->cpu, sample->raw_size);
	} else {
		tracepoint_handler handler = evsel->handler;
		handler(trace, evsel, event, sample);
	}

	if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
		interrupted = true;
}
3489
/*
 * Create the raw_syscalls:sys_enter/sys_exit tracepoint events that drive the
 * strace-like output, wire up their handlers and cache the tracepoint fields
 * used on every sample ('args' payload pointer on enter, 'ret' on exit).
 * Returns 0 on success, -1 on failure with any partially created evsels freed.
 */
static int trace__add_syscall_newtp(struct trace *trace)
{
	int ret = -1;
	struct evlist *evlist = trace->evlist;
	struct evsel *sys_enter, *sys_exit;

	sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
	if (sys_enter == NULL)
		goto out;

	/* Cache the offset/size of the 'args' field for fast per-sample access. */
	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
		goto out_delete_sys_enter;

	sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
	if (sys_exit == NULL)
		goto out_delete_sys_enter;

	/* Ditto for the syscall return value field. */
	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
		goto out_delete_sys_exit;

	evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
	evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);

	evlist__add(evlist, sys_enter);
	evlist__add(evlist, sys_exit);

	if (callchain_param.enabled && !trace->kernel_syscallchains) {
		/*
		 * We're interested only in the user space callchain
		 * leading to the syscall, allow overriding that for
		 * debugging reasons using --kernel_syscall_callchains
		 */
		sys_exit->core.attr.exclude_callchain_kernel = 1;
	}

	trace->syscalls.events.sys_enter = sys_enter;
	trace->syscalls.events.sys_exit  = sys_exit;

	ret = 0;
out:
	return ret;

out_delete_sys_exit:
	evsel__delete_priv(sys_exit);
out_delete_sys_enter:
	evsel__delete_priv(sys_enter);
	goto out;
}
3538
3539static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
3540{
3541	int err = -1;
3542	struct evsel *sys_exit;
3543	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
3544						trace->ev_qualifier_ids.nr,
3545						trace->ev_qualifier_ids.entries);
3546
3547	if (filter == NULL)
3548		goto out_enomem;
3549
3550	if (!evsel__append_tp_filter(trace->syscalls.events.sys_enter, filter)) {
3551		sys_exit = trace->syscalls.events.sys_exit;
3552		err = evsel__append_tp_filter(sys_exit, filter);
3553	}
3554
3555	free(filter);
3556out:
3557	return err;
3558out_enomem:
3559	errno = ENOMEM;
3560	goto out;
3561}
3562
3563#ifdef HAVE_BPF_SKEL
3564static int syscall_arg_fmt__cache_btf_struct(struct syscall_arg_fmt *arg_fmt, struct btf *btf, char *type)
3565{
3566       int id;
3567
3568	if (arg_fmt->type != NULL)
3569		return -1;
3570
3571       id = btf__find_by_name(btf, type);
3572       if (id < 0)
3573		return -1;
3574
3575       arg_fmt->type    = btf__type_by_id(btf, id);
3576       arg_fmt->type_id = id;
3577
3578       return 0;
3579}
3580
3581static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
3582{
3583	struct bpf_program *pos, *prog = NULL;
3584	const char *sec_name;
3585
3586	if (trace->skel->obj == NULL)
3587		return NULL;
3588
3589	bpf_object__for_each_program(pos, trace->skel->obj) {
3590		sec_name = bpf_program__section_name(pos);
3591		if (sec_name && !strcmp(sec_name, name)) {
3592			prog = pos;
3593			break;
3594		}
3595	}
3596
3597	return prog;
3598}
3599
/*
 * Resolve the BPF augmenter program for one side ("enter"/"exit") of a
 * syscall: use the explicitly named program when the format table provides
 * one, otherwise try the conventional "tp/syscalls/sys_<type>_<name>" section
 * (and the syscall's alias), falling back to the generic unaugmented program.
 */
static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
							const char *prog_name, const char *type)
{
	struct bpf_program *prog;

	if (prog_name == NULL) {
		char default_prog_name[256];
		scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s", type, sc->name);
		prog = trace__find_bpf_program_by_title(trace, default_prog_name);
		if (prog != NULL)
			goto out_found;
		if (sc->fmt && sc->fmt->alias) {
			scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s", type, sc->fmt->alias);
			prog = trace__find_bpf_program_by_title(trace, default_prog_name);
			if (prog != NULL)
				goto out_found;
		}
		goto out_unaugmented;
	}

	prog = trace__find_bpf_program_by_title(trace, prog_name);

	if (prog != NULL) {
out_found:	/* NB: label inside the if block, jumped to from the lookups above */
		return prog;
	}

	pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
		 prog_name, type, sc->name);
out_unaugmented:
	return trace->skel->progs.syscall_unaugmented;
}
3632
3633static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
3634{
3635	struct syscall *sc = trace__syscall_info(trace, NULL, id);
3636
3637	if (sc == NULL)
3638		return;
3639
3640	sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter");
3641	sc->bpf_prog.sys_exit  = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit  : NULL,  "exit");
3642}
3643
3644static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
3645{
3646	struct syscall *sc = trace__syscall_info(trace, NULL, id);
3647	return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->skel->progs.syscall_unaugmented);
3648}
3649
3650static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
3651{
3652	struct syscall *sc = trace__syscall_info(trace, NULL, id);
3653	return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented);
3654}
3655
3656static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigned int *beauty_array)
3657{
3658	struct tep_format_field *field;
3659	struct syscall *sc = trace__syscall_info(trace, NULL, key);
3660	const struct btf_type *bt;
3661	char *struct_offset, *tmp, name[32];
3662	bool can_augment = false;
3663	int i, cnt;
3664
3665	if (sc == NULL)
3666		return -1;
3667
3668	trace__load_vmlinux_btf(trace);
3669	if (trace->btf == NULL)
3670		return -1;
3671
3672	for (i = 0, field = sc->args; field; ++i, field = field->next) {
3673		// XXX We're only collecting pointer payloads _from_ user space
3674		if (!sc->arg_fmt[i].from_user)
3675			continue;
3676
3677		struct_offset = strstr(field->type, "struct ");
3678		if (struct_offset == NULL)
3679			struct_offset = strstr(field->type, "union ");
3680		else
3681			struct_offset++; // "union" is shorter
3682
3683		if (field->flags & TEP_FIELD_IS_POINTER && struct_offset) { /* struct or union (think BPF's attr arg) */
3684			struct_offset += 6;
3685
3686			/* for 'struct foo *', we only want 'foo' */
3687			for (tmp = struct_offset, cnt = 0; *tmp != ' ' && *tmp != '\0'; ++tmp, ++cnt) {
3688			}
3689
3690			strncpy(name, struct_offset, cnt);
3691			name[cnt] = '\0';
3692
3693			/* cache struct's btf_type and type_id */
3694			if (syscall_arg_fmt__cache_btf_struct(&sc->arg_fmt[i], trace->btf, name))
3695				continue;
3696
3697			bt = sc->arg_fmt[i].type;
3698			beauty_array[i] = bt->size;
3699			can_augment = true;
3700		} else if (field->flags & TEP_FIELD_IS_POINTER && /* string */
3701			   strcmp(field->type, "const char *") == 0 &&
3702			   (strstr(field->name, "name") ||
3703			    strstr(field->name, "path") ||
3704			    strstr(field->name, "file") ||
3705			    strstr(field->name, "root") ||
3706			    strstr(field->name, "key") ||
3707			    strstr(field->name, "special") ||
3708			    strstr(field->name, "type") ||
3709			    strstr(field->name, "description"))) {
3710			beauty_array[i] = 1;
3711			can_augment = true;
3712		} else if (field->flags & TEP_FIELD_IS_POINTER && /* buffer */
3713			   strstr(field->type, "char *") &&
3714			   (strstr(field->name, "buf") ||
3715			    strstr(field->name, "val") ||
3716			    strstr(field->name, "msg"))) {
3717			int j;
3718			struct tep_format_field *field_tmp;
3719
3720			/* find the size of the buffer that appears in pairs with buf */
3721			for (j = 0, field_tmp = sc->args; field_tmp; ++j, field_tmp = field_tmp->next) {
3722				if (!(field_tmp->flags & TEP_FIELD_IS_POINTER) && /* only integers */
3723				    (strstr(field_tmp->name, "count") ||
3724				     strstr(field_tmp->name, "siz") ||  /* size, bufsiz */
3725				     (strstr(field_tmp->name, "len") && strcmp(field_tmp->name, "filename")))) {
3726					 /* filename's got 'len' in it, we don't want that */
3727					beauty_array[i] = -(j + 1);
3728					can_augment = true;
3729					break;
3730				}
3731			}
3732		}
3733	}
3734
3735	if (can_augment)
3736		return 0;
3737
3738	return -1;
3739}
3740
/*
 * 'sc' has no dedicated sys_enter augmenter: scan the syscall table for
 * another syscall whose signature is compatible enough (same pointer args in
 * the same positions, same types, no extra trailing pointers) that its
 * augmenter can be reused for 'sc'.  Returns the reusable program or NULL.
 */
static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
{
	struct tep_format_field *field, *candidate_field;
	/*
	 * We're only interested in syscalls that have a pointer:
	 */
	for (field = sc->args; field; field = field->next) {
		if (field->flags & TEP_FIELD_IS_POINTER)
			goto try_to_find_pair;
	}

	return NULL;

try_to_find_pair:
	for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
		int id = syscalltbl__id_at_idx(trace->sctbl, i);
		struct syscall *pair = trace__syscall_info(trace, NULL, id);
		struct bpf_program *pair_prog;
		bool is_candidate = false;

		/* Skip ourselves and syscalls with no real augmenter. */
		if (pair == NULL || pair == sc ||
		    pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
			continue;

		/* Walk both arg lists in lockstep, position by position. */
		for (field = sc->args, candidate_field = pair->args;
		     field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
			bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
			     candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;

			if (is_pointer) {
			       if (!candidate_is_pointer) {
					// The candidate just doesn't copies our pointer arg, might copy other pointers we want.
					continue;
			       }
			} else {
				if (candidate_is_pointer) {
					// The candidate might copy a pointer we don't have, skip it.
					goto next_candidate;
				}
				continue;
			}

			if (strcmp(field->type, candidate_field->type))
				goto next_candidate;

			/*
			 * This is limited in the BPF program but sys_write
			 * uses "const char *" for its "buf" arg so we need to
			 * use some heuristic that is kinda future proof...
			 */
			if (strcmp(field->type, "const char *") == 0 &&
			    !(strstr(field->name, "name") ||
			      strstr(field->name, "path") ||
			      strstr(field->name, "file") ||
			      strstr(field->name, "root") ||
			      strstr(field->name, "description")))
				goto next_candidate;

			is_candidate = true;
		}

		if (!is_candidate)
			goto next_candidate;

		/*
		 * Check if the tentative pair syscall augmenter has more pointers, if it has,
		 * then it may be collecting that and we then can't use it, as it would collect
		 * more than what is common to the two syscalls.
		 */
		if (candidate_field) {
			for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
				if (candidate_field->flags & TEP_FIELD_IS_POINTER)
					goto next_candidate;
		}

		pair_prog = pair->bpf_prog.sys_enter;
		/*
		 * If the pair isn't enabled, then its bpf_prog.sys_enter will not
		 * have been searched for, so search it here and if it returns the
		 * unaugmented one, then ignore it, otherwise we'll reuse that BPF
		 * program for a filtered syscall on a non-filtered one.
		 *
		 * For instance, we have "!syscalls:sys_enter_renameat" and that is
		 * useful for "renameat2".
		 */
		if (pair_prog == NULL) {
			pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
			if (pair_prog == trace->skel->progs.syscall_unaugmented)
				goto next_candidate;
		}

		pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
		return pair_prog;
	next_candidate:
		continue;
	}

	return NULL;
}
3840
3841static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
3842{
3843	int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter);
3844	int map_exit_fd  = bpf_map__fd(trace->skel->maps.syscalls_sys_exit);
3845	int beauty_map_fd = bpf_map__fd(trace->skel->maps.beauty_map_enter);
3846	int err = 0;
3847	unsigned int beauty_array[6];
3848
3849	for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
3850		int prog_fd, key = syscalltbl__id_at_idx(trace->sctbl, i);
3851
3852		if (!trace__syscall_enabled(trace, key))
3853			continue;
3854
3855		trace__init_syscall_bpf_progs(trace, key);
3856
3857		// It'll get at least the "!raw_syscalls:unaugmented"
3858		prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
3859		err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3860		if (err)
3861			break;
3862		prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
3863		err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
3864		if (err)
3865			break;
3866
3867		/* use beauty_map to tell BPF how many bytes to collect, set beauty_map's value here */
3868		memset(beauty_array, 0, sizeof(beauty_array));
3869		err = trace__bpf_sys_enter_beauty_map(trace, key, (unsigned int *)beauty_array);
3870		if (err)
3871			continue;
3872		err = bpf_map_update_elem(beauty_map_fd, &key, beauty_array, BPF_ANY);
3873		if (err)
3874			break;
3875	}
3876
3877	/*
3878	 * Now lets do a second pass looking for enabled syscalls without
3879	 * an augmenter that have a signature that is a superset of another
3880	 * syscall with an augmenter so that we can auto-reuse it.
3881	 *
3882	 * I.e. if we have an augmenter for the "open" syscall that has
3883	 * this signature:
3884	 *
3885	 *   int open(const char *pathname, int flags, mode_t mode);
3886	 *
3887	 * I.e. that will collect just the first string argument, then we
3888	 * can reuse it for the 'creat' syscall, that has this signature:
3889	 *
3890	 *   int creat(const char *pathname, mode_t mode);
3891	 *
3892	 * and for:
3893	 *
3894	 *   int stat(const char *pathname, struct stat *statbuf);
3895	 *   int lstat(const char *pathname, struct stat *statbuf);
3896	 *
3897	 * Because the 'open' augmenter will collect the first arg as a string,
3898	 * and leave alone all the other args, which already helps with
3899	 * beautifying 'stat' and 'lstat''s pathname arg.
3900	 *
3901	 * Then, in time, when 'stat' gets an augmenter that collects both
3902	 * first and second arg (this one on the raw_syscalls:sys_exit prog
3903	 * array tail call, then that one will be used.
3904	 */
3905	for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
3906		int key = syscalltbl__id_at_idx(trace->sctbl, i);
3907		struct syscall *sc = trace__syscall_info(trace, NULL, key);
3908		struct bpf_program *pair_prog;
3909		int prog_fd;
3910
3911		if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
3912			continue;
3913
3914		/*
3915		 * For now we're just reusing the sys_enter prog, and if it
3916		 * already has an augmenter, we don't need to find one.
3917		 */
3918		if (sc->bpf_prog.sys_enter != trace->skel->progs.syscall_unaugmented)
3919			continue;
3920
3921		/*
3922		 * Look at all the other syscalls for one that has a signature
3923		 * that is close enough that we can share:
3924		 */
3925		pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
3926		if (pair_prog == NULL)
3927			continue;
3928
3929		sc->bpf_prog.sys_enter = pair_prog;
3930
3931		/*
3932		 * Update the BPF_MAP_TYPE_PROG_SHARED for raw_syscalls:sys_enter
3933		 * with the fd for the program we're reusing:
3934		 */
3935		prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
3936		err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3937		if (err)
3938			break;
3939	}
3940
3941	return err;
3942}
3943#endif // HAVE_BPF_SKEL
3944
3945static int trace__set_ev_qualifier_filter(struct trace *trace)
3946{
3947	if (trace->syscalls.events.sys_enter)
3948		return trace__set_ev_qualifier_tp_filter(trace);
3949	return 0;
3950}
3951
3952static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused,
3953				    size_t npids __maybe_unused, pid_t *pids __maybe_unused)
3954{
3955	int err = 0;
3956#ifdef HAVE_LIBBPF_SUPPORT
3957	bool value = true;
3958	int map_fd = bpf_map__fd(map);
3959	size_t i;
3960
3961	for (i = 0; i < npids; ++i) {
3962		err = bpf_map_update_elem(map_fd, &pids[i], &value, BPF_ANY);
3963		if (err)
3964			break;
3965	}
3966#endif
3967	return err;
3968}
3969
3970static int trace__set_filter_loop_pids(struct trace *trace)
3971{
3972	unsigned int nr = 1, err;
3973	pid_t pids[32] = {
3974		getpid(),
3975	};
3976	struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
3977
3978	while (thread && nr < ARRAY_SIZE(pids)) {
3979		struct thread *parent = machine__find_thread(trace->host,
3980							     thread__ppid(thread),
3981							     thread__ppid(thread));
3982
3983		if (parent == NULL)
3984			break;
3985
3986		if (!strcmp(thread__comm_str(parent), "sshd") ||
3987		    strstarts(thread__comm_str(parent), "gnome-terminal")) {
3988			pids[nr++] = thread__tid(parent);
3989			break;
3990		}
3991		thread = parent;
3992	}
3993
3994	err = evlist__append_tp_filter_pids(trace->evlist, nr, pids);
3995	if (!err && trace->filter_pids.map)
3996		err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
3997
3998	return err;
3999}
4000
4001static int trace__set_filter_pids(struct trace *trace)
4002{
4003	int err = 0;
4004	/*
4005	 * Better not use !target__has_task() here because we need to cover the
4006	 * case where no threads were specified in the command line, but a
4007	 * workload was, and in that case we will fill in the thread_map when
4008	 * we fork the workload in evlist__prepare_workload.
4009	 */
4010	if (trace->filter_pids.nr > 0) {
4011		err = evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
4012						    trace->filter_pids.entries);
4013		if (!err && trace->filter_pids.map) {
4014			err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
4015						       trace->filter_pids.entries);
4016		}
4017	} else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) {
4018		err = trace__set_filter_loop_pids(trace);
4019	}
4020
4021	return err;
4022}
4023
4024static int __trace__deliver_event(struct trace *trace, union perf_event *event)
4025{
4026	struct evlist *evlist = trace->evlist;
4027	struct perf_sample sample;
4028	int err = evlist__parse_sample(evlist, event, &sample);
4029
4030	if (err)
4031		fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
4032	else
4033		trace__handle_event(trace, event, &sample);
4034
4035	return 0;
4036}
4037
4038static int __trace__flush_events(struct trace *trace)
4039{
4040	u64 first = ordered_events__first_time(&trace->oe.data);
4041	u64 flush = trace->oe.last - NSEC_PER_SEC;
4042
4043	/* Is there some thing to flush.. */
4044	if (first && first < flush)
4045		return ordered_events__flush_time(&trace->oe.data, flush);
4046
4047	return 0;
4048}
4049
4050static int trace__flush_events(struct trace *trace)
4051{
4052	return !trace->sort_events ? 0 : __trace__flush_events(trace);
4053}
4054
/*
 * Deliver one mmap'ed event: directly when not sorting, otherwise queue it
 * on the ordered_events machinery keyed by its timestamp and flush whatever
 * is old enough.
 */
static int trace__deliver_event(struct trace *trace, union perf_event *event)
{
	int err;

	if (!trace->sort_events)
		return __trace__deliver_event(trace, event);

	err = evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
	/* -1 is deliberately tolerated (NOTE(review): presumably "no timestamp
	 * in this event", so it is queued with the last one seen - confirm). */
	if (err && err != -1)
		return err;

	err = ordered_events__queue(&trace->oe.data, event, trace->oe.last, 0, NULL);
	if (err)
		return err;

	return trace__flush_events(trace);
}
4072
4073static int ordered_events__deliver_event(struct ordered_events *oe,
4074					 struct ordered_event *event)
4075{
4076	struct trace *trace = container_of(oe, struct trace, oe.data);
4077
4078	return __trace__deliver_event(trace, event->event);
4079}
4080
4081static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg,
4082								   char **type)
4083{
4084	struct tep_format_field *field;
4085	struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
4086
4087	if (evsel->tp_format == NULL || fmt == NULL)
4088		return NULL;
4089
4090	for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt)
4091		if (strcmp(field->name, arg) == 0) {
4092			*type = field->type;
4093			return fmt;
4094		}
4095
4096	return NULL;
4097}
4098
4099static int trace__expand_filter(struct trace *trace, struct evsel *evsel)
4100{
4101	char *tok, *left = evsel->filter, *new_filter = evsel->filter;
4102
4103	while ((tok = strpbrk(left, "=<>!")) != NULL) {
4104		char *right = tok + 1, *right_end;
4105
4106		if (*right == '=')
4107			++right;
4108
4109		while (isspace(*right))
4110			++right;
4111
4112		if (*right == '\0')
4113			break;
4114
4115		while (!isalpha(*left))
4116			if (++left == tok) {
4117				/*
4118				 * Bail out, can't find the name of the argument that is being
4119				 * used in the filter, let it try to set this filter, will fail later.
4120				 */
4121				return 0;
4122			}
4123
4124		right_end = right + 1;
4125		while (isalnum(*right_end) || *right_end == '_' || *right_end == '|')
4126			++right_end;
4127
4128		if (isalpha(*right)) {
4129			struct syscall_arg_fmt *fmt;
4130			int left_size = tok - left,
4131			    right_size = right_end - right;
4132			char arg[128], *type;
4133
4134			while (isspace(left[left_size - 1]))
4135				--left_size;
4136
4137			scnprintf(arg, sizeof(arg), "%.*s", left_size, left);
4138
4139			fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg, &type);
4140			if (fmt == NULL) {
4141				pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
4142				       arg, evsel->name, evsel->filter);
4143				return -1;
4144			}
4145
4146			pr_debug2("trying to expand \"%s\" \"%.*s\" \"%.*s\" -> ",
4147				 arg, (int)(right - tok), tok, right_size, right);
4148
4149			if (fmt->strtoul) {
4150				u64 val;
4151				struct syscall_arg syscall_arg = {
4152					.trace = trace,
4153					.fmt   = fmt,
4154					.type_name = type,
4155					.parm = fmt->parm,
4156				};
4157
4158				if (fmt->strtoul(right, right_size, &syscall_arg, &val)) {
4159					char *n, expansion[19];
4160					int expansion_lenght = scnprintf(expansion, sizeof(expansion), "%#" PRIx64, val);
4161					int expansion_offset = right - new_filter;
4162
4163					pr_debug("%s", expansion);
4164
4165					if (asprintf(&n, "%.*s%s%s", expansion_offset, new_filter, expansion, right_end) < 0) {
4166						pr_debug(" out of memory!\n");
4167						free(new_filter);
4168						return -1;
4169					}
4170					if (new_filter != evsel->filter)
4171						free(new_filter);
4172					left = n + expansion_offset + expansion_lenght;
4173					new_filter = n;
4174				} else {
4175					pr_err("\"%.*s\" not found for \"%s\" in \"%s\", can't set filter \"%s\"\n",
4176					       right_size, right, arg, evsel->name, evsel->filter);
4177					return -1;
4178				}
4179			} else {
4180				pr_err("No resolver (strtoul) for \"%s\" in \"%s\", can't set filter \"%s\"\n",
4181				       arg, evsel->name, evsel->filter);
4182				return -1;
4183			}
4184
4185			pr_debug("\n");
4186		} else {
4187			left = right_end;
4188		}
4189	}
4190
4191	if (new_filter != evsel->filter) {
4192		pr_debug("New filter for %s: %s\n", evsel->name, new_filter);
4193		evsel__set_filter(evsel, new_filter);
4194		free(new_filter);
4195	}
4196
4197	return 0;
4198}
4199
4200static int trace__expand_filters(struct trace *trace, struct evsel **err_evsel)
4201{
4202	struct evlist *evlist = trace->evlist;
4203	struct evsel *evsel;
4204
4205	evlist__for_each_entry(evlist, evsel) {
4206		if (evsel->filter == NULL)
4207			continue;
4208
4209		if (trace__expand_filter(trace, evsel)) {
4210			*err_evsel = evsel;
4211			return -1;
4212		}
4213	}
4214
4215	return 0;
4216}
4217
/*
 * The live-tracing main loop: set up the requested events (syscall
 * tracepoints, vfs_getname, page faults, sched_stat_runtime), create the
 * target maps, optionally fork the workload, open/mmap the events, install
 * pid/ev-qualifier filters, then consume the ring buffers until interrupted,
 * the workload exits, or --max-events is reached.  Returns 0 or a negative
 * error, printing a human-readable diagnosis to trace->output on the way out.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct evlist *evlist = trace->evlist;
	struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;	/* remaining argv is the workload to run */
	bool draining = false;

	trace->live = true;

	if (!trace->raw_augmented_syscalls) {
		if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
			goto out_error_raw_syscalls;

		if (trace->trace_syscalls)
			trace->vfs_getname = evlist__add_vfs_getname(evlist);
	}

	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
		evlist__add(evlist, pgfault_maj);
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
		evlist__add(evlist, pgfault_min);
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid;

	if (trace->sched &&
	    evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;
	/*
	 * If a global cgroup was set, apply it to all the events without an
	 * explicit cgroup. I.e.:
	 *
	 * 	trace -G A -e sched:*switch
	 *
	 * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
	 * _and_ sched:sched_switch to the 'A' cgroup, while:
	 *
	 * trace -e sched:*switch -G A
	 *
	 * will only set the sched:sched_switch event to the 'A' cgroup, all the
	 * other events (raw_syscalls:sys_{enter,exit}, etc are left "without"
	 * a cgroup (on the root cgroup, sys wide, etc).
	 *
	 * Multiple cgroups:
	 *
	 * trace -G A -e sched:*switch -G B
	 *
	 * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
	 * to the 'B' cgroup.
	 *
	 * evlist__set_default_cgroup() grabs a reference of the passed cgroup
	 * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL.
	 */
	if (trace->cgroup)
		evlist__set_default_cgroup(trace->evlist, trace->cgroup);

	err = evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	evlist__config(evlist, &trace->opts, &callchain_param);

	if (forks) {
		/* Fork the workload stopped; it is started after the events are enabled. */
		err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
		workload_pid = evlist->workload.pid;
	}

	err = evlist__open(evlist);
	if (err < 0)
		goto out_error_open;
#ifdef HAVE_BPF_SKEL
	if (trace->syscalls.events.bpf_output) {
		struct perf_cpu cpu;

		/*
		 * Set up the __augmented_syscalls__ BPF map to hold for each
		 * CPU the bpf-output event's file descriptor.
		 */
		perf_cpu_map__for_each_cpu(cpu, i, trace->syscalls.events.bpf_output->core.cpus) {
			bpf_map__update_elem(trace->skel->maps.__augmented_syscalls__,
					&cpu.cpu, sizeof(int),
					xyarray__entry(trace->syscalls.events.bpf_output->core.fd,
						       cpu.cpu, 0),
					sizeof(__u32), BPF_ANY);
		}
	}

	if (trace->skel)
		trace->filter_pids.map = trace->skel->maps.pids_filtered;
#endif
	err = trace__set_filter_pids(trace);
	if (err < 0)
		goto out_error_mem;

#ifdef HAVE_BPF_SKEL
	if (trace->skel && trace->skel->progs.sys_enter)
		trace__init_syscalls_bpf_prog_array_maps(trace);
#endif

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		if (trace->syscalls.events.sys_exit) {
			pr_debug("event qualifier tracepoint filter: %s\n",
				 trace->syscalls.events.sys_exit->filter);
		}
	}

	/*
	 * If the "close" syscall is not traced, then we will not have the
	 * opportunity to, in syscall_arg__scnprintf_close_fd() invalidate the
	 * fd->pathname table and were ending up showing the last value set by
	 * syscalls opening a pathname and associating it with a descriptor or
	 * reading it from /proc/pid/fd/ in cases where that doesn't make
	 * sense.
	 *
	 *  So just disable this beautifier (SCA_FD, SCA_FDAT) when 'close' is
	 *  not in use.
	 */
	trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));

	err = trace__expand_filters(trace, &evsel);
	if (err)
		goto out_delete_evlist;
	err = evlist__apply_filters(evlist, &evsel, &trace->opts.target);
	if (err < 0)
		goto out_error_apply_filters;

	err = evlist__mmap(evlist, trace->opts.mmap_pages);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target) && !trace->opts.target.initial_delay)
		evlist__enable(evlist);

	if (forks)
		evlist__start_workload(evlist);

	/* --delay: let the workload run a bit before enabling the events. */
	if (trace->opts.target.initial_delay) {
		usleep(trace->opts.target.initial_delay * 1000);
		evlist__enable(evlist);
	}

	trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
		perf_thread_map__nr(evlist->core.threads) > 1 ||
		evlist__first(evlist)->core.attr.inherit;

	/*
	 * Now that we already used evsel->core.attr to ask the kernel to setup the
	 * events, lets reuse evsel->core.attr.sample_max_stack as the limit in
	 * trace__resolve_callchain(), allowing per-event max-stack settings
	 * to override an explicitly set --max-stack global setting.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel__has_callchain(evsel) &&
		    evsel->core.attr.sample_max_stack == 0)
			evsel->core.attr.sample_max_stack = trace->max_stack;
	}
again:
	/* Main consume loop: drain every mmap, then poll when nothing new arrived. */
	before = trace->nr_events;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		union perf_event *event;
		struct mmap *md;

		md = &evlist->mmap[i];
		if (perf_mmap__read_init(&md->core) < 0)
			continue;

		while ((event = perf_mmap__read_event(&md->core)) != NULL) {
			++trace->nr_events;

			err = trace__deliver_event(trace, event);
			if (err)
				goto out_disable;

			perf_mmap__consume(&md->core);

			if (interrupted)
				goto out_disable;

			/* Workload exited (SIGCHLD): stop producing, keep draining. */
			if (done && !draining) {
				evlist__disable(evlist);
				draining = true;
			}
		}
		perf_mmap__read_done(&md->core);
	}

	if (trace->nr_events == before) {
		int timeout = done ? 100 : -1;

		if (!draining && evlist__poll(evlist, timeout) > 0) {
			if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
				draining = true;

			goto again;
		} else {
			if (trace__flush_events(trace))
				goto out_disable;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	evlist__disable(evlist);

	if (trace->sort_events)
		ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	trace__symbols__exit(trace);
	evlist__free_syscall_tp_fields(evlist);
	evlist__delete(evlist);
	cgroup__put(trace->cgroup);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * Error reporting tail: the bare block after the return merely scopes
 * 'errbuf' for the labels below, which are only reachable via goto.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, evsel__name(evsel), errno,
		str_error_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
4516
/*
 * 'perf trace -i perf.data' mode: replay a previously recorded session
 * instead of tracing live, feeding the recorded events through the same
 * sys_enter/sys_exit/pgfault handlers used in live mode.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int trace__replay(struct trace *trace)
{
	const struct evsel_str_handler handlers[] = {
		{ "probe:vfs_getname",	     trace__vfs_getname, },
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = trace->force,
	};
	struct perf_session *session;
	struct evsel *evsel;
	int err = -1;

	/* Route the perf.data record types to the stock + trace handlers. */
	trace->tool.sample	  = trace__process_sample;
	trace->tool.mmap	  = perf_event__process_mmap;
	trace->tool.mmap2	  = perf_event__process_mmap2;
	trace->tool.comm	  = perf_event__process_comm;
	trace->tool.exit	  = perf_event__process_exit;
	trace->tool.fork	  = perf_event__process_fork;
	trace->tool.attr	  = perf_event__process_attr;
	trace->tool.tracing_data  = perf_event__process_tracing_data;
	trace->tool.build_id	  = perf_event__process_build_id;
	trace->tool.namespaces	  = perf_event__process_namespaces;

	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&data, &trace->tool);
	if (IS_ERR(session))
		return PTR_ERR(session);

	/* Restrict symbol resolution to the pids/tids recorded, if any. */
	if (trace->opts.target.pid)
		symbol_conf.pid_list_str = strdup(trace->opts.target.pid);

	if (trace->opts.target.tid)
		symbol_conf.tid_list_str = strdup(trace->opts.target.tid);

	if (symbol__init(&session->header.env) < 0)
		goto out;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter");
	trace->syscalls.events.sys_enter = evsel;
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter");

	if (evsel &&
	    (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit");
	trace->syscalls.events.sys_exit = evsel;
	if (evsel == NULL)
		evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit");
	if (evsel &&
	    (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
		goto out;
	}

	/* Page fault software events get the pgfault beautifier. */
	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->core.attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;
	}

	setup_pager();

	err = perf_session__process_events(session);
	if (err)
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}
4613
/* Print the banner that precedes the per-thread summary table. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
4622
/*
 * Define a resorted rb-tree over the per-thread syscall stats intlist,
 * ordered by total time spent (msecs, descending); each resorted entry
 * caches the syscall id, its stats and the precomputed total msecs.
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct syscall_stats *stats;
	double		     msecs;
	int		     syscall;
)
{
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct syscall_stats *stats = source->priv;

	entry->syscall = source->i;
	entry->stats   = stats;
	/* total time = nr_samples * average, scaled from ns to msecs */
	entry->msecs   = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0;
}
4636
/*
 * Print one thread's per-syscall statistics table (calls, errors,
 * total/min/avg/max msecs, stddev), sorted by total time spent, and
 * optionally a per-errno failure breakdown (--errno-summary).
 *
 * Returns the number of characters printed to @fp.
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	size_t printed = 0;
	struct syscall *sc;
	struct rb_node *nd;
	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);

	if (syscall_stats == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, "   syscall            calls  errors  total       min       avg       max       stddev\n");
	printed += fprintf(fp, "                                     (msec)    (msec)    (msec)    (msec)        (%%)\n");
	printed += fprintf(fp, "   --------------- --------  ------ -------- --------- --------- ---------     ------\n");

	resort_rb__for_each_entry(nd, syscall_stats) {
		struct syscall_stats *stats = syscall_stats_entry->stats;
		if (stats) {
			/* Stats accumulate in ns, the table is in msecs. */
			double min = (double)(stats->stats.min) / NSEC_PER_MSEC;
			double max = (double)(stats->stats.max) / NSEC_PER_MSEC;
			double avg = avg_stats(&stats->stats);
			double pct;
			u64 n = (u64)stats->stats.n;

			/* stddev expressed as a percentage of the average */
			pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
			avg /= NSEC_PER_MSEC;

			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
			printed += fprintf(fp, "   %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f",
					   n, stats->nr_failures, syscall_stats_entry->msecs, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);

			if (trace->errno_summary && stats->nr_failures) {
				int e;

				/* errnos[e] counts failures that returned errno e + 1 */
				for (e = 0; e < stats->max_errno; ++e) {
					if (stats->errnos[e] != 0)
						fprintf(fp, "\t\t\t\t%s: %d\n", perf_env__arch_strerrno(trace->host->env, e + 1), stats->errnos[e]);
				}
			}
		}
	}

	resort_rb__delete(syscall_stats);
	printed += fprintf(fp, "\n\n");

	return printed;
}
4688
4689static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
 
 
 
 
 
 
 
4690{
4691	size_t printed = 0;
 
 
 
4692	struct thread_trace *ttrace = thread__priv(thread);
4693	double ratio;
4694
4695	if (ttrace == NULL)
4696		return 0;
4697
4698	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
4699
4700	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread__tid(thread));
4701	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
4702	printed += fprintf(fp, "%.1f%%", ratio);
4703	if (ttrace->pfmaj)
4704		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
4705	if (ttrace->pfmin)
4706		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
4707	if (trace->sched)
4708		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
4709	else if (fputc('\n', fp) != EOF)
4710		++printed;
4711
4712	printed += thread__dump_stats(ttrace, trace, fp);
4713
4714	return printed;
4715}
4716
4717static unsigned long thread__nr_events(struct thread_trace *ttrace)
4718{
4719	return ttrace ? ttrace->nr_events : 0;
4720}
4721
4722static int trace_nr_events_cmp(void *priv __maybe_unused,
4723			       const struct list_head *la,
4724			       const struct list_head *lb)
4725{
4726	struct thread_list *a = list_entry(la, struct thread_list, list);
4727	struct thread_list *b = list_entry(lb, struct thread_list, list);
4728	unsigned long a_nr_events = thread__nr_events(thread__priv(a->thread));
4729	unsigned long b_nr_events = thread__nr_events(thread__priv(b->thread));
4730
4731	if (a_nr_events != b_nr_events)
4732		return a_nr_events < b_nr_events ? -1 : 1;
4733
4734	/* Identical number of threads, place smaller tids first. */
4735	return thread__tid(a->thread) < thread__tid(b->thread)
4736		? -1
4737		: (thread__tid(a->thread) > thread__tid(b->thread) ? 1 : 0);
4738}
4739
4740static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
4741{
4742	size_t printed = trace__fprintf_threads_header(fp);
4743	LIST_HEAD(threads);
4744
4745	if (machine__thread_list(trace->host, &threads) == 0) {
4746		struct thread_list *pos;
4747
4748		list_sort(NULL, &threads, trace_nr_events_cmp);
4749
4750		list_for_each_entry(pos, &threads, list)
4751			printed += trace__fprintf_thread(fp, pos->thread, trace);
4752	}
4753	thread_list__delete(&threads);
4754	return printed;
4755}
4756
4757static int trace__set_duration(const struct option *opt, const char *str,
4758			       int unset __maybe_unused)
4759{
4760	struct trace *trace = opt->value;
4761
4762	trace->duration_filter = atof(str);
4763	return 0;
4764}
4765
4766static int trace__set_filter_pids_from_option(const struct option *opt, const char *str,
4767					      int unset __maybe_unused)
4768{
4769	int ret = -1;
4770	size_t i;
4771	struct trace *trace = opt->value;
4772	/*
4773	 * FIXME: introduce a intarray class, plain parse csv and create a
4774	 * { int nr, int entries[] } struct...
4775	 */
4776	struct intlist *list = intlist__new(str);
4777
4778	if (list == NULL)
4779		return -1;
4780
4781	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
4782	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
4783
4784	if (trace->filter_pids.entries == NULL)
4785		goto out;
4786
4787	trace->filter_pids.entries[0] = getpid();
4788
4789	for (i = 1; i < trace->filter_pids.nr; ++i)
4790		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
4791
4792	intlist__delete(list);
4793	ret = 0;
4794out:
4795	return ret;
4796}
4797
4798static int trace__open_output(struct trace *trace, const char *filename)
4799{
4800	struct stat st;
4801
4802	if (!stat(filename, &st) && st.st_size) {
4803		char oldname[PATH_MAX];
4804
4805		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
4806		unlink(oldname);
4807		rename(filename, oldname);
4808	}
4809
4810	trace->output = fopen(filename, "w");
4811
4812	return trace->output == NULL ? -errno : 0;
4813}
4814
4815static int parse_pagefaults(const struct option *opt, const char *str,
4816			    int unset __maybe_unused)
4817{
4818	int *trace_pgfaults = opt->value;
4819
4820	if (strcmp(str, "all") == 0)
4821		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
4822	else if (strcmp(str, "maj") == 0)
4823		*trace_pgfaults |= TRACE_PFMAJ;
4824	else if (strcmp(str, "min") == 0)
4825		*trace_pgfaults |= TRACE_PFMIN;
4826	else
4827		return -1;
4828
4829	return 0;
4830}
4831
/*
 * Set @handler on every evsel in @evlist that doesn't already have one,
 * leaving explicitly configured handlers untouched.
 */
static void evlist__set_default_evsel_handler(struct evlist *evlist, void *handler)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->handler == NULL)
			evsel->handler = handler;
	}
}
4841
4842static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name)
4843{
4844	struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
4845
4846	if (fmt) {
4847		const struct syscall_fmt *scfmt = syscall_fmt__find(name);
4848
4849		if (scfmt) {
4850			int skip = 0;
4851
4852			if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 ||
4853			    strcmp(evsel->tp_format->format.fields->name, "nr") == 0)
4854				++skip;
4855
4856			memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt));
4857		}
4858	}
4859}
4860
/*
 * Prepare tracepoint evsels for payload parsing: non-syscall tracepoints
 * get the generic per-arg scnprintf beautifiers (possibly BTF based,
 * reported via @use_btf), while syscalls:sys_{enter,exit}_NAME events get
 * their args/ret fields resolved and per-syscall arg formatters attached.
 *
 * Returns 0 on success, -1 on failure.
 */
static int evlist__set_syscall_tp_fields(struct evlist *evlist, bool *use_btf)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		/* Skip already initialized evsels and ones without a format. */
		if (evsel->priv || !evsel->tp_format)
			continue;

		/* Not a syscalls:* tracepoint: generic argument beautifiers. */
		if (strcmp(evsel->tp_format->system, "syscalls")) {
			evsel__init_tp_arg_scnprintf(evsel, use_btf);
			continue;
		}

		if (evsel__init_syscall_tp(evsel))
			return -1;

		if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
			struct syscall_tp *sc = __evsel__syscall_tp(evsel);

			/* The args payload follows the u64-sized syscall id. */
			if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
				return -1;

			evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1);
		} else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
			struct syscall_tp *sc = __evsel__syscall_tp(evsel);

			/* The return value also follows the u64-sized syscall id. */
			if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
				return -1;

			evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1);
		}
	}

	return 0;
}
4896
4897/*
4898 * XXX: Hackish, just splitting the combined -e+--event (syscalls
4899 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
4900 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
4901 *
4902 * It'd be better to introduce a parse_options() variant that would return a
4903 * list with the terms it didn't match to an event...
4904 */
static int trace__parse_events_option(const struct option *opt, const char *str,
				      int unset __maybe_unused)
{
	struct trace *trace = (struct trace *)opt->value;
	const char *s = str;
	char *sep = NULL, *lists[2] = { NULL, NULL, };
	int len = strlen(str) + 1, err = -1, list, idx;
	char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
	char group_name[PATH_MAX];
	const struct syscall_fmt *fmt;

	if (strace_groups_dir == NULL)
		return -1;

	/* A leading '!' negates the whole syscall qualifier list. */
	if (*s == '!') {
		++s;
		trace->not_ev_qualifier = true;
	}

	/*
	 * Walk the comma separated terms, sorting each into lists[1]
	 * (syscall names/globs/aliases and strace group files) or lists[0]
	 * (everything else, handed to parse_events_option() below).
	 */
	while (1) {
		/* Temporarily NUL-terminate the current term; restored below. */
		if ((sep = strchr(s, ',')) != NULL)
			*sep = '\0';

		list = 0;
		if (syscalltbl__id(trace->sctbl, s) >= 0 ||
		    syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
			list = 1;
			goto do_concat;
		}

		fmt = syscall_fmt__find_by_alias(s);
		if (fmt != NULL) {
			list = 1;
			s = fmt->name;
		} else {
			/* A readable file under the strace groups dir also counts. */
			path__join(group_name, sizeof(group_name), strace_groups_dir, s);
			if (access(group_name, R_OK) == 0)
				list = 1;
		}
do_concat:
		if (lists[list]) {
			sprintf(lists[list] + strlen(lists[list]), ",%s", s);
		} else {
			lists[list] = malloc(len);
			if (lists[list] == NULL)
				goto out;
			strcpy(lists[list], s);
		}

		if (!sep)
			break;

		/* Put the ',' back and move to the next term. */
		*sep = ',';
		s = sep + 1;
	}

	/* Syscall names become the strace-like event qualifier. */
	if (lists[1] != NULL) {
		struct strlist_config slist_config = {
			.dirname = strace_groups_dir,
		};

		trace->ev_qualifier = strlist__new(lists[1], &slist_config);
		if (trace->ev_qualifier == NULL) {
			fputs("Not enough memory to parse event qualifier", trace->output);
			goto out;
		}

		if (trace__validate_ev_qualifier(trace))
			goto out;
		trace->trace_syscalls = true;
	}

	err = 0;

	/* Everything else goes through the regular -e/--event parser. */
	if (lists[0]) {
		struct parse_events_option_args parse_events_option_args = {
			.evlistp = &trace->evlist,
		};
		struct option o = {
			.value = &parse_events_option_args,
		};
		err = parse_events_option(&o, lists[0], 0);
	}
out:
	free(strace_groups_dir);
	free(lists[0]);
	free(lists[1]);
	if (sep)
		*sep = ',';

	return err;
}
4997
4998static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
4999{
5000	struct trace *trace = opt->value;
5001
5002	if (!list_empty(&trace->evlist->core.entries)) {
5003		struct option o = {
5004			.value = &trace->evlist,
5005		};
5006		return parse_cgroups(&o, str, unset);
5007	}
5008	trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
5009
5010	return 0;
5011}
5012
/*
 * perf_config() callback: apply the trace.* variables found in
 * .perfconfig.  Unknown trace.* variables are silently ignored.
 *
 * Returns 0, or -1 when out of memory copying trace.add_events.
 */
static int trace__config(const char *var, const char *value, void *arg)
{
	struct trace *trace = arg;
	int err = 0;

	if (!strcmp(var, "trace.add_events")) {
		/* Extra events to add to whatever was asked via -e/--event. */
		trace->perfconfig_events = strdup(value);
		if (trace->perfconfig_events == NULL) {
			pr_err("Not enough memory for %s\n", "trace.add_events");
			return -1;
		}
	} else if (!strcmp(var, "trace.show_timestamp")) {
		trace->show_tstamp = perf_config_bool(var, value);
	} else if (!strcmp(var, "trace.show_duration")) {
		trace->show_duration = perf_config_bool(var, value);
	} else if (!strcmp(var, "trace.show_arg_names")) {
		trace->show_arg_names = perf_config_bool(var, value);
		/* Without arg names, zero valued args must stay visible. */
		if (!trace->show_arg_names)
			trace->show_zeros = true;
	} else if (!strcmp(var, "trace.show_zeros")) {
		bool new_show_zeros = perf_config_bool(var, value);
		/* Refuse a combination that would hide argument positions. */
		if (!trace->show_arg_names && !new_show_zeros) {
			pr_warning("trace.show_zeros has to be set when trace.show_arg_names=no\n");
			goto out;
		}
		trace->show_zeros = new_show_zeros;
	} else if (!strcmp(var, "trace.show_prefix")) {
		trace->show_string_prefix = perf_config_bool(var, value);
	} else if (!strcmp(var, "trace.no_inherit")) {
		trace->opts.no_inherit = perf_config_bool(var, value);
	} else if (!strcmp(var, "trace.args_alignment")) {
		int args_alignment = 0;
		if (perf_config_int(&args_alignment, var, value) == 0)
			trace->args_alignment = args_alignment;
	} else if (!strcmp(var, "trace.tracepoint_beautifiers")) {
		/* Choose between libtraceevent and libbeauty arg printing. */
		if (strcasecmp(value, "libtraceevent") == 0)
			trace->libtraceevent_print = true;
		else if (strcasecmp(value, "libbeauty") == 0)
			trace->libtraceevent_print = false;
	}
out:
	return err;
}
5056
5057static void trace__exit(struct trace *trace)
5058{
5059	int i;
5060
5061	strlist__delete(trace->ev_qualifier);
5062	zfree(&trace->ev_qualifier_ids.entries);
5063	if (trace->syscalls.table) {
5064		for (i = 0; i <= trace->sctbl->syscalls.max_id; i++)
5065			syscall__exit(&trace->syscalls.table[i]);
5066		zfree(&trace->syscalls.table);
5067	}
5068	syscalltbl__delete(trace->sctbl);
5069	zfree(&trace->perfconfig_events);
5070}
5071
5072#ifdef HAVE_BPF_SKEL
/*
 * Add the "__augmented_syscalls__" bpf-output event through which the BPF
 * skeleton hands augmented syscall payloads to userspace.
 */
static int bpf__setup_bpf_output(struct evlist *evlist)
{
	int err;

	err = parse_event(evlist, "bpf-output/no-inherit=1,name=__augmented_syscalls__/");
	if (err != 0)
		pr_debug("ERROR: failed to create the \"__augmented_syscalls__\" bpf-output event\n");

	return err;
}
5083
5084int cmd_trace(int argc, const char **argv)
5085{
5086	const char *trace_usage[] = {
5087		"perf trace [<options>] [<command>]",
5088		"perf trace [<options>] -- <command> [<options>]",
5089		"perf trace record [<options>] [<command>]",
5090		"perf trace record [<options>] -- <command> [<options>]",
5091		NULL
5092	};
5093	struct trace trace = {
 
 
 
 
 
 
 
5094		.opts = {
5095			.target = {
5096				.uid	   = UINT_MAX,
5097				.uses_mmap = true,
5098			},
5099			.user_freq     = UINT_MAX,
5100			.user_interval = ULLONG_MAX,
5101			.no_buffering  = true,
5102			.mmap_pages    = UINT_MAX,
 
5103		},
5104		.output = stderr,
5105		.show_comm = true,
5106		.show_tstamp = true,
5107		.show_duration = true,
5108		.show_arg_names = true,
5109		.args_alignment = 70,
5110		.trace_syscalls = false,
5111		.kernel_syscallchains = false,
5112		.max_stack = UINT_MAX,
5113		.max_events = ULONG_MAX,
5114	};
5115	const char *output_name = NULL;
 
5116	const struct option trace_options[] = {
5117	OPT_CALLBACK('e', "event", &trace, "event",
5118		     "event/syscall selector. use 'perf list' to list available events",
5119		     trace__parse_events_option),
5120	OPT_CALLBACK(0, "filter", &trace.evlist, "filter",
5121		     "event filter", parse_filter),
5122	OPT_BOOLEAN(0, "comm", &trace.show_comm,
5123		    "show the thread COMM next to its id"),
5124	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
5125	OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
5126		     trace__parse_events_option),
5127	OPT_STRING('o', "output", &output_name, "file", "output file name"),
5128	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
5129	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
5130		    "trace events on existing process id"),
5131	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
5132		    "trace events on existing thread id"),
5133	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
5134		     "pids to filter (by the kernel)", trace__set_filter_pids_from_option),
5135	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
5136		    "system-wide collection from all CPUs"),
5137	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
5138		    "list of cpus to monitor"),
5139	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
5140		    "child tasks do not inherit counters"),
5141	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
5142		     "number of mmap data pages", evlist__parse_mmap_pages),
 
5143	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
5144		   "user to profile"),
5145	OPT_CALLBACK(0, "duration", &trace, "float",
5146		     "show only events with duration > N.M ms",
5147		     trace__set_duration),
5148	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
5149	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
5150	OPT_BOOLEAN('T', "time", &trace.full_time,
5151		    "Show full timestamp, not time relative to first start"),
5152	OPT_BOOLEAN(0, "failure", &trace.failure_only,
5153		    "Show only syscalls that failed"),
5154	OPT_BOOLEAN('s', "summary", &trace.summary_only,
5155		    "Show only syscall summary with statistics"),
5156	OPT_BOOLEAN('S', "with-summary", &trace.summary,
5157		    "Show all syscalls and summary with statistics"),
5158	OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary,
5159		    "Show errno stats per syscall, use with -s or -S"),
5160	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
5161		     "Trace pagefaults", parse_pagefaults, "maj"),
5162	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
5163	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
5164	OPT_CALLBACK(0, "call-graph", &trace.opts,
5165		     "record_mode[,record_size]", record_callchain_help,
5166		     &record_parse_callchain_opt),
5167	OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print,
5168		    "Use libtraceevent to print the tracepoint arguments."),
5169	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
5170		    "Show the kernel callchains on the syscall exit path"),
5171	OPT_ULONG(0, "max-events", &trace.max_events,
5172		"Set the maximum number of events to print, exit after that is reached. "),
5173	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
5174		     "Set the minimum stack depth when parsing the callchain, "
5175		     "anything below the specified depth will be ignored."),
5176	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
5177		     "Set the maximum stack depth when parsing the callchain, "
5178		     "anything beyond the specified depth will be ignored. "
5179		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
5180	OPT_BOOLEAN(0, "sort-events", &trace.sort_events,
5181			"Sort batch of events before processing, use if getting out of order events"),
5182	OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
5183			"print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
5184	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
5185			"per thread proc mmap processing timeout in ms"),
5186	OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
5187		     trace__parse_cgroups),
5188	OPT_INTEGER('D', "delay", &trace.opts.target.initial_delay,
5189		     "ms to wait before starting measurement after program "
5190		     "start"),
5191	OPT_BOOLEAN(0, "force-btf", &trace.force_btf, "Prefer btf_dump general pretty printer"
5192		       "to customized ones"),
5193	OPTS_EVSWITCH(&trace.evswitch),
5194	OPT_END()
5195	};
5196	bool __maybe_unused max_stack_user_set = true;
5197	bool mmap_pages_user_set = true;
5198	struct evsel *evsel;
5199	const char * const trace_subcommands[] = { "record", NULL };
5200	int err = -1;
5201	char bf[BUFSIZ];
5202	struct sigaction sigchld_act;
5203
5204	signal(SIGSEGV, sighandler_dump_stack);
5205	signal(SIGFPE, sighandler_dump_stack);
5206	signal(SIGINT, sighandler_interrupt);
5207
5208	memset(&sigchld_act, 0, sizeof(sigchld_act));
5209	sigchld_act.sa_flags = SA_SIGINFO;
5210	sigchld_act.sa_sigaction = sighandler_chld;
5211	sigaction(SIGCHLD, &sigchld_act, NULL);
5212
5213	trace.evlist = evlist__new();
5214	trace.sctbl = syscalltbl__new();
5215
5216	if (trace.evlist == NULL || trace.sctbl == NULL) {
5217		pr_err("Not enough memory to run!\n");
5218		err = -ENOMEM;
5219		goto out;
5220	}
5221
5222	/*
5223	 * Parsing .perfconfig may entail creating a BPF event, that may need
5224	 * to create BPF maps, so bump RLIM_MEMLOCK as the default 64K setting
5225	 * is too small. This affects just this process, not touching the
5226	 * global setting. If it fails we'll get something in 'perf trace -v'
5227	 * to help diagnose the problem.
5228	 */
5229	rlimit__bump_memlock();
5230
5231	err = perf_config(trace__config, &trace);
5232	if (err)
5233		goto out;
5234
5235	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
5236				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
5237
5238	/*
5239	 * Here we already passed thru trace__parse_events_option() and it has
5240	 * already figured out if -e syscall_name, if not but if --event
5241	 * foo:bar was used, the user is interested _just_ in those, say,
5242	 * tracepoint events, not in the strace-like syscall-name-based mode.
5243	 *
5244	 * This is important because we need to check if strace-like mode is
5245	 * needed to decided if we should filter out the eBPF
5246	 * __augmented_syscalls__ code, if it is in the mix, say, via
5247	 * .perfconfig trace.add_events, and filter those out.
5248	 */
5249	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
5250	    trace.evlist->core.nr_entries == 0 /* Was --events used? */) {
5251		trace.trace_syscalls = true;
5252	}
5253	/*
5254	 * Now that we have --verbose figured out, lets see if we need to parse
5255	 * events from .perfconfig, so that if those events fail parsing, say some
5256	 * BPF program fails, then we'll be able to use --verbose to see what went
5257	 * wrong in more detail.
5258	 */
5259	if (trace.perfconfig_events != NULL) {
5260		struct parse_events_error parse_err;
5261
5262		parse_events_error__init(&parse_err);
5263		err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err);
5264		if (err)
5265			parse_events_error__print(&parse_err, trace.perfconfig_events);
5266		parse_events_error__exit(&parse_err);
5267		if (err)
5268			goto out;
5269	}
5270
5271	if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
5272		usage_with_options_msg(trace_usage, trace_options,
5273				       "cgroup monitoring only available in system-wide mode");
5274	}
5275
5276#ifdef HAVE_BPF_SKEL
5277	if (!trace.trace_syscalls)
5278		goto skip_augmentation;
5279
5280	if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) {
5281		pr_debug("Syscall augmentation fails with record, disabling augmentation");
5282		goto skip_augmentation;
5283	}
5284
5285	trace.skel = augmented_raw_syscalls_bpf__open();
5286	if (!trace.skel) {
5287		pr_debug("Failed to open augmented syscalls BPF skeleton");
5288	} else {
5289		/*
5290		 * Disable attaching the BPF programs except for sys_enter and
5291		 * sys_exit that tail call into this as necessary.
5292		 */
5293		struct bpf_program *prog;
5294
5295		bpf_object__for_each_program(prog, trace.skel->obj) {
5296			if (prog != trace.skel->progs.sys_enter && prog != trace.skel->progs.sys_exit)
5297				bpf_program__set_autoattach(prog, /*autoattach=*/false);
5298		}
5299
5300		err = augmented_raw_syscalls_bpf__load(trace.skel);
5301
5302		if (err < 0) {
5303			libbpf_strerror(err, bf, sizeof(bf));
5304			pr_debug("Failed to load augmented syscalls BPF skeleton: %s\n", bf);
5305		} else {
5306			augmented_raw_syscalls_bpf__attach(trace.skel);
5307			trace__add_syscall_newtp(&trace);
5308		}
5309	}
5310
5311	err = bpf__setup_bpf_output(trace.evlist);
5312	if (err) {
5313		libbpf_strerror(err, bf, sizeof(bf));
5314		pr_err("ERROR: Setup BPF output event failed: %s\n", bf);
5315		goto out;
5316	}
5317	trace.syscalls.events.bpf_output = evlist__last(trace.evlist);
5318	assert(evsel__name_is(trace.syscalls.events.bpf_output, "__augmented_syscalls__"));
5319skip_augmentation:
5320#endif
5321	err = -1;
5322
5323	if (trace.trace_pgfaults) {
5324		trace.opts.sample_address = true;
5325		trace.opts.sample_time = true;
5326	}
5327
5328	if (trace.opts.mmap_pages == UINT_MAX)
5329		mmap_pages_user_set = false;
5330
5331	if (trace.max_stack == UINT_MAX) {
5332		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
5333		max_stack_user_set = false;
5334	}
5335
5336#ifdef HAVE_DWARF_UNWIND_SUPPORT
5337	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
5338		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
5339	}
5340#endif
5341
5342	if (callchain_param.enabled) {
5343		if (!mmap_pages_user_set && geteuid() == 0)
5344			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
5345
5346		symbol_conf.use_callchain = true;
5347	}
5348
5349	if (trace.evlist->core.nr_entries > 0) {
5350		bool use_btf = false;
5351
5352		evlist__set_default_evsel_handler(trace.evlist, trace__event_handler);
5353		if (evlist__set_syscall_tp_fields(trace.evlist, &use_btf)) {
5354			perror("failed to set syscalls:* tracepoint fields");
5355			goto out;
5356		}
5357
5358		if (use_btf)
5359			trace__load_vmlinux_btf(&trace);
5360	}
5361
5362	if (trace.sort_events) {
5363		ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace);
5364		ordered_events__set_copy_on_queue(&trace.oe.data, true);
5365	}
5366
5367	/*
5368	 * If we are augmenting syscalls, then combine what we put in the
5369	 * __augmented_syscalls__ BPF map with what is in the
5370	 * syscalls:sys_exit_FOO tracepoints, i.e. just like we do without BPF,
5371	 * combining raw_syscalls:sys_enter with raw_syscalls:sys_exit.
5372	 *
5373	 * We'll switch to look at two BPF maps, one for sys_enter and the
5374	 * other for sys_exit when we start augmenting the sys_exit paths with
5375	 * buffers that are being copied from kernel to userspace, think 'read'
5376	 * syscall.
5377	 */
5378	if (trace.syscalls.events.bpf_output) {
5379		evlist__for_each_entry(trace.evlist, evsel) {
5380			bool raw_syscalls_sys_exit = evsel__name_is(evsel, "raw_syscalls:sys_exit");
5381
5382			if (raw_syscalls_sys_exit) {
5383				trace.raw_augmented_syscalls = true;
5384				goto init_augmented_syscall_tp;
5385			}
5386
5387			if (trace.syscalls.events.bpf_output->priv == NULL &&
5388			    strstr(evsel__name(evsel), "syscalls:sys_enter")) {
5389				struct evsel *augmented = trace.syscalls.events.bpf_output;
5390				if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
5391				    evsel__init_augmented_syscall_tp_args(augmented))
5392					goto out;
5393				/*
5394				 * Augmented is __augmented_syscalls__ BPF_OUTPUT event
5395				 * Above we made sure we can get from the payload the tp fields
5396				 * that we get from syscalls:sys_enter tracefs format file.
5397				 */
5398				augmented->handler = trace__sys_enter;
5399				/*
5400				 * Now we do the same for the *syscalls:sys_enter event so that
5401				 * if we handle it directly, i.e. if the BPF prog returns 0 so
5402				 * as not to filter it, then we'll handle it just like we would
5403				 * for the BPF_OUTPUT one:
5404				 */
5405				if (evsel__init_augmented_syscall_tp(evsel, evsel) ||
5406				    evsel__init_augmented_syscall_tp_args(evsel))
5407					goto out;
5408				evsel->handler = trace__sys_enter;
5409			}
5410
5411			if (strstarts(evsel__name(evsel), "syscalls:sys_exit_")) {
5412				struct syscall_tp *sc;
5413init_augmented_syscall_tp:
5414				if (evsel__init_augmented_syscall_tp(evsel, evsel))
5415					goto out;
5416				sc = __evsel__syscall_tp(evsel);
5417				/*
5418				 * For now with BPF raw_augmented we hook into
5419				 * raw_syscalls:sys_enter and there we get all
5420				 * 6 syscall args plus the tracepoint common
5421				 * fields and the syscall_nr (another long).
5422				 * So we check if that is the case and if so
5423				 * don't look after the sc->args_size but
5424				 * always after the full raw_syscalls:sys_enter
5425				 * payload, which is fixed.
5426				 *
5427				 * We'll revisit this later to pass
5428				 * s->args_size to the BPF augmenter (now
5429				 * tools/perf/examples/bpf/augmented_raw_syscalls.c,
5430				 * so that it copies only what we need for each
5431				 * syscall, like what happens when we use
5432				 * syscalls:sys_enter_NAME, so that we reduce
5433				 * the kernel/userspace traffic to just what is
5434				 * needed for each syscall.
5435				 */
5436				if (trace.raw_augmented_syscalls)
5437					trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
5438				evsel__init_augmented_syscall_tp_ret(evsel);
5439				evsel->handler = trace__sys_exit;
5440			}
5441		}
5442	}
5443
5444	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
5445		return trace__record(&trace, argc-1, &argv[1]);
5446
5447	/* Using just --errno-summary will trigger --summary */
5448	if (trace.errno_summary && !trace.summary && !trace.summary_only)
5449		trace.summary_only = true;
5450
5451	/* summary_only implies summary option, but don't overwrite summary if set */
5452	if (trace.summary_only)
5453		trace.summary = trace.summary_only;
5454
5455	/* Keep exited threads, otherwise information might be lost for summary */
5456	if (trace.summary)
5457		symbol_conf.keep_exited_threads = true;
 
 
5458
5459	if (output_name != NULL) {
5460		err = trace__open_output(&trace, output_name);
5461		if (err < 0) {
5462			perror("failed to create output file");
5463			goto out;
5464		}
5465	}
5466
5467	err = evswitch__init(&trace.evswitch, trace.evlist, stderr);
5468	if (err)
5469		goto out_close;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5470
5471	err = target__validate(&trace.opts.target);
5472	if (err) {
5473		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
5474		fprintf(trace.output, "%s", bf);
5475		goto out_close;
5476	}
5477
5478	err = target__parse_uid(&trace.opts.target);
5479	if (err) {
5480		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
5481		fprintf(trace.output, "%s", bf);
5482		goto out_close;
5483	}
5484
5485	if (!argc && target__none(&trace.opts.target))
5486		trace.opts.target.system_wide = true;
5487
5488	if (input_name)
5489		err = trace__replay(&trace);
5490	else
5491		err = trace__run(&trace, argc, argv);
5492
5493out_close:
5494	if (output_name != NULL)
5495		fclose(trace.output);
5496out:
5497	trace__exit(&trace);
5498#ifdef HAVE_BPF_SKEL
5499	augmented_raw_syscalls_bpf__destroy(trace.skel);
5500#endif
5501	return err;
5502}