Linux Audio

Check our new training course

Loading...
v6.13.7
   1// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
   2/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
   3#define _GNU_SOURCE
   4#include <argp.h>
   5#include <libgen.h>
   6#include <string.h>
   7#include <stdlib.h>
 
   8#include <sched.h>
   9#include <pthread.h>
  10#include <dirent.h>
  11#include <signal.h>
  12#include <fcntl.h>
  13#include <unistd.h>
  14#include <sys/time.h>
  15#include <sys/sysinfo.h>
  16#include <sys/stat.h>
  17#include <bpf/libbpf.h>
  18#include <bpf/btf.h>
  19#include <bpf/bpf.h>
  20#include <libelf.h>
  21#include <gelf.h>
  22#include <float.h>
  23#include <math.h>
  24#include <limits.h>
  25
  26#ifndef ARRAY_SIZE
  27#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
  28#endif
  29
/* Identifiers of the per-program stats veristat knows about. Values double as
 * indices into stat_defs[]. Numeric stats are listed first so that they can be
 * stored in verif_stats.stats[], which holds NUM_STATS_CNT entries.
 */
enum stat_id {
	VERDICT,		/* treated as numeric by filters: failure == 0, success == 1 */
	DURATION,		/* "Duration (us)" column */
	TOTAL_INSNS,		/* "Insns" column */
	TOTAL_STATES,		/* "States" column */
	PEAK_STATES,		/* "Peak states" column */
	MAX_STATES_PER_INSN,	/* "Max states per insn" column */
	MARK_READ_MAX_LEN,	/* "Max mark read length" column */

	/* string-valued stats start here; FILE_NAME marks the numeric boundary */
	FILE_NAME,
	PROG_NAME,

	ALL_STATS_CNT,		/* total number of stats, numeric and string */
	NUM_STATS_CNT = FILE_NAME - VERDICT,	/* number of numeric stats */
};
  45
  46/* In comparison mode each stat can specify up to four different values:
  47 *   - A side value;
  48 *   - B side value;
  49 *   - absolute diff value;
  50 *   - relative (percentage) diff value.
  51 *
  52 * When specifying stat specs in comparison mode, user can use one of the
  53 * following variant suffixes to specify which exact variant should be used for
  54 * ordering or filtering:
  55 *   - `_a` for A side value;
  56 *   - `_b` for B side value;
  57 *   - `_diff` for absolute diff value;
  58 *   - `_pct` for relative (percentage) diff value.
  59 *
  60 * If no variant suffix is provided, then `_b` (control data) is assumed.
  61 *
  62 * As an example, let's say instructions stat has the following output:
  63 *
  64 * Insns (A)  Insns (B)  Insns   (DIFF)
  65 * ---------  ---------  --------------
  66 * 21547      20920       -627 (-2.91%)
  67 *
  68 * Then:
  69 *   - 21547 is A side value (insns_a);
  70 *   - 20920 is B side value (insns_b);
  71 *   - -627 is absolute diff value (insns_diff);
  72 *   - -2.91% is relative diff value (insns_pct).
  73 *
  74 * For verdict there is no verdict_pct variant.
  75 * For file and program name, _a and _b variants are equivalent and there are
  76 * no _diff or _pct variants.
  77 */
/* Which value of a stat a spec or filter refers to in comparison mode. Values
 * are used as indices into the suffix table in parse_stat_id_var().
 */
enum stat_variant {
	VARIANT_A,	/* `_a` suffix: A side value */
	VARIANT_B,	/* `_b` suffix; also used when no suffix is given */
	VARIANT_DIFF,	/* `_diff` suffix: absolute diff value */
	VARIANT_PCT,	/* `_pct` suffix: relative (percentage) diff value */
};
  84
/* Stats record of a single BPF program. */
struct verif_stats {
	char *file_name;	/* released with free() by free_verif_stats() */
	char *prog_name;	/* released with free() by free_verif_stats() */

	/* numeric stats, indexed by enum stat_id (VERDICT..MARK_READ_MAX_LEN) */
	long stats[NUM_STATS_CNT];
};
  91
/* joined comparison mode stats: one record referring to the A side and B side
 * stats of a file/program pair
 */
struct verif_stats_join {
	char *file_name;
	char *prog_name;

	/* NOTE(review): presumably either pointer can be NULL when a program is
	 * present in only one of the two data sets -- confirm against the
	 * code that builds the join
	 */
	const struct verif_stats *stats_a;
	const struct verif_stats *stats_b;
};
 100
/* An ordered list of stat specs, filled in by parse_stat(). The arrays are
 * parallel: element i of each one describes spec number i.
 */
struct stat_specs {
	int spec_cnt;		/* number of specs in use */
	enum stat_id ids[ALL_STATS_CNT];	/* which stat */
	enum stat_variant variants[ALL_STATS_CNT];	/* which variant of it */
	bool asc[ALL_STATS_CNT];	/* true for ascending order */
	bool abs[ALL_STATS_CNT];	/* true if spec was written as |<stat>| */
	/* NOTE(review): presumably per-column widths for table output -- confirm */
	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
};
 109
/* Output format of results; selected with -o ("table" or "csv"). */
enum resfmt {
	RESFMT_TABLE,	/* -o table */
	RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
	RESFMT_CSV,	/* -o csv */
};
 115
/* Discriminator for struct filter; see append_filter() for how a filter
 * expression is classified.
 */
enum filter_kind {
	FILTER_NAME,	/* file/program name glob filter */
	FILTER_STAT,	/* <stat><op><value> filter */
};
 120
/* Comparison operators usable in stat filters; the accepted spellings are
 * listed in the `operators` table.
 */
enum operator_kind {
	OP_EQ,		/* == or = */
	OP_NEQ,		/* != or <> */
	OP_LT,		/* < */
	OP_LE,		/* <= */
	OP_GT,		/* > */
	OP_GE,		/* >= */
};
 129
/* A single allow or deny filter. Only the fields that belong to `kind` are
 * meaningful; append_filter() zeroes the whole struct before filling it in.
 */
struct filter {
	enum filter_kind kind;
	/* FILTER_NAME */
	char *any_glob;		/* '<glob>' form: applied to both file and program name */
	char *file_glob;	/* '<file-glob>/...' part, NULL if empty */
	char *prog_glob;	/* '.../<prog-glob>' part, NULL if empty */
	/* FILTER_STAT */
	enum operator_kind op;
	int stat_id;		/* enum stat_id value, always below FILE_NAME */
	enum stat_variant stat_var;
	long value;		/* right-hand side of the comparison */
	bool abs;		/* compare absolute value of the stat */
};
 143
/* Global tool state: parsed command-line options, collected stats, filters
 * and processing counters.
 */
static struct env {
	char **filenames;	/* positional arguments, strdup()'ed by parse_arg() */
	int filename_cnt;
	bool verbose;		/* -v; also turned on by -d */
	bool debug;		/* -d: let libbpf debug-level messages through */
	bool quiet;		/* -q */
	bool force_checkpoints;	/* -t */
	bool force_reg_invariants;	/* -r */
	enum resfmt out_fmt;	/* -o */
	bool show_version;	/* -V */
	bool comparison_mode;	/* -C */
	bool replay_mode;	/* -R */
	int top_n;		/* -n */

	int log_level;		/* -l */
	int log_size;		/* --log-size */
	bool log_fixed;		/* --log-fixed */

	struct verif_stats *prog_stats;
	int prog_stat_cnt;

	/* baseline_stats is allocated and used only in comparison mode */
	struct verif_stats *baseline_stats;
	int baseline_stat_cnt;

	struct verif_stats_join *join_stats;
	int join_stat_cnt;

	struct stat_specs output_spec;	/* -e */
	struct stat_specs sort_spec;	/* -s */

	/* -f filters; expressions prefixed with '!' go to deny_filters */
	struct filter *allow_filters;
	struct filter *deny_filters;
	int allow_filter_cnt;
	int deny_filter_cnt;

	int files_processed;
	int files_skipped;
	int progs_processed;
	int progs_skipped;
	int top_src_lines;	/* -S */
} env;
 186
 187static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
 188{
 189	if (!env.verbose)
 190		return 0;
 191	if (level == LIBBPF_DEBUG  && !env.debug)
 192		return 0;
 193	return vfprintf(stderr, format, args);
 194}
 195
/* VERISTAT_VERSION can be supplied from outside (e.g. by the build); use a
 * placeholder otherwise
 */
#ifndef VERISTAT_VERSION
#define VERISTAT_VERSION "<kernel>"
#endif

/* Version string, bug report address and help text handed to argp */
const char *argp_program_version = "veristat v" VERISTAT_VERSION;
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
const char argp_program_doc[] =
"veristat    BPF verifier stats collection and comparison tool.\n"
"\n"
"USAGE: veristat <obj-file> [<obj-file>...]\n"
"   OR: veristat -C <baseline.csv> <comparison.csv>\n"
"   OR: veristat -R <results.csv>\n";
 208
/* Keys for options that have only a long form (no single-character key) */
enum {
	OPT_LOG_FIXED = 1000,
	OPT_LOG_SIZE = 1001,
};
 213
/* Command-line options table for argp; each key is handled in parse_arg().
 * The list is terminated by an empty entry.
 */
static const struct argp_option opts[] = {
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{ "version", 'V', NULL, 0, "Print version" },
	{ "verbose", 'v', NULL, 0, "Verbose mode" },
	{ "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
	{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
	{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
	{ "top-n", 'n', "N", 0, "Emit only up to first N results." },
	{ "quiet", 'q', NULL, 0, "Quiet mode" },
	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
	{ "sort", 's', "SPEC", 0, "Specify sort order" },
	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
	{ "compare", 'C', NULL, 0, "Comparison mode" },
	{ "replay", 'R', NULL, 0, "Replay mode" },
	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
	{ "test-states", 't', NULL, 0,
	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
	{ "test-reg-invariants", 'r', NULL, 0,
	  "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
	{ "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" },
	{},
};
 237
/* Defined further below; declared here because parse_arg() calls them */
static int parse_stats(const char *stats_str, struct stat_specs *specs);
static int append_filter(struct filter **filters, int *cnt, const char *str);
static int append_filter_file(const char *path);
 241
 242static error_t parse_arg(int key, char *arg, struct argp_state *state)
 243{
 244	void *tmp;
 245	int err;
 246
 247	switch (key) {
 248	case 'h':
 249		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
 250		break;
 251	case 'V':
 252		env.show_version = true;
 253		break;
 254	case 'v':
 255		env.verbose = true;
 256		break;
 257	case 'd':
 258		env.debug = true;
 259		env.verbose = true;
 260		break;
 261	case 'q':
 262		env.quiet = true;
 263		break;
 264	case 'e':
 265		err = parse_stats(arg, &env.output_spec);
 266		if (err)
 267			return err;
 268		break;
 269	case 's':
 270		err = parse_stats(arg, &env.sort_spec);
 271		if (err)
 272			return err;
 273		break;
 274	case 'o':
 275		if (strcmp(arg, "table") == 0) {
 276			env.out_fmt = RESFMT_TABLE;
 277		} else if (strcmp(arg, "csv") == 0) {
 278			env.out_fmt = RESFMT_CSV;
 279		} else {
 280			fprintf(stderr, "Unrecognized output format '%s'\n", arg);
 281			return -EINVAL;
 282		}
 283		break;
 284	case 'l':
 285		errno = 0;
 286		env.log_level = strtol(arg, NULL, 10);
 287		if (errno) {
 288			fprintf(stderr, "invalid log level: %s\n", arg);
 289			argp_usage(state);
 290		}
 291		break;
 292	case OPT_LOG_FIXED:
 293		env.log_fixed = true;
 294		break;
 295	case OPT_LOG_SIZE:
 296		errno = 0;
 297		env.log_size = strtol(arg, NULL, 10);
 298		if (errno) {
 299			fprintf(stderr, "invalid log size: %s\n", arg);
 300			argp_usage(state);
 301		}
 302		break;
 303	case 't':
 304		env.force_checkpoints = true;
 305		break;
 306	case 'r':
 307		env.force_reg_invariants = true;
 308		break;
 309	case 'n':
 310		errno = 0;
 311		env.top_n = strtol(arg, NULL, 10);
 312		if (errno) {
 313			fprintf(stderr, "invalid top N specifier: %s\n", arg);
 314			argp_usage(state);
 315		}
 316	case 'C':
 317		env.comparison_mode = true;
 318		break;
 319	case 'R':
 320		env.replay_mode = true;
 321		break;
 322	case 'f':
 323		if (arg[0] == '@')
 324			err = append_filter_file(arg + 1);
 325		else if (arg[0] == '!')
 326			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
 327		else
 328			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
 329		if (err) {
 330			fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
 331			return err;
 332		}
 333		break;
 334	case 'S':
 335		errno = 0;
 336		env.top_src_lines = strtol(arg, NULL, 10);
 337		if (errno) {
 338			fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
 339			argp_usage(state);
 340		}
 341		break;
 342	case ARGP_KEY_ARG:
 343		tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
 344		if (!tmp)
 345			return -ENOMEM;
 346		env.filenames = tmp;
 347		env.filenames[env.filename_cnt] = strdup(arg);
 348		if (!env.filenames[env.filename_cnt])
 349			return -ENOMEM;
 350		env.filename_cnt++;
 351		break;
 352	default:
 353		return ARGP_ERR_UNKNOWN;
 354	}
 355	return 0;
 356}
 357
/* argp parser description: ties together the option table, the parser
 * callback and the help text
 */
static const struct argp argp = {
	.options = opts,
	.parser = parse_arg,
	.doc = argp_program_doc,
};
 363
 364
 365/* Adapted from perf/util/string.c */
 366static bool glob_matches(const char *str, const char *pat)
 367{
 368	while (*str && *pat && *pat != '*') {
 369		if (*str != *pat)
 370			return false;
 371		str++;
 372		pat++;
 373	}
 374	/* Check wild card */
 375	if (*pat == '*') {
 376		while (*pat == '*')
 377			pat++;
 378		if (!*pat) /* Tail wild card matches all */
 379			return true;
 380		while (*str)
 381			if (glob_matches(str++, pat))
 382				return true;
 383	}
 384	return !*str && !*pat;
 385}
 386
 387static bool is_bpf_obj_file(const char *path) {
 388	Elf64_Ehdr *ehdr;
 389	int fd, err = -EINVAL;
 390	Elf *elf = NULL;
 391
 392	fd = open(path, O_RDONLY | O_CLOEXEC);
 393	if (fd < 0)
 394		return true; /* we'll fail later and propagate error */
 395
 396	/* ensure libelf is initialized */
 397	(void)elf_version(EV_CURRENT);
 398
 399	elf = elf_begin(fd, ELF_C_READ, NULL);
 400	if (!elf)
 401		goto cleanup;
 402
 403	if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
 404		goto cleanup;
 405
 406	ehdr = elf64_getehdr(elf);
 407	/* Old LLVM set e_machine to EM_NONE */
 408	if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
 409		goto cleanup;
 410
 411	err = 0;
 412cleanup:
 413	if (elf)
 414		elf_end(elf);
 415	close(fd);
 416	return err == 0;
 417}
 418
 419static bool should_process_file_prog(const char *filename, const char *prog_name)
 420{
 421	struct filter *f;
 422	int i, allow_cnt = 0;
 423
 424	for (i = 0; i < env.deny_filter_cnt; i++) {
 425		f = &env.deny_filters[i];
 426		if (f->kind != FILTER_NAME)
 427			continue;
 428
 429		if (f->any_glob && glob_matches(filename, f->any_glob))
 430			return false;
 431		if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
 432			return false;
 433		if (f->file_glob && glob_matches(filename, f->file_glob))
 434			return false;
 435		if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
 436			return false;
 437	}
 438
 439	for (i = 0; i < env.allow_filter_cnt; i++) {
 440		f = &env.allow_filters[i];
 441		if (f->kind != FILTER_NAME)
 442			continue;
 443
 444		allow_cnt++;
 445		if (f->any_glob) {
 446			if (glob_matches(filename, f->any_glob))
 447				return true;
 448			/* If we don't know program name yet, any_glob filter
 449			 * has to assume that current BPF object file might be
 450			 * relevant; we'll check again later on after opening
 451			 * BPF object file, at which point program name will
 452			 * be known finally.
 453			 */
 454			if (!prog_name || glob_matches(prog_name, f->any_glob))
 455				return true;
 456		} else {
 457			if (f->file_glob && !glob_matches(filename, f->file_glob))
 458				continue;
 459			if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
 460				continue;
 461			return true;
 462		}
 463	}
 464
 465	/* if there are no file/prog name allow filters, allow all progs,
 466	 * unless they are denied earlier explicitly
 467	 */
 468	return allow_cnt == 0;
 469}
 470
/* Textual spellings of filter comparison operators. append_filter() scans
 * this table in order and uses the first entry found in the expression.
 */
static struct {
	enum operator_kind op_kind;
	const char *op_str;
} operators[] = {
	/* Order of these definitions matter to avoid situations like '<'
	 * matching part of what is actually a '<>' operator. That is,
	 * substrings should go last.
	 */
	{ OP_EQ, "==" },
	{ OP_NEQ, "!=" },
	{ OP_NEQ, "<>" },
	{ OP_LE, "<=" },
	{ OP_LT, "<" },
	{ OP_GE, ">=" },
	{ OP_GT, ">" },
	{ OP_EQ, "=" },
};

/* Defined further below; declared here because append_filter() calls it */
static bool parse_stat_id_var(const char *name, size_t len, int *id,
			      enum stat_variant *var, bool *is_abs);
 491
 492static int append_filter(struct filter **filters, int *cnt, const char *str)
 493{
 494	struct filter *f;
 495	void *tmp;
 496	const char *p;
 497	int i;
 498
 499	tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
 500	if (!tmp)
 501		return -ENOMEM;
 502	*filters = tmp;
 503
 504	f = &(*filters)[*cnt];
 505	memset(f, 0, sizeof(*f));
 506
 507	/* First, let's check if it's a stats filter of the following form:
 508	 * <stat><op><value, where:
 509	 *   - <stat> is one of supported numerical stats (verdict is also
 510	 *     considered numerical, failure == 0, success == 1);
 511	 *   - <op> is comparison operator (see `operators` definitions);
 512	 *   - <value> is an integer (or failure/success, or false/true as
 513	 *     special aliases for 0 and 1, respectively).
 514	 * If the form doesn't match what user provided, we assume file/prog
 515	 * glob filter.
 516	 */
 517	for (i = 0; i < ARRAY_SIZE(operators); i++) {
 518		enum stat_variant var;
 519		int id;
 520		long val;
 521		const char *end = str;
 522		const char *op_str;
 523		bool is_abs;
 524
 525		op_str = operators[i].op_str;
 526		p = strstr(str, op_str);
 527		if (!p)
 528			continue;
 529
 530		if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) {
 531			fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
 532			return -EINVAL;
 533		}
 534		if (id >= FILE_NAME) {
 535			fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
 536			return -EINVAL;
 537		}
 538
 539		p += strlen(op_str);
 540
 541		if (strcasecmp(p, "true") == 0 ||
 542		    strcasecmp(p, "t") == 0 ||
 543		    strcasecmp(p, "success") == 0 ||
 544		    strcasecmp(p, "succ") == 0 ||
 545		    strcasecmp(p, "s") == 0 ||
 546		    strcasecmp(p, "match") == 0 ||
 547		    strcasecmp(p, "m") == 0) {
 548			val = 1;
 549		} else if (strcasecmp(p, "false") == 0 ||
 550			   strcasecmp(p, "f") == 0 ||
 551			   strcasecmp(p, "failure") == 0 ||
 552			   strcasecmp(p, "fail") == 0 ||
 553			   strcasecmp(p, "mismatch") == 0 ||
 554			   strcasecmp(p, "mis") == 0) {
 555			val = 0;
 556		} else {
 557			errno = 0;
 558			val = strtol(p, (char **)&end, 10);
 559			if (errno || end == p || *end != '\0' ) {
 560				fprintf(stderr, "Invalid integer value in '%s'!\n", str);
 561				return -EINVAL;
 562			}
 563		}
 564
 565		f->kind = FILTER_STAT;
 566		f->stat_id = id;
 567		f->stat_var = var;
 568		f->op = operators[i].op_kind;
 569		f->abs = true;
 570		f->value = val;
 571
 572		*cnt += 1;
 573		return 0;
 574	}
 575
 576	/* File/prog filter can be specified either as '<glob>' or
 577	 * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
 578	 * both file and program names. This seems to be way more useful in
 579	 * practice. If user needs full control, they can use '/<prog-glob>'
 580	 * form to glob just program name, or '<file-glob>/' to glob only file
 581	 * name. But usually common <glob> seems to be the most useful and
 582	 * ergonomic way.
 583	 */
 584	f->kind = FILTER_NAME;
 585	p = strchr(str, '/');
 586	if (!p) {
 587		f->any_glob = strdup(str);
 588		if (!f->any_glob)
 589			return -ENOMEM;
 590	} else {
 591		if (str != p) {
 592			/* non-empty file glob */
 593			f->file_glob = strndup(str, p - str);
 594			if (!f->file_glob)
 595				return -ENOMEM;
 596		}
 597		if (strlen(p + 1) > 0) {
 598			/* non-empty prog glob */
 599			f->prog_glob = strdup(p + 1);
 600			if (!f->prog_glob) {
 601				free(f->file_glob);
 602				f->file_glob = NULL;
 603				return -ENOMEM;
 604			}
 605		}
 606	}
 607
 608	*cnt += 1;
 609	return 0;
 610}
 611
 612static int append_filter_file(const char *path)
 613{
 614	char buf[1024];
 615	FILE *f;
 616	int err = 0;
 617
 618	f = fopen(path, "r");
 619	if (!f) {
 620		err = -errno;
 621		fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
 622		return err;
 623	}
 624
 625	while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
 626		/* lines starting with # are comments, skip them */
 627		if (buf[0] == '\0' || buf[0] == '#')
 628			continue;
 629		/* lines starting with ! are negative match filters */
 630		if (buf[0] == '!')
 631			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
 632		else
 633			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
 634		if (err)
 635			goto cleanup;
 636	}
 637
 638cleanup:
 639	fclose(f);
 640	return err;
 641}
 642
/* Built-in stat specs. Fields that aren't listed explicitly (variants, abs,
 * lens and, for the output specs, asc) are zero-initialized.
 */

/* default set of emitted stats: 7 columns */
static const struct stat_specs default_output_spec = {
	.spec_cnt = 7,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
	},
};

/* same as default_output_spec plus the two remaining numeric stats */
static const struct stat_specs default_csv_output_spec = {
	.spec_cnt = 9,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
		MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
	},
};

/* default ordering: by file name, then by program name, both ascending */
static const struct stat_specs default_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};

/* sorting for comparison mode to join two data sets */
static const struct stat_specs join_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
 676
/* Static description of every stat, indexed by enum stat_id. */
static struct stat_def {
	const char *header;	/* human-readable column title */
	const char *names[4];	/* names accepted in specs/filters; unused slots are NULL */
	bool asc_by_default;	/* sort order used when a spec has no order symbol */
	bool left_aligned;
} stat_defs[] = {
	[FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
	[PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
	[VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
	/* numeric stats below default to descending order and are not left-aligned */
	[DURATION] = { "Duration (us)", {"duration", "dur"}, },
	[TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
	[TOTAL_STATES] = { "States", {"total_states", "states"}, },
	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
	[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
};
 693
 694static bool parse_stat_id_var(const char *name, size_t len, int *id,
 695			      enum stat_variant *var, bool *is_abs)
 696{
 697	static const char *var_sfxs[] = {
 698		[VARIANT_A] = "_a",
 699		[VARIANT_B] = "_b",
 700		[VARIANT_DIFF] = "_diff",
 701		[VARIANT_PCT] = "_pct",
 702	};
 703	int i, j, k;
 704
 705	/* |<stat>| means we take absolute value of given stat */
 706	*is_abs = false;
 707	if (len > 2 && name[0] == '|' && name[len - 1] == '|') {
 708		*is_abs = true;
 709		name += 1;
 710		len -= 2;
 711	}
 712
 713	for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
 714		struct stat_def *def = &stat_defs[i];
 715		size_t alias_len, sfx_len;
 716		const char *alias;
 717
 718		for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
 719			alias = def->names[j];
 720			if (!alias)
 721				continue;
 722
 723			alias_len = strlen(alias);
 724			if (strncmp(name, alias, alias_len) != 0)
 725				continue;
 726
 727			if (alias_len == len) {
 728				/* If no variant suffix is specified, we
 729				 * assume control group (just in case we are
 730				 * in comparison mode. Variant is ignored in
 731				 * non-comparison mode.
 732				 */
 733				*var = VARIANT_B;
 734				*id = i;
 735				return true;
 736			}
 737
 738			for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
 739				sfx_len = strlen(var_sfxs[k]);
 740				if (alias_len + sfx_len != len)
 741					continue;
 742
 743				if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
 744					*var = (enum stat_variant)k;
 745					*id = i;
 746					return true;
 747				}
 748			}
 749		}
 750	}
 751
 752	return false;
 753}
 754
 755static bool is_asc_sym(char c)
 756{
 757	return c == '^';
 758}
 759
 760static bool is_desc_sym(char c)
 761{
 762	return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_';
 763}
 764
 765static int parse_stat(const char *stat_name, struct stat_specs *specs)
 766{
 767	int id;
 768	bool has_order = false, is_asc = false, is_abs = false;
 769	size_t len = strlen(stat_name);
 770	enum stat_variant var;
 771
 772	if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
 773		fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
 774		return -E2BIG;
 775	}
 776
 777	if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
 778		has_order = true;
 779		is_asc = is_asc_sym(stat_name[len - 1]);
 780		len -= 1;
 781	}
 782
 783	if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) {
 784		fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
 785		return -ESRCH;
 786	}
 787
 788	specs->ids[specs->spec_cnt] = id;
 789	specs->variants[specs->spec_cnt] = var;
 790	specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
 791	specs->abs[specs->spec_cnt] = is_abs;
 792	specs->spec_cnt++;
 793
 794	return 0;
 795}
 796
 797static int parse_stats(const char *stats_str, struct stat_specs *specs)
 798{
 799	char *input, *state = NULL, *next;
 800	int err, cnt = 0;
 801
 802	input = strdup(stats_str);
 803	if (!input)
 804		return -ENOMEM;
 805
 806	while ((next = strtok_r(cnt++ ? NULL : input, ",", &state))) {
 807		err = parse_stat(next, specs);
 808		if (err) {
 809			free(input);
 810			return err;
 811		}
 812	}
 813
 814	free(input);
 815	return 0;
 816}
 817
 818static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
 819{
 820	int i;
 821
 822	if (!stats)
 823		return;
 824
 825	for (i = 0; i < stat_cnt; i++) {
 826		free(stats[i].file_name);
 827		free(stats[i].prog_name);
 828	}
 829	free(stats);
 830}
 831
/* Statically allocated 64KB buffer for verifier log output */
static char verif_log_buf[64 * 1024];

/* parse_verif_log() looks at no more than this many lines, counted from the
 * end of the log
 */
#define MAX_PARSED_LOG_LINES 100
 835
 836static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
 837{
 838	const char *cur;
 839	int pos, lines;
 840
 841	buf[buf_sz - 1] = '\0';
 842
 843	for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
 844		/* find previous endline or otherwise take the start of log buf */
 845		for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
 846		}
 847		/* next time start from end of previous line (or pos goes to <0) */
 848		pos--;
 849		/* if we found endline, point right after endline symbol;
 850		 * otherwise, stay at the beginning of log buf
 851		 */
 852		if (cur[0] == '\n')
 853			cur++;
 854
 855		if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
 856			continue;
 857		if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
 858				&s->stats[TOTAL_INSNS],
 859				&s->stats[MAX_STATES_PER_INSN],
 860				&s->stats[TOTAL_STATES],
 861				&s->stats[PEAK_STATES],
 862				&s->stats[MARK_READ_MAX_LEN]))
 863			continue;
 864	}
 865
 866	return 0;
 867}
 868
 869struct line_cnt {
 870	char *line;
 871	int cnt;
 872};
 873
 874static int str_cmp(const void *a, const void *b)
 875{
 876	const char **str1 = (const char **)a;
 877	const char **str2 = (const char **)b;
 878
 879	return strcmp(*str1, *str2);
 880}
 881
 882static int line_cnt_cmp(const void *a, const void *b)
 883{
 884	const struct line_cnt *a_cnt = (const struct line_cnt *)a;
 885	const struct line_cnt *b_cnt = (const struct line_cnt *)b;
 886
 887	if (a_cnt->cnt != b_cnt->cnt)
 888		return a_cnt->cnt < b_cnt->cnt ? -1 : 1;
 889	return strcmp(a_cnt->line, b_cnt->line);
 890}
 891
 892static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name)
 893{
 894	int lines_cap = 0;
 895	int lines_size = 0;
 896	char **lines = NULL;
 897	char *line = NULL;
 898	char *state;
 899	struct line_cnt *freq = NULL;
 900	struct line_cnt *cur;
 901	int unique_lines;
 902	int err = 0;
 903	int i;
 904
 905	while ((line = strtok_r(line ? NULL : buf, "\n", &state))) {
 906		if (strncmp(line, "; ", 2) != 0)
 907			continue;
 908		line += 2;
 909
 910		if (lines_size == lines_cap) {
 911			char **tmp;
 912
 913			lines_cap = max(16, lines_cap * 2);
 914			tmp = realloc(lines, lines_cap * sizeof(*tmp));
 915			if (!tmp) {
 916				err = -ENOMEM;
 917				goto cleanup;
 918			}
 919			lines = tmp;
 920		}
 921		lines[lines_size] = line;
 922		lines_size++;
 923	}
 924
 925	if (lines_size == 0)
 926		goto cleanup;
 927
 928	qsort(lines, lines_size, sizeof(*lines), str_cmp);
 929
 930	freq = calloc(lines_size, sizeof(*freq));
 931	if (!freq) {
 932		err = -ENOMEM;
 933		goto cleanup;
 934	}
 935
 936	cur = freq;
 937	cur->line = lines[0];
 938	cur->cnt = 1;
 939	for (i = 1; i < lines_size; ++i) {
 940		if (strcmp(lines[i], cur->line) != 0) {
 941			cur++;
 942			cur->line = lines[i];
 943			cur->cnt = 0;
 944		}
 945		cur->cnt++;
 946	}
 947	unique_lines = cur - freq + 1;
 948
 949	qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp);
 950
 951	printf("Top source lines (%s):\n", prog_name);
 952	for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
 953		const char *src_code = freq[i].line;
 954		const char *src_line = NULL;
 955		char *split = strrchr(freq[i].line, '@');
 956
 957		if (split) {
 958			src_line = split + 1;
 959
 960			while (*src_line && isspace(*src_line))
 961				src_line++;
 962
 963			while (split > src_code && isspace(*split))
 964				split--;
 965			*split = '\0';
 966		}
 967
 968		if (src_line)
 969			printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code);
 970		else
 971			printf("%5d: %s\n", freq[i].cnt, src_code);
 972	}
 973	printf("\n");
 974
 975cleanup:
 976	free(freq);
 977	free(lines);
 978	return err;
 979}
 980
 981static int guess_prog_type_by_ctx_name(const char *ctx_name,
 982				       enum bpf_prog_type *prog_type,
 983				       enum bpf_attach_type *attach_type)
 984{
 985	/* We need to guess program type based on its declared context type.
 986	 * This guess can't be perfect as many different program types might
 987	 * share the same context type.  So we can only hope to reasonably
 988	 * well guess this and get lucky.
 989	 *
 990	 * Just in case, we support both UAPI-side type names and
 991	 * kernel-internal names.
 992	 */
 993	static struct {
 994		const char *uapi_name;
 995		const char *kern_name;
 996		enum bpf_prog_type prog_type;
 997		enum bpf_attach_type attach_type;
 998	} ctx_map[] = {
 999		/* __sk_buff is most ambiguous, we assume TC program */
1000		{ "__sk_buff", "sk_buff", BPF_PROG_TYPE_SCHED_CLS },
1001		{ "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND },
1002		{ "bpf_sock_addr", "bpf_sock_addr_kern",  BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND },
1003		{ "bpf_sock_ops", "bpf_sock_ops_kern", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS },
1004		{ "sk_msg_md", "sk_msg", BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT },
1005		{ "bpf_cgroup_dev_ctx", "bpf_cgroup_dev_ctx", BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE },
1006		{ "bpf_sysctl", "bpf_sysctl_kern", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL },
1007		{ "bpf_sockopt", "bpf_sockopt_kern", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT },
1008		{ "sk_reuseport_md", "sk_reuseport_kern", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE },
1009		{ "bpf_sk_lookup", "bpf_sk_lookup_kern", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP },
1010		{ "xdp_md", "xdp_buff", BPF_PROG_TYPE_XDP, BPF_XDP },
1011		/* tracing types with no expected attach type */
1012		{ "bpf_user_pt_regs_t", "pt_regs", BPF_PROG_TYPE_KPROBE },
1013		{ "bpf_perf_event_data", "bpf_perf_event_data_kern", BPF_PROG_TYPE_PERF_EVENT },
1014		/* raw_tp programs use u64[] from kernel side, we don't want
1015		 * to match on that, probably; so NULL for kern-side type
1016		 */
1017		{ "bpf_raw_tracepoint_args", NULL, BPF_PROG_TYPE_RAW_TRACEPOINT },
1018	};
1019	int i;
1020
1021	if (!ctx_name)
1022		return -EINVAL;
1023
1024	for (i = 0; i < ARRAY_SIZE(ctx_map); i++) {
1025		if (strcmp(ctx_map[i].uapi_name, ctx_name) == 0 ||
1026		    (ctx_map[i].kern_name && strcmp(ctx_map[i].kern_name, ctx_name) == 0)) {
1027			*prog_type = ctx_map[i].prog_type;
1028			*attach_type = ctx_map[i].attach_type;
1029			return 0;
1030		}
1031	}
1032
1033	return -ESRCH;
1034}
1035
1036static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
1037{
1038	struct bpf_map *map;
1039
1040	bpf_object__for_each_map(map, obj) {
1041		/* disable pinning */
1042		bpf_map__set_pin_path(map, NULL);
1043
1044		/* fix up map size, if necessary */
1045		switch (bpf_map__type(map)) {
1046		case BPF_MAP_TYPE_SK_STORAGE:
1047		case BPF_MAP_TYPE_TASK_STORAGE:
1048		case BPF_MAP_TYPE_INODE_STORAGE:
1049		case BPF_MAP_TYPE_CGROUP_STORAGE:
1050			break;
1051		default:
1052			if (bpf_map__max_entries(map) == 0)
1053				bpf_map__set_max_entries(map, 1);
1054		}
1055	}
1056
1057	/* SEC(freplace) programs can't be loaded with veristat as is,
1058	 * but we can try guessing their target program's expected type by
1059	 * looking at the type of program's first argument and substituting
1060	 * corresponding program type
1061	 */
1062	if (bpf_program__type(prog) == BPF_PROG_TYPE_EXT) {
1063		const struct btf *btf = bpf_object__btf(obj);
1064		const char *prog_name = bpf_program__name(prog);
1065		enum bpf_prog_type prog_type;
1066		enum bpf_attach_type attach_type;
1067		const struct btf_type *t;
1068		const char *ctx_name;
1069		int id;
1070
1071		if (!btf)
1072			goto skip_freplace_fixup;
1073
1074		id = btf__find_by_name_kind(btf, prog_name, BTF_KIND_FUNC);
1075		t = btf__type_by_id(btf, id);
1076		t = btf__type_by_id(btf, t->type);
1077		if (!btf_is_func_proto(t) || btf_vlen(t) != 1)
1078			goto skip_freplace_fixup;
1079
1080		/* context argument is a pointer to a struct/typedef */
1081		t = btf__type_by_id(btf, btf_params(t)[0].type);
1082		while (t && btf_is_mod(t))
1083			t = btf__type_by_id(btf, t->type);
1084		if (!t || !btf_is_ptr(t))
1085			goto skip_freplace_fixup;
1086		t = btf__type_by_id(btf, t->type);
1087		while (t && btf_is_mod(t))
1088			t = btf__type_by_id(btf, t->type);
1089		if (!t)
1090			goto skip_freplace_fixup;
1091
1092		ctx_name = btf__name_by_offset(btf, t->name_off);
1093
1094		if (guess_prog_type_by_ctx_name(ctx_name, &prog_type, &attach_type) == 0) {
1095			bpf_program__set_type(prog, prog_type);
1096			bpf_program__set_expected_attach_type(prog, attach_type);
1097
1098			if (!env.quiet) {
1099				printf("Using guessed program type '%s' for %s/%s...\n",
1100					libbpf_bpf_prog_type_str(prog_type),
1101					filename, prog_name);
1102			}
1103		} else {
1104			if (!env.quiet) {
1105				printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
1106					ctx_name, filename, prog_name);
1107			}
1108		}
1109	}
1110skip_freplace_fixup:
1111	return;
1112}
1113
1114static int max_verifier_log_size(void)
1115{
1116	const int SMALL_LOG_SIZE = UINT_MAX >> 8;
1117	const int BIG_LOG_SIZE = UINT_MAX >> 2;
1118	struct bpf_insn insns[] = {
1119		{ .code = BPF_ALU | BPF_MOV | BPF_X, .dst_reg = BPF_REG_0, },
1120		{ .code  = BPF_JMP | BPF_EXIT, },
1121	};
1122	LIBBPF_OPTS(bpf_prog_load_opts, opts,
1123		    .log_size = BIG_LOG_SIZE,
1124		    .log_buf = (void *)-1,
1125		    .log_level = 4
1126	);
1127	int ret, insn_cnt = ARRAY_SIZE(insns);
1128	static int log_size;
1129
1130	if (log_size != 0)
1131		return log_size;
1132
1133	ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
1134
1135	if (ret == -EFAULT)
1136		log_size = BIG_LOG_SIZE;
1137	else /* ret == -EINVAL, big log size is not supported by the verifier */
1138		log_size = SMALL_LOG_SIZE;
1139
1140	return log_size;
1141}
1142
1143static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
1144{
1145	const char *base_filename = basename(strdupa(filename));
1146	const char *prog_name = bpf_program__name(prog);
1147	char *buf;
1148	int buf_sz, log_level;
1149	struct verif_stats *stats;
1150	int err = 0;
1151	void *tmp;
1152
1153	if (!should_process_file_prog(base_filename, bpf_program__name(prog))) {
1154		env.progs_skipped++;
1155		return 0;
1156	}
1157
1158	tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
1159	if (!tmp)
1160		return -ENOMEM;
1161	env.prog_stats = tmp;
1162	stats = &env.prog_stats[env.prog_stat_cnt++];
1163	memset(stats, 0, sizeof(*stats));
1164
1165	if (env.verbose || env.top_src_lines > 0) {
1166		buf_sz = env.log_size ? env.log_size : max_verifier_log_size();
1167		buf = malloc(buf_sz);
1168		if (!buf)
1169			return -ENOMEM;
1170		/* ensure we always request stats */
1171		log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
1172		/* --top-src-lines needs verifier log */
1173		if (env.top_src_lines > 0 && env.log_level == 0)
1174			log_level |= 2;
1175	} else {
1176		buf = verif_log_buf;
1177		buf_sz = sizeof(verif_log_buf);
1178		/* request only verifier stats */
1179		log_level = 4 | (env.log_fixed ? 8 : 0);
1180	}
1181	verif_log_buf[0] = '\0';
1182
1183	bpf_program__set_log_buf(prog, buf, buf_sz);
1184	bpf_program__set_log_level(prog, log_level);
1185
1186	/* increase chances of successful BPF object loading */
1187	fixup_obj(obj, prog, base_filename);
1188
1189	if (env.force_checkpoints)
1190		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
1191	if (env.force_reg_invariants)
1192		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
1193
1194	err = bpf_object__load(obj);
1195	env.progs_processed++;
1196
1197	stats->file_name = strdup(base_filename);
1198	stats->prog_name = strdup(bpf_program__name(prog));
1199	stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
1200	parse_verif_log(buf, buf_sz, stats);
1201
1202	if (env.verbose) {
1203		printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
1204		       filename, prog_name, stats->stats[DURATION],
1205		       err ? "failure" : "success", buf);
1206	}
1207	if (env.top_src_lines > 0)
1208		print_top_src_lines(buf, buf_sz, stats->prog_name);
1209
1210	if (verif_log_buf != buf)
1211		free(buf);
1212
1213	return 0;
1214};
1215
1216static int process_obj(const char *filename)
1217{
1218	const char *base_filename = basename(strdupa(filename));
1219	struct bpf_object *obj = NULL, *tobj;
1220	struct bpf_program *prog, *tprog, *lprog;
1221	libbpf_print_fn_t old_libbpf_print_fn;
1222	LIBBPF_OPTS(bpf_object_open_opts, opts);
1223	int err = 0, prog_cnt = 0;
1224
1225	if (!should_process_file_prog(base_filename, NULL)) {
1226		if (env.verbose)
1227			printf("Skipping '%s' due to filters...\n", filename);
1228		env.files_skipped++;
1229		return 0;
1230	}
1231	if (!is_bpf_obj_file(filename)) {
1232		if (env.verbose)
1233			printf("Skipping '%s' as it's not a BPF object file...\n", filename);
1234		env.files_skipped++;
1235		return 0;
1236	}
1237
1238	if (!env.quiet && env.out_fmt == RESFMT_TABLE)
1239		printf("Processing '%s'...\n", base_filename);
1240
1241	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
1242	obj = bpf_object__open_file(filename, &opts);
1243	if (!obj) {
1244		/* if libbpf can't open BPF object file, it could be because
1245		 * that BPF object file is incomplete and has to be statically
1246		 * linked into a final BPF object file; instead of bailing
1247		 * out, report it into stderr, mark it as skipped, and
1248		 * proceed
1249		 */
1250		fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
1251		env.files_skipped++;
1252		err = 0;
1253		goto cleanup;
1254	}
1255
1256	env.files_processed++;
1257
1258	bpf_object__for_each_program(prog, obj) {
1259		prog_cnt++;
1260	}
1261
1262	if (prog_cnt == 1) {
1263		prog = bpf_object__next_program(obj, NULL);
1264		bpf_program__set_autoload(prog, true);
1265		process_prog(filename, obj, prog);
1266		goto cleanup;
1267	}
1268
1269	bpf_object__for_each_program(prog, obj) {
1270		const char *prog_name = bpf_program__name(prog);
1271
1272		tobj = bpf_object__open_file(filename, &opts);
1273		if (!tobj) {
1274			err = -errno;
1275			fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1276			goto cleanup;
1277		}
1278
1279		lprog = NULL;
1280		bpf_object__for_each_program(tprog, tobj) {
1281			const char *tprog_name = bpf_program__name(tprog);
1282
1283			if (strcmp(prog_name, tprog_name) == 0) {
1284				bpf_program__set_autoload(tprog, true);
1285				lprog = tprog;
1286			} else {
1287				bpf_program__set_autoload(tprog, false);
1288			}
1289		}
1290
1291		process_prog(filename, tobj, lprog);
1292		bpf_object__close(tobj);
1293	}
1294
1295cleanup:
1296	bpf_object__close(obj);
1297	libbpf_set_print(old_libbpf_print_fn);
1298	return err;
1299}
1300
1301static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
1302		    enum stat_id id, bool asc, bool abs)
1303{
1304	int cmp = 0;
1305
1306	switch (id) {
1307	case FILE_NAME:
1308		cmp = strcmp(s1->file_name, s2->file_name);
1309		break;
1310	case PROG_NAME:
1311		cmp = strcmp(s1->prog_name, s2->prog_name);
1312		break;
1313	case VERDICT:
1314	case DURATION:
1315	case TOTAL_INSNS:
1316	case TOTAL_STATES:
1317	case PEAK_STATES:
1318	case MAX_STATES_PER_INSN:
1319	case MARK_READ_MAX_LEN: {
1320		long v1 = s1->stats[id];
1321		long v2 = s2->stats[id];
1322
1323		if (abs) {
1324			v1 = v1 < 0 ? -v1 : v1;
1325			v2 = v2 < 0 ? -v2 : v2;
1326		}
1327
1328		if (v1 != v2)
1329			cmp = v1 < v2 ? -1 : 1;
1330		break;
1331	}
1332	default:
1333		fprintf(stderr, "Unrecognized stat #%d\n", id);
1334		exit(1);
1335	}
1336
1337	return asc ? cmp : -cmp;
1338}
1339
1340static int cmp_prog_stats(const void *v1, const void *v2)
1341{
1342	const struct verif_stats *s1 = v1, *s2 = v2;
1343	int i, cmp;
1344
1345	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1346		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
1347			       env.sort_spec.asc[i], env.sort_spec.abs[i]);
1348		if (cmp != 0)
1349			return cmp;
1350	}
1351
1352	/* always disambiguate with file+prog, which are unique */
1353	cmp = strcmp(s1->file_name, s2->file_name);
1354	if (cmp != 0)
1355		return cmp;
1356	return strcmp(s1->prog_name, s2->prog_name);
1357}
1358
1359static void fetch_join_stat_value(const struct verif_stats_join *s,
1360				  enum stat_id id, enum stat_variant var,
1361				  const char **str_val,
1362				  double *num_val)
1363{
1364	long v1, v2;
1365
1366	if (id == FILE_NAME) {
1367		*str_val = s->file_name;
1368		return;
1369	}
1370	if (id == PROG_NAME) {
1371		*str_val = s->prog_name;
1372		return;
1373	}
1374
1375	v1 = s->stats_a ? s->stats_a->stats[id] : 0;
1376	v2 = s->stats_b ? s->stats_b->stats[id] : 0;
1377
1378	switch (var) {
1379	case VARIANT_A:
1380		if (!s->stats_a)
1381			*num_val = -DBL_MAX;
1382		else
1383			*num_val = s->stats_a->stats[id];
1384		return;
1385	case VARIANT_B:
1386		if (!s->stats_b)
1387			*num_val = -DBL_MAX;
1388		else
1389			*num_val = s->stats_b->stats[id];
1390		return;
1391	case VARIANT_DIFF:
1392		if (!s->stats_a || !s->stats_b)
1393			*num_val = -DBL_MAX;
1394		else if (id == VERDICT)
1395			*num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
1396		else
1397			*num_val = (double)(v2 - v1);
1398		return;
1399	case VARIANT_PCT:
1400		if (!s->stats_a || !s->stats_b) {
1401			*num_val = -DBL_MAX;
1402		} else if (v1 == 0) {
1403			if (v1 == v2)
1404				*num_val = 0.0;
1405			else
1406				*num_val = v2 < v1 ? -100.0 : 100.0;
1407		} else {
1408			 *num_val = (v2 - v1) * 100.0 / v1;
1409		}
1410		return;
1411	}
1412}
1413
1414static int cmp_join_stat(const struct verif_stats_join *s1,
1415			 const struct verif_stats_join *s2,
1416			 enum stat_id id, enum stat_variant var,
1417			 bool asc, bool abs)
1418{
1419	const char *str1 = NULL, *str2 = NULL;
1420	double v1 = 0.0, v2 = 0.0;
1421	int cmp = 0;
1422
1423	fetch_join_stat_value(s1, id, var, &str1, &v1);
1424	fetch_join_stat_value(s2, id, var, &str2, &v2);
1425
1426	if (abs) {
1427		v1 = fabs(v1);
1428		v2 = fabs(v2);
1429	}
1430
1431	if (str1)
1432		cmp = strcmp(str1, str2);
1433	else if (v1 != v2)
1434		cmp = v1 < v2 ? -1 : 1;
1435
1436	return asc ? cmp : -cmp;
1437}
1438
1439static int cmp_join_stats(const void *v1, const void *v2)
1440{
1441	const struct verif_stats_join *s1 = v1, *s2 = v2;
1442	int i, cmp;
1443
1444	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1445		cmp = cmp_join_stat(s1, s2,
1446				    env.sort_spec.ids[i],
1447				    env.sort_spec.variants[i],
1448				    env.sort_spec.asc[i],
1449				    env.sort_spec.abs[i]);
1450		if (cmp != 0)
1451			return cmp;
1452	}
1453
1454	/* always disambiguate with file+prog, which are unique */
1455	cmp = strcmp(s1->file_name, s2->file_name);
1456	if (cmp != 0)
1457		return cmp;
1458	return strcmp(s1->prog_name, s2->prog_name);
1459}
1460
/* character repeated to draw the underline row of a table */
#define HEADER_CHAR '-'
/* separator printed between adjacent table columns */
#define COLUMN_SEP "  "
1463
1464static void output_header_underlines(void)
1465{
1466	int i, j, len;
1467
1468	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1469		len = env.output_spec.lens[i];
1470
1471		printf("%s", i == 0 ? "" : COLUMN_SEP);
1472		for (j = 0; j < len; j++)
1473			printf("%c", HEADER_CHAR);
1474	}
1475	printf("\n");
1476}
1477
1478static void output_headers(enum resfmt fmt)
1479{
1480	const char *fmt_str;
1481	int i, len;
1482
1483	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1484		int id = env.output_spec.ids[i];
1485		int *max_len = &env.output_spec.lens[i];
1486
1487		switch (fmt) {
1488		case RESFMT_TABLE_CALCLEN:
1489			len = snprintf(NULL, 0, "%s", stat_defs[id].header);
1490			if (len > *max_len)
1491				*max_len = len;
1492			break;
1493		case RESFMT_TABLE:
1494			fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
1495			printf(fmt_str, i == 0 ? "" : COLUMN_SEP,  *max_len, stat_defs[id].header);
1496			if (i == env.output_spec.spec_cnt - 1)
1497				printf("\n");
1498			break;
1499		case RESFMT_CSV:
1500			printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
1501			if (i == env.output_spec.spec_cnt - 1)
1502				printf("\n");
1503			break;
1504		}
1505	}
1506
1507	if (fmt == RESFMT_TABLE)
1508		output_header_underlines();
1509}
1510
1511static void prepare_value(const struct verif_stats *s, enum stat_id id,
1512			  const char **str, long *val)
1513{
1514	switch (id) {
1515	case FILE_NAME:
1516		*str = s ? s->file_name : "N/A";
1517		break;
1518	case PROG_NAME:
1519		*str = s ? s->prog_name : "N/A";
1520		break;
1521	case VERDICT:
1522		if (!s)
1523			*str = "N/A";
1524		else
1525			*str = s->stats[VERDICT] ? "success" : "failure";
1526		break;
1527	case DURATION:
1528	case TOTAL_INSNS:
1529	case TOTAL_STATES:
1530	case PEAK_STATES:
1531	case MAX_STATES_PER_INSN:
1532	case MARK_READ_MAX_LEN:
1533		*val = s ? s->stats[id] : 0;
1534		break;
1535	default:
1536		fprintf(stderr, "Unrecognized stat #%d\n", id);
1537		exit(1);
1538	}
1539}
1540
1541static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
1542{
1543	int i;
1544
1545	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1546		int id = env.output_spec.ids[i];
1547		int *max_len = &env.output_spec.lens[i], len;
1548		const char *str = NULL;
1549		long val = 0;
1550
1551		prepare_value(s, id, &str, &val);
1552
1553		switch (fmt) {
1554		case RESFMT_TABLE_CALCLEN:
1555			if (str)
1556				len = snprintf(NULL, 0, "%s", str);
1557			else
1558				len = snprintf(NULL, 0, "%ld", val);
1559			if (len > *max_len)
1560				*max_len = len;
1561			break;
1562		case RESFMT_TABLE:
1563			if (str)
1564				printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
1565			else
1566				printf("%s%*ld", i == 0 ? "" : COLUMN_SEP,  *max_len, val);
1567			if (i == env.output_spec.spec_cnt - 1)
1568				printf("\n");
1569			break;
1570		case RESFMT_CSV:
1571			if (str)
1572				printf("%s%s", i == 0 ? "" : ",", str);
1573			else
1574				printf("%s%ld", i == 0 ? "" : ",", val);
1575			if (i == env.output_spec.spec_cnt - 1)
1576				printf("\n");
1577			break;
1578		}
1579	}
1580
1581	if (last && fmt == RESFMT_TABLE) {
1582		output_header_underlines();
1583		printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
1584		       env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
1585	}
1586}
1587
1588static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
1589{
1590	switch (id) {
1591	case FILE_NAME:
1592		st->file_name = strdup(str);
1593		if (!st->file_name)
1594			return -ENOMEM;
1595		break;
1596	case PROG_NAME:
1597		st->prog_name = strdup(str);
1598		if (!st->prog_name)
1599			return -ENOMEM;
1600		break;
1601	case VERDICT:
1602		if (strcmp(str, "success") == 0) {
1603			st->stats[VERDICT] = true;
1604		} else if (strcmp(str, "failure") == 0) {
1605			st->stats[VERDICT] = false;
1606		} else {
1607			fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
1608			return -EINVAL;
1609		}
1610		break;
1611	case DURATION:
1612	case TOTAL_INSNS:
1613	case TOTAL_STATES:
1614	case PEAK_STATES:
1615	case MAX_STATES_PER_INSN:
1616	case MARK_READ_MAX_LEN: {
1617		long val;
1618		int err, n;
1619
1620		if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
1621			err = -errno;
1622			fprintf(stderr, "Failed to parse '%s' as integer\n", str);
1623			return err;
1624		}
1625
1626		st->stats[id] = val;
1627		break;
1628	}
1629	default:
1630		fprintf(stderr, "Unrecognized stat #%d\n", id);
1631		return -EINVAL;
1632	}
1633	return 0;
1634}
1635
/* Read a CSV file of verifier stats.
 *
 * The first line is treated as the header: each of its columns is passed
 * to parse_stat() to build up @specs. Every following line is parsed into
 * a new entry appended to the *@statsp array, with *@stat_cntp holding the
 * number of entries kept. Rows rejected by should_process_file_prog() are
 * dropped again after parsing.
 *
 * Returns 0 on success or a negative error code on open/read failure,
 * allocation failure, or a malformed row (too many or too few columns,
 * missing file/program name, unparsable value).
 */
static int parse_stats_csv(const char *filename, struct stat_specs *specs,
			   struct verif_stats **statsp, int *stat_cntp)
{
	char line[4096];
	FILE *f;
	int err = 0;
	bool header = true;

	f = fopen(filename, "r");
	if (!f) {
		err = -errno;
		fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
		return err;
	}

	*stat_cntp = 0;

	while (fgets(line, sizeof(line), f)) {
		char *input = line, *state = NULL, *next;
		struct verif_stats *st = NULL;
		int col = 0, cnt = 0;

		if (!header) {
			void *tmp;

			/* grow the array by one and claim a zeroed slot for this row */
			tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
			if (!tmp) {
				err = -ENOMEM;
				goto cleanup;
			}
			*statsp = tmp;

			st = &(*statsp)[*stat_cntp];
			memset(st, 0, sizeof(*st));

			*stat_cntp += 1;
		}

		/* split the line on ',' and '\n'; only the first strtok_r()
		 * call for a line is given the input buffer
		 */
		while ((next = strtok_r(cnt++ ? NULL : input, ",\n", &state))) {
			if (header) {
				/* for the first line, set up spec stats */
				err = parse_stat(next, specs);
				if (err)
					goto cleanup;
				continue;
			}

			/* for all other lines, parse values based on spec */
			if (col >= specs->spec_cnt) {
				fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
					col, *stat_cntp, filename);
				err = -EINVAL;
				goto cleanup;
			}
			err = parse_stat_value(next, specs->ids[col], st);
			if (err)
				goto cleanup;
			col++;
		}

		if (header) {
			header = false;
			continue;
		}

		if (col < specs->spec_cnt) {
			fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
				*stat_cntp, filename);
			err = -EINVAL;
			goto cleanup;
		}

		if (!st->file_name || !st->prog_name) {
			fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
				*stat_cntp, filename);
			err = -EINVAL;
			goto cleanup;
		}

		/* in comparison mode we can only check filters after we
		 * parsed entire line; if row should be ignored we pretend we
		 * never parsed it
		 */
		if (!should_process_file_prog(st->file_name, st->prog_name)) {
			free(st->file_name);
			free(st->prog_name);
			/* the slot is claimed again by the next row */
			*stat_cntp -= 1;
		}
	}

	/* fgets() stopped before end-of-file: report it as an I/O error */
	if (!feof(f)) {
		err = -errno;
		fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
	}

cleanup:
	fclose(f);
	return err;
}
1735
/* Placeholder row with empty file and program names and every other field
 * zero-initialized; handle_comparison_mode() substitutes it for a data set
 * that has run out of rows while joining.
 */
static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
1738
1739static bool is_key_stat(enum stat_id id)
1740{
1741	return id == FILE_NAME || id == PROG_NAME;
1742}
1743
1744static void output_comp_header_underlines(void)
1745{
1746	int i, j, k;
1747
1748	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1749		int id = env.output_spec.ids[i];
1750		int max_j = is_key_stat(id) ? 1 : 3;
1751
1752		for (j = 0; j < max_j; j++) {
1753			int len = env.output_spec.lens[3 * i + j];
1754
1755			printf("%s", i + j == 0 ? "" : COLUMN_SEP);
1756
1757			for (k = 0; k < len; k++)
1758				printf("%c", HEADER_CHAR);
1759		}
1760	}
1761	printf("\n");
1762}
1763
1764static void output_comp_headers(enum resfmt fmt)
1765{
1766	static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
1767	static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
1768	int i, j, len;
1769
1770	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1771		int id = env.output_spec.ids[i];
1772		/* key stats don't have A/B/DIFF columns, they are common for both data sets */
1773		int max_j = is_key_stat(id) ? 1 : 3;
1774
1775		for (j = 0; j < max_j; j++) {
1776			int *max_len = &env.output_spec.lens[3 * i + j];
1777			bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
1778			const char *sfx;
1779
1780			switch (fmt) {
1781			case RESFMT_TABLE_CALCLEN:
1782				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1783				len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
1784				if (len > *max_len)
1785					*max_len = len;
1786				break;
1787			case RESFMT_TABLE:
1788				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1789				printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
1790				       *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
1791				if (last)
1792					printf("\n");
1793				break;
1794			case RESFMT_CSV:
1795				sfx = is_key_stat(id) ? "" : name_sfxs[j];
1796				printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
1797				if (last)
1798					printf("\n");
1799				break;
1800			}
1801		}
1802	}
1803
1804	if (fmt == RESFMT_TABLE)
1805		output_comp_header_underlines();
1806}
1807
1808static void output_comp_stats(const struct verif_stats_join *join_stats,
1809			      enum resfmt fmt, bool last)
1810{
1811	const struct verif_stats *base = join_stats->stats_a;
1812	const struct verif_stats *comp = join_stats->stats_b;
1813	char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
1814	int i;
1815
1816	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1817		int id = env.output_spec.ids[i], len;
1818		int *max_len_base = &env.output_spec.lens[3 * i + 0];
1819		int *max_len_comp = &env.output_spec.lens[3 * i + 1];
1820		int *max_len_diff = &env.output_spec.lens[3 * i + 2];
1821		const char *base_str = NULL, *comp_str = NULL;
1822		long base_val = 0, comp_val = 0, diff_val = 0;
1823
1824		prepare_value(base, id, &base_str, &base_val);
1825		prepare_value(comp, id, &comp_str, &comp_val);
1826
1827		/* normalize all the outputs to be in string buffers for simplicity */
1828		if (is_key_stat(id)) {
1829			/* key stats (file and program name) are always strings */
1830			if (base)
1831				snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1832			else
1833				snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
1834		} else if (base_str) {
1835			snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1836			snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
1837			if (!base || !comp)
1838				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1839			else if (strcmp(base_str, comp_str) == 0)
1840				snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
1841			else
1842				snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
1843		} else {
1844			double p = 0.0;
1845
1846			if (base)
1847				snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
1848			else
1849				snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
1850			if (comp)
1851				snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
1852			else
1853				snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
1854
1855			diff_val = comp_val - base_val;
1856			if (!base || !comp) {
1857				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1858			} else {
1859				if (base_val == 0) {
1860					if (comp_val == base_val)
1861						p = 0.0; /* avoid +0 (+100%) case */
1862					else
1863						p = comp_val < base_val ? -100.0 : 100.0;
1864				} else {
1865					 p = diff_val * 100.0 / base_val;
1866				}
1867				snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
1868			}
1869		}
1870
1871		switch (fmt) {
1872		case RESFMT_TABLE_CALCLEN:
1873			len = strlen(base_buf);
1874			if (len > *max_len_base)
1875				*max_len_base = len;
1876			if (!is_key_stat(id)) {
1877				len = strlen(comp_buf);
1878				if (len > *max_len_comp)
1879					*max_len_comp = len;
1880				len = strlen(diff_buf);
1881				if (len > *max_len_diff)
1882					*max_len_diff = len;
1883			}
1884			break;
1885		case RESFMT_TABLE: {
1886			/* string outputs are left-aligned, number outputs are right-aligned */
1887			const char *fmt = base_str ? "%s%-*s" : "%s%*s";
1888
1889			printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
1890			if (!is_key_stat(id)) {
1891				printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
1892				printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
1893			}
1894			if (i == env.output_spec.spec_cnt - 1)
1895				printf("\n");
1896			break;
1897		}
1898		case RESFMT_CSV:
1899			printf("%s%s", i == 0 ? "" : ",", base_buf);
1900			if (!is_key_stat(id)) {
1901				printf("%s%s", i == 0 ? "" : ",", comp_buf);
1902				printf("%s%s", i == 0 ? "" : ",", diff_buf);
1903			}
1904			if (i == env.output_spec.spec_cnt - 1)
1905				printf("\n");
1906			break;
1907		}
1908	}
1909
1910	if (last && fmt == RESFMT_TABLE)
1911		output_comp_header_underlines();
1912}
1913
1914static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
1915{
1916	int r;
1917
1918	r = strcmp(base->file_name, comp->file_name);
1919	if (r != 0)
1920		return r;
1921	return strcmp(base->prog_name, comp->prog_name);
1922}
1923
1924static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
1925{
1926	static const double eps = 1e-9;
1927	const char *str = NULL;
1928	double value = 0.0;
1929
1930	fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
1931
1932	if (f->abs)
1933		value = fabs(value);
1934
1935	switch (f->op) {
1936	case OP_EQ: return value > f->value - eps && value < f->value + eps;
1937	case OP_NEQ: return value < f->value - eps || value > f->value + eps;
1938	case OP_LT: return value < f->value - eps;
1939	case OP_LE: return value <= f->value + eps;
1940	case OP_GT: return value > f->value + eps;
1941	case OP_GE: return value >= f->value - eps;
1942	}
1943
1944	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1945	return false;
1946}
1947
1948static bool should_output_join_stats(const struct verif_stats_join *stats)
1949{
1950	struct filter *f;
1951	int i, allow_cnt = 0;
1952
1953	for (i = 0; i < env.deny_filter_cnt; i++) {
1954		f = &env.deny_filters[i];
1955		if (f->kind != FILTER_STAT)
1956			continue;
1957
1958		if (is_join_stat_filter_matched(f, stats))
1959			return false;
1960	}
1961
1962	for (i = 0; i < env.allow_filter_cnt; i++) {
1963		f = &env.allow_filters[i];
1964		if (f->kind != FILTER_STAT)
1965			continue;
1966		allow_cnt++;
1967
1968		if (is_join_stat_filter_matched(f, stats))
1969			return true;
1970	}
1971
1972	/* if there are no stat allowed filters, pass everything through */
1973	return allow_cnt == 0;
1974}
1975
1976static int handle_comparison_mode(void)
1977{
1978	struct stat_specs base_specs = {}, comp_specs = {};
1979	struct stat_specs tmp_sort_spec;
1980	enum resfmt cur_fmt;
1981	int err, i, j, last_idx, cnt;
1982
1983	if (env.filename_cnt != 2) {
1984		fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
1985		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1986		return -EINVAL;
1987	}
1988
1989	err = parse_stats_csv(env.filenames[0], &base_specs,
1990			      &env.baseline_stats, &env.baseline_stat_cnt);
1991	if (err) {
1992		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1993		return err;
1994	}
1995	err = parse_stats_csv(env.filenames[1], &comp_specs,
1996			      &env.prog_stats, &env.prog_stat_cnt);
1997	if (err) {
1998		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
1999		return err;
2000	}
2001
2002	/* To keep it simple we validate that the set and order of stats in
2003	 * both CSVs are exactly the same. This can be lifted with a bit more
2004	 * pre-processing later.
2005	 */
2006	if (base_specs.spec_cnt != comp_specs.spec_cnt) {
2007		fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
2008			env.filenames[0], env.filenames[1],
2009			base_specs.spec_cnt, comp_specs.spec_cnt);
2010		return -EINVAL;
2011	}
2012	for (i = 0; i < base_specs.spec_cnt; i++) {
2013		if (base_specs.ids[i] != comp_specs.ids[i]) {
2014			fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
2015				env.filenames[0], env.filenames[1],
2016				stat_defs[base_specs.ids[i]].names[0],
2017				stat_defs[comp_specs.ids[i]].names[0]);
2018			return -EINVAL;
2019		}
2020	}
2021
2022	/* Replace user-specified sorting spec with file+prog sorting rule to
2023	 * be able to join two datasets correctly. Once we are done, we will
2024	 * restore the original sort spec.
2025	 */
2026	tmp_sort_spec = env.sort_spec;
2027	env.sort_spec = join_sort_spec;
2028	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2029	qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
2030	env.sort_spec = tmp_sort_spec;
2031
2032	/* Join two datasets together. If baseline and comparison datasets
2033	 * have different subset of rows (we match by 'object + prog' as
2034	 * a unique key) then assume empty/missing/zero value for rows that
2035	 * are missing in the opposite data set.
2036	 */
2037	i = j = 0;
2038	while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
2039		const struct verif_stats *base, *comp;
2040		struct verif_stats_join *join;
2041		void *tmp;
2042		int r;
2043
2044		base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
2045		comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
2046
2047		if (!base->file_name || !base->prog_name) {
2048			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
2049				i, env.filenames[0]);
2050			return -EINVAL;
2051		}
2052		if (!comp->file_name || !comp->prog_name) {
2053			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
2054				j, env.filenames[1]);
2055			return -EINVAL;
2056		}
2057
2058		tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
2059		if (!tmp)
2060			return -ENOMEM;
2061		env.join_stats = tmp;
2062
2063		join = &env.join_stats[env.join_stat_cnt];
2064		memset(join, 0, sizeof(*join));
2065
2066		r = cmp_stats_key(base, comp);
2067		if (r == 0) {
2068			join->file_name = base->file_name;
2069			join->prog_name = base->prog_name;
2070			join->stats_a = base;
2071			join->stats_b = comp;
2072			i++;
2073			j++;
2074		} else if (base != &fallback_stats && (comp == &fallback_stats || r < 0)) {
2075			join->file_name = base->file_name;
2076			join->prog_name = base->prog_name;
2077			join->stats_a = base;
2078			join->stats_b = NULL;
2079			i++;
2080		} else if (comp != &fallback_stats && (base == &fallback_stats || r > 0)) {
2081			join->file_name = comp->file_name;
2082			join->prog_name = comp->prog_name;
2083			join->stats_a = NULL;
2084			join->stats_b = comp;
2085			j++;
2086		} else {
2087			fprintf(stderr, "%s:%d: should never reach here i=%i, j=%i",
2088				__FILE__, __LINE__, i, j);
2089			return -EINVAL;
2090		}
2091		env.join_stat_cnt += 1;
2092	}
2093
2094	/* now sort joined results according to sort spec */
2095	qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
2096
2097	/* for human-readable table output we need to do extra pass to
2098	 * calculate column widths, so we substitute current output format
2099	 * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
2100	 * and do everything again.
2101	 */
2102	if (env.out_fmt == RESFMT_TABLE)
2103		cur_fmt = RESFMT_TABLE_CALCLEN;
2104	else
2105		cur_fmt = env.out_fmt;
2106
2107one_more_time:
2108	output_comp_headers(cur_fmt);
2109
2110	last_idx = -1;
2111	cnt = 0;
2112	for (i = 0; i < env.join_stat_cnt; i++) {
2113		const struct verif_stats_join *join = &env.join_stats[i];
2114
2115		if (!should_output_join_stats(join))
2116			continue;
2117
2118		if (env.top_n && cnt >= env.top_n)
2119			break;
2120
2121		if (cur_fmt == RESFMT_TABLE_CALCLEN)
2122			last_idx = i;
2123
2124		output_comp_stats(join, cur_fmt, i == last_idx);
2125
2126		cnt++;
2127	}
2128
2129	if (cur_fmt == RESFMT_TABLE_CALCLEN) {
2130		cur_fmt = RESFMT_TABLE;
2131		goto one_more_time; /* ... this time with feeling */
2132	}
2133
2134	return 0;
2135}
2136
2137static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
2138{
2139	long value = stats->stats[f->stat_id];
2140
2141	if (f->abs)
2142		value = value < 0 ? -value : value;
2143
2144	switch (f->op) {
2145	case OP_EQ: return value == f->value;
2146	case OP_NEQ: return value != f->value;
2147	case OP_LT: return value < f->value;
2148	case OP_LE: return value <= f->value;
2149	case OP_GT: return value > f->value;
2150	case OP_GE: return value >= f->value;
2151	}
2152
2153	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
2154	return false;
2155}
2156
2157static bool should_output_stats(const struct verif_stats *stats)
2158{
2159	struct filter *f;
2160	int i, allow_cnt = 0;
2161
2162	for (i = 0; i < env.deny_filter_cnt; i++) {
2163		f = &env.deny_filters[i];
2164		if (f->kind != FILTER_STAT)
2165			continue;
2166
2167		if (is_stat_filter_matched(f, stats))
2168			return false;
2169	}
2170
2171	for (i = 0; i < env.allow_filter_cnt; i++) {
2172		f = &env.allow_filters[i];
2173		if (f->kind != FILTER_STAT)
2174			continue;
2175		allow_cnt++;
2176
2177		if (is_stat_filter_matched(f, stats))
2178			return true;
2179	}
2180
2181	/* if there are no stat allowed filters, pass everything through */
2182	return allow_cnt == 0;
2183}
2184
2185static void output_prog_stats(void)
2186{
2187	const struct verif_stats *stats;
2188	int i, last_stat_idx = 0, cnt = 0;
2189
2190	if (env.out_fmt == RESFMT_TABLE) {
2191		/* calculate column widths */
2192		output_headers(RESFMT_TABLE_CALCLEN);
2193		for (i = 0; i < env.prog_stat_cnt; i++) {
2194			stats = &env.prog_stats[i];
2195			if (!should_output_stats(stats))
2196				continue;
2197			output_stats(stats, RESFMT_TABLE_CALCLEN, false);
2198			last_stat_idx = i;
2199		}
2200	}
2201
2202	/* actually output the table */
2203	output_headers(env.out_fmt);
2204	for (i = 0; i < env.prog_stat_cnt; i++) {
2205		stats = &env.prog_stats[i];
2206		if (!should_output_stats(stats))
2207			continue;
2208		if (env.top_n && cnt >= env.top_n)
2209			break;
2210		output_stats(stats, env.out_fmt, i == last_stat_idx);
2211		cnt++;
2212	}
2213}
2214
2215static int handle_verif_mode(void)
2216{
2217	int i, err;
2218
2219	if (env.filename_cnt == 0) {
2220		fprintf(stderr, "Please provide path to BPF object file!\n\n");
2221		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2222		return -EINVAL;
2223	}
2224
2225	for (i = 0; i < env.filename_cnt; i++) {
2226		err = process_obj(env.filenames[i]);
2227		if (err) {
2228			fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
2229			return err;
2230		}
2231	}
2232
2233	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2234
2235	output_prog_stats();
2236
2237	return 0;
2238}
2239
2240static int handle_replay_mode(void)
2241{
2242	struct stat_specs specs = {};
2243	int err;
2244
2245	if (env.filename_cnt != 1) {
2246		fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
2247		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2248		return -EINVAL;
2249	}
2250
2251	err = parse_stats_csv(env.filenames[0], &specs,
2252			      &env.prog_stats, &env.prog_stat_cnt);
2253	if (err) {
2254		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
2255		return err;
2256	}
2257
2258	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2259
2260	output_prog_stats();
2261
2262	return 0;
2263}
2264
2265int main(int argc, char **argv)
2266{
2267	int err = 0, i;
2268
2269	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
2270		return 1;
2271
2272	if (env.show_version) {
2273		printf("%s\n", argp_program_version);
2274		return 0;
2275	}
2276
2277	if (env.verbose && env.quiet) {
2278		fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
2279		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2280		return 1;
2281	}
2282	if (env.verbose && env.log_level == 0)
2283		env.log_level = 1;
2284
2285	if (env.output_spec.spec_cnt == 0) {
2286		if (env.out_fmt == RESFMT_CSV)
2287			env.output_spec = default_csv_output_spec;
2288		else
2289			env.output_spec = default_output_spec;
2290	}
2291	if (env.sort_spec.spec_cnt == 0)
2292		env.sort_spec = default_sort_spec;
2293
2294	if (env.comparison_mode && env.replay_mode) {
2295		fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
2296		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2297		return 1;
2298	}
2299
2300	if (env.comparison_mode)
2301		err = handle_comparison_mode();
2302	else if (env.replay_mode)
2303		err = handle_replay_mode();
2304	else
2305		err = handle_verif_mode();
2306
2307	free_verif_stats(env.prog_stats, env.prog_stat_cnt);
2308	free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
2309	free(env.join_stats);
2310	for (i = 0; i < env.filename_cnt; i++)
2311		free(env.filenames[i]);
2312	free(env.filenames);
2313	for (i = 0; i < env.allow_filter_cnt; i++) {
2314		free(env.allow_filters[i].any_glob);
2315		free(env.allow_filters[i].file_glob);
2316		free(env.allow_filters[i].prog_glob);
2317	}
2318	free(env.allow_filters);
2319	for (i = 0; i < env.deny_filter_cnt; i++) {
2320		free(env.deny_filters[i].any_glob);
2321		free(env.deny_filters[i].file_glob);
2322		free(env.deny_filters[i].prog_glob);
2323	}
2324	free(env.deny_filters);
2325	return -err;
2326}
v6.2
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
   3#define _GNU_SOURCE
   4#include <argp.h>
 
   5#include <string.h>
   6#include <stdlib.h>
   7#include <linux/compiler.h>
   8#include <sched.h>
   9#include <pthread.h>
  10#include <dirent.h>
  11#include <signal.h>
  12#include <fcntl.h>
  13#include <unistd.h>
  14#include <sys/time.h>
  15#include <sys/sysinfo.h>
  16#include <sys/stat.h>
  17#include <bpf/libbpf.h>
 
 
  18#include <libelf.h>
  19#include <gelf.h>
  20#include <float.h>
 
 
 
 
 
 
  21
  22enum stat_id {
  23	VERDICT,
  24	DURATION,
  25	TOTAL_INSNS,
  26	TOTAL_STATES,
  27	PEAK_STATES,
  28	MAX_STATES_PER_INSN,
  29	MARK_READ_MAX_LEN,
  30
  31	FILE_NAME,
  32	PROG_NAME,
  33
  34	ALL_STATS_CNT,
  35	NUM_STATS_CNT = FILE_NAME - VERDICT,
  36};
  37
  38/* In comparison mode each stat can specify up to four different values:
  39 *   - A side value;
  40 *   - B side value;
  41 *   - absolute diff value;
  42 *   - relative (percentage) diff value.
  43 *
  44 * When specifying stat specs in comparison mode, user can use one of the
  45 * following variant suffixes to specify which exact variant should be used for
  46 * ordering or filtering:
  47 *   - `_a` for A side value;
  48 *   - `_b` for B side value;
  49 *   - `_diff` for absolute diff value;
  50 *   - `_pct` for relative (percentage) diff value.
  51 *
  52 * If no variant suffix is provided, then `_b` (control data) is assumed.
  53 *
  54 * As an example, let's say instructions stat has the following output:
  55 *
  56 * Insns (A)  Insns (B)  Insns   (DIFF)
  57 * ---------  ---------  --------------
  58 * 21547      20920       -627 (-2.91%)
  59 *
  60 * Then:
  61 *   - 21547 is A side value (insns_a);
  62 *   - 20920 is B side value (insns_b);
  63 *   - -627 is absolute diff value (insns_diff);
  64 *   - -2.91% is relative diff value (insns_pct).
  65 *
  66 * For verdict there is no verdict_pct variant.
  67 * For file and program name, _a and _b variants are equivalent and there are
  68 * no _diff or _pct variants.
  69 */
  70enum stat_variant {
  71	VARIANT_A,
  72	VARIANT_B,
  73	VARIANT_DIFF,
  74	VARIANT_PCT,
  75};
  76
  77struct verif_stats {
  78	char *file_name;
  79	char *prog_name;
  80
  81	long stats[NUM_STATS_CNT];
  82};
  83
  84/* joined comparison mode stats */
  85struct verif_stats_join {
  86	char *file_name;
  87	char *prog_name;
  88
  89	const struct verif_stats *stats_a;
  90	const struct verif_stats *stats_b;
  91};
  92
  93struct stat_specs {
  94	int spec_cnt;
  95	enum stat_id ids[ALL_STATS_CNT];
  96	enum stat_variant variants[ALL_STATS_CNT];
  97	bool asc[ALL_STATS_CNT];
 
  98	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
  99};
 100
 101enum resfmt {
 102	RESFMT_TABLE,
 103	RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
 104	RESFMT_CSV,
 105};
 106
 107enum filter_kind {
 108	FILTER_NAME,
 109	FILTER_STAT,
 110};
 111
 112enum operator_kind {
 113	OP_EQ,		/* == or = */
 114	OP_NEQ,		/* != or <> */
 115	OP_LT,		/* < */
 116	OP_LE,		/* <= */
 117	OP_GT,		/* > */
 118	OP_GE,		/* >= */
 119};
 120
 121struct filter {
 122	enum filter_kind kind;
 123	/* FILTER_NAME */
 124	char *any_glob;
 125	char *file_glob;
 126	char *prog_glob;
 127	/* FILTER_STAT */
 128	enum operator_kind op;
 129	int stat_id;
 130	enum stat_variant stat_var;
 131	long value;
 
 132};
 133
 134static struct env {
 135	char **filenames;
 136	int filename_cnt;
 137	bool verbose;
 
 138	bool quiet;
 139	int log_level;
 
 140	enum resfmt out_fmt;
 
 141	bool comparison_mode;
 142	bool replay_mode;
 
 
 
 
 
 143
 144	struct verif_stats *prog_stats;
 145	int prog_stat_cnt;
 146
 147	/* baseline_stats is allocated and used only in comparsion mode */
 148	struct verif_stats *baseline_stats;
 149	int baseline_stat_cnt;
 150
 151	struct verif_stats_join *join_stats;
 152	int join_stat_cnt;
 153
 154	struct stat_specs output_spec;
 155	struct stat_specs sort_spec;
 156
 157	struct filter *allow_filters;
 158	struct filter *deny_filters;
 159	int allow_filter_cnt;
 160	int deny_filter_cnt;
 161
 162	int files_processed;
 163	int files_skipped;
 164	int progs_processed;
 165	int progs_skipped;
 
 166} env;
 167
 168static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
 169{
 170	if (!env.verbose)
 171		return 0;
 172	if (level == LIBBPF_DEBUG /* && !env.verbose */)
 173		return 0;
 174	return vfprintf(stderr, format, args);
 175}
 176
 177const char *argp_program_version = "veristat";
 
 
 
 
 178const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
 179const char argp_program_doc[] =
 180"veristat    BPF verifier stats collection and comparison tool.\n"
 181"\n"
 182"USAGE: veristat <obj-file> [<obj-file>...]\n"
 183"   OR: veristat -C <baseline.csv> <comparison.csv>\n";
 
 
 
 
 
 
 184
 185static const struct argp_option opts[] = {
 186	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
 
 187	{ "verbose", 'v', NULL, 0, "Verbose mode" },
 
 188	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
 
 
 
 189	{ "quiet", 'q', NULL, 0, "Quiet mode" },
 190	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
 191	{ "sort", 's', "SPEC", 0, "Specify sort order" },
 192	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
 193	{ "compare", 'C', NULL, 0, "Comparison mode" },
 194	{ "replay", 'R', NULL, 0, "Replay mode" },
 195	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
 
 
 
 
 
 196	{},
 197};
 198
 199static int parse_stats(const char *stats_str, struct stat_specs *specs);
 200static int append_filter(struct filter **filters, int *cnt, const char *str);
 201static int append_filter_file(const char *path);
 202
 203static error_t parse_arg(int key, char *arg, struct argp_state *state)
 204{
 205	void *tmp;
 206	int err;
 207
 208	switch (key) {
 209	case 'h':
 210		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
 211		break;
 
 
 
 212	case 'v':
 213		env.verbose = true;
 214		break;
 
 
 
 
 215	case 'q':
 216		env.quiet = true;
 217		break;
 218	case 'e':
 219		err = parse_stats(arg, &env.output_spec);
 220		if (err)
 221			return err;
 222		break;
 223	case 's':
 224		err = parse_stats(arg, &env.sort_spec);
 225		if (err)
 226			return err;
 227		break;
 228	case 'o':
 229		if (strcmp(arg, "table") == 0) {
 230			env.out_fmt = RESFMT_TABLE;
 231		} else if (strcmp(arg, "csv") == 0) {
 232			env.out_fmt = RESFMT_CSV;
 233		} else {
 234			fprintf(stderr, "Unrecognized output format '%s'\n", arg);
 235			return -EINVAL;
 236		}
 237		break;
 238	case 'l':
 239		errno = 0;
 240		env.log_level = strtol(arg, NULL, 10);
 241		if (errno) {
 242			fprintf(stderr, "invalid log level: %s\n", arg);
 243			argp_usage(state);
 244		}
 245		break;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 246	case 'C':
 247		env.comparison_mode = true;
 248		break;
 249	case 'R':
 250		env.replay_mode = true;
 251		break;
 252	case 'f':
 253		if (arg[0] == '@')
 254			err = append_filter_file(arg + 1);
 255		else if (arg[0] == '!')
 256			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
 257		else
 258			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
 259		if (err) {
 260			fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
 261			return err;
 262		}
 263		break;
 
 
 
 
 
 
 
 
 264	case ARGP_KEY_ARG:
 265		tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
 266		if (!tmp)
 267			return -ENOMEM;
 268		env.filenames = tmp;
 269		env.filenames[env.filename_cnt] = strdup(arg);
 270		if (!env.filenames[env.filename_cnt])
 271			return -ENOMEM;
 272		env.filename_cnt++;
 273		break;
 274	default:
 275		return ARGP_ERR_UNKNOWN;
 276	}
 277	return 0;
 278}
 279
 280static const struct argp argp = {
 281	.options = opts,
 282	.parser = parse_arg,
 283	.doc = argp_program_doc,
 284};
 285
 286
 287/* Adapted from perf/util/string.c */
 288static bool glob_matches(const char *str, const char *pat)
 289{
 290	while (*str && *pat && *pat != '*') {
 291		if (*str != *pat)
 292			return false;
 293		str++;
 294		pat++;
 295	}
 296	/* Check wild card */
 297	if (*pat == '*') {
 298		while (*pat == '*')
 299			pat++;
 300		if (!*pat) /* Tail wild card matches all */
 301			return true;
 302		while (*str)
 303			if (glob_matches(str++, pat))
 304				return true;
 305	}
 306	return !*str && !*pat;
 307}
 308
 309static bool is_bpf_obj_file(const char *path) {
 310	Elf64_Ehdr *ehdr;
 311	int fd, err = -EINVAL;
 312	Elf *elf = NULL;
 313
 314	fd = open(path, O_RDONLY | O_CLOEXEC);
 315	if (fd < 0)
 316		return true; /* we'll fail later and propagate error */
 317
 318	/* ensure libelf is initialized */
 319	(void)elf_version(EV_CURRENT);
 320
 321	elf = elf_begin(fd, ELF_C_READ, NULL);
 322	if (!elf)
 323		goto cleanup;
 324
 325	if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
 326		goto cleanup;
 327
 328	ehdr = elf64_getehdr(elf);
 329	/* Old LLVM set e_machine to EM_NONE */
 330	if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
 331		goto cleanup;
 332
 333	err = 0;
 334cleanup:
 335	if (elf)
 336		elf_end(elf);
 337	close(fd);
 338	return err == 0;
 339}
 340
 341static bool should_process_file_prog(const char *filename, const char *prog_name)
 342{
 343	struct filter *f;
 344	int i, allow_cnt = 0;
 345
 346	for (i = 0; i < env.deny_filter_cnt; i++) {
 347		f = &env.deny_filters[i];
 348		if (f->kind != FILTER_NAME)
 349			continue;
 350
 351		if (f->any_glob && glob_matches(filename, f->any_glob))
 352			return false;
 353		if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
 354			return false;
 355		if (f->file_glob && glob_matches(filename, f->file_glob))
 356			return false;
 357		if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
 358			return false;
 359	}
 360
 361	for (i = 0; i < env.allow_filter_cnt; i++) {
 362		f = &env.allow_filters[i];
 363		if (f->kind != FILTER_NAME)
 364			continue;
 365
 366		allow_cnt++;
 367		if (f->any_glob) {
 368			if (glob_matches(filename, f->any_glob))
 369				return true;
 370			/* If we don't know program name yet, any_glob filter
 371			 * has to assume that current BPF object file might be
 372			 * relevant; we'll check again later on after opening
 373			 * BPF object file, at which point program name will
 374			 * be known finally.
 375			 */
 376			if (!prog_name || glob_matches(prog_name, f->any_glob))
 377				return true;
 378		} else {
 379			if (f->file_glob && !glob_matches(filename, f->file_glob))
 380				continue;
 381			if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
 382				continue;
 383			return true;
 384		}
 385	}
 386
 387	/* if there are no file/prog name allow filters, allow all progs,
 388	 * unless they are denied earlier explicitly
 389	 */
 390	return allow_cnt == 0;
 391}
 392
 393static struct {
 394	enum operator_kind op_kind;
 395	const char *op_str;
 396} operators[] = {
 397	/* Order of these definitions matter to avoid situations like '<'
 398	 * matching part of what is actually a '<>' operator. That is,
 399	 * substrings should go last.
 400	 */
 401	{ OP_EQ, "==" },
 402	{ OP_NEQ, "!=" },
 403	{ OP_NEQ, "<>" },
 404	{ OP_LE, "<=" },
 405	{ OP_LT, "<" },
 406	{ OP_GE, ">=" },
 407	{ OP_GT, ">" },
 408	{ OP_EQ, "=" },
 409};
 410
 411static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var);
 
 412
 413static int append_filter(struct filter **filters, int *cnt, const char *str)
 414{
 415	struct filter *f;
 416	void *tmp;
 417	const char *p;
 418	int i;
 419
 420	tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
 421	if (!tmp)
 422		return -ENOMEM;
 423	*filters = tmp;
 424
 425	f = &(*filters)[*cnt];
 426	memset(f, 0, sizeof(*f));
 427
 428	/* First, let's check if it's a stats filter of the following form:
 429	 * <stat><op><value, where:
 430	 *   - <stat> is one of supported numerical stats (verdict is also
 431	 *     considered numerical, failure == 0, success == 1);
 432	 *   - <op> is comparison operator (see `operators` definitions);
 433	 *   - <value> is an integer (or failure/success, or false/true as
 434	 *     special aliases for 0 and 1, respectively).
 435	 * If the form doesn't match what user provided, we assume file/prog
 436	 * glob filter.
 437	 */
 438	for (i = 0; i < ARRAY_SIZE(operators); i++) {
 439		enum stat_variant var;
 440		int id;
 441		long val;
 442		const char *end = str;
 443		const char *op_str;
 
 444
 445		op_str = operators[i].op_str;
 446		p = strstr(str, op_str);
 447		if (!p)
 448			continue;
 449
 450		if (!parse_stat_id_var(str, p - str, &id, &var)) {
 451			fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
 452			return -EINVAL;
 453		}
 454		if (id >= FILE_NAME) {
 455			fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
 456			return -EINVAL;
 457		}
 458
 459		p += strlen(op_str);
 460
 461		if (strcasecmp(p, "true") == 0 ||
 462		    strcasecmp(p, "t") == 0 ||
 463		    strcasecmp(p, "success") == 0 ||
 464		    strcasecmp(p, "succ") == 0 ||
 465		    strcasecmp(p, "s") == 0 ||
 466		    strcasecmp(p, "match") == 0 ||
 467		    strcasecmp(p, "m") == 0) {
 468			val = 1;
 469		} else if (strcasecmp(p, "false") == 0 ||
 470			   strcasecmp(p, "f") == 0 ||
 471			   strcasecmp(p, "failure") == 0 ||
 472			   strcasecmp(p, "fail") == 0 ||
 473			   strcasecmp(p, "mismatch") == 0 ||
 474			   strcasecmp(p, "mis") == 0) {
 475			val = 0;
 476		} else {
 477			errno = 0;
 478			val = strtol(p, (char **)&end, 10);
 479			if (errno || end == p || *end != '\0' ) {
 480				fprintf(stderr, "Invalid integer value in '%s'!\n", str);
 481				return -EINVAL;
 482			}
 483		}
 484
 485		f->kind = FILTER_STAT;
 486		f->stat_id = id;
 487		f->stat_var = var;
 488		f->op = operators[i].op_kind;
 
 489		f->value = val;
 490
 491		*cnt += 1;
 492		return 0;
 493	}
 494
 495	/* File/prog filter can be specified either as '<glob>' or
 496	 * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
 497	 * both file and program names. This seems to be way more useful in
 498	 * practice. If user needs full control, they can use '/<prog-glob>'
 499	 * form to glob just program name, or '<file-glob>/' to glob only file
 500	 * name. But usually common <glob> seems to be the most useful and
 501	 * ergonomic way.
 502	 */
 503	f->kind = FILTER_NAME;
 504	p = strchr(str, '/');
 505	if (!p) {
 506		f->any_glob = strdup(str);
 507		if (!f->any_glob)
 508			return -ENOMEM;
 509	} else {
 510		if (str != p) {
 511			/* non-empty file glob */
 512			f->file_glob = strndup(str, p - str);
 513			if (!f->file_glob)
 514				return -ENOMEM;
 515		}
 516		if (strlen(p + 1) > 0) {
 517			/* non-empty prog glob */
 518			f->prog_glob = strdup(p + 1);
 519			if (!f->prog_glob) {
 520				free(f->file_glob);
 521				f->file_glob = NULL;
 522				return -ENOMEM;
 523			}
 524		}
 525	}
 526
 527	*cnt += 1;
 528	return 0;
 529}
 530
 531static int append_filter_file(const char *path)
 532{
 533	char buf[1024];
 534	FILE *f;
 535	int err = 0;
 536
 537	f = fopen(path, "r");
 538	if (!f) {
 539		err = -errno;
 540		fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
 541		return err;
 542	}
 543
 544	while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
 545		/* lines starting with # are comments, skip them */
 546		if (buf[0] == '\0' || buf[0] == '#')
 547			continue;
 548		/* lines starting with ! are negative match filters */
 549		if (buf[0] == '!')
 550			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
 551		else
 552			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
 553		if (err)
 554			goto cleanup;
 555	}
 556
 557cleanup:
 558	fclose(f);
 559	return err;
 560}
 561
 562static const struct stat_specs default_output_spec = {
 563	.spec_cnt = 7,
 564	.ids = {
 565		FILE_NAME, PROG_NAME, VERDICT, DURATION,
 566		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
 567	},
 568};
 569
 570static const struct stat_specs default_csv_output_spec = {
 571	.spec_cnt = 9,
 572	.ids = {
 573		FILE_NAME, PROG_NAME, VERDICT, DURATION,
 574		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
 575		MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
 576	},
 577};
 578
 579static const struct stat_specs default_sort_spec = {
 580	.spec_cnt = 2,
 581	.ids = {
 582		FILE_NAME, PROG_NAME,
 583	},
 584	.asc = { true, true, },
 585};
 586
 587/* sorting for comparison mode to join two data sets */
 588static const struct stat_specs join_sort_spec = {
 589	.spec_cnt = 2,
 590	.ids = {
 591		FILE_NAME, PROG_NAME,
 592	},
 593	.asc = { true, true, },
 594};
 595
 596static struct stat_def {
 597	const char *header;
 598	const char *names[4];
 599	bool asc_by_default;
 600	bool left_aligned;
 601} stat_defs[] = {
 602	[FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
 603	[PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
 604	[VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
 605	[DURATION] = { "Duration (us)", {"duration", "dur"}, },
 606	[TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
 607	[TOTAL_STATES] = { "States", {"total_states", "states"}, },
 608	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
 609	[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
 610	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
 611};
 612
 613static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var)
 
 614{
 615	static const char *var_sfxs[] = {
 616		[VARIANT_A] = "_a",
 617		[VARIANT_B] = "_b",
 618		[VARIANT_DIFF] = "_diff",
 619		[VARIANT_PCT] = "_pct",
 620	};
 621	int i, j, k;
 622
 
 
 
 
 
 
 
 
 623	for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
 624		struct stat_def *def = &stat_defs[i];
 625		size_t alias_len, sfx_len;
 626		const char *alias;
 627
 628		for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
 629			alias = def->names[j];
 630			if (!alias)
 631				continue;
 632
 633			alias_len = strlen(alias);
 634			if (strncmp(name, alias, alias_len) != 0)
 635				continue;
 636
 637			if (alias_len == len) {
 638				/* If no variant suffix is specified, we
 639				 * assume control group (just in case we are
 640				 * in comparison mode. Variant is ignored in
 641				 * non-comparison mode.
 642				 */
 643				*var = VARIANT_B;
 644				*id = i;
 645				return true;
 646			}
 647
 648			for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
 649				sfx_len = strlen(var_sfxs[k]);
 650				if (alias_len + sfx_len != len)
 651					continue;
 652
 653				if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
 654					*var = (enum stat_variant)k;
 655					*id = i;
 656					return true;
 657				}
 658			}
 659		}
 660	}
 661
 662	return false;
 663}
 664
 665static bool is_asc_sym(char c)
 666{
 667	return c == '^';
 668}
 669
 670static bool is_desc_sym(char c)
 671{
 672	return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_';
 673}
 674
 675static int parse_stat(const char *stat_name, struct stat_specs *specs)
 676{
 677	int id;
 678	bool has_order = false, is_asc = false;
 679	size_t len = strlen(stat_name);
 680	enum stat_variant var;
 681
 682	if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
 683		fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
 684		return -E2BIG;
 685	}
 686
 687	if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
 688		has_order = true;
 689		is_asc = is_asc_sym(stat_name[len - 1]);
 690		len -= 1;
 691	}
 692
 693	if (!parse_stat_id_var(stat_name, len, &id, &var)) {
 694		fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
 695		return -ESRCH;
 696	}
 697
 698	specs->ids[specs->spec_cnt] = id;
 699	specs->variants[specs->spec_cnt] = var;
 700	specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
 
 701	specs->spec_cnt++;
 702
 703	return 0;
 704}
 705
 706static int parse_stats(const char *stats_str, struct stat_specs *specs)
 707{
 708	char *input, *state = NULL, *next;
 709	int err;
 710
 711	input = strdup(stats_str);
 712	if (!input)
 713		return -ENOMEM;
 714
 715	while ((next = strtok_r(state ? NULL : input, ",", &state))) {
 716		err = parse_stat(next, specs);
 717		if (err)
 
 718			return err;
 
 719	}
 720
 
 721	return 0;
 722}
 723
 724static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
 725{
 726	int i;
 727
 728	if (!stats)
 729		return;
 730
 731	for (i = 0; i < stat_cnt; i++) {
 732		free(stats[i].file_name);
 733		free(stats[i].prog_name);
 734	}
 735	free(stats);
 736}
 737
 738static char verif_log_buf[64 * 1024];
 739
 740#define MAX_PARSED_LOG_LINES 100
 741
 742static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
 743{
 744	const char *cur;
 745	int pos, lines;
 746
 747	buf[buf_sz - 1] = '\0';
 748
 749	for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
 750		/* find previous endline or otherwise take the start of log buf */
 751		for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
 752		}
 753		/* next time start from end of previous line (or pos goes to <0) */
 754		pos--;
 755		/* if we found endline, point right after endline symbol;
 756		 * otherwise, stay at the beginning of log buf
 757		 */
 758		if (cur[0] == '\n')
 759			cur++;
 760
 761		if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
 762			continue;
 763		if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
 764				&s->stats[TOTAL_INSNS],
 765				&s->stats[MAX_STATES_PER_INSN],
 766				&s->stats[TOTAL_STATES],
 767				&s->stats[PEAK_STATES],
 768				&s->stats[MARK_READ_MAX_LEN]))
 769			continue;
 770	}
 771
 772	return 0;
 773}
 774
 775static void fixup_obj(struct bpf_object *obj)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 776{
 777	struct bpf_map *map;
 778
 779	bpf_object__for_each_map(map, obj) {
 780		/* disable pinning */
 781		bpf_map__set_pin_path(map, NULL);
 782
 783		/* fix up map size, if necessary */
 784		switch (bpf_map__type(map)) {
 785		case BPF_MAP_TYPE_SK_STORAGE:
 786		case BPF_MAP_TYPE_TASK_STORAGE:
 787		case BPF_MAP_TYPE_INODE_STORAGE:
 788		case BPF_MAP_TYPE_CGROUP_STORAGE:
 789			break;
 790		default:
 791			if (bpf_map__max_entries(map) == 0)
 792				bpf_map__set_max_entries(map, 1);
 793		}
 794	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 795}
 796
 797static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
 798{
 
 799	const char *prog_name = bpf_program__name(prog);
 800	size_t buf_sz = sizeof(verif_log_buf);
 801	char *buf = verif_log_buf;
 802	struct verif_stats *stats;
 803	int err = 0;
 804	void *tmp;
 805
 806	if (!should_process_file_prog(basename(filename), bpf_program__name(prog))) {
 807		env.progs_skipped++;
 808		return 0;
 809	}
 810
 811	tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
 812	if (!tmp)
 813		return -ENOMEM;
 814	env.prog_stats = tmp;
 815	stats = &env.prog_stats[env.prog_stat_cnt++];
 816	memset(stats, 0, sizeof(*stats));
 817
 818	if (env.verbose) {
 819		buf_sz = 16 * 1024 * 1024;
 820		buf = malloc(buf_sz);
 821		if (!buf)
 822			return -ENOMEM;
 823		bpf_program__set_log_buf(prog, buf, buf_sz);
 824		bpf_program__set_log_level(prog, env.log_level | 4); /* stats + log */
 
 
 
 825	} else {
 826		bpf_program__set_log_buf(prog, buf, buf_sz);
 827		bpf_program__set_log_level(prog, 4); /* only verifier stats */
 
 
 828	}
 829	verif_log_buf[0] = '\0';
 830
 
 
 
 831	/* increase chances of successful BPF object loading */
 832	fixup_obj(obj);
 
 
 
 
 
 833
 834	err = bpf_object__load(obj);
 835	env.progs_processed++;
 836
 837	stats->file_name = strdup(basename(filename));
 838	stats->prog_name = strdup(bpf_program__name(prog));
 839	stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
 840	parse_verif_log(buf, buf_sz, stats);
 841
 842	if (env.verbose) {
 843		printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
 844		       filename, prog_name, stats->stats[DURATION],
 845		       err ? "failure" : "success", buf);
 846	}
 
 
 847
 848	if (verif_log_buf != buf)
 849		free(buf);
 850
 851	return 0;
 852};
 853
 854static int process_obj(const char *filename)
 855{
 
 856	struct bpf_object *obj = NULL, *tobj;
 857	struct bpf_program *prog, *tprog, *lprog;
 858	libbpf_print_fn_t old_libbpf_print_fn;
 859	LIBBPF_OPTS(bpf_object_open_opts, opts);
 860	int err = 0, prog_cnt = 0;
 861
 862	if (!should_process_file_prog(basename(filename), NULL)) {
 863		if (env.verbose)
 864			printf("Skipping '%s' due to filters...\n", filename);
 865		env.files_skipped++;
 866		return 0;
 867	}
 868	if (!is_bpf_obj_file(filename)) {
 869		if (env.verbose)
 870			printf("Skipping '%s' as it's not a BPF object file...\n", filename);
 871		env.files_skipped++;
 872		return 0;
 873	}
 874
 875	if (!env.quiet && env.out_fmt == RESFMT_TABLE)
 876		printf("Processing '%s'...\n", basename(filename));
 877
 878	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
 879	obj = bpf_object__open_file(filename, &opts);
 880	if (!obj) {
 881		/* if libbpf can't open BPF object file, it could be because
 882		 * that BPF object file is incomplete and has to be statically
 883		 * linked into a final BPF object file; instead of bailing
 884		 * out, report it into stderr, mark it as skipped, and
 885		 * proceeed
 886		 */
 887		fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
 888		env.files_skipped++;
 889		err = 0;
 890		goto cleanup;
 891	}
 892
 893	env.files_processed++;
 894
 895	bpf_object__for_each_program(prog, obj) {
 896		prog_cnt++;
 897	}
 898
 899	if (prog_cnt == 1) {
 900		prog = bpf_object__next_program(obj, NULL);
 901		bpf_program__set_autoload(prog, true);
 902		process_prog(filename, obj, prog);
 903		goto cleanup;
 904	}
 905
 906	bpf_object__for_each_program(prog, obj) {
 907		const char *prog_name = bpf_program__name(prog);
 908
 909		tobj = bpf_object__open_file(filename, &opts);
 910		if (!tobj) {
 911			err = -errno;
 912			fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
 913			goto cleanup;
 914		}
 915
 
 916		bpf_object__for_each_program(tprog, tobj) {
 917			const char *tprog_name = bpf_program__name(tprog);
 918
 919			if (strcmp(prog_name, tprog_name) == 0) {
 920				bpf_program__set_autoload(tprog, true);
 921				lprog = tprog;
 922			} else {
 923				bpf_program__set_autoload(tprog, false);
 924			}
 925		}
 926
 927		process_prog(filename, tobj, lprog);
 928		bpf_object__close(tobj);
 929	}
 930
 931cleanup:
 932	bpf_object__close(obj);
 933	libbpf_set_print(old_libbpf_print_fn);
 934	return err;
 935}
 936
 937static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
 938		    enum stat_id id, bool asc)
 939{
 940	int cmp = 0;
 941
 942	switch (id) {
 943	case FILE_NAME:
 944		cmp = strcmp(s1->file_name, s2->file_name);
 945		break;
 946	case PROG_NAME:
 947		cmp = strcmp(s1->prog_name, s2->prog_name);
 948		break;
 949	case VERDICT:
 950	case DURATION:
 951	case TOTAL_INSNS:
 952	case TOTAL_STATES:
 953	case PEAK_STATES:
 954	case MAX_STATES_PER_INSN:
 955	case MARK_READ_MAX_LEN: {
 956		long v1 = s1->stats[id];
 957		long v2 = s2->stats[id];
 958
 
 
 
 
 
 959		if (v1 != v2)
 960			cmp = v1 < v2 ? -1 : 1;
 961		break;
 962	}
 963	default:
 964		fprintf(stderr, "Unrecognized stat #%d\n", id);
 965		exit(1);
 966	}
 967
 968	return asc ? cmp : -cmp;
 969}
 970
 971static int cmp_prog_stats(const void *v1, const void *v2)
 972{
 973	const struct verif_stats *s1 = v1, *s2 = v2;
 974	int i, cmp;
 975
 976	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
 977		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]);
 
 978		if (cmp != 0)
 979			return cmp;
 980	}
 981
 982	/* always disambiguate with file+prog, which are unique */
 983	cmp = strcmp(s1->file_name, s2->file_name);
 984	if (cmp != 0)
 985		return cmp;
 986	return strcmp(s1->prog_name, s2->prog_name);
 987}
 988
 989static void fetch_join_stat_value(const struct verif_stats_join *s,
 990				  enum stat_id id, enum stat_variant var,
 991				  const char **str_val,
 992				  double *num_val)
 993{
 994	long v1, v2;
 995
 996	if (id == FILE_NAME) {
 997		*str_val = s->file_name;
 998		return;
 999	}
1000	if (id == PROG_NAME) {
1001		*str_val = s->prog_name;
1002		return;
1003	}
1004
1005	v1 = s->stats_a ? s->stats_a->stats[id] : 0;
1006	v2 = s->stats_b ? s->stats_b->stats[id] : 0;
1007
1008	switch (var) {
1009	case VARIANT_A:
1010		if (!s->stats_a)
1011			*num_val = -DBL_MAX;
1012		else
1013			*num_val = s->stats_a->stats[id];
1014		return;
1015	case VARIANT_B:
1016		if (!s->stats_b)
1017			*num_val = -DBL_MAX;
1018		else
1019			*num_val = s->stats_b->stats[id];
1020		return;
1021	case VARIANT_DIFF:
1022		if (!s->stats_a || !s->stats_b)
1023			*num_val = -DBL_MAX;
1024		else if (id == VERDICT)
1025			*num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
1026		else
1027			*num_val = (double)(v2 - v1);
1028		return;
1029	case VARIANT_PCT:
1030		if (!s->stats_a || !s->stats_b) {
1031			*num_val = -DBL_MAX;
1032		} else if (v1 == 0) {
1033			if (v1 == v2)
1034				*num_val = 0.0;
1035			else
1036				*num_val = v2 < v1 ? -100.0 : 100.0;
1037		} else {
1038			 *num_val = (v2 - v1) * 100.0 / v1;
1039		}
1040		return;
1041	}
1042}
1043
1044static int cmp_join_stat(const struct verif_stats_join *s1,
1045			 const struct verif_stats_join *s2,
1046			 enum stat_id id, enum stat_variant var, bool asc)
 
1047{
1048	const char *str1 = NULL, *str2 = NULL;
1049	double v1, v2;
1050	int cmp = 0;
1051
1052	fetch_join_stat_value(s1, id, var, &str1, &v1);
1053	fetch_join_stat_value(s2, id, var, &str2, &v2);
1054
 
 
 
 
 
1055	if (str1)
1056		cmp = strcmp(str1, str2);
1057	else if (v1 != v2)
1058		cmp = v1 < v2 ? -1 : 1;
1059
1060	return asc ? cmp : -cmp;
1061}
1062
1063static int cmp_join_stats(const void *v1, const void *v2)
1064{
1065	const struct verif_stats_join *s1 = v1, *s2 = v2;
1066	int i, cmp;
1067
1068	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1069		cmp = cmp_join_stat(s1, s2,
1070				    env.sort_spec.ids[i],
1071				    env.sort_spec.variants[i],
1072				    env.sort_spec.asc[i]);
 
1073		if (cmp != 0)
1074			return cmp;
1075	}
1076
1077	/* always disambiguate with file+prog, which are unique */
1078	cmp = strcmp(s1->file_name, s2->file_name);
1079	if (cmp != 0)
1080		return cmp;
1081	return strcmp(s1->prog_name, s2->prog_name);
1082}
1083
/* character repeated to draw the rule printed under table headers */
#define HEADER_CHAR '-'
/* separator printed between adjacent columns in table output */
#define COLUMN_SEP "  "
1086
1087static void output_header_underlines(void)
1088{
1089	int i, j, len;
1090
1091	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1092		len = env.output_spec.lens[i];
1093
1094		printf("%s", i == 0 ? "" : COLUMN_SEP);
1095		for (j = 0; j < len; j++)
1096			printf("%c", HEADER_CHAR);
1097	}
1098	printf("\n");
1099}
1100
1101static void output_headers(enum resfmt fmt)
1102{
1103	const char *fmt_str;
1104	int i, len;
1105
1106	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1107		int id = env.output_spec.ids[i];
1108		int *max_len = &env.output_spec.lens[i];
1109
1110		switch (fmt) {
1111		case RESFMT_TABLE_CALCLEN:
1112			len = snprintf(NULL, 0, "%s", stat_defs[id].header);
1113			if (len > *max_len)
1114				*max_len = len;
1115			break;
1116		case RESFMT_TABLE:
1117			fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
1118			printf(fmt_str, i == 0 ? "" : COLUMN_SEP,  *max_len, stat_defs[id].header);
1119			if (i == env.output_spec.spec_cnt - 1)
1120				printf("\n");
1121			break;
1122		case RESFMT_CSV:
1123			printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
1124			if (i == env.output_spec.spec_cnt - 1)
1125				printf("\n");
1126			break;
1127		}
1128	}
1129
1130	if (fmt == RESFMT_TABLE)
1131		output_header_underlines();
1132}
1133
1134static void prepare_value(const struct verif_stats *s, enum stat_id id,
1135			  const char **str, long *val)
1136{
1137	switch (id) {
1138	case FILE_NAME:
1139		*str = s ? s->file_name : "N/A";
1140		break;
1141	case PROG_NAME:
1142		*str = s ? s->prog_name : "N/A";
1143		break;
1144	case VERDICT:
1145		if (!s)
1146			*str = "N/A";
1147		else
1148			*str = s->stats[VERDICT] ? "success" : "failure";
1149		break;
1150	case DURATION:
1151	case TOTAL_INSNS:
1152	case TOTAL_STATES:
1153	case PEAK_STATES:
1154	case MAX_STATES_PER_INSN:
1155	case MARK_READ_MAX_LEN:
1156		*val = s ? s->stats[id] : 0;
1157		break;
1158	default:
1159		fprintf(stderr, "Unrecognized stat #%d\n", id);
1160		exit(1);
1161	}
1162}
1163
1164static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
1165{
1166	int i;
1167
1168	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1169		int id = env.output_spec.ids[i];
1170		int *max_len = &env.output_spec.lens[i], len;
1171		const char *str = NULL;
1172		long val = 0;
1173
1174		prepare_value(s, id, &str, &val);
1175
1176		switch (fmt) {
1177		case RESFMT_TABLE_CALCLEN:
1178			if (str)
1179				len = snprintf(NULL, 0, "%s", str);
1180			else
1181				len = snprintf(NULL, 0, "%ld", val);
1182			if (len > *max_len)
1183				*max_len = len;
1184			break;
1185		case RESFMT_TABLE:
1186			if (str)
1187				printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
1188			else
1189				printf("%s%*ld", i == 0 ? "" : COLUMN_SEP,  *max_len, val);
1190			if (i == env.output_spec.spec_cnt - 1)
1191				printf("\n");
1192			break;
1193		case RESFMT_CSV:
1194			if (str)
1195				printf("%s%s", i == 0 ? "" : ",", str);
1196			else
1197				printf("%s%ld", i == 0 ? "" : ",", val);
1198			if (i == env.output_spec.spec_cnt - 1)
1199				printf("\n");
1200			break;
1201		}
1202	}
1203
1204	if (last && fmt == RESFMT_TABLE) {
1205		output_header_underlines();
1206		printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
1207		       env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
1208	}
1209}
1210
1211static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
1212{
1213	switch (id) {
1214	case FILE_NAME:
1215		st->file_name = strdup(str);
1216		if (!st->file_name)
1217			return -ENOMEM;
1218		break;
1219	case PROG_NAME:
1220		st->prog_name = strdup(str);
1221		if (!st->prog_name)
1222			return -ENOMEM;
1223		break;
1224	case VERDICT:
1225		if (strcmp(str, "success") == 0) {
1226			st->stats[VERDICT] = true;
1227		} else if (strcmp(str, "failure") == 0) {
1228			st->stats[VERDICT] = false;
1229		} else {
1230			fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
1231			return -EINVAL;
1232		}
1233		break;
1234	case DURATION:
1235	case TOTAL_INSNS:
1236	case TOTAL_STATES:
1237	case PEAK_STATES:
1238	case MAX_STATES_PER_INSN:
1239	case MARK_READ_MAX_LEN: {
1240		long val;
1241		int err, n;
1242
1243		if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
1244			err = -errno;
1245			fprintf(stderr, "Failed to parse '%s' as integer\n", str);
1246			return err;
1247		}
1248
1249		st->stats[id] = val;
1250		break;
1251	}
1252	default:
1253		fprintf(stderr, "Unrecognized stat #%d\n", id);
1254		return -EINVAL;
1255	}
1256	return 0;
1257}
1258
1259static int parse_stats_csv(const char *filename, struct stat_specs *specs,
1260			   struct verif_stats **statsp, int *stat_cntp)
1261{
1262	char line[4096];
1263	FILE *f;
1264	int err = 0;
1265	bool header = true;
1266
1267	f = fopen(filename, "r");
1268	if (!f) {
1269		err = -errno;
1270		fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1271		return err;
1272	}
1273
1274	*stat_cntp = 0;
1275
1276	while (fgets(line, sizeof(line), f)) {
1277		char *input = line, *state = NULL, *next;
1278		struct verif_stats *st = NULL;
1279		int col = 0;
1280
1281		if (!header) {
1282			void *tmp;
1283
1284			tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
1285			if (!tmp) {
1286				err = -ENOMEM;
1287				goto cleanup;
1288			}
1289			*statsp = tmp;
1290
1291			st = &(*statsp)[*stat_cntp];
1292			memset(st, 0, sizeof(*st));
1293
1294			*stat_cntp += 1;
1295		}
1296
1297		while ((next = strtok_r(state ? NULL : input, ",\n", &state))) {
1298			if (header) {
1299				/* for the first line, set up spec stats */
1300				err = parse_stat(next, specs);
1301				if (err)
1302					goto cleanup;
1303				continue;
1304			}
1305
1306			/* for all other lines, parse values based on spec */
1307			if (col >= specs->spec_cnt) {
1308				fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
1309					col, *stat_cntp, filename);
1310				err = -EINVAL;
1311				goto cleanup;
1312			}
1313			err = parse_stat_value(next, specs->ids[col], st);
1314			if (err)
1315				goto cleanup;
1316			col++;
1317		}
1318
1319		if (header) {
1320			header = false;
1321			continue;
1322		}
1323
1324		if (col < specs->spec_cnt) {
1325			fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
1326				*stat_cntp, filename);
1327			err = -EINVAL;
1328			goto cleanup;
1329		}
1330
1331		if (!st->file_name || !st->prog_name) {
1332			fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
1333				*stat_cntp, filename);
1334			err = -EINVAL;
1335			goto cleanup;
1336		}
1337
1338		/* in comparison mode we can only check filters after we
1339		 * parsed entire line; if row should be ignored we pretend we
1340		 * never parsed it
1341		 */
1342		if (!should_process_file_prog(st->file_name, st->prog_name)) {
1343			free(st->file_name);
1344			free(st->prog_name);
1345			*stat_cntp -= 1;
1346		}
1347	}
1348
1349	if (!feof(f)) {
1350		err = -errno;
1351		fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
1352	}
1353
1354cleanup:
1355	fclose(f);
1356	return err;
1357}
1358
/* empty/zero stats for mismatched rows: handle_comparison_mode() uses this
 * entry in place of a real row once one of the two data sets has run out;
 * both names are empty strings (not NULL) so they can be passed to strcmp()
 */
static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
1361
1362static bool is_key_stat(enum stat_id id)
1363{
1364	return id == FILE_NAME || id == PROG_NAME;
1365}
1366
1367static void output_comp_header_underlines(void)
1368{
1369	int i, j, k;
1370
1371	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1372		int id = env.output_spec.ids[i];
1373		int max_j = is_key_stat(id) ? 1 : 3;
1374
1375		for (j = 0; j < max_j; j++) {
1376			int len = env.output_spec.lens[3 * i + j];
1377
1378			printf("%s", i + j == 0 ? "" : COLUMN_SEP);
1379
1380			for (k = 0; k < len; k++)
1381				printf("%c", HEADER_CHAR);
1382		}
1383	}
1384	printf("\n");
1385}
1386
1387static void output_comp_headers(enum resfmt fmt)
1388{
1389	static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
1390	static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
1391	int i, j, len;
1392
1393	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1394		int id = env.output_spec.ids[i];
1395		/* key stats don't have A/B/DIFF columns, they are common for both data sets */
1396		int max_j = is_key_stat(id) ? 1 : 3;
1397
1398		for (j = 0; j < max_j; j++) {
1399			int *max_len = &env.output_spec.lens[3 * i + j];
1400			bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
1401			const char *sfx;
1402
1403			switch (fmt) {
1404			case RESFMT_TABLE_CALCLEN:
1405				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1406				len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
1407				if (len > *max_len)
1408					*max_len = len;
1409				break;
1410			case RESFMT_TABLE:
1411				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1412				printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
1413				       *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
1414				if (last)
1415					printf("\n");
1416				break;
1417			case RESFMT_CSV:
1418				sfx = is_key_stat(id) ? "" : name_sfxs[j];
1419				printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
1420				if (last)
1421					printf("\n");
1422				break;
1423			}
1424		}
1425	}
1426
1427	if (fmt == RESFMT_TABLE)
1428		output_comp_header_underlines();
1429}
1430
1431static void output_comp_stats(const struct verif_stats_join *join_stats,
1432			      enum resfmt fmt, bool last)
1433{
1434	const struct verif_stats *base = join_stats->stats_a;
1435	const struct verif_stats *comp = join_stats->stats_b;
1436	char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
1437	int i;
1438
1439	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1440		int id = env.output_spec.ids[i], len;
1441		int *max_len_base = &env.output_spec.lens[3 * i + 0];
1442		int *max_len_comp = &env.output_spec.lens[3 * i + 1];
1443		int *max_len_diff = &env.output_spec.lens[3 * i + 2];
1444		const char *base_str = NULL, *comp_str = NULL;
1445		long base_val = 0, comp_val = 0, diff_val = 0;
1446
1447		prepare_value(base, id, &base_str, &base_val);
1448		prepare_value(comp, id, &comp_str, &comp_val);
1449
1450		/* normalize all the outputs to be in string buffers for simplicity */
1451		if (is_key_stat(id)) {
1452			/* key stats (file and program name) are always strings */
1453			if (base)
1454				snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1455			else
1456				snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
1457		} else if (base_str) {
1458			snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1459			snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
1460			if (!base || !comp)
1461				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1462			else if (strcmp(base_str, comp_str) == 0)
1463				snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
1464			else
1465				snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
1466		} else {
1467			double p = 0.0;
1468
1469			if (base)
1470				snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
1471			else
1472				snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
1473			if (comp)
1474				snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
1475			else
1476				snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
1477
1478			diff_val = comp_val - base_val;
1479			if (!base || !comp) {
1480				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1481			} else {
1482				if (base_val == 0) {
1483					if (comp_val == base_val)
1484						p = 0.0; /* avoid +0 (+100%) case */
1485					else
1486						p = comp_val < base_val ? -100.0 : 100.0;
1487				} else {
1488					 p = diff_val * 100.0 / base_val;
1489				}
1490				snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
1491			}
1492		}
1493
1494		switch (fmt) {
1495		case RESFMT_TABLE_CALCLEN:
1496			len = strlen(base_buf);
1497			if (len > *max_len_base)
1498				*max_len_base = len;
1499			if (!is_key_stat(id)) {
1500				len = strlen(comp_buf);
1501				if (len > *max_len_comp)
1502					*max_len_comp = len;
1503				len = strlen(diff_buf);
1504				if (len > *max_len_diff)
1505					*max_len_diff = len;
1506			}
1507			break;
1508		case RESFMT_TABLE: {
1509			/* string outputs are left-aligned, number outputs are right-aligned */
1510			const char *fmt = base_str ? "%s%-*s" : "%s%*s";
1511
1512			printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
1513			if (!is_key_stat(id)) {
1514				printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
1515				printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
1516			}
1517			if (i == env.output_spec.spec_cnt - 1)
1518				printf("\n");
1519			break;
1520		}
1521		case RESFMT_CSV:
1522			printf("%s%s", i == 0 ? "" : ",", base_buf);
1523			if (!is_key_stat(id)) {
1524				printf("%s%s", i == 0 ? "" : ",", comp_buf);
1525				printf("%s%s", i == 0 ? "" : ",", diff_buf);
1526			}
1527			if (i == env.output_spec.spec_cnt - 1)
1528				printf("\n");
1529			break;
1530		}
1531	}
1532
1533	if (last && fmt == RESFMT_TABLE)
1534		output_comp_header_underlines();
1535}
1536
1537static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
1538{
1539	int r;
1540
1541	r = strcmp(base->file_name, comp->file_name);
1542	if (r != 0)
1543		return r;
1544	return strcmp(base->prog_name, comp->prog_name);
1545}
1546
1547static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
1548{
1549	static const double eps = 1e-9;
1550	const char *str = NULL;
1551	double value = 0.0;
1552
1553	fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
1554
 
 
 
1555	switch (f->op) {
1556	case OP_EQ: return value > f->value - eps && value < f->value + eps;
1557	case OP_NEQ: return value < f->value - eps || value > f->value + eps;
1558	case OP_LT: return value < f->value - eps;
1559	case OP_LE: return value <= f->value + eps;
1560	case OP_GT: return value > f->value + eps;
1561	case OP_GE: return value >= f->value - eps;
1562	}
1563
1564	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1565	return false;
1566}
1567
1568static bool should_output_join_stats(const struct verif_stats_join *stats)
1569{
1570	struct filter *f;
1571	int i, allow_cnt = 0;
1572
1573	for (i = 0; i < env.deny_filter_cnt; i++) {
1574		f = &env.deny_filters[i];
1575		if (f->kind != FILTER_STAT)
1576			continue;
1577
1578		if (is_join_stat_filter_matched(f, stats))
1579			return false;
1580	}
1581
1582	for (i = 0; i < env.allow_filter_cnt; i++) {
1583		f = &env.allow_filters[i];
1584		if (f->kind != FILTER_STAT)
1585			continue;
1586		allow_cnt++;
1587
1588		if (is_join_stat_filter_matched(f, stats))
1589			return true;
1590	}
1591
1592	/* if there are no stat allowed filters, pass everything through */
1593	return allow_cnt == 0;
1594}
1595
1596static int handle_comparison_mode(void)
1597{
1598	struct stat_specs base_specs = {}, comp_specs = {};
1599	struct stat_specs tmp_sort_spec;
1600	enum resfmt cur_fmt;
1601	int err, i, j, last_idx;
1602
1603	if (env.filename_cnt != 2) {
1604		fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
1605		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1606		return -EINVAL;
1607	}
1608
1609	err = parse_stats_csv(env.filenames[0], &base_specs,
1610			      &env.baseline_stats, &env.baseline_stat_cnt);
1611	if (err) {
1612		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1613		return err;
1614	}
1615	err = parse_stats_csv(env.filenames[1], &comp_specs,
1616			      &env.prog_stats, &env.prog_stat_cnt);
1617	if (err) {
1618		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
1619		return err;
1620	}
1621
1622	/* To keep it simple we validate that the set and order of stats in
1623	 * both CSVs are exactly the same. This can be lifted with a bit more
1624	 * pre-processing later.
1625	 */
1626	if (base_specs.spec_cnt != comp_specs.spec_cnt) {
1627		fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
1628			env.filenames[0], env.filenames[1],
1629			base_specs.spec_cnt, comp_specs.spec_cnt);
1630		return -EINVAL;
1631	}
1632	for (i = 0; i < base_specs.spec_cnt; i++) {
1633		if (base_specs.ids[i] != comp_specs.ids[i]) {
1634			fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
1635				env.filenames[0], env.filenames[1],
1636				stat_defs[base_specs.ids[i]].names[0],
1637				stat_defs[comp_specs.ids[i]].names[0]);
1638			return -EINVAL;
1639		}
1640	}
1641
1642	/* Replace user-specified sorting spec with file+prog sorting rule to
1643	 * be able to join two datasets correctly. Once we are done, we will
1644	 * restore the original sort spec.
1645	 */
1646	tmp_sort_spec = env.sort_spec;
1647	env.sort_spec = join_sort_spec;
1648	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1649	qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
1650	env.sort_spec = tmp_sort_spec;
1651
1652	/* Join two datasets together. If baseline and comparison datasets
1653	 * have different subset of rows (we match by 'object + prog' as
1654	 * a unique key) then assume empty/missing/zero value for rows that
1655	 * are missing in the opposite data set.
1656	 */
1657	i = j = 0;
1658	while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
1659		const struct verif_stats *base, *comp;
1660		struct verif_stats_join *join;
1661		void *tmp;
1662		int r;
1663
1664		base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
1665		comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
1666
1667		if (!base->file_name || !base->prog_name) {
1668			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1669				i, env.filenames[0]);
1670			return -EINVAL;
1671		}
1672		if (!comp->file_name || !comp->prog_name) {
1673			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1674				j, env.filenames[1]);
1675			return -EINVAL;
1676		}
1677
1678		tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
1679		if (!tmp)
1680			return -ENOMEM;
1681		env.join_stats = tmp;
1682
1683		join = &env.join_stats[env.join_stat_cnt];
1684		memset(join, 0, sizeof(*join));
1685
1686		r = cmp_stats_key(base, comp);
1687		if (r == 0) {
1688			join->file_name = base->file_name;
1689			join->prog_name = base->prog_name;
1690			join->stats_a = base;
1691			join->stats_b = comp;
1692			i++;
1693			j++;
1694		} else if (comp == &fallback_stats || r < 0) {
1695			join->file_name = base->file_name;
1696			join->prog_name = base->prog_name;
1697			join->stats_a = base;
1698			join->stats_b = NULL;
1699			i++;
1700		} else {
1701			join->file_name = comp->file_name;
1702			join->prog_name = comp->prog_name;
1703			join->stats_a = NULL;
1704			join->stats_b = comp;
1705			j++;
 
 
 
 
1706		}
1707		env.join_stat_cnt += 1;
1708	}
1709
1710	/* now sort joined results accorsing to sort spec */
1711	qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
1712
1713	/* for human-readable table output we need to do extra pass to
1714	 * calculate column widths, so we substitute current output format
1715	 * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
1716	 * and do everything again.
1717	 */
1718	if (env.out_fmt == RESFMT_TABLE)
1719		cur_fmt = RESFMT_TABLE_CALCLEN;
1720	else
1721		cur_fmt = env.out_fmt;
1722
1723one_more_time:
1724	output_comp_headers(cur_fmt);
1725
 
 
1726	for (i = 0; i < env.join_stat_cnt; i++) {
1727		const struct verif_stats_join *join = &env.join_stats[i];
1728
1729		if (!should_output_join_stats(join))
1730			continue;
1731
 
 
 
1732		if (cur_fmt == RESFMT_TABLE_CALCLEN)
1733			last_idx = i;
1734
1735		output_comp_stats(join, cur_fmt, i == last_idx);
 
 
1736	}
1737
1738	if (cur_fmt == RESFMT_TABLE_CALCLEN) {
1739		cur_fmt = RESFMT_TABLE;
1740		goto one_more_time; /* ... this time with feeling */
1741	}
1742
1743	return 0;
1744}
1745
1746static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
1747{
1748	long value = stats->stats[f->stat_id];
1749
 
 
 
1750	switch (f->op) {
1751	case OP_EQ: return value == f->value;
1752	case OP_NEQ: return value != f->value;
1753	case OP_LT: return value < f->value;
1754	case OP_LE: return value <= f->value;
1755	case OP_GT: return value > f->value;
1756	case OP_GE: return value >= f->value;
1757	}
1758
1759	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1760	return false;
1761}
1762
1763static bool should_output_stats(const struct verif_stats *stats)
1764{
1765	struct filter *f;
1766	int i, allow_cnt = 0;
1767
1768	for (i = 0; i < env.deny_filter_cnt; i++) {
1769		f = &env.deny_filters[i];
1770		if (f->kind != FILTER_STAT)
1771			continue;
1772
1773		if (is_stat_filter_matched(f, stats))
1774			return false;
1775	}
1776
1777	for (i = 0; i < env.allow_filter_cnt; i++) {
1778		f = &env.allow_filters[i];
1779		if (f->kind != FILTER_STAT)
1780			continue;
1781		allow_cnt++;
1782
1783		if (is_stat_filter_matched(f, stats))
1784			return true;
1785	}
1786
1787	/* if there are no stat allowed filters, pass everything through */
1788	return allow_cnt == 0;
1789}
1790
1791static void output_prog_stats(void)
1792{
1793	const struct verif_stats *stats;
1794	int i, last_stat_idx = 0;
1795
1796	if (env.out_fmt == RESFMT_TABLE) {
1797		/* calculate column widths */
1798		output_headers(RESFMT_TABLE_CALCLEN);
1799		for (i = 0; i < env.prog_stat_cnt; i++) {
1800			stats = &env.prog_stats[i];
1801			if (!should_output_stats(stats))
1802				continue;
1803			output_stats(stats, RESFMT_TABLE_CALCLEN, false);
1804			last_stat_idx = i;
1805		}
1806	}
1807
1808	/* actually output the table */
1809	output_headers(env.out_fmt);
1810	for (i = 0; i < env.prog_stat_cnt; i++) {
1811		stats = &env.prog_stats[i];
1812		if (!should_output_stats(stats))
1813			continue;
 
 
1814		output_stats(stats, env.out_fmt, i == last_stat_idx);
 
1815	}
1816}
1817
1818static int handle_verif_mode(void)
1819{
1820	int i, err;
1821
1822	if (env.filename_cnt == 0) {
1823		fprintf(stderr, "Please provide path to BPF object file!\n\n");
1824		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1825		return -EINVAL;
1826	}
1827
1828	for (i = 0; i < env.filename_cnt; i++) {
1829		err = process_obj(env.filenames[i]);
1830		if (err) {
1831			fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
1832			return err;
1833		}
1834	}
1835
1836	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1837
1838	output_prog_stats();
1839
1840	return 0;
1841}
1842
1843static int handle_replay_mode(void)
1844{
1845	struct stat_specs specs = {};
1846	int err;
1847
1848	if (env.filename_cnt != 1) {
1849		fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
1850		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1851		return -EINVAL;
1852	}
1853
1854	err = parse_stats_csv(env.filenames[0], &specs,
1855			      &env.prog_stats, &env.prog_stat_cnt);
1856	if (err) {
1857		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1858		return err;
1859	}
1860
1861	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1862
1863	output_prog_stats();
1864
1865	return 0;
1866}
1867
1868int main(int argc, char **argv)
1869{
1870	int err = 0, i;
1871
1872	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
1873		return 1;
 
 
 
 
 
1874
1875	if (env.verbose && env.quiet) {
1876		fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
1877		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1878		return 1;
1879	}
1880	if (env.verbose && env.log_level == 0)
1881		env.log_level = 1;
1882
1883	if (env.output_spec.spec_cnt == 0) {
1884		if (env.out_fmt == RESFMT_CSV)
1885			env.output_spec = default_csv_output_spec;
1886		else
1887			env.output_spec = default_output_spec;
1888	}
1889	if (env.sort_spec.spec_cnt == 0)
1890		env.sort_spec = default_sort_spec;
1891
1892	if (env.comparison_mode && env.replay_mode) {
1893		fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
1894		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1895		return 1;
1896	}
1897
1898	if (env.comparison_mode)
1899		err = handle_comparison_mode();
1900	else if (env.replay_mode)
1901		err = handle_replay_mode();
1902	else
1903		err = handle_verif_mode();
1904
1905	free_verif_stats(env.prog_stats, env.prog_stat_cnt);
1906	free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
1907	free(env.join_stats);
1908	for (i = 0; i < env.filename_cnt; i++)
1909		free(env.filenames[i]);
1910	free(env.filenames);
1911	for (i = 0; i < env.allow_filter_cnt; i++) {
1912		free(env.allow_filters[i].any_glob);
1913		free(env.allow_filters[i].file_glob);
1914		free(env.allow_filters[i].prog_glob);
1915	}
1916	free(env.allow_filters);
1917	for (i = 0; i < env.deny_filter_cnt; i++) {
1918		free(env.deny_filters[i].any_glob);
1919		free(env.deny_filters[i].file_glob);
1920		free(env.deny_filters[i].prog_glob);
1921	}
1922	free(env.deny_filters);
1923	return -err;
1924}