// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>

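/*
 * Use a raw syscall to get the thread id: older C libraries do not
 * provide a gettid() wrapper.
 */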
static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;

#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)

#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n"	\
	"	cbz	" INJECT_ASM_REG ", 333f\n"			\
	"222:\n"							\
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n"	\
	"	cbnz	" INJECT_ASM_REG ", 222b\n"			\
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif
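
/*
 * Each RSEQ_INJECT_ASM(n) site spins for loop_cnt[n] iterations inside
 * the rseq critical section, widening the window in which preemption,
 * migration or signal delivery can abort the sequence. As a sketch, on
 * x86-64 RSEQ_INJECT_ASM(1) expands to roughly:
 *
 *	lea	asm_loop_cnt_1(%rip), %rax
 *	mov	(%rax), %eax
 *	test	%eax, %eax
 *	jz	333f
 * 222:	dec	%eax
 *	jnz	222b
 * 333:
 */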

#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}
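
/*
 * A loop count of -1 is a sentinel: instead of spinning, the C-level
 * injection points (7-9) then yield, sleep or raise SIGUSR1 (per the
 * -y, -s and -k options) every opt_modulo-th pass, which is why the
 * usage help says "-1 to enable -m" for those delay points.
 */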

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
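
/*
 * Minimal usage sketch for the per-cpu lock above. Hypothetical helper:
 * illustrative only, not exercised by the tests in this file.
 */
static __attribute__((unused))
void example_percpu_lock_inc(struct percpu_lock *lock,
			     struct test_data_entry *counters)
{
	/* Locks the data of whichever CPU this thread runs on. */
	int cpu = rseq_this_cpu_lock(lock);

	counters[cpu].count++;	/* Protected by lock->c[cpu].v. */
	rseq_percpu_unlock(lock, cpu);
}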

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);

		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock.  Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = rseq_cpu_start();
			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		off_t offset;
		int ret;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;	/* List was empty (head == NULL). */
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}
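
/*
 * Minimal round-trip sketch for the per-cpu list primitives above.
 * Hypothetical helper: illustrative only, not exercised by the tests
 * in this file.
 */
static __attribute__((unused))
void example_list_roundtrip(struct percpu_list *list)
{
	struct percpu_list_node *node;

	node = this_cpu_list_pop(list, NULL);
	if (!node)
		return;	/* The current CPU's list was empty. */
	/*
	 * The thread may have migrated in between; the push simply
	 * targets whichever CPU the thread runs on at that point.
	 */
	this_cpu_list_push(list, node, NULL);
}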

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();  /* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trystorev_storev_release(
				targetptr_final, offset, targetptr_spec,
				newval_spec, newval_final, cpu);
		else
			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
				offset, targetptr_spec, newval_spec,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}
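
/*
 * Minimal round-trip sketch for the per-cpu buffer primitives above.
 * Hypothetical helper: illustrative only, not exercised by the tests
 * in this file.
 */
static __attribute__((unused))
void example_buffer_roundtrip(struct percpu_buffer *buffer)
{
	struct percpu_buffer_node *node;

	node = this_cpu_buffer_pop(buffer, NULL);
	if (!node)
		return;	/* The current CPU's buffer was empty. */
	if (!this_cpu_buffer_push(buffer, node, NULL))
		abort();	/* Full; a real caller would grow the buffer. */
}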

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();  /* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trymemcpy_storev_release(
				targetptr_final, offset,
				destptr, srcptr, copylen,
				newval_final, cpu);
		else
			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
				offset, destptr, srcptr, copylen,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
			offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}

void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();  /* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * Unlike the previous test, the two-field
			 * struct is copied into the buffer by value,
			 * modeling objects that would not fit within
			 * a single word.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

/* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
struct test_membarrier_thread_args {
	int stop;
	intptr_t percpu_list_ptr;
};
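
/*
 * Both fields are shared between the manager and the worker threads.
 * The manager publishes list pointers with atomic_store(); workers
 * read them with atomic_load() or dereference them within an rseq
 * critical section via rseq_offset_deref_addv().
 */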

/* Worker threads modify data in their "active" percpu lists. */
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const long long iters = opt_reps;
	long long i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization. */
	while (!atomic_load(&args->percpu_list_ptr)) {}

	for (i = 0; i < iters; ++i) {
		int ret;

		do {
			int cpu = rseq_cpu_start();

			ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier_init_percpu_list(struct percpu_list *list)
{
	int i;

	memset(list, 0, sizeof(*list));
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		node = malloc(sizeof(*node));
		assert(node);
		node->data = 0;
		node->next = NULL;
		list->c[i].head = node;
	}
}

void test_membarrier_free_percpu_list(struct percpu_list *list)
{
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		free(list->c[i].head);
}

static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
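
/*
 * Hypothetical sketch (not used by the test): query whether the
 * running kernel supports the rseq-fence membarrier command before
 * relying on it.
 */
static __attribute__((unused))
int example_membarrier_rseq_supported(void)
{
	int cmds = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);

	if (cmds < 0)
		return 0;	/* membarrier(2) is unavailable. */
	return !!(cmds & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ);
}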

/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);

	while (!atomic_load(&args->stop)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
					MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = atomic_load(&list_a.c[cpu_a].head->data);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
					MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = atomic_load(&list_b.c[cpu_b].head->data);
	}

	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	thread_args.stop = 0;
	thread_args.percpu_list_ptr = 0;
	ret = pthread_create(&manager_thread, NULL,
			test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	atomic_store(&thread_args.stop, 1);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
void test_membarrier(void)
{
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
			"Skipping membarrier test.\n");
}
#endif

static void show_usage(int argc, char **argv)
{
	printf("Usage: %s <OPTIONS>\n", argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}

int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mb = 1;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}