// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

#include "rseq.h"

#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock.  Returns the cpu the lock was acquired on. */
int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_test_data *data = arg;
	int i, cpu;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	for (i = 0; i < data->reps; i++) {
		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
	}
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock.  Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = 200;
	int i;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;

	memset(&data, 0, sizeof(data));
	data.reps = 5000;

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_spinlock_thread, &data);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)data.reps * num_threads);
}

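/*
 * Illustrative sketch only (not used by this test): as the comment
 * above test_percpu_spinlock() notes, a real application would
 * typically prefer a plain per-cpu increment over a per-cpu lock.
 * Assuming the rseq_addv() helper shipped with this rseq.h, such an
 * increment could look like the following; a registered thread could
 * call it as this_cpu_count_inc(data->c).
 */
void this_cpu_count_inc(struct test_data_entry *c)
{
	for (;;) {
		int cpu = rseq_cpu_start();

		/* Add 1 to the counter owned by the current cpu. */
		if (rseq_likely(!rseq_addv(&c[cpu].count, 1, cpu)))
			break;
		/* rseq_addv() has no compare step; retry only on abort. */
	}
}
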
void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

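/*
 * Illustrative variant only (not used by this test): the push above may
 * write node->next before entering the rseq critical section because
 * the node stays private to this thread until the final commit store
 * publishes it. Assuming the rseq_cmpeqv_trystorev_storev() helper from
 * this rseq.h (its availability and signature are an assumption here),
 * both stores can instead be issued inside one restartable sequence.
 */
void this_cpu_list_push_alt(struct percpu_list *list,
			    struct percpu_list_node *node,
			    int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		/* Speculatively store node->next, then commit the new head. */
		ret = rseq_cmpeqv_trystorev_storev(targetptr, expect,
						   (intptr_t *)&node->next,
						   expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}
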
/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		off_t offset;
		int ret, cpu;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			if (_cpu)
				*_cpu = cpu;
			return head;
		}
		if (ret > 0)
			return NULL;
		/* Retry if rseq aborts. */
	}
}

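/*
 * For contrast only (illustrative sketch, not used by this test): a
 * lock-less pop built on compare-and-swap, as below, is exposed to the
 * classic ABA race (and a potential use-after-free). Between loading
 * head->next and the compare-and-swap on the head pointer, another
 * thread may pop the same node, pop and free its successor, then push
 * the first node back: the CAS still succeeds and installs a stale
 * next pointer. this_cpu_list_pop() avoids this because
 * rseq_cmpnev_storeoffp_load() performs the dereference and the commit
 * store within a single restartable sequence on the owning cpu.
 */
struct percpu_list_node *cas_list_pop_aba_prone(struct percpu_list_node **head)
{
	for (;;) {
		struct percpu_list_node *old, *next;

		old = __atomic_load_n(head, __ATOMIC_ACQUIRE);
		if (!old)
			return NULL;
		next = old->next;	/* may already be stale */
		if (__atomic_compare_exchange_n(head, &old, next, false,
						__ATOMIC_ACQ_REL,
						__ATOMIC_ACQUIRE))
			return old;
	}
}
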
/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	for (i = 0; i < 100000; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		sched_yield();  /* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	int i, j;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[200];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < 200; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_list_thread, &list);

	for (i = 0; i < 200; i++)
		pthread_join(test_threads[i], NULL);

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

int main(int argc, char **argv)
{
	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto error;
	}
	printf("spinlock\n");
	test_percpu_spinlock();
	printf("percpu_list\n");
	test_percpu_list();
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto error;
	}
	return 0;

error:
	return -1;
}
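
/*
 * Usage sketch (the build details below are an assumption, not part of
 * this file): the rseq selftests link tests such as this one against
 * the rseq.c/rseq.h helpers used above, roughly:
 *
 *	cc -O2 -g -o basic_percpu_ops_test basic_percpu_ops_test.c rseq.c -lpthread
 *
 * On success the test prints "spinlock" and "percpu_list" as each phase
 * runs and main() returns 0; a registration failure makes it return -1.
 */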