Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.15.
  1// SPDX-License-Identifier: LGPL-2.1
  2/*
  3 * rseq.c
  4 *
  5 * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  6 *
  7 * This library is free software; you can redistribute it and/or
  8 * modify it under the terms of the GNU Lesser General Public
  9 * License as published by the Free Software Foundation; only
 10 * version 2.1 of the License.
 11 *
 12 * This library is distributed in the hope that it will be useful,
 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 15 * Lesser General Public License for more details.
 16 */
 17
 18#define _GNU_SOURCE
 19#include <errno.h>
 20#include <sched.h>
 21#include <stdio.h>
 22#include <stdlib.h>
 23#include <string.h>
 24#include <unistd.h>
 25#include <syscall.h>
 26#include <assert.h>
 27#include <signal.h>
 28#include <limits.h>
 29#include <dlfcn.h>
 30#include <stddef.h>
 31#include <sys/auxv.h>
 32#include <linux/auxvec.h>
 33
 34#include <linux/compiler.h>
 35
 36#include "../kselftest.h"
 37#include "rseq.h"
 38
 39/*
 40 * Define weak versions to play nice with binaries that are statically linked
 41 * against a libc that doesn't support registering its own rseq.
 42 */
 43__weak ptrdiff_t __rseq_offset;
 44__weak unsigned int __rseq_size;
 45__weak unsigned int __rseq_flags;
 46
 47static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
 48static const unsigned int *libc_rseq_size_p = &__rseq_size;
 49static const unsigned int *libc_rseq_flags_p = &__rseq_flags;
 50
 51/* Offset from the thread pointer to the rseq area. */
 52ptrdiff_t rseq_offset;
 53
 54/*
 55 * Size of the registered rseq area. 0 if the registration was
 56 * unsuccessful.
 57 */
 58unsigned int rseq_size = -1U;
 59
 60/* Flags used during rseq registration.  */
 61unsigned int rseq_flags;
 62
 63static int rseq_ownership;
 64
 65/* Allocate a large area for the TLS. */
 66#define RSEQ_THREAD_AREA_ALLOC_SIZE	1024
 67
 68/* Original struct rseq feature size is 20 bytes. */
 69#define ORIG_RSEQ_FEATURE_SIZE		20
 70
 71/* Original struct rseq allocation size is 32 bytes. */
 72#define ORIG_RSEQ_ALLOC_SIZE		32
 73
 74static
 75__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
 76	.cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
 77};
 78
 79static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
 80		    int flags, uint32_t sig)
 81{
 82	return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
 83}
 84
 85static int sys_getcpu(unsigned *cpu, unsigned *node)
 86{
 87	return syscall(__NR_getcpu, cpu, node, NULL);
 88}
 89
 90int rseq_available(void)
 91{
 92	int rc;
 93
 94	rc = sys_rseq(NULL, 0, 0, 0);
 95	if (rc != -1)
 96		abort();
 97	switch (errno) {
 98	case ENOSYS:
 99		return 0;
100	case EINVAL:
101		return 1;
102	default:
103		abort();
104	}
105}
106
107/* The rseq areas need to be at least 32 bytes. */
108static
109unsigned int get_rseq_min_alloc_size(void)
110{
111	unsigned int alloc_size = rseq_size;
112
113	if (alloc_size < ORIG_RSEQ_ALLOC_SIZE)
114		alloc_size = ORIG_RSEQ_ALLOC_SIZE;
115	return alloc_size;
116}
117
118/*
119 * Return the feature size supported by the kernel.
120 *
121 * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE):
122 *
123 * 0:   Return ORIG_RSEQ_FEATURE_SIZE (20)
124 * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE).
125 *
126 * It should never return a value below ORIG_RSEQ_FEATURE_SIZE.
127 */
128static
129unsigned int get_rseq_kernel_feature_size(void)
130{
131	unsigned long auxv_rseq_feature_size, auxv_rseq_align;
132
133	auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
134	assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
135
136	auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
137	assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
138	if (auxv_rseq_feature_size)
139		return auxv_rseq_feature_size;
140	else
141		return ORIG_RSEQ_FEATURE_SIZE;
142}
143
144int rseq_register_current_thread(void)
145{
146	int rc;
147
148	if (!rseq_ownership) {
149		/* Treat libc's ownership as a successful registration. */
150		return 0;
151	}
152	rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG);
153	if (rc) {
154		/*
155		 * After at least one thread has registered successfully
156		 * (rseq_size > 0), the registration of other threads should
157		 * never fail.
158		 */
159		if (RSEQ_READ_ONCE(rseq_size) > 0) {
160			/* Incoherent success/failure within process. */
161			abort();
162		}
163		return -1;
164	}
165	assert(rseq_current_cpu_raw() >= 0);
166
167	/*
168	 * The first thread to register sets the rseq_size to mimic the libc
169	 * behavior.
170	 */
171	if (RSEQ_READ_ONCE(rseq_size) == 0) {
172		RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size());
173	}
174
175	return 0;
176}
177
178int rseq_unregister_current_thread(void)
179{
180	int rc;
181
182	if (!rseq_ownership) {
183		/* Treat libc's ownership as a successful unregistration. */
184		return 0;
185	}
186	rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
187	if (rc)
188		return -1;
189	return 0;
190}
191
192static __attribute__((constructor))
193void rseq_init(void)
194{
195	/*
196	 * If the libc's registered rseq size isn't already valid, it may be
197	 * because the binary is dynamically linked and not necessarily due to
198	 * libc not having registered a restartable sequence.  Try to find the
199	 * symbols if that's the case.
200	 */
201	if (!*libc_rseq_size_p) {
202		libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
203		libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
204		libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
205	}
206	if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
207			*libc_rseq_size_p != 0) {
208		unsigned int libc_rseq_size;
209
210		/* rseq registration owned by glibc */
211		rseq_offset = *libc_rseq_offset_p;
212		libc_rseq_size = *libc_rseq_size_p;
213		rseq_flags = *libc_rseq_flags_p;
214
215		/*
216		 * Previous versions of glibc expose the value
217		 * 32 even though the kernel only supported 20
218		 * bytes initially. Therefore treat 32 as a
219		 * special-case. glibc 2.40 exposes a 20 bytes
220		 * __rseq_size without using getauxval(3) to
221		 * query the supported size, while still allocating a 32
222		 * bytes area. Also treat 20 as a special-case.
223		 *
224		 * Special-cases are handled by using the following
225		 * value as active feature set size:
226		 *
227		 *   rseq_size = min(32, get_rseq_kernel_feature_size())
228		 */
229		switch (libc_rseq_size) {
230		case ORIG_RSEQ_FEATURE_SIZE:
231			fallthrough;
232		case ORIG_RSEQ_ALLOC_SIZE:
233		{
234			unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size();
235
236			if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE)
237				rseq_size = rseq_kernel_feature_size;
238			else
239				rseq_size = ORIG_RSEQ_ALLOC_SIZE;
240			break;
241		}
242		default:
243			/* Otherwise just use the __rseq_size from libc as rseq_size. */
244			rseq_size = libc_rseq_size;
245			break;
246		}
247		return;
248	}
249	rseq_ownership = 1;
250
251	/* Calculate the offset of the rseq area from the thread pointer. */
252	rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
253
254	/* rseq flags are deprecated, always set to 0. */
255	rseq_flags = 0;
256
257	/*
258	 * Set the size to 0 until at least one thread registers to mimic the
259	 * libc behavior.
260	 */
261	rseq_size = 0;
262}
263
264static __attribute__((destructor))
265void rseq_exit(void)
266{
267	if (!rseq_ownership)
268		return;
269	rseq_offset = 0;
270	rseq_size = -1U;
271	rseq_ownership = 0;
272}
273
/*
 * Return the current CPU number as reported by sched_getcpu().
 *
 * If sched_getcpu() reports a negative value, the error is printed with
 * perror() and the process aborts.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t current_cpu = sched_getcpu();

	if (current_cpu >= 0)
		return current_cpu;

	perror("sched_getcpu()");
	abort();
}
285
/*
 * Return the current node number as reported by sys_getcpu().
 *
 * If sys_getcpu() returns non-zero, the error is printed with perror()
 * and that return value is handed back to the caller instead.
 */
int32_t rseq_fallback_current_node(void)
{
	uint32_t cpu, node;
	int status = sys_getcpu(&cpu, &node);

	if (status == 0)
		return (int32_t) node;

	perror("sys_getcpu()");
	return status;
}