// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include <linux/array_size.h>
#include <linux/minmax.h>
#include <vdso/datapage.h>
#include <vdso/getrandom.h>
#include <vdso/unaligned.h>
#include <asm/vdso/getrandom.h>
#include <uapi/linux/mman.h>
#include <uapi/linux/random.h>

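/*
 * Define PAGE_SIZE and PAGE_MASK locally from CONFIG_PAGE_SHIFT, replacing any
 * definitions pulled in by the headers above.
 */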
#undef PAGE_SIZE
#undef PAGE_MASK
#define PAGE_SIZE (1UL << CONFIG_PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE - 1))

#define MEMCPY_AND_ZERO_SRC(type, dst, src, len) do {				\
	while (len >= sizeof(type)) {						\
		__put_unaligned_t(type, __get_unaligned_t(type, src), dst);	\
		__put_unaligned_t(type, 0, src);				\
		dst += sizeof(type);						\
		src += sizeof(type);						\
		len -= sizeof(type);						\
	}									\
} while (0)

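/*
 * Copy @len bytes from @src to @dst while simultaneously zeroing @src, using
 * the widest unaligned access types the configuration allows. Wiping the
 * source as it is consumed is what lets the batch below preserve forward
 * secrecy.
 */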
static void memcpy_and_zero_src(void *dst, void *src, size_t len)
{
	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
		if (IS_ENABLED(CONFIG_64BIT))
			MEMCPY_AND_ZERO_SRC(u64, dst, src, len);
		MEMCPY_AND_ZERO_SRC(u32, dst, src, len);
		MEMCPY_AND_ZERO_SRC(u16, dst, src, len);
	}
	MEMCPY_AND_ZERO_SRC(u8, dst, src, len);
}

/**
 * __cvdso_getrandom_data - Generic vDSO implementation of getrandom() syscall.
 * @rng_info:		Describes state of kernel RNG, memory shared with kernel.
 * @buffer:		Destination buffer to fill with random bytes.
 * @len:		Size of @buffer in bytes.
 * @flags:		Zero or more GRND_* flags.
 * @opaque_state:	Pointer to an opaque state area.
 * @opaque_len:		Length of opaque state area.
 *
 * This implements a "fast key erasure" RNG using ChaCha20, in the same way that the kernel's
 * getrandom() syscall does. It periodically reseeds its key from the kernel's RNG, at the same
 * schedule that the kernel's RNG is reseeded. If the kernel's RNG is not ready, then this always
 * calls into the syscall.
 *
 * If @buffer, @len, and @flags are 0, and @opaque_len is ~0UL, then @opaque_state is populated
 * with a struct vgetrandom_opaque_params and the function returns 0; if it does not return 0,
 * this function should not be used.
 *
 * @opaque_state *must* be allocated by calling mmap(2) using the mmap_prot and mmap_flags fields
 * from the struct vgetrandom_opaque_params, and states must not straddle pages. Unless external
 * locking is used, one state must be allocated per thread, as it is not safe to call this function
 * concurrently with the same @opaque_state. However, it is safe to call this using the same
 * @opaque_state that is shared between main code and signal handling code, within the same thread.
 *
 * Returns:	The number of random bytes written to @buffer, or a negative value indicating an error.
 */
static __always_inline ssize_t
__cvdso_getrandom_data(const struct vdso_rng_data *rng_info, void *buffer, size_t len,
		       unsigned int flags, void *opaque_state, size_t opaque_len)
{
	ssize_t ret = min_t(size_t, INT_MAX & PAGE_MASK /* = MAX_RW_COUNT */, len);
	struct vgetrandom_state *state = opaque_state;
	size_t batch_len, nblocks, orig_len = len;
	bool in_use, have_retried = false;
	void *orig_buffer = buffer;
	u64 current_generation;
	u32 counter[2] = { 0 };

	if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags)) {
		struct vgetrandom_opaque_params *params = opaque_state;
		params->size_of_opaque_state = sizeof(*state);
		params->mmap_prot = PROT_READ | PROT_WRITE;
		params->mmap_flags = MAP_DROPPABLE | MAP_ANONYMOUS;
		for (size_t i = 0; i < ARRAY_SIZE(params->reserved); ++i)
			params->reserved[i] = 0;
		return 0;
	}

	/* The state must not straddle a page, since pages can be zeroed at any time. */
	if (unlikely(((unsigned long)opaque_state & ~PAGE_MASK) + sizeof(*state) > PAGE_SIZE))
		return -EFAULT;

	/* Handle unexpected flags by falling back to the kernel. */
	if (unlikely(flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)))
		goto fallback_syscall;

	/* If the caller passes the wrong size, which might happen due to CRIU, fallback. */
	if (unlikely(opaque_len != sizeof(*state)))
		goto fallback_syscall;

	/*
	 * If the kernel's RNG is not yet ready, then it's not possible to provide random bytes from
	 * userspace, because A) the various @flags require this to block, or not, depending on
	 * various factors unavailable to userspace, and B) the kernel's behavior before the RNG is
	 * ready is to reseed from the entropy pool at every invocation.
	 */
	if (unlikely(!READ_ONCE(rng_info->is_ready)))
		goto fallback_syscall;

	/*
	 * This condition is checked after @rng_info->is_ready, because before the kernel's RNG is
	 * initialized, the @flags parameter may require this to block or return an error, even when
	 * len is zero.
	 */
	if (unlikely(!len))
		return 0;

	/*
	 * @state->in_use is basic reentrancy protection against this running in a signal handler
	 * with the same @opaque_state, but obviously not atomic wrt multiple CPUs or more than one
	 * level of reentrancy. If a signal interrupts this after reading @state->in_use, but before
	 * writing @state->in_use, there is still no race, because the signal handler will run to
	 * its completion before returning execution.
	 */
	in_use = READ_ONCE(state->in_use);
	if (unlikely(in_use))
		/* The syscall simply fills the buffer and does not touch @state, so fallback. */
		goto fallback_syscall;
	WRITE_ONCE(state->in_use, true);

retry_generation:
	/*
	 * @rng_info->generation must always be read here, as it serializes @state->key with the
	 * kernel's RNG reseeding schedule.
	 */
	current_generation = READ_ONCE(rng_info->generation);

	/*
	 * If @state->generation doesn't match the kernel RNG's generation, then it means the
	 * kernel's RNG has reseeded, and so @state->key is reseeded as well.
	 */
	if (unlikely(state->generation != current_generation)) {
		/*
		 * Write the generation before filling the key, in case of fork. If there is a fork
		 * just after this line, the parent and child will get different random bytes from
		 * the syscall, which is good. However, were this line to occur after the getrandom
		 * syscall, then both child and parent could have the same bytes and the same
		 * generation counter, so the fork would not be detected. Therefore, write
		 * @state->generation before the call to the getrandom syscall.
		 */
		WRITE_ONCE(state->generation, current_generation);

		/*
		 * Prevent the syscall from being reordered wrt current_generation. Pairs with the
		 * smp_store_release(&_vdso_rng_data.generation) in random.c.
		 */
		smp_rmb();

		/* Reseed @state->key using fresh bytes from the kernel. */
		if (getrandom_syscall(state->key, sizeof(state->key), 0) != sizeof(state->key)) {
			/*
			 * If the syscall failed to refresh the key, then @state->key is now
			 * invalid, so invalidate the generation so that it is not used again, and
			 * fallback to using the syscall entirely.
			 */
			WRITE_ONCE(state->generation, 0);

			/*
			 * Set @state->in_use to false only after the last write to @state in the
			 * line above.
			 */
			WRITE_ONCE(state->in_use, false);

			goto fallback_syscall;
		}

		/*
		 * Set @state->pos to beyond the end of the batch, so that the batch is refilled
		 * using the new key.
		 */
		state->pos = sizeof(state->batch);
	}

	/* Set len to the total amount of bytes that this function is allowed to read, ret. */
	len = ret;
more_batch:
	/*
	 * First use bytes out of @state->batch, which may have been filled by the last call to this
	 * function.
	 */
	batch_len = min_t(size_t, sizeof(state->batch) - state->pos, len);
	if (batch_len) {
		/* Zeroing at the same time as memcpying helps preserve forward secrecy. */
		memcpy_and_zero_src(buffer, state->batch + state->pos, batch_len);
		state->pos += batch_len;
		buffer += batch_len;
		len -= batch_len;
	}

	if (!len) {
		/* Prevent the loop from being reordered wrt ->generation. */
		barrier();

		/*
		 * Since @rng_info->generation will never be 0, re-read @state->generation, rather
		 * than using the local current_generation variable, to learn whether a fork
		 * occurred or if @state was zeroed due to memory pressure. Primarily, though, this
		 * indicates whether the kernel's RNG has reseeded, in which case generate a new key
		 * and start over.
		 */
		if (unlikely(READ_ONCE(state->generation) != READ_ONCE(rng_info->generation))) {
			/*
			 * Prevent this from looping forever in case of low memory or racing with a
			 * user force-reseeding the kernel's RNG using the ioctl.
			 */
			if (have_retried) {
				WRITE_ONCE(state->in_use, false);
				goto fallback_syscall;
			}

			have_retried = true;
			buffer = orig_buffer;
			goto retry_generation;
		}

		/*
		 * Set @state->in_use to false only when there will be no more reads or writes of
		 * @state.
		 */
		WRITE_ONCE(state->in_use, false);
		return ret;
	}

	/* Generate blocks of RNG output directly into @buffer while there's enough room left. */
	nblocks = len / CHACHA_BLOCK_SIZE;
	if (nblocks) {
		__arch_chacha20_blocks_nostack(buffer, state->key, counter, nblocks);
		buffer += nblocks * CHACHA_BLOCK_SIZE;
		len -= nblocks * CHACHA_BLOCK_SIZE;
	}

	BUILD_BUG_ON(sizeof(state->batch_key) % CHACHA_BLOCK_SIZE != 0);

	/* Refill the batch and overwrite the key, in order to preserve forward secrecy. */
	__arch_chacha20_blocks_nostack(state->batch_key, state->key, counter,
				       sizeof(state->batch_key) / CHACHA_BLOCK_SIZE);

	/* Since the batch was just refilled, set the position back to 0 to indicate a full batch. */
	state->pos = 0;
	goto more_batch;

fallback_syscall:
	return getrandom_syscall(orig_buffer, orig_len, flags);
}

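/*
 * Wrapper that resolves the shared vdso_rng_data page via
 * __arch_get_vdso_rng_data() and defers to __cvdso_getrandom_data().
 */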
static __always_inline ssize_t
__cvdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
{
	return __cvdso_getrandom_data(__arch_get_vdso_rng_data(), buffer, len, flags, opaque_state, opaque_len);
}
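/*
 * Illustrative userspace calling sequence, following the protocol documented
 * above. This is a hedged sketch only: the vgetrandom() wrapper name and its
 * argument order are assumptions made for illustration, not something defined
 * in this file.
 *
 *	struct vgetrandom_opaque_params params;
 *	unsigned char buf[256];
 *	void *state;
 *
 *	// Query the allocation parameters: buffer, len and flags all zero,
 *	// opaque_len of ~0UL.
 *	vgetrandom(NULL, 0, 0, &params, ~0UL);
 *
 *	// One state per thread, mapped with the advertised prot and flags.
 *	state = mmap(NULL, params.size_of_opaque_state, params.mmap_prot,
 *		     params.mmap_flags, -1, 0);
 *
 *	// Fill buf with random bytes, only falling back to the syscall when
 *	// the vDSO path cannot satisfy the request.
 *	vgetrandom(buf, sizeof(buf), 0, state, params.size_of_opaque_state);
 */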