// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2024 Rivos Inc.
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/jump_label.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/types.h>
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>

#include "copy-unaligned.h"

#define MISALIGNED_ACCESS_JIFFIES_LG2 1
#define MISALIGNED_BUFFER_SIZE 0x4000
#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)

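/*
 * Sizing sanity check, derived from the constants above and the offsets
 * used in check_unaligned_access() below: the destination starts at byte
 * offset 0x1 of the MISALIGNED_BUFFER_SIZE (16 KiB) allocation and the
 * source at offset 0x2003. With MISALIGNED_COPY_SIZE = 0x2000 - 0x80 =
 * 0x1f80 bytes per copy, the copies cover [0x1, 0x1f81) and
 * [0x2003, 0x3f83), so each stays inside its own half of the buffer and
 * the two regions never overlap.
 */
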
DEFINE_PER_CPU(long, misaligned_access_speed);

#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
static cpumask_t fast_misaligned_access;
static int check_unaligned_access(void *param)
{
	int cpu = smp_processor_id();
	u64 start_cycles, end_cycles;
	u64 word_cycles;
	u64 byte_cycles;
	int ratio;
	unsigned long start_jiffies, now;
	struct page *page = param;
	void *dst;
	void *src;
	long speed = RISCV_HWPROBE_MISALIGNED_SLOW;

	if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
		return 0;

	/* Make an unaligned destination buffer. */
	dst = (void *)((unsigned long)page_address(page) | 0x1);
	/* Unalign src as well, but differently (off by 1 + 2 = 3). */
	src = dst + (MISALIGNED_BUFFER_SIZE / 2);
	src += 2;
	word_cycles = -1ULL;
	/* Do a warmup. */
	__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
	preempt_disable();
	start_jiffies = jiffies;
	while ((now = jiffies) == start_jiffies)
		cpu_relax();

	/*
	 * For a fixed amount of time, repeatedly try the function, and take
	 * the best time in cycles as the measurement.
	 */
	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
		start_cycles = get_cycles64();
		/* Ensure the CSR read can't reorder with respect to the copy. */
		mb();
		__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
		/* Ensure the copy ends before the end time is snapped. */
		mb();
		end_cycles = get_cycles64();
		if ((end_cycles - start_cycles) < word_cycles)
			word_cycles = end_cycles - start_cycles;
	}

	byte_cycles = -1ULL;
	__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
	start_jiffies = jiffies;
	while ((now = jiffies) == start_jiffies)
		cpu_relax();

	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
		start_cycles = get_cycles64();
		mb();
		__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
		mb();
		end_cycles = get_cycles64();
		if ((end_cycles - start_cycles) < byte_cycles)
			byte_cycles = end_cycles - start_cycles;
	}

	preempt_enable();

	/* Don't divide by zero. */
	if (!word_cycles || !byte_cycles) {
		pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
			cpu);

		return 0;
	}

	if (word_cycles < byte_cycles)
		speed = RISCV_HWPROBE_MISALIGNED_FAST;

	ratio = div_u64((byte_cycles * 100), word_cycles);
	pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
		cpu,
		ratio / 100,
		ratio % 100,
		(speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");

	per_cpu(misaligned_access_speed, cpu) = speed;

	/*
	 * Set this CPU's bit in fast_misaligned_access. The cpumask
	 * operations are atomic, so no extra locking is needed against
	 * concurrent updates from other CPUs.
	 */
	if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
		cpumask_set_cpu(cpu, &fast_misaligned_access);
	else
		cpumask_clear_cpu(cpu, &fast_misaligned_access);

	return 0;
}
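
/*
 * The per-CPU value stored above is what the riscv_hwprobe() syscall
 * reports for RISCV_HWPROBE_KEY_CPUPERF_0 (the reporting code lives in the
 * hwprobe implementation, not in this file). A rough userspace sketch,
 * assuming the uapi names from <asm/hwprobe.h> and <asm/unistd.h>, with
 * take_the_fast_path() standing in for caller-specific code:
 *
 *	struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
 *
 *	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) == 0 &&
 *	    (pair.value & RISCV_HWPROBE_MISALIGNED_MASK) ==
 *		RISCV_HWPROBE_MISALIGNED_FAST)
 *		take_the_fast_path();
 *
 * An empty CPU set (cpusetsize == 0, cpus == NULL) requests a value that
 * holds across all online CPUs; see Documentation/arch/riscv/hwprobe.rst.
 */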

static void check_unaligned_access_nonboot_cpu(void *param)
{
	unsigned int cpu = smp_processor_id();
	struct page **pages = param;

	if (smp_processor_id() != 0)
		check_unaligned_access(pages[cpu]);
}

DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);

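/*
 * Code elsewhere tests this key rather than the per-CPU variable so that
 * the check compiles down to a patched branch. A minimal sketch of such a
 * consumer, assuming a helper along the lines of the one the RISC-V
 * headers provide:
 *
 *	static __always_inline bool has_fast_unaligned_accesses(void)
 *	{
 *		return static_branch_likely(&fast_unaligned_access_speed_key);
 *	}
 */
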
/* Enable the fast path only if all "weight" CPUs of interest are in "mask". */
static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
{
	if (cpumask_weight(mask) == weight)
		static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key);
	else
		static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key);
}

static void set_unaligned_access_static_branches_except_cpu(int cpu)
{
	/*
	 * Same as set_unaligned_access_static_branches, except excludes the
	 * given CPU from the result. When a CPU is hotplugged into an offline
	 * state, this function is called before the CPU is set to offline in
	 * the cpumask, and thus the CPU needs to be explicitly excluded.
	 */

	cpumask_t fast_except_me;

	cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
	cpumask_clear_cpu(cpu, &fast_except_me);

	modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
}

static void set_unaligned_access_static_branches(void)
{
	/*
	 * This will be called after check_unaligned_access_all_cpus so the
	 * result of unaligned access speed for all CPUs will be available.
	 *
	 * To avoid the number of online cpus changing between reading
	 * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
	 * held before calling this function.
	 */

	cpumask_t fast_and_online;

	cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);

	modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
}

static int lock_and_set_unaligned_access_static_branch(void)
{
	cpus_read_lock();
	set_unaligned_access_static_branches();
	cpus_read_unlock();

	return 0;
}

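/*
 * arch_initcall_sync() callbacks run after all plain arch_initcall()
 * callbacks at the same level, so by the time this runs,
 * check_unaligned_access_all_cpus() (registered with arch_initcall() at
 * the bottom of this file) has already filled in fast_misaligned_access
 * for the CPUs probed at boot.
 */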
arch_initcall_sync(lock_and_set_unaligned_access_static_branch);

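/*
 * CPUHP_AP_ONLINE_DYN is in the "AP" range of hotplug states, so the
 * online callback below is invoked on the CPU that is coming up; that is
 * why check_unaligned_access(), which measures the CPU it is running on
 * via smp_processor_id(), can be called here directly.
 */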
static int riscv_online_cpu(unsigned int cpu)
{
	static struct page *buf;

	/* We are already set since the last check */
	if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
		goto exit;

	buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
	if (!buf) {
		pr_warn("Allocation failure, not measuring misaligned performance\n");
		return -ENOMEM;
	}

	check_unaligned_access(buf);
	__free_pages(buf, MISALIGNED_BUFFER_ORDER);

exit:
	set_unaligned_access_static_branches();

	return 0;
}

static int riscv_offline_cpu(unsigned int cpu)
{
	set_unaligned_access_static_branches_except_cpu(cpu);

	return 0;
}

/* Measure unaligned access speed on all CPUs present at boot in parallel. */
static int check_unaligned_access_speed_all_cpus(void)
{
	unsigned int cpu;
	unsigned int cpu_count = num_possible_cpus();
	struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL);

	if (!bufs) {
		pr_warn("Allocation failure, not measuring misaligned performance\n");
		return 0;
	}

	/*
	 * Allocate separate buffers for each CPU so there's no fighting over
	 * cache lines.
	 */
	for_each_cpu(cpu, cpu_online_mask) {
		bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
		if (!bufs[cpu]) {
			pr_warn("Allocation failure, not measuring misaligned performance\n");
			goto out;
		}
	}

	/* Check everybody except 0, who stays behind to tend jiffies. */
	on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);

	/* Check core 0. */
	smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);

	/*
	 * Set up hotplug callbacks for any new CPUs that come online or go
	 * offline.
	 */
	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
				  riscv_online_cpu, riscv_offline_cpu);

out:
	for_each_cpu(cpu, cpu_online_mask) {
		if (bufs[cpu])
			__free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
	}

	kfree(bufs);
	return 0;
}

static int check_unaligned_access_all_cpus(void)
{
	bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();

	if (!all_cpus_emulated)
		return check_unaligned_access_speed_all_cpus();

	return 0;
}
#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
static int check_unaligned_access_all_cpus(void)
{
	check_unaligned_access_emulated_all_cpus();

	return 0;
}
#endif

arch_initcall(check_unaligned_access_all_cpus);