arch/x86/kernel/cpu/aperfmperf.c, v5.4
// SPDX-License-Identifier: GPL-2.0-only
/*
 * x86 APERF/MPERF KHz calculation for
 * /sys/.../cpufreq/scaling_cur_freq
 *
 * Copyright (C) 2017 Intel Corp.
 * Author: Len Brown <len.brown@intel.com>
 */

#include <linux/delay.h>
#include <linux/ktime.h>
#include <linux/math64.h>
#include <linux/percpu.h>
#include <linux/cpufreq.h>
#include <linux/smp.h>
#include <linux/sched/isolation.h>

#include "cpu.h"

struct aperfmperf_sample {
	unsigned int	khz;
	ktime_t	time;
	u64	aperf;
	u64	mperf;
};

static DEFINE_PER_CPU(struct aperfmperf_sample, samples);

#define APERFMPERF_CACHE_THRESHOLD_MS	10
#define APERFMPERF_REFRESH_DELAY_MS	10
#define APERFMPERF_STALE_THRESHOLD_MS	1000

/*
 * aperfmperf_snapshot_khz()
 * On the current CPU, snapshot APERF, MPERF, and jiffies
 * unless we already did it within 10ms
 * calculate kHz, save snapshot
 */
static void aperfmperf_snapshot_khz(void *dummy)
{
	u64 aperf, aperf_delta;
	u64 mperf, mperf_delta;
	struct aperfmperf_sample *s = this_cpu_ptr(&samples);
	unsigned long flags;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	local_irq_restore(flags);

	aperf_delta = aperf - s->aperf;
	mperf_delta = mperf - s->mperf;

	/*
	 * There is no architectural guarantee that MPERF
	 * increments faster than we can read it.
	 */
	if (mperf_delta == 0)
		return;

	s->time = ktime_get();
	s->aperf = aperf;
	s->mperf = mperf;
	s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
}
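/*
 * Worked example (illustrative numbers, not from the source): with
 * cpu_khz = 2,000,000 (a 2 GHz base clock), aperf_delta = 3,000,000 and
 * mperf_delta = 2,000,000, the computation above yields
 *
 *   s->khz = 2,000,000 * 3,000,000 / 2,000,000 = 3,000,000
 *
 * i.e. the CPU averaged 3 GHz over the measured interval while not idle.
 */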

static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait)
{
	s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu));

	/* Don't bother re-computing within the cache threshold time. */
	if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
		return true;

	smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait);

	/* Return false if the previous iteration was too long ago. */
	return time_delta <= APERFMPERF_STALE_THRESHOLD_MS;
}
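/*
 * Calling convention used below: a false return means the previous sample
 * was more than APERFMPERF_STALE_THRESHOLD_MS (1s) old, so the kHz value
 * just computed by the IPI averages over far more than the intended 10ms
 * window. Callers react by sleeping APERFMPERF_REFRESH_DELAY_MS and
 * snapshotting again so the reported frequency reflects a fresh interval.
 */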

unsigned int aperfmperf_get_khz(int cpu)
{
	if (!cpu_khz)
		return 0;

	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
		return 0;

	if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
		return 0;

	aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
	return per_cpu(samples.khz, cpu);
}

void arch_freq_prepare_all(void)
{
	ktime_t now = ktime_get();
	bool wait = false;
	int cpu;

	if (!cpu_khz)
		return;

	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
		return;

	for_each_online_cpu(cpu) {
		if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
			continue;
		if (!aperfmperf_snapshot_cpu(cpu, now, false))
			wait = true;
	}

	if (wait)
		msleep(APERFMPERF_REFRESH_DELAY_MS);
}

unsigned int arch_freq_get_on_cpu(int cpu)
{
	if (!cpu_khz)
		return 0;

	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
		return 0;

	if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
		return 0;

	if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
		return per_cpu(samples.khz, cpu);

	msleep(APERFMPERF_REFRESH_DELAY_MS);
	smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);

	return per_cpu(samples.khz, cpu);
}

arch/x86/kernel/cpu/aperfmperf.c, v6.2
// SPDX-License-Identifier: GPL-2.0-only
/*
 * x86 APERF/MPERF KHz calculation for
 * /sys/.../cpufreq/scaling_cur_freq
 *
 * Copyright (C) 2017 Intel Corp.
 * Author: Len Brown <len.brown@intel.com>
 */
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/ktime.h>
#include <linux/math64.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/sched/isolation.h>
#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>

#include <asm/cpu.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>

#include "cpu.h"

struct aperfmperf {
	seqcount_t	seq;
	unsigned long	last_update;
	u64		acnt;
	u64		mcnt;
	u64		aperf;
	u64		mperf;
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples) = {
	.seq = SEQCNT_ZERO(cpu_samples.seq)
};

static void init_counter_refs(void)
{
	u64 aperf, mperf;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);

	this_cpu_write(cpu_samples.aperf, aperf);
	this_cpu_write(cpu_samples.mperf, mperf);
}

#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
/*
 * APERF/MPERF frequency ratio computation.
 *
 * The scheduler wants to do frequency invariant accounting and needs a <1
 * ratio to account for the 'current' frequency, corresponding to
 * freq_curr / freq_max.
 *
 * Since the frequency freq_curr on x86 is controlled by micro-controller and
 * our P-state setting is little more than a request/hint, we need to observe
 * the effective frequency 'BusyMHz', i.e. the average frequency over a time
 * interval after discarding idle time. This is given by:
 *
 *   BusyMHz = delta_APERF / delta_MPERF * freq_base
 *
 * where freq_base is the max non-turbo P-state.
 *
 * The freq_max term has to be set to a somewhat arbitrary value, because we
 * can't know which turbo states will be available at a given point in time:
 * it all depends on the thermal headroom of the entire package. We set it to
 * the turbo level with 4 cores active.
 *
 * Benchmarks show that's a good compromise between the 1C turbo ratio
 * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
 * which would ignore the entire turbo range (a conspicuous part, making
 * freq_curr/freq_max always maxed out).
 *
 * An exception to the heuristic above is the Atom uarch, where we choose the
 * highest turbo level for freq_max since Atom's are generally oriented towards
 * power efficiency.
 *
 * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
 * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
 */
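/*
 * Worked example (illustrative numbers, not from the source): with
 * freq_base = 2000 MHz and a 4-core turbo level of freq_max = 3000 MHz,
 * a tick interval that sees delta_APERF / delta_MPERF = 1.4 gives
 *
 *   BusyMHz = 1.4 * 2000 MHz = 2800 MHz
 *   freq_curr / freq_max = 2800 / 3000 ~= 0.93
 *
 * i.e. roughly 956 once scaled by SCHED_CAPACITY_SCALE (1024).
 */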

DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);

static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;

void arch_set_max_freq_ratio(bool turbo_disabled)
{
	arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
					arch_turbo_freq_ratio;
}
EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);

static bool __init turbo_disabled(void)
{
	u64 misc_en;
	int err;

	err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
	if (err)
		return false;

	return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
}

static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
	int err;

	err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 16) & 0x3F;     /* max P state */
	*turbo_freq = *turbo_freq & 0x3F;           /* 1C turbo    */

	return true;
}

#define X86_MATCH(model)					\
	X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,		\
		INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)

static const struct x86_cpu_id has_knl_turbo_ratio_limits[] __initconst = {
	X86_MATCH(XEON_PHI_KNL),
	X86_MATCH(XEON_PHI_KNM),
	{}
};

static const struct x86_cpu_id has_skx_turbo_ratio_limits[] __initconst = {
	X86_MATCH(SKYLAKE_X),
	{}
};

static const struct x86_cpu_id has_glm_turbo_ratio_limits[] __initconst = {
	X86_MATCH(ATOM_GOLDMONT),
	X86_MATCH(ATOM_GOLDMONT_D),
	X86_MATCH(ATOM_GOLDMONT_PLUS),
	{}
};

static bool __init knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
					  int num_delta_fratio)
{
	int fratio, delta_fratio, found;
	int err, i;
	u64 msr;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;	    /* max P state */

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
	if (err)
		return false;

	fratio = (msr >> 8) & 0xFF;
	i = 16;
	found = 0;
	do {
		if (found >= num_delta_fratio) {
			*turbo_freq = fratio;
			return true;
		}

		delta_fratio = (msr >> (i + 5)) & 0x7;

		if (delta_fratio) {
			found += 1;
			fratio -= delta_fratio;
		}

		i += 8;
	} while (i < 64);

	return true;
}

static bool __init skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
{
	u64 ratios, counts;
	u32 group_size;
	int err, i;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
	if (err)
		return false;

	for (i = 0; i < 64; i += 8) {
		group_size = (counts >> i) & 0xFF;
		if (group_size >= size) {
			*turbo_freq = (ratios >> i) & 0xFF;
			return true;
		}
	}

	return false;
}

static bool __init core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
	u64 msr;
	int err;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;    /* max P state */
	*turbo_freq = (msr >> 24) & 0xFF;         /* 4C turbo    */

	/* The CPU may have less than 4 cores */
	if (!*turbo_freq)
		*turbo_freq = msr & 0xFF;         /* 1C turbo    */

	return true;
}
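/*
 * Worked example (illustrative register values, not from the source):
 * if MSR_PLATFORM_INFO[15:8] reads 24 and MSR_TURBO_RATIO_LIMIT[31:24]
 * reads 32, the function above returns base_freq = 24 and turbo_freq = 32.
 * With the usual 100 MHz bus clock that corresponds to a 2.4 GHz base and
 * a 3.2 GHz 4-core turbo, but only the ratio matters downstream:
 * intel_set_max_freq_ratio() below computes
 *
 *   arch_turbo_freq_ratio = 32 * 1024 / 24 = 1365
 */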

static bool __init intel_set_max_freq_ratio(void)
{
	u64 base_freq, turbo_freq;
	u64 turbo_ratio;

	if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
		goto out;

	if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
		goto out;

	if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
	    knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
		goto out;

	if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
		goto out;

	if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
		goto out;

	return false;

out:
	/*
	 * Some hypervisors advertise X86_FEATURE_APERFMPERF
	 * but then fill all MSR's with zeroes.
	 * Some CPUs have turbo boost but don't declare any turbo ratio
	 * in MSR_TURBO_RATIO_LIMIT.
	 */
	if (!base_freq || !turbo_freq) {
		pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
		return false;
	}

	turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
	if (!turbo_ratio) {
		pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
		return false;
	}

	arch_turbo_freq_ratio = turbo_ratio;
	arch_set_max_freq_ratio(turbo_disabled());

	return true;
}

#ifdef CONFIG_PM_SLEEP
static struct syscore_ops freq_invariance_syscore_ops = {
	.resume = init_counter_refs,
};

static void register_freq_invariance_syscore_ops(void)
{
	register_syscore_ops(&freq_invariance_syscore_ops);
}
#else
static inline void register_freq_invariance_syscore_ops(void) {}
#endif

static void freq_invariance_enable(void)
{
	if (static_branch_unlikely(&arch_scale_freq_key)) {
		WARN_ON_ONCE(1);
		return;
	}
	static_branch_enable(&arch_scale_freq_key);
	register_freq_invariance_syscore_ops();
	pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
}

void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled)
{
	arch_turbo_freq_ratio = ratio;
	arch_set_max_freq_ratio(turbo_disabled);
	freq_invariance_enable();
}

static void __init bp_init_freq_invariance(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return;

	if (intel_set_max_freq_ratio())
		freq_invariance_enable();
}

static void disable_freq_invariance_workfn(struct work_struct *work)
{
	int cpu;

	static_branch_disable(&arch_scale_freq_key);

	/*
	 * Set arch_freq_scale to a default value on all cpus
	 * This negates the effect of scaling
	 */
	for_each_possible_cpu(cpu)
		per_cpu(arch_freq_scale, cpu) = SCHED_CAPACITY_SCALE;
}

static DECLARE_WORK(disable_freq_invariance_work,
		    disable_freq_invariance_workfn);

DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;

static void scale_freq_tick(u64 acnt, u64 mcnt)
{
	u64 freq_scale;

	if (!arch_scale_freq_invariant())
		return;

	if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
		goto error;

	if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
		goto error;

	freq_scale = div64_u64(acnt, mcnt);
	if (!freq_scale)
		goto error;

	if (freq_scale > SCHED_CAPACITY_SCALE)
		freq_scale = SCHED_CAPACITY_SCALE;

	this_cpu_write(arch_freq_scale, freq_scale);
	return;

error:
	pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
	schedule_work(&disable_freq_invariance_work);
}
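/*
 * Worked example (illustrative numbers, not from the source): with
 * arch_max_freq_ratio = 1536 (freq_max = 1.5 * freq_base) and a tick that
 * measured acnt = 1,250,000 and mcnt = 1,000,000 (the CPU ran at 1.25x the
 * base frequency while busy):
 *
 *   freq_scale = (acnt << 20) / (mcnt * 1536)
 *              = 1,310,720,000,000 / 1,536,000,000 = 853
 *
 * which is roughly (1.25 / 1.5) * SCHED_CAPACITY_SCALE, the per-CPU factor
 * the scheduler uses for frequency-invariant accounting.
 */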
#else
static inline void bp_init_freq_invariance(void) { }
static inline void scale_freq_tick(u64 acnt, u64 mcnt) { }
#endif /* CONFIG_X86_64 && CONFIG_SMP */

void arch_scale_freq_tick(void)
{
	struct aperfmperf *s = this_cpu_ptr(&cpu_samples);
	u64 acnt, mcnt, aperf, mperf;

	if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
		return;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	acnt = aperf - s->aperf;
	mcnt = mperf - s->mperf;

	s->aperf = aperf;
	s->mperf = mperf;

	raw_write_seqcount_begin(&s->seq);
	s->last_update = jiffies;
	s->acnt = acnt;
	s->mcnt = mcnt;
	raw_write_seqcount_end(&s->seq);

	scale_freq_tick(acnt, mcnt);
}
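/*
 * Note on the sampling scheme above: the tick publishes last_update, acnt
 * and mcnt inside the seqcount write section, and arch_freq_get_on_cpu()
 * below re-reads them in a retry loop, so a remote reader always gets a
 * consistent sample without taking a lock or sending an IPI.
 */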

/*
 * Discard samples older than the defined maximum sample age of 20ms. There
 * is no point in sending IPIs in such a case. If the scheduler tick was
 * not running then the CPU is either idle or isolated.
 */
#define MAX_SAMPLE_AGE	((unsigned long)HZ / 50)
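/*
 * For reference, HZ / 50 jiffies is 20ms for any tick rate, since a jiffy
 * lasts 1/HZ seconds: HZ = 1000 gives 20 jiffies, HZ = 250 gives 5, and
 * HZ = 100 gives 2.
 */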

unsigned int arch_freq_get_on_cpu(int cpu)
{
	struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu);
	unsigned int seq, freq;
	unsigned long last;
	u64 acnt, mcnt;

	if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
		goto fallback;

	do {
		seq = raw_read_seqcount_begin(&s->seq);
		last = s->last_update;
		acnt = s->acnt;
		mcnt = s->mcnt;
	} while (read_seqcount_retry(&s->seq, seq));

	/*
	 * Bail on invalid count and when the last update was too long ago,
	 * which covers idle and NOHZ full CPUs.
	 */
	if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE)
		goto fallback;

	return div64_u64((cpu_khz * acnt), mcnt);

fallback:
	freq = cpufreq_quick_get(cpu);
	return freq ? freq : cpu_khz;
}

static int __init bp_init_aperfmperf(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
		return 0;

	init_counter_refs();
	bp_init_freq_invariance();
	return 0;
}
early_initcall(bp_init_aperfmperf);

void ap_init_aperfmperf(void)
{
	if (cpu_feature_enabled(X86_FEATURE_APERFMPERF))
		init_counter_refs();
}
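
Both versions of the file exist to back the cpufreq sysfs attribute named in the header comment. As a usage illustration only, a minimal userspace reader could look like the sketch below; the full sysfs path and the choice of cpu0 are assumptions about the conventional cpufreq layout, since the source itself only spells out ".../cpufreq/scaling_cur_freq".

#include <stdio.h>

int main(void)
{
	/* Assumed conventional path; the kernel source above only names ".../cpufreq/scaling_cur_freq". */
	const char *path = "/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq";
	unsigned long khz;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%lu", &khz) != 1) {
		fprintf(stderr, "could not parse %s\n", path);
		fclose(f);
		return 1;
	}
	fclose(f);

	/* The attribute reports kHz; the kernel side is the arch_freq_get_on_cpu() path shown above. */
	printf("cpu0: %lu kHz (%.2f GHz)\n", khz, khz / 1e6);
	return 0;
}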