Linux Audio

Check our new training course

Loading...
v6.13.7
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * CPPC (Collaborative Processor Performance Control) driver for
  4 * interfacing with the CPUfreq layer and governors. See
  5 * cppc_acpi.c for CPPC specific methods.
  6 *
  7 * (C) Copyright 2014, 2015 Linaro Ltd.
  8 * Author: Ashwin Chaugule <ashwin.chaugule@linaro.org>
  9 */
 10
 11#define pr_fmt(fmt)	"CPPC Cpufreq:"	fmt
 12
 13#include <linux/arch_topology.h>
 14#include <linux/kernel.h>
 15#include <linux/module.h>
 16#include <linux/delay.h>
 17#include <linux/cpu.h>
 18#include <linux/cpufreq.h>
 
 19#include <linux/irq_work.h>
 20#include <linux/kthread.h>
 21#include <linux/time.h>
 22#include <linux/vmalloc.h>
 23#include <uapi/linux/sched/types.h>
 24
 25#include <linux/unaligned.h>
 26
 27#include <acpi/cppc_acpi.h>
 28
 
 
 
 
 
 
 29/*
 30 * This list contains information parsed from per CPU ACPI _CPC and _PSD
 31 * structures: e.g. the highest and lowest supported performance, capabilities,
 32 * desired performance, level requested etc. Depending on the share_type, not
 33 * all CPUs will have an entry in the list.
 34 */
 35static LIST_HEAD(cpu_data_list);
 36
 37static bool boost_supported;
 38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 39static struct cpufreq_driver cppc_cpufreq_driver;
 40
 41#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE
 42static enum {
 43	FIE_UNSET = -1,
 44	FIE_ENABLED,
 45	FIE_DISABLED
 46} fie_disabled = FIE_UNSET;
 47
 
 48module_param(fie_disabled, int, 0444);
 49MODULE_PARM_DESC(fie_disabled, "Disable Frequency Invariance Engine (FIE)");
 50
 51/* Frequency invariance support */
 52struct cppc_freq_invariance {
 53	int cpu;
 54	struct irq_work irq_work;
 55	struct kthread_work work;
 56	struct cppc_perf_fb_ctrs prev_perf_fb_ctrs;
 57	struct cppc_cpudata *cpu_data;
 58};
 59
 60static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
 61static struct kthread_worker *kworker_fie;
 62
 
 63static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
 64				 struct cppc_perf_fb_ctrs *fb_ctrs_t0,
 65				 struct cppc_perf_fb_ctrs *fb_ctrs_t1);
 66
 67/**
 68 * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance
 69 * @work: The work item.
 70 *
 71 * The CPPC driver register itself with the topology core to provide its own
 72 * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which
 73 * gets called by the scheduler on every tick.
 74 *
 75 * Note that the arch specific counters have higher priority than CPPC counters,
 76 * if available, though the CPPC driver doesn't need to have any special
 77 * handling for that.
 78 *
 79 * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we
 80 * reach here from hard-irq context), which then schedules a normal work item
 81 * and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable
 82 * based on the counter updates since the last tick.
 83 */
 84static void cppc_scale_freq_workfn(struct kthread_work *work)
 85{
 86	struct cppc_freq_invariance *cppc_fi;
 87	struct cppc_perf_fb_ctrs fb_ctrs = {0};
 88	struct cppc_cpudata *cpu_data;
 89	unsigned long local_freq_scale;
 90	u64 perf;
 91
 92	cppc_fi = container_of(work, struct cppc_freq_invariance, work);
 93	cpu_data = cppc_fi->cpu_data;
 94
 95	if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) {
 96		pr_warn("%s: failed to read perf counters\n", __func__);
 97		return;
 98	}
 99
100	perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs,
101				     &fb_ctrs);
102	if (!perf)
103		return;
104
105	cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
106
107	perf <<= SCHED_CAPACITY_SHIFT;
108	local_freq_scale = div64_u64(perf, cpu_data->perf_caps.highest_perf);
109
110	/* This can happen due to counter's overflow */
111	if (unlikely(local_freq_scale > 1024))
112		local_freq_scale = 1024;
113
114	per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale;
115}
116
117static void cppc_irq_work(struct irq_work *irq_work)
118{
119	struct cppc_freq_invariance *cppc_fi;
120
121	cppc_fi = container_of(irq_work, struct cppc_freq_invariance, irq_work);
122	kthread_queue_work(kworker_fie, &cppc_fi->work);
123}
124
125static void cppc_scale_freq_tick(void)
126{
127	struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id());
128
129	/*
130	 * cppc_get_perf_ctrs() can potentially sleep, call that from the right
131	 * context.
132	 */
133	irq_work_queue(&cppc_fi->irq_work);
134}
135
136static struct scale_freq_data cppc_sftd = {
137	.source = SCALE_FREQ_SOURCE_CPPC,
138	.set_freq_scale = cppc_scale_freq_tick,
139};
140
141static void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy)
142{
143	struct cppc_freq_invariance *cppc_fi;
144	int cpu, ret;
145
146	if (fie_disabled)
147		return;
148
149	for_each_cpu(cpu, policy->cpus) {
150		cppc_fi = &per_cpu(cppc_freq_inv, cpu);
151		cppc_fi->cpu = cpu;
152		cppc_fi->cpu_data = policy->driver_data;
153		kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn);
154		init_irq_work(&cppc_fi->irq_work, cppc_irq_work);
155
156		ret = cppc_get_perf_ctrs(cpu, &cppc_fi->prev_perf_fb_ctrs);
157		if (ret) {
158			pr_warn("%s: failed to read perf counters for cpu:%d: %d\n",
159				__func__, cpu, ret);
160
161			/*
162			 * Don't abort if the CPU was offline while the driver
163			 * was getting registered.
164			 */
165			if (cpu_online(cpu))
166				return;
167		}
168	}
169
170	/* Register for freq-invariance */
171	topology_set_scale_freq_source(&cppc_sftd, policy->cpus);
172}
173
174/*
175 * We free all the resources on policy's removal and not on CPU removal as the
176 * irq-work are per-cpu and the hotplug core takes care of flushing the pending
177 * irq-works (hint: smpcfd_dying_cpu()) on CPU hotplug. Even if the kthread-work
178 * fires on another CPU after the concerned CPU is removed, it won't harm.
179 *
180 * We just need to make sure to remove them all on policy->exit().
181 */
182static void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy)
183{
184	struct cppc_freq_invariance *cppc_fi;
185	int cpu;
186
187	if (fie_disabled)
188		return;
189
190	/* policy->cpus will be empty here, use related_cpus instead */
191	topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, policy->related_cpus);
192
193	for_each_cpu(cpu, policy->related_cpus) {
194		cppc_fi = &per_cpu(cppc_freq_inv, cpu);
195		irq_work_sync(&cppc_fi->irq_work);
196		kthread_cancel_work_sync(&cppc_fi->work);
197	}
198}
199
200static void __init cppc_freq_invariance_init(void)
201{
202	struct sched_attr attr = {
203		.size		= sizeof(struct sched_attr),
204		.sched_policy	= SCHED_DEADLINE,
205		.sched_nice	= 0,
206		.sched_priority	= 0,
207		/*
208		 * Fake (unused) bandwidth; workaround to "fix"
209		 * priority inheritance.
210		 */
211		.sched_runtime	= NSEC_PER_MSEC,
212		.sched_deadline = 10 * NSEC_PER_MSEC,
213		.sched_period	= 10 * NSEC_PER_MSEC,
214	};
215	int ret;
216
217	if (fie_disabled != FIE_ENABLED && fie_disabled != FIE_DISABLED) {
218		fie_disabled = FIE_ENABLED;
219		if (cppc_perf_ctrs_in_pcc()) {
220			pr_info("FIE not enabled on systems with registers in PCC\n");
221			fie_disabled = FIE_DISABLED;
222		}
223	}
224
225	if (fie_disabled)
226		return;
227
228	kworker_fie = kthread_create_worker(0, "cppc_fie");
229	if (IS_ERR(kworker_fie)) {
230		pr_warn("%s: failed to create kworker_fie: %ld\n", __func__,
231			PTR_ERR(kworker_fie));
232		fie_disabled = FIE_DISABLED;
233		return;
234	}
235
236	ret = sched_setattr_nocheck(kworker_fie->task, &attr);
237	if (ret) {
238		pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__,
239			ret);
240		kthread_destroy_worker(kworker_fie);
241		fie_disabled = FIE_DISABLED;
242	}
243}
244
245static void cppc_freq_invariance_exit(void)
246{
247	if (fie_disabled)
248		return;
249
250	kthread_destroy_worker(kworker_fie);
 
251}
252
253#else
/* No-op FIE hooks used when CONFIG_ACPI_CPPC_CPUFREQ_FIE is not set. */
static inline void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy) { }

static inline void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy) { }

static inline void cppc_freq_invariance_init(void) { }

static inline void cppc_freq_invariance_exit(void) { }
269#endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */
270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
272				   unsigned int target_freq,
273				   unsigned int relation)
 
274{
275	struct cppc_cpudata *cpu_data = policy->driver_data;
276	unsigned int cpu = policy->cpu;
277	struct cpufreq_freqs freqs;
 
278	int ret = 0;
279
280	cpu_data->perf_ctrls.desired_perf =
281			cppc_khz_to_perf(&cpu_data->perf_caps, target_freq);
 
 
 
 
282	freqs.old = policy->cur;
283	freqs.new = target_freq;
284
285	cpufreq_freq_transition_begin(policy, &freqs);
286	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
287	cpufreq_freq_transition_end(policy, &freqs, ret != 0);
288
289	if (ret)
290		pr_debug("Failed to set target on CPU:%d. ret:%d\n",
291			 cpu, ret);
292
293	return ret;
294}
295
296static unsigned int cppc_cpufreq_fast_switch(struct cpufreq_policy *policy,
297					      unsigned int target_freq)
298{
299	struct cppc_cpudata *cpu_data = policy->driver_data;
300	unsigned int cpu = policy->cpu;
301	u32 desired_perf;
302	int ret;
303
304	desired_perf = cppc_khz_to_perf(&cpu_data->perf_caps, target_freq);
305	cpu_data->perf_ctrls.desired_perf = desired_perf;
306	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
307
308	if (ret) {
309		pr_debug("Failed to set target on CPU:%d. ret:%d\n",
310			 cpu, ret);
311		return 0;
312	}
313
314	return target_freq;
315}
316
/*
 * cpufreq ->verify() callback: delegate to
 * cpufreq_verify_within_cpu_limits() and always report success.
 */
static int cppc_verify_policy(struct cpufreq_policy_data *policy)
{
	cpufreq_verify_within_cpu_limits(policy);

	return 0;
}
322
323/*
324 * The PCC subspace describes the rate at which platform can accept commands
325 * on the shared PCC channel (including READs which do not count towards freq
326 * transition requests), so ideally we need to use the PCC values as a fallback
327 * if we don't have a platform specific transition_delay_us
328 */
329#ifdef CONFIG_ARM64
330#include <asm/cputype.h>
331
332static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
333{
334	unsigned long implementor = read_cpuid_implementor();
335	unsigned long part_num = read_cpuid_part_number();
336
337	switch (implementor) {
338	case ARM_CPU_IMP_QCOM:
339		switch (part_num) {
340		case QCOM_CPU_PART_FALKOR_V1:
341		case QCOM_CPU_PART_FALKOR:
342			return 10000;
343		}
344	}
345	return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
346}
347#else
348static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
349{
350	return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
351}
352#endif
353
354#if defined(CONFIG_ARM64) && defined(CONFIG_ENERGY_MODEL)
355
356static DEFINE_PER_CPU(unsigned int, efficiency_class);
357static void cppc_cpufreq_register_em(struct cpufreq_policy *policy);
358
359/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */
360#define CPPC_EM_CAP_STEP	(20)
361/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */
362#define CPPC_EM_COST_STEP	(1)
363/* Add a cost gap corresponding to the energy of 4 CPUs. */
364#define CPPC_EM_COST_GAP	(4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \
365				/ CPPC_EM_CAP_STEP)
366
367static unsigned int get_perf_level_count(struct cpufreq_policy *policy)
368{
369	struct cppc_perf_caps *perf_caps;
370	unsigned int min_cap, max_cap;
371	struct cppc_cpudata *cpu_data;
372	int cpu = policy->cpu;
373
374	cpu_data = policy->driver_data;
375	perf_caps = &cpu_data->perf_caps;
376	max_cap = arch_scale_cpu_capacity(cpu);
377	min_cap = div_u64((u64)max_cap * perf_caps->lowest_perf,
378			  perf_caps->highest_perf);
379	if ((min_cap == 0) || (max_cap < min_cap))
380		return 0;
381	return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP;
382}
383
384/*
385 * The cost is defined as:
386 *   cost = power * max_frequency / frequency
387 */
388static inline unsigned long compute_cost(int cpu, int step)
389{
390	return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) +
391			step * CPPC_EM_COST_STEP;
392}
393
394static int cppc_get_cpu_power(struct device *cpu_dev,
395		unsigned long *power, unsigned long *KHz)
396{
397	unsigned long perf_step, perf_prev, perf, perf_check;
398	unsigned int min_step, max_step, step, step_check;
399	unsigned long prev_freq = *KHz;
400	unsigned int min_cap, max_cap;
401	struct cpufreq_policy *policy;
402
403	struct cppc_perf_caps *perf_caps;
404	struct cppc_cpudata *cpu_data;
405
406	policy = cpufreq_cpu_get_raw(cpu_dev->id);
407	if (!policy)
408		return -EINVAL;
409
410	cpu_data = policy->driver_data;
411	perf_caps = &cpu_data->perf_caps;
412	max_cap = arch_scale_cpu_capacity(cpu_dev->id);
413	min_cap = div_u64((u64)max_cap * perf_caps->lowest_perf,
414			  perf_caps->highest_perf);
415	perf_step = div_u64((u64)CPPC_EM_CAP_STEP * perf_caps->highest_perf,
416			    max_cap);
417	min_step = min_cap / CPPC_EM_CAP_STEP;
418	max_step = max_cap / CPPC_EM_CAP_STEP;
419
420	perf_prev = cppc_khz_to_perf(perf_caps, *KHz);
421	step = perf_prev / perf_step;
422
423	if (step > max_step)
424		return -EINVAL;
425
426	if (min_step == max_step) {
427		step = max_step;
428		perf = perf_caps->highest_perf;
429	} else if (step < min_step) {
430		step = min_step;
431		perf = perf_caps->lowest_perf;
432	} else {
433		step++;
434		if (step == max_step)
435			perf = perf_caps->highest_perf;
436		else
437			perf = step * perf_step;
438	}
439
440	*KHz = cppc_perf_to_khz(perf_caps, perf);
441	perf_check = cppc_khz_to_perf(perf_caps, *KHz);
442	step_check = perf_check / perf_step;
443
444	/*
445	 * To avoid bad integer approximation, check that new frequency value
446	 * increased and that the new frequency will be converted to the
447	 * desired step value.
448	 */
449	while ((*KHz == prev_freq) || (step_check != step)) {
450		perf++;
451		*KHz = cppc_perf_to_khz(perf_caps, perf);
452		perf_check = cppc_khz_to_perf(perf_caps, *KHz);
453		step_check = perf_check / perf_step;
454	}
455
456	/*
457	 * With an artificial EM, only the cost value is used. Still the power
458	 * is populated such as 0 < power < EM_MAX_POWER. This allows to add
459	 * more sense to the artificial performance states.
460	 */
461	*power = compute_cost(cpu_dev->id, step);
462
463	return 0;
464}
465
466static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
467		unsigned long *cost)
468{
469	unsigned long perf_step, perf_prev;
470	struct cppc_perf_caps *perf_caps;
471	struct cpufreq_policy *policy;
472	struct cppc_cpudata *cpu_data;
473	unsigned int max_cap;
474	int step;
475
476	policy = cpufreq_cpu_get_raw(cpu_dev->id);
477	if (!policy)
478		return -EINVAL;
479
480	cpu_data = policy->driver_data;
481	perf_caps = &cpu_data->perf_caps;
482	max_cap = arch_scale_cpu_capacity(cpu_dev->id);
483
484	perf_prev = cppc_khz_to_perf(perf_caps, KHz);
485	perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
486	step = perf_prev / perf_step;
487
488	*cost = compute_cost(cpu_dev->id, step);
489
490	return 0;
491}
492
493static int populate_efficiency_class(void)
494{
495	struct acpi_madt_generic_interrupt *gicc;
496	DECLARE_BITMAP(used_classes, 256) = {};
497	int class, cpu, index;
498
499	for_each_possible_cpu(cpu) {
500		gicc = acpi_cpu_get_madt_gicc(cpu);
501		class = gicc->efficiency_class;
502		bitmap_set(used_classes, class, 1);
503	}
504
505	if (bitmap_weight(used_classes, 256) <= 1) {
506		pr_debug("Efficiency classes are all equal (=%d). "
507			"No EM registered", class);
508		return -EINVAL;
509	}
510
511	/*
512	 * Squeeze efficiency class values on [0:#efficiency_class-1].
513	 * Values are per spec in [0:255].
514	 */
515	index = 0;
516	for_each_set_bit(class, used_classes, 256) {
517		for_each_possible_cpu(cpu) {
518			gicc = acpi_cpu_get_madt_gicc(cpu);
519			if (gicc->efficiency_class == class)
520				per_cpu(efficiency_class, cpu) = index;
521		}
522		index++;
523	}
524	cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em;
525
526	return 0;
527}
528
529static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
530{
531	struct cppc_cpudata *cpu_data;
532	struct em_data_callback em_cb =
533		EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost);
534
535	cpu_data = policy->driver_data;
536	em_dev_register_perf_domain(get_cpu_device(policy->cpu),
537			get_perf_level_count(policy), &em_cb,
538			cpu_data->shared_cpu_map, 0);
539}
540
541#else
/* Stub for builds without CONFIG_ARM64 and CONFIG_ENERGY_MODEL: no-op. */
static int populate_efficiency_class(void)
{
	return 0;
}
546#endif
547
548static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu)
549{
550	struct cppc_cpudata *cpu_data;
551	int ret;
552
553	cpu_data = kzalloc(sizeof(struct cppc_cpudata), GFP_KERNEL);
554	if (!cpu_data)
555		goto out;
556
557	if (!zalloc_cpumask_var(&cpu_data->shared_cpu_map, GFP_KERNEL))
558		goto free_cpu;
559
560	ret = acpi_get_psd_map(cpu, cpu_data);
561	if (ret) {
562		pr_debug("Err parsing CPU%d PSD data: ret:%d\n", cpu, ret);
563		goto free_mask;
564	}
565
566	ret = cppc_get_perf_caps(cpu, &cpu_data->perf_caps);
567	if (ret) {
568		pr_debug("Err reading CPU%d perf caps: ret:%d\n", cpu, ret);
569		goto free_mask;
570	}
571
 
 
 
 
572	list_add(&cpu_data->node, &cpu_data_list);
573
574	return cpu_data;
575
576free_mask:
577	free_cpumask_var(cpu_data->shared_cpu_map);
578free_cpu:
579	kfree(cpu_data);
580out:
581	return NULL;
582}
583
584static void cppc_cpufreq_put_cpu_data(struct cpufreq_policy *policy)
585{
586	struct cppc_cpudata *cpu_data = policy->driver_data;
587
588	list_del(&cpu_data->node);
589	free_cpumask_var(cpu_data->shared_cpu_map);
590	kfree(cpu_data);
591	policy->driver_data = NULL;
592}
593
594static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
595{
596	unsigned int cpu = policy->cpu;
597	struct cppc_cpudata *cpu_data;
598	struct cppc_perf_caps *caps;
599	int ret;
600
601	cpu_data = cppc_cpufreq_get_cpu_data(cpu);
602	if (!cpu_data) {
603		pr_err("Error in acquiring _CPC/_PSD data for CPU%d.\n", cpu);
604		return -ENODEV;
605	}
606	caps = &cpu_data->perf_caps;
607	policy->driver_data = cpu_data;
608
609	/*
610	 * Set min to lowest nonlinear perf to avoid any efficiency penalty (see
611	 * Section 8.4.7.1.1.5 of ACPI 6.1 spec)
612	 */
613	policy->min = cppc_perf_to_khz(caps, caps->lowest_nonlinear_perf);
614	policy->max = cppc_perf_to_khz(caps, caps->nominal_perf);
 
 
615
616	/*
617	 * Set cpuinfo.min_freq to Lowest to make the full range of performance
618	 * available if userspace wants to use any perf between lowest & lowest
619	 * nonlinear perf
620	 */
621	policy->cpuinfo.min_freq = cppc_perf_to_khz(caps, caps->lowest_perf);
622	policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->nominal_perf);
 
 
623
624	policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu);
625	policy->shared_type = cpu_data->shared_type;
626
627	switch (policy->shared_type) {
628	case CPUFREQ_SHARED_TYPE_HW:
629	case CPUFREQ_SHARED_TYPE_NONE:
630		/* Nothing to be done - we'll have a policy for each CPU */
631		break;
632	case CPUFREQ_SHARED_TYPE_ANY:
633		/*
634		 * All CPUs in the domain will share a policy and all cpufreq
635		 * operations will use a single cppc_cpudata structure stored
636		 * in policy->driver_data.
637		 */
638		cpumask_copy(policy->cpus, cpu_data->shared_cpu_map);
639		break;
640	default:
641		pr_debug("Unsupported CPU co-ord type: %d\n",
642			 policy->shared_type);
643		ret = -EFAULT;
644		goto out;
645	}
646
647	policy->fast_switch_possible = cppc_allow_fast_switch();
648	policy->dvfs_possible_from_any_cpu = true;
649
650	/*
651	 * If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost
652	 * is supported.
653	 */
654	if (caps->highest_perf > caps->nominal_perf)
655		boost_supported = true;
656
657	/* Set policy->cur to max now. The governors will adjust later. */
658	policy->cur = cppc_perf_to_khz(caps, caps->highest_perf);
659	cpu_data->perf_ctrls.desired_perf =  caps->highest_perf;
660
661	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
662	if (ret) {
663		pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
664			 caps->highest_perf, cpu, ret);
665		goto out;
666	}
667
668	cppc_cpufreq_cpu_fie_init(policy);
669	return 0;
670
671out:
672	cppc_cpufreq_put_cpu_data(policy);
673	return ret;
674}
675
676static void cppc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
677{
678	struct cppc_cpudata *cpu_data = policy->driver_data;
679	struct cppc_perf_caps *caps = &cpu_data->perf_caps;
680	unsigned int cpu = policy->cpu;
681	int ret;
682
683	cppc_cpufreq_cpu_fie_exit(policy);
684
685	cpu_data->perf_ctrls.desired_perf = caps->lowest_perf;
686
687	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
688	if (ret)
689		pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
690			 caps->lowest_perf, cpu, ret);
691
692	cppc_cpufreq_put_cpu_data(policy);
 
693}
694
/*
 * Difference between two counter readings, @t1 taken after @t0.
 *
 * When @t1 has not advanced past @t0 and @t0 still fits in 32 bits, the
 * subtraction is carried out in 32-bit arithmetic, so a 32-bit counter
 * that wrapped between the readings still yields the right delta. In all
 * other cases the plain 64-bit difference is returned.
 */
static inline u64 get_delta(u64 t1, u64 t0)
{
	const u64 max_32bit = ~(u32)0;

	if (t1 <= t0 && t0 <= max_32bit)
		return (u32)t1 - (u32)t0;

	return t1 - t0;
}
702
703static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
704				 struct cppc_perf_fb_ctrs *fb_ctrs_t0,
705				 struct cppc_perf_fb_ctrs *fb_ctrs_t1)
706{
707	u64 delta_reference, delta_delivered;
708	u64 reference_perf;
709
710	reference_perf = fb_ctrs_t0->reference_perf;
711
712	delta_reference = get_delta(fb_ctrs_t1->reference,
713				    fb_ctrs_t0->reference);
714	delta_delivered = get_delta(fb_ctrs_t1->delivered,
715				    fb_ctrs_t0->delivered);
716
717	/*
718	 * Avoid divide-by zero and unchanged feedback counters.
719	 * Leave it for callers to handle.
720	 */
721	if (!delta_reference || !delta_delivered)
722		return 0;
723
724	return (reference_perf * delta_delivered) / delta_reference;
725}
726
/*
 * Read the feedback counters of @cpu twice, 2 usec apart, into @fb_ctrs_t0
 * and @fb_ctrs_t1. Returns the error of the first read that fails, or 0.
 */
static int cppc_get_perf_ctrs_sample(int cpu,
				     struct cppc_perf_fb_ctrs *fb_ctrs_t0,
				     struct cppc_perf_fb_ctrs *fb_ctrs_t1)
{
	int ret = cppc_get_perf_ctrs(cpu, fb_ctrs_t0);

	if (!ret) {
		udelay(2); /* 2usec delay between sampling */
		ret = cppc_get_perf_ctrs(cpu, fb_ctrs_t1);
	}

	return ret;
}
741
742static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
743{
744	struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
745	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
746	struct cppc_cpudata *cpu_data;
747	u64 delivered_perf;
748	int ret;
749
750	if (!policy)
751		return -ENODEV;
752
753	cpu_data = policy->driver_data;
 
 
754
755	cpufreq_cpu_put(policy);
756
757	ret = cppc_get_perf_ctrs_sample(cpu, &fb_ctrs_t0, &fb_ctrs_t1);
758	if (ret) {
759		if (ret == -EFAULT)
760			/* Any of the associated CPPC regs is 0. */
761			goto out_invalid_counters;
762		else
763			return 0;
764	}
765
766	delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
767					       &fb_ctrs_t1);
768	if (!delivered_perf)
769		goto out_invalid_counters;
770
771	return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
772
773out_invalid_counters:
774	/*
775	 * Feedback counters could be unchanged or 0 when a cpu enters a
776	 * low-power idle state, e.g. clock-gated or power-gated.
777	 * Use desired perf for reflecting frequency.  Get the latest register
778	 * value first as some platforms may update the actual delivered perf
779	 * there; if failed, resort to the cached desired perf.
780	 */
781	if (cppc_get_desired_perf(cpu, &delivered_perf))
782		delivered_perf = cpu_data->perf_ctrls.desired_perf;
783
784	return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
785}
786
787static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
788{
789	struct cppc_cpudata *cpu_data = policy->driver_data;
790	struct cppc_perf_caps *caps = &cpu_data->perf_caps;
791	int ret;
792
793	if (!boost_supported) {
794		pr_err("BOOST not supported by CPU or firmware\n");
795		return -EINVAL;
796	}
797
798	if (state)
799		policy->max = cppc_perf_to_khz(caps, caps->highest_perf);
 
800	else
801		policy->max = cppc_perf_to_khz(caps, caps->nominal_perf);
 
802	policy->cpuinfo.max_freq = policy->max;
803
804	ret = freq_qos_update_request(policy->max_freq_req, policy->max);
805	if (ret < 0)
806		return ret;
807
808	return 0;
809}
810
811static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
812{
813	struct cppc_cpudata *cpu_data = policy->driver_data;
814
815	return cpufreq_show_cpus(cpu_data->shared_cpu_map, buf);
816}
817cpufreq_freq_attr_ro(freqdomain_cpus);
818
819static struct freq_attr *cppc_cpufreq_attr[] = {
820	&freqdomain_cpus,
821	NULL,
822};
823
824static struct cpufreq_driver cppc_cpufreq_driver = {
825	.flags = CPUFREQ_CONST_LOOPS,
826	.verify = cppc_verify_policy,
827	.target = cppc_cpufreq_set_target,
828	.get = cppc_cpufreq_get_rate,
829	.fast_switch = cppc_cpufreq_fast_switch,
830	.init = cppc_cpufreq_cpu_init,
831	.exit = cppc_cpufreq_cpu_exit,
832	.set_boost = cppc_cpufreq_set_boost,
833	.attr = cppc_cpufreq_attr,
834	.name = "cppc_cpufreq",
835};
836
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
837static int __init cppc_cpufreq_init(void)
838{
839	int ret;
840
841	if (!acpi_cpc_valid())
842		return -ENODEV;
843
 
844	cppc_freq_invariance_init();
845	populate_efficiency_class();
846
847	ret = cpufreq_register_driver(&cppc_cpufreq_driver);
848	if (ret)
849		cppc_freq_invariance_exit();
850
851	return ret;
852}
853
854static inline void free_cpu_data(void)
855{
856	struct cppc_cpudata *iter, *tmp;
857
858	list_for_each_entry_safe(iter, tmp, &cpu_data_list, node) {
859		free_cpumask_var(iter->shared_cpu_map);
860		list_del(&iter->node);
861		kfree(iter);
862	}
863
864}
865
866static void __exit cppc_cpufreq_exit(void)
867{
868	cpufreq_unregister_driver(&cppc_cpufreq_driver);
869	cppc_freq_invariance_exit();
870
871	free_cpu_data();
872}
873
874module_exit(cppc_cpufreq_exit);
875MODULE_AUTHOR("Ashwin Chaugule");
876MODULE_DESCRIPTION("CPUFreq driver based on the ACPI CPPC v5.0+ spec");
877MODULE_LICENSE("GPL");
878
879late_initcall(cppc_cpufreq_init);
880
881static const struct acpi_device_id cppc_acpi_ids[] __used = {
882	{ACPI_PROCESSOR_DEVICE_HID, },
883	{}
884};
885
886MODULE_DEVICE_TABLE(acpi, cppc_acpi_ids);
v6.2
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * CPPC (Collaborative Processor Performance Control) driver for
   4 * interfacing with the CPUfreq layer and governors. See
   5 * cppc_acpi.c for CPPC specific methods.
   6 *
   7 * (C) Copyright 2014, 2015 Linaro Ltd.
   8 * Author: Ashwin Chaugule <ashwin.chaugule@linaro.org>
   9 */
  10
  11#define pr_fmt(fmt)	"CPPC Cpufreq:"	fmt
  12
  13#include <linux/arch_topology.h>
  14#include <linux/kernel.h>
  15#include <linux/module.h>
  16#include <linux/delay.h>
  17#include <linux/cpu.h>
  18#include <linux/cpufreq.h>
  19#include <linux/dmi.h>
  20#include <linux/irq_work.h>
  21#include <linux/kthread.h>
  22#include <linux/time.h>
  23#include <linux/vmalloc.h>
  24#include <uapi/linux/sched/types.h>
  25
  26#include <asm/unaligned.h>
  27
  28#include <acpi/cppc_acpi.h>
  29
  30/* Minimum struct length needed for the DMI processor entry we want */
  31#define DMI_ENTRY_PROCESSOR_MIN_LENGTH	48
  32
  33/* Offset in the DMI processor structure for the max frequency */
  34#define DMI_PROCESSOR_MAX_SPEED		0x14
  35
  36/*
  37 * This list contains information parsed from per CPU ACPI _CPC and _PSD
  38 * structures: e.g. the highest and lowest supported performance, capabilities,
  39 * desired performance, level requested etc. Depending on the share_type, not
  40 * all CPUs will have an entry in the list.
  41 */
  42static LIST_HEAD(cpu_data_list);
  43
  44static bool boost_supported;
  45
  46struct cppc_workaround_oem_info {
  47	char oem_id[ACPI_OEM_ID_SIZE + 1];
  48	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
  49	u32 oem_revision;
  50};
  51
  52static struct cppc_workaround_oem_info wa_info[] = {
  53	{
  54		.oem_id		= "HISI  ",
  55		.oem_table_id	= "HIP07   ",
  56		.oem_revision	= 0,
  57	}, {
  58		.oem_id		= "HISI  ",
  59		.oem_table_id	= "HIP08   ",
  60		.oem_revision	= 0,
  61	}
  62};
  63
  64static struct cpufreq_driver cppc_cpufreq_driver;
  65
 
  66static enum {
  67	FIE_UNSET = -1,
  68	FIE_ENABLED,
  69	FIE_DISABLED
  70} fie_disabled = FIE_UNSET;
  71
  72#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE
  73module_param(fie_disabled, int, 0444);
  74MODULE_PARM_DESC(fie_disabled, "Disable Frequency Invariance Engine (FIE)");
  75
  76/* Frequency invariance support */
  77struct cppc_freq_invariance {
  78	int cpu;
  79	struct irq_work irq_work;
  80	struct kthread_work work;
  81	struct cppc_perf_fb_ctrs prev_perf_fb_ctrs;
  82	struct cppc_cpudata *cpu_data;
  83};
  84
  85static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
  86static struct kthread_worker *kworker_fie;
  87
  88static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu);
  89static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
  90				 struct cppc_perf_fb_ctrs *fb_ctrs_t0,
  91				 struct cppc_perf_fb_ctrs *fb_ctrs_t1);
  92
  93/**
  94 * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance
  95 * @work: The work item.
  96 *
  97 * The CPPC driver register itself with the topology core to provide its own
  98 * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which
  99 * gets called by the scheduler on every tick.
 100 *
 101 * Note that the arch specific counters have higher priority than CPPC counters,
 102 * if available, though the CPPC driver doesn't need to have any special
 103 * handling for that.
 104 *
 105 * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we
 106 * reach here from hard-irq context), which then schedules a normal work item
 107 * and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable
 108 * based on the counter updates since the last tick.
 109 */
 110static void cppc_scale_freq_workfn(struct kthread_work *work)
 111{
 112	struct cppc_freq_invariance *cppc_fi;
 113	struct cppc_perf_fb_ctrs fb_ctrs = {0};
 114	struct cppc_cpudata *cpu_data;
 115	unsigned long local_freq_scale;
 116	u64 perf;
 117
 118	cppc_fi = container_of(work, struct cppc_freq_invariance, work);
 119	cpu_data = cppc_fi->cpu_data;
 120
 121	if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) {
 122		pr_warn("%s: failed to read perf counters\n", __func__);
 123		return;
 124	}
 125
 126	perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs,
 127				     &fb_ctrs);
 
 
 
 128	cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
 129
 130	perf <<= SCHED_CAPACITY_SHIFT;
 131	local_freq_scale = div64_u64(perf, cpu_data->perf_caps.highest_perf);
 132
 133	/* This can happen due to counter's overflow */
 134	if (unlikely(local_freq_scale > 1024))
 135		local_freq_scale = 1024;
 136
 137	per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale;
 138}
 139
 140static void cppc_irq_work(struct irq_work *irq_work)
 141{
 142	struct cppc_freq_invariance *cppc_fi;
 143
 144	cppc_fi = container_of(irq_work, struct cppc_freq_invariance, irq_work);
 145	kthread_queue_work(kworker_fie, &cppc_fi->work);
 146}
 147
 148static void cppc_scale_freq_tick(void)
 149{
 150	struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id());
 151
 152	/*
 153	 * cppc_get_perf_ctrs() can potentially sleep, call that from the right
 154	 * context.
 155	 */
 156	irq_work_queue(&cppc_fi->irq_work);
 157}
 158
 159static struct scale_freq_data cppc_sftd = {
 160	.source = SCALE_FREQ_SOURCE_CPPC,
 161	.set_freq_scale = cppc_scale_freq_tick,
 162};
 163
 164static void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy)
 165{
 166	struct cppc_freq_invariance *cppc_fi;
 167	int cpu, ret;
 168
 169	if (fie_disabled)
 170		return;
 171
 172	for_each_cpu(cpu, policy->cpus) {
 173		cppc_fi = &per_cpu(cppc_freq_inv, cpu);
 174		cppc_fi->cpu = cpu;
 175		cppc_fi->cpu_data = policy->driver_data;
 176		kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn);
 177		init_irq_work(&cppc_fi->irq_work, cppc_irq_work);
 178
 179		ret = cppc_get_perf_ctrs(cpu, &cppc_fi->prev_perf_fb_ctrs);
 180		if (ret) {
 181			pr_warn("%s: failed to read perf counters for cpu:%d: %d\n",
 182				__func__, cpu, ret);
 183
 184			/*
 185			 * Don't abort if the CPU was offline while the driver
 186			 * was getting registered.
 187			 */
 188			if (cpu_online(cpu))
 189				return;
 190		}
 191	}
 192
 193	/* Register for freq-invariance */
 194	topology_set_scale_freq_source(&cppc_sftd, policy->cpus);
 195}
 196
 197/*
 198 * We free all the resources on policy's removal and not on CPU removal as the
 199 * irq-work are per-cpu and the hotplug core takes care of flushing the pending
 200 * irq-works (hint: smpcfd_dying_cpu()) on CPU hotplug. Even if the kthread-work
 201 * fires on another CPU after the concerned CPU is removed, it won't harm.
 202 *
 203 * We just need to make sure to remove them all on policy->exit().
 204 */
 205static void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy)
 206{
 207	struct cppc_freq_invariance *cppc_fi;
 208	int cpu;
 209
 210	if (fie_disabled)
 211		return;
 212
 213	/* policy->cpus will be empty here, use related_cpus instead */
 214	topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, policy->related_cpus);
 215
 216	for_each_cpu(cpu, policy->related_cpus) {
 217		cppc_fi = &per_cpu(cppc_freq_inv, cpu);
 218		irq_work_sync(&cppc_fi->irq_work);
 219		kthread_cancel_work_sync(&cppc_fi->work);
 220	}
 221}
 222
 223static void __init cppc_freq_invariance_init(void)
 224{
 225	struct sched_attr attr = {
 226		.size		= sizeof(struct sched_attr),
 227		.sched_policy	= SCHED_DEADLINE,
 228		.sched_nice	= 0,
 229		.sched_priority	= 0,
 230		/*
 231		 * Fake (unused) bandwidth; workaround to "fix"
 232		 * priority inheritance.
 233		 */
 234		.sched_runtime	= 1000000,
 235		.sched_deadline = 10000000,
 236		.sched_period	= 10000000,
 237	};
 238	int ret;
 239
 240	if (fie_disabled != FIE_ENABLED && fie_disabled != FIE_DISABLED) {
 241		fie_disabled = FIE_ENABLED;
 242		if (cppc_perf_ctrs_in_pcc()) {
 243			pr_info("FIE not enabled on systems with registers in PCC\n");
 244			fie_disabled = FIE_DISABLED;
 245		}
 246	}
 247
 248	if (fie_disabled)
 249		return;
 250
 251	kworker_fie = kthread_create_worker(0, "cppc_fie");
 252	if (IS_ERR(kworker_fie))
 
 
 
 253		return;
 
 254
 255	ret = sched_setattr_nocheck(kworker_fie->task, &attr);
 256	if (ret) {
 257		pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__,
 258			ret);
 259		kthread_destroy_worker(kworker_fie);
 260		return;
 261	}
 262}
 263
 264static void cppc_freq_invariance_exit(void)
 265{
 266	if (fie_disabled)
 267		return;
 268
 269	kthread_destroy_worker(kworker_fie);
 270	kworker_fie = NULL;
 271}
 272
 273#else
 /* CONFIG_ACPI_CPPC_CPUFREQ_FIE is off: the FIE hooks are deliberate no-ops. */
static inline void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy) { }

static inline void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy) { }

static inline void cppc_freq_invariance_init(void) { }

static inline void cppc_freq_invariance_exit(void) { }
 289#endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */
 290
 291/* Callback function used to retrieve the max frequency from DMI */
 292static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
 293{
 294	const u8 *dmi_data = (const u8 *)dm;
 295	u16 *mhz = (u16 *)private;
 296
 297	if (dm->type == DMI_ENTRY_PROCESSOR &&
 298	    dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) {
 299		u16 val = (u16)get_unaligned((const u16 *)
 300				(dmi_data + DMI_PROCESSOR_MAX_SPEED));
 301		*mhz = val > *mhz ? val : *mhz;
 302	}
 303}
 304
 305/* Look up the max frequency in DMI */
 306static u64 cppc_get_dmi_max_khz(void)
 307{
 308	u16 mhz = 0;
 309
 310	dmi_walk(cppc_find_dmi_mhz, &mhz);
 311
 312	/*
 313	 * Real stupid fallback value, just in case there is no
 314	 * actual value set.
 315	 */
 316	mhz = mhz ? mhz : 1;
 317
 318	return (1000 * mhz);
 319}
 320
 321/*
 322 * If CPPC lowest_freq and nominal_freq registers are exposed then we can
 323 * use them to convert perf to freq and vice versa. The conversion is
 324 * extrapolated as an affine function passing by the 2 points:
 325 *  - (Low perf, Low freq)
 326 *  - (Nominal perf, Nominal perf)
 327 */
 328static unsigned int cppc_cpufreq_perf_to_khz(struct cppc_cpudata *cpu_data,
 329					     unsigned int perf)
 330{
 331	struct cppc_perf_caps *caps = &cpu_data->perf_caps;
 332	s64 retval, offset = 0;
 333	static u64 max_khz;
 334	u64 mul, div;
 335
 336	if (caps->lowest_freq && caps->nominal_freq) {
 337		mul = caps->nominal_freq - caps->lowest_freq;
 338		div = caps->nominal_perf - caps->lowest_perf;
 339		offset = caps->nominal_freq - div64_u64(caps->nominal_perf * mul, div);
 340	} else {
 341		if (!max_khz)
 342			max_khz = cppc_get_dmi_max_khz();
 343		mul = max_khz;
 344		div = caps->highest_perf;
 345	}
 346
 347	retval = offset + div64_u64(perf * mul, div);
 348	if (retval >= 0)
 349		return retval;
 350	return 0;
 351}
 352
 353static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu_data,
 354					     unsigned int freq)
 355{
 356	struct cppc_perf_caps *caps = &cpu_data->perf_caps;
 357	s64 retval, offset = 0;
 358	static u64 max_khz;
 359	u64  mul, div;
 360
 361	if (caps->lowest_freq && caps->nominal_freq) {
 362		mul = caps->nominal_perf - caps->lowest_perf;
 363		div = caps->nominal_freq - caps->lowest_freq;
 364		offset = caps->nominal_perf - div64_u64(caps->nominal_freq * mul, div);
 365	} else {
 366		if (!max_khz)
 367			max_khz = cppc_get_dmi_max_khz();
 368		mul = caps->highest_perf;
 369		div = max_khz;
 370	}
 371
 372	retval = offset + div64_u64(freq * mul, div);
 373	if (retval >= 0)
 374		return retval;
 375	return 0;
 376}
 377
 378static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
 379				   unsigned int target_freq,
 380				   unsigned int relation)
 381
 382{
 383	struct cppc_cpudata *cpu_data = policy->driver_data;
 384	unsigned int cpu = policy->cpu;
 385	struct cpufreq_freqs freqs;
 386	u32 desired_perf;
 387	int ret = 0;
 388
 389	desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq);
 390	/* Return if it is exactly the same perf */
 391	if (desired_perf == cpu_data->perf_ctrls.desired_perf)
 392		return ret;
 393
 394	cpu_data->perf_ctrls.desired_perf = desired_perf;
 395	freqs.old = policy->cur;
 396	freqs.new = target_freq;
 397
 398	cpufreq_freq_transition_begin(policy, &freqs);
 399	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
 400	cpufreq_freq_transition_end(policy, &freqs, ret != 0);
 401
 402	if (ret)
 403		pr_debug("Failed to set target on CPU:%d. ret:%d\n",
 404			 cpu, ret);
 405
 406	return ret;
 407}
 408
 409static unsigned int cppc_cpufreq_fast_switch(struct cpufreq_policy *policy,
 410					      unsigned int target_freq)
 411{
 412	struct cppc_cpudata *cpu_data = policy->driver_data;
 413	unsigned int cpu = policy->cpu;
 414	u32 desired_perf;
 415	int ret;
 416
 417	desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq);
 418	cpu_data->perf_ctrls.desired_perf = desired_perf;
 419	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
 420
 421	if (ret) {
 422		pr_debug("Failed to set target on CPU:%d. ret:%d\n",
 423			 cpu, ret);
 424		return 0;
 425	}
 426
 427	return target_freq;
 428}
 429
 /* Clamp the requested limits to the CPU's limits; always returns 0. */
static int cppc_verify_policy(struct cpufreq_policy_data *policy)
{
	cpufreq_verify_within_cpu_limits(policy);

	return 0;
}
 435
 436/*
 437 * The PCC subspace describes the rate at which platform can accept commands
 438 * on the shared PCC channel (including READs which do not count towards freq
 439 * transition requests), so ideally we need to use the PCC values as a fallback
 440 * if we don't have a platform specific transition_delay_us
 441 */
 442#ifdef CONFIG_ARM64
 443#include <asm/cputype.h>
 444
 445static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
 446{
 447	unsigned long implementor = read_cpuid_implementor();
 448	unsigned long part_num = read_cpuid_part_number();
 449
 450	switch (implementor) {
 451	case ARM_CPU_IMP_QCOM:
 452		switch (part_num) {
 453		case QCOM_CPU_PART_FALKOR_V1:
 454		case QCOM_CPU_PART_FALKOR:
 455			return 10000;
 456		}
 457	}
 458	return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
 459}
 460#else
 461static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
 462{
 463	return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
 464}
 465#endif
 466
 467#if defined(CONFIG_ARM64) && defined(CONFIG_ENERGY_MODEL)
 468
 469static DEFINE_PER_CPU(unsigned int, efficiency_class);
 470static void cppc_cpufreq_register_em(struct cpufreq_policy *policy);
 471
 472/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */
 473#define CPPC_EM_CAP_STEP	(20)
 474/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */
 475#define CPPC_EM_COST_STEP	(1)
 476/* Add a cost gap correspnding to the energy of 4 CPUs. */
 477#define CPPC_EM_COST_GAP	(4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \
 478				/ CPPC_EM_CAP_STEP)
 479
 480static unsigned int get_perf_level_count(struct cpufreq_policy *policy)
 481{
 482	struct cppc_perf_caps *perf_caps;
 483	unsigned int min_cap, max_cap;
 484	struct cppc_cpudata *cpu_data;
 485	int cpu = policy->cpu;
 486
 487	cpu_data = policy->driver_data;
 488	perf_caps = &cpu_data->perf_caps;
 489	max_cap = arch_scale_cpu_capacity(cpu);
 490	min_cap = div_u64((u64)max_cap * perf_caps->lowest_perf,
 491			  perf_caps->highest_perf);
 492	if ((min_cap == 0) || (max_cap < min_cap))
 493		return 0;
 494	return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP;
 495}
 496
 497/*
 498 * The cost is defined as:
 499 *   cost = power * max_frequency / frequency
 500 */
 501static inline unsigned long compute_cost(int cpu, int step)
 502{
 503	return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) +
 504			step * CPPC_EM_COST_STEP;
 505}
 506
 507static int cppc_get_cpu_power(struct device *cpu_dev,
 508		unsigned long *power, unsigned long *KHz)
 509{
 510	unsigned long perf_step, perf_prev, perf, perf_check;
 511	unsigned int min_step, max_step, step, step_check;
 512	unsigned long prev_freq = *KHz;
 513	unsigned int min_cap, max_cap;
 514	struct cpufreq_policy *policy;
 515
 516	struct cppc_perf_caps *perf_caps;
 517	struct cppc_cpudata *cpu_data;
 518
 519	policy = cpufreq_cpu_get_raw(cpu_dev->id);
 
 
 
 520	cpu_data = policy->driver_data;
 521	perf_caps = &cpu_data->perf_caps;
 522	max_cap = arch_scale_cpu_capacity(cpu_dev->id);
 523	min_cap = div_u64((u64)max_cap * perf_caps->lowest_perf,
 524			  perf_caps->highest_perf);
 525	perf_step = div_u64((u64)CPPC_EM_CAP_STEP * perf_caps->highest_perf,
 526			    max_cap);
 527	min_step = min_cap / CPPC_EM_CAP_STEP;
 528	max_step = max_cap / CPPC_EM_CAP_STEP;
 529
 530	perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
 531	step = perf_prev / perf_step;
 532
 533	if (step > max_step)
 534		return -EINVAL;
 535
 536	if (min_step == max_step) {
 537		step = max_step;
 538		perf = perf_caps->highest_perf;
 539	} else if (step < min_step) {
 540		step = min_step;
 541		perf = perf_caps->lowest_perf;
 542	} else {
 543		step++;
 544		if (step == max_step)
 545			perf = perf_caps->highest_perf;
 546		else
 547			perf = step * perf_step;
 548	}
 549
 550	*KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
 551	perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
 552	step_check = perf_check / perf_step;
 553
 554	/*
 555	 * To avoid bad integer approximation, check that new frequency value
 556	 * increased and that the new frequency will be converted to the
 557	 * desired step value.
 558	 */
 559	while ((*KHz == prev_freq) || (step_check != step)) {
 560		perf++;
 561		*KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
 562		perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
 563		step_check = perf_check / perf_step;
 564	}
 565
 566	/*
 567	 * With an artificial EM, only the cost value is used. Still the power
 568	 * is populated such as 0 < power < EM_MAX_POWER. This allows to add
 569	 * more sense to the artificial performance states.
 570	 */
 571	*power = compute_cost(cpu_dev->id, step);
 572
 573	return 0;
 574}
 575
 576static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
 577		unsigned long *cost)
 578{
 579	unsigned long perf_step, perf_prev;
 580	struct cppc_perf_caps *perf_caps;
 581	struct cpufreq_policy *policy;
 582	struct cppc_cpudata *cpu_data;
 583	unsigned int max_cap;
 584	int step;
 585
 586	policy = cpufreq_cpu_get_raw(cpu_dev->id);
 
 
 
 587	cpu_data = policy->driver_data;
 588	perf_caps = &cpu_data->perf_caps;
 589	max_cap = arch_scale_cpu_capacity(cpu_dev->id);
 590
 591	perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz);
 592	perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
 593	step = perf_prev / perf_step;
 594
 595	*cost = compute_cost(cpu_dev->id, step);
 596
 597	return 0;
 598}
 599
 600static int populate_efficiency_class(void)
 601{
 602	struct acpi_madt_generic_interrupt *gicc;
 603	DECLARE_BITMAP(used_classes, 256) = {};
 604	int class, cpu, index;
 605
 606	for_each_possible_cpu(cpu) {
 607		gicc = acpi_cpu_get_madt_gicc(cpu);
 608		class = gicc->efficiency_class;
 609		bitmap_set(used_classes, class, 1);
 610	}
 611
 612	if (bitmap_weight(used_classes, 256) <= 1) {
 613		pr_debug("Efficiency classes are all equal (=%d). "
 614			"No EM registered", class);
 615		return -EINVAL;
 616	}
 617
 618	/*
 619	 * Squeeze efficiency class values on [0:#efficiency_class-1].
 620	 * Values are per spec in [0:255].
 621	 */
 622	index = 0;
 623	for_each_set_bit(class, used_classes, 256) {
 624		for_each_possible_cpu(cpu) {
 625			gicc = acpi_cpu_get_madt_gicc(cpu);
 626			if (gicc->efficiency_class == class)
 627				per_cpu(efficiency_class, cpu) = index;
 628		}
 629		index++;
 630	}
 631	cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em;
 632
 633	return 0;
 634}
 635
 636static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
 637{
 638	struct cppc_cpudata *cpu_data;
 639	struct em_data_callback em_cb =
 640		EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost);
 641
 642	cpu_data = policy->driver_data;
 643	em_dev_register_perf_domain(get_cpu_device(policy->cpu),
 644			get_perf_level_count(policy), &em_cb,
 645			cpu_data->shared_cpu_map, 0);
 646}
 647
 648#else
 /* No ARM64 energy-model support built in: nothing to populate. */
static int populate_efficiency_class(void)
{
	return 0;
}
 653#endif
 654
 655static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu)
 656{
 657	struct cppc_cpudata *cpu_data;
 658	int ret;
 659
 660	cpu_data = kzalloc(sizeof(struct cppc_cpudata), GFP_KERNEL);
 661	if (!cpu_data)
 662		goto out;
 663
 664	if (!zalloc_cpumask_var(&cpu_data->shared_cpu_map, GFP_KERNEL))
 665		goto free_cpu;
 666
 667	ret = acpi_get_psd_map(cpu, cpu_data);
 668	if (ret) {
 669		pr_debug("Err parsing CPU%d PSD data: ret:%d\n", cpu, ret);
 670		goto free_mask;
 671	}
 672
 673	ret = cppc_get_perf_caps(cpu, &cpu_data->perf_caps);
 674	if (ret) {
 675		pr_debug("Err reading CPU%d perf caps: ret:%d\n", cpu, ret);
 676		goto free_mask;
 677	}
 678
 679	/* Convert the lowest and nominal freq from MHz to KHz */
 680	cpu_data->perf_caps.lowest_freq *= 1000;
 681	cpu_data->perf_caps.nominal_freq *= 1000;
 682
 683	list_add(&cpu_data->node, &cpu_data_list);
 684
 685	return cpu_data;
 686
 687free_mask:
 688	free_cpumask_var(cpu_data->shared_cpu_map);
 689free_cpu:
 690	kfree(cpu_data);
 691out:
 692	return NULL;
 693}
 694
 695static void cppc_cpufreq_put_cpu_data(struct cpufreq_policy *policy)
 696{
 697	struct cppc_cpudata *cpu_data = policy->driver_data;
 698
 699	list_del(&cpu_data->node);
 700	free_cpumask_var(cpu_data->shared_cpu_map);
 701	kfree(cpu_data);
 702	policy->driver_data = NULL;
 703}
 704
 705static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 706{
 707	unsigned int cpu = policy->cpu;
 708	struct cppc_cpudata *cpu_data;
 709	struct cppc_perf_caps *caps;
 710	int ret;
 711
 712	cpu_data = cppc_cpufreq_get_cpu_data(cpu);
 713	if (!cpu_data) {
 714		pr_err("Error in acquiring _CPC/_PSD data for CPU%d.\n", cpu);
 715		return -ENODEV;
 716	}
 717	caps = &cpu_data->perf_caps;
 718	policy->driver_data = cpu_data;
 719
 720	/*
 721	 * Set min to lowest nonlinear perf to avoid any efficiency penalty (see
 722	 * Section 8.4.7.1.1.5 of ACPI 6.1 spec)
 723	 */
 724	policy->min = cppc_cpufreq_perf_to_khz(cpu_data,
 725					       caps->lowest_nonlinear_perf);
 726	policy->max = cppc_cpufreq_perf_to_khz(cpu_data,
 727					       caps->nominal_perf);
 728
 729	/*
 730	 * Set cpuinfo.min_freq to Lowest to make the full range of performance
 731	 * available if userspace wants to use any perf between lowest & lowest
 732	 * nonlinear perf
 733	 */
 734	policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu_data,
 735							    caps->lowest_perf);
 736	policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu_data,
 737							    caps->nominal_perf);
 738
 739	policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu);
 740	policy->shared_type = cpu_data->shared_type;
 741
 742	switch (policy->shared_type) {
 743	case CPUFREQ_SHARED_TYPE_HW:
 744	case CPUFREQ_SHARED_TYPE_NONE:
 745		/* Nothing to be done - we'll have a policy for each CPU */
 746		break;
 747	case CPUFREQ_SHARED_TYPE_ANY:
 748		/*
 749		 * All CPUs in the domain will share a policy and all cpufreq
 750		 * operations will use a single cppc_cpudata structure stored
 751		 * in policy->driver_data.
 752		 */
 753		cpumask_copy(policy->cpus, cpu_data->shared_cpu_map);
 754		break;
 755	default:
 756		pr_debug("Unsupported CPU co-ord type: %d\n",
 757			 policy->shared_type);
 758		ret = -EFAULT;
 759		goto out;
 760	}
 761
 762	policy->fast_switch_possible = cppc_allow_fast_switch();
 763	policy->dvfs_possible_from_any_cpu = true;
 764
 765	/*
 766	 * If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost
 767	 * is supported.
 768	 */
 769	if (caps->highest_perf > caps->nominal_perf)
 770		boost_supported = true;
 771
 772	/* Set policy->cur to max now. The governors will adjust later. */
 773	policy->cur = cppc_cpufreq_perf_to_khz(cpu_data, caps->highest_perf);
 774	cpu_data->perf_ctrls.desired_perf =  caps->highest_perf;
 775
 776	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
 777	if (ret) {
 778		pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
 779			 caps->highest_perf, cpu, ret);
 780		goto out;
 781	}
 782
 783	cppc_cpufreq_cpu_fie_init(policy);
 784	return 0;
 785
 786out:
 787	cppc_cpufreq_put_cpu_data(policy);
 788	return ret;
 789}
 790
 791static int cppc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 792{
 793	struct cppc_cpudata *cpu_data = policy->driver_data;
 794	struct cppc_perf_caps *caps = &cpu_data->perf_caps;
 795	unsigned int cpu = policy->cpu;
 796	int ret;
 797
 798	cppc_cpufreq_cpu_fie_exit(policy);
 799
 800	cpu_data->perf_ctrls.desired_perf = caps->lowest_perf;
 801
 802	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
 803	if (ret)
 804		pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
 805			 caps->lowest_perf, cpu, ret);
 806
 807	cppc_cpufreq_put_cpu_data(policy);
 808	return 0;
 809}
 810
 811static inline u64 get_delta(u64 t1, u64 t0)
 812{
 813	if (t1 > t0 || t0 > ~(u32)0)
 814		return t1 - t0;
 815
 816	return (u32)t1 - (u32)t0;
 817}
 818
 819static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
 820				 struct cppc_perf_fb_ctrs *fb_ctrs_t0,
 821				 struct cppc_perf_fb_ctrs *fb_ctrs_t1)
 822{
 823	u64 delta_reference, delta_delivered;
 824	u64 reference_perf;
 825
 826	reference_perf = fb_ctrs_t0->reference_perf;
 827
 828	delta_reference = get_delta(fb_ctrs_t1->reference,
 829				    fb_ctrs_t0->reference);
 830	delta_delivered = get_delta(fb_ctrs_t1->delivered,
 831				    fb_ctrs_t0->delivered);
 832
 833	/* Check to avoid divide-by zero and invalid delivered_perf */
 
 
 
 834	if (!delta_reference || !delta_delivered)
 835		return cpu_data->perf_ctrls.desired_perf;
 836
 837	return (reference_perf * delta_delivered) / delta_reference;
 838}
 839
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 840static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
 841{
 842	struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
 843	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 844	struct cppc_cpudata *cpu_data = policy->driver_data;
 845	u64 delivered_perf;
 846	int ret;
 847
 848	cpufreq_cpu_put(policy);
 
 849
 850	ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
 851	if (ret)
 852		return ret;
 853
 854	udelay(2); /* 2usec delay between sampling */
 855
 856	ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
 857	if (ret)
 858		return ret;
 
 
 
 
 
 859
 860	delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
 861					       &fb_ctrs_t1);
 
 
 862
 863	return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
 
 
 
 
 
 
 
 
 
 
 
 
 
 864}
 865
 866static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
 867{
 868	struct cppc_cpudata *cpu_data = policy->driver_data;
 869	struct cppc_perf_caps *caps = &cpu_data->perf_caps;
 870	int ret;
 871
 872	if (!boost_supported) {
 873		pr_err("BOOST not supported by CPU or firmware\n");
 874		return -EINVAL;
 875	}
 876
 877	if (state)
 878		policy->max = cppc_cpufreq_perf_to_khz(cpu_data,
 879						       caps->highest_perf);
 880	else
 881		policy->max = cppc_cpufreq_perf_to_khz(cpu_data,
 882						       caps->nominal_perf);
 883	policy->cpuinfo.max_freq = policy->max;
 884
 885	ret = freq_qos_update_request(policy->max_freq_req, policy->max);
 886	if (ret < 0)
 887		return ret;
 888
 889	return 0;
 890}
 891
 892static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
 893{
 894	struct cppc_cpudata *cpu_data = policy->driver_data;
 895
 896	return cpufreq_show_cpus(cpu_data->shared_cpu_map, buf);
 897}
 898cpufreq_freq_attr_ro(freqdomain_cpus);
 899
 900static struct freq_attr *cppc_cpufreq_attr[] = {
 901	&freqdomain_cpus,
 902	NULL,
 903};
 904
 905static struct cpufreq_driver cppc_cpufreq_driver = {
 906	.flags = CPUFREQ_CONST_LOOPS,
 907	.verify = cppc_verify_policy,
 908	.target = cppc_cpufreq_set_target,
 909	.get = cppc_cpufreq_get_rate,
 910	.fast_switch = cppc_cpufreq_fast_switch,
 911	.init = cppc_cpufreq_cpu_init,
 912	.exit = cppc_cpufreq_cpu_exit,
 913	.set_boost = cppc_cpufreq_set_boost,
 914	.attr = cppc_cpufreq_attr,
 915	.name = "cppc_cpufreq",
 916};
 917
 918/*
 919 * HISI platform does not support delivered performance counter and
 920 * reference performance counter. It can calculate the performance using the
 921 * platform specific mechanism. We reuse the desired performance register to
 922 * store the real performance calculated by the platform.
 923 */
 924static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu)
 925{
 926	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 927	struct cppc_cpudata *cpu_data = policy->driver_data;
 928	u64 desired_perf;
 929	int ret;
 930
 931	cpufreq_cpu_put(policy);
 932
 933	ret = cppc_get_desired_perf(cpu, &desired_perf);
 934	if (ret < 0)
 935		return -EIO;
 936
 937	return cppc_cpufreq_perf_to_khz(cpu_data, desired_perf);
 938}
 939
 940static void cppc_check_hisi_workaround(void)
 941{
 942	struct acpi_table_header *tbl;
 943	acpi_status status = AE_OK;
 944	int i;
 945
 946	status = acpi_get_table(ACPI_SIG_PCCT, 0, &tbl);
 947	if (ACPI_FAILURE(status) || !tbl)
 948		return;
 949
 950	for (i = 0; i < ARRAY_SIZE(wa_info); i++) {
 951		if (!memcmp(wa_info[i].oem_id, tbl->oem_id, ACPI_OEM_ID_SIZE) &&
 952		    !memcmp(wa_info[i].oem_table_id, tbl->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) &&
 953		    wa_info[i].oem_revision == tbl->oem_revision) {
 954			/* Overwrite the get() callback */
 955			cppc_cpufreq_driver.get = hisi_cppc_cpufreq_get_rate;
 956			fie_disabled = FIE_DISABLED;
 957			break;
 958		}
 959	}
 960
 961	acpi_put_table(tbl);
 962}
 963
 964static int __init cppc_cpufreq_init(void)
 965{
 966	int ret;
 967
 968	if (!acpi_cpc_valid())
 969		return -ENODEV;
 970
 971	cppc_check_hisi_workaround();
 972	cppc_freq_invariance_init();
 973	populate_efficiency_class();
 974
 975	ret = cpufreq_register_driver(&cppc_cpufreq_driver);
 976	if (ret)
 977		cppc_freq_invariance_exit();
 978
 979	return ret;
 980}
 981
 982static inline void free_cpu_data(void)
 983{
 984	struct cppc_cpudata *iter, *tmp;
 985
 986	list_for_each_entry_safe(iter, tmp, &cpu_data_list, node) {
 987		free_cpumask_var(iter->shared_cpu_map);
 988		list_del(&iter->node);
 989		kfree(iter);
 990	}
 991
 992}
 993
 994static void __exit cppc_cpufreq_exit(void)
 995{
 996	cpufreq_unregister_driver(&cppc_cpufreq_driver);
 997	cppc_freq_invariance_exit();
 998
 999	free_cpu_data();
1000}
1001
1002module_exit(cppc_cpufreq_exit);
1003MODULE_AUTHOR("Ashwin Chaugule");
1004MODULE_DESCRIPTION("CPUFreq driver based on the ACPI CPPC v5.0+ spec");
1005MODULE_LICENSE("GPL");
1006
1007late_initcall(cppc_cpufreq_init);
1008
1009static const struct acpi_device_id cppc_acpi_ids[] __used = {
1010	{ACPI_PROCESSOR_DEVICE_HID, },
1011	{}
1012};
1013
1014MODULE_DEVICE_TABLE(acpi, cppc_acpi_ids);