1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * amd-pstate.c - AMD Processor P-state Frequency Driver
4 *
5 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
6 *
7 * Author: Huang Rui <ray.huang@amd.com>
8 *
 9 * AMD P-State introduces a new CPU performance scaling design for AMD
 10 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
 11 * feature, which works with the AMD SMU firmware to provide a finer-grained
 12 * frequency control range. It replaces the legacy ACPI P-States control and
 13 * provides a flexible, low-latency interface for the Linux kernel to directly
 14 * communicate performance hints to the hardware.
 15 *
 16 * AMD P-State is supported on recent AMD Zen-based CPU series, including some
 17 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of a
 18 * system that supports AMD P-State. There are two types of hardware
 19 * implementations: 1) Full MSR Solution and 2) Shared Memory Solution. The
 20 * X86_FEATURE_CPPC CPU feature flag is used to distinguish between them.
21 */
22
23#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
24
25#include <linux/kernel.h>
26#include <linux/module.h>
27#include <linux/init.h>
28#include <linux/smp.h>
29#include <linux/sched.h>
30#include <linux/cpufreq.h>
31#include <linux/compiler.h>
32#include <linux/dmi.h>
33#include <linux/slab.h>
34#include <linux/acpi.h>
35#include <linux/io.h>
36#include <linux/delay.h>
37#include <linux/uaccess.h>
38#include <linux/static_call.h>
39#include <linux/amd-pstate.h>
40#include <linux/topology.h>
41
42#include <acpi/processor.h>
43#include <acpi/cppc_acpi.h>
44
45#include <asm/msr.h>
46#include <asm/processor.h>
47#include <asm/cpufeature.h>
48#include <asm/cpu_device_id.h>
49#include "amd-pstate-trace.h"
50
51#define AMD_PSTATE_TRANSITION_LATENCY 20000
52#define AMD_PSTATE_TRANSITION_DELAY 1000
53#define CPPC_HIGHEST_PERF_PERFORMANCE 196
54#define CPPC_HIGHEST_PERF_DEFAULT 166
55
56/*
 57 * TODO: We need more time, together with the community, to fine tune the
 58 * processors that use the shared memory solution.
 59 *
 60 * There are some performance drops on CPU benchmarks reported by SUSE.
 61 * We are working with them to fine tune the shared memory solution, so it
 62 * is disabled by default and those processors fall back to acpi-cpufreq. A
 63 * module parameter allows it to be enabled manually for debugging.
64 */
65static struct cpufreq_driver *current_pstate_driver;
66static struct cpufreq_driver amd_pstate_driver;
67static struct cpufreq_driver amd_pstate_epp_driver;
68static int cppc_state = AMD_PSTATE_UNDEFINED;
69static bool cppc_enabled;
70static bool amd_pstate_prefcore = true;
71
72/*
73 * AMD Energy Preference Performance (EPP)
 74 * The EPP is used in the CCLK DPM controller to drive
 75 * the frequency that a core is going to operate at during
 76 * short periods of activity. EPP values are used for the
 77 * different OS profiles (balanced, performance, power savings).
 78 * The display strings below correspond to the EPP index in
 79 * energy_perf_strings[]:
80 * index String
81 *-------------------------------------
82 * 0 default
83 * 1 performance
84 * 2 balance_performance
85 * 3 balance_power
86 * 4 power
87 */
88enum energy_perf_value_index {
89 EPP_INDEX_DEFAULT = 0,
90 EPP_INDEX_PERFORMANCE,
91 EPP_INDEX_BALANCE_PERFORMANCE,
92 EPP_INDEX_BALANCE_POWERSAVE,
93 EPP_INDEX_POWERSAVE,
94};
95
96static const char * const energy_perf_strings[] = {
97 [EPP_INDEX_DEFAULT] = "default",
98 [EPP_INDEX_PERFORMANCE] = "performance",
99 [EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
100 [EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
101 [EPP_INDEX_POWERSAVE] = "power",
102 NULL
103};
104
105static unsigned int epp_values[] = {
106 [EPP_INDEX_DEFAULT] = 0,
107 [EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
108 [EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
109 [EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
110 [EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
111 };
112
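/*
 * Note (illustrative, not taken from the hardware documentation): on the full
 * MSR solution the EPP hint is an 8-bit field that amd_pstate_set_epp() below
 * writes into bits 31:24 of MSR_AMD_CPPC_REQ; on shared memory designs it is
 * passed to the platform through cppc_set_epp_perf(). The AMD_CPPC_EPP_*
 * constants above are expected to range from the most performance-biased
 * value (0x00) to the most power-biased one (0xff). A userspace request such
 * as
 *
 *   echo balance_power > /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference
 *
 * therefore ends up storing epp_values[EPP_INDEX_BALANCE_POWERSAVE] in that
 * field for CPU 0.
 */
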
113typedef int (*cppc_mode_transition_fn)(int);
114
115static inline int get_mode_idx_from_str(const char *str, size_t size)
116{
117 int i;
118
 119 for (i = 0; i < AMD_PSTATE_MAX; i++) {
120 if (!strncmp(str, amd_pstate_mode_string[i], size))
121 return i;
122 }
123 return -EINVAL;
124}
125
126static DEFINE_MUTEX(amd_pstate_limits_lock);
127static DEFINE_MUTEX(amd_pstate_driver_lock);
128
129static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
130{
131 u64 epp;
132 int ret;
133
134 if (boot_cpu_has(X86_FEATURE_CPPC)) {
135 if (!cppc_req_cached) {
136 epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
137 &cppc_req_cached);
138 if (epp)
139 return epp;
140 }
141 epp = (cppc_req_cached >> 24) & 0xFF;
142 } else {
143 ret = cppc_get_epp_perf(cpudata->cpu, &epp);
144 if (ret < 0) {
145 pr_debug("Could not retrieve energy perf value (%d)\n", ret);
146 return -EIO;
147 }
148 }
149
150 return (s16)(epp & 0xff);
151}
152
153static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
154{
155 s16 epp;
156 int index = -EINVAL;
157
158 epp = amd_pstate_get_epp(cpudata, 0);
159 if (epp < 0)
160 return epp;
161
162 switch (epp) {
163 case AMD_CPPC_EPP_PERFORMANCE:
164 index = EPP_INDEX_PERFORMANCE;
165 break;
166 case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
167 index = EPP_INDEX_BALANCE_PERFORMANCE;
168 break;
169 case AMD_CPPC_EPP_BALANCE_POWERSAVE:
170 index = EPP_INDEX_BALANCE_POWERSAVE;
171 break;
172 case AMD_CPPC_EPP_POWERSAVE:
173 index = EPP_INDEX_POWERSAVE;
174 break;
175 default:
176 break;
177 }
178
179 return index;
180}
181
182static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
183{
184 int ret;
185 struct cppc_perf_ctrls perf_ctrls;
186
187 if (boot_cpu_has(X86_FEATURE_CPPC)) {
188 u64 value = READ_ONCE(cpudata->cppc_req_cached);
189
190 value &= ~GENMASK_ULL(31, 24);
191 value |= (u64)epp << 24;
192 WRITE_ONCE(cpudata->cppc_req_cached, value);
193
194 ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
195 if (!ret)
196 cpudata->epp_cached = epp;
197 } else {
198 perf_ctrls.energy_perf = epp;
199 ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
200 if (ret) {
201 pr_debug("failed to set energy perf value (%d)\n", ret);
202 return ret;
203 }
204 cpudata->epp_cached = epp;
205 }
206
207 return ret;
208}
209
210static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
211 int pref_index)
212{
213 int epp = -EINVAL;
214 int ret;
215
216 if (!pref_index) {
217 pr_debug("EPP pref_index is invalid\n");
218 return -EINVAL;
219 }
220
221 if (epp == -EINVAL)
222 epp = epp_values[pref_index];
223
224 if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
225 pr_debug("EPP cannot be set under performance policy\n");
226 return -EBUSY;
227 }
228
229 ret = amd_pstate_set_epp(cpudata, epp);
230
231 return ret;
232}
233
234static inline int pstate_enable(bool enable)
235{
236 int ret, cpu;
237 unsigned long logical_proc_id_mask = 0;
238
239 if (enable == cppc_enabled)
240 return 0;
241
242 for_each_present_cpu(cpu) {
243 unsigned long logical_id = topology_logical_die_id(cpu);
244
245 if (test_bit(logical_id, &logical_proc_id_mask))
246 continue;
247
248 set_bit(logical_id, &logical_proc_id_mask);
249
250 ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
251 enable);
252 if (ret)
253 return ret;
254 }
255
256 cppc_enabled = enable;
257 return 0;
258}
259
260static int cppc_enable(bool enable)
261{
262 int cpu, ret = 0;
263 struct cppc_perf_ctrls perf_ctrls;
264
265 if (enable == cppc_enabled)
266 return 0;
267
268 for_each_present_cpu(cpu) {
269 ret = cppc_set_enable(cpu, enable);
270 if (ret)
271 return ret;
272
273 /* Enable autonomous mode for EPP */
274 if (cppc_state == AMD_PSTATE_ACTIVE) {
275 /* Set desired perf as zero to allow EPP firmware control */
276 perf_ctrls.desired_perf = 0;
277 ret = cppc_set_perf(cpu, &perf_ctrls);
278 if (ret)
279 return ret;
280 }
281 }
282
283 cppc_enabled = enable;
284 return ret;
285}
286
287DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable);
288
289static inline int amd_pstate_enable(bool enable)
290{
291 return static_call(amd_pstate_enable)(enable);
292}
293
294static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
295{
296 struct cpuinfo_x86 *c = &cpu_data(0);
297
298 /*
299 * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
300 * the highest performance level is set to 196.
301 * https://bugzilla.kernel.org/show_bug.cgi?id=218759
302 */
303 if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f))
304 return CPPC_HIGHEST_PERF_PERFORMANCE;
305
306 return CPPC_HIGHEST_PERF_DEFAULT;
307}
308
309static int pstate_init_perf(struct amd_cpudata *cpudata)
310{
311 u64 cap1;
312 u32 highest_perf;
313
314 int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
315 &cap1);
316 if (ret)
317 return ret;
318
 319 /* For platforms that do not support the preferred core feature, the
 320 * highest_perf may be configured as 166 or 255. To avoid the max
 321 * frequency being calculated incorrectly, we take the
 322 * AMD_CPPC_HIGHEST_PERF(cap1) value as the default max perf.
 323 */
324 if (cpudata->hw_prefcore)
325 highest_perf = amd_pstate_highest_perf_set(cpudata);
326 else
327 highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
328
329 WRITE_ONCE(cpudata->highest_perf, highest_perf);
330 WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
331 WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
332 WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
333 WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
334 WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
335 WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
336 return 0;
337}
338
339static int cppc_init_perf(struct amd_cpudata *cpudata)
340{
341 struct cppc_perf_caps cppc_perf;
342 u32 highest_perf;
343
344 int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
345 if (ret)
346 return ret;
347
348 if (cpudata->hw_prefcore)
349 highest_perf = amd_pstate_highest_perf_set(cpudata);
350 else
351 highest_perf = cppc_perf.highest_perf;
352
353 WRITE_ONCE(cpudata->highest_perf, highest_perf);
354 WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
355 WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
356 WRITE_ONCE(cpudata->lowest_nonlinear_perf,
357 cppc_perf.lowest_nonlinear_perf);
358 WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
359 WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
360 WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);
361
362 if (cppc_state == AMD_PSTATE_ACTIVE)
363 return 0;
364
365 ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
366 if (ret) {
367 pr_warn("failed to get auto_sel, ret: %d\n", ret);
368 return 0;
369 }
370
371 ret = cppc_set_auto_sel(cpudata->cpu,
372 (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
373
374 if (ret)
375 pr_warn("failed to set auto_sel, ret: %d\n", ret);
376
377 return ret;
378}
379
380DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);
381
382static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
383{
384 return static_call(amd_pstate_init_perf)(cpudata);
385}
386
387static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
388 u32 des_perf, u32 max_perf, bool fast_switch)
389{
390 if (fast_switch)
391 wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
392 else
393 wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
394 READ_ONCE(cpudata->cppc_req_cached));
395}
396
397static void cppc_update_perf(struct amd_cpudata *cpudata,
398 u32 min_perf, u32 des_perf,
399 u32 max_perf, bool fast_switch)
400{
401 struct cppc_perf_ctrls perf_ctrls;
402
403 perf_ctrls.max_perf = max_perf;
404 perf_ctrls.min_perf = min_perf;
405 perf_ctrls.desired_perf = des_perf;
406
407 cppc_set_perf(cpudata->cpu, &perf_ctrls);
408}
409
410DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);
411
412static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
413 u32 min_perf, u32 des_perf,
414 u32 max_perf, bool fast_switch)
415{
416 static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
417 max_perf, fast_switch);
418}
419
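/*
 * Note: pstate_update_perf()/cppc_update_perf() (and the enable/init_perf
 * pairs above) are the two back ends behind one set of static calls. They
 * default to the full MSR implementations; on shared memory designs, where
 * X86_FEATURE_CPPC is not set, amd_pstate_init() re-targets them at load
 * time, roughly:
 *
 *   static_call_update(amd_pstate_enable, cppc_enable);
 *   static_call_update(amd_pstate_init_perf, cppc_init_perf);
 *   static_call_update(amd_pstate_update_perf, cppc_update_perf);
 */
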
420static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
421{
422 u64 aperf, mperf, tsc;
423 unsigned long flags;
424
425 local_irq_save(flags);
426 rdmsrl(MSR_IA32_APERF, aperf);
427 rdmsrl(MSR_IA32_MPERF, mperf);
428 tsc = rdtsc();
429
430 if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
431 local_irq_restore(flags);
432 return false;
433 }
434
435 local_irq_restore(flags);
436
437 cpudata->cur.aperf = aperf;
438 cpudata->cur.mperf = mperf;
439 cpudata->cur.tsc = tsc;
440 cpudata->cur.aperf -= cpudata->prev.aperf;
441 cpudata->cur.mperf -= cpudata->prev.mperf;
442 cpudata->cur.tsc -= cpudata->prev.tsc;
443
444 cpudata->prev.aperf = aperf;
445 cpudata->prev.mperf = mperf;
446 cpudata->prev.tsc = tsc;
447
448 cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);
449
450 return true;
451}
452
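/*
 * Note: the frequency computed in amd_pstate_sample() is the usual
 * APERF/MPERF estimate,
 *
 *   freq = (delta_aperf / delta_mperf) * cpu_khz
 *
 * e.g. (hypothetical numbers) a sampling window where APERF advanced twice as
 * far as MPERF yields roughly 2 * cpu_khz. The result is reported through the
 * amd_pstate_perf tracepoint in amd_pstate_update() below.
 */
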
453static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
454 u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags)
455{
456 u64 prev = READ_ONCE(cpudata->cppc_req_cached);
457 u64 value = prev;
458
459 min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
460 cpudata->max_limit_perf);
461 max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
462 cpudata->max_limit_perf);
463 des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
464
465 if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
466 min_perf = des_perf;
467 des_perf = 0;
468 }
469
470 value &= ~AMD_CPPC_MIN_PERF(~0L);
471 value |= AMD_CPPC_MIN_PERF(min_perf);
472
473 value &= ~AMD_CPPC_DES_PERF(~0L);
474 value |= AMD_CPPC_DES_PERF(des_perf);
475
476 value &= ~AMD_CPPC_MAX_PERF(~0L);
477 value |= AMD_CPPC_MAX_PERF(max_perf);
478
479 if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
480 trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
481 cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
482 cpudata->cpu, (value != prev), fast_switch);
483 }
484
485 if (value == prev)
486 return;
487
488 WRITE_ONCE(cpudata->cppc_req_cached, value);
489
490 amd_pstate_update_perf(cpudata, min_perf, des_perf,
491 max_perf, fast_switch);
492}
493
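/*
 * Note: amd_pstate_update() packs the clamped limits into the cached CPPC
 * request value using the AMD_CPPC_{MIN,DES,MAX}_PERF() field macros and then
 * hands it to the MSR or shared-memory back end. In guided mode with a
 * dynamically switching governor, the desired perf field is cleared and the
 * requested level is expressed through min perf instead, leaving the final
 * choice within [min, max] to the platform.
 */
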
494static int amd_pstate_verify(struct cpufreq_policy_data *policy)
495{
496 cpufreq_verify_within_cpu_limits(policy);
497
498 return 0;
499}
500
501static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
502{
503 u32 max_limit_perf, min_limit_perf, lowest_perf;
504 struct amd_cpudata *cpudata = policy->driver_data;
505
506 max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
507 min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
508
509 lowest_perf = READ_ONCE(cpudata->lowest_perf);
510 if (min_limit_perf < lowest_perf)
511 min_limit_perf = lowest_perf;
512
513 if (max_limit_perf < min_limit_perf)
514 max_limit_perf = min_limit_perf;
515
516 WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
517 WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
518 WRITE_ONCE(cpudata->max_limit_freq, policy->max);
519 WRITE_ONCE(cpudata->min_limit_freq, policy->min);
520
521 return 0;
522}
523
524static int amd_pstate_update_freq(struct cpufreq_policy *policy,
525 unsigned int target_freq, bool fast_switch)
526{
527 struct cpufreq_freqs freqs;
528 struct amd_cpudata *cpudata = policy->driver_data;
529 unsigned long max_perf, min_perf, des_perf, cap_perf;
530
531 if (!cpudata->max_freq)
532 return -ENODEV;
533
534 if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
535 amd_pstate_update_min_max_limit(policy);
536
537 cap_perf = READ_ONCE(cpudata->highest_perf);
538 min_perf = READ_ONCE(cpudata->lowest_perf);
539 max_perf = cap_perf;
540
541 freqs.old = policy->cur;
542 freqs.new = target_freq;
543
544 des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
545 cpudata->max_freq);
546
547 WARN_ON(fast_switch && !policy->fast_switch_enabled);
548 /*
549 * If fast_switch is desired, then there aren't any registered
550 * transition notifiers. See comment for
551 * cpufreq_enable_fast_switch().
552 */
553 if (!fast_switch)
554 cpufreq_freq_transition_begin(policy, &freqs);
555
556 amd_pstate_update(cpudata, min_perf, des_perf,
557 max_perf, fast_switch, policy->governor->flags);
558
559 if (!fast_switch)
560 cpufreq_freq_transition_end(policy, &freqs, false);
561
562 return 0;
563}
564
565static int amd_pstate_target(struct cpufreq_policy *policy,
566 unsigned int target_freq,
567 unsigned int relation)
568{
569 return amd_pstate_update_freq(policy, target_freq, false);
570}
571
572static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
573 unsigned int target_freq)
574{
575 if (!amd_pstate_update_freq(policy, target_freq, true))
576 return target_freq;
577 return policy->cur;
578}
579
580static void amd_pstate_adjust_perf(unsigned int cpu,
581 unsigned long _min_perf,
582 unsigned long target_perf,
583 unsigned long capacity)
584{
585 unsigned long max_perf, min_perf, des_perf,
586 cap_perf, lowest_nonlinear_perf, max_freq;
587 struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
588 struct amd_cpudata *cpudata = policy->driver_data;
589 unsigned int target_freq;
590
591 if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
592 amd_pstate_update_min_max_limit(policy);
593
594
595 cap_perf = READ_ONCE(cpudata->highest_perf);
596 lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
597 max_freq = READ_ONCE(cpudata->max_freq);
598
599 des_perf = cap_perf;
600 if (target_perf < capacity)
601 des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
602
603 min_perf = READ_ONCE(cpudata->lowest_perf);
604 if (_min_perf < capacity)
605 min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
606
607 if (min_perf < lowest_nonlinear_perf)
608 min_perf = lowest_nonlinear_perf;
609
610 max_perf = cap_perf;
611 if (max_perf < min_perf)
612 max_perf = min_perf;
613
614 des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
615 target_freq = div_u64(des_perf * max_freq, max_perf);
616 policy->cur = target_freq;
617
618 amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
619 policy->governor->flags);
620 cpufreq_cpu_put(policy);
621}
622
623static int amd_get_min_freq(struct amd_cpudata *cpudata)
624{
625 struct cppc_perf_caps cppc_perf;
626
627 int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
628 if (ret)
629 return ret;
630
631 /* Switch to khz */
632 return cppc_perf.lowest_freq * 1000;
633}
634
635static int amd_get_max_freq(struct amd_cpudata *cpudata)
636{
637 struct cppc_perf_caps cppc_perf;
638 u32 max_perf, max_freq, nominal_freq, nominal_perf;
639 u64 boost_ratio;
640
641 int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
642 if (ret)
643 return ret;
644
645 nominal_freq = cppc_perf.nominal_freq;
646 nominal_perf = READ_ONCE(cpudata->nominal_perf);
647 max_perf = READ_ONCE(cpudata->highest_perf);
648
649 boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT,
650 nominal_perf);
651
652 max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT;
653
654 /* Switch to khz */
655 return max_freq * 1000;
656}
657
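/*
 * Note: the max frequency above is derived from the CPPC capabilities as
 *
 *   max_freq = nominal_freq * highest_perf / nominal_perf
 *
 * computed in fixed point with a SCHED_CAPACITY_SHIFT-bit fraction. With
 * hypothetical values nominal_freq = 3000 MHz, nominal_perf = 120 and
 * highest_perf = 166, this gives roughly 4150 MHz (reported in kHz). The same
 * fixed-point pattern is used for the lowest non-linear frequency below.
 */
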
658static int amd_get_nominal_freq(struct amd_cpudata *cpudata)
659{
660 struct cppc_perf_caps cppc_perf;
661
662 int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
663 if (ret)
664 return ret;
665
666 /* Switch to khz */
667 return cppc_perf.nominal_freq * 1000;
668}
669
670static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata)
671{
672 struct cppc_perf_caps cppc_perf;
673 u32 lowest_nonlinear_freq, lowest_nonlinear_perf,
674 nominal_freq, nominal_perf;
675 u64 lowest_nonlinear_ratio;
676
677 int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
678 if (ret)
679 return ret;
680
681 nominal_freq = cppc_perf.nominal_freq;
682 nominal_perf = READ_ONCE(cpudata->nominal_perf);
683
684 lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
685
686 lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
687 nominal_perf);
688
689 lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT;
690
691 /* Switch to khz */
692 return lowest_nonlinear_freq * 1000;
693}
694
695static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
696{
697 struct amd_cpudata *cpudata = policy->driver_data;
698 int ret;
699
700 if (!cpudata->boost_supported) {
701 pr_err("Boost mode is not supported by this processor or SBIOS\n");
702 return -EINVAL;
703 }
704
705 if (state)
706 policy->cpuinfo.max_freq = cpudata->max_freq;
707 else
708 policy->cpuinfo.max_freq = cpudata->nominal_freq;
709
710 policy->max = policy->cpuinfo.max_freq;
711
712 ret = freq_qos_update_request(&cpudata->req[1],
713 policy->cpuinfo.max_freq);
714 if (ret < 0)
715 return ret;
716
717 return 0;
718}
719
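/*
 * Note: amd_pstate_set_boost() is invoked by the cpufreq core when the global
 * boost knob changes (typically /sys/devices/system/cpu/cpufreq/boost).
 * Disabling boost caps cpuinfo.max_freq at the nominal frequency and updates
 * the max-frequency QoS request registered in amd_pstate_cpu_init().
 */
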
720static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
721{
722 u32 highest_perf, nominal_perf;
723
724 highest_perf = READ_ONCE(cpudata->highest_perf);
725 nominal_perf = READ_ONCE(cpudata->nominal_perf);
726
727 if (highest_perf <= nominal_perf)
728 return;
729
730 cpudata->boost_supported = true;
731 current_pstate_driver->boost_enabled = true;
732}
733
734static void amd_perf_ctl_reset(unsigned int cpu)
735{
736 wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
737}
738
739/*
 740 * Enabling amd-pstate preferred core support can't be done directly from cpufreq
 741 * callbacks due to locking, so queue the work for later.
742 */
743static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
744{
745 sched_set_itmt_support();
746}
747static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
748
749/*
750 * Get the highest performance register value.
751 * @cpu: CPU from which to get highest performance.
752 * @highest_perf: Return address.
753 *
754 * Return: 0 for success, -EIO otherwise.
755 */
756static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
757{
758 int ret;
759
760 if (boot_cpu_has(X86_FEATURE_CPPC)) {
761 u64 cap1;
762
763 ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
764 if (ret)
765 return ret;
766 WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
767 } else {
768 u64 cppc_highest_perf;
769
770 ret = cppc_get_highest_perf(cpu, &cppc_highest_perf);
771 if (ret)
772 return ret;
773 WRITE_ONCE(*highest_perf, cppc_highest_perf);
774 }
775
 776 return ret;
777}
778
779#define CPPC_MAX_PERF U8_MAX
780
781static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
782{
783 int ret, prio;
784 u32 highest_perf;
785
786 ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
787 if (ret)
788 return;
789
790 cpudata->hw_prefcore = true;
 791 /* check if the CPPC preferred core feature is enabled */
 792 if (highest_perf < CPPC_MAX_PERF) {
 793 prio = (int)highest_perf;
 794 } else {
 795 pr_debug("AMD CPPC preferred core is unsupported!\n");
 796 cpudata->hw_prefcore = false;
 797 return;
 798 }
799
800 if (!amd_pstate_prefcore)
801 return;
802
803 /*
804 * The priorities can be set regardless of whether or not
805 * sched_set_itmt_support(true) has been called and it is valid to
806 * update them at any time after it has been called.
807 */
808 sched_set_itmt_core_prio(prio, cpudata->cpu);
809
810 schedule_work(&sched_prefcore_work);
811}
812
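/*
 * Note: with preferred core support, the raw CPPC highest-perf value doubles
 * as the ITMT priority, so cores with a higher ranking are preferred by the
 * scheduler for task placement. A reading of 255 (CPPC_MAX_PERF) is treated
 * as "ranking not supported" and leaves hw_prefcore disabled.
 */
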
813static void amd_pstate_update_limits(unsigned int cpu)
814{
815 struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
816 struct amd_cpudata *cpudata = policy->driver_data;
817 u32 prev_high = 0, cur_high = 0;
818 int ret;
819 bool highest_perf_changed = false;
820
821 mutex_lock(&amd_pstate_driver_lock);
822 if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
823 goto free_cpufreq_put;
824
825 ret = amd_pstate_get_highest_perf(cpu, &cur_high);
826 if (ret)
827 goto free_cpufreq_put;
828
829 prev_high = READ_ONCE(cpudata->prefcore_ranking);
830 if (prev_high != cur_high) {
831 highest_perf_changed = true;
832 WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
833
834 if (cur_high < CPPC_MAX_PERF)
835 sched_set_itmt_core_prio((int)cur_high, cpu);
836 }
837
838free_cpufreq_put:
839 cpufreq_cpu_put(policy);
840
841 if (!highest_perf_changed)
842 cpufreq_update_policy(cpu);
843
844 mutex_unlock(&amd_pstate_driver_lock);
845}
846
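/*
 * Note: amd_pstate_update_limits() is wired up as the .update_limits callback
 * of both drivers. When the platform changes a core's highest-perf ranking at
 * run time, the new value is propagated to the ITMT priority here; if the
 * ranking did not change, the policy is refreshed instead.
 */
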
847static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
848{
849 int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
850 struct device *dev;
851 struct amd_cpudata *cpudata;
852
853 /*
854 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
 855 * which is ideal for the initialization process.
856 */
857 amd_perf_ctl_reset(policy->cpu);
858 dev = get_cpu_device(policy->cpu);
859 if (!dev)
860 return -ENODEV;
861
862 cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
863 if (!cpudata)
864 return -ENOMEM;
865
866 cpudata->cpu = policy->cpu;
867
868 amd_pstate_init_prefcore(cpudata);
869
870 ret = amd_pstate_init_perf(cpudata);
871 if (ret)
872 goto free_cpudata1;
873
874 min_freq = amd_get_min_freq(cpudata);
875 max_freq = amd_get_max_freq(cpudata);
876 nominal_freq = amd_get_nominal_freq(cpudata);
877 lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
878
879 if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
880 dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
881 min_freq, max_freq);
882 ret = -EINVAL;
883 goto free_cpudata1;
884 }
885
886 policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY;
887 policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY;
888
889 policy->min = min_freq;
890 policy->max = max_freq;
891
892 policy->cpuinfo.min_freq = min_freq;
893 policy->cpuinfo.max_freq = max_freq;
894
 895 /* It will be updated by the governor */
896 policy->cur = policy->cpuinfo.min_freq;
897
898 if (boot_cpu_has(X86_FEATURE_CPPC))
899 policy->fast_switch_possible = true;
900
901 ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
902 FREQ_QOS_MIN, policy->cpuinfo.min_freq);
903 if (ret < 0) {
904 dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
905 goto free_cpudata1;
906 }
907
908 ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
909 FREQ_QOS_MAX, policy->cpuinfo.max_freq);
910 if (ret < 0) {
911 dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
912 goto free_cpudata2;
913 }
914
915 /* Initial processor data capability frequencies */
916 cpudata->max_freq = max_freq;
917 cpudata->min_freq = min_freq;
918 cpudata->max_limit_freq = max_freq;
919 cpudata->min_limit_freq = min_freq;
920 cpudata->nominal_freq = nominal_freq;
921 cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
922
923 policy->driver_data = cpudata;
924
925 amd_pstate_boost_init(cpudata);
926 if (!current_pstate_driver->adjust_perf)
927 current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
928
929 return 0;
930
931free_cpudata2:
932 freq_qos_remove_request(&cpudata->req[0]);
933free_cpudata1:
934 kfree(cpudata);
935 return ret;
936}
937
938static int amd_pstate_cpu_exit(struct cpufreq_policy *policy)
939{
940 struct amd_cpudata *cpudata = policy->driver_data;
941
942 freq_qos_remove_request(&cpudata->req[1]);
943 freq_qos_remove_request(&cpudata->req[0]);
944 policy->fast_switch_possible = false;
945 kfree(cpudata);
946
947 return 0;
948}
949
950static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
951{
952 int ret;
953
954 ret = amd_pstate_enable(true);
955 if (ret)
956 pr_err("failed to enable amd-pstate during resume, return %d\n", ret);
957
958 return ret;
959}
960
961static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
962{
963 int ret;
964
965 ret = amd_pstate_enable(false);
966 if (ret)
967 pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);
968
969 return ret;
970}
971
972/* Sysfs attributes */
973
974/*
 975 * This frequency indicates the maximum hardware frequency.
 976 * If boost is supported but not active, this frequency will be larger than
 977 * the one in cpuinfo.
978 */
979static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
980 char *buf)
981{
982 int max_freq;
983 struct amd_cpudata *cpudata = policy->driver_data;
984
985 max_freq = amd_get_max_freq(cpudata);
986 if (max_freq < 0)
987 return max_freq;
988
989 return sysfs_emit(buf, "%u\n", max_freq);
990}
991
992static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
993 char *buf)
994{
995 int freq;
996 struct amd_cpudata *cpudata = policy->driver_data;
997
998 freq = amd_get_lowest_nonlinear_freq(cpudata);
999 if (freq < 0)
1000 return freq;
1001
1002 return sysfs_emit(buf, "%u\n", freq);
1003}
1004
1005/*
 1006 * On some ASICs, the highest_perf is not the one in the _CPC table, so we
1007 * need to expose it to sysfs.
1008 */
1009static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
1010 char *buf)
1011{
1012 u32 perf;
1013 struct amd_cpudata *cpudata = policy->driver_data;
1014
1015 perf = READ_ONCE(cpudata->highest_perf);
1016
1017 return sysfs_emit(buf, "%u\n", perf);
1018}
1019
1020static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
1021 char *buf)
1022{
1023 u32 perf;
1024 struct amd_cpudata *cpudata = policy->driver_data;
1025
1026 perf = READ_ONCE(cpudata->prefcore_ranking);
1027
1028 return sysfs_emit(buf, "%u\n", perf);
1029}
1030
1031static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
1032 char *buf)
1033{
1034 bool hw_prefcore;
1035 struct amd_cpudata *cpudata = policy->driver_data;
1036
1037 hw_prefcore = READ_ONCE(cpudata->hw_prefcore);
1038
1039 return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
1040}
1041
1042static ssize_t show_energy_performance_available_preferences(
1043 struct cpufreq_policy *policy, char *buf)
1044{
1045 int i = 0;
1046 int offset = 0;
1047 struct amd_cpudata *cpudata = policy->driver_data;
1048
1049 if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1050 return sysfs_emit_at(buf, offset, "%s\n",
1051 energy_perf_strings[EPP_INDEX_PERFORMANCE]);
1052
1053 while (energy_perf_strings[i] != NULL)
1054 offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);
1055
1056 offset += sysfs_emit_at(buf, offset, "\n");
1057
1058 return offset;
1059}
1060
1061static ssize_t store_energy_performance_preference(
1062 struct cpufreq_policy *policy, const char *buf, size_t count)
1063{
1064 struct amd_cpudata *cpudata = policy->driver_data;
1065 char str_preference[21];
1066 ssize_t ret;
1067
1068 ret = sscanf(buf, "%20s", str_preference);
1069 if (ret != 1)
1070 return -EINVAL;
1071
1072 ret = match_string(energy_perf_strings, -1, str_preference);
1073 if (ret < 0)
1074 return -EINVAL;
1075
1076 mutex_lock(&amd_pstate_limits_lock);
1077 ret = amd_pstate_set_energy_pref_index(cpudata, ret);
1078 mutex_unlock(&amd_pstate_limits_lock);
1079
1080 return ret ?: count;
1081}
1082
1083static ssize_t show_energy_performance_preference(
1084 struct cpufreq_policy *policy, char *buf)
1085{
1086 struct amd_cpudata *cpudata = policy->driver_data;
1087 int preference;
1088
1089 preference = amd_pstate_get_energy_pref_index(cpudata);
1090 if (preference < 0)
1091 return preference;
1092
1093 return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
1094}
1095
1096static void amd_pstate_driver_cleanup(void)
1097{
1098 amd_pstate_enable(false);
1099 cppc_state = AMD_PSTATE_DISABLE;
1100 current_pstate_driver = NULL;
1101}
1102
1103static int amd_pstate_register_driver(int mode)
1104{
1105 int ret;
1106
1107 if (mode == AMD_PSTATE_PASSIVE || mode == AMD_PSTATE_GUIDED)
1108 current_pstate_driver = &amd_pstate_driver;
1109 else if (mode == AMD_PSTATE_ACTIVE)
1110 current_pstate_driver = &amd_pstate_epp_driver;
1111 else
1112 return -EINVAL;
1113
1114 cppc_state = mode;
1115 ret = cpufreq_register_driver(current_pstate_driver);
1116 if (ret) {
1117 amd_pstate_driver_cleanup();
1118 return ret;
1119 }
1120 return 0;
1121}
1122
1123static int amd_pstate_unregister_driver(int dummy)
1124{
1125 cpufreq_unregister_driver(current_pstate_driver);
1126 amd_pstate_driver_cleanup();
1127 return 0;
1128}
1129
1130static int amd_pstate_change_mode_without_dvr_change(int mode)
1131{
1132 int cpu = 0;
1133
1134 cppc_state = mode;
1135
1136 if (boot_cpu_has(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE)
1137 return 0;
1138
1139 for_each_present_cpu(cpu) {
1140 cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
1141 }
1142
1143 return 0;
1144}
1145
1146static int amd_pstate_change_driver_mode(int mode)
1147{
1148 int ret;
1149
1150 ret = amd_pstate_unregister_driver(0);
1151 if (ret)
1152 return ret;
1153
1154 ret = amd_pstate_register_driver(mode);
1155 if (ret)
1156 return ret;
1157
1158 return 0;
1159}
1160
1161static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = {
1162 [AMD_PSTATE_DISABLE] = {
1163 [AMD_PSTATE_DISABLE] = NULL,
1164 [AMD_PSTATE_PASSIVE] = amd_pstate_register_driver,
1165 [AMD_PSTATE_ACTIVE] = amd_pstate_register_driver,
1166 [AMD_PSTATE_GUIDED] = amd_pstate_register_driver,
1167 },
1168 [AMD_PSTATE_PASSIVE] = {
1169 [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver,
1170 [AMD_PSTATE_PASSIVE] = NULL,
1171 [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode,
1172 [AMD_PSTATE_GUIDED] = amd_pstate_change_mode_without_dvr_change,
1173 },
1174 [AMD_PSTATE_ACTIVE] = {
1175 [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver,
1176 [AMD_PSTATE_PASSIVE] = amd_pstate_change_driver_mode,
1177 [AMD_PSTATE_ACTIVE] = NULL,
1178 [AMD_PSTATE_GUIDED] = amd_pstate_change_driver_mode,
1179 },
1180 [AMD_PSTATE_GUIDED] = {
1181 [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver,
1182 [AMD_PSTATE_PASSIVE] = amd_pstate_change_mode_without_dvr_change,
1183 [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode,
1184 [AMD_PSTATE_GUIDED] = NULL,
1185 },
1186};
1187
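/*
 * Note: the table above is indexed as mode_state_machine[current][requested]
 * by amd_pstate_update_status(), so e.g. writing "active" while running in
 * passive mode calls amd_pstate_change_driver_mode(AMD_PSTATE_ACTIVE), which
 * unregisters amd_pstate_driver and registers amd_pstate_epp_driver. The
 * write is usually done through the global status attribute, e.g.:
 *
 *   echo active > /sys/devices/system/cpu/amd_pstate/status
 */
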
1188static ssize_t amd_pstate_show_status(char *buf)
1189{
1190 if (!current_pstate_driver)
1191 return sysfs_emit(buf, "disable\n");
1192
1193 return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
1194}
1195
1196static int amd_pstate_update_status(const char *buf, size_t size)
1197{
1198 int mode_idx;
1199
1200 if (size > strlen("passive") || size < strlen("active"))
1201 return -EINVAL;
1202
1203 mode_idx = get_mode_idx_from_str(buf, size);
1204
1205 if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
1206 return -EINVAL;
1207
1208 if (mode_state_machine[cppc_state][mode_idx])
1209 return mode_state_machine[cppc_state][mode_idx](mode_idx);
1210
1211 return 0;
1212}
1213
1214static ssize_t status_show(struct device *dev,
1215 struct device_attribute *attr, char *buf)
1216{
1217 ssize_t ret;
1218
1219 mutex_lock(&amd_pstate_driver_lock);
1220 ret = amd_pstate_show_status(buf);
1221 mutex_unlock(&amd_pstate_driver_lock);
1222
1223 return ret;
1224}
1225
1226static ssize_t status_store(struct device *a, struct device_attribute *b,
1227 const char *buf, size_t count)
1228{
1229 char *p = memchr(buf, '\n', count);
1230 int ret;
1231
1232 mutex_lock(&amd_pstate_driver_lock);
1233 ret = amd_pstate_update_status(buf, p ? p - buf : count);
1234 mutex_unlock(&amd_pstate_driver_lock);
1235
1236 return ret < 0 ? ret : count;
1237}
1238
1239static ssize_t prefcore_show(struct device *dev,
1240 struct device_attribute *attr, char *buf)
1241{
1242 return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
1243}
1244
1245cpufreq_freq_attr_ro(amd_pstate_max_freq);
1246cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
1247
1248cpufreq_freq_attr_ro(amd_pstate_highest_perf);
1249cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
1250cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
1251cpufreq_freq_attr_rw(energy_performance_preference);
1252cpufreq_freq_attr_ro(energy_performance_available_preferences);
1253static DEVICE_ATTR_RW(status);
1254static DEVICE_ATTR_RO(prefcore);
1255
1256static struct freq_attr *amd_pstate_attr[] = {
1257 &amd_pstate_max_freq,
1258 &amd_pstate_lowest_nonlinear_freq,
1259 &amd_pstate_highest_perf,
1260 &amd_pstate_prefcore_ranking,
1261 &amd_pstate_hw_prefcore,
1262 NULL,
1263};
1264
1265static struct freq_attr *amd_pstate_epp_attr[] = {
1266 &amd_pstate_max_freq,
1267 &amd_pstate_lowest_nonlinear_freq,
1268 &amd_pstate_highest_perf,
1269 &amd_pstate_prefcore_ranking,
1270 &amd_pstate_hw_prefcore,
1271 &energy_performance_preference,
1272 &energy_performance_available_preferences,
1273 NULL,
1274};
1275
1276static struct attribute *pstate_global_attributes[] = {
1277 &dev_attr_status.attr,
1278 &dev_attr_prefcore.attr,
1279 NULL
1280};
1281
1282static const struct attribute_group amd_pstate_global_attr_group = {
1283 .name = "amd_pstate",
1284 .attrs = pstate_global_attributes,
1285};
1286
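/*
 * Note: the freq_attr arrays above are exported per policy under
 * /sys/devices/system/cpu/cpuN/cpufreq/, while the "status" and "prefcore"
 * attributes in this group appear once, under the amd_pstate directory that
 * amd_pstate_init() creates below the cpu subsystem root.
 */
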
1287static bool amd_pstate_acpi_pm_profile_server(void)
1288{
1289 switch (acpi_gbl_FADT.preferred_profile) {
1290 case PM_ENTERPRISE_SERVER:
1291 case PM_SOHO_SERVER:
1292 case PM_PERFORMANCE_SERVER:
1293 return true;
1294 }
1295 return false;
1296}
1297
1298static bool amd_pstate_acpi_pm_profile_undefined(void)
1299{
1300 if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
1301 return true;
1302 if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
1303 return true;
1304 return false;
1305}
1306
1307static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
1308{
1309 int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
1310 struct amd_cpudata *cpudata;
1311 struct device *dev;
1312 u64 value;
1313
1314 /*
1315 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
 1316 * which is ideal for the initialization process.
1317 */
1318 amd_perf_ctl_reset(policy->cpu);
1319 dev = get_cpu_device(policy->cpu);
1320 if (!dev)
1321 return -ENODEV;
1322
1323 cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
1324 if (!cpudata)
1325 return -ENOMEM;
1326
1327 cpudata->cpu = policy->cpu;
1328 cpudata->epp_policy = 0;
1329
1330 amd_pstate_init_prefcore(cpudata);
1331
1332 ret = amd_pstate_init_perf(cpudata);
1333 if (ret)
1334 goto free_cpudata1;
1335
1336 min_freq = amd_get_min_freq(cpudata);
1337 max_freq = amd_get_max_freq(cpudata);
1338 nominal_freq = amd_get_nominal_freq(cpudata);
1339 lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
1340 if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
1341 dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
1342 min_freq, max_freq);
1343 ret = -EINVAL;
1344 goto free_cpudata1;
1345 }
1346
1347 policy->cpuinfo.min_freq = min_freq;
1348 policy->cpuinfo.max_freq = max_freq;
 1349 /* It will be updated by the governor */
1350 policy->cur = policy->cpuinfo.min_freq;
1351
1352 /* Initial processor data capability frequencies */
1353 cpudata->max_freq = max_freq;
1354 cpudata->min_freq = min_freq;
1355 cpudata->nominal_freq = nominal_freq;
1356 cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
1357
1358 policy->driver_data = cpudata;
1359
1360 cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0);
1361
1362 policy->min = policy->cpuinfo.min_freq;
1363 policy->max = policy->cpuinfo.max_freq;
1364
1365 /*
1366 * Set the policy to provide a valid fallback value in case
1367 * the default cpufreq governor is neither powersave nor performance.
1368 */
1369 if (amd_pstate_acpi_pm_profile_server() ||
1370 amd_pstate_acpi_pm_profile_undefined())
1371 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
1372 else
1373 policy->policy = CPUFREQ_POLICY_POWERSAVE;
1374
1375 if (boot_cpu_has(X86_FEATURE_CPPC)) {
1376 ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
1377 if (ret)
1378 return ret;
1379 WRITE_ONCE(cpudata->cppc_req_cached, value);
1380
1381 ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
1382 if (ret)
1383 return ret;
1384 WRITE_ONCE(cpudata->cppc_cap1_cached, value);
1385 }
1386 amd_pstate_boost_init(cpudata);
1387
1388 return 0;
1389
1390free_cpudata1:
1391 kfree(cpudata);
1392 return ret;
1393}
1394
1395static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
1396{
1397 pr_debug("CPU %d exiting\n", policy->cpu);
1398 return 0;
1399}
1400
1401static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
1402{
1403 struct amd_cpudata *cpudata = policy->driver_data;
1404 u32 max_perf, min_perf, min_limit_perf, max_limit_perf;
1405 u64 value;
1406 s16 epp;
1407
1408 max_perf = READ_ONCE(cpudata->highest_perf);
1409 min_perf = READ_ONCE(cpudata->lowest_perf);
1410 max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
1411 min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
1412
1413 if (min_limit_perf < min_perf)
1414 min_limit_perf = min_perf;
1415
1416 if (max_limit_perf < min_limit_perf)
1417 max_limit_perf = min_limit_perf;
1418
1419 WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
1420 WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
1421
1422 max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
1423 cpudata->max_limit_perf);
1424 min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
1425 cpudata->max_limit_perf);
1426 value = READ_ONCE(cpudata->cppc_req_cached);
1427
1428 if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1429 min_perf = max_perf;
1430
1431 /* Initial min/max values for CPPC Performance Controls Register */
1432 value &= ~AMD_CPPC_MIN_PERF(~0L);
1433 value |= AMD_CPPC_MIN_PERF(min_perf);
1434
1435 value &= ~AMD_CPPC_MAX_PERF(~0L);
1436 value |= AMD_CPPC_MAX_PERF(max_perf);
1437
 1438 /* The CPPC EPP feature requires the desired perf field to be set to zero */
1439 value &= ~AMD_CPPC_DES_PERF(~0L);
1440 value |= AMD_CPPC_DES_PERF(0);
1441
1442 cpudata->epp_policy = cpudata->policy;
1443
1444 /* Get BIOS pre-defined epp value */
1445 epp = amd_pstate_get_epp(cpudata, value);
1446 if (epp < 0) {
 1447 /*
 1448 * This return value can only be negative for shared memory
 1449 * systems where EPP register read/write is not supported.
 1450 */
1451 return;
1452 }
1453
1454 if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1455 epp = 0;
1456
1457 /* Set initial EPP value */
1458 if (boot_cpu_has(X86_FEATURE_CPPC)) {
1459 value &= ~GENMASK_ULL(31, 24);
1460 value |= (u64)epp << 24;
1461 }
1462
1463 WRITE_ONCE(cpudata->cppc_req_cached, value);
1464 amd_pstate_set_epp(cpudata, epp);
1465}
1466
1467static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
1468{
1469 struct amd_cpudata *cpudata = policy->driver_data;
1470
1471 if (!policy->cpuinfo.max_freq)
1472 return -ENODEV;
1473
1474 pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
1475 policy->cpuinfo.max_freq, policy->max);
1476
1477 cpudata->policy = policy->policy;
1478
1479 amd_pstate_epp_update_limit(policy);
1480
1481 return 0;
1482}
1483
1484static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
1485{
1486 struct cppc_perf_ctrls perf_ctrls;
1487 u64 value, max_perf;
1488 int ret;
1489
1490 ret = amd_pstate_enable(true);
1491 if (ret)
1492 pr_err("failed to enable amd pstate during resume, return %d\n", ret);
1493
1494 value = READ_ONCE(cpudata->cppc_req_cached);
1495 max_perf = READ_ONCE(cpudata->highest_perf);
1496
1497 if (boot_cpu_has(X86_FEATURE_CPPC)) {
1498 wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
1499 } else {
1500 perf_ctrls.max_perf = max_perf;
1501 perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached);
1502 cppc_set_perf(cpudata->cpu, &perf_ctrls);
1503 }
1504}
1505
1506static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
1507{
1508 struct amd_cpudata *cpudata = policy->driver_data;
1509
1510 pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
1511
1512 if (cppc_state == AMD_PSTATE_ACTIVE) {
1513 amd_pstate_epp_reenable(cpudata);
1514 cpudata->suspended = false;
1515 }
1516
1517 return 0;
1518}
1519
1520static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
1521{
1522 struct amd_cpudata *cpudata = policy->driver_data;
1523 struct cppc_perf_ctrls perf_ctrls;
1524 int min_perf;
1525 u64 value;
1526
1527 min_perf = READ_ONCE(cpudata->lowest_perf);
1528 value = READ_ONCE(cpudata->cppc_req_cached);
1529
1530 mutex_lock(&amd_pstate_limits_lock);
1531 if (boot_cpu_has(X86_FEATURE_CPPC)) {
1532 cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN;
1533
1534 /* Set max perf same as min perf */
1535 value &= ~AMD_CPPC_MAX_PERF(~0L);
1536 value |= AMD_CPPC_MAX_PERF(min_perf);
1537 value &= ~AMD_CPPC_MIN_PERF(~0L);
1538 value |= AMD_CPPC_MIN_PERF(min_perf);
1539 wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
1540 } else {
1541 perf_ctrls.desired_perf = 0;
1542 perf_ctrls.max_perf = min_perf;
1543 perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE);
1544 cppc_set_perf(cpudata->cpu, &perf_ctrls);
1545 }
1546 mutex_unlock(&amd_pstate_limits_lock);
1547}
1548
1549static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
1550{
1551 struct amd_cpudata *cpudata = policy->driver_data;
1552
1553 pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu);
1554
1555 if (cpudata->suspended)
1556 return 0;
1557
1558 if (cppc_state == AMD_PSTATE_ACTIVE)
1559 amd_pstate_epp_offline(policy);
1560
1561 return 0;
1562}
1563
1564static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
1565{
1566 cpufreq_verify_within_cpu_limits(policy);
1567 pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
1568 return 0;
1569}
1570
1571static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
1572{
1573 struct amd_cpudata *cpudata = policy->driver_data;
1574 int ret;
1575
1576 /* avoid suspending when EPP is not enabled */
1577 if (cppc_state != AMD_PSTATE_ACTIVE)
1578 return 0;
1579
 1580 /* set this flag to avoid setting the core offline */
 1581 cpudata->suspended = true;
 1582
 1583 /* disable CPPC in low-level firmware */
1584 ret = amd_pstate_enable(false);
1585 if (ret)
1586 pr_err("failed to suspend, return %d\n", ret);
1587
1588 return 0;
1589}
1590
1591static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
1592{
1593 struct amd_cpudata *cpudata = policy->driver_data;
1594
1595 if (cpudata->suspended) {
1596 mutex_lock(&amd_pstate_limits_lock);
1597
 1598 /* re-enable amd-pstate when resuming from the suspend state */
1599 amd_pstate_epp_reenable(cpudata);
1600
1601 mutex_unlock(&amd_pstate_limits_lock);
1602
1603 cpudata->suspended = false;
1604 }
1605
1606 return 0;
1607}
1608
1609static struct cpufreq_driver amd_pstate_driver = {
1610 .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
1611 .verify = amd_pstate_verify,
1612 .target = amd_pstate_target,
1613 .fast_switch = amd_pstate_fast_switch,
1614 .init = amd_pstate_cpu_init,
1615 .exit = amd_pstate_cpu_exit,
1616 .suspend = amd_pstate_cpu_suspend,
1617 .resume = amd_pstate_cpu_resume,
1618 .set_boost = amd_pstate_set_boost,
1619 .update_limits = amd_pstate_update_limits,
1620 .name = "amd-pstate",
1621 .attr = amd_pstate_attr,
1622};
1623
1624static struct cpufreq_driver amd_pstate_epp_driver = {
1625 .flags = CPUFREQ_CONST_LOOPS,
1626 .verify = amd_pstate_epp_verify_policy,
1627 .setpolicy = amd_pstate_epp_set_policy,
1628 .init = amd_pstate_epp_cpu_init,
1629 .exit = amd_pstate_epp_cpu_exit,
1630 .offline = amd_pstate_epp_cpu_offline,
1631 .online = amd_pstate_epp_cpu_online,
1632 .suspend = amd_pstate_epp_suspend,
1633 .resume = amd_pstate_epp_resume,
1634 .update_limits = amd_pstate_update_limits,
1635 .name = "amd-pstate-epp",
1636 .attr = amd_pstate_epp_attr,
1637};
1638
1639static int __init amd_pstate_set_driver(int mode_idx)
1640{
1641 if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
1642 cppc_state = mode_idx;
1643 if (cppc_state == AMD_PSTATE_DISABLE)
1644 pr_info("driver is explicitly disabled\n");
1645
1646 if (cppc_state == AMD_PSTATE_ACTIVE)
1647 current_pstate_driver = &amd_pstate_epp_driver;
1648
1649 if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
1650 current_pstate_driver = &amd_pstate_driver;
1651
1652 return 0;
1653 }
1654
1655 return -EINVAL;
1656}
1657
1658static int __init amd_pstate_init(void)
1659{
1660 struct device *dev_root;
1661 int ret;
1662
1663 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
1664 return -ENODEV;
1665
1666 if (!acpi_cpc_valid()) {
 1667 pr_warn_once("the _CPC object is not present in SBIOS or ACPI is disabled\n");
1668 return -ENODEV;
1669 }
1670
1671 /* don't keep reloading if cpufreq_driver exists */
1672 if (cpufreq_get_current_driver())
1673 return -EEXIST;
1674
1675 switch (cppc_state) {
1676 case AMD_PSTATE_UNDEFINED:
1677 /* Disable on the following configs by default:
1678 * 1. Undefined platforms
1679 * 2. Server platforms
1680 * 3. Shared memory designs
1681 */
1682 if (amd_pstate_acpi_pm_profile_undefined() ||
1683 amd_pstate_acpi_pm_profile_server() ||
1684 !boot_cpu_has(X86_FEATURE_CPPC)) {
1685 pr_info("driver load is disabled, boot with specific mode to enable this\n");
1686 return -ENODEV;
1687 }
1688 ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE);
1689 if (ret)
1690 return ret;
1691 break;
1692 case AMD_PSTATE_DISABLE:
1693 return -ENODEV;
1694 case AMD_PSTATE_PASSIVE:
1695 case AMD_PSTATE_ACTIVE:
1696 case AMD_PSTATE_GUIDED:
1697 break;
1698 default:
1699 return -EINVAL;
1700 }
1701
1702 /* capability check */
1703 if (boot_cpu_has(X86_FEATURE_CPPC)) {
1704 pr_debug("AMD CPPC MSR based functionality is supported\n");
1705 if (cppc_state != AMD_PSTATE_ACTIVE)
1706 current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
1707 } else {
1708 pr_debug("AMD CPPC shared memory based functionality is supported\n");
1709 static_call_update(amd_pstate_enable, cppc_enable);
1710 static_call_update(amd_pstate_init_perf, cppc_init_perf);
1711 static_call_update(amd_pstate_update_perf, cppc_update_perf);
1712 }
1713
1714 /* enable amd pstate feature */
1715 ret = amd_pstate_enable(true);
1716 if (ret) {
1717 pr_err("failed to enable with return %d\n", ret);
1718 return ret;
1719 }
1720
1721 ret = cpufreq_register_driver(current_pstate_driver);
1722 if (ret)
1723 pr_err("failed to register with return %d\n", ret);
1724
1725 dev_root = bus_get_dev_root(&cpu_subsys);
1726 if (dev_root) {
1727 ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);
1728 put_device(dev_root);
1729 if (ret) {
1730 pr_err("sysfs attribute export failed with error %d.\n", ret);
1731 goto global_attr_free;
1732 }
1733 }
1734
1735 return ret;
1736
1737global_attr_free:
1738 cpufreq_unregister_driver(current_pstate_driver);
1739 return ret;
1740}
1741device_initcall(amd_pstate_init);
1742
1743static int __init amd_pstate_param(char *str)
1744{
1745 size_t size;
1746 int mode_idx;
1747
1748 if (!str)
1749 return -EINVAL;
1750
1751 size = strlen(str);
1752 mode_idx = get_mode_idx_from_str(str, size);
1753
1754 return amd_pstate_set_driver(mode_idx);
1755}
1756
1757static int __init amd_prefcore_param(char *str)
1758{
1759 if (!strcmp(str, "disable"))
1760 amd_pstate_prefcore = false;
1761
1762 return 0;
1763}
1764
1765early_param("amd_pstate", amd_pstate_param);
1766early_param("amd_prefcore", amd_prefcore_param);
1767
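/*
 * Note: the two early parameters give, in effect, the following kernel
 * command line options (mode names assumed to match amd_pstate_mode_string[]):
 *
 *   amd_pstate=disable|passive|active|guided
 *   amd_prefcore=disable
 *
 * "amd_pstate=" selects the operating mode before the driver registers, while
 * "amd_prefcore=disable" keeps the preferred core ranking from being reported
 * to the scheduler.
 */
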
1768MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
1769MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");