  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * intel_powerclamp.c - package c-state idle injection
  4 *
  5 * Copyright (c) 2012, Intel Corporation.
  6 *
  7 * Authors:
  8 *     Arjan van de Ven <arjan@linux.intel.com>
  9 *     Jacob Pan <jacob.jun.pan@linux.intel.com>
 10 *
 11 *	TODO:
 12 *           1. Better handle wakeups from external interrupts. Currently a
 13 *              fixed compensation is added to the clamping duration when an
 14 *              excessive number of wakeups is observed during idle time. The
 15 *              reason is that for external interrupts that need no ack,
 16 *              clamping down the CPU in non-irq context does not reduce the
 17 *              irq rate. In the majority of cases clamping down the CPU does
 18 *              help reduce irqs as well; we should be able to differentiate
 19 *              the two cases and give a quantitative solution for the irqs
 20 *              we can control, perhaps based on get_cpu_iowait_time_us().
 21 *
 22 *	     2. Synchronization with other hw blocks.
 23 */
 24
 25#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 26
 27#include <linux/module.h>
 28#include <linux/kernel.h>
 29#include <linux/delay.h>
 30#include <linux/kthread.h>
 31#include <linux/cpu.h>
 32#include <linux/thermal.h>
 33#include <linux/slab.h>
 34#include <linux/tick.h>
 35#include <linux/debugfs.h>
 36#include <linux/seq_file.h>
 37#include <linux/sched/rt.h>
 38#include <uapi/linux/sched/types.h>
 39
 40#include <asm/nmi.h>
 41#include <asm/msr.h>
 42#include <asm/mwait.h>
 43#include <asm/cpu_device_id.h>
 44#include <asm/hardirq.h>
 45
 46#define MAX_TARGET_RATIO (50U)
 47/* For each undisturbed clamping period (no extra wake ups during idle time),
 48 * we increment the confidence counter for the given target ratio.
 49 * CONFIDENCE_OK defines the level where runtime calibration results are
 50 * valid.
 51 */
 52#define CONFIDENCE_OK (3)
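/*
 * For example, get_compensation() only averages in cal_data[ratio].steady_comp
 * once cal_data[ratio].confidence (and that of the adjacent ratios it uses)
 * has reached CONFIDENCE_OK.
 */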
 53/* Default idle injection duration; the driver adjusts the sleep time to
 54 * meet the target idle ratio. Similar to frequency modulation.
 55 */
 56#define DEFAULT_DURATION_JIFFIES (6)
 57
 58static unsigned int target_mwait;
 59static struct dentry *debug_dir;
 60
 61/* user selected target */
 62static unsigned int set_target_ratio;
 63static unsigned int current_ratio;
 64static bool should_skip;
 65static bool reduce_irq;
 66static atomic_t idle_wakeup_counter;
 67static unsigned int control_cpu; /* The CPU assigned to collect stats and
 68				  * update control parameters. Defaults to
 69				  * the BSP, but the BSP can be offlined.
 70				  */
 71static bool clamping;
 72
 73static const struct sched_param sparam = {
 74	.sched_priority = MAX_USER_RT_PRIO / 2,
 75};
 76struct powerclamp_worker_data {
 77	struct kthread_worker *worker;
 78	struct kthread_work balancing_work;
 79	struct kthread_delayed_work idle_injection_work;
 80	unsigned int cpu;
 81	unsigned int count;
 82	unsigned int guard;
 83	unsigned int window_size_now;
 84	unsigned int target_ratio;
 85	unsigned int duration_jiffies;
 86	bool clamping;
 87};
 88
 89static struct powerclamp_worker_data __percpu *worker_data;
 90static struct thermal_cooling_device *cooling_dev;
 91static unsigned long *cpu_clamping_mask;  /* bit map for tracking per cpu
 92					   * clamping kthread worker
 93					   */
 94
 95static unsigned int duration;
 96static unsigned int pkg_cstate_ratio_cur;
 97static unsigned int window_size;
 98
 99static int duration_set(const char *arg, const struct kernel_param *kp)
100{
101	int ret = 0;
102	unsigned long new_duration;
103
104	ret = kstrtoul(arg, 10, &new_duration);
105	if (ret)
106		goto exit;
107	if (new_duration > 25 || new_duration < 6) {
108		pr_err("Out of recommended range %lu, between 6-25ms\n",
109			new_duration);
110		ret = -EINVAL;
111	}
112
113	duration = clamp(new_duration, 6ul, 25ul);
114	smp_mb();
115
116exit:
117
118	return ret;
119}
120
121static const struct kernel_param_ops duration_ops = {
122	.set = duration_set,
123	.get = param_get_int,
124};
125
126
127module_param_cb(duration, &duration_ops, &duration, 0644);
128MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");
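/*
 * With the 0644 mode above, duration can also be changed at runtime through
 * sysfs, e.g. /sys/module/intel_powerclamp/parameters/duration; duration_set()
 * clamps the value to the recommended 6-25 ms range.
 */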
129
130struct powerclamp_calibration_data {
131	unsigned long confidence;  /* used for calibration: a counter that is
132				    * incremented each time a clamping period
133				    * completes without extra wakeups. Once it
134				    * reaches a given level, the compensation
135				    * is deemed usable.
136				    */
137	unsigned long steady_comp; /* steady state compensation used when
138				    * no extra wakeups occurred.
139				    */
140	unsigned long dynamic_comp; /* compensates for excessive wakeups from
141				     * idle, mostly from external interrupts.
142				     */
143};
144
145static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];
146
147static int window_size_set(const char *arg, const struct kernel_param *kp)
148{
149	int ret = 0;
150	unsigned long new_window_size;
151
152	ret = kstrtoul(arg, 10, &new_window_size);
153	if (ret)
154		goto exit_win;
155	if (new_window_size > 10 || new_window_size < 2) {
156		pr_err("Out of recommended window size %lu, between 2-10\n",
157			new_window_size);
158		ret = -EINVAL;
159	}
160
161	window_size = clamp(new_window_size, 2ul, 10ul);
162	smp_mb();
163
164exit_win:
165
166	return ret;
167}
168
169static const struct kernel_param_ops window_size_ops = {
170	.set = window_size_set,
171	.get = param_get_int,
172};
173
174module_param_cb(window_size, &window_size_ops, &window_size, 0644);
175MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
176	"\tpowerclamp controls idle ratio within this window. larger\n"
177	"\twindow size results in slower response time but smoother\n"
178	"\tclamping results. defaults to 2.");
179
180static void find_target_mwait(void)
181{
182	unsigned int eax, ebx, ecx, edx;
183	unsigned int highest_cstate = 0;
184	unsigned int highest_subcstate = 0;
185	int i;
186
187	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
188		return;
189
190	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
191
192	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
193	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
194		return;
195
196	edx >>= MWAIT_SUBSTATE_SIZE;
197	for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
198		if (edx & MWAIT_SUBSTATE_MASK) {
199			highest_cstate = i;
200			highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
201		}
202	}
203	target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
204		(highest_subcstate - 1);
205
206}
207
208struct pkg_cstate_info {
209	bool skip;
210	int msr_index;
211	int cstate_id;
212};
213
214#define PKG_CSTATE_INIT(id) {				\
215		.msr_index = MSR_PKG_C##id##_RESIDENCY, \
216		.cstate_id = id				\
217			}
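/*
 * For example, PKG_CSTATE_INIT(6) expands to
 * { .msr_index = MSR_PKG_C6_RESIDENCY, .cstate_id = 6 }.
 */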
218
219static struct pkg_cstate_info pkg_cstates[] = {
220	PKG_CSTATE_INIT(2),
221	PKG_CSTATE_INIT(3),
222	PKG_CSTATE_INIT(6),
223	PKG_CSTATE_INIT(7),
224	PKG_CSTATE_INIT(8),
225	PKG_CSTATE_INIT(9),
226	PKG_CSTATE_INIT(10),
227	{NULL},
228};
229
230static bool has_pkg_state_counter(void)
231{
232	u64 val;
233	struct pkg_cstate_info *info = pkg_cstates;
234
235	/* check if any one of the counter msrs exists */
236	while (info->msr_index) {
237		if (!rdmsrl_safe(info->msr_index, &val))
238			return true;
239		info++;
240	}
241
242	return false;
243}
244
245static u64 pkg_state_counter(void)
246{
247	u64 val;
248	u64 count = 0;
249	struct pkg_cstate_info *info = pkg_cstates;
250
251	while (info->msr_index) {
252		if (!info->skip) {
253			if (!rdmsrl_safe(info->msr_index, &val))
254				count += val;
255			else
256				info->skip = true;
257		}
258		info++;
259	}
260
261	return count;
262}
263
264static unsigned int get_compensation(int ratio)
265{
266	unsigned int comp = 0;
267
268	/* we only use compensation if all adjacent ones are good */
269	if (ratio == 1 &&
270		cal_data[ratio].confidence >= CONFIDENCE_OK &&
271		cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
272		cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
273		comp = (cal_data[ratio].steady_comp +
274			cal_data[ratio + 1].steady_comp +
275			cal_data[ratio + 2].steady_comp) / 3;
276	} else if (ratio == MAX_TARGET_RATIO - 1 &&
277		cal_data[ratio].confidence >= CONFIDENCE_OK &&
278		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
279		cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
280		comp = (cal_data[ratio].steady_comp +
281			cal_data[ratio - 1].steady_comp +
282			cal_data[ratio - 2].steady_comp) / 3;
283	} else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
284		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
285		cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
286		comp = (cal_data[ratio].steady_comp +
287			cal_data[ratio - 1].steady_comp +
288			cal_data[ratio + 1].steady_comp) / 3;
289	}
290
291	/* REVISIT: simple penalty of double idle injection */
292	if (reduce_irq)
293		comp = ratio;
294	/* do not exceed limit */
295	if (comp + ratio >= MAX_TARGET_RATIO)
296		comp = MAX_TARGET_RATIO - ratio - 1;
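	/* e.g. ratio == 48 and comp == 5 gets limited to comp == 1 */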
297
298	return comp;
299}
300
301static void adjust_compensation(int target_ratio, unsigned int win)
302{
303	int delta;
304	struct powerclamp_calibration_data *d = &cal_data[target_ratio];
305
306	/*
307	 * Adjust the compensation only if the confidence level has not been
308	 * reached yet. If there were too many wakeups during the last idle
309	 * injection period, we cannot trust the data for compensation.
310	 */
311	if (d->confidence >= CONFIDENCE_OK ||
312		atomic_read(&idle_wakeup_counter) >
313		win * num_online_cpus())
314		return;
315
316	delta = set_target_ratio - current_ratio;
317	/* filter out bad data */
318	if (delta >= 0 && delta <= (1+target_ratio/10)) {
319		if (d->steady_comp)
320			d->steady_comp =
321				roundup(delta+d->steady_comp, 2)/2;
322		else
323			d->steady_comp = delta;
324		d->confidence++;
325	}
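	/*
	 * Note that the steady_comp update above is a rounded running
	 * average, e.g. with d->steady_comp == 2 and delta == 3 it becomes
	 * roundup(3 + 2, 2) / 2 == 3.
	 */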
326}
327
328static bool powerclamp_adjust_controls(unsigned int target_ratio,
329				unsigned int guard, unsigned int win)
330{
331	static u64 msr_last, tsc_last;
332	u64 msr_now, tsc_now;
333	u64 val64;
334
335	/* check result for the last window */
336	msr_now = pkg_state_counter();
337	tsc_now = rdtsc();
338
339	/* calculate pkg cstate vs tsc ratio */
340	if (!msr_last || !tsc_last)
341		current_ratio = 1;
342	else if (tsc_now-tsc_last) {
343		val64 = 100*(msr_now-msr_last);
344		do_div(val64, (tsc_now-tsc_last));
345		current_ratio = val64;
346	}
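	/*
	 * current_ratio is the package C-state residency delta as a
	 * percentage of the TSC delta, e.g. counters advancing by 300M
	 * while the TSC advances by 1000M yields current_ratio == 30.
	 */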
347
348	/* update record */
349	msr_last = msr_now;
350	tsc_last = tsc_now;
351
352	adjust_compensation(target_ratio, win);
353	/*
354	 * If there were too many external interrupts, set a flag so
355	 * that we can take measures later.
356	 */
357	reduce_irq = atomic_read(&idle_wakeup_counter) >=
358		2 * win * num_online_cpus();
359
360	atomic_set(&idle_wakeup_counter, 0);
361	/* if we are above target+guard, skip */
362	return set_target_ratio + guard <= current_ratio;
363}
364
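/*
 * clamp_balancing_func() and clamp_idle_injection_func() below form a
 * per-CPU cycle: the balancing work latches the control parameters and an
 * aligned wakeup time and queues the delayed idle injection work, which
 * injects the idle period via play_idle() and then requeues the balancing
 * work, as long as clamping stays enabled.
 */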
365static void clamp_balancing_func(struct kthread_work *work)
366{
367	struct powerclamp_worker_data *w_data;
368	int sleeptime;
369	unsigned long target_jiffies;
370	unsigned int compensated_ratio;
371	int interval; /* jiffies to sleep for each attempt */
372
373	w_data = container_of(work, struct powerclamp_worker_data,
374			      balancing_work);
375
376	/*
377	 * Make sure the user-selected ratio does not take effect until
378	 * the next round. Adjust target_ratio if the user has changed
379	 * the target so that we can converge quickly.
380	 */
381	w_data->target_ratio = READ_ONCE(set_target_ratio);
382	w_data->guard = 1 + w_data->target_ratio / 20;
383	w_data->window_size_now = window_size;
384	w_data->duration_jiffies = msecs_to_jiffies(duration);
385	w_data->count++;
386
387	/*
388	 * Systems may differ in their ability to enter package-level
389	 * C-states, so we compensate the injected idle ratio to achieve
390	 * the actual target reported by the HW.
391	 */
392	compensated_ratio = w_data->target_ratio +
393		get_compensation(w_data->target_ratio);
394	if (compensated_ratio <= 0)
395		compensated_ratio = 1;
396	interval = w_data->duration_jiffies * 100 / compensated_ratio;
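	/*
	 * E.g. with the default 6-jiffy duration and a compensated ratio of
	 * 30, interval = 6 * 100 / 30 = 20, so about 6 idle jiffies are
	 * injected in every 20-jiffy period, i.e. ~30% idle time.
	 */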
397
398	/* align idle time */
399	target_jiffies = roundup(jiffies, interval);
400	sleeptime = target_jiffies - jiffies;
401	if (sleeptime <= 0)
402		sleeptime = 1;
403
404	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
405		kthread_queue_delayed_work(w_data->worker,
406					   &w_data->idle_injection_work,
407					   sleeptime);
408}
409
410static void clamp_idle_injection_func(struct kthread_work *work)
411{
412	struct powerclamp_worker_data *w_data;
413
414	w_data = container_of(work, struct powerclamp_worker_data,
415			      idle_injection_work.work);
416
417	/*
418	 * Only the elected controlling CPU can collect stats and update
419	 * the control parameters.
420	 */
421	if (w_data->cpu == control_cpu &&
422	    !(w_data->count % w_data->window_size_now)) {
423		should_skip =
424			powerclamp_adjust_controls(w_data->target_ratio,
425						   w_data->guard,
426						   w_data->window_size_now);
427		smp_mb();
428	}
429
430	if (should_skip)
431		goto balance;
432
433	play_idle(jiffies_to_usecs(w_data->duration_jiffies));
434
435balance:
436	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
437		kthread_queue_work(w_data->worker, &w_data->balancing_work);
438}
439
440/*
441 * 1 HZ polling while clamping is active, useful for userspace
442 * to monitor actual idle ratio.
443 */
444static void poll_pkg_cstate(struct work_struct *dummy);
445static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
446static void poll_pkg_cstate(struct work_struct *dummy)
447{
448	static u64 msr_last;
449	static u64 tsc_last;
450
451	u64 msr_now;
452	u64 tsc_now;
453	u64 val64;
454
455	msr_now = pkg_state_counter();
456	tsc_now = rdtsc();
457
458	/* calculate pkg cstate vs tsc ratio */
459	if (!msr_last || !tsc_last)
460		pkg_cstate_ratio_cur = 1;
461	else {
462		if (tsc_now - tsc_last) {
463			val64 = 100 * (msr_now - msr_last);
464			do_div(val64, (tsc_now - tsc_last));
465			pkg_cstate_ratio_cur = val64;
466		}
467	}
468
469	/* update record */
470	msr_last = msr_now;
471	tsc_last = tsc_now;
472
473	if (clamping)
474		schedule_delayed_work(&poll_pkg_cstate_work, HZ);
475}
476
477static void start_power_clamp_worker(unsigned long cpu)
478{
479	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
480	struct kthread_worker *worker;
481
482	worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu);
483	if (IS_ERR(worker))
484		return;
485
486	w_data->worker = worker;
487	w_data->count = 0;
488	w_data->cpu = cpu;
489	w_data->clamping = true;
490	set_bit(cpu, cpu_clamping_mask);
491	sched_setscheduler(worker->task, SCHED_FIFO, &sparam);
492	kthread_init_work(&w_data->balancing_work, clamp_balancing_func);
493	kthread_init_delayed_work(&w_data->idle_injection_work,
494				  clamp_idle_injection_func);
495	kthread_queue_work(w_data->worker, &w_data->balancing_work);
496}
497
498static void stop_power_clamp_worker(unsigned long cpu)
499{
500	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
501
502	if (!w_data->worker)
503		return;
504
505	w_data->clamping = false;
506	/*
507	 * Make sure that all work items that get queued after this point
508	 * see the clamping disabled. A counterpart barrier is not needed
509	 * because there is an implicit memory barrier when the queued
510	 * work is processed.
511	 */
512	smp_wmb();
513	kthread_cancel_work_sync(&w_data->balancing_work);
514	kthread_cancel_delayed_work_sync(&w_data->idle_injection_work);
515	/*
516	 * The balancing work might still be queued here because the
517	 * handling of the "clamping" variable and the cancel and queue
518	 * operations are not synchronized via a lock. But that is not
519	 * a big deal: the balancing work is fast, and destroying the
520	 * kthread worker will wait for it.
521	 */
522	clear_bit(w_data->cpu, cpu_clamping_mask);
523	kthread_destroy_worker(w_data->worker);
524
525	w_data->worker = NULL;
526}
527
528static int start_power_clamp(void)
529{
530	unsigned long cpu;
531
532	set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
533	/* prevent cpu hotplug */
534	get_online_cpus();
535
536	/* prefer BSP */
537	control_cpu = 0;
538	if (!cpu_online(control_cpu))
539		control_cpu = smp_processor_id();
540
541	clamping = true;
542	schedule_delayed_work(&poll_pkg_cstate_work, 0);
543
544	/* start one kthread worker per online cpu */
545	for_each_online_cpu(cpu) {
546		start_power_clamp_worker(cpu);
547	}
548	put_online_cpus();
549
550	return 0;
551}
552
553static void end_power_clamp(void)
554{
555	int i;
556
557	/*
558	 * Block requeuing in all the kthread workers. They will flush and
559	 * stop faster.
560	 */
561	clamping = false;
562	if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
563		for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
564			pr_debug("clamping worker for cpu %d alive, destroy\n",
565				 i);
566			stop_power_clamp_worker(i);
567		}
568	}
569}
570
571static int powerclamp_cpu_online(unsigned int cpu)
572{
573	if (!clamping)
574		return 0;
575	start_power_clamp_worker(cpu);
576	/* prefer BSP as controlling CPU */
577	if (cpu == 0) {
578		control_cpu = 0;
579		smp_mb();
580	}
581	return 0;
582}
583
584static int powerclamp_cpu_predown(unsigned int cpu)
585{
586	if (!clamping)
587		return 0;
588
589	stop_power_clamp_worker(cpu);
590	if (cpu != control_cpu)
591		return 0;
592
593	control_cpu = cpumask_first(cpu_online_mask);
594	if (control_cpu == cpu)
595		control_cpu = cpumask_next(cpu, cpu_online_mask);
596	smp_mb();
597	return 0;
598}
599
600static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
601				 unsigned long *state)
602{
603	*state = MAX_TARGET_RATIO;
604
605	return 0;
606}
607
608static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
609				 unsigned long *state)
610{
611	if (clamping)
612		*state = pkg_cstate_ratio_cur;
613	else
614		/* to save power, do not poll idle ratio while not clamping */
615		*state = -1; /* indicates invalid state */
616
617	return 0;
618}
619
620static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
621				 unsigned long new_target_ratio)
622{
623	int ret = 0;
624
625	new_target_ratio = clamp(new_target_ratio, 0UL,
626				(unsigned long) (MAX_TARGET_RATIO-1));
627	if (set_target_ratio == 0 && new_target_ratio > 0) {
628		pr_info("Start idle injection to reduce power\n");
629		set_target_ratio = new_target_ratio;
630		ret = start_power_clamp();
631		goto exit_set;
632	} else	if (set_target_ratio > 0 && new_target_ratio == 0) {
633		pr_info("Stop forced idle injection\n");
634		end_power_clamp();
635		set_target_ratio = 0;
636	} else	/* adjust currently running */ {
637		set_target_ratio = new_target_ratio;
638		/* make new set_target_ratio visible to other cpus */
639		smp_mb();
640	}
641
642exit_set:
643	return ret;
644}
645
646/* bind to generic thermal layer as cooling device */
647static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
648	.get_max_state = powerclamp_get_max_state,
649	.get_cur_state = powerclamp_get_cur_state,
650	.set_cur_state = powerclamp_set_cur_state,
651};
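/*
 * Once registered, the thermal core exposes this device as
 * /sys/class/thermal/cooling_deviceX; writing a target idle percentage to
 * its cur_state attribute ends up in powerclamp_set_cur_state() above.
 */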
652
653static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
654	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_MWAIT },
655	{}
656};
657MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
658
659static int __init powerclamp_probe(void)
660{
661
662	if (!x86_match_cpu(intel_powerclamp_ids)) {
663		pr_err("CPU does not support MWAIT\n");
664		return -ENODEV;
665	}
666
667	/* The goal for idle time alignment is to achieve package cstate. */
668	if (!has_pkg_state_counter()) {
669		pr_info("No package C-state available\n");
670		return -ENODEV;
671	}
672
673	/* find the deepest mwait value */
674	find_target_mwait();
675
676	return 0;
677}
678
679static int powerclamp_debug_show(struct seq_file *m, void *unused)
680{
681	int i = 0;
682
683	seq_printf(m, "controlling cpu: %d\n", control_cpu);
684	seq_printf(m, "pct confidence steady dynamic (compensation)\n");
685	for (i = 0; i < MAX_TARGET_RATIO; i++) {
686		seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
687			i,
688			cal_data[i].confidence,
689			cal_data[i].steady_comp,
690			cal_data[i].dynamic_comp);
691	}
692
693	return 0;
694}
695
696DEFINE_SHOW_ATTRIBUTE(powerclamp_debug);
697
698static inline void powerclamp_create_debug_files(void)
699{
700	debug_dir = debugfs_create_dir("intel_powerclamp", NULL);
701
702	debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, cal_data,
703			    &powerclamp_debug_fops);
704}
705
706static enum cpuhp_state hp_state;
707
708static int __init powerclamp_init(void)
709{
710	int retval;
711	int bitmap_size;
712
713	bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long);
714	cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL);
715	if (!cpu_clamping_mask)
716		return -ENOMEM;
717
718	/* probe cpu features and ids here */
719	retval = powerclamp_probe();
720	if (retval)
721		goto exit_free;
722
723	/* set default limit, maybe adjusted during runtime based on feedback */
724	window_size = 2;
725	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
726					   "thermal/intel_powerclamp:online",
727					   powerclamp_cpu_online,
728					   powerclamp_cpu_predown);
729	if (retval < 0)
730		goto exit_free;
731
732	hp_state = retval;
733
734	worker_data = alloc_percpu(struct powerclamp_worker_data);
735	if (!worker_data) {
736		retval = -ENOMEM;
737		goto exit_unregister;
738	}
739
740	cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
741						&powerclamp_cooling_ops);
742	if (IS_ERR(cooling_dev)) {
743		retval = -ENODEV;
744		goto exit_free_thread;
745	}
746
747	if (!duration)
748		duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);
749
750	powerclamp_create_debug_files();
751
752	return 0;
753
754exit_free_thread:
755	free_percpu(worker_data);
756exit_unregister:
757	cpuhp_remove_state_nocalls(hp_state);
758exit_free:
759	kfree(cpu_clamping_mask);
760	return retval;
761}
762module_init(powerclamp_init);
763
764static void __exit powerclamp_exit(void)
765{
766	end_power_clamp();
767	cpuhp_remove_state_nocalls(hp_state);
768	free_percpu(worker_data);
769	thermal_cooling_device_unregister(cooling_dev);
770	kfree(cpu_clamping_mask);
771
772	cancel_delayed_work_sync(&poll_pkg_cstate_work);
773	debugfs_remove_recursive(debug_dir);
774}
775module_exit(powerclamp_exit);
776
777MODULE_LICENSE("GPL");
778MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
779MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
780MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");