Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.15.
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * x86_pkg_temp_thermal driver
  4 * Copyright (c) 2013, Intel Corporation.
  5 */
  6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  7
  8#include <linux/module.h>
  9#include <linux/init.h>
 10#include <linux/intel_tcc.h>
 11#include <linux/err.h>
 12#include <linux/param.h>
 13#include <linux/device.h>
 14#include <linux/platform_device.h>
 15#include <linux/cpu.h>
 16#include <linux/smp.h>
 17#include <linux/slab.h>
 18#include <linux/pm.h>
 19#include <linux/thermal.h>
 20#include <linux/debugfs.h>
 21
 22#include <asm/cpu_device_id.h>
 23
 24#include "thermal_interrupt.h"
 25
/*
 * Rate control delay: the idea is to introduce a debounce effect.
 * The delay should be long enough to reduce the number of events when a
 * threshold is set to a temperature which is constantly violated, but
 * short enough to still take action in time. The action can be to remove
 * the threshold or to change it to the next interesting setting. Based on
 * experiments, around every 5 seconds under load gives a significant
 * temperature change.
 */
#define PKG_TEMP_THERMAL_NOTIFY_DELAY	5000
/* Delay in milliseconds; writable at runtime through the module parameter. */
static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY;
module_param(notify_delay_ms, int, 0644);
MODULE_PARM_DESC(notify_delay_ms,
	"User space notification delay in milli seconds.");
 41
/*
 * Number of trip points in the thermal zone. Currently it can't be more
 * than 2: the MSR only allows setting thresholds, and getting
 * notifications, for 2 thresholds. This define enforces that limit in case
 * cpuid returns a wrong value for the number of thresholds.
 */
#define MAX_NUMBER_OF_TRIPS	2
 48
/* Per-zone state; one instance is stored per logical die id in zones[]. */
struct zone_device {
	/* CPU used for MSR accesses and as the target of the delayed work */
	int				cpu;
	/* True while the delayed work is queued; accessed under pkg_temp_lock */
	bool				work_scheduled;
	/* MSR_IA32_PACKAGE_THERM_INTERRUPT value saved when the zone is added */
	u32				msr_pkg_therm_low;
	u32				msr_pkg_therm_high;
	/* Runs pkg_temp_thermal_threshold_work_fn() after the notify delay */
	struct delayed_work		work;
	/* Registered thermal zone; set to NULL before it is unregistered */
	struct thermal_zone_device	*tzone;
	/* CPUs that have come online for this zone */
	struct cpumask			cpumask;
};
 58
/*
 * Zone parameters handed to the thermal core at registration time.
 * NOTE(review): no_hwmon presumably suppresses the hwmon interface for
 * this zone - confirm against the thermal core.
 */
static struct thermal_zone_params pkg_temp_tz_params = {
	.no_hwmon	= true,
};
 62
/* Number of zone pointers allocated in init(); valid indices are 0..max_id-1 */
static int max_id __read_mostly;
/* Array of zone pointers, indexed by logical die id */
static struct zone_device **zones;
/* Serializes interrupt notification, work and hotplug */
static DEFINE_RAW_SPINLOCK(pkg_temp_lock);
/* Protects zone operation in the work function against hotplug removal */
static DEFINE_MUTEX(thermal_zone_mutex);

/* The dynamically assigned cpu hotplug state, kept for module_exit() */
static enum cpuhp_state pkg_thermal_hp_state __read_mostly;

/* Debug counters exposed through debugfs */
static struct dentry *debugfs;
/* Incremented on every call of pkg_thermal_notify() */
static unsigned int pkg_interrupt_cnt;
/* Incremented on every run of the threshold work function */
static unsigned int pkg_work_cnt;
 79
 80static void pkg_temp_debugfs_init(void)
 81{
 82	debugfs = debugfs_create_dir("pkg_temp_thermal", NULL);
 83
 84	debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs,
 85			   &pkg_interrupt_cnt);
 86	debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs,
 87			   &pkg_work_cnt);
 88}
 89
 90/*
 91 * Protection:
 92 *
 93 * - cpu hotplug: Read serialized by cpu hotplug lock
 94 *		  Write must hold pkg_temp_lock
 95 *
 96 * - Other callsites: Must hold pkg_temp_lock
 97 */
 98static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
 99{
100	int id = topology_logical_die_id(cpu);
101
102	if (id >= 0 && id < max_id)
103		return zones[id];
104	return NULL;
105}
106
107static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
108{
109	struct zone_device *zonedev = thermal_zone_device_priv(tzd);
110	int val, ret;
111
112	ret = intel_tcc_get_temp(zonedev->cpu, &val, true);
113	if (ret < 0)
114		return ret;
115
116	*temp = val * 1000;
117	pr_debug("sys_get_curr_temp %d\n", *temp);
118	return 0;
119}
120
121static int
122sys_set_trip_temp(struct thermal_zone_device *tzd,
123		  const struct thermal_trip *trip, int temp)
124{
125	struct zone_device *zonedev = thermal_zone_device_priv(tzd);
126	unsigned int trip_index = THERMAL_TRIP_PRIV_TO_INT(trip->priv);
127	u32 l, h, mask, shift, intr;
128	int tj_max, val, ret;
129
130	tj_max = intel_tcc_get_tjmax(zonedev->cpu);
131	if (tj_max < 0)
132		return tj_max;
133	tj_max *= 1000;
134
135	val = (tj_max - temp)/1000;
136
137	if (trip_index >= MAX_NUMBER_OF_TRIPS || val < 0 || val > 0x7f)
138		return -EINVAL;
139
140	ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
141			   &l, &h);
142	if (ret < 0)
143		return ret;
144
145	if (trip_index) {
146		mask = THERM_MASK_THRESHOLD1;
147		shift = THERM_SHIFT_THRESHOLD1;
148		intr = THERM_INT_THRESHOLD1_ENABLE;
149	} else {
150		mask = THERM_MASK_THRESHOLD0;
151		shift = THERM_SHIFT_THRESHOLD0;
152		intr = THERM_INT_THRESHOLD0_ENABLE;
153	}
154	l &= ~mask;
155	/*
156	* When users space sets a trip temperature == 0, which is indication
157	* that, it is no longer interested in receiving notifications.
158	*/
159	if (!temp) {
160		l &= ~intr;
161	} else {
162		l |= val << shift;
163		l |= intr;
164	}
165
166	return wrmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
167			l, h);
168}
169
/*
 * Thermal zone callback registry: reading the current temperature and
 * updating a trip temperature are the only operations implemented here.
 */
static const struct thermal_zone_device_ops tzone_ops = {
	.get_temp = sys_get_curr_temp,
	.set_trip_temp = sys_set_trip_temp,
};
175
/*
 * Installed as platform_thermal_package_rate_control by the module init
 * function; unconditionally returns true.
 * NOTE(review): presumably tells the thermal interrupt code that this
 * driver does its own rate control - confirm against the caller.
 */
static bool pkg_thermal_rate_control(void)
{
	return true;
}
180
181/* Enable threshold interrupt on local package/cpu */
182static inline void enable_pkg_thres_interrupt(void)
183{
184	u8 thres_0, thres_1;
185	u32 l, h;
186
187	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
188	/* only enable/disable if it had valid threshold value */
189	thres_0 = (l & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0;
190	thres_1 = (l & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1;
191	if (thres_0)
192		l |= THERM_INT_THRESHOLD0_ENABLE;
193	if (thres_1)
194		l |= THERM_INT_THRESHOLD1_ENABLE;
195	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
196}
197
198/* Disable threshold interrupt on local package/cpu */
199static inline void disable_pkg_thres_interrupt(void)
200{
201	u32 l, h;
202
203	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
204
205	l &= ~(THERM_INT_THRESHOLD0_ENABLE | THERM_INT_THRESHOLD1_ENABLE);
206	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
207}
208
209static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
210{
211	struct thermal_zone_device *tzone = NULL;
212	int cpu = smp_processor_id();
213	struct zone_device *zonedev;
214
215	mutex_lock(&thermal_zone_mutex);
216	raw_spin_lock_irq(&pkg_temp_lock);
217	++pkg_work_cnt;
218
219	zonedev = pkg_temp_thermal_get_dev(cpu);
220	if (!zonedev) {
221		raw_spin_unlock_irq(&pkg_temp_lock);
222		mutex_unlock(&thermal_zone_mutex);
223		return;
224	}
225	zonedev->work_scheduled = false;
226
227	thermal_clear_package_intr_status(PACKAGE_LEVEL, THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
228	tzone = zonedev->tzone;
229
230	enable_pkg_thres_interrupt();
231	raw_spin_unlock_irq(&pkg_temp_lock);
232
233	/*
234	 * If tzone is not NULL, then thermal_zone_mutex will prevent the
235	 * concurrent removal in the cpu offline callback.
236	 */
237	if (tzone)
238		thermal_zone_device_update(tzone, THERMAL_EVENT_UNSPECIFIED);
239
240	mutex_unlock(&thermal_zone_mutex);
241}
242
243static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
244{
245	unsigned long ms = msecs_to_jiffies(notify_delay_ms);
246
247	schedule_delayed_work_on(cpu, work, ms);
248}
249
250static int pkg_thermal_notify(u64 msr_val)
251{
252	int cpu = smp_processor_id();
253	struct zone_device *zonedev;
254	unsigned long flags;
255
256	raw_spin_lock_irqsave(&pkg_temp_lock, flags);
257	++pkg_interrupt_cnt;
258
259	disable_pkg_thres_interrupt();
260
261	/* Work is per package, so scheduling it once is enough. */
262	zonedev = pkg_temp_thermal_get_dev(cpu);
263	if (zonedev && !zonedev->work_scheduled) {
264		zonedev->work_scheduled = true;
265		pkg_thermal_schedule_work(zonedev->cpu, &zonedev->work);
266	}
267
268	raw_spin_unlock_irqrestore(&pkg_temp_lock, flags);
269	return 0;
270}
271
272static int pkg_temp_thermal_trips_init(int cpu, int tj_max,
273				       struct thermal_trip *trips, int num_trips)
274{
275	unsigned long thres_reg_value;
276	u32 mask, shift, eax, edx;
277	int ret, i;
278
279	for (i = 0; i < num_trips; i++) {
280
281		if (i) {
282			mask = THERM_MASK_THRESHOLD1;
283			shift = THERM_SHIFT_THRESHOLD1;
284		} else {
285			mask = THERM_MASK_THRESHOLD0;
286			shift = THERM_SHIFT_THRESHOLD0;
287		}
288
289		ret = rdmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
290				   &eax, &edx);
291		if (ret < 0)
292			return ret;
293
294		thres_reg_value = (eax & mask) >> shift;
295
296		trips[i].temperature = thres_reg_value ?
297			tj_max - thres_reg_value * 1000 : THERMAL_TEMP_INVALID;
298
299		trips[i].type = THERMAL_TRIP_PASSIVE;
300		trips[i].flags |= THERMAL_TRIP_FLAG_RW_TEMP;
301		trips[i].priv = THERMAL_INT_TO_TRIP_PRIV(i);
302
303		pr_debug("%s: cpu=%d, trip=%d, temp=%d\n",
304			 __func__, cpu, i, trips[i].temperature);
305	}
306
307	return 0;
308}
309
310static int pkg_temp_thermal_device_add(unsigned int cpu)
311{
312	struct thermal_trip trips[MAX_NUMBER_OF_TRIPS] = { 0 };
313	int id = topology_logical_die_id(cpu);
314	u32 eax, ebx, ecx, edx;
315	struct zone_device *zonedev;
316	int thres_count, err;
317	int tj_max;
318
319	if (id >= max_id)
320		return -ENOMEM;
321
322	cpuid(6, &eax, &ebx, &ecx, &edx);
323	thres_count = ebx & 0x07;
324	if (!thres_count)
325		return -ENODEV;
326
327	thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
328
329	tj_max = intel_tcc_get_tjmax(cpu);
330	if (tj_max < 0)
331		return tj_max;
332
333	zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL);
334	if (!zonedev)
335		return -ENOMEM;
336
337	err = pkg_temp_thermal_trips_init(cpu, tj_max, trips, thres_count);
338	if (err)
339		goto out_kfree_zonedev;
340
341	INIT_DELAYED_WORK(&zonedev->work, pkg_temp_thermal_threshold_work_fn);
342	zonedev->cpu = cpu;
343	zonedev->tzone = thermal_zone_device_register_with_trips("x86_pkg_temp",
344			trips, thres_count,
345			zonedev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
346	if (IS_ERR(zonedev->tzone)) {
347		err = PTR_ERR(zonedev->tzone);
348		goto out_kfree_zonedev;
349	}
350	err = thermal_zone_device_enable(zonedev->tzone);
351	if (err)
352		goto out_unregister_tz;
353
354	/* Store MSR value for package thermal interrupt, to restore at exit */
355	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, zonedev->msr_pkg_therm_low,
356	      zonedev->msr_pkg_therm_high);
357
358	cpumask_set_cpu(cpu, &zonedev->cpumask);
359	raw_spin_lock_irq(&pkg_temp_lock);
360	zones[id] = zonedev;
361	raw_spin_unlock_irq(&pkg_temp_lock);
362
363	return 0;
364
365out_unregister_tz:
366	thermal_zone_device_unregister(zonedev->tzone);
367out_kfree_zonedev:
368	kfree(zonedev);
369	return err;
370}
371
372static int pkg_thermal_cpu_offline(unsigned int cpu)
373{
374	struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
375	bool lastcpu, was_target;
376	int target;
377
378	if (!zonedev)
379		return 0;
380
381	target = cpumask_any_but(&zonedev->cpumask, cpu);
382	cpumask_clear_cpu(cpu, &zonedev->cpumask);
383	lastcpu = target >= nr_cpu_ids;
384	/*
385	 * Remove the sysfs files, if this is the last cpu in the package
386	 * before doing further cleanups.
387	 */
388	if (lastcpu) {
389		struct thermal_zone_device *tzone = zonedev->tzone;
390
391		/*
392		 * We must protect against a work function calling
393		 * thermal_zone_update, after/while unregister. We null out
394		 * the pointer under the zone mutex, so the worker function
395		 * won't try to call.
396		 */
397		mutex_lock(&thermal_zone_mutex);
398		zonedev->tzone = NULL;
399		mutex_unlock(&thermal_zone_mutex);
400
401		thermal_zone_device_unregister(tzone);
402	}
403
404	/* Protect against work and interrupts */
405	raw_spin_lock_irq(&pkg_temp_lock);
406
407	/*
408	 * Check whether this cpu was the current target and store the new
409	 * one. When we drop the lock, then the interrupt notify function
410	 * will see the new target.
411	 */
412	was_target = zonedev->cpu == cpu;
413	zonedev->cpu = target;
414
415	/*
416	 * If this is the last CPU in the package remove the package
417	 * reference from the array and restore the interrupt MSR. When we
418	 * drop the lock neither the interrupt notify function nor the
419	 * worker will see the package anymore.
420	 */
421	if (lastcpu) {
422		zones[topology_logical_die_id(cpu)] = NULL;
423		/* After this point nothing touches the MSR anymore. */
424		wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
425		      zonedev->msr_pkg_therm_low, zonedev->msr_pkg_therm_high);
426	}
427
428	/*
429	 * Check whether there is work scheduled and whether the work is
430	 * targeted at the outgoing CPU.
431	 */
432	if (zonedev->work_scheduled && was_target) {
433		/*
434		 * To cancel the work we need to drop the lock, otherwise
435		 * we might deadlock if the work needs to be flushed.
436		 */
437		raw_spin_unlock_irq(&pkg_temp_lock);
438		cancel_delayed_work_sync(&zonedev->work);
439		raw_spin_lock_irq(&pkg_temp_lock);
440		/*
441		 * If this is not the last cpu in the package and the work
442		 * did not run after we dropped the lock above, then we
443		 * need to reschedule the work, otherwise the interrupt
444		 * stays disabled forever.
445		 */
446		if (!lastcpu && zonedev->work_scheduled)
447			pkg_thermal_schedule_work(target, &zonedev->work);
448	}
449
450	raw_spin_unlock_irq(&pkg_temp_lock);
451
452	/* Final cleanup if this is the last cpu */
453	if (lastcpu)
454		kfree(zonedev);
455
456	return 0;
457}
458
459static int pkg_thermal_cpu_online(unsigned int cpu)
460{
461	struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
462	struct cpuinfo_x86 *c = &cpu_data(cpu);
463
464	/* Paranoia check */
465	if (!cpu_has(c, X86_FEATURE_DTHERM) || !cpu_has(c, X86_FEATURE_PTS))
466		return -ENODEV;
467
468	/* If the package exists, nothing to do */
469	if (zonedev) {
470		cpumask_set_cpu(cpu, &zonedev->cpumask);
471		return 0;
472	}
473	return pkg_temp_thermal_device_add(cpu);
474}
475
/*
 * CPU match table: Intel CPUs that advertise X86_FEATURE_PTS. Checked by
 * the module init function and exported as the x86cpu module device table.
 */
static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_PTS, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
481
482static int __init pkg_temp_thermal_init(void)
483{
484	int ret;
485
486	if (!x86_match_cpu(pkg_temp_thermal_ids))
487		return -ENODEV;
488
489	max_id = topology_max_packages() * topology_max_dies_per_package();
490	zones = kcalloc(max_id, sizeof(struct zone_device *),
491			   GFP_KERNEL);
492	if (!zones)
493		return -ENOMEM;
494
495	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
496				pkg_thermal_cpu_online,	pkg_thermal_cpu_offline);
497	if (ret < 0)
498		goto err;
499
500	/* Store the state for module exit */
501	pkg_thermal_hp_state = ret;
502
503	platform_thermal_package_notify = pkg_thermal_notify;
504	platform_thermal_package_rate_control = pkg_thermal_rate_control;
505
506	 /* Don't care if it fails */
507	pkg_temp_debugfs_init();
508	return 0;
509
510err:
511	kfree(zones);
512	return ret;
513}
514module_init(pkg_temp_thermal_init)
515
/*
 * Module exit: undo pkg_temp_thermal_init(). The notification hooks are
 * cleared first, then the hotplug state is removed, and finally the
 * debugfs directory and the zone pointer array are released.
 */
static void __exit pkg_temp_thermal_exit(void)
{
	/* Detach the hooks installed by the init function. */
	platform_thermal_package_notify = NULL;
	platform_thermal_package_rate_control = NULL;

	/* Uses the dynamically assigned state saved by the init function. */
	cpuhp_remove_state(pkg_thermal_hp_state);
	debugfs_remove_recursive(debugfs);
	kfree(zones);
}
module_exit(pkg_temp_thermal_exit)
526
/* Module metadata; the INTEL_TCC namespace provides the intel_tcc_*() helpers. */
MODULE_IMPORT_NS("INTEL_TCC");
MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver");
MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
MODULE_LICENSE("GPL v2");