Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * x86_pkg_temp_thermal driver
  4 * Copyright (c) 2013, Intel Corporation.
  5 */
  6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  7
  8#include <linux/module.h>
  9#include <linux/init.h>
 10#include <linux/intel_tcc.h>
 11#include <linux/err.h>
 12#include <linux/param.h>
 13#include <linux/device.h>
 14#include <linux/platform_device.h>
 15#include <linux/cpu.h>
 16#include <linux/smp.h>
 17#include <linux/slab.h>
 18#include <linux/pm.h>
 19#include <linux/thermal.h>
 20#include <linux/debugfs.h>
 21
 22#include <asm/cpu_device_id.h>
 23
 24#include "thermal_interrupt.h"
 25
 26/*
 27* Rate control delay: Idea is to introduce denounce effect
 28* This should be long enough to avoid reduce events, when
 29* threshold is set to a temperature, which is constantly
 30* violated, but at the short enough to take any action.
 31* The action can be remove threshold or change it to next
 32* interesting setting. Based on experiments, in around
 33* every 5 seconds under load will give us a significant
 34* temperature change.
 35*/
 36#define PKG_TEMP_THERMAL_NOTIFY_DELAY	5000
 37static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY;
 38module_param(notify_delay_ms, int, 0644);
 39MODULE_PARM_DESC(notify_delay_ms,
 40	"User space notification delay in milli seconds.");
 41
 42/* Number of trip points in thermal zone. Currently it can't
 43* be more than 2. MSR can allow setting and getting notifications
 44* for only 2 thresholds. This define enforces this, if there
 45* is some wrong values returned by cpuid for number of thresholds.
 46*/
 47#define MAX_NUMBER_OF_TRIPS	2
 48
 49struct zone_device {
 50	int				cpu;
 51	bool				work_scheduled;
 52	u32				msr_pkg_therm_low;
 53	u32				msr_pkg_therm_high;
 54	struct delayed_work		work;
 55	struct thermal_zone_device	*tzone;
 56	struct thermal_trip		*trips;
 57	struct cpumask			cpumask;
 58};
 59
 60static struct thermal_zone_params pkg_temp_tz_params = {
 61	.no_hwmon	= true,
 62};
 63
 64/* Keep track of how many zone pointers we allocated in init() */
 65static int max_id __read_mostly;
 66/* Array of zone pointers */
 67static struct zone_device **zones;
 68/* Serializes interrupt notification, work and hotplug */
 69static DEFINE_RAW_SPINLOCK(pkg_temp_lock);
 70/* Protects zone operation in the work function against hotplug removal */
 71static DEFINE_MUTEX(thermal_zone_mutex);
 72
 73/* The dynamically assigned cpu hotplug state for module_exit() */
 74static enum cpuhp_state pkg_thermal_hp_state __read_mostly;
 75
 76/* Debug counters to show using debugfs */
 77static struct dentry *debugfs;
 78static unsigned int pkg_interrupt_cnt;
 79static unsigned int pkg_work_cnt;
 80
 81static void pkg_temp_debugfs_init(void)
 82{
 83	debugfs = debugfs_create_dir("pkg_temp_thermal", NULL);
 84
 85	debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs,
 86			   &pkg_interrupt_cnt);
 87	debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs,
 88			   &pkg_work_cnt);
 89}
 90
 91/*
 92 * Protection:
 93 *
 94 * - cpu hotplug: Read serialized by cpu hotplug lock
 95 *		  Write must hold pkg_temp_lock
 96 *
 97 * - Other callsites: Must hold pkg_temp_lock
 98 */
 99static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
100{
101	int id = topology_logical_die_id(cpu);
102
103	if (id >= 0 && id < max_id)
104		return zones[id];
105	return NULL;
106}
107
108static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
109{
110	struct zone_device *zonedev = thermal_zone_device_priv(tzd);
111	int val;
112
113	val = intel_tcc_get_temp(zonedev->cpu, true);
114	if (val < 0)
115		return val;
116
117	*temp = val * 1000;
118	pr_debug("sys_get_curr_temp %d\n", *temp);
119	return 0;
120}
121
122static int
123sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
124{
125	struct zone_device *zonedev = thermal_zone_device_priv(tzd);
126	u32 l, h, mask, shift, intr;
127	int tj_max, val, ret;
128
129	tj_max = intel_tcc_get_tjmax(zonedev->cpu);
130	if (tj_max < 0)
131		return tj_max;
132	tj_max *= 1000;
133
134	val = (tj_max - temp)/1000;
135
136	if (trip >= MAX_NUMBER_OF_TRIPS || val < 0 || val > 0x7f)
137		return -EINVAL;
138
139	ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
140			   &l, &h);
141	if (ret < 0)
142		return ret;
143
144	if (trip) {
145		mask = THERM_MASK_THRESHOLD1;
146		shift = THERM_SHIFT_THRESHOLD1;
147		intr = THERM_INT_THRESHOLD1_ENABLE;
148	} else {
149		mask = THERM_MASK_THRESHOLD0;
150		shift = THERM_SHIFT_THRESHOLD0;
151		intr = THERM_INT_THRESHOLD0_ENABLE;
152	}
153	l &= ~mask;
154	/*
155	* When users space sets a trip temperature == 0, which is indication
156	* that, it is no longer interested in receiving notifications.
157	*/
158	if (!temp) {
159		l &= ~intr;
160	} else {
161		l |= val << shift;
162		l |= intr;
163	}
164
165	return wrmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
166			l, h);
167}
168
169/* Thermal zone callback registry */
170static struct thermal_zone_device_ops tzone_ops = {
171	.get_temp = sys_get_curr_temp,
172	.set_trip_temp = sys_set_trip_temp,
173};
174
/*
 * Installed as platform_thermal_package_rate_control by module init.
 * Unconditionally reports true.
 */
static bool pkg_thermal_rate_control(void)
{
	return true;
}
179
180/* Enable threshold interrupt on local package/cpu */
181static inline void enable_pkg_thres_interrupt(void)
182{
183	u8 thres_0, thres_1;
184	u32 l, h;
185
186	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
187	/* only enable/disable if it had valid threshold value */
188	thres_0 = (l & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0;
189	thres_1 = (l & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1;
190	if (thres_0)
191		l |= THERM_INT_THRESHOLD0_ENABLE;
192	if (thres_1)
193		l |= THERM_INT_THRESHOLD1_ENABLE;
194	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
195}
196
197/* Disable threshold interrupt on local package/cpu */
198static inline void disable_pkg_thres_interrupt(void)
199{
200	u32 l, h;
201
202	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
203
204	l &= ~(THERM_INT_THRESHOLD0_ENABLE | THERM_INT_THRESHOLD1_ENABLE);
205	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
206}
207
208static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
209{
210	struct thermal_zone_device *tzone = NULL;
211	int cpu = smp_processor_id();
212	struct zone_device *zonedev;
213
214	mutex_lock(&thermal_zone_mutex);
215	raw_spin_lock_irq(&pkg_temp_lock);
216	++pkg_work_cnt;
217
218	zonedev = pkg_temp_thermal_get_dev(cpu);
219	if (!zonedev) {
220		raw_spin_unlock_irq(&pkg_temp_lock);
221		mutex_unlock(&thermal_zone_mutex);
222		return;
223	}
224	zonedev->work_scheduled = false;
225
226	thermal_clear_package_intr_status(PACKAGE_LEVEL, THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
227	tzone = zonedev->tzone;
228
229	enable_pkg_thres_interrupt();
230	raw_spin_unlock_irq(&pkg_temp_lock);
231
232	/*
233	 * If tzone is not NULL, then thermal_zone_mutex will prevent the
234	 * concurrent removal in the cpu offline callback.
235	 */
236	if (tzone)
237		thermal_zone_device_update(tzone, THERMAL_EVENT_UNSPECIFIED);
238
239	mutex_unlock(&thermal_zone_mutex);
240}
241
242static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
243{
244	unsigned long ms = msecs_to_jiffies(notify_delay_ms);
245
246	schedule_delayed_work_on(cpu, work, ms);
247}
248
249static int pkg_thermal_notify(u64 msr_val)
250{
251	int cpu = smp_processor_id();
252	struct zone_device *zonedev;
253	unsigned long flags;
254
255	raw_spin_lock_irqsave(&pkg_temp_lock, flags);
256	++pkg_interrupt_cnt;
257
258	disable_pkg_thres_interrupt();
259
260	/* Work is per package, so scheduling it once is enough. */
261	zonedev = pkg_temp_thermal_get_dev(cpu);
262	if (zonedev && !zonedev->work_scheduled) {
263		zonedev->work_scheduled = true;
264		pkg_thermal_schedule_work(zonedev->cpu, &zonedev->work);
265	}
266
267	raw_spin_unlock_irqrestore(&pkg_temp_lock, flags);
268	return 0;
269}
270
271static struct thermal_trip *pkg_temp_thermal_trips_init(int cpu, int tj_max, int num_trips)
272{
273	struct thermal_trip *trips;
274	unsigned long thres_reg_value;
275	u32 mask, shift, eax, edx;
276	int ret, i;
277
278	trips = kzalloc(sizeof(*trips) * num_trips, GFP_KERNEL);
279	if (!trips)
280		return ERR_PTR(-ENOMEM);
281
282	for (i = 0; i < num_trips; i++) {
283
284		if (i) {
285			mask = THERM_MASK_THRESHOLD1;
286			shift = THERM_SHIFT_THRESHOLD1;
287		} else {
288			mask = THERM_MASK_THRESHOLD0;
289			shift = THERM_SHIFT_THRESHOLD0;
290		}
291
292		ret = rdmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
293				   &eax, &edx);
294		if (ret < 0) {
295			kfree(trips);
296			return ERR_PTR(ret);
297		}
298
299		thres_reg_value = (eax & mask) >> shift;
300
301		trips[i].temperature = thres_reg_value ?
302			tj_max - thres_reg_value * 1000 : THERMAL_TEMP_INVALID;
303
304		trips[i].type = THERMAL_TRIP_PASSIVE;
305
306		pr_debug("%s: cpu=%d, trip=%d, temp=%d\n",
307			 __func__, cpu, i, trips[i].temperature);
308	}
309
310	return trips;
311}
312
313static int pkg_temp_thermal_device_add(unsigned int cpu)
314{
315	int id = topology_logical_die_id(cpu);
316	u32 eax, ebx, ecx, edx;
317	struct zone_device *zonedev;
318	int thres_count, err;
319	int tj_max;
320
321	if (id >= max_id)
322		return -ENOMEM;
323
324	cpuid(6, &eax, &ebx, &ecx, &edx);
325	thres_count = ebx & 0x07;
326	if (!thres_count)
327		return -ENODEV;
328
329	thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
330
331	tj_max = intel_tcc_get_tjmax(cpu);
332	if (tj_max < 0)
333		return tj_max;
334
335	zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL);
336	if (!zonedev)
337		return -ENOMEM;
338
339	zonedev->trips = pkg_temp_thermal_trips_init(cpu, tj_max, thres_count);
340	if (IS_ERR(zonedev->trips)) {
341		err = PTR_ERR(zonedev->trips);
342		goto out_kfree_zonedev;
343	}
344
345	INIT_DELAYED_WORK(&zonedev->work, pkg_temp_thermal_threshold_work_fn);
346	zonedev->cpu = cpu;
347	zonedev->tzone = thermal_zone_device_register_with_trips("x86_pkg_temp",
348			zonedev->trips, thres_count,
349			(thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01,
350			zonedev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
351	if (IS_ERR(zonedev->tzone)) {
352		err = PTR_ERR(zonedev->tzone);
353		goto out_kfree_trips;
354	}
355	err = thermal_zone_device_enable(zonedev->tzone);
356	if (err)
357		goto out_unregister_tz;
358
359	/* Store MSR value for package thermal interrupt, to restore at exit */
360	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, zonedev->msr_pkg_therm_low,
361	      zonedev->msr_pkg_therm_high);
362
363	cpumask_set_cpu(cpu, &zonedev->cpumask);
364	raw_spin_lock_irq(&pkg_temp_lock);
365	zones[id] = zonedev;
366	raw_spin_unlock_irq(&pkg_temp_lock);
367
368	return 0;
369
370out_unregister_tz:
371	thermal_zone_device_unregister(zonedev->tzone);
372out_kfree_trips:
373	kfree(zonedev->trips);
374out_kfree_zonedev:
375	kfree(zonedev);
376	return err;
377}
378
379static int pkg_thermal_cpu_offline(unsigned int cpu)
380{
381	struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
382	bool lastcpu, was_target;
383	int target;
384
385	if (!zonedev)
386		return 0;
387
388	target = cpumask_any_but(&zonedev->cpumask, cpu);
389	cpumask_clear_cpu(cpu, &zonedev->cpumask);
390	lastcpu = target >= nr_cpu_ids;
391	/*
392	 * Remove the sysfs files, if this is the last cpu in the package
393	 * before doing further cleanups.
394	 */
395	if (lastcpu) {
396		struct thermal_zone_device *tzone = zonedev->tzone;
397
398		/*
399		 * We must protect against a work function calling
400		 * thermal_zone_update, after/while unregister. We null out
401		 * the pointer under the zone mutex, so the worker function
402		 * won't try to call.
403		 */
404		mutex_lock(&thermal_zone_mutex);
405		zonedev->tzone = NULL;
406		mutex_unlock(&thermal_zone_mutex);
407
408		thermal_zone_device_unregister(tzone);
409	}
410
411	/* Protect against work and interrupts */
412	raw_spin_lock_irq(&pkg_temp_lock);
413
414	/*
415	 * Check whether this cpu was the current target and store the new
416	 * one. When we drop the lock, then the interrupt notify function
417	 * will see the new target.
418	 */
419	was_target = zonedev->cpu == cpu;
420	zonedev->cpu = target;
421
422	/*
423	 * If this is the last CPU in the package remove the package
424	 * reference from the array and restore the interrupt MSR. When we
425	 * drop the lock neither the interrupt notify function nor the
426	 * worker will see the package anymore.
427	 */
428	if (lastcpu) {
429		zones[topology_logical_die_id(cpu)] = NULL;
430		/* After this point nothing touches the MSR anymore. */
431		wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
432		      zonedev->msr_pkg_therm_low, zonedev->msr_pkg_therm_high);
433	}
434
435	/*
436	 * Check whether there is work scheduled and whether the work is
437	 * targeted at the outgoing CPU.
438	 */
439	if (zonedev->work_scheduled && was_target) {
440		/*
441		 * To cancel the work we need to drop the lock, otherwise
442		 * we might deadlock if the work needs to be flushed.
443		 */
444		raw_spin_unlock_irq(&pkg_temp_lock);
445		cancel_delayed_work_sync(&zonedev->work);
446		raw_spin_lock_irq(&pkg_temp_lock);
447		/*
448		 * If this is not the last cpu in the package and the work
449		 * did not run after we dropped the lock above, then we
450		 * need to reschedule the work, otherwise the interrupt
451		 * stays disabled forever.
452		 */
453		if (!lastcpu && zonedev->work_scheduled)
454			pkg_thermal_schedule_work(target, &zonedev->work);
455	}
456
457	raw_spin_unlock_irq(&pkg_temp_lock);
458
459	/* Final cleanup if this is the last cpu */
460	if (lastcpu) {
461		kfree(zonedev->trips);
462		kfree(zonedev);
463	}
464	return 0;
465}
466
467static int pkg_thermal_cpu_online(unsigned int cpu)
468{
469	struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
470	struct cpuinfo_x86 *c = &cpu_data(cpu);
471
472	/* Paranoia check */
473	if (!cpu_has(c, X86_FEATURE_DTHERM) || !cpu_has(c, X86_FEATURE_PTS))
474		return -ENODEV;
475
476	/* If the package exists, nothing to do */
477	if (zonedev) {
478		cpumask_set_cpu(cpu, &zonedev->cpumask);
479		return 0;
480	}
481	return pkg_temp_thermal_device_add(cpu);
482}
483
484static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
485	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_PTS, NULL),
486	{}
487};
488MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
489
490static int __init pkg_temp_thermal_init(void)
491{
492	int ret;
493
494	if (!x86_match_cpu(pkg_temp_thermal_ids))
495		return -ENODEV;
496
497	max_id = topology_max_packages() * topology_max_die_per_package();
498	zones = kcalloc(max_id, sizeof(struct zone_device *),
499			   GFP_KERNEL);
500	if (!zones)
501		return -ENOMEM;
502
503	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
504				pkg_thermal_cpu_online,	pkg_thermal_cpu_offline);
505	if (ret < 0)
506		goto err;
507
508	/* Store the state for module exit */
509	pkg_thermal_hp_state = ret;
510
511	platform_thermal_package_notify = pkg_thermal_notify;
512	platform_thermal_package_rate_control = pkg_thermal_rate_control;
513
514	 /* Don't care if it fails */
515	pkg_temp_debugfs_init();
516	return 0;
517
518err:
519	kfree(zones);
520	return ret;
521}
522module_init(pkg_temp_thermal_init)
523
524static void __exit pkg_temp_thermal_exit(void)
525{
526	platform_thermal_package_notify = NULL;
527	platform_thermal_package_rate_control = NULL;
528
529	cpuhp_remove_state(pkg_thermal_hp_state);
530	debugfs_remove_recursive(debugfs);
531	kfree(zones);
532}
533module_exit(pkg_temp_thermal_exit)
534
535MODULE_IMPORT_NS(INTEL_TCC);
536MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver");
537MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
538MODULE_LICENSE("GPL v2");