Linux Audio

Check our new training course

Loading...
v4.6
 
   1/*
   2 *  linux/drivers/thermal/cpu_cooling.c
   3 *
   4 *  Copyright (C) 2012	Samsung Electronics Co., Ltd(http://www.samsung.com)
   5 *  Copyright (C) 2012  Amit Daniel <amit.kachhap@linaro.org>
   6 *
   7 *  Copyright (C) 2014  Viresh Kumar <viresh.kumar@linaro.org>
   8 *
   9 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  10 *  This program is free software; you can redistribute it and/or modify
  11 *  it under the terms of the GNU General Public License as published by
  12 *  the Free Software Foundation; version 2 of the License.
  13 *
  14 *  This program is distributed in the hope that it will be useful, but
  15 *  WITHOUT ANY WARRANTY; without even the implied warranty of
  16 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 *  General Public License for more details.
  18 *
  19 *  You should have received a copy of the GNU General Public License along
  20 *  with this program; if not, write to the Free Software Foundation, Inc.,
  21 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  22 *
  23 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  24 */
  25#include <linux/module.h>
  26#include <linux/thermal.h>
  27#include <linux/cpufreq.h>
  28#include <linux/err.h>
 
  29#include <linux/pm_opp.h>
 
  30#include <linux/slab.h>
  31#include <linux/cpu.h>
  32#include <linux/cpu_cooling.h>
  33
  34#include <trace/events/thermal.h>
  35
  36/*
  37 * Cooling state <-> CPUFreq frequency
  38 *
  39 * Cooling states are translated to frequencies throughout this driver and this
  40 * is the relation between them.
  41 *
  42 * Highest cooling state corresponds to lowest possible frequency.
  43 *
  44 * i.e.
  45 *	level 0 --> 1st Max Freq
  46 *	level 1 --> 2nd Max Freq
  47 *	...
  48 */
  49
  50/**
  51 * struct power_table - frequency to power conversion
  52 * @frequency:	frequency in KHz
  53 * @power:	power in mW
  54 *
  55 * This structure is built when the cooling device registers and helps
  56 * in translating frequency to power and viceversa.
  57 */
  58struct power_table {
  59	u32 frequency;
  60	u32 power;
  61};
  62
  63/**
 
 
 
 
 
 
 
 
 
 
  64 * struct cpufreq_cooling_device - data for cooling device with cpufreq
  65 * @id: unique integer value corresponding to each cpufreq_cooling_device
  66 *	registered.
  67 * @cool_dev: thermal_cooling_device pointer to keep track of the
  68 *	registered cooling device.
  69 * @cpufreq_state: integer value representing the current state of cpufreq
  70 *	cooling	devices.
  71 * @clipped_freq: integer value representing the absolute value of the clipped
  72 *	frequency.
  73 * @max_level: maximum cooling level. One less than total number of valid
  74 *	cpufreq frequencies.
  75 * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
 
 
 
  76 * @node: list_head to link all cpufreq_cooling_device together.
  77 * @last_load: load measured by the latest call to cpufreq_get_actual_power()
  78 * @time_in_idle: previous reading of the absolute time that this cpu was idle
  79 * @time_in_idle_timestamp: wall time of the last invocation of
  80 *	get_cpu_idle_time_us()
  81 * @dyn_power_table: array of struct power_table for frequency to power
  82 *	conversion, sorted in ascending order.
  83 * @dyn_power_table_entries: number of entries in the @dyn_power_table array
  84 * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
  85 * @plat_get_static_power: callback to calculate the static power
  86 *
  87 * This structure is required for keeping information of each registered
  88 * cpufreq_cooling_device.
  89 */
  90struct cpufreq_cooling_device {
  91	int id;
  92	struct thermal_cooling_device *cool_dev;
  93	unsigned int cpufreq_state;
  94	unsigned int clipped_freq;
  95	unsigned int max_level;
  96	unsigned int *freq_table;	/* In descending order */
  97	struct cpumask allowed_cpus;
  98	struct list_head node;
  99	u32 last_load;
 100	u64 *time_in_idle;
 101	u64 *time_in_idle_timestamp;
 102	struct power_table *dyn_power_table;
 103	int dyn_power_table_entries;
 104	struct device *cpu_dev;
 105	get_static_t plat_get_static_power;
 106};
 107static DEFINE_IDR(cpufreq_idr);
 108static DEFINE_MUTEX(cooling_cpufreq_lock);
 109
 110static unsigned int cpufreq_dev_count;
 111
 
 112static DEFINE_MUTEX(cooling_list_lock);
 113static LIST_HEAD(cpufreq_dev_list);
 114
 115/**
 116 * get_idr - function to get a unique id.
 117 * @idr: struct idr * handle used to create a id.
 118 * @id: int * value generated by this function.
 119 *
 120 * This function will populate @id with an unique
 121 * id, using the idr API.
 122 *
 123 * Return: 0 on success, an error code on failure.
 124 */
 125static int get_idr(struct idr *idr, int *id)
 126{
 127	int ret;
 128
 129	mutex_lock(&cooling_cpufreq_lock);
 130	ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL);
 131	mutex_unlock(&cooling_cpufreq_lock);
 132	if (unlikely(ret < 0))
 133		return ret;
 134	*id = ret;
 135
 136	return 0;
 137}
 138
 139/**
 140 * release_idr - function to free the unique id.
 141 * @idr: struct idr * handle used for creating the id.
 142 * @id: int value representing the unique id.
 143 */
 144static void release_idr(struct idr *idr, int id)
 145{
 146	mutex_lock(&cooling_cpufreq_lock);
 147	idr_remove(idr, id);
 148	mutex_unlock(&cooling_cpufreq_lock);
 149}
 150
 151/* Below code defines functions to be used for cpufreq as cooling device */
 152
 153/**
 154 * get_level: Find the level for a particular frequency
 155 * @cpufreq_dev: cpufreq_dev for which the property is required
 156 * @freq: Frequency
 157 *
 158 * Return: level on success, THERMAL_CSTATE_INVALID on error.
 159 */
 160static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev,
 161			       unsigned int freq)
 162{
 
 163	unsigned long level;
 164
 165	for (level = 0; level <= cpufreq_dev->max_level; level++) {
 166		if (freq == cpufreq_dev->freq_table[level])
 167			return level;
 168
 169		if (freq > cpufreq_dev->freq_table[level])
 170			break;
 171	}
 172
 173	return THERMAL_CSTATE_INVALID;
 174}
 175
 176/**
 177 * cpufreq_cooling_get_level - for a given cpu, return the cooling level.
 178 * @cpu: cpu for which the level is required
 179 * @freq: the frequency of interest
 180 *
 181 * This function will match the cooling level corresponding to the
 182 * requested @freq and return it.
 183 *
 184 * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID
 185 * otherwise.
 186 */
 187unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
 188{
 189	struct cpufreq_cooling_device *cpufreq_dev;
 190
 191	mutex_lock(&cooling_list_lock);
 192	list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
 193		if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) {
 194			mutex_unlock(&cooling_list_lock);
 195			return get_level(cpufreq_dev, freq);
 196		}
 197	}
 198	mutex_unlock(&cooling_list_lock);
 199
 200	pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu);
 201	return THERMAL_CSTATE_INVALID;
 202}
 203EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level);
 204
 205/**
 206 * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
 207 * @nb:	struct notifier_block * with callback info.
 208 * @event: value showing cpufreq event for which this function invoked.
 209 * @data: callback-specific data
 210 *
 211 * Callback to hijack the notification on cpufreq policy transition.
 212 * Every time there is a change in policy, we will intercept and
 213 * update the cpufreq policy with thermal constraints.
 214 *
 215 * Return: 0 (success)
 216 */
 217static int cpufreq_thermal_notifier(struct notifier_block *nb,
 218				    unsigned long event, void *data)
 219{
 220	struct cpufreq_policy *policy = data;
 221	unsigned long clipped_freq;
 222	struct cpufreq_cooling_device *cpufreq_dev;
 223
 224	if (event != CPUFREQ_ADJUST)
 225		return NOTIFY_DONE;
 226
 227	mutex_lock(&cooling_list_lock);
 228	list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
 229		if (!cpumask_test_cpu(policy->cpu, &cpufreq_dev->allowed_cpus))
 230			continue;
 231
 232		/*
 233		 * policy->max is the maximum allowed frequency defined by user
 234		 * and clipped_freq is the maximum that thermal constraints
 235		 * allow.
 236		 *
 237		 * If clipped_freq is lower than policy->max, then we need to
 238		 * readjust policy->max.
 239		 *
 240		 * But, if clipped_freq is greater than policy->max, we don't
 241		 * need to do anything.
 242		 */
 243		clipped_freq = cpufreq_dev->clipped_freq;
 244
 245		if (policy->max > clipped_freq)
 246			cpufreq_verify_within_limits(policy, 0, clipped_freq);
 247		break;
 248	}
 249	mutex_unlock(&cooling_list_lock);
 250
 251	return NOTIFY_OK;
 252}
 253
 254/**
 255 * build_dyn_power_table() - create a dynamic power to frequency table
 256 * @cpufreq_device:	the cpufreq cooling device in which to store the table
 257 * @capacitance: dynamic power coefficient for these cpus
 258 *
 259 * Build a dynamic power to frequency table for this cpu and store it
 260 * in @cpufreq_device.  This table will be used in cpu_power_to_freq() and
 261 * cpu_freq_to_power() to convert between power and frequency
 262 * efficiently.  Power is stored in mW, frequency in KHz.  The
 263 * resulting table is in ascending order.
 264 *
 265 * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
 266 * -ENOMEM if we run out of memory or -EAGAIN if an OPP was
 267 * added/enabled while the function was executing.
 268 */
 269static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
 270				 u32 capacitance)
 271{
 272	struct power_table *power_table;
 273	struct dev_pm_opp *opp;
 274	struct device *dev = NULL;
 275	int num_opps = 0, cpu, i, ret = 0;
 276	unsigned long freq;
 277
 278	for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
 279		dev = get_cpu_device(cpu);
 280		if (!dev) {
 281			dev_warn(&cpufreq_device->cool_dev->device,
 282				 "No cpu device for cpu %d\n", cpu);
 283			continue;
 284		}
 285
 286		num_opps = dev_pm_opp_get_opp_count(dev);
 287		if (num_opps > 0)
 288			break;
 289		else if (num_opps < 0)
 290			return num_opps;
 291	}
 292
 293	if (num_opps == 0)
 
 
 
 
 
 
 
 
 
 294		return -EINVAL;
 
 295
 296	power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL);
 297	if (!power_table)
 298		return -ENOMEM;
 299
 300	rcu_read_lock();
 301
 302	for (freq = 0, i = 0;
 303	     opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp);
 304	     freq++, i++) {
 305		u32 freq_mhz, voltage_mv;
 306		u64 power;
 
 307
 308		if (i >= num_opps) {
 309			rcu_read_unlock();
 310			ret = -EAGAIN;
 311			goto free_power_table;
 
 
 
 
 
 312		}
 313
 314		freq_mhz = freq / 1000000;
 315		voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
 
 316
 317		/*
 318		 * Do the multiplication with MHz and millivolt so as
 319		 * to not overflow.
 320		 */
 321		power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
 322		do_div(power, 1000000000);
 323
 324		/* frequency is stored in power_table in KHz */
 325		power_table[i].frequency = freq / 1000;
 326
 327		/* power is stored in mW */
 328		power_table[i].power = power;
 329	}
 330
 331	rcu_read_unlock();
 332
 333	if (i != num_opps) {
 334		ret = PTR_ERR(opp);
 335		goto free_power_table;
 336	}
 337
 338	cpufreq_device->cpu_dev = dev;
 339	cpufreq_device->dyn_power_table = power_table;
 340	cpufreq_device->dyn_power_table_entries = i;
 341
 342	return 0;
 343
 344free_power_table:
 345	kfree(power_table);
 346
 347	return ret;
 348}
 349
 350static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device,
 351			     u32 freq)
 352{
 353	int i;
 354	struct power_table *pt = cpufreq_device->dyn_power_table;
 355
 356	for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
 357		if (freq < pt[i].frequency)
 358			break;
 359
 360	return pt[i - 1].power;
 361}
 362
 363static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
 364			     u32 power)
 365{
 366	int i;
 367	struct power_table *pt = cpufreq_device->dyn_power_table;
 368
 369	for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
 370		if (power < pt[i].power)
 371			break;
 372
 373	return pt[i - 1].frequency;
 374}
 375
 376/**
 377 * get_load() - get load for a cpu since last updated
 378 * @cpufreq_device:	&struct cpufreq_cooling_device for this cpu
 379 * @cpu:	cpu number
 380 * @cpu_idx:	index of the cpu in cpufreq_device->allowed_cpus
 381 *
 382 * Return: The average load of cpu @cpu in percentage since this
 383 * function was last called.
 384 */
 385static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu,
 386		    int cpu_idx)
 387{
 388	u32 load;
 389	u64 now, now_idle, delta_time, delta_idle;
 
 390
 391	now_idle = get_cpu_idle_time(cpu, &now, 0);
 392	delta_idle = now_idle - cpufreq_device->time_in_idle[cpu_idx];
 393	delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu_idx];
 394
 395	if (delta_time <= delta_idle)
 396		load = 0;
 397	else
 398		load = div64_u64(100 * (delta_time - delta_idle), delta_time);
 399
 400	cpufreq_device->time_in_idle[cpu_idx] = now_idle;
 401	cpufreq_device->time_in_idle_timestamp[cpu_idx] = now;
 402
 403	return load;
 404}
 405
 406/**
 407 * get_static_power() - calculate the static power consumed by the cpus
 408 * @cpufreq_device:	struct &cpufreq_cooling_device for this cpu cdev
 409 * @tz:		thermal zone device in which we're operating
 410 * @freq:	frequency in KHz
 411 * @power:	pointer in which to store the calculated static power
 412 *
 413 * Calculate the static power consumed by the cpus described by
 414 * @cpu_actor running at frequency @freq.  This function relies on a
 415 * platform specific function that should have been provided when the
 416 * actor was registered.  If it wasn't, the static power is assumed to
 417 * be negligible.  The calculated static power is stored in @power.
 418 *
 419 * Return: 0 on success, -E* on failure.
 420 */
 421static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
 422			    struct thermal_zone_device *tz, unsigned long freq,
 423			    u32 *power)
 424{
 425	struct dev_pm_opp *opp;
 426	unsigned long voltage;
 427	struct cpumask *cpumask = &cpufreq_device->allowed_cpus;
 428	unsigned long freq_hz = freq * 1000;
 429
 430	if (!cpufreq_device->plat_get_static_power ||
 431	    !cpufreq_device->cpu_dev) {
 432		*power = 0;
 433		return 0;
 434	}
 435
 436	rcu_read_lock();
 437
 438	opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
 439					 true);
 440	voltage = dev_pm_opp_get_voltage(opp);
 441
 442	rcu_read_unlock();
 443
 444	if (voltage == 0) {
 445		dev_warn_ratelimited(cpufreq_device->cpu_dev,
 446				     "Failed to get voltage for frequency %lu: %ld\n",
 447				     freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0);
 448		return -EINVAL;
 449	}
 450
 451	return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay,
 452						     voltage, power);
 453}
 454
 455/**
 456 * get_dynamic_power() - calculate the dynamic power
 457 * @cpufreq_device:	&cpufreq_cooling_device for this cdev
 458 * @freq:	current frequency
 459 *
 460 * Return: the dynamic power consumed by the cpus described by
 461 * @cpufreq_device.
 462 */
 463static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
 464			     unsigned long freq)
 465{
 466	u32 raw_cpu_power;
 467
 468	raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq);
 469	return (raw_cpu_power * cpufreq_device->last_load) / 100;
 470}
 471
 472/* cpufreq cooling device callback functions are defined below */
 473
 474/**
 475 * cpufreq_get_max_state - callback function to get the max cooling state.
 476 * @cdev: thermal cooling device pointer.
 477 * @state: fill this variable with the max cooling state.
 478 *
 479 * Callback for the thermal cooling device to return the cpufreq
 480 * max cooling state.
 481 *
 482 * Return: 0 on success, an error code otherwise.
 483 */
 484static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
 485				 unsigned long *state)
 486{
 487	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
 488
 489	*state = cpufreq_device->max_level;
 490	return 0;
 491}
 492
 493/**
 494 * cpufreq_get_cur_state - callback function to get the current cooling state.
 495 * @cdev: thermal cooling device pointer.
 496 * @state: fill this variable with the current cooling state.
 497 *
 498 * Callback for the thermal cooling device to return the cpufreq
 499 * current cooling state.
 500 *
 501 * Return: 0 on success, an error code otherwise.
 502 */
 503static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
 504				 unsigned long *state)
 505{
 506	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
 507
 508	*state = cpufreq_device->cpufreq_state;
 509
 510	return 0;
 511}
 512
 513/**
 514 * cpufreq_set_cur_state - callback function to set the current cooling state.
 515 * @cdev: thermal cooling device pointer.
 516 * @state: set this variable to the current cooling state.
 517 *
 518 * Callback for the thermal cooling device to change the cpufreq
 519 * current cooling state.
 520 *
 521 * Return: 0 on success, an error code otherwise.
 522 */
 523static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
 524				 unsigned long state)
 525{
 526	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
 527	unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus);
 528	unsigned int clip_freq;
 529
 530	/* Request state should be less than max_level */
 531	if (WARN_ON(state > cpufreq_device->max_level))
 532		return -EINVAL;
 533
 534	/* Check if the old cooling action is same as new cooling action */
 535	if (cpufreq_device->cpufreq_state == state)
 536		return 0;
 537
 538	clip_freq = cpufreq_device->freq_table[state];
 539	cpufreq_device->cpufreq_state = state;
 540	cpufreq_device->clipped_freq = clip_freq;
 541
 542	cpufreq_update_policy(cpu);
 543
 544	return 0;
 
 545}
 546
 547/**
 548 * cpufreq_get_requested_power() - get the current power
 549 * @cdev:	&thermal_cooling_device pointer
 550 * @tz:		a valid thermal zone device pointer
 551 * @power:	pointer in which to store the resulting power
 552 *
 553 * Calculate the current power consumption of the cpus in milliwatts
 554 * and store it in @power.  This function should actually calculate
 555 * the requested power, but it's hard to get the frequency that
 556 * cpufreq would have assigned if there were no thermal limits.
 557 * Instead, we calculate the current power on the assumption that the
 558 * immediate future will look like the immediate past.
 559 *
 560 * We use the current frequency and the average load since this
 561 * function was last called.  In reality, there could have been
 562 * multiple opps since this function was last called and that affects
 563 * the load calculation.  While it's not perfectly accurate, this
 564 * simplification is good enough and works.  REVISIT this, as more
 565 * complex code may be needed if experiments show that it's not
 566 * accurate enough.
 567 *
 568 * Return: 0 on success, -E* if getting the static power failed.
 569 */
 570static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
 571				       struct thermal_zone_device *tz,
 572				       u32 *power)
 573{
 574	unsigned long freq;
 575	int i = 0, cpu, ret;
 576	u32 static_power, dynamic_power, total_load = 0;
 577	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
 
 578	u32 *load_cpu = NULL;
 579
 580	cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
 581
 582	/*
 583	 * All the CPUs are offline, thus the requested power by
 584	 * the cdev is 0
 585	 */
 586	if (cpu >= nr_cpu_ids) {
 587		*power = 0;
 588		return 0;
 589	}
 590
 591	freq = cpufreq_quick_get(cpu);
 592
 593	if (trace_thermal_power_cpu_get_power_enabled()) {
 594		u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus);
 595
 596		load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL);
 597	}
 598
 599	for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
 600		u32 load;
 601
 602		if (cpu_online(cpu))
 603			load = get_load(cpufreq_device, cpu, i);
 604		else
 605			load = 0;
 606
 607		total_load += load;
 608		if (trace_thermal_power_cpu_limit_enabled() && load_cpu)
 609			load_cpu[i] = load;
 610
 611		i++;
 612	}
 613
 614	cpufreq_device->last_load = total_load;
 615
 616	dynamic_power = get_dynamic_power(cpufreq_device, freq);
 617	ret = get_static_power(cpufreq_device, tz, freq, &static_power);
 618	if (ret) {
 619		kfree(load_cpu);
 620		return ret;
 621	}
 622
 623	if (load_cpu) {
 624		trace_thermal_power_cpu_get_power(
 625			&cpufreq_device->allowed_cpus,
 626			freq, load_cpu, i, dynamic_power, static_power);
 627
 628		kfree(load_cpu);
 629	}
 630
 631	*power = static_power + dynamic_power;
 632	return 0;
 633}
 634
 635/**
 636 * cpufreq_state2power() - convert a cpu cdev state to power consumed
 637 * @cdev:	&thermal_cooling_device pointer
 638 * @tz:		a valid thermal zone device pointer
 639 * @state:	cooling device state to be converted
 640 * @power:	pointer in which to store the resulting power
 641 *
 642 * Convert cooling device state @state into power consumption in
 643 * milliwatts assuming 100% load.  Store the calculated power in
 644 * @power.
 645 *
 646 * Return: 0 on success, -EINVAL if the cooling device state could not
 647 * be converted into a frequency or other -E* if there was an error
 648 * when calculating the static power.
 649 */
 650static int cpufreq_state2power(struct thermal_cooling_device *cdev,
 651			       struct thermal_zone_device *tz,
 652			       unsigned long state, u32 *power)
 653{
 654	unsigned int freq, num_cpus;
 655	cpumask_t cpumask;
 656	u32 static_power, dynamic_power;
 657	int ret;
 658	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
 659
 660	cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
 661	num_cpus = cpumask_weight(&cpumask);
 662
 663	/* None of our cpus are online, so no power */
 664	if (num_cpus == 0) {
 665		*power = 0;
 666		return 0;
 667	}
 668
 669	freq = cpufreq_device->freq_table[state];
 670	if (!freq)
 671		return -EINVAL;
 672
 673	dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
 674	ret = get_static_power(cpufreq_device, tz, freq, &static_power);
 675	if (ret)
 676		return ret;
 677
 678	*power = static_power + dynamic_power;
 679	return 0;
 680}
 681
 682/**
 683 * cpufreq_power2state() - convert power to a cooling device state
 684 * @cdev:	&thermal_cooling_device pointer
 685 * @tz:		a valid thermal zone device pointer
 686 * @power:	power in milliwatts to be converted
 687 * @state:	pointer in which to store the resulting state
 688 *
 689 * Calculate a cooling device state for the cpus described by @cdev
 690 * that would allow them to consume at most @power mW and store it in
 691 * @state.  Note that this calculation depends on external factors
 692 * such as the cpu load or the current static power.  Calling this
 693 * function with the same power as input can yield different cooling
 694 * device states depending on those external factors.
 695 *
 696 * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
 697 * the calculated frequency could not be converted to a valid state.
 698 * The latter should not happen unless the frequencies available to
 699 * cpufreq have changed since the initialization of the cpu cooling
 700 * device.
 701 */
 702static int cpufreq_power2state(struct thermal_cooling_device *cdev,
 703			       struct thermal_zone_device *tz, u32 power,
 704			       unsigned long *state)
 705{
 706	unsigned int cpu, cur_freq, target_freq;
 707	int ret;
 708	s32 dyn_power;
 709	u32 last_load, normalised_power, static_power;
 710	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
 711
 712	cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
 713
 714	/* None of our cpus are online */
 715	if (cpu >= nr_cpu_ids)
 716		return -ENODEV;
 717
 718	cur_freq = cpufreq_quick_get(cpu);
 719	ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
 720	if (ret)
 721		return ret;
 722
 723	dyn_power = power - static_power;
 724	dyn_power = dyn_power > 0 ? dyn_power : 0;
 725	last_load = cpufreq_device->last_load ?: 1;
 726	normalised_power = (dyn_power * 100) / last_load;
 727	target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);
 728
 729	*state = cpufreq_cooling_get_level(cpu, target_freq);
 730	if (*state == THERMAL_CSTATE_INVALID) {
 731		dev_warn_ratelimited(&cdev->device,
 732				     "Failed to convert %dKHz for cpu %d into a cdev state\n",
 733				     target_freq, cpu);
 734		return -EINVAL;
 735	}
 736
 737	trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
 738				      target_freq, *state, power);
 739	return 0;
 740}
 741
 742/* Bind cpufreq callbacks to thermal cooling device ops */
 
 743static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
 744	.get_max_state = cpufreq_get_max_state,
 745	.get_cur_state = cpufreq_get_cur_state,
 746	.set_cur_state = cpufreq_set_cur_state,
 747};
 748
 749/* Notifier for cpufreq policy change */
 750static struct notifier_block thermal_cpufreq_notifier_block = {
 751	.notifier_call = cpufreq_thermal_notifier,
 
 
 
 
 752};
 753
 754static unsigned int find_next_max(struct cpufreq_frequency_table *table,
 755				  unsigned int prev_max)
 756{
 757	struct cpufreq_frequency_table *pos;
 758	unsigned int max = 0;
 759
 760	cpufreq_for_each_valid_entry(pos, table) {
 761		if (pos->frequency > max && pos->frequency < prev_max)
 762			max = pos->frequency;
 763	}
 764
 765	return max;
 766}
 767
 768/**
 769 * __cpufreq_cooling_register - helper function to create cpufreq cooling device
 770 * @np: a valid struct device_node to the cooling device device tree node
 771 * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
 772 * Normally this should be same as cpufreq policy->related_cpus.
 773 * @capacitance: dynamic power coefficient for these cpus
 774 * @plat_static_func: function to calculate the static power consumed by these
 775 *                    cpus (optional)
 776 *
 777 * This interface function registers the cpufreq cooling device with the name
 778 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
 779 * cooling devices. It also gives the opportunity to link the cooling device
 780 * with a device tree node, in order to bind it via the thermal DT code.
 781 *
 782 * Return: a valid struct thermal_cooling_device pointer on success,
 783 * on failure, it returns a corresponding ERR_PTR().
 784 */
 785static struct thermal_cooling_device *
 786__cpufreq_cooling_register(struct device_node *np,
 787			const struct cpumask *clip_cpus, u32 capacitance,
 788			get_static_t plat_static_func)
 789{
 790	struct thermal_cooling_device *cool_dev;
 791	struct cpufreq_cooling_device *cpufreq_dev;
 792	char dev_name[THERMAL_NAME_LENGTH];
 793	struct cpufreq_frequency_table *pos, *table;
 794	unsigned int freq, i, num_cpus;
 
 795	int ret;
 
 796
 797	table = cpufreq_frequency_get_table(cpumask_first(clip_cpus));
 798	if (!table) {
 799		pr_debug("%s: CPUFreq table not found\n", __func__);
 800		return ERR_PTR(-EPROBE_DEFER);
 801	}
 802
 803	cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL);
 804	if (!cpufreq_dev)
 805		return ERR_PTR(-ENOMEM);
 806
 807	num_cpus = cpumask_weight(clip_cpus);
 808	cpufreq_dev->time_in_idle = kcalloc(num_cpus,
 809					    sizeof(*cpufreq_dev->time_in_idle),
 810					    GFP_KERNEL);
 811	if (!cpufreq_dev->time_in_idle) {
 812		cool_dev = ERR_PTR(-ENOMEM);
 813		goto free_cdev;
 814	}
 815
 816	cpufreq_dev->time_in_idle_timestamp =
 817		kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp),
 818			GFP_KERNEL);
 819	if (!cpufreq_dev->time_in_idle_timestamp) {
 820		cool_dev = ERR_PTR(-ENOMEM);
 821		goto free_time_in_idle;
 822	}
 823
 824	/* Find max levels */
 825	cpufreq_for_each_valid_entry(pos, table)
 826		cpufreq_dev->max_level++;
 827
 828	cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) *
 829					  cpufreq_dev->max_level, GFP_KERNEL);
 830	if (!cpufreq_dev->freq_table) {
 831		cool_dev = ERR_PTR(-ENOMEM);
 832		goto free_time_in_idle_timestamp;
 
 
 
 833	}
 834
 835	/* max_level is an index, not a counter */
 836	cpufreq_dev->max_level--;
 837
 838	cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
 839
 840	if (capacitance) {
 841		cpufreq_cooling_ops.get_requested_power =
 842			cpufreq_get_requested_power;
 843		cpufreq_cooling_ops.state2power = cpufreq_state2power;
 844		cpufreq_cooling_ops.power2state = cpufreq_power2state;
 845		cpufreq_dev->plat_get_static_power = plat_static_func;
 846
 847		ret = build_dyn_power_table(cpufreq_dev, capacitance);
 848		if (ret) {
 849			cool_dev = ERR_PTR(ret);
 850			goto free_table;
 851		}
 852	}
 853
 854	ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
 855	if (ret) {
 856		cool_dev = ERR_PTR(ret);
 857		goto free_power_table;
 858	}
 
 859
 860	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
 861		 cpufreq_dev->id);
 862
 863	cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
 864						      &cpufreq_cooling_ops);
 865	if (IS_ERR(cool_dev))
 866		goto remove_idr;
 867
 868	/* Fill freq-table in descending order of frequencies */
 869	for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
 870		freq = find_next_max(table, freq);
 871		cpufreq_dev->freq_table[i] = freq;
 872
 873		/* Warn for duplicate entries */
 874		if (!freq)
 875			pr_warn("%s: table has duplicate entries\n", __func__);
 876		else
 877			pr_debug("%s: freq:%u KHz\n", __func__, freq);
 878	}
 879
 880	cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0];
 881	cpufreq_dev->cool_dev = cool_dev;
 
 
 
 
 882
 883	mutex_lock(&cooling_cpufreq_lock);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 884
 885	mutex_lock(&cooling_list_lock);
 886	list_add(&cpufreq_dev->node, &cpufreq_dev_list);
 887	mutex_unlock(&cooling_list_lock);
 888
 889	/* Register the notifier for first cpufreq cooling device */
 890	if (!cpufreq_dev_count++)
 891		cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
 892					  CPUFREQ_POLICY_NOTIFIER);
 893	mutex_unlock(&cooling_cpufreq_lock);
 894
 895	return cool_dev;
 896
 897remove_idr:
 898	release_idr(&cpufreq_idr, cpufreq_dev->id);
 899free_power_table:
 900	kfree(cpufreq_dev->dyn_power_table);
 901free_table:
 902	kfree(cpufreq_dev->freq_table);
 903free_time_in_idle_timestamp:
 904	kfree(cpufreq_dev->time_in_idle_timestamp);
 905free_time_in_idle:
 906	kfree(cpufreq_dev->time_in_idle);
 907free_cdev:
 908	kfree(cpufreq_dev);
 909
 910	return cool_dev;
 911}
 912
 913/**
 914 * cpufreq_cooling_register - function to create cpufreq cooling device.
 915 * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
 916 *
 917 * This interface function registers the cpufreq cooling device with the name
 918 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
 919 * cooling devices.
 920 *
 921 * Return: a valid struct thermal_cooling_device pointer on success,
 922 * on failure, it returns a corresponding ERR_PTR().
 923 */
 924struct thermal_cooling_device *
 925cpufreq_cooling_register(const struct cpumask *clip_cpus)
 926{
 927	return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
 928}
 929EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
 930
 931/**
 932 * of_cpufreq_cooling_register - function to create cpufreq cooling device.
 933 * @np: a valid struct device_node to the cooling device device tree node
 934 * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
 935 *
 936 * This interface function registers the cpufreq cooling device with the name
 937 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
 938 * cooling devices. Using this API, the cpufreq cooling device will be
 939 * linked to the device tree node provided.
 940 *
 941 * Return: a valid struct thermal_cooling_device pointer on success,
 942 * on failure, it returns a corresponding ERR_PTR().
 943 */
 944struct thermal_cooling_device *
 945of_cpufreq_cooling_register(struct device_node *np,
 946			    const struct cpumask *clip_cpus)
 947{
 948	if (!np)
 949		return ERR_PTR(-EINVAL);
 950
 951	return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
 952}
 953EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
 954
 955/**
 956 * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
 957 * @clip_cpus:	cpumask of cpus where the frequency constraints will happen
 958 * @capacitance:	dynamic power coefficient for these cpus
 959 * @plat_static_func:	function to calculate the static power consumed by these
 960 *			cpus (optional)
 961 *
 962 * This interface function registers the cpufreq cooling device with
 963 * the name "thermal-cpufreq-%x".  This api can support multiple
 964 * instances of cpufreq cooling devices.  Using this function, the
 965 * cooling device will implement the power extensions by using a
 966 * simple cpu power model.  The cpus must have registered their OPPs
 967 * using the OPP library.
 968 *
 969 * An optional @plat_static_func may be provided to calculate the
 970 * static power consumed by these cpus.  If the platform's static
 971 * power consumption is unknown or negligible, make it NULL.
 972 *
 973 * Return: a valid struct thermal_cooling_device pointer on success,
 974 * on failure, it returns a corresponding ERR_PTR().
 975 */
 976struct thermal_cooling_device *
 977cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
 978			       get_static_t plat_static_func)
 979{
 980	return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
 981				plat_static_func);
 982}
 983EXPORT_SYMBOL(cpufreq_power_cooling_register);
 984
 985/**
 986 * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
 987 * @np:	a valid struct device_node to the cooling device device tree node
 988 * @clip_cpus:	cpumask of cpus where the frequency constraints will happen
 989 * @capacitance:	dynamic power coefficient for these cpus
 990 * @plat_static_func:	function to calculate the static power consumed by these
 991 *			cpus (optional)
 992 *
 993 * This interface function registers the cpufreq cooling device with
 994 * the name "thermal-cpufreq-%x".  This api can support multiple
 995 * instances of cpufreq cooling devices.  Using this API, the cpufreq
 996 * cooling device will be linked to the device tree node provided.
 997 * Using this function, the cooling device will implement the power
 998 * extensions by using a simple cpu power model.  The cpus must have
 999 * registered their OPPs using the OPP library.
1000 *
1001 * An optional @plat_static_func may be provided to calculate the
1002 * static power consumed by these cpus.  If the platform's static
1003 * power consumption is unknown or negligible, make it NULL.
1004 *
1005 * Return: a valid struct thermal_cooling_device pointer on success,
1006 * on failure, it returns a corresponding ERR_PTR().
1007 */
1008struct thermal_cooling_device *
1009of_cpufreq_power_cooling_register(struct device_node *np,
1010				  const struct cpumask *clip_cpus,
1011				  u32 capacitance,
1012				  get_static_t plat_static_func)
1013{
1014	if (!np)
1015		return ERR_PTR(-EINVAL);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1016
1017	return __cpufreq_cooling_register(np, clip_cpus, capacitance,
1018				plat_static_func);
1019}
1020EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
1021
1022/**
1023 * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
1024 * @cdev: thermal cooling device pointer.
1025 *
1026 * This interface function unregisters the "thermal-cpufreq-%x" cooling device.
1027 */
1028void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
1029{
1030	struct cpufreq_cooling_device *cpufreq_dev;
1031
1032	if (!cdev)
1033		return;
1034
1035	cpufreq_dev = cdev->devdata;
1036
1037	/* Unregister the notifier for the last cpufreq cooling device */
1038	mutex_lock(&cooling_cpufreq_lock);
1039	if (!--cpufreq_dev_count)
1040		cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
1041					    CPUFREQ_POLICY_NOTIFIER);
1042
1043	mutex_lock(&cooling_list_lock);
1044	list_del(&cpufreq_dev->node);
1045	mutex_unlock(&cooling_list_lock);
1046
1047	mutex_unlock(&cooling_cpufreq_lock);
1048
1049	thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
1050	release_idr(&cpufreq_idr, cpufreq_dev->id);
1051	kfree(cpufreq_dev->dyn_power_table);
1052	kfree(cpufreq_dev->time_in_idle_timestamp);
1053	kfree(cpufreq_dev->time_in_idle);
1054	kfree(cpufreq_dev->freq_table);
1055	kfree(cpufreq_dev);
1056}
1057EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);
v5.4
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 *  linux/drivers/thermal/cpu_cooling.c
  4 *
  5 *  Copyright (C) 2012	Samsung Electronics Co., Ltd(http://www.samsung.com)
 
  6 *
  7 *  Copyright (C) 2012-2018 Linaro Limited.
  8 *
  9 *  Authors:	Amit Daniel <amit.kachhap@linaro.org>
 10 *		Viresh Kumar <viresh.kumar@linaro.org>
 
 
 11 *
 
 
 
 
 
 
 
 
 
 
 12 */
 13#include <linux/module.h>
 14#include <linux/thermal.h>
 15#include <linux/cpufreq.h>
 16#include <linux/err.h>
 17#include <linux/idr.h>
 18#include <linux/pm_opp.h>
 19#include <linux/pm_qos.h>
 20#include <linux/slab.h>
 21#include <linux/cpu.h>
 22#include <linux/cpu_cooling.h>
 23
 24#include <trace/events/thermal.h>
 25
 26/*
 27 * Cooling state <-> CPUFreq frequency
 28 *
 29 * Cooling states are translated to frequencies throughout this driver and this
 30 * is the relation between them.
 31 *
 32 * Highest cooling state corresponds to lowest possible frequency.
 33 *
 34 * i.e.
 35 *	level 0 --> 1st Max Freq
 36 *	level 1 --> 2nd Max Freq
 37 *	...
 38 */
 39
  40/**
  41 * struct freq_table - frequency table along with power entries
  42 * @frequency:	frequency in KHz
  43 * @power:	power in mW
  44 *
  45 * This structure is built when the cooling device registers and helps
  46 * in translating frequency to power and vice versa.
 *
 * @power is only written by update_freq_table(), which the registration
 * path calls when a non-zero capacitance is supplied.
  47 */
  48struct freq_table {
  49	u32 frequency;
  50	u32 power;
  51};
 52
  53/**
  54 * struct time_in_idle - Idle time stats
  55 * @time: previous reading of the absolute time that this cpu was idle
  56 * @timestamp: wall time of the last invocation of get_cpu_idle_time()
 *
 * get_load() refreshes both fields on every call and uses the previous
 * values to compute the load over the elapsed interval.
  57 */
  58struct time_in_idle {
  59	u64 time;
  60	u64 timestamp;
  61};
 62
  63/**
  64 * struct cpufreq_cooling_device - data for cooling device with cpufreq
  65 * @id: unique integer value corresponding to each cpufreq_cooling_device
  66 *	registered.
  67 * @last_load: load measured by the latest call to cpufreq_get_requested_power()

  68 * @cpufreq_state: integer value representing the current state of cpufreq
  69 *	cooling	devices.


  70 * @max_level: maximum cooling level. One less than total number of valid
  71 *	cpufreq frequencies.
  72 * @freq_table: Freq table in descending order of frequencies
  75 * @policy: cpufreq policy.
  76 * @node: list_head to link all cpufreq_cooling_device together.
  77 * @idle_time: idle time stats; array with one entry per cpu in
 *	policy->related_cpus
 * @qos_req: FREQ_QOS_MAX request added to the policy constraints at
 *	registration; its value is updated whenever the cooling state changes
  78 *
  79 * This structure is required for keeping information of each registered
  80 * cpufreq_cooling_device.
  81 */
  82struct cpufreq_cooling_device {
  83	int id;
  84	u32 last_load;
  85	unsigned int cpufreq_state;

  86	unsigned int max_level;
  87	struct freq_table *freq_table;	/* In descending order */
  88	struct cpufreq_policy *policy;
  89	struct list_head node;
  90	struct time_in_idle *idle_time;
  91	struct freq_qos_request qos_req;





  92};
 
 
 
 
 93
  94static DEFINE_IDA(cpufreq_ida);	/* ids used in the "thermal-cpufreq-%d" names */
  95static DEFINE_MUTEX(cooling_list_lock);	/* held around cpufreq_cdev_list add/del */
  96static LIST_HEAD(cpufreq_cdev_list);	/* registered devices, linked via ->node */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 97
 98/* Below code defines functions to be used for cpufreq as cooling device */
 99
100/**
101 * get_level: Find the level for a particular frequency
102 * @cpufreq_cdev: cpufreq_cdev for which the property is required
103 * @freq: Frequency
104 *
105 * Return: level corresponding to the frequency.
106 */
107static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
108			       unsigned int freq)
109{
110	struct freq_table *freq_table = cpufreq_cdev->freq_table;
111	unsigned long level;
112
113	for (level = 1; level <= cpufreq_cdev->max_level; level++)
114		if (freq > freq_table[level].frequency)
 
 
 
115			break;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
117	return level - 1;
 
118}
 
119
120/**
121 * update_freq_table() - Update the freq table with power numbers
122 * @cpufreq_cdev:	the cpufreq cooling device in which to update the table
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123 * @capacitance: dynamic power coefficient for these cpus
124 *
125 * Update the freq table with power numbers.  This table will be used in
126 * cpu_power_to_freq() and cpu_freq_to_power() to convert between power and
127 * frequency efficiently.  Power is stored in mW, frequency in KHz.  The
128 * resulting table is in descending order.
 
129 *
130 * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
131 * or -ENOMEM if we run out of memory.
 
132 */
133static int update_freq_table(struct cpufreq_cooling_device *cpufreq_cdev,
134			     u32 capacitance)
135{
136	struct freq_table *freq_table = cpufreq_cdev->freq_table;
137	struct dev_pm_opp *opp;
138	struct device *dev = NULL;
139	int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i;
 
140
141	dev = get_cpu_device(cpu);
142	if (unlikely(!dev)) {
143		pr_warn("No cpu device for cpu %d\n", cpu);
144		return -ENODEV;
 
 
 
 
 
 
 
 
 
145	}
146
147	num_opps = dev_pm_opp_get_opp_count(dev);
148	if (num_opps < 0)
149		return num_opps;
150
151	/*
152	 * The cpufreq table is also built from the OPP table and so the count
153	 * should match.
154	 */
155	if (num_opps != cpufreq_cdev->max_level + 1) {
156		dev_warn(dev, "Number of OPPs not matching with max_levels\n");
157		return -EINVAL;
158	}
159
160	for (i = 0; i <= cpufreq_cdev->max_level; i++) {
161		unsigned long freq = freq_table[i].frequency * 1000;
162		u32 freq_mhz = freq_table[i].frequency / 1000;
 
 
 
 
 
 
 
163		u64 power;
164		u32 voltage_mv;
165
166		/*
167		 * Find ceil frequency as 'freq' may be slightly lower than OPP
168		 * freq due to truncation while converting to kHz.
169		 */
170		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
171		if (IS_ERR(opp)) {
172			dev_err(dev, "failed to get opp for %lu frequency\n",
173				freq);
174			return -EINVAL;
175		}
176
 
177		voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
178		dev_pm_opp_put(opp);
179
180		/*
181		 * Do the multiplication with MHz and millivolt so as
182		 * to not overflow.
183		 */
184		power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
185		do_div(power, 1000000000);
186
 
 
 
187		/* power is stored in mW */
188		freq_table[i].power = power;
 
 
 
 
 
 
 
189	}
190
 
 
 
 
191	return 0;
 
 
 
 
 
192}
193
194static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
195			     u32 freq)
196{
197	int i;
198	struct freq_table *freq_table = cpufreq_cdev->freq_table;
199
200	for (i = 1; i <= cpufreq_cdev->max_level; i++)
201		if (freq > freq_table[i].frequency)
202			break;
203
204	return freq_table[i - 1].power;
205}
206
207static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
208			     u32 power)
209{
210	int i;
211	struct freq_table *freq_table = cpufreq_cdev->freq_table;
212
213	for (i = 1; i <= cpufreq_cdev->max_level; i++)
214		if (power > freq_table[i].power)
215			break;
216
217	return freq_table[i - 1].frequency;
218}
219
220/**
221 * get_load() - get load for a cpu since last updated
222 * @cpufreq_cdev:	&struct cpufreq_cooling_device for this cpu
223 * @cpu:	cpu number
224 * @cpu_idx:	index of the cpu in time_in_idle*
225 *
226 * Return: The average load of cpu @cpu in percentage since this
227 * function was last called.
228 */
229static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
230		    int cpu_idx)
231{
232	u32 load;
233	u64 now, now_idle, delta_time, delta_idle;
234	struct time_in_idle *idle_time = &cpufreq_cdev->idle_time[cpu_idx];
235
236	now_idle = get_cpu_idle_time(cpu, &now, 0);
237	delta_idle = now_idle - idle_time->time;
238	delta_time = now - idle_time->timestamp;
239
240	if (delta_time <= delta_idle)
241		load = 0;
242	else
243		load = div64_u64(100 * (delta_time - delta_idle), delta_time);
244
245	idle_time->time = now_idle;
246	idle_time->timestamp = now;
247
248	return load;
249}
250
251/**
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252 * get_dynamic_power() - calculate the dynamic power
253 * @cpufreq_cdev:	&cpufreq_cooling_device for this cdev
254 * @freq:	current frequency
255 *
256 * Return: the dynamic power consumed by the cpus described by
257 * @cpufreq_cdev.
258 */
259static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev,
260			     unsigned long freq)
261{
262	u32 raw_cpu_power;
263
264	raw_cpu_power = cpu_freq_to_power(cpufreq_cdev, freq);
265	return (raw_cpu_power * cpufreq_cdev->last_load) / 100;
266}
267
268/* cpufreq cooling device callback functions are defined below */
269
270/**
271 * cpufreq_get_max_state - callback function to get the max cooling state.
272 * @cdev: thermal cooling device pointer.
273 * @state: fill this variable with the max cooling state.
274 *
275 * Callback for the thermal cooling device to return the cpufreq
276 * max cooling state.
277 *
278 * Return: 0 on success, an error code otherwise.
279 */
280static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
281				 unsigned long *state)
282{
283	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
284
285	*state = cpufreq_cdev->max_level;
286	return 0;
287}
288
289/**
290 * cpufreq_get_cur_state - callback function to get the current cooling state.
291 * @cdev: thermal cooling device pointer.
292 * @state: fill this variable with the current cooling state.
293 *
294 * Callback for the thermal cooling device to return the cpufreq
295 * current cooling state.
296 *
297 * Return: 0 on success, an error code otherwise.
298 */
299static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
300				 unsigned long *state)
301{
302	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
303
304	*state = cpufreq_cdev->cpufreq_state;
305
306	return 0;
307}
308
309/**
310 * cpufreq_set_cur_state - callback function to set the current cooling state.
311 * @cdev: thermal cooling device pointer.
312 * @state: set this variable to the current cooling state.
313 *
314 * Callback for the thermal cooling device to change the cpufreq
315 * current cooling state.
316 *
317 * Return: 0 on success, an error code otherwise.
318 */
319static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
320				 unsigned long state)
321{
322	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 
 
323
324	/* Request state should be less than max_level */
325	if (WARN_ON(state > cpufreq_cdev->max_level))
326		return -EINVAL;
327
328	/* Check if the old cooling action is same as new cooling action */
329	if (cpufreq_cdev->cpufreq_state == state)
330		return 0;
331
332	cpufreq_cdev->cpufreq_state = state;
 
 
 
 
333
334	return freq_qos_update_request(&cpufreq_cdev->qos_req,
335				cpufreq_cdev->freq_table[state].frequency);
336}
337
338/**
339 * cpufreq_get_requested_power() - get the current power
340 * @cdev:	&thermal_cooling_device pointer
341 * @tz:		a valid thermal zone device pointer
342 * @power:	pointer in which to store the resulting power
343 *
344 * Calculate the current power consumption of the cpus in milliwatts
345 * and store it in @power.  This function should actually calculate
346 * the requested power, but it's hard to get the frequency that
347 * cpufreq would have assigned if there were no thermal limits.
348 * Instead, we calculate the current power on the assumption that the
349 * immediate future will look like the immediate past.
350 *
351 * We use the current frequency and the average load since this
352 * function was last called.  In reality, there could have been
353 * multiple opps since this function was last called and that affects
354 * the load calculation.  While it's not perfectly accurate, this
355 * simplification is good enough and works.  REVISIT this, as more
356 * complex code may be needed if experiments show that it's not
357 * accurate enough.
358 *
359 * Return: 0 on success, -E* if getting the static power failed.
360 */
361static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
362				       struct thermal_zone_device *tz,
363				       u32 *power)
364{
365	unsigned long freq;
366	int i = 0, cpu;
367	u32 total_load = 0;
368	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
369	struct cpufreq_policy *policy = cpufreq_cdev->policy;
370	u32 *load_cpu = NULL;
371
372	freq = cpufreq_quick_get(policy->cpu);
 
 
 
 
 
 
 
 
 
 
 
373
374	if (trace_thermal_power_cpu_get_power_enabled()) {
375		u32 ncpus = cpumask_weight(policy->related_cpus);
376
377		load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL);
378	}
379
380	for_each_cpu(cpu, policy->related_cpus) {
381		u32 load;
382
383		if (cpu_online(cpu))
384			load = get_load(cpufreq_cdev, cpu, i);
385		else
386			load = 0;
387
388		total_load += load;
389		if (load_cpu)
390			load_cpu[i] = load;
391
392		i++;
393	}
394
395	cpufreq_cdev->last_load = total_load;
396
397	*power = get_dynamic_power(cpufreq_cdev, freq);
 
 
 
 
 
398
399	if (load_cpu) {
400		trace_thermal_power_cpu_get_power(policy->related_cpus, freq,
401						  load_cpu, i, *power);
 
402
403		kfree(load_cpu);
404	}
405
 
406	return 0;
407}
408
409/**
410 * cpufreq_state2power() - convert a cpu cdev state to power consumed
411 * @cdev:	&thermal_cooling_device pointer
412 * @tz:		a valid thermal zone device pointer
413 * @state:	cooling device state to be converted
414 * @power:	pointer in which to store the resulting power
415 *
416 * Convert cooling device state @state into power consumption in
417 * milliwatts assuming 100% load.  Store the calculated power in
418 * @power.
419 *
420 * Return: 0 on success, -EINVAL if the cooling device state could not
421 * be converted into a frequency or other -E* if there was an error
422 * when calculating the static power.
423 */
424static int cpufreq_state2power(struct thermal_cooling_device *cdev,
425			       struct thermal_zone_device *tz,
426			       unsigned long state, u32 *power)
427{
428	unsigned int freq, num_cpus;
429	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 
 
 
430
431	/* Request state should be less than max_level */
432	if (WARN_ON(state > cpufreq_cdev->max_level))
 
 
 
 
 
 
 
 
 
433		return -EINVAL;
434
435	num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
436
437	freq = cpufreq_cdev->freq_table[state].frequency;
438	*power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
439
 
440	return 0;
441}
442
443/**
444 * cpufreq_power2state() - convert power to a cooling device state
445 * @cdev:	&thermal_cooling_device pointer
446 * @tz:		a valid thermal zone device pointer
447 * @power:	power in milliwatts to be converted
448 * @state:	pointer in which to store the resulting state
449 *
450 * Calculate a cooling device state for the cpus described by @cdev
451 * that would allow them to consume at most @power mW and store it in
452 * @state.  Note that this calculation depends on external factors
453 * such as the cpu load or the current static power.  Calling this
454 * function with the same power as input can yield different cooling
455 * device states depending on those external factors.
456 *
457 * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
458 * the calculated frequency could not be converted to a valid state.
459 * The latter should not happen unless the frequencies available to
460 * cpufreq have changed since the initialization of the cpu cooling
461 * device.
462 */
463static int cpufreq_power2state(struct thermal_cooling_device *cdev,
464			       struct thermal_zone_device *tz, u32 power,
465			       unsigned long *state)
466{
467	unsigned int target_freq;
468	u32 last_load, normalised_power;
469	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
470	struct cpufreq_policy *policy = cpufreq_cdev->policy;
471
472	last_load = cpufreq_cdev->last_load ?: 1;
473	normalised_power = (power * 100) / last_load;
474	target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power);
475
476	*state = get_level(cpufreq_cdev, target_freq);
477	trace_thermal_power_cpu_limit(policy->related_cpus, target_freq, *state,
478				      power);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479	return 0;
480}
481
 482/* Bind cpufreq callbacks to thermal cooling device ops */
 483
/* Selected at registration when no capacitance is given: state callbacks only */
 484static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
 485	.get_max_state = cpufreq_get_max_state,
 486	.get_cur_state = cpufreq_get_cur_state,
 487	.set_cur_state = cpufreq_set_cur_state,
 488};
 489
/*
 * Selected at registration when a non-zero capacitance is given: the same
 * state callbacks plus the power conversion callbacks.
 */
 490static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
 491	.get_max_state		= cpufreq_get_max_state,
 492	.get_cur_state		= cpufreq_get_cur_state,
 493	.set_cur_state		= cpufreq_set_cur_state,
 494	.get_requested_power	= cpufreq_get_requested_power,
 495	.state2power		= cpufreq_state2power,
 496	.power2state		= cpufreq_power2state,
 497};
498
499static unsigned int find_next_max(struct cpufreq_frequency_table *table,
500				  unsigned int prev_max)
501{
502	struct cpufreq_frequency_table *pos;
503	unsigned int max = 0;
504
505	cpufreq_for_each_valid_entry(pos, table) {
506		if (pos->frequency > max && pos->frequency < prev_max)
507			max = pos->frequency;
508	}
509
510	return max;
511}
512
513/**
514 * __cpufreq_cooling_register - helper function to create cpufreq cooling device
515 * @np: a valid struct device_node to the cooling device device tree node
516 * @policy: cpufreq policy
517 * Normally this should be same as cpufreq policy->related_cpus.
518 * @capacitance: dynamic power coefficient for these cpus
 
 
519 *
520 * This interface function registers the cpufreq cooling device with the name
521 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
522 * cooling devices. It also gives the opportunity to link the cooling device
523 * with a device tree node, in order to bind it via the thermal DT code.
524 *
525 * Return: a valid struct thermal_cooling_device pointer on success,
526 * on failure, it returns a corresponding ERR_PTR().
527 */
528static struct thermal_cooling_device *
529__cpufreq_cooling_register(struct device_node *np,
530			struct cpufreq_policy *policy, u32 capacitance)
 
531{
532	struct thermal_cooling_device *cdev;
533	struct cpufreq_cooling_device *cpufreq_cdev;
534	char dev_name[THERMAL_NAME_LENGTH];
 
535	unsigned int freq, i, num_cpus;
536	struct device *dev;
537	int ret;
538	struct thermal_cooling_device_ops *cooling_ops;
539
540	dev = get_cpu_device(policy->cpu);
541	if (unlikely(!dev)) {
542		pr_warn("No cpu device for cpu %d\n", policy->cpu);
543		return ERR_PTR(-ENODEV);
544	}
545
 
 
 
546
547	if (IS_ERR_OR_NULL(policy)) {
548		pr_err("%s: cpufreq policy isn't valid: %p\n", __func__, policy);
549		return ERR_PTR(-EINVAL);
 
 
 
 
550	}
551
552	i = cpufreq_table_count_valid_entries(policy);
553	if (!i) {
554		pr_debug("%s: CPUFreq table not found or has no valid entries\n",
555			 __func__);
556		return ERR_PTR(-ENODEV);
 
557	}
558
559	cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL);
560	if (!cpufreq_cdev)
561		return ERR_PTR(-ENOMEM);
562
563	cpufreq_cdev->policy = policy;
564	num_cpus = cpumask_weight(policy->related_cpus);
565	cpufreq_cdev->idle_time = kcalloc(num_cpus,
566					 sizeof(*cpufreq_cdev->idle_time),
567					 GFP_KERNEL);
568	if (!cpufreq_cdev->idle_time) {
569		cdev = ERR_PTR(-ENOMEM);
570		goto free_cdev;
571	}
572
573	/* max_level is an index, not a counter */
574	cpufreq_cdev->max_level = i - 1;
 
 
575
576	cpufreq_cdev->freq_table = kmalloc_array(i,
577					sizeof(*cpufreq_cdev->freq_table),
578					GFP_KERNEL);
579	if (!cpufreq_cdev->freq_table) {
580		cdev = ERR_PTR(-ENOMEM);
581		goto free_idle_time;
582	}
583
584	ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
585	if (ret < 0) {
586		cdev = ERR_PTR(ret);
587		goto free_table;
 
 
 
 
 
 
588	}
589	cpufreq_cdev->id = ret;
590
591	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
592		 cpufreq_cdev->id);
 
 
 
 
 
593
594	/* Fill freq-table in descending order of frequencies */
595	for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) {
596		freq = find_next_max(policy->freq_table, freq);
597		cpufreq_cdev->freq_table[i].frequency = freq;
598
599		/* Warn for duplicate entries */
600		if (!freq)
601			pr_warn("%s: table has duplicate entries\n", __func__);
602		else
603			pr_debug("%s: freq:%u KHz\n", __func__, freq);
604	}
605
606	if (capacitance) {
607		ret = update_freq_table(cpufreq_cdev, capacitance);
608		if (ret) {
609			cdev = ERR_PTR(ret);
610			goto remove_ida;
611		}
612
613		cooling_ops = &cpufreq_power_cooling_ops;
614	} else {
615		cooling_ops = &cpufreq_cooling_ops;
616	}
617
618	ret = freq_qos_add_request(&policy->constraints,
619				   &cpufreq_cdev->qos_req, FREQ_QOS_MAX,
620				   cpufreq_cdev->freq_table[0].frequency);
621	if (ret < 0) {
622		pr_err("%s: Failed to add freq constraint (%d)\n", __func__,
623		       ret);
624		cdev = ERR_PTR(ret);
625		goto remove_ida;
626	}
627
628	cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
629						  cooling_ops);
630	if (IS_ERR(cdev))
631		goto remove_qos_req;
632
633	mutex_lock(&cooling_list_lock);
634	list_add(&cpufreq_cdev->node, &cpufreq_cdev_list);
635	mutex_unlock(&cooling_list_lock);
636
637	return cdev;
638
639remove_qos_req:
640	freq_qos_remove_request(&cpufreq_cdev->qos_req);
641remove_ida:
642	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
 
 
 
 
 
 
643free_table:
644	kfree(cpufreq_cdev->freq_table);
645free_idle_time:
646	kfree(cpufreq_cdev->idle_time);
 
 
647free_cdev:
648	kfree(cpufreq_cdev);
649	return cdev;
 
650}
651
652/**
653 * cpufreq_cooling_register - function to create cpufreq cooling device.
654 * @policy: cpufreq policy
655 *
656 * This interface function registers the cpufreq cooling device with the name
657 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
658 * cooling devices.
659 *
660 * Return: a valid struct thermal_cooling_device pointer on success,
661 * on failure, it returns a corresponding ERR_PTR().
662 */
663struct thermal_cooling_device *
664cpufreq_cooling_register(struct cpufreq_policy *policy)
665{
666	return __cpufreq_cooling_register(NULL, policy, 0);
667}
668EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
669
670/**
671 * of_cpufreq_cooling_register - function to create cpufreq cooling device.
672 * @policy: cpufreq policy
 
673 *
674 * This interface function registers the cpufreq cooling device with the name
675 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
676 * cooling devices. Using this API, the cpufreq cooling device will be
677 * linked to the device tree node provided.
678 *
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
679 * Using this function, the cooling device will implement the power
680 * extensions by using a simple cpu power model.  The cpus must have
681 * registered their OPPs using the OPP library.
682 *
683 * It also takes into account, if property present in policy CPU node, the
684 * static power consumed by the cpu.
 
685 *
686 * Return: a valid struct thermal_cooling_device pointer on success,
687 * and NULL on failure.
688 */
689struct thermal_cooling_device *
690of_cpufreq_cooling_register(struct cpufreq_policy *policy)
 
 
 
691{
692	struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
693	struct thermal_cooling_device *cdev = NULL;
694	u32 capacitance = 0;
695
696	if (!np) {
697		pr_err("cpu_cooling: OF node not available for cpu%d\n",
698		       policy->cpu);
699		return NULL;
700	}
701
702	if (of_find_property(np, "#cooling-cells", NULL)) {
703		of_property_read_u32(np, "dynamic-power-coefficient",
704				     &capacitance);
705
706		cdev = __cpufreq_cooling_register(np, policy, capacitance);
707		if (IS_ERR(cdev)) {
708			pr_err("cpu_cooling: cpu%d failed to register as cooling device: %ld\n",
709			       policy->cpu, PTR_ERR(cdev));
710			cdev = NULL;
711		}
712	}
713
714	of_node_put(np);
715	return cdev;
716}
717EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
718
719/**
720 * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
721 * @cdev: thermal cooling device pointer.
722 *
723 * This interface function unregisters the "thermal-cpufreq-%x" cooling device.
724 */
725void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
726{
727	struct cpufreq_cooling_device *cpufreq_cdev;
728
729	if (!cdev)
730		return;
731
732	cpufreq_cdev = cdev->devdata;
 
 
 
 
 
 
733
734	mutex_lock(&cooling_list_lock);
735	list_del(&cpufreq_cdev->node);
736	mutex_unlock(&cooling_list_lock);
737
738	thermal_cooling_device_unregister(cdev);
739	freq_qos_remove_request(&cpufreq_cdev->qos_req);
740	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
741	kfree(cpufreq_cdev->idle_time);
742	kfree(cpufreq_cdev->freq_table);
743	kfree(cpufreq_cdev);
 
 
 
744}
745EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);