Linux Audio

Check our new training course

Loading...
   1/*
   2 *  linux/drivers/cpufreq/cpufreq.c
   3 *
   4 *  Copyright (C) 2001 Russell King
   5 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
   6 *
   7 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
   8 *	Added handling for CPU hotplug
   9 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
  10 *	Fix handling for CPU hotplug -- affected CPUs
  11 *
  12 * This program is free software; you can redistribute it and/or modify
  13 * it under the terms of the GNU General Public License version 2 as
  14 * published by the Free Software Foundation.
  15 *
  16 */
  17
  18#include <linux/kernel.h>
  19#include <linux/module.h>
  20#include <linux/init.h>
  21#include <linux/notifier.h>
  22#include <linux/cpufreq.h>
  23#include <linux/delay.h>
  24#include <linux/interrupt.h>
  25#include <linux/spinlock.h>
  26#include <linux/device.h>
  27#include <linux/slab.h>
  28#include <linux/cpu.h>
  29#include <linux/completion.h>
  30#include <linux/mutex.h>
  31#include <linux/syscore_ops.h>
  32
  33#include <trace/events/power.h>
  34
  35/**
  36 * The "cpufreq driver" - the arch- or hardware-dependent low
  37 * level driver of CPUFreq support, and its spinlock. This lock
  38 * also protects the cpufreq_cpu_data array.
  39 */
  40static struct cpufreq_driver *cpufreq_driver;
  41static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
  42#ifdef CONFIG_HOTPLUG_CPU
  43/* This one keeps track of the previously set governor of a removed CPU */
  44static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
  45#endif
  46static DEFINE_SPINLOCK(cpufreq_driver_lock);
  47
  48/*
  49 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
  50 * all cpufreq/hotplug/workqueue/etc related lock issues.
  51 *
  52 * The rules for this semaphore:
  53 * - Any routine that wants to read from the policy structure will
  54 *   do a down_read on this semaphore.
  55 * - Any routine that will write to the policy structure and/or may take away
  56 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
  57 *   mode before doing so.
  58 *
  59 * Additional rules:
  60 * - All holders of the lock should check to make sure that the CPU they
  61 *   are concerned with are online after they get the lock.
  62 * - Governor routines that can be called in cpufreq hotplug path should not
  63 *   take this sem as top level hotplug notifier handler takes this.
  64 * - Lock should not be held across
  65 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
  66 */
  67static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
  68static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
  69
  70#define lock_policy_rwsem(mode, cpu)					\
  71static int lock_policy_rwsem_##mode					\
  72(int cpu)								\
  73{									\
  74	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);		\
  75	BUG_ON(policy_cpu == -1);					\
  76	down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
  77	if (unlikely(!cpu_online(cpu))) {				\
  78		up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));	\
  79		return -1;						\
  80	}								\
  81									\
  82	return 0;							\
  83}
  84
  85lock_policy_rwsem(read, cpu);
  86
  87lock_policy_rwsem(write, cpu);
  88
  89static void unlock_policy_rwsem_read(int cpu)
  90{
  91	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
  92	BUG_ON(policy_cpu == -1);
  93	up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
  94}
  95
  96static void unlock_policy_rwsem_write(int cpu)
  97{
  98	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
  99	BUG_ON(policy_cpu == -1);
 100	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
 101}
 102
 103
 104/* internal prototypes */
 105static int __cpufreq_governor(struct cpufreq_policy *policy,
 106		unsigned int event);
 107static unsigned int __cpufreq_get(unsigned int cpu);
 108static void handle_update(struct work_struct *work);
 109
 110/**
 111 * Two notifier lists: the "policy" list is involved in the
 112 * validation process for a new CPU frequency policy; the
 113 * "transition" list for kernel code that needs to handle
 114 * changes to devices when the CPU clock speed changes.
 115 * The mutex locks both lists.
 116 */
 117static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
 118static struct srcu_notifier_head cpufreq_transition_notifier_list;
 119
 120static bool init_cpufreq_transition_notifier_list_called;
 121static int __init init_cpufreq_transition_notifier_list(void)
 122{
 123	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
 124	init_cpufreq_transition_notifier_list_called = true;
 125	return 0;
 126}
 127pure_initcall(init_cpufreq_transition_notifier_list);
 128
 129static int off __read_mostly;
 130int cpufreq_disabled(void)
 131{
 132	return off;
 133}
 134void disable_cpufreq(void)
 135{
 136	off = 1;
 137}
 138static LIST_HEAD(cpufreq_governor_list);
 139static DEFINE_MUTEX(cpufreq_governor_mutex);
 140
 141struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
 142{
 143	struct cpufreq_policy *data;
 144	unsigned long flags;
 145
 146	if (cpu >= nr_cpu_ids)
 147		goto err_out;
 148
 149	/* get the cpufreq driver */
 150	spin_lock_irqsave(&cpufreq_driver_lock, flags);
 151
 152	if (!cpufreq_driver)
 153		goto err_out_unlock;
 154
 155	if (!try_module_get(cpufreq_driver->owner))
 156		goto err_out_unlock;
 157
 158
 159	/* get the CPU */
 160	data = per_cpu(cpufreq_cpu_data, cpu);
 161
 162	if (!data)
 163		goto err_out_put_module;
 164
 165	if (!kobject_get(&data->kobj))
 166		goto err_out_put_module;
 167
 168	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 169	return data;
 170
 171err_out_put_module:
 172	module_put(cpufreq_driver->owner);
 173err_out_unlock:
 174	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 175err_out:
 176	return NULL;
 177}
 178EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
 179
 180
 181void cpufreq_cpu_put(struct cpufreq_policy *data)
 182{
 183	kobject_put(&data->kobj);
 184	module_put(cpufreq_driver->owner);
 185}
 186EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
 187
 188
 189/*********************************************************************
 190 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 191 *********************************************************************/
 192
 193/**
 194 * adjust_jiffies - adjust the system "loops_per_jiffy"
 195 *
 196 * This function alters the system "loops_per_jiffy" for the clock
 197 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 198 * systems as each CPU might be scaled differently. So, use the arch
 199 * per-CPU loops_per_jiffy value wherever possible.
 200 */
 201#ifndef CONFIG_SMP
 202static unsigned long l_p_j_ref;
 203static unsigned int  l_p_j_ref_freq;
 204
 205static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
 206{
 207	if (ci->flags & CPUFREQ_CONST_LOOPS)
 208		return;
 209
 210	if (!l_p_j_ref_freq) {
 211		l_p_j_ref = loops_per_jiffy;
 212		l_p_j_ref_freq = ci->old;
 213		pr_debug("saving %lu as reference value for loops_per_jiffy; "
 214			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
 215	}
 216	if ((val == CPUFREQ_POSTCHANGE  && ci->old != ci->new) ||
 217	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
 218		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
 219								ci->new);
 220		pr_debug("scaling loops_per_jiffy to %lu "
 221			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
 222	}
 223}
 224#else
 225static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
 226{
 227	return;
 228}
 229#endif
 230
 231
 232/**
 233 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 234 * on frequency transition.
 235 *
 236 * This function calls the transition notifiers and the "adjust_jiffies"
 237 * function. It is called twice on all CPU frequency changes that have
 238 * external effects.
 239 */
 240void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 241{
 242	struct cpufreq_policy *policy;
 243
 244	BUG_ON(irqs_disabled());
 245
 246	freqs->flags = cpufreq_driver->flags;
 247	pr_debug("notification %u of frequency transition to %u kHz\n",
 248		state, freqs->new);
 249
 250	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
 251	switch (state) {
 252
 253	case CPUFREQ_PRECHANGE:
 254		/* detect if the driver reported a value as "old frequency"
 255		 * which is not equal to what the cpufreq core thinks is
 256		 * "old frequency".
 257		 */
 258		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
 259			if ((policy) && (policy->cpu == freqs->cpu) &&
 260			    (policy->cur) && (policy->cur != freqs->old)) {
 261				pr_debug("Warning: CPU frequency is"
 262					" %u, cpufreq assumed %u kHz.\n",
 263					freqs->old, policy->cur);
 264				freqs->old = policy->cur;
 265			}
 266		}
 267		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
 268				CPUFREQ_PRECHANGE, freqs);
 269		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
 270		break;
 271
 272	case CPUFREQ_POSTCHANGE:
 273		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
 274		pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
 275			(unsigned long)freqs->cpu);
 276		trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
 277		trace_cpu_frequency(freqs->new, freqs->cpu);
 278		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
 279				CPUFREQ_POSTCHANGE, freqs);
 280		if (likely(policy) && likely(policy->cpu == freqs->cpu))
 281			policy->cur = freqs->new;
 282		break;
 283	}
 284}
 285EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
 286
 287
 288
 289/*********************************************************************
 290 *                          SYSFS INTERFACE                          *
 291 *********************************************************************/
 292
 293static struct cpufreq_governor *__find_governor(const char *str_governor)
 294{
 295	struct cpufreq_governor *t;
 296
 297	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
 298		if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
 299			return t;
 300
 301	return NULL;
 302}
 303
 304/**
 305 * cpufreq_parse_governor - parse a governor string
 306 */
 307static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
 308				struct cpufreq_governor **governor)
 309{
 310	int err = -EINVAL;
 311
 312	if (!cpufreq_driver)
 313		goto out;
 314
 315	if (cpufreq_driver->setpolicy) {
 316		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
 317			*policy = CPUFREQ_POLICY_PERFORMANCE;
 318			err = 0;
 319		} else if (!strnicmp(str_governor, "powersave",
 320						CPUFREQ_NAME_LEN)) {
 321			*policy = CPUFREQ_POLICY_POWERSAVE;
 322			err = 0;
 323		}
 324	} else if (cpufreq_driver->target) {
 325		struct cpufreq_governor *t;
 326
 327		mutex_lock(&cpufreq_governor_mutex);
 328
 329		t = __find_governor(str_governor);
 330
 331		if (t == NULL) {
 332			int ret;
 333
 334			mutex_unlock(&cpufreq_governor_mutex);
 335			ret = request_module("cpufreq_%s", str_governor);
 336			mutex_lock(&cpufreq_governor_mutex);
 337
 338			if (ret == 0)
 339				t = __find_governor(str_governor);
 340		}
 341
 342		if (t != NULL) {
 343			*governor = t;
 344			err = 0;
 345		}
 346
 347		mutex_unlock(&cpufreq_governor_mutex);
 348	}
 349out:
 350	return err;
 351}
 352
 353
 354/**
 355 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 356 * print out cpufreq information
 357 *
 358 * Write out information from cpufreq_driver->policy[cpu]; object must be
 359 * "unsigned int".
 360 */
 361
 362#define show_one(file_name, object)			\
 363static ssize_t show_##file_name				\
 364(struct cpufreq_policy *policy, char *buf)		\
 365{							\
 366	return sprintf(buf, "%u\n", policy->object);	\
 367}
 368
 369show_one(cpuinfo_min_freq, cpuinfo.min_freq);
 370show_one(cpuinfo_max_freq, cpuinfo.max_freq);
 371show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
 372show_one(scaling_min_freq, min);
 373show_one(scaling_max_freq, max);
 374show_one(scaling_cur_freq, cur);
 375
 376static int __cpufreq_set_policy(struct cpufreq_policy *data,
 377				struct cpufreq_policy *policy);
 378
 379/**
 380 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 381 */
 382#define store_one(file_name, object)			\
 383static ssize_t store_##file_name					\
 384(struct cpufreq_policy *policy, const char *buf, size_t count)		\
 385{									\
 386	unsigned int ret = -EINVAL;					\
 387	struct cpufreq_policy new_policy;				\
 388									\
 389	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
 390	if (ret)							\
 391		return -EINVAL;						\
 392									\
 393	ret = sscanf(buf, "%u", &new_policy.object);			\
 394	if (ret != 1)							\
 395		return -EINVAL;						\
 396									\
 397	ret = __cpufreq_set_policy(policy, &new_policy);		\
 398	policy->user_policy.object = policy->object;			\
 399									\
 400	return ret ? ret : count;					\
 401}
 402
 403store_one(scaling_min_freq, min);
 404store_one(scaling_max_freq, max);
 405
 406/**
 407 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 408 */
 409static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
 410					char *buf)
 411{
 412	unsigned int cur_freq = __cpufreq_get(policy->cpu);
 413	if (!cur_freq)
 414		return sprintf(buf, "<unknown>");
 415	return sprintf(buf, "%u\n", cur_freq);
 416}
 417
 418
 419/**
 420 * show_scaling_governor - show the current policy for the specified CPU
 421 */
 422static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
 423{
 424	if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
 425		return sprintf(buf, "powersave\n");
 426	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
 427		return sprintf(buf, "performance\n");
 428	else if (policy->governor)
 429		return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
 430				policy->governor->name);
 431	return -EINVAL;
 432}
 433
 434
 435/**
 436 * store_scaling_governor - store policy for the specified CPU
 437 */
 438static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
 439					const char *buf, size_t count)
 440{
 441	unsigned int ret = -EINVAL;
 442	char	str_governor[16];
 443	struct cpufreq_policy new_policy;
 444
 445	ret = cpufreq_get_policy(&new_policy, policy->cpu);
 446	if (ret)
 447		return ret;
 448
 449	ret = sscanf(buf, "%15s", str_governor);
 450	if (ret != 1)
 451		return -EINVAL;
 452
 453	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
 454						&new_policy.governor))
 455		return -EINVAL;
 456
 457	/* Do not use cpufreq_set_policy here or the user_policy.max
 458	   will be wrongly overridden */
 459	ret = __cpufreq_set_policy(policy, &new_policy);
 460
 461	policy->user_policy.policy = policy->policy;
 462	policy->user_policy.governor = policy->governor;
 463
 464	if (ret)
 465		return ret;
 466	else
 467		return count;
 468}
 469
 470/**
 471 * show_scaling_driver - show the cpufreq driver currently loaded
 472 */
 473static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
 474{
 475	return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
 476}
 477
 478/**
 479 * show_scaling_available_governors - show the available CPUfreq governors
 480 */
 481static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
 482						char *buf)
 483{
 484	ssize_t i = 0;
 485	struct cpufreq_governor *t;
 486
 487	if (!cpufreq_driver->target) {
 488		i += sprintf(buf, "performance powersave");
 489		goto out;
 490	}
 491
 492	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
 493		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
 494		    - (CPUFREQ_NAME_LEN + 2)))
 495			goto out;
 496		i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
 497	}
 498out:
 499	i += sprintf(&buf[i], "\n");
 500	return i;
 501}
 502
 503static ssize_t show_cpus(const struct cpumask *mask, char *buf)
 504{
 505	ssize_t i = 0;
 506	unsigned int cpu;
 507
 508	for_each_cpu(cpu, mask) {
 509		if (i)
 510			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
 511		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
 512		if (i >= (PAGE_SIZE - 5))
 513			break;
 514	}
 515	i += sprintf(&buf[i], "\n");
 516	return i;
 517}
 518
 519/**
 520 * show_related_cpus - show the CPUs affected by each transition even if
 521 * hw coordination is in use
 522 */
 523static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
 524{
 525	if (cpumask_empty(policy->related_cpus))
 526		return show_cpus(policy->cpus, buf);
 527	return show_cpus(policy->related_cpus, buf);
 528}
 529
 530/**
 531 * show_affected_cpus - show the CPUs affected by each transition
 532 */
 533static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
 534{
 535	return show_cpus(policy->cpus, buf);
 536}
 537
 538static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
 539					const char *buf, size_t count)
 540{
 541	unsigned int freq = 0;
 542	unsigned int ret;
 543
 544	if (!policy->governor || !policy->governor->store_setspeed)
 545		return -EINVAL;
 546
 547	ret = sscanf(buf, "%u", &freq);
 548	if (ret != 1)
 549		return -EINVAL;
 550
 551	policy->governor->store_setspeed(policy, freq);
 552
 553	return count;
 554}
 555
 556static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
 557{
 558	if (!policy->governor || !policy->governor->show_setspeed)
 559		return sprintf(buf, "<unsupported>\n");
 560
 561	return policy->governor->show_setspeed(policy, buf);
 562}
 563
 564/**
 565 * show_scaling_driver - show the current cpufreq HW/BIOS limitation
 566 */
 567static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
 568{
 569	unsigned int limit;
 570	int ret;
 571	if (cpufreq_driver->bios_limit) {
 572		ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
 573		if (!ret)
 574			return sprintf(buf, "%u\n", limit);
 575	}
 576	return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
 577}
 578
 579cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
 580cpufreq_freq_attr_ro(cpuinfo_min_freq);
 581cpufreq_freq_attr_ro(cpuinfo_max_freq);
 582cpufreq_freq_attr_ro(cpuinfo_transition_latency);
 583cpufreq_freq_attr_ro(scaling_available_governors);
 584cpufreq_freq_attr_ro(scaling_driver);
 585cpufreq_freq_attr_ro(scaling_cur_freq);
 586cpufreq_freq_attr_ro(bios_limit);
 587cpufreq_freq_attr_ro(related_cpus);
 588cpufreq_freq_attr_ro(affected_cpus);
 589cpufreq_freq_attr_rw(scaling_min_freq);
 590cpufreq_freq_attr_rw(scaling_max_freq);
 591cpufreq_freq_attr_rw(scaling_governor);
 592cpufreq_freq_attr_rw(scaling_setspeed);
 593
 594static struct attribute *default_attrs[] = {
 595	&cpuinfo_min_freq.attr,
 596	&cpuinfo_max_freq.attr,
 597	&cpuinfo_transition_latency.attr,
 598	&scaling_min_freq.attr,
 599	&scaling_max_freq.attr,
 600	&affected_cpus.attr,
 601	&related_cpus.attr,
 602	&scaling_governor.attr,
 603	&scaling_driver.attr,
 604	&scaling_available_governors.attr,
 605	&scaling_setspeed.attr,
 606	NULL
 607};
 608
 609struct kobject *cpufreq_global_kobject;
 610EXPORT_SYMBOL(cpufreq_global_kobject);
 611
 612#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
 613#define to_attr(a) container_of(a, struct freq_attr, attr)
 614
 615static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 616{
 617	struct cpufreq_policy *policy = to_policy(kobj);
 618	struct freq_attr *fattr = to_attr(attr);
 619	ssize_t ret = -EINVAL;
 620	policy = cpufreq_cpu_get(policy->cpu);
 621	if (!policy)
 622		goto no_policy;
 623
 624	if (lock_policy_rwsem_read(policy->cpu) < 0)
 625		goto fail;
 626
 627	if (fattr->show)
 628		ret = fattr->show(policy, buf);
 629	else
 630		ret = -EIO;
 631
 632	unlock_policy_rwsem_read(policy->cpu);
 633fail:
 634	cpufreq_cpu_put(policy);
 635no_policy:
 636	return ret;
 637}
 638
 639static ssize_t store(struct kobject *kobj, struct attribute *attr,
 640		     const char *buf, size_t count)
 641{
 642	struct cpufreq_policy *policy = to_policy(kobj);
 643	struct freq_attr *fattr = to_attr(attr);
 644	ssize_t ret = -EINVAL;
 645	policy = cpufreq_cpu_get(policy->cpu);
 646	if (!policy)
 647		goto no_policy;
 648
 649	if (lock_policy_rwsem_write(policy->cpu) < 0)
 650		goto fail;
 651
 652	if (fattr->store)
 653		ret = fattr->store(policy, buf, count);
 654	else
 655		ret = -EIO;
 656
 657	unlock_policy_rwsem_write(policy->cpu);
 658fail:
 659	cpufreq_cpu_put(policy);
 660no_policy:
 661	return ret;
 662}
 663
 664static void cpufreq_sysfs_release(struct kobject *kobj)
 665{
 666	struct cpufreq_policy *policy = to_policy(kobj);
 667	pr_debug("last reference is dropped\n");
 668	complete(&policy->kobj_unregister);
 669}
 670
 671static const struct sysfs_ops sysfs_ops = {
 672	.show	= show,
 673	.store	= store,
 674};
 675
 676static struct kobj_type ktype_cpufreq = {
 677	.sysfs_ops	= &sysfs_ops,
 678	.default_attrs	= default_attrs,
 679	.release	= cpufreq_sysfs_release,
 680};
 681
 682/*
 683 * Returns:
 684 *   Negative: Failure
 685 *   0:        Success
 686 *   Positive: When we have a managed CPU and the sysfs got symlinked
 687 */
 688static int cpufreq_add_dev_policy(unsigned int cpu,
 689				  struct cpufreq_policy *policy,
 690				  struct device *dev)
 691{
 692	int ret = 0;
 693#ifdef CONFIG_SMP
 694	unsigned long flags;
 695	unsigned int j;
 696#ifdef CONFIG_HOTPLUG_CPU
 697	struct cpufreq_governor *gov;
 698
 699	gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
 700	if (gov) {
 701		policy->governor = gov;
 702		pr_debug("Restoring governor %s for cpu %d\n",
 703		       policy->governor->name, cpu);
 704	}
 705#endif
 706
 707	for_each_cpu(j, policy->cpus) {
 708		struct cpufreq_policy *managed_policy;
 709
 710		if (cpu == j)
 711			continue;
 712
 713		/* Check for existing affected CPUs.
 714		 * They may not be aware of it due to CPU Hotplug.
 715		 * cpufreq_cpu_put is called when the device is removed
 716		 * in __cpufreq_remove_dev()
 717		 */
 718		managed_policy = cpufreq_cpu_get(j);
 719		if (unlikely(managed_policy)) {
 720
 721			/* Set proper policy_cpu */
 722			unlock_policy_rwsem_write(cpu);
 723			per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;
 724
 725			if (lock_policy_rwsem_write(cpu) < 0) {
 726				/* Should not go through policy unlock path */
 727				if (cpufreq_driver->exit)
 728					cpufreq_driver->exit(policy);
 729				cpufreq_cpu_put(managed_policy);
 730				return -EBUSY;
 731			}
 732
 733			spin_lock_irqsave(&cpufreq_driver_lock, flags);
 734			cpumask_copy(managed_policy->cpus, policy->cpus);
 735			per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
 736			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 737
 738			pr_debug("CPU already managed, adding link\n");
 739			ret = sysfs_create_link(&dev->kobj,
 740						&managed_policy->kobj,
 741						"cpufreq");
 742			if (ret)
 743				cpufreq_cpu_put(managed_policy);
 744			/*
 745			 * Success. We only needed to be added to the mask.
 746			 * Call driver->exit() because only the cpu parent of
 747			 * the kobj needed to call init().
 748			 */
 749			if (cpufreq_driver->exit)
 750				cpufreq_driver->exit(policy);
 751
 752			if (!ret)
 753				return 1;
 754			else
 755				return ret;
 756		}
 757	}
 758#endif
 759	return ret;
 760}
 761
 762
 763/* symlink affected CPUs */
 764static int cpufreq_add_dev_symlink(unsigned int cpu,
 765				   struct cpufreq_policy *policy)
 766{
 767	unsigned int j;
 768	int ret = 0;
 769
 770	for_each_cpu(j, policy->cpus) {
 771		struct cpufreq_policy *managed_policy;
 772		struct device *cpu_dev;
 773
 774		if (j == cpu)
 775			continue;
 776		if (!cpu_online(j))
 777			continue;
 778
 779		pr_debug("CPU %u already managed, adding link\n", j);
 780		managed_policy = cpufreq_cpu_get(cpu);
 781		cpu_dev = get_cpu_device(j);
 782		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
 783					"cpufreq");
 784		if (ret) {
 785			cpufreq_cpu_put(managed_policy);
 786			return ret;
 787		}
 788	}
 789	return ret;
 790}
 791
 792static int cpufreq_add_dev_interface(unsigned int cpu,
 793				     struct cpufreq_policy *policy,
 794				     struct device *dev)
 795{
 796	struct cpufreq_policy new_policy;
 797	struct freq_attr **drv_attr;
 798	unsigned long flags;
 799	int ret = 0;
 800	unsigned int j;
 801
 802	/* prepare interface data */
 803	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
 804				   &dev->kobj, "cpufreq");
 805	if (ret)
 806		return ret;
 807
 808	/* set up files for this cpu device */
 809	drv_attr = cpufreq_driver->attr;
 810	while ((drv_attr) && (*drv_attr)) {
 811		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
 812		if (ret)
 813			goto err_out_kobj_put;
 814		drv_attr++;
 815	}
 816	if (cpufreq_driver->get) {
 817		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
 818		if (ret)
 819			goto err_out_kobj_put;
 820	}
 821	if (cpufreq_driver->target) {
 822		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
 823		if (ret)
 824			goto err_out_kobj_put;
 825	}
 826	if (cpufreq_driver->bios_limit) {
 827		ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
 828		if (ret)
 829			goto err_out_kobj_put;
 830	}
 831
 832	spin_lock_irqsave(&cpufreq_driver_lock, flags);
 833	for_each_cpu(j, policy->cpus) {
 834		if (!cpu_online(j))
 835			continue;
 836		per_cpu(cpufreq_cpu_data, j) = policy;
 837		per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
 838	}
 839	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 840
 841	ret = cpufreq_add_dev_symlink(cpu, policy);
 842	if (ret)
 843		goto err_out_kobj_put;
 844
 845	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
 846	/* assure that the starting sequence is run in __cpufreq_set_policy */
 847	policy->governor = NULL;
 848
 849	/* set default policy */
 850	ret = __cpufreq_set_policy(policy, &new_policy);
 851	policy->user_policy.policy = policy->policy;
 852	policy->user_policy.governor = policy->governor;
 853
 854	if (ret) {
 855		pr_debug("setting policy failed\n");
 856		if (cpufreq_driver->exit)
 857			cpufreq_driver->exit(policy);
 858	}
 859	return ret;
 860
 861err_out_kobj_put:
 862	kobject_put(&policy->kobj);
 863	wait_for_completion(&policy->kobj_unregister);
 864	return ret;
 865}
 866
 867
 868/**
 869 * cpufreq_add_dev - add a CPU device
 870 *
 871 * Adds the cpufreq interface for a CPU device.
 872 *
 873 * The Oracle says: try running cpufreq registration/unregistration concurrently
 874 * with with cpu hotplugging and all hell will break loose. Tried to clean this
 875 * mess up, but more thorough testing is needed. - Mathieu
 876 */
 877static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 878{
 879	unsigned int cpu = dev->id;
 880	int ret = 0, found = 0;
 881	struct cpufreq_policy *policy;
 882	unsigned long flags;
 883	unsigned int j;
 884#ifdef CONFIG_HOTPLUG_CPU
 885	int sibling;
 886#endif
 887
 888	if (cpu_is_offline(cpu))
 889		return 0;
 890
 891	pr_debug("adding CPU %u\n", cpu);
 892
 893#ifdef CONFIG_SMP
 894	/* check whether a different CPU already registered this
 895	 * CPU because it is in the same boat. */
 896	policy = cpufreq_cpu_get(cpu);
 897	if (unlikely(policy)) {
 898		cpufreq_cpu_put(policy);
 899		return 0;
 900	}
 901#endif
 902
 903	if (!try_module_get(cpufreq_driver->owner)) {
 904		ret = -EINVAL;
 905		goto module_out;
 906	}
 907
 908	ret = -ENOMEM;
 909	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
 910	if (!policy)
 911		goto nomem_out;
 912
 913	if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
 914		goto err_free_policy;
 915
 916	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
 917		goto err_free_cpumask;
 918
 919	policy->cpu = cpu;
 920	cpumask_copy(policy->cpus, cpumask_of(cpu));
 921
 922	/* Initially set CPU itself as the policy_cpu */
 923	per_cpu(cpufreq_policy_cpu, cpu) = cpu;
 924	ret = (lock_policy_rwsem_write(cpu) < 0);
 925	WARN_ON(ret);
 926
 927	init_completion(&policy->kobj_unregister);
 928	INIT_WORK(&policy->update, handle_update);
 929
 930	/* Set governor before ->init, so that driver could check it */
 931#ifdef CONFIG_HOTPLUG_CPU
 932	for_each_online_cpu(sibling) {
 933		struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
 934		if (cp && cp->governor &&
 935		    (cpumask_test_cpu(cpu, cp->related_cpus))) {
 936			policy->governor = cp->governor;
 937			found = 1;
 938			break;
 939		}
 940	}
 941#endif
 942	if (!found)
 943		policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
 944	/* call driver. From then on the cpufreq must be able
 945	 * to accept all calls to ->verify and ->setpolicy for this CPU
 946	 */
 947	ret = cpufreq_driver->init(policy);
 948	if (ret) {
 949		pr_debug("initialization failed\n");
 950		goto err_unlock_policy;
 951	}
 952	policy->user_policy.min = policy->min;
 953	policy->user_policy.max = policy->max;
 954
 955	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 956				     CPUFREQ_START, policy);
 957
 958	ret = cpufreq_add_dev_policy(cpu, policy, dev);
 959	if (ret) {
 960		if (ret > 0)
 961			/* This is a managed cpu, symlink created,
 962			   exit with 0 */
 963			ret = 0;
 964		goto err_unlock_policy;
 965	}
 966
 967	ret = cpufreq_add_dev_interface(cpu, policy, dev);
 968	if (ret)
 969		goto err_out_unregister;
 970
 971	unlock_policy_rwsem_write(cpu);
 972
 973	kobject_uevent(&policy->kobj, KOBJ_ADD);
 974	module_put(cpufreq_driver->owner);
 975	pr_debug("initialization complete\n");
 976
 977	return 0;
 978
 979
 980err_out_unregister:
 981	spin_lock_irqsave(&cpufreq_driver_lock, flags);
 982	for_each_cpu(j, policy->cpus)
 983		per_cpu(cpufreq_cpu_data, j) = NULL;
 984	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 985
 986	kobject_put(&policy->kobj);
 987	wait_for_completion(&policy->kobj_unregister);
 988
 989err_unlock_policy:
 990	unlock_policy_rwsem_write(cpu);
 991	free_cpumask_var(policy->related_cpus);
 992err_free_cpumask:
 993	free_cpumask_var(policy->cpus);
 994err_free_policy:
 995	kfree(policy);
 996nomem_out:
 997	module_put(cpufreq_driver->owner);
 998module_out:
 999	return ret;
1000}
1001
1002
1003/**
1004 * __cpufreq_remove_dev - remove a CPU device
1005 *
1006 * Removes the cpufreq interface for a CPU device.
1007 * Caller should already have policy_rwsem in write mode for this CPU.
1008 * This routine frees the rwsem before returning.
 *
 * @dev: CPU device being removed; dev->id is used as the CPU number.
 * @sif: only handed on to the recursive call made on the hotplug path.
 *
 * Two cases are distinguished:
 *  - the CPU is not the one recorded in the policy's ->cpu field: it is
 *    cleared from the policy's cpumask, its policy reference is put and its
 *    "cpufreq" sysfs link is removed;
 *  - the CPU is the one recorded in ->cpu: the per-CPU policy pointers and
 *    sysfs links of all CPUs in the mask are dropped, the governor is
 *    stopped (->target drivers only), the policy kobject is released and
 *    waited for, the driver's ->exit() hook is called and the policy is
 *    freed.
 *
 * Returns 0 on success, or -EINVAL when no policy is registered for the CPU
 * (the write lock is released in that case as well).
1009 */
1010static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
1011{
1012	unsigned int cpu = dev->id;
1013	unsigned long flags;
1014	struct cpufreq_policy *data;
1015	struct kobject *kobj;
1016	struct completion *cmp;
1017#ifdef CONFIG_SMP
1018	struct device *cpu_dev;
1019	unsigned int j;
1020#endif
1021
1022	pr_debug("unregistering CPU %u\n", cpu);
1023
	/* Look up and detach this CPU's policy pointer under the driver lock. */
1024	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1025	data = per_cpu(cpufreq_cpu_data, cpu);
1026
1027	if (!data) {
		/* No policy for this CPU: release both locks and bail out. */
1028		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1029		unlock_policy_rwsem_write(cpu);
1030		return -EINVAL;
1031	}
1032	per_cpu(cpufreq_cpu_data, cpu) = NULL;
1033
1034
1035#ifdef CONFIG_SMP
1036	/* if this isn't the CPU which is the parent of the kobj, we
1037	 * only need to unlink, put and exit
1038	 */
1039	if (unlikely(cpu != data->cpu)) {
1040		pr_debug("removing link\n");
1041		cpumask_clear_cpu(cpu, data->cpus);
1042		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1043		kobj = &dev->kobj;
1044		cpufreq_cpu_put(data);
		/* The rwsem is released before the sysfs link is removed. */
1045		unlock_policy_rwsem_write(cpu);
1046		sysfs_remove_link(kobj, "cpufreq");
1047		return 0;
1048	}
1049#endif
1050
1051#ifdef CONFIG_SMP
1052
1053#ifdef CONFIG_HOTPLUG_CPU
	/* Save the governor name in the per-CPU cpufreq_cpu_governor buffer. */
1054	strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
1055			CPUFREQ_NAME_LEN);
1056#endif
1057
1058	/* if we have other CPUs still registered, we need to unlink them,
1059	 * or else wait_for_completion below will lock up. Clean the
1060	 * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
1061	 * the sysfs links afterwards.
1062	 */
1063	if (unlikely(cpumask_weight(data->cpus) > 1)) {
1064		for_each_cpu(j, data->cpus) {
1065			if (j == cpu)
1066				continue;
1067			per_cpu(cpufreq_cpu_data, j) = NULL;
1068		}
1069	}
1070
1071	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1072
	/* Second pass, now without the spinlock: drop each sibling's sysfs
	 * link and the policy reference taken on its behalf.
	 */
1073	if (unlikely(cpumask_weight(data->cpus) > 1)) {
1074		for_each_cpu(j, data->cpus) {
1075			if (j == cpu)
1076				continue;
1077			pr_debug("removing link for cpu %u\n", j);
1078#ifdef CONFIG_HOTPLUG_CPU
1079			strncpy(per_cpu(cpufreq_cpu_governor, j),
1080				data->governor->name, CPUFREQ_NAME_LEN);
1081#endif
1082			cpu_dev = get_cpu_device(j);
1083			kobj = &cpu_dev->kobj;
			/* The rwsem is dropped around the sysfs call and
			 * re-taken afterwards; the return value of the
			 * re-lock is not checked.
			 */
1084			unlock_policy_rwsem_write(cpu);
1085			sysfs_remove_link(kobj, "cpufreq");
1086			lock_policy_rwsem_write(cpu);
1087			cpufreq_cpu_put(data);
1088		}
1089	}
1090#else
1091	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1092#endif
1093
	/* Only drivers implementing ->target get a governor STOP event. */
1094	if (cpufreq_driver->target)
1095		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
1096
	/* Take the pointers needed after the unlock, release the rwsem, then
	 * put the policy kobject.
	 */
1097	kobj = &data->kobj;
1098	cmp = &data->kobj_unregister;
1099	unlock_policy_rwsem_write(cpu);
1100	kobject_put(kobj);
1101
1102	/* we need to make sure that the underlying kobj is actually
1103	 * not referenced anymore by anybody before we proceed with
1104	 * unloading.
1105	 */
1106	pr_debug("waiting for dropping of refcount\n");
1107	wait_for_completion(cmp);
1108	pr_debug("wait complete\n");
1109
	/* The driver's optional ->exit() hook runs under the write lock. */
1110	lock_policy_rwsem_write(cpu);
1111	if (cpufreq_driver->exit)
1112		cpufreq_driver->exit(data);
1113	unlock_policy_rwsem_write(cpu);
1114
1115#ifdef CONFIG_HOTPLUG_CPU
1116	/* when the CPU which is the parent of the kobj is hotplugged
1117	 * offline, check for siblings, and create cpufreq sysfs interface
1118	 * and symlinks
1119	 */
1120	if (unlikely(cpumask_weight(data->cpus) > 1)) {
1121		/* first sibling now owns the new sysfs dir */
1122		cpumask_clear_cpu(cpu, data->cpus);
1123		cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL);
1124
1125		/* finally remove our own symlink */
		/* The recursive call expects the write lock held and
		 * releases it; its return value is ignored.
		 */
1126		lock_policy_rwsem_write(cpu);
1127		__cpufreq_remove_dev(dev, sif);
1128	}
1129#endif
1130
	/* Release the cpumasks and the policy structure itself. */
1131	free_cpumask_var(data->related_cpus);
1132	free_cpumask_var(data->cpus);
1133	kfree(data);
1134
1135	return 0;
1136}
1137
1138
1139static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
1140{
1141	unsigned int cpu = dev->id;
1142	int retval;
1143
1144	if (cpu_is_offline(cpu))
1145		return 0;
1146
1147	if (unlikely(lock_policy_rwsem_write(cpu)))
1148		BUG();
1149
1150	retval = __cpufreq_remove_dev(dev, sif);
1151	return retval;
1152}
1153
1154
1155static void handle_update(struct work_struct *work)
1156{
1157	struct cpufreq_policy *policy =
1158		container_of(work, struct cpufreq_policy, update);
1159	unsigned int cpu = policy->cpu;
1160	pr_debug("handle_update for cpu %u called\n", cpu);
1161	cpufreq_update_policy(cpu);
1162}
1163
1164/**
1165 *	cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
1166 *	@cpu: cpu number
1167 *	@old_freq: CPU frequency the kernel thinks the CPU runs at
1168 *	@new_freq: CPU frequency the CPU actually runs at
1169 *
1170 *	We adjust to current frequency first, and need to clean up later.
1171 *	So either call to cpufreq_update_policy() or schedule handle_update()).
1172 */
1173static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1174				unsigned int new_freq)
1175{
1176	struct cpufreq_freqs freqs;
1177
1178	pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
1179	       "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1180
1181	freqs.cpu = cpu;
1182	freqs.old = old_freq;
1183	freqs.new = new_freq;
1184	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1185	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1186}
1187
1188
1189/**
1190 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1191 * @cpu: CPU number
1192 *
1193 * This is the last known freq, without actually getting it from the driver.
1194 * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1195 */
1196unsigned int cpufreq_quick_get(unsigned int cpu)
1197{
1198	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1199	unsigned int ret_freq = 0;
1200
1201	if (policy) {
1202		ret_freq = policy->cur;
1203		cpufreq_cpu_put(policy);
1204	}
1205
1206	return ret_freq;
1207}
1208EXPORT_SYMBOL(cpufreq_quick_get);
1209
1210/**
1211 * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
1212 * @cpu: CPU number
1213 *
1214 * Just return the max possible frequency for a given CPU.
1215 */
1216unsigned int cpufreq_quick_get_max(unsigned int cpu)
1217{
1218	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1219	unsigned int ret_freq = 0;
1220
1221	if (policy) {
1222		ret_freq = policy->max;
1223		cpufreq_cpu_put(policy);
1224	}
1225
1226	return ret_freq;
1227}
1228EXPORT_SYMBOL(cpufreq_quick_get_max);
1229
1230
1231static unsigned int __cpufreq_get(unsigned int cpu)
1232{
1233	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
1234	unsigned int ret_freq = 0;
1235
1236	if (!cpufreq_driver->get)
1237		return ret_freq;
1238
1239	ret_freq = cpufreq_driver->get(cpu);
1240
1241	if (ret_freq && policy->cur &&
1242		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1243		/* verify no discrepancy between actual and
1244					saved value exists */
1245		if (unlikely(ret_freq != policy->cur)) {
1246			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1247			schedule_work(&policy->update);
1248		}
1249	}
1250
1251	return ret_freq;
1252}
1253
/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Queries the driver through __cpufreq_get() while holding a policy
 * reference and the policy rwsem in read mode. Returns 0 when there is no
 * policy for @cpu or the read lock cannot be taken.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int khz = 0;

	if (!policy)
		return 0;

	if (likely(!lock_policy_rwsem_read(cpu))) {
		khz = __cpufreq_get(cpu);
		unlock_policy_rwsem_read(cpu);
	}

	cpufreq_cpu_put(policy);
	return khz;
}
EXPORT_SYMBOL(cpufreq_get);
1281
1282static struct subsys_interface cpufreq_interface = {
1283	.name		= "cpufreq",
1284	.subsys		= &cpu_subsys,
1285	.add_dev	= cpufreq_add_dev,
1286	.remove_dev	= cpufreq_remove_dev,
1287};
1288
1289
1290/**
1291 * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
1292 *
1293 * This function is only executed for the boot processor.  The other CPUs
1294 * have been put offline by means of CPU hotplug.
1295 */
1296static int cpufreq_bp_suspend(void)
1297{
1298	int ret = 0;
1299
1300	int cpu = smp_processor_id();
1301	struct cpufreq_policy *cpu_policy;
1302
1303	pr_debug("suspending cpu %u\n", cpu);
1304
1305	/* If there's no policy for the boot CPU, we have nothing to do. */
1306	cpu_policy = cpufreq_cpu_get(cpu);
1307	if (!cpu_policy)
1308		return 0;
1309
1310	if (cpufreq_driver->suspend) {
1311		ret = cpufreq_driver->suspend(cpu_policy);
1312		if (ret)
1313			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1314					"step on CPU %u\n", cpu_policy->cpu);
1315	}
1316
1317	cpufreq_cpu_put(cpu_policy);
1318	return ret;
1319}
1320
1321/**
1322 * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
1323 *
1324 *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1325 *	2.) schedule call cpufreq_update_policy() ASAP as interrupts are
1326 *	    restored. It will verify that the current freq is in sync with
1327 *	    what we believe it to be. This is a bit later than when it
1328 *	    should be, but nonethteless it's better than calling
1329 *	    cpufreq_driver->get() here which might re-enable interrupts...
1330 *
1331 * This function is only executed for the boot CPU.  The other CPUs have not
1332 * been turned on yet.
1333 */
1334static void cpufreq_bp_resume(void)
1335{
1336	int ret = 0;
1337
1338	int cpu = smp_processor_id();
1339	struct cpufreq_policy *cpu_policy;
1340
1341	pr_debug("resuming cpu %u\n", cpu);
1342
1343	/* If there's no policy for the boot CPU, we have nothing to do. */
1344	cpu_policy = cpufreq_cpu_get(cpu);
1345	if (!cpu_policy)
1346		return;
1347
1348	if (cpufreq_driver->resume) {
1349		ret = cpufreq_driver->resume(cpu_policy);
1350		if (ret) {
1351			printk(KERN_ERR "cpufreq: resume failed in ->resume "
1352					"step on CPU %u\n", cpu_policy->cpu);
1353			goto fail;
1354		}
1355	}
1356
1357	schedule_work(&cpu_policy->update);
1358
1359fail:
1360	cpufreq_cpu_put(cpu_policy);
1361}
1362
1363static struct syscore_ops cpufreq_syscore_ops = {
1364	.suspend	= cpufreq_bp_suspend,
1365	.resume		= cpufreq_bp_resume,
1366};
1367
1368
1369/*********************************************************************
1370 *                     NOTIFIER LISTS INTERFACE                      *
1371 *********************************************************************/
1372
1373/**
1374 *	cpufreq_register_notifier - register a driver with cpufreq
1375 *	@nb: notifier function to register
1376 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1377 *
1378 *	Add a driver to one of two lists: either a list of drivers that
1379 *      are notified about clock rate changes (once before and once after
1380 *      the transition), or a list of drivers that are notified about
1381 *      changes in cpufreq policy.
1382 *
1383 *	This function may sleep, and has the same return conditions as
1384 *	blocking_notifier_chain_register.
1385 */
1386int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1387{
1388	int ret;
1389
1390	WARN_ON(!init_cpufreq_transition_notifier_list_called);
1391
1392	switch (list) {
1393	case CPUFREQ_TRANSITION_NOTIFIER:
1394		ret = srcu_notifier_chain_register(
1395				&cpufreq_transition_notifier_list, nb);
1396		break;
1397	case CPUFREQ_POLICY_NOTIFIER:
1398		ret = blocking_notifier_chain_register(
1399				&cpufreq_policy_notifier_list, nb);
1400		break;
1401	default:
1402		ret = -EINVAL;
1403	}
1404
1405	return ret;
1406}
1407EXPORT_SYMBOL(cpufreq_register_notifier);
1408
1409
1410/**
1411 *	cpufreq_unregister_notifier - unregister a driver with cpufreq
1412 *	@nb: notifier block to be unregistered
1413 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1414 *
1415 *	Remove a driver from the CPU frequency notifier list.
1416 *
1417 *	This function may sleep, and has the same return conditions as
1418 *	blocking_notifier_chain_unregister.
1419 */
1420int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1421{
1422	int ret;
1423
1424	switch (list) {
1425	case CPUFREQ_TRANSITION_NOTIFIER:
1426		ret = srcu_notifier_chain_unregister(
1427				&cpufreq_transition_notifier_list, nb);
1428		break;
1429	case CPUFREQ_POLICY_NOTIFIER:
1430		ret = blocking_notifier_chain_unregister(
1431				&cpufreq_policy_notifier_list, nb);
1432		break;
1433	default:
1434		ret = -EINVAL;
1435	}
1436
1437	return ret;
1438}
1439EXPORT_SYMBOL(cpufreq_unregister_notifier);
1440
1441
1442/*********************************************************************
1443 *                              GOVERNORS                            *
1444 *********************************************************************/
1445
1446
1447int __cpufreq_driver_target(struct cpufreq_policy *policy,
1448			    unsigned int target_freq,
1449			    unsigned int relation)
1450{
1451	int retval = -EINVAL;
1452
1453	if (cpufreq_disabled())
1454		return -ENODEV;
1455
1456	pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1457		target_freq, relation);
1458	if (cpu_online(policy->cpu) && cpufreq_driver->target)
1459		retval = cpufreq_driver->target(policy, target_freq, relation);
1460
1461	return retval;
1462}
1463EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1464
1465int cpufreq_driver_target(struct cpufreq_policy *policy,
1466			  unsigned int target_freq,
1467			  unsigned int relation)
1468{
1469	int ret = -EINVAL;
1470
1471	policy = cpufreq_cpu_get(policy->cpu);
1472	if (!policy)
1473		goto no_policy;
1474
1475	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1476		goto fail;
1477
1478	ret = __cpufreq_driver_target(policy, target_freq, relation);
1479
1480	unlock_policy_rwsem_write(policy->cpu);
1481
1482fail:
1483	cpufreq_cpu_put(policy);
1484no_policy:
1485	return ret;
1486}
1487EXPORT_SYMBOL_GPL(cpufreq_driver_target);
1488
1489int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
1490{
1491	int ret = 0;
1492
1493	policy = cpufreq_cpu_get(policy->cpu);
1494	if (!policy)
1495		return -EINVAL;
1496
1497	if (cpu_online(cpu) && cpufreq_driver->getavg)
1498		ret = cpufreq_driver->getavg(policy, cpu);
1499
1500	cpufreq_cpu_put(policy);
1501	return ret;
1502}
1503EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1504
1505/*
1506 * when "event" is CPUFREQ_GOV_LIMITS
1507 */
1508
1509static int __cpufreq_governor(struct cpufreq_policy *policy,
1510					unsigned int event)
1511{
1512	int ret;
1513
1514	/* Only must be defined when default governor is known to have latency
1515	   restrictions, like e.g. conservative or ondemand.
1516	   That this is the case is already ensured in Kconfig
1517	*/
1518#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1519	struct cpufreq_governor *gov = &cpufreq_gov_performance;
1520#else
1521	struct cpufreq_governor *gov = NULL;
1522#endif
1523
1524	if (policy->governor->max_transition_latency &&
1525	    policy->cpuinfo.transition_latency >
1526	    policy->governor->max_transition_latency) {
1527		if (!gov)
1528			return -EINVAL;
1529		else {
1530			printk(KERN_WARNING "%s governor failed, too long"
1531			       " transition latency of HW, fallback"
1532			       " to %s governor\n",
1533			       policy->governor->name,
1534			       gov->name);
1535			policy->governor = gov;
1536		}
1537	}
1538
1539	if (!try_module_get(policy->governor->owner))
1540		return -EINVAL;
1541
1542	pr_debug("__cpufreq_governor for CPU %u, event %u\n",
1543						policy->cpu, event);
1544	ret = policy->governor->governor(policy, event);
1545
1546	/* we keep one module reference alive for
1547			each CPU governed by this CPU */
1548	if ((event != CPUFREQ_GOV_START) || ret)
1549		module_put(policy->governor->owner);
1550	if ((event == CPUFREQ_GOV_STOP) && !ret)
1551		module_put(policy->governor->owner);
1552
1553	return ret;
1554}
1555
1556
1557int cpufreq_register_governor(struct cpufreq_governor *governor)
1558{
1559	int err;
1560
1561	if (!governor)
1562		return -EINVAL;
1563
1564	if (cpufreq_disabled())
1565		return -ENODEV;
1566
1567	mutex_lock(&cpufreq_governor_mutex);
1568
1569	err = -EBUSY;
1570	if (__find_governor(governor->name) == NULL) {
1571		err = 0;
1572		list_add(&governor->governor_list, &cpufreq_governor_list);
1573	}
1574
1575	mutex_unlock(&cpufreq_governor_mutex);
1576	return err;
1577}
1578EXPORT_SYMBOL_GPL(cpufreq_register_governor);
1579
1580
1581void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1582{
1583#ifdef CONFIG_HOTPLUG_CPU
1584	int cpu;
1585#endif
1586
1587	if (!governor)
1588		return;
1589
1590	if (cpufreq_disabled())
1591		return;
1592
1593#ifdef CONFIG_HOTPLUG_CPU
1594	for_each_present_cpu(cpu) {
1595		if (cpu_online(cpu))
1596			continue;
1597		if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
1598			strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
1599	}
1600#endif
1601
1602	mutex_lock(&cpufreq_governor_mutex);
1603	list_del(&governor->governor_list);
1604	mutex_unlock(&cpufreq_governor_mutex);
1605	return;
1606}
1607EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1608
1609
1610
1611/*********************************************************************
1612 *                          POLICY INTERFACE                         *
1613 *********************************************************************/
1614
1615/**
1616 * cpufreq_get_policy - get the current cpufreq_policy
1617 * @policy: struct cpufreq_policy into which the current cpufreq_policy
1618 *	is written
1619 *
1620 * Reads the current cpufreq policy.
1621 */
1622int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1623{
1624	struct cpufreq_policy *cpu_policy;
1625	if (!policy)
1626		return -EINVAL;
1627
1628	cpu_policy = cpufreq_cpu_get(cpu);
1629	if (!cpu_policy)
1630		return -EINVAL;
1631
1632	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1633
1634	cpufreq_cpu_put(cpu_policy);
1635	return 0;
1636}
1637EXPORT_SYMBOL(cpufreq_get_policy);
1638
1639
1640/*
1641 * data   : current policy.
1642 * policy : policy to be set.
1643 */
1644static int __cpufreq_set_policy(struct cpufreq_policy *data,
1645				struct cpufreq_policy *policy)
1646{
1647	int ret = 0;
1648
1649	pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1650		policy->min, policy->max);
1651
1652	memcpy(&policy->cpuinfo, &data->cpuinfo,
1653				sizeof(struct cpufreq_cpuinfo));
1654
1655	if (policy->min > data->max || policy->max < data->min) {
1656		ret = -EINVAL;
1657		goto error_out;
1658	}
1659
1660	/* verify the cpu speed can be set within this limit */
1661	ret = cpufreq_driver->verify(policy);
1662	if (ret)
1663		goto error_out;
1664
1665	/* adjust if necessary - all reasons */
1666	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1667			CPUFREQ_ADJUST, policy);
1668
1669	/* adjust if necessary - hardware incompatibility*/
1670	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1671			CPUFREQ_INCOMPATIBLE, policy);
1672
1673	/* verify the cpu speed can be set within this limit,
1674	   which might be different to the first one */
1675	ret = cpufreq_driver->verify(policy);
1676	if (ret)
1677		goto error_out;
1678
1679	/* notification of the new policy */
1680	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1681			CPUFREQ_NOTIFY, policy);
1682
1683	data->min = policy->min;
1684	data->max = policy->max;
1685
1686	pr_debug("new min and max freqs are %u - %u kHz\n",
1687					data->min, data->max);
1688
1689	if (cpufreq_driver->setpolicy) {
1690		data->policy = policy->policy;
1691		pr_debug("setting range\n");
1692		ret = cpufreq_driver->setpolicy(policy);
1693	} else {
1694		if (policy->governor != data->governor) {
1695			/* save old, working values */
1696			struct cpufreq_governor *old_gov = data->governor;
1697
1698			pr_debug("governor switch\n");
1699
1700			/* end old governor */
1701			if (data->governor)
1702				__cpufreq_governor(data, CPUFREQ_GOV_STOP);
1703
1704			/* start new governor */
1705			data->governor = policy->governor;
1706			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1707				/* new governor failed, so re-start old one */
1708				pr_debug("starting governor %s failed\n",
1709							data->governor->name);
1710				if (old_gov) {
1711					data->governor = old_gov;
1712					__cpufreq_governor(data,
1713							   CPUFREQ_GOV_START);
1714				}
1715				ret = -EINVAL;
1716				goto error_out;
1717			}
1718			/* might be a policy change, too, so fall through */
1719		}
1720		pr_debug("governor: change or update limits\n");
1721		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1722	}
1723
1724error_out:
1725	return ret;
1726}
1727
1728/**
1729 *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
1730 *	@cpu: CPU which shall be re-evaluated
1731 *
1732 *	Useful for policy notifiers which have different necessities
1733 *	at different times.
1734 */
1735int cpufreq_update_policy(unsigned int cpu)
1736{
1737	struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1738	struct cpufreq_policy policy;
1739	int ret;
1740
1741	if (!data) {
1742		ret = -ENODEV;
1743		goto no_policy;
1744	}
1745
1746	if (unlikely(lock_policy_rwsem_write(cpu))) {
1747		ret = -EINVAL;
1748		goto fail;
1749	}
1750
1751	pr_debug("updating policy for CPU %u\n", cpu);
1752	memcpy(&policy, data, sizeof(struct cpufreq_policy));
1753	policy.min = data->user_policy.min;
1754	policy.max = data->user_policy.max;
1755	policy.policy = data->user_policy.policy;
1756	policy.governor = data->user_policy.governor;
1757
1758	/* BIOS might change freq behind our back
1759	  -> ask driver for current freq and notify governors about a change */
1760	if (cpufreq_driver->get) {
1761		policy.cur = cpufreq_driver->get(cpu);
1762		if (!data->cur) {
1763			pr_debug("Driver did not initialize current freq");
1764			data->cur = policy.cur;
1765		} else {
1766			if (data->cur != policy.cur)
1767				cpufreq_out_of_sync(cpu, data->cur,
1768								policy.cur);
1769		}
1770	}
1771
1772	ret = __cpufreq_set_policy(data, &policy);
1773
1774	unlock_policy_rwsem_write(cpu);
1775
1776fail:
1777	cpufreq_cpu_put(data);
1778no_policy:
1779	return ret;
1780}
1781EXPORT_SYMBOL(cpufreq_update_policy);
1782
1783static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1784					unsigned long action, void *hcpu)
1785{
1786	unsigned int cpu = (unsigned long)hcpu;
1787	struct device *dev;
1788
1789	dev = get_cpu_device(cpu);
1790	if (dev) {
1791		switch (action) {
1792		case CPU_ONLINE:
1793		case CPU_ONLINE_FROZEN:
1794			cpufreq_add_dev(dev, NULL);
1795			break;
1796		case CPU_DOWN_PREPARE:
1797		case CPU_DOWN_PREPARE_FROZEN:
1798			if (unlikely(lock_policy_rwsem_write(cpu)))
1799				BUG();
1800
1801			__cpufreq_remove_dev(dev, NULL);
1802			break;
1803		case CPU_DOWN_FAILED:
1804		case CPU_DOWN_FAILED_FROZEN:
1805			cpufreq_add_dev(dev, NULL);
1806			break;
1807		}
1808	}
1809	return NOTIFY_OK;
1810}
1811
1812static struct notifier_block __refdata cpufreq_cpu_notifier = {
1813    .notifier_call = cpufreq_cpu_callback,
1814};
1815
1816/*********************************************************************
1817 *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1818 *********************************************************************/
1819
1820/**
1821 * cpufreq_register_driver - register a CPU Frequency driver
1822 * @driver_data: A struct cpufreq_driver containing the values#
1823 * submitted by the CPU Frequency driver.
1824 *
1825 *   Registers a CPU Frequency driver to this core code. This code
1826 * returns zero on success, -EBUSY when another driver got here first
1827 * (and isn't unregistered in the meantime).
1828 *
1829 */
1830int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1831{
1832	unsigned long flags;
1833	int ret;
1834
1835	if (cpufreq_disabled())
1836		return -ENODEV;
1837
1838	if (!driver_data || !driver_data->verify || !driver_data->init ||
1839	    ((!driver_data->setpolicy) && (!driver_data->target)))
1840		return -EINVAL;
1841
1842	pr_debug("trying to register driver %s\n", driver_data->name);
1843
1844	if (driver_data->setpolicy)
1845		driver_data->flags |= CPUFREQ_CONST_LOOPS;
1846
1847	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1848	if (cpufreq_driver) {
1849		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1850		return -EBUSY;
1851	}
1852	cpufreq_driver = driver_data;
1853	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1854
1855	ret = subsys_interface_register(&cpufreq_interface);
1856	if (ret)
1857		goto err_null_driver;
1858
1859	if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1860		int i;
1861		ret = -ENODEV;
1862
1863		/* check for at least one working CPU */
1864		for (i = 0; i < nr_cpu_ids; i++)
1865			if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
1866				ret = 0;
1867				break;
1868			}
1869
1870		/* if all ->init() calls failed, unregister */
1871		if (ret) {
1872			pr_debug("no CPU initialized for driver %s\n",
1873							driver_data->name);
1874			goto err_if_unreg;
1875		}
1876	}
1877
1878	register_hotcpu_notifier(&cpufreq_cpu_notifier);
1879	pr_debug("driver %s up and running\n", driver_data->name);
1880
1881	return 0;
1882err_if_unreg:
1883	subsys_interface_unregister(&cpufreq_interface);
1884err_null_driver:
1885	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1886	cpufreq_driver = NULL;
1887	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1888	return ret;
1889}
1890EXPORT_SYMBOL_GPL(cpufreq_register_driver);
1891
1892
1893/**
1894 * cpufreq_unregister_driver - unregister the current CPUFreq driver
1895 *
1896 *    Unregister the current CPUFreq driver. Only call this if you have
1897 * the right to do so, i.e. if you have succeeded in initialising before!
1898 * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1899 * currently not initialised.
1900 */
1901int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1902{
1903	unsigned long flags;
1904
1905	if (!cpufreq_driver || (driver != cpufreq_driver))
1906		return -EINVAL;
1907
1908	pr_debug("unregistering driver %s\n", driver->name);
1909
1910	subsys_interface_unregister(&cpufreq_interface);
1911	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1912
1913	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1914	cpufreq_driver = NULL;
1915	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1916
1917	return 0;
1918}
1919EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1920
1921static int __init cpufreq_core_init(void)
1922{
1923	int cpu;
1924
1925	if (cpufreq_disabled())
1926		return -ENODEV;
1927
1928	for_each_possible_cpu(cpu) {
1929		per_cpu(cpufreq_policy_cpu, cpu) = -1;
1930		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1931	}
1932
1933	cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
1934	BUG_ON(!cpufreq_global_kobject);
1935	register_syscore_ops(&cpufreq_syscore_ops);
1936
1937	return 0;
1938}
1939core_initcall(cpufreq_core_init);