Linux Audio

Check our new training course

Loading...
v3.1
  1/*
  2 *  drivers/cpufreq/cpufreq_ondemand.c
  3 *
  4 *  Copyright (C)  2001 Russell King
  5 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
  6 *                      Jun Nakajima <jun.nakajima@intel.com>
  7 *
  8 * This program is free software; you can redistribute it and/or modify
  9 * it under the terms of the GNU General Public License version 2 as
 10 * published by the Free Software Foundation.
 11 */
 12
 13#include <linux/kernel.h>
 14#include <linux/module.h>
 15#include <linux/init.h>
 16#include <linux/cpufreq.h>
 17#include <linux/cpu.h>
 18#include <linux/jiffies.h>
 19#include <linux/kernel_stat.h>
 20#include <linux/mutex.h>
 21#include <linux/hrtimer.h>
 22#include <linux/tick.h>
 23#include <linux/ktime.h>
 24#include <linux/sched.h>
 25
 26/*
 27 * dbs is used in this file as a shortform for demandbased switching
 28 * It helps to keep variable names smaller, simpler
 29 */
 30
 31#define DEF_FREQUENCY_DOWN_DIFFERENTIAL		(10)
 32#define DEF_FREQUENCY_UP_THRESHOLD		(80)
 33#define DEF_SAMPLING_DOWN_FACTOR		(1)
 34#define MAX_SAMPLING_DOWN_FACTOR		(100000)
 35#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL	(3)
 36#define MICRO_FREQUENCY_UP_THRESHOLD		(95)
 37#define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
 38#define MIN_FREQUENCY_UP_THRESHOLD		(11)
 39#define MAX_FREQUENCY_UP_THRESHOLD		(100)
 40
 41/*
 42 * The polling frequency of this governor depends on the capability of
 43 * the processor. Default polling frequency is 1000 times the transition
 44 * latency of the processor. The governor will work on any processor with
 45 * transition latency <= 10mS, using appropriate sampling
 46 * rate.
 47 * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
 48 * this governor will not work.
 49 * All times here are in uS.
 50 */
 51#define MIN_SAMPLING_RATE_RATIO			(2)
 52
 53static unsigned int min_sampling_rate;
 54
 55#define LATENCY_MULTIPLIER			(1000)
 56#define MIN_LATENCY_MULTIPLIER			(100)
 57#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)
 58
 59static void do_dbs_timer(struct work_struct *work);
 60static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 61				unsigned int event);
 62
 /*
  * Governor descriptor handed to cpufreq_register_governor() in
  * cpufreq_gov_dbs_init(). The symbol is static unless
  * CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is defined.
  */
 63#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
 64static
 65#endif
 66struct cpufreq_governor cpufreq_gov_ondemand = {
 67       .name                   = "ondemand",
 68       .governor               = cpufreq_governor_dbs,
 69       .max_transition_latency = TRANSITION_LATENCY_LIMIT,
 70       .owner                  = THIS_MODULE,
 71};
 72
 73/* Sampling types */
 /*
  * DBS_NORMAL_SAMPLE: do_dbs_timer() evaluates the load via dbs_check_cpu().
  * DBS_SUB_SAMPLE: do_dbs_timer() switches to freq_lo for freq_lo_jiffies;
  * this is only reached when powersave_bias is non-zero.
  */
 74enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
 75
 /*
  * Per-CPU governor state.
  *
  * prev_cpu_*: counter snapshots taken at the previous sample; dbs_check_cpu()
  *            subtracts them from the current counters to get deltas.
  * cur_policy: policy this CPU was started with (set on CPUFREQ_GOV_START).
  * work:       delayed work whose handler is do_dbs_timer().
  * freq_table: result of cpufreq_frequency_get_table(); may be NULL.
  * freq_lo, freq_lo_jiffies, freq_hi_jiffies: outputs of
  *            powersave_bias_target(); freq_lo == 0 means no sub-sample.
  * rate_mult:  multiplier applied to sampling_rate when computing the next
  *            delay in do_dbs_timer().
  * sample_type: DBS_NORMAL_SAMPLE or DBS_SUB_SAMPLE.
  */
 76struct cpu_dbs_info_s {
 77	cputime64_t prev_cpu_idle;
 78	cputime64_t prev_cpu_iowait;
 79	cputime64_t prev_cpu_wall;
 80	cputime64_t prev_cpu_nice;
 81	struct cpufreq_policy *cur_policy;
 82	struct delayed_work work;
 83	struct cpufreq_frequency_table *freq_table;
 84	unsigned int freq_lo;
 85	unsigned int freq_lo_jiffies;
 86	unsigned int freq_hi_jiffies;
 87	unsigned int rate_mult;
 88	int cpu;
 89	unsigned int sample_type:1;
 90	/*
 91	 * percpu mutex that serializes governor limit change with
 92	 * do_dbs_timer invocation. We do not want do_dbs_timer to run
 93	 * when user is changing the governor or limits.
 94	 */
 95	struct mutex timer_mutex;
 96};
 97static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);
 98
 99static unsigned int dbs_enable;	/* number of CPUs using this policy */
100
101/*
102 * dbs_mutex protects dbs_enable in governor start/stop.
103 */
104static DEFINE_MUTEX(dbs_mutex);
105
/*
 * Governor tunables, shared by every policy and exposed through the
 * "ondemand" sysfs attribute group.
 *
 * sampling_rate:        sampling period, fed to usecs_to_jiffies().
 * up_threshold:         load percentage above which the frequency is raised.
 * down_differential:    margin subtracted from up_threshold when looking for
 *                       a lower frequency.
 * ignore_nice:          when non-zero, nice time is added to idle time.
 * sampling_down_factor: value copied into rate_mult when jumping to max.
 * powersave_bias:       reduction applied as freq * bias / 1000.
 * io_is_busy:           when non-zero, iowait time is removed from idle time.
 *
 * sampling_rate and io_is_busy have no initializer here; they are assigned
 * in cpufreq_governor_dbs() when the first policy starts.
 */
106static struct dbs_tuners {
107	unsigned int sampling_rate;
108	unsigned int up_threshold;
109	unsigned int down_differential;
110	unsigned int ignore_nice;
111	unsigned int sampling_down_factor;
112	unsigned int powersave_bias;
113	unsigned int io_is_busy;
114} dbs_tuners_ins = {
115	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
116	.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
117	.down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
118	.ignore_nice = 0,
119	.powersave_bias = 0,
120};
121
/*
 * Idle-time estimate for @cpu built from the kstat counters.
 *
 * Adds up the user, system, irq, softirq, steal and nice counters as busy
 * time, subtracts that from the current 64-bit jiffies count, and returns
 * the remainder converted with jiffies_to_usecs(). When @wall is non-NULL
 * it receives the current jiffies count converted the same way.
 *
 * NOTE(review): 64-bit values are passed to jiffies_to_usecs(); confirm its
 * parameter width to rule out truncation on 32-bit builds.
 */
122static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
123							cputime64_t *wall)
124{
125	cputime64_t idle_time;
126	cputime64_t cur_wall_time;
127	cputime64_t busy_time;
128
129	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
130	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
131			kstat_cpu(cpu).cpustat.system);
132
133	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
134	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
135	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
136	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
137
	/* Everything that was not counted as busy is treated as idle. */
138	idle_time = cputime64_sub(cur_wall_time, busy_time);
139	if (wall)
140		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
141
142	return (cputime64_t)jiffies_to_usecs(idle_time);
143}
144
/*
 * Idle time of @cpu. Uses get_cpu_idle_time_us() and, when that returns
 * -1ULL, falls back to get_cpu_idle_time_jiffy(). @wall is passed through
 * to whichever helper produces the result.
 */
145static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
146{
147	u64 idle_time = get_cpu_idle_time_us(cpu, wall);
148
149	if (idle_time == -1ULL)
150		return get_cpu_idle_time_jiffy(cpu, wall);
151
152	return idle_time;
153}
154
/*
 * Iowait time of @cpu as reported by get_cpu_iowait_time_us(). Returns 0
 * when that helper returns -1ULL; there is no jiffy-based fallback.
 */
155static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall)
156{
157	u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);
158
159	if (iowait_time == -1ULL)
160		return 0;
161
162	return iowait_time;
163}
164
165/*
166 * Find right freq to be set now with powersave_bias on.
167 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
168 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
 *
 * @policy:    policy whose CPU (policy->cpu) selects the per-CPU state.
 * @freq_next: frequency the caller would like to run at.
 * @relation:  CPUFREQ_RELATION_* used to resolve @freq_next in the table.
 *
 * Without a frequency table, @freq_next is returned unchanged and freq_lo
 * is cleared. When the biased average maps to a single table entry, that
 * entry is returned and freq_lo is cleared as well.
169 */
170static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
171					  unsigned int freq_next,
172					  unsigned int relation)
173{
174	unsigned int freq_req, freq_reduc, freq_avg;
175	unsigned int freq_hi, freq_lo;
176	unsigned int index = 0;
177	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
178	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
179						   policy->cpu);
180
181	if (!dbs_info->freq_table) {
182		dbs_info->freq_lo = 0;
183		dbs_info->freq_lo_jiffies = 0;
184		return freq_next;
185	}
186
187	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
188			relation, &index);
189	freq_req = dbs_info->freq_table[index].frequency;
	/* powersave_bias is applied as a fraction of 1000 */
190	freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
191	freq_avg = freq_req - freq_reduc;
192
193	/* Find freq bounds for freq_avg in freq_table */
194	index = 0;
195	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
196			CPUFREQ_RELATION_H, &index);
197	freq_lo = dbs_info->freq_table[index].frequency;
198	index = 0;
199	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
200			CPUFREQ_RELATION_L, &index);
201	freq_hi = dbs_info->freq_table[index].frequency;
202
203	/* Find out how long we have to be in hi and lo freqs */
204	if (freq_hi == freq_lo) {
205		dbs_info->freq_lo = 0;
206		dbs_info->freq_lo_jiffies = 0;
207		return freq_lo;
208	}
209	jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	/*
	 * Split the sampling period so the time-weighted mean of freq_hi and
	 * freq_lo is freq_avg; adding half the divisor rounds to nearest.
	 */
210	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
211	jiffies_hi += ((freq_hi - freq_lo) / 2);
212	jiffies_hi /= (freq_hi - freq_lo);
213	jiffies_lo = jiffies_total - jiffies_hi;
214	dbs_info->freq_lo = freq_lo;
215	dbs_info->freq_lo_jiffies = jiffies_lo;
216	dbs_info->freq_hi_jiffies = jiffies_hi;
217	return freq_hi;
218}
219
/*
 * Refresh the powersave-bias state of @cpu: re-read its frequency table
 * with cpufreq_frequency_get_table() and clear freq_lo.
 */
220static void ondemand_powersave_bias_init_cpu(int cpu)
221{
222	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
223	dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
224	dbs_info->freq_lo = 0;
225}
226
/* Run ondemand_powersave_bias_init_cpu() for every online CPU. */
227static void ondemand_powersave_bias_init(void)
228{
229	int i;
230	for_each_online_cpu(i) {
231		ondemand_powersave_bias_init_cpu(i);
232	}
233}
234
235/************************** sysfs interface ************************/
 
 
 
 
 
 
 
 
 
236
/*
 * sysfs show handler for the read-only "sampling_rate_min" attribute:
 * prints min_sampling_rate followed by a newline into @buf and returns the
 * sprintf() result.
 */
237static ssize_t show_sampling_rate_min(struct kobject *kobj,
238				      struct attribute *attr, char *buf)
239{
240	return sprintf(buf, "%u\n", min_sampling_rate);
241}
242
243define_one_global_ro(sampling_rate_min);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
245/* cpufreq_ondemand Governor Tunables */
/*
 * show_one(file_name, object) generates show_<file_name>(), a sysfs show
 * handler that prints dbs_tuners_ins.<object> as "%u\n" into the buffer.
 * The instantiations below cover every read-write tunable; note that the
 * "ignore_nice_load" file maps to the ignore_nice field.
 */
246#define show_one(file_name, object)					\
247static ssize_t show_##file_name						\
248(struct kobject *kobj, struct attribute *attr, char *buf)              \
249{									\
250	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
251}
252show_one(sampling_rate, sampling_rate);
253show_one(io_is_busy, io_is_busy);
254show_one(up_threshold, up_threshold);
255show_one(sampling_down_factor, sampling_down_factor);
256show_one(ignore_nice_load, ignore_nice);
257show_one(powersave_bias, powersave_bias);
258
/*
 * sysfs store handler for "sampling_rate". Parses one unsigned integer from
 * @buf and stores it, raised to min_sampling_rate if it is smaller.
 * Returns @count on success or -EINVAL when no number could be parsed.
 */
259static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
260				   const char *buf, size_t count)
261{
262	unsigned int input;
263	int ret;
264	ret = sscanf(buf, "%u", &input);
265	if (ret != 1)
266		return -EINVAL;
267	dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
268	return count;
269}
270
/*
 * sysfs store handler for "io_is_busy". Parses one unsigned integer from
 * @buf and stores it normalised to 0 or 1.
 * Returns @count on success or -EINVAL when no number could be parsed.
 */
271static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b,
272				   const char *buf, size_t count)
273{
274	unsigned int input;
275	int ret;
276
277	ret = sscanf(buf, "%u", &input);
278	if (ret != 1)
279		return -EINVAL;
280	dbs_tuners_ins.io_is_busy = !!input;
281	return count;
282}
283
/*
 * sysfs store handler for "up_threshold". Accepts one unsigned integer in
 * [MIN_FREQUENCY_UP_THRESHOLD, MAX_FREQUENCY_UP_THRESHOLD].
 * Returns @count on success or -EINVAL for unparsable or out-of-range input.
 */
284static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
285				  const char *buf, size_t count)
286{
287	unsigned int input;
288	int ret;
289	ret = sscanf(buf, "%u", &input);
290
291	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
292			input < MIN_FREQUENCY_UP_THRESHOLD) {
293		return -EINVAL;
294	}
295	dbs_tuners_ins.up_threshold = input;
296	return count;
297}
298
/*
 * sysfs store handler for "sampling_down_factor". Accepts one unsigned
 * integer in [1, MAX_SAMPLING_DOWN_FACTOR], stores it, and resets rate_mult
 * to 1 on every online CPU.
 * Returns @count on success or -EINVAL for unparsable or out-of-range input.
 */
299static ssize_t store_sampling_down_factor(struct kobject *a,
300			struct attribute *b, const char *buf, size_t count)
301{
302	unsigned int input, j;
303	int ret;
304	ret = sscanf(buf, "%u", &input);
305
306	if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
307		return -EINVAL;
308	dbs_tuners_ins.sampling_down_factor = input;
309
310	/* Reset down sampling multiplier in case it was active */
311	for_each_online_cpu(j) {
312		struct cpu_dbs_info_s *dbs_info;
313		dbs_info = &per_cpu(od_cpu_dbs_info, j);
314		dbs_info->rate_mult = 1;
315	}
316	return count;
317}
318
/*
 * sysfs store handler for "ignore_nice_load". Parses one unsigned integer,
 * clamps it to 0 or 1, and, if the value actually changes, stores it and
 * re-baselines prev_cpu_idle/prev_cpu_wall (and prev_cpu_nice when nice
 * time is now ignored) on every online CPU.
 * Returns @count on success or -EINVAL when no number could be parsed.
 */
319static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
320				      const char *buf, size_t count)
321{
322	unsigned int input;
323	int ret;
324
325	unsigned int j;
326
327	ret = sscanf(buf, "%u", &input);
328	if (ret != 1)
329		return -EINVAL;
330
331	if (input > 1)
332		input = 1;
333
334	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
335		return count;
336	}
337	dbs_tuners_ins.ignore_nice = input;
338
339	/* we need to re-evaluate prev_cpu_idle */
340	for_each_online_cpu(j) {
341		struct cpu_dbs_info_s *dbs_info;
342		dbs_info = &per_cpu(od_cpu_dbs_info, j);
343		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
344						&dbs_info->prev_cpu_wall);
345		if (dbs_tuners_ins.ignore_nice)
346			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
347
348	}
349	return count;
350}
351
/*
 * sysfs store handler for "powersave_bias". Parses one unsigned integer,
 * caps it at 1000, stores it, and refreshes the per-CPU powersave-bias
 * state of every online CPU via ondemand_powersave_bias_init().
 * Returns @count on success or -EINVAL when no number could be parsed.
 */
352static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
353				    const char *buf, size_t count)
354{
355	unsigned int input;
356	int ret;
357	ret = sscanf(buf, "%u", &input);
358
359	if (ret != 1)
360		return -EINVAL;
361
362	if (input > 1000)
363		input = 1000;
364
365	dbs_tuners_ins.powersave_bias = input;
366	ondemand_powersave_bias_init();
367	return count;
368}
369
370define_one_global_rw(sampling_rate);
371define_one_global_rw(io_is_busy);
372define_one_global_rw(up_threshold);
373define_one_global_rw(sampling_down_factor);
374define_one_global_rw(ignore_nice_load);
375define_one_global_rw(powersave_bias);
376
/* NULL-terminated list of the attributes that make up dbs_attr_group. */
377static struct attribute *dbs_attributes[] = {
378	&sampling_rate_min.attr,
379	&sampling_rate.attr,
380	&up_threshold.attr,
381	&sampling_down_factor.attr,
382	&ignore_nice_load.attr,
383	&powersave_bias.attr,
384	&io_is_busy.attr,
385	NULL
386};
387
/*
 * Attribute group named "ondemand"; cpufreq_governor_dbs() creates it on
 * cpufreq_global_kobject for the first user and removes it with the last.
 */
388static struct attribute_group dbs_attr_group = {
389	.attrs = dbs_attributes,
390	.name = "ondemand",
391};
392
393/************************** sysfs end ************************/
 
 
 
 
 
 
 
 
 
394
/*
 * Raise the frequency of policy @p towards @freq.
 *
 * With powersave_bias set, @freq is first replaced by the result of
 * powersave_bias_target() and requested with CPUFREQ_RELATION_L. Without
 * it, nothing is done when the policy already runs at p->max; otherwise
 * @freq is requested with CPUFREQ_RELATION_H.
 */
395static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
396{
397	if (dbs_tuners_ins.powersave_bias)
398		freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
399	else if (p->cur == p->max)
400		return;
401
402	__cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ?
403			CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
404}
405
/*
 * Evaluate the load of every CPU in the policy of @this_dbs_info and adjust
 * the policy frequency.
 *
 * For each CPU the wall/idle/iowait deltas since the previous sample give a
 * load percentage, which is multiplied by an average frequency; the largest
 * product across the CPUs (max_load_freq) drives the decision:
 *  - above up_threshold * policy->cur: request policy->max;
 *  - below (up_threshold - down_differential) * policy->cur: request the
 *    frequency that would sustain the load, not lower than policy->min.
 * freq_lo is cleared on entry and may be set again by
 * powersave_bias_target().
 */
406static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
407{
408	unsigned int max_load_freq;
409
410	struct cpufreq_policy *policy;
411	unsigned int j;
412
413	this_dbs_info->freq_lo = 0;
414	policy = this_dbs_info->cur_policy;
415
416	/*
417	 * Every sampling_rate, we check, if current idle time is less
418	 * than 20% (default), then we try to increase frequency
419	 * Every sampling_rate, we look for the lowest
420	 * frequency which can sustain the load while keeping idle time over
421	 * 30%. If such a frequency exist, we try to decrease to this frequency.
422	 *
423	 * Any frequency increase takes it to the maximum frequency.
424	 * Frequency reduction happens at minimum steps of
425	 * 5% (default) of current frequency
426	 */
427
428	/* Get Absolute Load - in terms of freq */
429	max_load_freq = 0;
430
431	for_each_cpu(j, policy->cpus) {
432		struct cpu_dbs_info_s *j_dbs_info;
433		cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
434		unsigned int idle_time, wall_time, iowait_time;
435		unsigned int load, load_freq;
436		int freq_avg;
437
438		j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
439
440		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
441		cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);
442
		/* Deltas since the previous sample; snapshots are refreshed. */
443		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
444				j_dbs_info->prev_cpu_wall);
445		j_dbs_info->prev_cpu_wall = cur_wall_time;
446
447		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
448				j_dbs_info->prev_cpu_idle);
449		j_dbs_info->prev_cpu_idle = cur_idle_time;
450
451		iowait_time = (unsigned int) cputime64_sub(cur_iowait_time,
452				j_dbs_info->prev_cpu_iowait);
453		j_dbs_info->prev_cpu_iowait = cur_iowait_time;
454
		/* With ignore_nice set, nice time is counted as idle time. */
455		if (dbs_tuners_ins.ignore_nice) {
456			cputime64_t cur_nice;
457			unsigned long cur_nice_jiffies;
458
459			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
460					 j_dbs_info->prev_cpu_nice);
461			/*
462			 * Assumption: nice time between sampling periods will
463			 * be less than 2^32 jiffies for 32 bit sys
464			 */
465			cur_nice_jiffies = (unsigned long)
466					cputime64_to_jiffies64(cur_nice);
467
468			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
469			idle_time += jiffies_to_usecs(cur_nice_jiffies);
470		}
471
472		/*
473		 * For the purpose of ondemand, waiting for disk IO is an
474		 * indication that you're performance critical, and not that
475		 * the system is actually idle. So subtract the iowait time
476		 * from the cpu idle time.
477		 */
478
479		if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time)
480			idle_time -= iowait_time;
481
		/* Skip samples that cannot yield a load in 0..100. */
482		if (unlikely(!wall_time || wall_time < idle_time))
483			continue;
484
485		load = 100 * (wall_time - idle_time) / wall_time;
486
		/* Fall back to policy->cur when no average is available. */
487		freq_avg = __cpufreq_driver_getavg(policy, j);
488		if (freq_avg <= 0)
489			freq_avg = policy->cur;
490
491		load_freq = load * freq_avg;
492		if (load_freq > max_load_freq)
493			max_load_freq = load_freq;
494	}
495
496	/* Check for frequency increase */
497	if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
498		/* If switching to max speed, apply sampling_down_factor */
499		if (policy->cur < policy->max)
500			this_dbs_info->rate_mult =
501				dbs_tuners_ins.sampling_down_factor;
502		dbs_freq_increase(policy, policy->max);
503		return;
504	}
505
506	/* Check for frequency decrease */
507	/* if we cannot reduce the frequency anymore, break out early */
508	if (policy->cur == policy->min)
509		return;
510
511	/*
512	 * The optimal frequency is the frequency that is the lowest that
513	 * can support the current CPU usage without triggering the up
514	 * policy. To be safe, we focus 10 points under the threshold.
515	 */
516	if (max_load_freq <
517	    (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
518	     policy->cur) {
519		unsigned int freq_next;
520		freq_next = max_load_freq /
521				(dbs_tuners_ins.up_threshold -
522				 dbs_tuners_ins.down_differential);
523
524		/* No longer fully busy, reset rate_mult */
525		this_dbs_info->rate_mult = 1;
526
527		if (freq_next < policy->min)
528			freq_next = policy->min;
529
530		if (!dbs_tuners_ins.powersave_bias) {
531			__cpufreq_driver_target(policy, freq_next,
532					CPUFREQ_RELATION_L);
533		} else {
534			int freq = powersave_bias_target(policy, freq_next,
535					CPUFREQ_RELATION_L);
536			__cpufreq_driver_target(policy, freq,
537				CPUFREQ_RELATION_L);
538		}
539	}
540}
 
541
/*
 * Delayed-work handler that performs one governor sample and re-queues
 * itself on the same CPU. Runs with the per-CPU timer_mutex held.
 *
 * A normal sample (always the case without powersave_bias) calls
 * dbs_check_cpu(); if that left freq_lo set, the next run is a sub-sample
 * after freq_hi_jiffies. A sub-sample switches to freq_lo and schedules the
 * next normal sample after freq_lo_jiffies.
 */
542static void do_dbs_timer(struct work_struct *work)
543{
544	struct cpu_dbs_info_s *dbs_info =
545		container_of(work, struct cpu_dbs_info_s, work.work);
546	unsigned int cpu = dbs_info->cpu;
	/* Sample type is read before the mutex is taken and reset below. */
547	int sample_type = dbs_info->sample_type;
548
549	int delay;
550
551	mutex_lock(&dbs_info->timer_mutex);
552
553	/* Common NORMAL_SAMPLE setup */
554	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
555	if (!dbs_tuners_ins.powersave_bias ||
556	    sample_type == DBS_NORMAL_SAMPLE) {
557		dbs_check_cpu(dbs_info);
558		if (dbs_info->freq_lo) {
559			/* Setup timer for SUB_SAMPLE */
560			dbs_info->sample_type = DBS_SUB_SAMPLE;
561			delay = dbs_info->freq_hi_jiffies;
562		} else {
563			/* We want all CPUs to do sampling nearly on
564			 * same jiffy
565			 */
566			delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate
567				* dbs_info->rate_mult);
568
			/* Shorten the delay so expiry lands on a multiple of it. */
569			if (num_online_cpus() > 1)
570				delay -= jiffies % delay;
571		}
572	} else {
573		__cpufreq_driver_target(dbs_info->cur_policy,
574			dbs_info->freq_lo, CPUFREQ_RELATION_H);
575		delay = dbs_info->freq_lo_jiffies;
576	}
577	schedule_delayed_work_on(cpu, &dbs_info->work, delay);
578	mutex_unlock(&dbs_info->timer_mutex);
579}
580
/*
 * Arm the sampling work for @dbs_info: mark the next run as a normal
 * sample, initialise the deferrable delayed work with do_dbs_timer() as
 * handler, and queue it on dbs_info->cpu after one sampling period.
 */
581static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
582{
583	/* We want all CPUs to do sampling nearly on same jiffy */
584	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
585
586	if (num_online_cpus() > 1)
587		delay -= jiffies % delay;
588
589	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
590	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
591	schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
592}
 
593
/* Stop the sampling work of @dbs_info via cancel_delayed_work_sync(). */
594static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
595{
596	cancel_delayed_work_sync(&dbs_info->work);
597}
 
598
599/*
600 * Not all CPUs want IO time to be accounted as busy; this depends on how
601 * efficient idling at a higher frequency/voltage is.
602 * Pavel Machek says this is not so for various generations of AMD and old
603 * Intel systems.
604 * Mike Chan (android.com) claims this is also not true for ARM.
605 * Because of this, whitelist specific known (series) of CPUs by default, and
606 * leave all others up to the user.
607 */
/*
 * Default for the io_is_busy tunable. Returns 1 on CONFIG_X86 builds whose
 * boot CPU is Intel family 6 with model >= 15, and 0 otherwise.
 */
608static int should_io_be_busy(void)
609{
610#if defined(CONFIG_X86)
611	/*
612	 * For Intel, Core 2 (model 15) and later have an efficient idle.
613	 */
614	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
615	    boot_cpu_data.x86 == 6 &&
616	    boot_cpu_data.x86_model >= 15)
617		return 1;
618#endif
619	return 0;
620}
621
622static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
623				   unsigned int event)
624{
625	unsigned int cpu = policy->cpu;
626	struct cpu_dbs_info_s *this_dbs_info;
627	unsigned int j;
628	int rc;
629
630	this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
631
632	switch (event) {
633	case CPUFREQ_GOV_START:
634		if ((!cpu_online(cpu)) || (!policy->cur))
635			return -EINVAL;
636
637		mutex_lock(&dbs_mutex);
638
639		dbs_enable++;
640		for_each_cpu(j, policy->cpus) {
641			struct cpu_dbs_info_s *j_dbs_info;
642			j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
643			j_dbs_info->cur_policy = policy;
644
645			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
646						&j_dbs_info->prev_cpu_wall);
647			if (dbs_tuners_ins.ignore_nice) {
648				j_dbs_info->prev_cpu_nice =
649						kstat_cpu(j).cpustat.nice;
650			}
651		}
652		this_dbs_info->cpu = cpu;
653		this_dbs_info->rate_mult = 1;
654		ondemand_powersave_bias_init_cpu(cpu);
655		/*
656		 * Start the timerschedule work, when this governor
657		 * is used for first time
658		 */
659		if (dbs_enable == 1) {
660			unsigned int latency;
661
662			rc = sysfs_create_group(cpufreq_global_kobject,
663						&dbs_attr_group);
664			if (rc) {
665				mutex_unlock(&dbs_mutex);
666				return rc;
667			}
668
669			/* policy latency is in nS. Convert it to uS first */
670			latency = policy->cpuinfo.transition_latency / 1000;
671			if (latency == 0)
672				latency = 1;
673			/* Bring kernel and HW constraints together */
674			min_sampling_rate = max(min_sampling_rate,
675					MIN_LATENCY_MULTIPLIER * latency);
676			dbs_tuners_ins.sampling_rate =
677				max(min_sampling_rate,
678				    latency * LATENCY_MULTIPLIER);
679			dbs_tuners_ins.io_is_busy = should_io_be_busy();
680		}
681		mutex_unlock(&dbs_mutex);
682
683		mutex_init(&this_dbs_info->timer_mutex);
684		dbs_timer_init(this_dbs_info);
685		break;
686
687	case CPUFREQ_GOV_STOP:
688		dbs_timer_exit(this_dbs_info);
689
690		mutex_lock(&dbs_mutex);
691		mutex_destroy(&this_dbs_info->timer_mutex);
692		dbs_enable--;
693		mutex_unlock(&dbs_mutex);
694		if (!dbs_enable)
695			sysfs_remove_group(cpufreq_global_kobject,
696					   &dbs_attr_group);
697
698		break;
699
700	case CPUFREQ_GOV_LIMITS:
701		mutex_lock(&this_dbs_info->timer_mutex);
702		if (policy->max < this_dbs_info->cur_policy->cur)
703			__cpufreq_driver_target(this_dbs_info->cur_policy,
704				policy->max, CPUFREQ_RELATION_H);
705		else if (policy->min > this_dbs_info->cur_policy->cur)
706			__cpufreq_driver_target(this_dbs_info->cur_policy,
707				policy->min, CPUFREQ_RELATION_L);
708		mutex_unlock(&this_dbs_info->timer_mutex);
709		break;
710	}
711	return 0;
712}
713
/*
 * Module/initcall entry point. Probes get_cpu_idle_time_us() on the current
 * CPU to choose between the micro-accounting defaults and the jiffy-based
 * minimum sampling rate, then registers the governor.
 * Returns the result of cpufreq_register_governor().
 */
714static int __init cpufreq_gov_dbs_init(void)
715{
716	cputime64_t wall;
717	u64 idle_time;
718	int cpu = get_cpu();
719
720	idle_time = get_cpu_idle_time_us(cpu, &wall);
721	put_cpu();
722	if (idle_time != -1ULL) {
723		/* Idle micro accounting is supported. Use finer thresholds */
724		dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
725		dbs_tuners_ins.down_differential =
726					MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
727		/*
728		 * In no_hz/micro accounting case we set the minimum frequency
729		 * not depending on HZ, but fixed (very low). The deferred
730		 * timer might skip some samples if idle/sleeping as needed.
731		*/
732		min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
733	} else {
734		/* For correct statistics, we need 10 ticks for each measure */
735		min_sampling_rate =
736			MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
737	}
738
739	return cpufreq_register_governor(&cpufreq_gov_ondemand);
740}
741
/* Module exit: unregister the ondemand governor. */
742static void __exit cpufreq_gov_dbs_exit(void)
743{
744	cpufreq_unregister_governor(&cpufreq_gov_ondemand);
745}
746
747
748MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
749MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
750MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
751	"Low Latency Frequency Transition capable processors");
752MODULE_LICENSE("GPL");
753
754#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
755fs_initcall(cpufreq_gov_dbs_init);
756#else
757module_init(cpufreq_gov_dbs_init);
758#endif
759module_exit(cpufreq_gov_dbs_exit);
v3.15
  1/*
  2 *  drivers/cpufreq/cpufreq_ondemand.c
  3 *
  4 *  Copyright (C)  2001 Russell King
  5 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
  6 *                      Jun Nakajima <jun.nakajima@intel.com>
  7 *
  8 * This program is free software; you can redistribute it and/or modify
  9 * it under the terms of the GNU General Public License version 2 as
 10 * published by the Free Software Foundation.
 11 */
 12
 13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 14
 
 
 15#include <linux/cpu.h>
 16#include <linux/percpu-defs.h>
 17#include <linux/slab.h>
 
 
 18#include <linux/tick.h>
 19#include "cpufreq_governor.h"
 
 
 
 
 
 
 20
 21/* On-demand governor macros */
 22#define DEF_FREQUENCY_UP_THRESHOLD		(80)
 23#define DEF_SAMPLING_DOWN_FACTOR		(1)
 24#define MAX_SAMPLING_DOWN_FACTOR		(100000)
 
 25#define MICRO_FREQUENCY_UP_THRESHOLD		(95)
 26#define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
 27#define MIN_FREQUENCY_UP_THRESHOLD		(11)
 28#define MAX_FREQUENCY_UP_THRESHOLD		(100)
 29
 30static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 31
 32static struct od_ops od_ops;
 
 
 33
 34#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
 35static struct cpufreq_governor cpufreq_gov_ondemand;
 36#endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 37
 38static unsigned int default_powersave_bias;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 39
 /*
  * Refresh the powersave-bias state of @cpu: re-read its frequency table
  * with cpufreq_frequency_get_table() and clear freq_lo.
  */
 40static void ondemand_powersave_bias_init_cpu(int cpu)
 41{
 42	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
 43
 44	dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
 45	dbs_info->freq_lo = 0;
 46}
 47
 48/*
 49 * Not all CPUs want IO time to be accounted as busy; this depends on how
 50 * efficient idling at a higher frequency/voltage is.
 51 * Pavel Machek says this is not so for various generations of AMD and old
 52 * Intel systems.
 53 * Mike Chan (android.com) claims this is also not true for ARM.
 54 * Because of this, whitelist specific known (series) of CPUs by default, and
 55 * leave all others up to the user.
 56 */
 /*
  * Returns 1 on CONFIG_X86 builds whose boot CPU is Intel family 6 with
  * model >= 15, and 0 otherwise.
  */
 57static int should_io_be_busy(void)
 58{
 59#if defined(CONFIG_X86)
 60	/*
 61	 * For Intel, Core 2 (model 15) and later have an efficient idle.
 62	 */
 63	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
 64			boot_cpu_data.x86 == 6 &&
 65			boot_cpu_data.x86_model >= 15)
 66		return 1;
 67#endif
 68	return 0;
 69}
 70
 71/*
 72 * Find right freq to be set now with powersave_bias on.
 73 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 74 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
 *
 * The tunables are read from policy->governor_data->tuners. Without a
 * frequency table, @freq_next is returned unchanged and freq_lo is cleared;
 * when the biased average maps to a single table entry, that entry is
 * returned and freq_lo is cleared as well.
 75 */
 76static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
 77		unsigned int freq_next, unsigned int relation)
 78{
 79	unsigned int freq_req, freq_reduc, freq_avg;
 80	unsigned int freq_hi, freq_lo;
 81	unsigned int index = 0;
 82	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
 83	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
 84						   policy->cpu);
 85	struct dbs_data *dbs_data = policy->governor_data;
 86	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 87
 88	if (!dbs_info->freq_table) {
 89		dbs_info->freq_lo = 0;
 90		dbs_info->freq_lo_jiffies = 0;
 91		return freq_next;
 92	}
 93
 94	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
 95			relation, &index);
 96	freq_req = dbs_info->freq_table[index].frequency;
	/* powersave_bias is applied as a fraction of 1000 */
 97	freq_reduc = freq_req * od_tuners->powersave_bias / 1000;
 98	freq_avg = freq_req - freq_reduc;
 99
100	/* Find freq bounds for freq_avg in freq_table */
101	index = 0;
102	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
103			CPUFREQ_RELATION_H, &index);
104	freq_lo = dbs_info->freq_table[index].frequency;
105	index = 0;
106	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
107			CPUFREQ_RELATION_L, &index);
108	freq_hi = dbs_info->freq_table[index].frequency;
109
110	/* Find out how long we have to be in hi and lo freqs */
111	if (freq_hi == freq_lo) {
112		dbs_info->freq_lo = 0;
113		dbs_info->freq_lo_jiffies = 0;
114		return freq_lo;
115	}
116	jiffies_total = usecs_to_jiffies(od_tuners->sampling_rate);
	/*
	 * Split the sampling period so the time-weighted mean of freq_hi and
	 * freq_lo is freq_avg; adding half the divisor rounds to nearest.
	 */
117	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
118	jiffies_hi += ((freq_hi - freq_lo) / 2);
119	jiffies_hi /= (freq_hi - freq_lo);
120	jiffies_lo = jiffies_total - jiffies_hi;
121	dbs_info->freq_lo = freq_lo;
122	dbs_info->freq_lo_jiffies = jiffies_lo;
123	dbs_info->freq_hi_jiffies = jiffies_hi;
124	return freq_hi;
125}
126
 
 
 
 
 
 
 
/* Run ondemand_powersave_bias_init_cpu() for every online CPU. */
127static void ondemand_powersave_bias_init(void)
128{
129	int i;
130	for_each_online_cpu(i) {
131		ondemand_powersave_bias_init_cpu(i);
132	}
133}
134
/*
 * Raise the frequency of @policy towards @freq.
 *
 * With powersave_bias set in the policy's tuners, @freq is first replaced
 * by the result of od_ops.powersave_bias_target() and requested with
 * CPUFREQ_RELATION_L. Without it, nothing is done when the policy already
 * runs at policy->max; otherwise @freq is requested with
 * CPUFREQ_RELATION_H.
 */
135static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq)
136{
137	struct dbs_data *dbs_data = policy->governor_data;
138	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
139
140	if (od_tuners->powersave_bias)
141		freq = od_ops.powersave_bias_target(policy, freq,
142				CPUFREQ_RELATION_H);
143	else if (policy->cur == policy->max)
144		return;
145
146	__cpufreq_driver_target(policy, freq, od_tuners->powersave_bias ?
147			CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
148}
149
150/*
151 * Every sampling_rate, we check, if current idle time is less than 20%
152 * (default), then we try to increase frequency. Else, we adjust the frequency
153 * proportional to load.
 *
 * @cpu:  CPU whose per-CPU state and policy are used.
 * @load: load value compared against up_threshold and used as a percentage
 *        of policy->cpuinfo.max_freq (presumably 0..100; the caller is not
 *        visible here).
154 */
155static void od_check_cpu(int cpu, unsigned int load)
156{
157	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
158	struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy;
159	struct dbs_data *dbs_data = policy->governor_data;
160	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
161
	/* Cleared here; powersave_bias_target() may set it again. */
162	dbs_info->freq_lo = 0;
163
164	/* Check for frequency increase */
165	if (load > od_tuners->up_threshold) {
166		/* If switching to max speed, apply sampling_down_factor */
167		if (policy->cur < policy->max)
168			dbs_info->rate_mult =
169				od_tuners->sampling_down_factor;
170		dbs_freq_increase(policy, policy->max);
171	} else {
172		/* Calculate the next frequency proportional to load */
173		unsigned int freq_next;
174		freq_next = load * policy->cpuinfo.max_freq / 100;
175
176		/* No longer fully busy, reset rate_mult */
177		dbs_info->rate_mult = 1;
178
179		if (!od_tuners->powersave_bias) {
180			__cpufreq_driver_target(policy, freq_next,
181					CPUFREQ_RELATION_L);
182			return;
183		}
184
185		freq_next = od_ops.powersave_bias_target(policy, freq_next,
186					CPUFREQ_RELATION_L);
187		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
188	}
189}
190
/*
 * Delayed-work handler for the ondemand governor. Works on the per-CPU
 * state of the policy's CPU (core_dbs_info) with its timer_mutex held.
 *
 * When need_load_eval() returns false no evaluation is done and the work
 * is re-queued with modify_all == false. Otherwise a sub-sample switches
 * to freq_lo for freq_lo_jiffies, and a normal sample calls dbs_check_cpu()
 * and, if freq_lo was set, schedules a sub-sample after freq_hi_jiffies.
 * A zero delay is replaced by delay_for_sampling_rate() of
 * sampling_rate * rate_mult.
 *
 * NOTE(review): need_load_eval(), dbs_check_cpu(), delay_for_sampling_rate()
 * and gov_queue_work() are defined outside this file; their exact semantics
 * should be confirmed against cpufreq_governor.h.
 */
191static void od_dbs_timer(struct work_struct *work)
192{
193	struct od_cpu_dbs_info_s *dbs_info =
194		container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work);
195	unsigned int cpu = dbs_info->cdbs.cur_policy->cpu;
196	struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info,
197			cpu);
198	struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data;
199	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
200	int delay = 0, sample_type = core_dbs_info->sample_type;
201	bool modify_all = true;
202
203	mutex_lock(&core_dbs_info->cdbs.timer_mutex);
204	if (!need_load_eval(&core_dbs_info->cdbs, od_tuners->sampling_rate)) {
205		modify_all = false;
206		goto max_delay;
207	}
208
209	/* Common NORMAL_SAMPLE setup */
210	core_dbs_info->sample_type = OD_NORMAL_SAMPLE;
211	if (sample_type == OD_SUB_SAMPLE) {
212		delay = core_dbs_info->freq_lo_jiffies;
213		__cpufreq_driver_target(core_dbs_info->cdbs.cur_policy,
214				core_dbs_info->freq_lo, CPUFREQ_RELATION_H);
215	} else {
216		dbs_check_cpu(dbs_data, cpu);
217		if (core_dbs_info->freq_lo) {
218			/* Setup timer for SUB_SAMPLE */
219			core_dbs_info->sample_type = OD_SUB_SAMPLE;
220			delay = core_dbs_info->freq_hi_jiffies;
221		}
222	}
223
224max_delay:
	/* delay == 0 means no sub-sample timing was chosen above. */
225	if (!delay)
226		delay = delay_for_sampling_rate(od_tuners->sampling_rate
227				* core_dbs_info->rate_mult);
228
229	gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, delay, modify_all);
230	mutex_unlock(&core_dbs_info->cdbs.timer_mutex);
231}
232
233/************************** sysfs interface ************************/
234static struct common_dbs_data od_dbs_cdata;
235
236/**
237 * update_sampling_rate - update sampling rate effective immediately if needed.
238 * @new_rate: new sampling rate
239 *
240 * If new rate is smaller than the old, simply updating
241 * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the
242 * original sampling_rate was 1 second and the requested new sampling rate is 10
243 * ms because the user needs immediate reaction from ondemand governor, but not
244 * sure if higher frequency will be required or not, then, the governor may
245 * change the sampling rate too late; up to 1 second later. Thus, if we are
246 * reducing the sampling rate, we need to make the new value effective
247 * immediately.
248 */
249static void update_sampling_rate(struct dbs_data *dbs_data,
250		unsigned int new_rate)
251{
252	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
253	int cpu;
254
255	od_tuners->sampling_rate = new_rate = max(new_rate,
256			dbs_data->min_sampling_rate);
257
258	for_each_online_cpu(cpu) {
259		struct cpufreq_policy *policy;
260		struct od_cpu_dbs_info_s *dbs_info;
261		unsigned long next_sampling, appointed_at;
262
263		policy = cpufreq_cpu_get(cpu);
264		if (!policy)
265			continue;
266		if (policy->governor != &cpufreq_gov_ondemand) {
267			cpufreq_cpu_put(policy);
268			continue;
269		}
270		dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
271		cpufreq_cpu_put(policy);
272
273		mutex_lock(&dbs_info->cdbs.timer_mutex);
274
275		if (!delayed_work_pending(&dbs_info->cdbs.work)) {
276			mutex_unlock(&dbs_info->cdbs.timer_mutex);
277			continue;
278		}
279
280		next_sampling = jiffies + usecs_to_jiffies(new_rate);
281		appointed_at = dbs_info->cdbs.work.timer.expires;
282
283		if (time_before(next_sampling, appointed_at)) {
284
285			mutex_unlock(&dbs_info->cdbs.timer_mutex);
286			cancel_delayed_work_sync(&dbs_info->cdbs.work);
287			mutex_lock(&dbs_info->cdbs.timer_mutex);
 
 
 
 
 
 
 
 
 
 
288
289			gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy,
290					usecs_to_jiffies(new_rate), true);
291
292		}
293		mutex_unlock(&dbs_info->cdbs.timer_mutex);
294	}
295}
296
297static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
298		size_t count)
299{
300	unsigned int input;
301	int ret;
302	ret = sscanf(buf, "%u", &input);
303	if (ret != 1)
304		return -EINVAL;
305
306	update_sampling_rate(dbs_data, input);
307	return count;
308}
309
310static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf,
311		size_t count)
312{
313	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
314	unsigned int input;
315	int ret;
316	unsigned int j;
317
318	ret = sscanf(buf, "%u", &input);
319	if (ret != 1)
320		return -EINVAL;
321	od_tuners->io_is_busy = !!input;
322
323	/* we need to re-evaluate prev_cpu_idle */
324	for_each_online_cpu(j) {
325		struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
326									j);
327		dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
328			&dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy);
329	}
330	return count;
331}
332
333static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
334		size_t count)
335{
336	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
337	unsigned int input;
338	int ret;
339	ret = sscanf(buf, "%u", &input);
340
341	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
342			input < MIN_FREQUENCY_UP_THRESHOLD) {
343		return -EINVAL;
344	}
345
346	od_tuners->up_threshold = input;
347	return count;
348}
349
350static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
351		const char *buf, size_t count)
352{
353	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
354	unsigned int input, j;
355	int ret;
356	ret = sscanf(buf, "%u", &input);
357
358	if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
359		return -EINVAL;
360	od_tuners->sampling_down_factor = input;
361
362	/* Reset down sampling multiplier in case it was active */
363	for_each_online_cpu(j) {
364		struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
365				j);
366		dbs_info->rate_mult = 1;
367	}
368	return count;
369}
370
371static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
372		const char *buf, size_t count)
373{
374	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
375	unsigned int input;
376	int ret;
377
378	unsigned int j;
379
380	ret = sscanf(buf, "%u", &input);
381	if (ret != 1)
382		return -EINVAL;
383
384	if (input > 1)
385		input = 1;
386
387	if (input == od_tuners->ignore_nice_load) { /* nothing to do */
388		return count;
389	}
390	od_tuners->ignore_nice_load = input;
391
392	/* we need to re-evaluate prev_cpu_idle */
393	for_each_online_cpu(j) {
394		struct od_cpu_dbs_info_s *dbs_info;
395		dbs_info = &per_cpu(od_cpu_dbs_info, j);
396		dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
397			&dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy);
398		if (od_tuners->ignore_nice_load)
399			dbs_info->cdbs.prev_cpu_nice =
400				kcpustat_cpu(j).cpustat[CPUTIME_NICE];
401
402	}
403	return count;
404}
405
406static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf,
407		size_t count)
408{
409	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
410	unsigned int input;
411	int ret;
412	ret = sscanf(buf, "%u", &input);
413
414	if (ret != 1)
415		return -EINVAL;
416
417	if (input > 1000)
418		input = 1000;
419
420	od_tuners->powersave_bias = input;
421	ondemand_powersave_bias_init();
422	return count;
423}
424
/*
 * Instantiate the sysfs show/store wrappers for each ondemand tunable.
 * NOTE(review): presumably each show_store_one() expansion forwards writes to
 * the matching store_<name>() handler defined above - confirm against the
 * macro definition.
 */
show_store_one(od, sampling_rate);
show_store_one(od, io_is_busy);
show_store_one(od, up_threshold);
show_store_one(od, sampling_down_factor);
show_store_one(od, ignore_nice_load);
show_store_one(od, powersave_bias);
declare_show_sampling_rate_min(od);

/* Read-write attribute objects, one per tunable. */
gov_sys_pol_attr_rw(sampling_rate);
gov_sys_pol_attr_rw(io_is_busy);
gov_sys_pol_attr_rw(up_threshold);
gov_sys_pol_attr_rw(sampling_down_factor);
gov_sys_pol_attr_rw(ignore_nice_load);
gov_sys_pol_attr_rw(powersave_bias);

/* sampling_rate_min is exposed read-only. */
gov_sys_pol_attr_ro(sampling_rate_min);
440
441static struct attribute *dbs_attributes_gov_sys[] = {
442	&sampling_rate_min_gov_sys.attr,
443	&sampling_rate_gov_sys.attr,
444	&up_threshold_gov_sys.attr,
445	&sampling_down_factor_gov_sys.attr,
446	&ignore_nice_load_gov_sys.attr,
447	&powersave_bias_gov_sys.attr,
448	&io_is_busy_gov_sys.attr,
449	NULL
450};
451
452static struct attribute_group od_attr_group_gov_sys = {
453	.attrs = dbs_attributes_gov_sys,
454	.name = "ondemand",
455};
456
457static struct attribute *dbs_attributes_gov_pol[] = {
458	&sampling_rate_min_gov_pol.attr,
459	&sampling_rate_gov_pol.attr,
460	&up_threshold_gov_pol.attr,
461	&sampling_down_factor_gov_pol.attr,
462	&ignore_nice_load_gov_pol.attr,
463	&powersave_bias_gov_pol.attr,
464	&io_is_busy_gov_pol.attr,
465	NULL
466};
467
468static struct attribute_group od_attr_group_gov_pol = {
469	.attrs = dbs_attributes_gov_pol,
470	.name = "ondemand",
471};
 
 
472
473/************************** sysfs end ************************/
 
 
474
475static int od_init(struct dbs_data *dbs_data)
476{
477	struct od_dbs_tuners *tuners;
478	u64 idle_time;
479	int cpu;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
480
481	tuners = kzalloc(sizeof(*tuners), GFP_KERNEL);
482	if (!tuners) {
483		pr_err("%s: kzalloc failed\n", __func__);
484		return -ENOMEM;
485	}
486
487	cpu = get_cpu();
488	idle_time = get_cpu_idle_time_us(cpu, NULL);
489	put_cpu();
490	if (idle_time != -1ULL) {
491		/* Idle micro accounting is supported. Use finer thresholds */
492		tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
493		/*
494		 * In nohz/micro accounting case we set the minimum frequency
495		 * not depending on HZ, but fixed (very low). The deferred
496		 * timer might skip some samples if idle/sleeping as needed.
497		*/
498		dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
499	} else {
500		tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
 
 
 
 
 
 
501
502		/* For correct statistics, we need 10 ticks for each measure */
503		dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
504			jiffies_to_usecs(10);
 
 
 
 
505	}
506
507	tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
508	tuners->ignore_nice_load = 0;
509	tuners->powersave_bias = default_powersave_bias;
510	tuners->io_is_busy = should_io_be_busy();
 
 
 
 
 
511
512	dbs_data->tuners = tuners;
513	mutex_init(&dbs_data->mutex);
514	return 0;
515}
516
517static void od_exit(struct dbs_data *dbs_data)
518{
519	kfree(dbs_data->tuners);
520}
 
 
 
 
 
 
 
 
521
/*
 * Instantiate the per-CPU accessor routines for od_cpu_dbs_info.
 * NOTE(review): presumably this defines the get_cpu_cdbs() and
 * get_cpu_dbs_info_s() helpers referenced by od_dbs_cdata - confirm against
 * the macro definition.
 */
define_get_cpu_dbs_routines(od_cpu_dbs_info);
 
523
524static struct od_ops od_ops = {
525	.powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu,
526	.powersave_bias_target = generic_powersave_bias_target,
527	.freq_increase = dbs_freq_increase,
528};
529
530static struct common_dbs_data od_dbs_cdata = {
531	.governor = GOV_ONDEMAND,
532	.attr_group_gov_sys = &od_attr_group_gov_sys,
533	.attr_group_gov_pol = &od_attr_group_gov_pol,
534	.get_cpu_cdbs = get_cpu_cdbs,
535	.get_cpu_dbs_info_s = get_cpu_dbs_info_s,
536	.gov_dbs_timer = od_dbs_timer,
537	.gov_check_cpu = od_check_cpu,
538	.gov_ops = &od_ops,
539	.init = od_init,
540	.exit = od_exit,
541};
542
543static void od_set_powersave_bias(unsigned int powersave_bias)
544{
545	struct cpufreq_policy *policy;
546	struct dbs_data *dbs_data;
547	struct od_dbs_tuners *od_tuners;
548	unsigned int cpu;
549	cpumask_t done;
550
551	default_powersave_bias = powersave_bias;
552	cpumask_clear(&done);
553
554	get_online_cpus();
555	for_each_online_cpu(cpu) {
556		if (cpumask_test_cpu(cpu, &done))
557			continue;
558
559		policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy;
560		if (!policy)
561			continue;
562
563		cpumask_or(&done, &done, policy->cpus);
564
565		if (policy->governor != &cpufreq_gov_ondemand)
566			continue;
 
 
 
 
 
 
 
 
 
 
 
 
 
567
568		dbs_data = policy->governor_data;
569		od_tuners = dbs_data->tuners;
570		od_tuners->powersave_bias = default_powersave_bias;
 
 
 
 
571	}
572	put_online_cpus();
 
573}
574
575void od_register_powersave_bias_handler(unsigned int (*f)
576		(struct cpufreq_policy *, unsigned int, unsigned int),
577		unsigned int powersave_bias)
578{
579	od_ops.powersave_bias_target = f;
580	od_set_powersave_bias(powersave_bias);
 
 
 
 
 
 
 
581}
582EXPORT_SYMBOL_GPL(od_register_powersave_bias_handler);
583
584void od_unregister_powersave_bias_handler(void)
585{
586	od_ops.powersave_bias_target = generic_powersave_bias_target;
587	od_set_powersave_bias(0);
588}
589EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler);
590
591static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy,
592		unsigned int event)
 
 
 
 
 
 
 
 
593{
594	return cpufreq_governor_dbs(policy, &od_dbs_cdata, event);
 
 
 
 
 
 
 
 
 
595}
596
597#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
598static
599#endif
600struct cpufreq_governor cpufreq_gov_ondemand = {
601	.name			= "ondemand",
602	.governor		= od_cpufreq_governor_dbs,
603	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
604	.owner			= THIS_MODULE,
605};
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
606
607static int __init cpufreq_gov_dbs_init(void)
608{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609	return cpufreq_register_governor(&cpufreq_gov_ondemand);
610}
611
612static void __exit cpufreq_gov_dbs_exit(void)
613{
614	cpufreq_unregister_governor(&cpufreq_gov_ondemand);
615}
 
616
617MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
618MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
619MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
620	"Low Latency Frequency Transition capable processors");
621MODULE_LICENSE("GPL");
622
623#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
624fs_initcall(cpufreq_gov_dbs_init);
625#else
626module_init(cpufreq_gov_dbs_init);
627#endif
628module_exit(cpufreq_gov_dbs_exit);