timings.c - kernel/irq/timings.c - Linux source code v3.15

Note: File does not exist in v3.15.
  1// SPDX-License-Identifier: GPL-2.0
  2// Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
  3
  4#include <linux/kernel.h>
  5#include <linux/percpu.h>
  6#include <linux/slab.h>
  7#include <linux/static_key.h>
  8#include <linux/interrupt.h>
  9#include <linux/idr.h>
 10#include <linux/irq.h>
 11#include <linux/math64.h>
 12
 13#include <trace/events/irq.h>
 14
 15#include "internals.h"
 16
 17DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
 18
 19DEFINE_PER_CPU(struct irq_timings, irq_timings);
 20
 21struct irqt_stat {
 22	u64	next_evt;
 23	u64	last_ts;
 24	u64	variance;
 25	u32	avg;
 26	u32	nr_samples;
 27	int	anomalies;
 28	int	valid;
 29};
 30
 31static DEFINE_IDR(irqt_stats);
 32
 33void irq_timings_enable(void)
 34{
 35	static_branch_enable(&irq_timing_enabled);
 36}
 37
 38void irq_timings_disable(void)
 39{
 40	static_branch_disable(&irq_timing_enabled);
 41}
 42
 43/**
 44 * irqs_update - update the irq timing statistics with a new timestamp
 45 *
 46 * @irqs: an irqt_stat struct pointer
 47 * @ts: the new timestamp
 48 *
 49 * The statistics are computed online, in other words, the code is
 50 * designed to compute the statistics on a stream of values rather
 51 * than doing multiple passes on the values to compute the average,
 52 * then the variance. The integer division introduces a loss of
 53 * precision but with an acceptable error margin regarding the results
 54 * we would have with the double floating precision: we are dealing
 55 * with nanosec, so big numbers, consequently the mantisse is
 56 * negligeable, especially when converting the time in usec
 57 * afterwards.
 58 *
 59 * The computation happens at idle time. When the CPU is not idle, the
 60 * interrupts' timestamps are stored in the circular buffer, when the
 61 * CPU goes idle and this routine is called, all the buffer's values
 62 * are injected in the statistical model continuying to extend the
 63 * statistics from the previous busy-idle cycle.
 64 *
 65 * The observations showed a device will trigger a burst of periodic
 66 * interrupts followed by one or two peaks of longer time, for
 67 * instance when a SD card device flushes its cache, then the periodic
 68 * intervals occur again. A one second inactivity period resets the
 69 * stats, that gives us the certitude the statistical values won't
 70 * exceed 1x10^9, thus the computation won't overflow.
 71 *
 72 * Basically, the purpose of the algorithm is to watch the periodic
 73 * interrupts and eliminate the peaks.
 74 *
 75 * An interrupt is considered periodically stable if the interval of
 76 * its occurences follow the normal distribution, thus the values
 77 * comply with:
 78 *
 79 *      avg - 3 x stddev < value < avg + 3 x stddev
 80 *
 81 * Which can be simplified to:
 82 *
 83 *      -3 x stddev < value - avg < 3 x stddev
 84 *
 85 *      abs(value - avg) < 3 x stddev
 86 *
 87 * In order to save a costly square root computation, we use the
 88 * variance. For the record, stddev = sqrt(variance). The equation
 89 * above becomes:
 90 *
 91 *      abs(value - avg) < 3 x sqrt(variance)
 92 *
 93 * And finally we square it:
 94 *
 95 *      (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
 96 *
 97 *      (value - avg) x (value - avg) < 9 x variance
 98 *
 99 * Statistically speaking, any values out of this interval is
100 * considered as an anomaly and is discarded. However, a normal
101 * distribution appears when the number of samples is 30 (it is the
102 * rule of thumb in statistics, cf. "30 samples" on Internet). When
103 * there are three consecutive anomalies, the statistics are resetted.
104 *
105 */
106static void irqs_update(struct irqt_stat *irqs, u64 ts)
107{
108	u64 old_ts = irqs->last_ts;
109	u64 variance = 0;
110	u64 interval;
111	s64 diff;
112
113	/*
114	 * The timestamps are absolute time values, we need to compute
115	 * the timing interval between two interrupts.
116	 */
117	irqs->last_ts = ts;
118
119	/*
120	 * The interval type is u64 in order to deal with the same
121	 * type in our computation, that prevent mindfuck issues with
122	 * overflow, sign and division.
123	 */
124	interval = ts - old_ts;
125
126	/*
127	 * The interrupt triggered more than one second apart, that
128	 * ends the sequence as predictible for our purpose. In this
129	 * case, assume we have the beginning of a sequence and the
130	 * timestamp is the first value. As it is impossible to
131	 * predict anything at this point, return.
132	 *
133	 * Note the first timestamp of the sequence will always fall
134	 * in this test because the old_ts is zero. That is what we
135	 * want as we need another timestamp to compute an interval.
136	 */
137	if (interval >= NSEC_PER_SEC) {
138		memset(irqs, 0, sizeof(*irqs));
139		irqs->last_ts = ts;
140		return;
141	}
142
143	/*
144	 * Pre-compute the delta with the average as the result is
145	 * used several times in this function.
146	 */
147	diff = interval - irqs->avg;
148
149	/*
150	 * Increment the number of samples.
151	 */
152	irqs->nr_samples++;
153
154	/*
155	 * Online variance divided by the number of elements if there
156	 * is more than one sample.  Normally the formula is division
157	 * by nr_samples - 1 but we assume the number of element will be
158	 * more than 32 and dividing by 32 instead of 31 is enough
159	 * precise.
160	 */
161	if (likely(irqs->nr_samples > 1))
162		variance = irqs->variance >> IRQ_TIMINGS_SHIFT;
163
164	/*
165	 * The rule of thumb in statistics for the normal distribution
166	 * is having at least 30 samples in order to have the model to
167	 * apply. Values outside the interval are considered as an
168	 * anomaly.
169	 */
170	if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {
171		/*
172		 * After three consecutive anomalies, we reset the
173		 * stats as it is no longer stable enough.
174		 */
175		if (irqs->anomalies++ >= 3) {
176			memset(irqs, 0, sizeof(*irqs));
177			irqs->last_ts = ts;
178			return;
179		}
180	} else {
181		/*
182		 * The anomalies must be consecutives, so at this
183		 * point, we reset the anomalies counter.
184		 */
185		irqs->anomalies = 0;
186	}
187
188	/*
189	 * The interrupt is considered stable enough to try to predict
190	 * the next event on it.
191	 */
192	irqs->valid = 1;
193
194	/*
195	 * Online average algorithm:
196	 *
197	 *  new_average = average + ((value - average) / count)
198	 *
199	 * The variance computation depends on the new average
200	 * to be computed here first.
201	 *
202	 */
203	irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);
204
205	/*
206	 * Online variance algorithm:
207	 *
208	 *  new_variance = variance + (value - average) x (value - new_average)
209	 *
210	 * Warning: irqs->avg is updated with the line above, hence
211	 * 'interval - irqs->avg' is no longer equal to 'diff'
212	 */
213	irqs->variance = irqs->variance + (diff * (interval - irqs->avg));
214
215	/*
216	 * Update the next event
217	 */
218	irqs->next_evt = ts + irqs->avg;
219}
220
221/**
222 * irq_timings_next_event - Return when the next event is supposed to arrive
223 *
224 * During the last busy cycle, the number of interrupts is incremented
225 * and stored in the irq_timings structure. This information is
226 * necessary to:
227 *
228 * - know if the index in the table wrapped up:
229 *
230 *      If more than the array size interrupts happened during the
231 *      last busy/idle cycle, the index wrapped up and we have to
232 *      begin with the next element in the array which is the last one
233 *      in the sequence, otherwise it is a the index 0.
234 *
235 * - have an indication of the interrupts activity on this CPU
236 *   (eg. irq/sec)
237 *
238 * The values are 'consumed' after inserting in the statistical model,
239 * thus the count is reinitialized.
240 *
241 * The array of values **must** be browsed in the time direction, the
242 * timestamp must increase between an element and the next one.
243 *
244 * Returns a nanosec time based estimation of the earliest interrupt,
245 * U64_MAX otherwise.
246 */
247u64 irq_timings_next_event(u64 now)
248{
249	struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
250	struct irqt_stat *irqs;
251	struct irqt_stat __percpu *s;
252	u64 ts, next_evt = U64_MAX;
253	int i, irq = 0;
254
255	/*
256	 * This function must be called with the local irq disabled in
257	 * order to prevent the timings circular buffer to be updated
258	 * while we are reading it.
259	 */
260	lockdep_assert_irqs_disabled();
261
262	/*
263	 * Number of elements in the circular buffer: If it happens it
264	 * was flushed before, then the number of elements could be
265	 * smaller than IRQ_TIMINGS_SIZE, so the count is used,
266	 * otherwise the array size is used as we wrapped. The index
267	 * begins from zero when we did not wrap. That could be done
268	 * in a nicer way with the proper circular array structure
269	 * type but with the cost of extra computation in the
270	 * interrupt handler hot path. We choose efficiency.
271	 *
272	 * Inject measured irq/timestamp to the statistical model
273	 * while decrementing the counter because we consume the data
274	 * from our circular buffer.
275	 */
276	for (i = irqts->count & IRQ_TIMINGS_MASK,
277		     irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
278	     irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
279
280		irq = irq_timing_decode(irqts->values[i], &ts);
281
282		s = idr_find(&irqt_stats, irq);
283		if (s) {
284			irqs = this_cpu_ptr(s);
285			irqs_update(irqs, ts);
286		}
287	}
288
289	/*
290	 * Look in the list of interrupts' statistics, the earliest
291	 * next event.
292	 */
293	idr_for_each_entry(&irqt_stats, s, i) {
294
295		irqs = this_cpu_ptr(s);
296
297		if (!irqs->valid)
298			continue;
299
300		if (irqs->next_evt <= now) {
301			irq = i;
302			next_evt = now;
303
304			/*
305			 * This interrupt mustn't use in the future
306			 * until new events occur and update the
307			 * statistics.
308			 */
309			irqs->valid = 0;
310			break;
311		}
312
313		if (irqs->next_evt < next_evt) {
314			irq = i;
315			next_evt = irqs->next_evt;
316		}
317	}
318
319	return next_evt;
320}
321
322void irq_timings_free(int irq)
323{
324	struct irqt_stat __percpu *s;
325
326	s = idr_find(&irqt_stats, irq);
327	if (s) {
328		free_percpu(s);
329		idr_remove(&irqt_stats, irq);
330	}
331}
332
333int irq_timings_alloc(int irq)
334{
335	struct irqt_stat __percpu *s;
336	int id;
337
338	/*
339	 * Some platforms can have the same private interrupt per cpu,
340	 * so this function may be be called several times with the
341	 * same interrupt number. Just bail out in case the per cpu
342	 * stat structure is already allocated.
343	 */
344	s = idr_find(&irqt_stats, irq);
345	if (s)
346		return 0;
347
348	s = alloc_percpu(*s);
349	if (!s)
350		return -ENOMEM;
351
352	idr_preload(GFP_KERNEL);
353	id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);
354	idr_preload_end();
355
356	if (id < 0) {
357		free_percpu(s);
358		return id;
359	}
360
361	return 0;
362}