stats.h - kernel/sched/stats.h - Linux diff v6.13.7

  1/* SPDX-License-Identifier: GPL-2.0 */
  2#ifndef _KERNEL_STATS_H
  3#define _KERNEL_STATS_H
  4
  5#ifdef CONFIG_SCHEDSTATS
  6
  7extern struct static_key_false sched_schedstats;
  8
  9/*
 10 * Expects runqueue lock to be held for atomicity of update
 11 */
 12static inline void
 13rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
 14{
 15	if (rq) {
 16		rq->rq_sched_info.run_delay += delta;
 17		rq->rq_sched_info.pcount++;
 18	}
 19}
 20
 21/*
 22 * Expects runqueue lock to be held for atomicity of update
 23 */
 24static inline void
 25rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 26{
 27	if (rq)
 28		rq->rq_cpu_time += delta;
 29}
 30
 31static inline void
 32rq_sched_info_dequeue(struct rq *rq, unsigned long long delta)
 33{
 34	if (rq)
 35		rq->rq_sched_info.run_delay += delta;
 36}
 37#define   schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
 38#define __schedstat_inc(var)		do { var++; } while (0)
 39#define   schedstat_inc(var)		do { if (schedstat_enabled()) { var++; } } while (0)
 40#define __schedstat_add(var, amt)	do { var += (amt); } while (0)
 41#define   schedstat_add(var, amt)	do { if (schedstat_enabled()) { var += (amt); } } while (0)
 42#define __schedstat_set(var, val)	do { var = (val); } while (0)
 43#define   schedstat_set(var, val)	do { if (schedstat_enabled()) { var = (val); } } while (0)
 44#define   schedstat_val(var)		(var)
 45#define   schedstat_val_or_zero(var)	((schedstat_enabled()) ? (var) : 0)
 46
 47void __update_stats_wait_start(struct rq *rq, struct task_struct *p,
 48			       struct sched_statistics *stats);
 49
 50void __update_stats_wait_end(struct rq *rq, struct task_struct *p,
 51			     struct sched_statistics *stats);
 52void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p,
 53				    struct sched_statistics *stats);
 54
 55static inline void
 56check_schedstat_required(void)
 57{
 58	if (schedstat_enabled())
 59		return;
 60
 61	/* Force schedstat enabled if a dependent tracepoint is active */
 62	if (trace_sched_stat_wait_enabled()    ||
 63	    trace_sched_stat_sleep_enabled()   ||
 64	    trace_sched_stat_iowait_enabled()  ||
 65	    trace_sched_stat_blocked_enabled() ||
 66	    trace_sched_stat_runtime_enabled())
 67		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and stat_runtime require the kernel parameter schedstats=enable or kernel.sched_schedstats=1\n");
 68}
 69
 70#else /* !CONFIG_SCHEDSTATS: */
 71
 72static inline void rq_sched_info_arrive  (struct rq *rq, unsigned long long delta) { }
 73static inline void rq_sched_info_dequeue(struct rq *rq, unsigned long long delta) { }
 74static inline void rq_sched_info_depart  (struct rq *rq, unsigned long long delta) { }
 75# define   schedstat_enabled()		0
 76# define __schedstat_inc(var)		do { } while (0)
 77# define   schedstat_inc(var)		do { } while (0)
 78# define __schedstat_add(var, amt)	do { } while (0)
 79# define   schedstat_add(var, amt)	do { } while (0)
 80# define __schedstat_set(var, val)	do { } while (0)
 81# define   schedstat_set(var, val)	do { } while (0)
 82# define   schedstat_val(var)		0
 83# define   schedstat_val_or_zero(var)	0
 84
 85# define __update_stats_wait_start(rq, p, stats)       do { } while (0)
 86# define __update_stats_wait_end(rq, p, stats)         do { } while (0)
 87# define __update_stats_enqueue_sleeper(rq, p, stats)  do { } while (0)
 88# define check_schedstat_required()                    do { } while (0)
 89
 90#endif /* CONFIG_SCHEDSTATS */
 91
 92#ifdef CONFIG_FAIR_GROUP_SCHED
 93struct sched_entity_stats {
 94	struct sched_entity     se;
 95	struct sched_statistics stats;
 96} __no_randomize_layout;
 97#endif
 98
 99static inline struct sched_statistics *
100__schedstats_from_se(struct sched_entity *se)
101{
102#ifdef CONFIG_FAIR_GROUP_SCHED
103	if (!entity_is_task(se))
104		return &container_of(se, struct sched_entity_stats, se)->stats;
105#endif
106	return &task_of(se)->stats;
107}
108
109#ifdef CONFIG_PSI
110void psi_task_change(struct task_struct *task, int clear, int set);
111void psi_task_switch(struct task_struct *prev, struct task_struct *next,
112		     bool sleep);
113#ifdef CONFIG_IRQ_TIME_ACCOUNTING
114void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev);
115#else
116static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
117				       struct task_struct *prev) {}
118#endif /*CONFIG_IRQ_TIME_ACCOUNTING */
119/*
120 * PSI tracks state that persists across sleeps, such as iowaits and
121 * memory stalls. As a result, it has to distinguish between sleeps,
122 * where a task's runnable state changes, and migrations, where a task
123 * and its runnable state are being moved between CPUs and runqueues.
124 *
125 * A notable case is a task whose dequeue is delayed. PSI considers
126 * those sleeping, but because they are still on the runqueue they can
127 * go through migration requeues. In this case, *sleeping* states need
128 * to be transferred.
129 */
130static inline void psi_enqueue(struct task_struct *p, int flags)
131{
132	int clear = 0, set = 0;
133
134	if (static_branch_likely(&psi_disabled))
135		return;
136
137	/* Same runqueue, nothing changed for psi */
138	if (flags & ENQUEUE_RESTORE)
139		return;
140
141	/* psi_sched_switch() will handle the flags */
142	if (task_on_cpu(task_rq(p), p))
143		return;
144
145	if (p->se.sched_delayed) {
146		/* CPU migration of "sleeping" task */
147		SCHED_WARN_ON(!(flags & ENQUEUE_MIGRATED));
148		if (p->in_memstall)
149			set |= TSK_MEMSTALL;
150		if (p->in_iowait)
151			set |= TSK_IOWAIT;
152	} else if (flags & ENQUEUE_MIGRATED) {
153		/* CPU migration of runnable task */
154		set = TSK_RUNNING;
155		if (p->in_memstall)
156			set |= TSK_MEMSTALL | TSK_MEMSTALL_RUNNING;
157	} else {
158		/* Wakeup of new or sleeping task */
159		if (p->in_iowait)
160			clear |= TSK_IOWAIT;
161		set = TSK_RUNNING;
162		if (p->in_memstall)
163			set |= TSK_MEMSTALL_RUNNING;
164	}
165
166	psi_task_change(p, clear, set);
167}
168
169static inline void psi_dequeue(struct task_struct *p, int flags)
170{
 
 
171	if (static_branch_likely(&psi_disabled))
172		return;
173
174	/* Same runqueue, nothing changed for psi */
175	if (flags & DEQUEUE_SAVE)
176		return;
 
 
 
 
 
 
 
 
177
178	/*
179	 * A voluntary sleep is a dequeue followed by a task switch. To
180	 * avoid walking all ancestors twice, psi_task_switch() handles
181	 * TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU.
182	 * Do nothing here.
183	 */
184	if (flags & DEQUEUE_SLEEP)
185		return;
186
187	/*
188	 * When migrating a task to another CPU, clear all psi
189	 * state. The enqueue callback above will work it out.
190	 */
191	psi_task_change(p, p->psi_flags, 0);
192}
193
194static inline void psi_ttwu_dequeue(struct task_struct *p)
195{
196	if (static_branch_likely(&psi_disabled))
197		return;
198	/*
199	 * Is the task being migrated during a wakeup? Make sure to
200	 * deregister its sleep-persistent psi states from the old
201	 * queue, and let psi_enqueue() know it has to requeue.
202	 */
203	if (unlikely(p->psi_flags)) {
204		struct rq_flags rf;
205		struct rq *rq;
 
 
 
 
 
 
206
207		rq = __task_rq_lock(p, &rf);
208		psi_task_change(p, p->psi_flags, 0);
 
209		__task_rq_unlock(rq, &rf);
210	}
211}
212
213static inline void psi_sched_switch(struct task_struct *prev,
214				    struct task_struct *next,
215				    bool sleep)
216{
217	if (static_branch_likely(&psi_disabled))
218		return;
219
220	psi_task_switch(prev, next, sleep);
221}
222
 
 
 
 
 
 
 
 
223#else /* CONFIG_PSI */
224static inline void psi_enqueue(struct task_struct *p, bool migrate) {}
225static inline void psi_dequeue(struct task_struct *p, bool migrate) {}
226static inline void psi_ttwu_dequeue(struct task_struct *p) {}
227static inline void psi_sched_switch(struct task_struct *prev,
228				    struct task_struct *next,
229				    bool sleep) {}
230static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
231				       struct task_struct *prev) {}
232#endif /* CONFIG_PSI */
233
234#ifdef CONFIG_SCHED_INFO
 
 
 
 
 
235/*
236 * We are interested in knowing how long it was from the *first* time a
237 * task was queued to the time that it finally hit a CPU, we call this routine
238 * from dequeue_task() to account for possible rq->clock skew across CPUs. The
239 * delta taken on each CPU would annul the skew.
240 */
241static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
242{
243	unsigned long long delta = 0;
244
245	if (!t->sched_info.last_queued)
246		return;
247
248	delta = rq_clock(rq) - t->sched_info.last_queued;
249	t->sched_info.last_queued = 0;
250	t->sched_info.run_delay += delta;
251
252	rq_sched_info_dequeue(rq, delta);
253}
254
255/*
256 * Called when a task finally hits the CPU.  We can now calculate how
257 * long it was waiting to run.  We also note when it began so that we
258 * can keep stats on how long its time-slice is.
259 */
260static void sched_info_arrive(struct rq *rq, struct task_struct *t)
261{
262	unsigned long long now, delta = 0;
263
264	if (!t->sched_info.last_queued)
265		return;
266
267	now = rq_clock(rq);
268	delta = now - t->sched_info.last_queued;
269	t->sched_info.last_queued = 0;
270	t->sched_info.run_delay += delta;
271	t->sched_info.last_arrival = now;
272	t->sched_info.pcount++;
273
274	rq_sched_info_arrive(rq, delta);
275}
276
277/*
278 * This function is only called from enqueue_task(), but also only updates
279 * the timestamp if it is already not set.  It's assumed that
280 * sched_info_dequeue() will clear that stamp when appropriate.
281 */
282static inline void sched_info_enqueue(struct rq *rq, struct task_struct *t)
283{
284	if (!t->sched_info.last_queued)
285		t->sched_info.last_queued = rq_clock(rq);
 
 
286}
287
288/*
289 * Called when a process ceases being the active-running process involuntarily
290 * due, typically, to expiring its time slice (this may also be called when
291 * switching to the idle task).  Now we can calculate how long we ran.
292 * Also, if the process is still in the TASK_RUNNING state, call
293 * sched_info_enqueue() to mark that it has now again started waiting on
294 * the runqueue.
295 */
296static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
297{
298	unsigned long long delta = rq_clock(rq) - t->sched_info.last_arrival;
299
300	rq_sched_info_depart(rq, delta);
301
302	if (task_is_running(t))
303		sched_info_enqueue(rq, t);
304}
305
306/*
307 * Called when tasks are switched involuntarily due, typically, to expiring
308 * their time slice.  (This may also be called when switching to or from
309 * the idle task.)  We are only called when prev != next.
310 */
311static inline void
312sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
313{
314	/*
315	 * prev now departs the CPU.  It's not interesting to record
316	 * stats about how efficient we were at scheduling the idle
317	 * process, however.
318	 */
319	if (prev != rq->idle)
320		sched_info_depart(rq, prev);
321
322	if (next != rq->idle)
323		sched_info_arrive(rq, next);
324}
325
 
 
 
 
 
 
 
326#else /* !CONFIG_SCHED_INFO: */
327# define sched_info_enqueue(rq, t)	do { } while (0)
328# define sched_info_dequeue(rq, t)	do { } while (0)
 
 
 
329# define sched_info_switch(rq, t, next)	do { } while (0)
330#endif /* CONFIG_SCHED_INFO */
331
332#endif /* _KERNEL_STATS_H */

  1/* SPDX-License-Identifier: GPL-2.0 */
 
 
  2
  3#ifdef CONFIG_SCHEDSTATS
  4
 
 
  5/*
  6 * Expects runqueue lock to be held for atomicity of update
  7 */
  8static inline void
  9rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
 10{
 11	if (rq) {
 12		rq->rq_sched_info.run_delay += delta;
 13		rq->rq_sched_info.pcount++;
 14	}
 15}
 16
 17/*
 18 * Expects runqueue lock to be held for atomicity of update
 19 */
 20static inline void
 21rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 22{
 23	if (rq)
 24		rq->rq_cpu_time += delta;
 25}
 26
 27static inline void
 28rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 29{
 30	if (rq)
 31		rq->rq_sched_info.run_delay += delta;
 32}
 33#define   schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
 34#define __schedstat_inc(var)		do { var++; } while (0)
 35#define   schedstat_inc(var)		do { if (schedstat_enabled()) { var++; } } while (0)
 36#define __schedstat_add(var, amt)	do { var += (amt); } while (0)
 37#define   schedstat_add(var, amt)	do { if (schedstat_enabled()) { var += (amt); } } while (0)
 38#define __schedstat_set(var, val)	do { var = (val); } while (0)
 39#define   schedstat_set(var, val)	do { if (schedstat_enabled()) { var = (val); } } while (0)
 40#define   schedstat_val(var)		(var)
 41#define   schedstat_val_or_zero(var)	((schedstat_enabled()) ? (var) : 0)
 42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 43#else /* !CONFIG_SCHEDSTATS: */
 
 44static inline void rq_sched_info_arrive  (struct rq *rq, unsigned long long delta) { }
 45static inline void rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) { }
 46static inline void rq_sched_info_depart  (struct rq *rq, unsigned long long delta) { }
 47# define   schedstat_enabled()		0
 48# define __schedstat_inc(var)		do { } while (0)
 49# define   schedstat_inc(var)		do { } while (0)
 50# define __schedstat_add(var, amt)	do { } while (0)
 51# define   schedstat_add(var, amt)	do { } while (0)
 52# define __schedstat_set(var, val)	do { } while (0)
 53# define   schedstat_set(var, val)	do { } while (0)
 54# define   schedstat_val(var)		0
 55# define   schedstat_val_or_zero(var)	0
 
 
 
 
 
 
 56#endif /* CONFIG_SCHEDSTATS */
 57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 58#ifdef CONFIG_PSI
 
 
 
 
 
 
 
 
 
 59/*
 60 * PSI tracks state that persists across sleeps, such as iowaits and
 61 * memory stalls. As a result, it has to distinguish between sleeps,
 62 * where a task's runnable state changes, and requeues, where a task
 63 * and its state are being moved between CPUs and runqueues.
 
 
 
 
 
 64 */
 65static inline void psi_enqueue(struct task_struct *p, bool wakeup)
 66{
 67	int clear = 0, set = TSK_RUNNING;
 68
 69	if (static_branch_likely(&psi_disabled))
 70		return;
 71
 72	if (!wakeup || p->sched_psi_wake_requeue) {
 
 
 
 
 
 
 
 
 
 
 73		if (p->in_memstall)
 74			set |= TSK_MEMSTALL;
 75		if (p->sched_psi_wake_requeue)
 76			p->sched_psi_wake_requeue = 0;
 
 
 
 
 
 77	} else {
 
 78		if (p->in_iowait)
 79			clear |= TSK_IOWAIT;
 
 
 
 80	}
 81
 82	psi_task_change(p, clear, set);
 83}
 84
 85static inline void psi_dequeue(struct task_struct *p, bool sleep)
 86{
 87	int clear = TSK_RUNNING, set = 0;
 88
 89	if (static_branch_likely(&psi_disabled))
 90		return;
 91
 92	if (!sleep) {
 93		if (p->in_memstall)
 94			clear |= TSK_MEMSTALL;
 95	} else {
 96		/*
 97		 * When a task sleeps, schedule() dequeues it before
 98		 * switching to the next one. Merge the clearing of
 99		 * TSK_RUNNING and TSK_ONCPU to save an unnecessary
100		 * psi_task_change() call in psi_sched_switch().
101		 */
102		clear |= TSK_ONCPU;
103
104		if (p->in_iowait)
105			set |= TSK_IOWAIT;
106	}
 
 
 
 
 
107
108	psi_task_change(p, clear, set);
 
 
 
 
109}
110
111static inline void psi_ttwu_dequeue(struct task_struct *p)
112{
113	if (static_branch_likely(&psi_disabled))
114		return;
115	/*
116	 * Is the task being migrated during a wakeup? Make sure to
117	 * deregister its sleep-persistent psi states from the old
118	 * queue, and let psi_enqueue() know it has to requeue.
119	 */
120	if (unlikely(p->in_iowait || p->in_memstall)) {
121		struct rq_flags rf;
122		struct rq *rq;
123		int clear = 0;
124
125		if (p->in_iowait)
126			clear |= TSK_IOWAIT;
127		if (p->in_memstall)
128			clear |= TSK_MEMSTALL;
129
130		rq = __task_rq_lock(p, &rf);
131		psi_task_change(p, clear, 0);
132		p->sched_psi_wake_requeue = 1;
133		__task_rq_unlock(rq, &rf);
134	}
135}
136
137static inline void psi_sched_switch(struct task_struct *prev,
138				    struct task_struct *next,
139				    bool sleep)
140{
141	if (static_branch_likely(&psi_disabled))
142		return;
143
144	psi_task_switch(prev, next, sleep);
145}
146
147static inline void psi_task_tick(struct rq *rq)
148{
149	if (static_branch_likely(&psi_disabled))
150		return;
151
152	if (unlikely(rq->curr->in_memstall))
153		psi_memstall_tick(rq->curr, cpu_of(rq));
154}
155#else /* CONFIG_PSI */
156static inline void psi_enqueue(struct task_struct *p, bool wakeup) {}
157static inline void psi_dequeue(struct task_struct *p, bool sleep) {}
158static inline void psi_ttwu_dequeue(struct task_struct *p) {}
159static inline void psi_sched_switch(struct task_struct *prev,
160				    struct task_struct *next,
161				    bool sleep) {}
162static inline void psi_task_tick(struct rq *rq) {}
 
163#endif /* CONFIG_PSI */
164
165#ifdef CONFIG_SCHED_INFO
166static inline void sched_info_reset_dequeued(struct task_struct *t)
167{
168	t->sched_info.last_queued = 0;
169}
170
171/*
172 * We are interested in knowing how long it was from the *first* time a
173 * task was queued to the time that it finally hit a CPU, we call this routine
174 * from dequeue_task() to account for possible rq->clock skew across CPUs. The
175 * delta taken on each CPU would annul the skew.
176 */
177static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t)
178{
179	unsigned long long now = rq_clock(rq), delta = 0;
180
181	if (sched_info_on()) {
182		if (t->sched_info.last_queued)
183			delta = now - t->sched_info.last_queued;
184	}
185	sched_info_reset_dequeued(t);
186	t->sched_info.run_delay += delta;
187
188	rq_sched_info_dequeued(rq, delta);
189}
190
191/*
192 * Called when a task finally hits the CPU.  We can now calculate how
193 * long it was waiting to run.  We also note when it began so that we
194 * can keep stats on how long its timeslice is.
195 */
196static void sched_info_arrive(struct rq *rq, struct task_struct *t)
197{
198	unsigned long long now = rq_clock(rq), delta = 0;
199
200	if (t->sched_info.last_queued)
201		delta = now - t->sched_info.last_queued;
202	sched_info_reset_dequeued(t);
 
 
 
203	t->sched_info.run_delay += delta;
204	t->sched_info.last_arrival = now;
205	t->sched_info.pcount++;
206
207	rq_sched_info_arrive(rq, delta);
208}
209
210/*
211 * This function is only called from enqueue_task(), but also only updates
212 * the timestamp if it is already not set.  It's assumed that
213 * sched_info_dequeued() will clear that stamp when appropriate.
214 */
215static inline void sched_info_queued(struct rq *rq, struct task_struct *t)
216{
217	if (sched_info_on()) {
218		if (!t->sched_info.last_queued)
219			t->sched_info.last_queued = rq_clock(rq);
220	}
221}
222
223/*
224 * Called when a process ceases being the active-running process involuntarily
225 * due, typically, to expiring its time slice (this may also be called when
226 * switching to the idle task).  Now we can calculate how long we ran.
227 * Also, if the process is still in the TASK_RUNNING state, call
228 * sched_info_queued() to mark that it has now again started waiting on
229 * the runqueue.
230 */
231static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
232{
233	unsigned long long delta = rq_clock(rq) - t->sched_info.last_arrival;
234
235	rq_sched_info_depart(rq, delta);
236
237	if (t->state == TASK_RUNNING)
238		sched_info_queued(rq, t);
239}
240
241/*
242 * Called when tasks are switched involuntarily due, typically, to expiring
243 * their time slice.  (This may also be called when switching to or from
244 * the idle task.)  We are only called when prev != next.
245 */
246static inline void
247__sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
248{
249	/*
250	 * prev now departs the CPU.  It's not interesting to record
251	 * stats about how efficient we were at scheduling the idle
252	 * process, however.
253	 */
254	if (prev != rq->idle)
255		sched_info_depart(rq, prev);
256
257	if (next != rq->idle)
258		sched_info_arrive(rq, next);
259}
260
261static inline void
262sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
263{
264	if (sched_info_on())
265		__sched_info_switch(rq, prev, next);
266}
267
268#else /* !CONFIG_SCHED_INFO: */
269# define sched_info_queued(rq, t)	do { } while (0)
270# define sched_info_reset_dequeued(t)	do { } while (0)
271# define sched_info_dequeued(rq, t)	do { } while (0)
272# define sched_info_depart(rq, t)	do { } while (0)
273# define sched_info_arrive(rq, next)	do { } while (0)
274# define sched_info_switch(rq, t, next)	do { } while (0)
275#endif /* CONFIG_SCHED_INFO */