Loading...
Note: File does not exist in v3.15.
1#!/usr/bin/env drgn
2#
3# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
4# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
5
6desc = """
7This is a drgn script to monitor workqueues. For more info on drgn, visit
8https://github.com/osandov/drgn.
9
10 total Total number of work items executed by the workqueue.
11
12 infl The number of currently in-flight work items.
13
14 CPUtime Total CPU time consumed by the workqueue in seconds. This is
15 sampled from scheduler ticks and only provides ballpark
16 measurement. "nohz_full=" CPUs are excluded from measurement.
17
18 CPUitsv The number of times a concurrency-managed work item hogged CPU
19 longer than the threshold (workqueue.cpu_intensive_thresh_us)
20 and got excluded from concurrency management to avoid stalling
21 other work items.
22
23 CMW/RPR For per-cpu workqueues, the number of concurrency-management
24 wake-ups while executing a work item of the workqueue. For
25 unbound workqueues, the number of times a worker was repatriated
26 to its affinity scope after being migrated to an off-scope CPU by
27 the scheduler.
28
29 mayday The number of times the rescuer was requested while waiting for
30 new worker creation.
31
32 rescued The number of work items executed by the rescuer.
33"""
34
35import sys
36import signal
37import os
38import re
39import time
40import json
41
42import drgn
43from drgn.helpers.linux.list import list_for_each_entry,list_empty
44from drgn.helpers.linux.cpumask import for_each_possible_cpu
45
46import argparse
# Command-line interface. desc is used as the --help description and
# RawTextHelpFormatter is selected so that text is shown as written.
parser = argparse.ArgumentParser(
    description=desc,
    formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument(
    'workqueue',
    metavar='REGEX',
    nargs='*',
    help='Target workqueue name patterns (all if empty)',
)
parser.add_argument(
    '-i', '--interval',
    metavar='SECS',
    type=float,
    default=1,
    help='Monitoring interval (0 to print once and exit)',
)
parser.add_argument(
    '-j', '--json',
    action='store_true',
    help='Output in json',
)
args = parser.parse_args()
56
def err(s):
    """Write ``s`` to stderr, flush it, and exit the script with status 1."""
    sys.stderr.write(str(s) + '\n')
    sys.stderr.flush()
    raise SystemExit(1)
60
# Kernel symbols and enum values looked up once at start-up.
# NOTE(review): `prog` is not defined in this file; presumably it is the
# program object injected by the drgn runner named in the shebang - confirm.

# List head walked by main() to visit every workqueue_struct.
workqueues = prog['workqueues']

# Flag bits tested against wq.flags in WqStats.__init__().
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']

# Indices into the per-pool_workqueue stats[] array; PWQ_NR_STATS is the
# number of entries that WqStats sums.
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED'] # work items started execution
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED'] # work items completed execution
PWQ_STAT_CPU_TIME = prog['PWQ_STAT_CPU_TIME'] # total CPU time consumed
PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE'] # wq_cpu_intensive_thresh_us violations
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP'] # concurrency-management worker wakeups
PWQ_STAT_REPATRIATED = prog['PWQ_STAT_REPATRIATED'] # unbound workers brought back into scope
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY'] # maydays to rescuer
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED'] # linked work items executed by rescuer
PWQ_NR_STATS = prog['PWQ_NR_STATS']
75
class WqStats:
    """Point-in-time statistics of one workqueue.

    The counters of every pool_workqueue attached to the workqueue are summed
    into ``self.stats``, a list indexed by the PWQ_STAT_* constants.
    """

    def __init__(self, wq):
        """Read name, flags and summed counters from ``wq``.

        ``wq`` is the drgn object for a ``struct workqueue_struct`` as
        yielded by the list walk in main().
        """
        self.name = wq.name.string_().decode()
        self.unbound = wq.flags & WQ_UNBOUND != 0
        self.mem_reclaim = wq.flags & WQ_MEM_RECLAIM != 0
        self.stats = [0] * PWQ_NR_STATS
        for pwq in list_for_each_entry('struct pool_workqueue',
                                       wq.pwqs.address_of_(), 'pwqs_node'):
            for i in range(PWQ_NR_STATS):
                self.stats[i] += int(pwq.stats[i])

    def dict(self, now):
        """Return the snapshot as a plain dict, stamped with ``now``.

        All counters are reported raw, regardless of the workqueue type.
        """
        return {
            'timestamp': now,
            'name': self.name,
            'unbound': self.unbound,
            'mem_reclaim': self.mem_reclaim,
            'started': self.stats[PWQ_STAT_STARTED],
            'completed': self.stats[PWQ_STAT_COMPLETED],
            'cpu_time': self.stats[PWQ_STAT_CPU_TIME],
            'cpu_intensive': self.stats[PWQ_STAT_CPU_INTENSIVE],
            'cm_wakeup': self.stats[PWQ_STAT_CM_WAKEUP],
            'repatriated': self.stats[PWQ_STAT_REPATRIATED],
            'mayday': self.stats[PWQ_STAT_MAYDAY],
            'rescued': self.stats[PWQ_STAT_RESCUED],
        }

    @staticmethod
    def table_header_str():
        """Return the column header line matching table_row_str().

        Declared static so it works both as ``WqStats.table_header_str()``
        and when called on an instance.
        """
        return f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '\
               f'{"CPUitsv":>7} {"CMW/RPR":>7} {"mayday":>7} {"rescued":>7}'

    def table_row_str(self):
        """Return this workqueue formatted as one table row.

        Columns that do not apply to the workqueue type are shown as '-'.
        The row carries no trailing whitespace.
        """
        cpu_intensive = '-'
        cmw_rpr = '-'
        mayday = '-'
        rescued = '-'

        # The CMW/RPR column is shared: repatriation count for unbound
        # workqueues, concurrency-management wake-ups otherwise.
        if self.unbound:
            cmw_rpr = str(self.stats[PWQ_STAT_REPATRIATED])
        else:
            cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
            cmw_rpr = str(self.stats[PWQ_STAT_CM_WAKEUP])

        # Rescuer counters are only shown for WQ_MEM_RECLAIM workqueues.
        if self.mem_reclaim:
            mayday = str(self.stats[PWQ_STAT_MAYDAY])
            rescued = str(self.stats[PWQ_STAT_RESCUED])

        started = self.stats[PWQ_STAT_STARTED]
        # Clamp at zero: the two counters are read at different moments, so
        # the difference can transiently be negative.
        in_flight = max(started - self.stats[PWQ_STAT_COMPLETED], 0)

        # Long names keep their last 24 characters so the columns stay aligned.
        out = f'{self.name[-24:]:24} ' \
              f'{started:8} ' \
              f'{in_flight:5} ' \
              f'{self.stats[PWQ_STAT_CPU_TIME] / 1000000:8.1f} ' \
              f'{cpu_intensive:>7} ' \
              f'{cmw_rpr:>7} ' \
              f'{mayday:>7} ' \
              f'{rescued:>7} '
        # Strip the trailing separator; rstrip(':') left it in place.
        return out.rstrip()
129
# Stop request for the monitoring loop in main(); set from the SIGINT handler.
exit_req = False


def sigint_handler(signr, frame):
    """Signal handler: record that the monitoring loop should stop.

    Both arguments are the ones supplied by the signal module and are unused.
    """
    global exit_req
    exit_req = True
135
def main():
    """Print workqueue statistics once per interval until interrupted.

    Reads the parsed command line from the module-level ``args``:
    positional patterns select workqueues by name, ``--interval 0`` prints a
    single pass and returns, and ``--json`` switches from the table to one
    JSON object per workqueue per line.
    """
    # handle args
    table_fmt = not args.json
    interval = args.interval

    # Several patterns are OR-ed into one regex; no (or an empty) pattern
    # disables filtering.
    re_str = '|'.join(args.workqueue)
    filter_re = re.compile(re_str) if re_str else None

    # monitoring loop
    signal.signal(signal.SIGINT, sigint_handler)

    while not exit_req:
        now = time.time()

        if table_fmt:
            print()
            print(WqStats.table_header_str())

        for wq in list_for_each_entry('struct workqueue_struct',
                                      workqueues.address_of_(), 'list'):
            stats = WqStats(wq)
            if filter_re and not filter_re.search(stats.name):
                continue
            if table_fmt:
                print(stats.table_row_str())
            else:
                # Serialize properly: printing the dict itself emits Python
                # repr (single quotes, True/False), which is not JSON.
                print(json.dumps(stats.dict(now)))

        if interval == 0:
            break
        time.sleep(interval)


if __name__ == "__main__":
    main()