Loading...
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0-only
3#
4# top-like utility for displaying kvm statistics
5#
6# Copyright 2006-2008 Qumranet Technologies
7# Copyright 2008-2011 Red Hat, Inc.
8#
9# Authors:
10# Avi Kivity <avi@redhat.com>
11#
12"""The kvm_stat module outputs statistics about running KVM VMs
13
14Three different ways of output formatting are available:
15- as a top-like text ui
16- in a key -> value format
17- in an all keys, all values format
18
19The data is sampled from the KVM's debugfs entries and its perf events.
20"""
21from __future__ import print_function
22
23import curses
24import sys
25import locale
26import os
27import time
28import argparse
29import ctypes
30import fcntl
31import resource
32import struct
33import re
34import subprocess
35import signal
36from collections import defaultdict, namedtuple
37from functools import reduce
38from datetime import datetime
39
# Exit reason name -> number table handed to ArchX86 when the 'vmx' CPU
# flag is found in /proc/cpuinfo.
VMX_EXIT_REASONS = {
    'EXCEPTION_NMI': 0,
    'EXTERNAL_INTERRUPT': 1,
    'TRIPLE_FAULT': 2,
    'INIT_SIGNAL': 3,
    'SIPI_SIGNAL': 4,
    'INTERRUPT_WINDOW': 7,
    'NMI_WINDOW': 8,
    'TASK_SWITCH': 9,
    'CPUID': 10,
    'HLT': 12,
    'INVD': 13,
    'INVLPG': 14,
    'RDPMC': 15,
    'RDTSC': 16,
    'VMCALL': 18,
    'VMCLEAR': 19,
    'VMLAUNCH': 20,
    'VMPTRLD': 21,
    'VMPTRST': 22,
    'VMREAD': 23,
    'VMRESUME': 24,
    'VMWRITE': 25,
    'VMOFF': 26,
    'VMON': 27,
    'CR_ACCESS': 28,
    'DR_ACCESS': 29,
    'IO_INSTRUCTION': 30,
    'MSR_READ': 31,
    'MSR_WRITE': 32,
    'INVALID_STATE': 33,
    'MSR_LOAD_FAIL': 34,
    'MWAIT_INSTRUCTION': 36,
    'MONITOR_TRAP_FLAG': 37,
    'MONITOR_INSTRUCTION': 39,
    'PAUSE_INSTRUCTION': 40,
    'MCE_DURING_VMENTRY': 41,
    'TPR_BELOW_THRESHOLD': 43,
    'APIC_ACCESS': 44,
    'EOI_INDUCED': 45,
    'GDTR_IDTR': 46,
    'LDTR_TR': 47,
    'EPT_VIOLATION': 48,
    'EPT_MISCONFIG': 49,
    'INVEPT': 50,
    'RDTSCP': 51,
    'PREEMPTION_TIMER': 52,
    'INVVPID': 53,
    'WBINVD': 54,
    'XSETBV': 55,
    'APIC_WRITE': 56,
    'RDRAND': 57,
    'INVPCID': 58,
    'VMFUNC': 59,
    'ENCLS': 60,
    'RDSEED': 61,
    'PML_FULL': 62,
    'XSAVES': 63,
    'XRSTORS': 64,
    'UMWAIT': 67,
    'TPAUSE': 68,
    'BUS_LOCK': 74,
    'NOTIFY': 75,
}
104
# Exit reason name -> code table handed to ArchX86 when the 'svm' CPU
# flag is found in /proc/cpuinfo.
SVM_EXIT_REASONS = {
    'READ_CR0': 0x000,
    'READ_CR2': 0x002,
    'READ_CR3': 0x003,
    'READ_CR4': 0x004,
    'READ_CR8': 0x008,
    'WRITE_CR0': 0x010,
    'WRITE_CR2': 0x012,
    'WRITE_CR3': 0x013,
    'WRITE_CR4': 0x014,
    'WRITE_CR8': 0x018,
    'READ_DR0': 0x020,
    'READ_DR1': 0x021,
    'READ_DR2': 0x022,
    'READ_DR3': 0x023,
    'READ_DR4': 0x024,
    'READ_DR5': 0x025,
    'READ_DR6': 0x026,
    'READ_DR7': 0x027,
    'WRITE_DR0': 0x030,
    'WRITE_DR1': 0x031,
    'WRITE_DR2': 0x032,
    'WRITE_DR3': 0x033,
    'WRITE_DR4': 0x034,
    'WRITE_DR5': 0x035,
    'WRITE_DR6': 0x036,
    'WRITE_DR7': 0x037,
    'EXCP_BASE': 0x040,
    'LAST_EXCP': 0x05f,
    'INTR': 0x060,
    'NMI': 0x061,
    'SMI': 0x062,
    'INIT': 0x063,
    'VINTR': 0x064,
    'CR0_SEL_WRITE': 0x065,
    'IDTR_READ': 0x066,
    'GDTR_READ': 0x067,
    'LDTR_READ': 0x068,
    'TR_READ': 0x069,
    'IDTR_WRITE': 0x06a,
    'GDTR_WRITE': 0x06b,
    'LDTR_WRITE': 0x06c,
    'TR_WRITE': 0x06d,
    'RDTSC': 0x06e,
    'RDPMC': 0x06f,
    'PUSHF': 0x070,
    'POPF': 0x071,
    'CPUID': 0x072,
    'RSM': 0x073,
    'IRET': 0x074,
    'SWINT': 0x075,
    'INVD': 0x076,
    'PAUSE': 0x077,
    'HLT': 0x078,
    'INVLPG': 0x079,
    'INVLPGA': 0x07a,
    'IOIO': 0x07b,
    'MSR': 0x07c,
    'TASK_SWITCH': 0x07d,
    'FERR_FREEZE': 0x07e,
    'SHUTDOWN': 0x07f,
    'VMRUN': 0x080,
    'VMMCALL': 0x081,
    'VMLOAD': 0x082,
    'VMSAVE': 0x083,
    'STGI': 0x084,
    'CLGI': 0x085,
    'SKINIT': 0x086,
    'RDTSCP': 0x087,
    'ICEBP': 0x088,
    'WBINVD': 0x089,
    'MONITOR': 0x08a,
    'MWAIT': 0x08b,
    'MWAIT_COND': 0x08c,
    'XSETBV': 0x08d,
    'RDPRU': 0x08e,
    'EFER_WRITE_TRAP': 0x08f,
    'CR0_WRITE_TRAP': 0x090,
    'CR1_WRITE_TRAP': 0x091,
    'CR2_WRITE_TRAP': 0x092,
    'CR3_WRITE_TRAP': 0x093,
    'CR4_WRITE_TRAP': 0x094,
    'CR5_WRITE_TRAP': 0x095,
    'CR6_WRITE_TRAP': 0x096,
    'CR7_WRITE_TRAP': 0x097,
    'CR8_WRITE_TRAP': 0x098,
    'CR9_WRITE_TRAP': 0x099,
    'CR10_WRITE_TRAP': 0x09a,
    'CR11_WRITE_TRAP': 0x09b,
    'CR12_WRITE_TRAP': 0x09c,
    'CR13_WRITE_TRAP': 0x09d,
    'CR14_WRITE_TRAP': 0x09e,
    'CR15_WRITE_TRAP': 0x09f,
    'INVPCID': 0x0a2,
    'NPF': 0x400,
    'AVIC_INCOMPLETE_IPI': 0x401,
    'AVIC_UNACCELERATED_ACCESS': 0x402,
    'VMGEXIT': 0x403,
}
204
# EC definition of HSR (from arch/arm64/include/asm/esr.h)
# Used by ArchA64 as its exit reason name -> code table.
AARCH64_EXIT_REASONS = {
    'UNKNOWN': 0x00,
    'WFx': 0x01,
    'CP15_32': 0x03,
    'CP15_64': 0x04,
    'CP14_MR': 0x05,
    'CP14_LS': 0x06,
    'FP_ASIMD': 0x07,
    'CP10_ID': 0x08,
    'PAC': 0x09,
    'CP14_64': 0x0C,
    'BTI': 0x0D,
    'ILL': 0x0E,
    'SVC32': 0x11,
    'HVC32': 0x12,
    'SMC32': 0x13,
    'SVC64': 0x15,
    'HVC64': 0x16,
    'SMC64': 0x17,
    'SYS64': 0x18,
    'SVE': 0x19,
    'ERET': 0x1A,
    'FPAC': 0x1C,
    'SME': 0x1D,
    'IMP_DEF': 0x1F,
    'IABT_LOW': 0x20,
    'IABT_CUR': 0x21,
    'PC_ALIGN': 0x22,
    'DABT_LOW': 0x24,
    'DABT_CUR': 0x25,
    'SP_ALIGN': 0x26,
    'FP_EXC32': 0x28,
    'FP_EXC64': 0x2C,
    'SERROR': 0x2F,
    'BREAKPT_LOW': 0x30,
    'BREAKPT_CUR': 0x31,
    'SOFTSTP_LOW': 0x32,
    'SOFTSTP_CUR': 0x33,
    'WATCHPT_LOW': 0x34,
    'WATCHPT_CUR': 0x35,
    'BKPT32': 0x38,
    'VECTOR32': 0x3A,
    'BRK64': 0x3C,
}
250
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# Values the 'reason' field of the kvm_userspace_exit tracepoint is
# filtered on.
USERSPACE_EXIT_REASONS = {
    'UNKNOWN': 0,
    'EXCEPTION': 1,
    'IO': 2,
    'HYPERCALL': 3,
    'DEBUG': 4,
    'HLT': 5,
    'MMIO': 6,
    'IRQ_WINDOW_OPEN': 7,
    'SHUTDOWN': 8,
    'FAIL_ENTRY': 9,
    'INTR': 10,
    'SET_TPR': 11,
    'TPR_ACCESS': 12,
    'S390_SIEIC': 13,
    'S390_RESET': 14,
    'DCR': 15,
    'NMI': 16,
    'INTERNAL_ERROR': 17,
    'OSI': 18,
    'PAPR_HCALL': 19,
    'S390_UCONTROL': 20,
    'WATCHDOG': 21,
    'S390_TSCH': 22,
    'EPR': 23,
    'SYSTEM_EVENT': 24,
    'S390_STSI': 25,
    'IOAPIC_EOI': 26,
    'HYPERV': 27,
    'ARM_NISV': 28,
    'X86_RDMSR': 29,
    'X86_WRMSR': 30,
    'DIRTY_RING_FULL': 31,
    'AP_RESET_HOLD': 32,
    'X86_BUS_LOCK': 33,
    'XEN': 34,
    'RISCV_SBI': 35,
    'RISCV_CSR': 36,
    'NOTIFY': 37,
}
292
# Default request numbers passed to fcntl.ioctl() on perf event
# descriptors; ArchPPC substitutes its own values.
IOCTL_NUMBERS = {
    'SET_FILTER': 0x40082406,
    'ENABLE': 0x00002400,
    'DISABLE': 0x00002401,
    'RESET': 0x00002403,
}

signal_received = False

# Encoding used to decode the output of external commands.
ENCODING = locale.getpreferredencoding(False)
# Matches field names that carry no '(filter)' suffix, i.e. parent events.
TRACE_FILTER = re.compile(r'^[^\(]*$')
304
305
class Arch(object):
    """Encapsulates global architecture specific data.

    Contains the performance event open syscall and ioctl numbers, as
    well as the VM exit reasons for the architecture it runs on.

    """
    @staticmethod
    def get_arch():
        """Return the Arch subclass instance matching the running machine.

        The machine name from os.uname() selects ArchPPC, ArchA64 or
        ArchS390 by prefix.  Any other machine is treated as x86 and the
        'flags' lines of /proc/cpuinfo decide between the VMX and the SVM
        exit reason tables.

        Returns None when no 'flags' line mentions 'vmx' or 'svm'.

        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        if machine.startswith('aarch64'):
            return ArchA64()
        if machine.startswith('s390'):
            return ArchS390()

        # X86_64
        # The context manager closes /proc/cpuinfo on every exit path,
        # including the early returns below.
        with open('/proc/cpuinfo') as cpuinfo:
            for line in cpuinfo:
                if not line.startswith('flags'):
                    continue

                flags = line.split()
                if 'vmx' in flags:
                    return ArchX86(VMX_EXIT_REASONS)
                if 'svm' in flags:
                    return ArchX86(SVM_EXIT_REASONS)
        return None

    def tracepoint_is_child(self, field):
        """Return the parent event name of 'field', or None.

        A field of the form 'event(filter)' is a child of 'event'; a
        field without an opening parenthesis has no parent.

        """
        if TRACE_FILTER.match(field):
            return None
        return field.split('(', 1)[0]
340
341
class ArchX86(Arch):
    """x86 flavour of Arch; the exit reason table is chosen by the caller."""

    def __init__(self, exit_reasons):
        # 'exit_reasons' is the name -> number dict used to build the
        # per-reason kvm_exit filters.
        self.exit_reasons = exit_reasons
        self.exit_reason_field = 'exit_reason'
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 298

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise """
        # No debugfs field is reported as a child here.
        return None
352
353
class ArchPPC(Arch):
    """PowerPC flavour of Arch with its own perf ioctl request numbers."""

    def __init__(self):
        self.sc_perf_evt_open = 319

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)

        # Work on a private copy: assigning into IOCTL_NUMBERS itself
        # would rewrite the module-level table that the other Arch
        # classes hand out.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403
        self.ioctl_numbers['SET_FILTER'] = (0x80002406 |
                                            (char_ptr_size << 16))

        self.exit_reason_field = 'exit_nr'
        # Empty table: no per-reason kvm_exit filters are generated.
        self.exit_reasons = {}

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise """
        return None
372
373
class ArchA64(Arch):
    """aarch64 flavour of Arch, filtering kvm_exit on the 'esr_ec' field."""

    def __init__(self):
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.exit_reason_field = 'esr_ec'
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 241

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise """
        # No debugfs field is reported as a child here.
        return None
384
385
class ArchS390(Arch):
    """s390 flavour of Arch; it defines no kvm_exit reason filters."""

    def __init__(self):
        # Both exit reason attributes are None, so no 'kvm_exit' filter
        # entry is created for this architecture.
        self.exit_reasons = None
        self.exit_reason_field = None
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 331

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise """
        is_child = field.startswith('instruction_')
        return 'exit_instruction' if is_child else None
397
398
# Architecture description shared by the event and provider classes;
# resolved once, when the module is loaded.
ARCH = Arch.get_arch()
400
401
PERF_TYPE_TRACEPOINT = 2
PERF_FORMAT_GROUP = 1 << 3


class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        """Preset type, size and read_format; other fields stay 0."""
        # Name the class explicitly: super(self.__class__, self) resolves
        # to the subclass when this struct is subclassed and then calls
        # this very method again, recursing without end.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP
433
434
class Group(object):
    """Represents a perf event group."""

    def __init__(self):
        # Events in insertion order; the first one is the group leader.
        self.events = []

    def add_event(self, event):
        """Append 'event' to the group."""
        self.events.append(event)

    def read(self):
        """Returns a dict with 'event name: value' for all events in the
        group.

        Values are read by reading from the file descriptor of the
        event that is the group leader. See perf_event_open(2) for
        details.

        Read format for the used event configuration is:
        struct read_format {
            u64 nr; /* The number of events */
            struct {
                u64 value; /* The value of the event */
            } values[nr];
        };

        """
        count = len(self.events)
        names = [event.name for event in self.events]
        # One u64 for 'nr' (skipped as padding) plus one u64 per event.
        raw = os.read(self.events[0].fd, 8 * (count + 1))
        values = struct.unpack('8x' + 'Q' * count, raw)
        return dict(zip(names, values))
466
467
class Event(object):
    """Represents a performance event and manages its life cycle."""
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        """Open the perf event and, if given, install its filter.

        'group' is the Group the caller is about to add this event to;
        its first event, if any, is used as the group leader.

        """
        # Set first so that __del__ finds the attribute even when one of
        # the following steps raises.
        self.fd = None
        self.name = name
        self.libc = ctypes.CDLL('libc.so.6', use_errno=True)
        self.syscall = self.libc.syscall
        self._setup_event(group, trace_cpu, trace_pid, trace_point,
                          trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        # Compare against None: 0 is a valid descriptor number.  getattr
        # covers instances whose __init__ never ran.
        fd = getattr(self, 'fd', None)
        if fd is not None:
            os.close(fd)

    def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
        """Wrapper for the sys_perf_evt_open() syscall.

        Used to set up performance events, returns a file descriptor or -1
        on error.

        Attributes are:
        - syscall number
        - struct perf_event_attr *
        - pid or -1 to monitor all pids
        - cpu number or -1 to monitor all cpus
        - The file descriptor of the group leader or -1 to create a group.
        - flags

        """
        return self.syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
                            ctypes.c_int(pid), ctypes.c_int(cpu),
                            ctypes.c_int(group_fd), ctypes.c_long(flags))

    def _setup_event_attribute(self, trace_set, trace_point):
        """Returns an initialized ctype perf_event_attr struct."""

        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        # The tracepoint id read from the 'id' file becomes the config.
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def _setup_event(self, group, trace_cpu, trace_pid, trace_point,
                     trace_filter, trace_set):
        """Sets up the perf event in Linux.

        Issues the syscall to register the event in the kernel and
        then sets the optional filter.

        Raises OSError if the syscall fails.  If installing the filter
        fails, the new descriptor is closed and the error is re-raised.

        """

        event_attr = self._setup_event_attribute(trace_set, trace_point)

        # First event will be group leader.
        group_leader = -1

        # All others have to pass the leader's descriptor instead.
        if group.events:
            group_leader = group.events[0].fd

        fd = self._perf_event_open(event_attr, trace_pid,
                                   trace_cpu, group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        if trace_filter:
            try:
                fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                            trace_filter)
            except Exception:
                # self.fd is not set yet, so __del__ would never close
                # this descriptor; close it here before propagating.
                os.close(fd)
                raise

        self.fd = fd

    def enable(self):
        """Enables the trace event in the kernel.

        Enabling the group leader makes reading counters from it and the
        events under it possible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Disables the trace event in the kernel.

        Disabling the group leader makes reading all counters under it
        impossible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Resets the count of the trace event in the kernel."""
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
571
572
class Provider(object):
    """Encapsulates functionalities used by all providers."""
    def __init__(self, pid):
        self.child_events = False
        # Subclasses expose 'pid' as a property, so this assignment runs
        # their setter.
        self.pid = pid

    @staticmethod
    def is_field_wanted(fields_filter, field):
        """Indicate whether field is valid according to fields_filter."""
        # An empty or missing filter accepts every field.
        if fields_filter:
            return bool(re.match(fields_filter, field))
        return True

    @staticmethod
    def walkdir(path):
        """Returns os.walk() data for specified directory.

        As it is only a wrapper it returns the same 3-tuple of (dirpath,
        dirnames, filenames).
        """
        # Only the first entry is wanted: the directory 'path' itself.
        walker = os.walk(path)
        return next(walker)
594
595
class TracepointProvider(Provider):
    """Data provider for the stats class.

    Manages the events/groups from which it acquires its data.

    """
    def __init__(self, pid, fields_filter):
        self.group_leaders = []
        self.filters = self._get_filters()
        self.update_fields(fields_filter)
        # Provider.__init__ assigns self.pid, which creates the traces.
        super(TracepointProvider, self).__init__(pid)

    @staticmethod
    def _get_filters():
        """Returns a dict of trace events, their filter ids and
        the values that can be filtered.

        Trace events can be filtered for special values by setting a
        filter string via an ioctl. The string normally has the format
        identifier==value. For each filter a new event will be created, to
        be able to distinguish the events.

        """
        filters = {}
        filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
        if ARCH.exit_reason_field and ARCH.exit_reasons:
            filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons)
        return filters

    def _get_available_fields(self):
        """Returns a list of available events of format 'event name(filter
        name)'.

        All available events have directories under
        /sys/kernel/tracing/events/ which export information
        about the specific event. Therefore, listing the dirs gives us
        a list of all available events.

        Some events like the vm exit reasons can be filtered for
        specific values. To take account for that, the routine below
        creates special fields with the following format:
        event name(filter name)

        """
        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = self.walkdir(path)[1]
        extra = []
        for field in fields:
            if field in self.filters:
                filter_values = self.filters[field][1]
                extra.extend(field + '(' + name + ')'
                             for name in filter_values)
        fields += extra
        return fields

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        wanted = [field for field in self._get_available_fields()
                  if self.is_field_wanted(fields_filter, field)]
        # add parents for child fields - otherwise we won't see any output!
        for field in list(wanted):
            parent = ARCH.tracepoint_is_child(field)
            if parent and parent not in wanted:
                wanted.append(parent)
        # Assign once, with the parents already included: the setter is
        # what enables/disables events, so parents appended afterwards
        # would stay disabled.
        self.fields = wanted

    @staticmethod
    def _get_online_cpus():
        """Returns a list of cpu id integers."""
        def parse_int_list(list_string):
            """Returns an int list from a string of comma separated integers and
            integer ranges."""
            integers = []
            for member in list_string.split(','):
                if '-' not in member:
                    integers.append(int(member))
                else:
                    int_range = member.split('-')
                    integers.extend(range(int(int_range[0]),
                                          int(int_range[1]) + 1))
            return integers

        with open('/sys/devices/system/cpu/online') as cpu_list:
            cpu_string = cpu_list.readline()
            return parse_int_list(cpu_string)

    def _setup_traces(self):
        """Creates all event and group objects needed to be able to retrieve
        data."""
        fields = self._get_available_fields()
        # One mode test for the whole method.  A pid that is not positive
        # selects per-cpu tracing, so cpu numbers are never passed as
        # pids.
        per_thread = self._pid > 0
        if per_thread:
            # Fetch list of all threads of the monitored pid, as qemu
            # starts a thread for each vcpu.
            path = os.path.join('/proc', str(self._pid), 'task')
            groupids = self.walkdir(path)[1]
        else:
            groupids = self._get_online_cpus()

        # The constant is needed as a buffer for python libs, std
        # streams and other files that the script opens.
        newlim = len(groupids) * len(fields) + 50
        try:
            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)

            if hardlim < newlim:
                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
            else:
                # Raising the soft limit is sufficient.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))

        except ValueError:
            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))

        # Resolve tracepoint name and filter string once per field; they
        # do not depend on the group.
        specs = []
        for name in fields:
            tracepoint = name
            tracefilter = None
            match = re.match(r'(.*)\((.*)\)', name)
            if match:
                tracepoint, sub = match.groups()
                filter_field, filter_values = self.filters[tracepoint]
                tracefilter = '%s==%d\0' % (filter_field, filter_values[sub])
            specs.append((name, tracepoint, tracefilter))

        for groupid in groupids:
            # From perf_event_open(2):
            # pid > 0 and cpu == -1
            # This measures the specified process/thread on any CPU.
            #
            # pid == -1 and cpu >= 0
            # This measures all processes/threads on the specified CPU.
            trace_cpu = -1 if per_thread else groupid
            trace_pid = int(groupid) if per_thread else -1

            group = Group()
            for name, tracepoint, tracefilter in specs:
                group.add_event(Event(name=name,
                                      group=group,
                                      trace_cpu=trace_cpu,
                                      trace_pid=trace_pid,
                                      trace_point=tracepoint,
                                      trace_filter=tracefilter))

            self.group_leaders.append(group)

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Enables/disables the (un)wanted events"""
        self._fields = fields
        wanted = set(fields)
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in wanted:
                    event.reset()
                    event.enable()
                elif index != 0:
                    # Do not disable the group leader.
                    # It would disable all of its events.
                    event.disable()

    @property
    def pid(self):
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Changes the monitored pid by setting new traces."""
        self._pid = pid
        # The garbage collector will get rid of all Event/Group
        # objects and open files after removing the references.
        self.group_leaders = []
        self._setup_traces()
        # Re-apply the field selection to the freshly created events.
        self.fields = self._fields

    def read(self, by_guest=0):
        """Returns 'event name: current value' for all enabled events.

        Values are summed over all groups.  Child events are reported
        under the key '<child> <parent>'.  'by_guest' is accepted but not
        used by this provider.

        """
        ret = defaultdict(int)
        wanted = set(self._fields)
        for group in self.group_leaders:
            for name, val in group.read().items():
                if name not in wanted:
                    continue
                parent = ARCH.tracepoint_is_child(name)
                if parent:
                    name += ' ' + parent
                ret[name] += val
        return ret

    def reset(self):
        """Reset all field counters"""
        for group in self.group_leaders:
            for event in group.events:
                event.reset()
793
794
class DebugfsProvider(Provider):
    """Provides data from the files that KVM creates in the kvm debugfs
    folder."""
    def __init__(self, pid, fields_filter, include_past):
        self.update_fields(fields_filter)
        self._baseline = {}
        self.do_read = True
        self.paths = []
        super(DebugfsProvider, self).__init__(pid)
        if include_past:
            self._restore()

    def _get_available_fields(self):
        """Returns a list of available fields.

        The fields are all available KVM debugfs files

        """
        exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns', 'halt_wait_ns']
        fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2]
                  if field not in exempt_list]

        return fields

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        wanted = [field for field in self._get_available_fields()
                  if self.is_field_wanted(fields_filter, field)]
        # add parents for child fields - otherwise we won't see any output!
        for field in list(wanted):
            parent = ARCH.debugfs_is_child(field)
            if parent and parent not in wanted:
                wanted.append(parent)
        # Set the attribute directly: the 'fields' setter would call
        # reset(), which reads state __init__ sets up only afterwards.
        self._fields = wanted

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        self._fields = fields
        self.reset()

    @property
    def pid(self):
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Select the debugfs directories that belong to 'pid'.

        A pid of 0 selects all directories (resolved on each read).

        """
        self._pid = pid
        if pid != 0:
            vms = self.walkdir(PATH_DEBUGFS_KVM)[1]
            # Without any per-VM directory there is nothing to filter
            # on.  Recomputed on every change so reading resumes once
            # directories are present.
            self.do_read = len(vms) != 0

            # Anchor at the start of the name: a substring test would
            # let pid 12 also select a directory such as '312-11'.
            prefix = "{}-".format(pid)
            self.paths = [vm for vm in vms if vm.startswith(prefix)]

        else:
            self.paths = []
            self.do_read = True

    def _verify_paths(self):
        """Remove invalid paths"""
        # Build a new list; removing entries from the list being
        # iterated skips the element after each removal.
        self.paths = [path for path in self.paths
                      if os.path.exists(os.path.join(PATH_DEBUGFS_KVM,
                                                     path))]

    def read(self, reset=0, by_guest=0):
        """Returns a dict with format:'file name / field -> current value'.

        Parameter 'reset':
          0   plain read
          1   reset field counts to 0
          2   restore the original field counts

        With 'by_guest' set, values of fields without a parent are
        summed under the part of the directory name before the first
        '-' instead of under the field name.

        """
        results = {}

        # If no debugfs filtering support is available, then don't read.
        if not self.do_read:
            return results
        self._verify_paths()

        paths = self.paths
        if self._pid == 0:
            # Only the directories directly below the kvm folder are
            # candidates; names of nested directories are not valid
            # paths relative to it.
            top = next(os.walk(PATH_DEBUGFS_KVM), (None, [], []))
            paths = list(top[1])
        for path in paths:
            for field in self._fields:
                value = self._read_field(field, path)
                key = path + field
                if reset == 1:
                    self._baseline[key] = value
                if reset == 2:
                    self._baseline[key] = 0
                if self._baseline.get(key, -1) == -1:
                    self._baseline[key] = value
                parent = ARCH.debugfs_is_child(field)
                if parent:
                    name = field + ' ' + parent
                elif by_guest:
                    name = key.split('-')[0]  # set 'name' to 'pid'
                else:
                    name = field
                increment = value - self._baseline.get(key, 0)
                results[name] = results.get(name, 0) + increment

        return results

    def _read_field(self, field, path):
        """Returns the value of a single field from a specific VM.

        Returns 0 if the file cannot be opened or read.

        """
        try:
            with open(os.path.join(PATH_DEBUGFS_KVM, path, field)) as stat:
                return int(stat.read())
        except IOError:
            return 0

    def reset(self):
        """Reset field counters"""
        self._baseline = {}
        self.read(1)

    def _restore(self):
        """Restore the original field counters (baseline of 0)"""
        self._baseline = {}
        self.read(2)
928
929
# 'value' is the latest reading, 'delta' the change since the reading
# before it.
EventStat = namedtuple('EventStat', ['value', 'delta'])


class Stats(object):
    """Manages the data providers and the data they provide.

    It is used to set filters on the provider's data and collect all
    provider data.

    """
    def __init__(self, options):
        self.providers = self._get_providers(options)
        self._pid_filter = options.pid
        self._fields_filter = options.fields
        self.values = {}
        self._child_events = False

    def _get_providers(self, options):
        """Returns a list of data providers depending on the passed options."""
        providers = []

        if options.debugfs:
            debugfs = DebugfsProvider(options.pid, options.fields,
                                      options.debugfs_include_past)
            providers.append(debugfs)
        # Tracepoints are also the fallback when nothing else was chosen.
        if options.tracepoints or not providers:
            tracepoints = TracepointProvider(options.pid, options.fields)
            providers.append(tracepoints)

        return providers

    def _update_provider_filters(self):
        """Propagates fields filters to providers."""
        # As we reset the counters when updating the fields we can
        # also clear the cache of old values.
        self.values = {}
        for provider in self.providers:
            provider.update_fields(self._fields_filter)

    def reset(self):
        """Drop the cached values and reset every provider."""
        self.values = {}
        for provider in self.providers:
            provider.reset()

    @property
    def fields_filter(self):
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        # Nothing to do when the filter is unchanged.
        if fields_filter == self._fields_filter:
            return
        self._fields_filter = fields_filter
        self._update_provider_filters()

    @property
    def pid_filter(self):
        return self._pid_filter

    @pid_filter.setter
    def pid_filter(self, pid):
        # Nothing to do when the pid is unchanged.
        if pid == self._pid_filter:
            return
        self._pid_filter = pid
        self.values = {}
        for provider in self.providers:
            provider.pid = self._pid_filter

    @property
    def child_events(self):
        return self._child_events

    @child_events.setter
    def child_events(self, val):
        self._child_events = val
        for provider in self.providers:
            provider.child_events = val

    def get(self, by_guest=0):
        """Returns a dict with field -> (value, delta to last value) of all
        provider data.
        Key formats:
          * plain: 'key' is event name
          * child-parent: 'key' is in format '<child> <parent>'
          * pid: 'key' is the pid of the guest, and the record contains the
               aggregated event data
        These formats are generated by the providers, and handled in class TUI.
        """
        for provider in self.providers:
            readings = provider.read(by_guest=by_guest)
            for key, newval in readings.items():
                previous = self.values.get(key, EventStat(0, 0)).value
                self.values[key] = EventStat(newval, newval - previous)
        return self.values

    def toggle_display_guests(self, to_pid):
        """Toggle between collection of stats by individual event and by
        guest pid

        Events reported by DebugfsProvider change when switching to/from
        reading by guest values. Hence we have to remove the excess event
        names from self.values.

        Returns 1 without changing anything if a TracepointProvider is in
        use, 0 otherwise.

        """
        for provider in self.providers:
            if isinstance(provider, TracepointProvider):
                return 1
        if to_pid:
            # Switching to per-guest view: forget the per-event records.
            for provider in self.providers:
                if not isinstance(provider, DebugfsProvider):
                    continue
                for key in provider.fields:
                    self.values.pop(key, None)
        else:
            # Switching back: forget the records keyed by guest pid.
            pid_keys = [key for key in self.values if key.isdigit()]
            for key in pid_keys:
                del self.values[key]
        # Update oldval (see get())
        self.get(to_pid)
        return 0
1048
1049
# Defaults and limits for the text ui.
DELAY_DEFAULT = 3.0
MAX_GUEST_NAME_LEN = 48  # longer guest names are cut and get '...'
MAX_REGEX_LEN = 44  # longer filter regexes are cut and get '...'
SORT_DEFAULT = 0
MIN_DELAY = 0.1
MAX_DELAY = 25.5
1056
1057
1058class Tui(object):
1059 """Instruments curses to draw a nice text ui."""
def __init__(self, stats, opts):
    """Remember the stats source and set the initial display state."""
    self.stats = stats
    # No curses screen until __enter__ creates one.
    self.screen = None
    self._sorting = SORT_DEFAULT
    self._display_guests = 0
    self._delay_regular = opts.set_delay
    self._delay_initial = 0.25
1067
def __enter__(self):
    """Initialises curses for later use.  Based on curses.wrapper
       implementation from the Python standard library."""
    self.screen = curses.initscr()
    curses.noecho()
    curses.cbreak()

    # Both steps are optional and run in this order:
    # - start_color(): works around a minor bit of over-conscientiousness
    #   in the curses module, the error return from C start_color() is
    #   ignorable.
    # - curs_set(0): hiding the cursor is a separate step as some
    #   monochrome terminals might support hiding but not colors.
    optional_steps = (curses.start_color, lambda: curses.curs_set(0))
    for step in optional_steps:
        try:
            step()
        except curses.error:
            pass

    curses.use_default_colors()
    return self
1092
1093 def __exit__(self, *exception):
1094 """Resets the terminal to its normal state. Based on curses.wrapper
1095 implementation from the Python standard library."""
1096 if self.screen:
1097 self.screen.keypad(0)
1098 curses.echo()
1099 curses.nocbreak()
1100 curses.endwin()
1101
@staticmethod
def get_all_gnames():
    """Returns a list of (pid, gname) tuples of all running guests

    Runs 'ps' and keeps every process whose arguments contain ' -name '.
    Raises Exception if 'ps' cannot be started.

    """
    try:
        child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'],
                                 stdout=subprocess.PIPE)
    except OSError as err:
        # Callers handle a plain Exception; keep that type but say what
        # went wrong.
        raise Exception('unable to run ps: {}'.format(err))

    res = []
    try:
        for line in child.stdout:
            line = line.decode(ENCODING).lstrip().split(' ', 1)
            # perform a sanity check before calling the more expensive
            # function to possibly extract the guest name
            # (a line without an argument column cannot be a guest)
            if len(line) > 1 and ' -name ' in line[1]:
                res.append((line[0], Tui.get_gname_from_pid(line[0])))
    finally:
        # Close the pipe and reap the child on every path, so that no
        # zombie process is left behind.
        child.stdout.close()
        child.wait()

    return res
1120
1121 def _print_all_gnames(self, row):
1122 """Print a list of all running guests along with their pids."""
1123 self.screen.addstr(row, 2, '%8s %-60s' %
1124 ('Pid', 'Guest Name (fuzzy list, might be '
1125 'inaccurate!)'),
1126 curses.A_UNDERLINE)
1127 row += 1
1128 try:
1129 for line in self.get_all_gnames():
1130 self.screen.addstr(row, 2, '%8s %-60s' % (line[0], line[1]))
1131 row += 1
1132 if row >= self.screen.getmaxyx()[0]:
1133 break
1134 except Exception:
1135 self.screen.addstr(row + 1, 2, 'Not available')
1136
@staticmethod
def get_pid_from_gname(gname):
    """Fuzzy function to convert guest name to QEMU process pid.

    Returns a list of potential pids, can be empty if no match found.
    Throws an exception on processing errors.

    """
    # Keep the pid of every guest whose name equals 'gname' exactly.
    return [int(pid) for pid, name in Tui.get_all_gnames()
            if gname == name]
1151
1152 @staticmethod
1153 def get_gname_from_pid(pid):
1154 """Returns the guest name for a QEMU process pid.
1155
1156 Extracts the guest name from the QEMU comma line by processing the
1157 '-name' option. Will also handle names specified out of sequence.
1158
1159 """
1160 name = ''
1161 try:
1162 line = open('/proc/{}/cmdline'
1163 .format(pid), 'r').read().split('\0')
1164 parms = line[line.index('-name') + 1].split(',')
1165 while '' in parms:
1166 # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results
1167 # in # ['foo', '', 'bar'], which we revert here
1168 idx = parms.index('')
1169 parms[idx - 1] += ',' + parms[idx + 1]
1170 del parms[idx:idx+2]
1171 # the '-name' switch allows for two ways to specify the guest name,
1172 # where the plain name overrides the name specified via 'guest='
1173 for arg in parms:
1174 if '=' not in arg:
1175 name = arg
1176 break
1177 if arg[:6] == 'guest=':
1178 name = arg[6:]
1179 except (ValueError, IOError, IndexError):
1180 pass
1181
1182 return name
1183
1184 def _update_pid(self, pid):
1185 """Propagates pid selection to stats object."""
1186 self.screen.addstr(4, 1, 'Updating pid filter...')
1187 self.screen.refresh()
1188 self.stats.pid_filter = pid
1189
1190 def _refresh_header(self, pid=None):
1191 """Refreshes the header."""
1192 if pid is None:
1193 pid = self.stats.pid_filter
1194 self.screen.erase()
1195 gname = self.get_gname_from_pid(pid)
1196 self._gname = gname
1197 if gname:
1198 gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
1199 if len(gname) > MAX_GUEST_NAME_LEN
1200 else gname))
1201 if pid > 0:
1202 self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname)
1203 else:
1204 self._headline = 'kvm statistics - summary'
1205 self.screen.addstr(0, 0, self._headline, curses.A_BOLD)
1206 if self.stats.fields_filter:
1207 regex = self.stats.fields_filter
1208 if len(regex) > MAX_REGEX_LEN:
1209 regex = regex[:MAX_REGEX_LEN] + '...'
1210 self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
1211 if self._display_guests:
1212 col_name = 'Guest Name'
1213 else:
1214 col_name = 'Event'
1215 self.screen.addstr(2, 1, '%-40s %10s%7s %8s' %
1216 (col_name, 'Total', '%Total', 'CurAvg/s'),
1217 curses.A_STANDOUT)
1218 self.screen.addstr(4, 1, 'Collecting data...')
1219 self.screen.refresh()
1220
1221 def _refresh_body(self, sleeptime):
1222 def insert_child(sorted_items, child, values, parent):
1223 num = len(sorted_items)
1224 for i in range(0, num):
1225 # only add child if parent is present
1226 if parent.startswith(sorted_items[i][0]):
1227 sorted_items.insert(i + 1, (' ' + child, values))
1228
1229 def get_sorted_events(self, stats):
1230 """ separate parent and child events """
1231 if self._sorting == SORT_DEFAULT:
1232 def sortkey(pair):
1233 # sort by (delta value, overall value)
1234 v = pair[1]
1235 return (v.delta, v.value)
1236 else:
1237 def sortkey(pair):
1238 # sort by overall value
1239 v = pair[1]
1240 return v.value
1241
1242 childs = []
1243 sorted_items = []
1244 # we can't rule out child events to appear prior to parents even
1245 # when sorted - separate out all children first, and add in later
1246 for key, values in sorted(stats.items(), key=sortkey,
1247 reverse=True):
1248 if values == (0, 0):
1249 continue
1250 if key.find(' ') != -1:
1251 if not self.stats.child_events:
1252 continue
1253 childs.insert(0, (key, values))
1254 else:
1255 sorted_items.append((key, values))
1256 if self.stats.child_events:
1257 for key, values in childs:
1258 (child, parent) = key.split(' ')
1259 insert_child(sorted_items, child, values, parent)
1260
1261 return sorted_items
1262
1263 if not self._is_running_guest(self.stats.pid_filter):
1264 if self._gname:
1265 try: # ...to identify the guest by name in case it's back
1266 pids = self.get_pid_from_gname(self._gname)
1267 if len(pids) == 1:
1268 self._refresh_header(pids[0])
1269 self._update_pid(pids[0])
1270 return
1271 except:
1272 pass
1273 self._display_guest_dead()
1274 # leave final data on screen
1275 return
1276 row = 3
1277 self.screen.move(row, 0)
1278 self.screen.clrtobot()
1279 stats = self.stats.get(self._display_guests)
1280 total = 0.
1281 ctotal = 0.
1282 for key, values in stats.items():
1283 if self._display_guests:
1284 if self.get_gname_from_pid(key):
1285 total += values.value
1286 continue
1287 if not key.find(' ') != -1:
1288 total += values.value
1289 else:
1290 ctotal += values.value
1291 if total == 0.:
1292 # we don't have any fields, or all non-child events are filtered
1293 total = ctotal
1294
1295 # print events
1296 tavg = 0
1297 tcur = 0
1298 guest_removed = False
1299 for key, values in get_sorted_events(self, stats):
1300 if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
1301 break
1302 if self._display_guests:
1303 key = self.get_gname_from_pid(key)
1304 if not key:
1305 continue
1306 cur = int(round(values.delta / sleeptime)) if values.delta else 0
1307 if cur < 0:
1308 guest_removed = True
1309 continue
1310 if key[0] != ' ':
1311 if values.delta:
1312 tcur += values.delta
1313 ptotal = values.value
1314 ltotal = total
1315 else:
1316 ltotal = ptotal
1317 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key,
1318 values.value,
1319 values.value * 100 / float(ltotal), cur))
1320 row += 1
1321 if row == 3:
1322 if guest_removed:
1323 self.screen.addstr(4, 1, 'Guest removed, updating...')
1324 else:
1325 self.screen.addstr(4, 1, 'No matching events reported yet')
1326 if row > 4:
1327 tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
1328 self.screen.addstr(row, 1, '%-40s %10d %8s' %
1329 ('Total', total, tavg), curses.A_BOLD)
1330 self.screen.refresh()
1331
1332 def _display_guest_dead(self):
1333 marker = ' Guest is DEAD '
1334 y = min(len(self._headline), 80 - len(marker))
1335 self.screen.addstr(0, y, marker, curses.A_BLINK | curses.A_STANDOUT)
1336
1337 def _show_msg(self, text):
1338 """Display message centered text and exit on key press"""
1339 hint = 'Press any key to continue'
1340 curses.cbreak()
1341 self.screen.erase()
1342 (x, term_width) = self.screen.getmaxyx()
1343 row = 2
1344 for line in text:
1345 start = (term_width - len(line)) // 2
1346 self.screen.addstr(row, start, line)
1347 row += 1
1348 self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint,
1349 curses.A_STANDOUT)
1350 self.screen.getkey()
1351
1352 def _show_help_interactive(self):
1353 """Display help with list of interactive commands"""
1354 msg = (' b toggle events by guests (debugfs only, honors'
1355 ' filters)',
1356 ' c clear filter',
1357 ' f filter by regular expression',
1358 ' g filter by guest name/PID',
1359 ' h display interactive commands reference',
1360 ' o toggle sorting order (Total vs CurAvg/s)',
1361 ' p filter by guest name/PID',
1362 ' q quit',
1363 ' r reset stats',
1364 ' s set delay between refreshs (value range: '
1365 '%s-%s secs)' % (MIN_DELAY, MAX_DELAY),
1366 ' x toggle reporting of stats for individual child trace'
1367 ' events',
1368 'Any other key refreshes statistics immediately')
1369 curses.cbreak()
1370 self.screen.erase()
1371 self.screen.addstr(0, 0, "Interactive commands reference",
1372 curses.A_BOLD)
1373 self.screen.addstr(2, 0, "Press any key to exit", curses.A_STANDOUT)
1374 row = 4
1375 for line in msg:
1376 self.screen.addstr(row, 0, line)
1377 row += 1
1378 self.screen.getkey()
1379 self._refresh_header()
1380
1381 def _show_filter_selection(self):
1382 """Draws filter selection mask.
1383
1384 Asks for a valid regex and sets the fields filter accordingly.
1385
1386 """
1387 msg = ''
1388 while True:
1389 self.screen.erase()
1390 self.screen.addstr(0, 0,
1391 "Show statistics for events matching a regex.",
1392 curses.A_BOLD)
1393 self.screen.addstr(2, 0,
1394 "Current regex: {0}"
1395 .format(self.stats.fields_filter))
1396 self.screen.addstr(5, 0, msg)
1397 self.screen.addstr(3, 0, "New regex: ")
1398 curses.echo()
1399 regex = self.screen.getstr().decode(ENCODING)
1400 curses.noecho()
1401 if len(regex) == 0:
1402 self.stats.fields_filter = ''
1403 self._refresh_header()
1404 return
1405 try:
1406 re.compile(regex)
1407 self.stats.fields_filter = regex
1408 self._refresh_header()
1409 return
1410 except re.error:
1411 msg = '"' + regex + '": Not a valid regular expression'
1412 continue
1413
1414 def _show_set_update_interval(self):
1415 """Draws update interval selection mask."""
1416 msg = ''
1417 while True:
1418 self.screen.erase()
1419 self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).'
1420 % DELAY_DEFAULT, curses.A_BOLD)
1421 self.screen.addstr(4, 0, msg)
1422 self.screen.addstr(2, 0, 'Change delay from %.1fs to ' %
1423 self._delay_regular)
1424 curses.echo()
1425 val = self.screen.getstr().decode(ENCODING)
1426 curses.noecho()
1427
1428 try:
1429 if len(val) > 0:
1430 delay = float(val)
1431 err = is_delay_valid(delay)
1432 if err is not None:
1433 msg = err
1434 continue
1435 else:
1436 delay = DELAY_DEFAULT
1437 self._delay_regular = delay
1438 break
1439
1440 except ValueError:
1441 msg = '"' + str(val) + '": Invalid value'
1442 self._refresh_header()
1443
1444 def _is_running_guest(self, pid):
1445 """Check if pid is still a running process."""
1446 if not pid:
1447 return True
1448 return os.path.isdir(os.path.join('/proc/', str(pid)))
1449
1450 def _show_vm_selection_by_guest(self):
1451 """Draws guest selection mask.
1452
1453 Asks for a guest name or pid until a valid guest name or '' is entered.
1454
1455 """
1456 msg = ''
1457 while True:
1458 self.screen.erase()
1459 self.screen.addstr(0, 0,
1460 'Show statistics for specific guest or pid.',
1461 curses.A_BOLD)
1462 self.screen.addstr(1, 0,
1463 'This might limit the shown data to the trace '
1464 'statistics.')
1465 self.screen.addstr(5, 0, msg)
1466 self._print_all_gnames(7)
1467 curses.echo()
1468 curses.curs_set(1)
1469 self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ")
1470 guest = self.screen.getstr().decode(ENCODING)
1471 curses.noecho()
1472
1473 pid = 0
1474 if not guest or guest == '0':
1475 break
1476 if guest.isdigit():
1477 if not self._is_running_guest(guest):
1478 msg = '"' + guest + '": Not a running process'
1479 continue
1480 pid = int(guest)
1481 break
1482 pids = []
1483 try:
1484 pids = self.get_pid_from_gname(guest)
1485 except:
1486 msg = '"' + guest + '": Internal error while searching, ' \
1487 'use pid filter instead'
1488 continue
1489 if len(pids) == 0:
1490 msg = '"' + guest + '": Not an active guest'
1491 continue
1492 if len(pids) > 1:
1493 msg = '"' + guest + '": Multiple matches found, use pid ' \
1494 'filter instead'
1495 continue
1496 pid = pids[0]
1497 break
1498 curses.curs_set(0)
1499 self._refresh_header(pid)
1500 self._update_pid(pid)
1501
1502 def show_stats(self):
1503 """Refreshes the screen and processes user input."""
1504 sleeptime = self._delay_initial
1505 self._refresh_header()
1506 start = 0.0 # result based on init value never appears on screen
1507 while True:
1508 self._refresh_body(time.time() - start)
1509 curses.halfdelay(int(sleeptime * 10))
1510 start = time.time()
1511 sleeptime = self._delay_regular
1512 try:
1513 char = self.screen.getkey()
1514 if char == 'b':
1515 self._display_guests = not self._display_guests
1516 if self.stats.toggle_display_guests(self._display_guests):
1517 self._show_msg(['Command not available with '
1518 'tracepoints enabled', 'Restart with '
1519 'debugfs only (see option \'-d\') and '
1520 'try again!'])
1521 self._display_guests = not self._display_guests
1522 self._refresh_header()
1523 if char == 'c':
1524 self.stats.fields_filter = ''
1525 self._refresh_header(0)
1526 self._update_pid(0)
1527 if char == 'f':
1528 curses.curs_set(1)
1529 self._show_filter_selection()
1530 curses.curs_set(0)
1531 sleeptime = self._delay_initial
1532 if char == 'g' or char == 'p':
1533 self._show_vm_selection_by_guest()
1534 sleeptime = self._delay_initial
1535 if char == 'h':
1536 self._show_help_interactive()
1537 if char == 'o':
1538 self._sorting = not self._sorting
1539 if char == 'q':
1540 break
1541 if char == 'r':
1542 self.stats.reset()
1543 if char == 's':
1544 curses.curs_set(1)
1545 self._show_set_update_interval()
1546 curses.curs_set(0)
1547 sleeptime = self._delay_initial
1548 if char == 'x':
1549 self.stats.child_events = not self.stats.child_events
1550 except KeyboardInterrupt:
1551 break
1552 except curses.error:
1553 continue
1554
1555
def batch(stats):
    """Print all statistics once, one 'key value delta' line per key.

    The statistics are read twice, one second apart; the result of the
    first read is discarded. Only the part of each key in front of the
    first blank is printed. A KeyboardInterrupt ends the function silently.

    """
    try:
        stats.get()
        time.sleep(1)
        sample = stats.get()
        for key in sorted(sample):
            values = sample[key]
            name = key.split(' ')[0]
            print('%-42s%10d%10d' % (name, values.value, values.delta))
    except KeyboardInterrupt:
        pass
1567
1568
class StdFormat(object):
    """Formats log output as blank separated columns."""

    def __init__(self, keys):
        """Build the banner from the first word of each key."""
        self._banner = ''.join(key.split(' ')[0] + ' ' for key in keys)

    def get_banner(self):
        """Return the banner line naming all columns."""
        return self._banner

    def get_statline(self, keys, s):
        """Return the deltas of all keys found in s, 9 digits wide each."""
        return ''.join(' %9d' % s[key].delta for key in keys)
1583
1584
class CSVFormat(object):
    """Formats log output as comma separated values."""

    def __init__(self, keys):
        """Build the banner: 'timestamp', then the first word of each key."""
        columns = ''.join(',{!s}'.format(key.split(' ')[0]) for key in keys)
        self._banner = 'timestamp' + columns

    def get_banner(self):
        """Return the banner line naming all columns."""
        return self._banner

    def get_statline(self, keys, s):
        """Return the deltas of all keys found in s, each led by a comma."""
        return ''.join(',{!s}'.format(s[key].delta) for key in keys)
1597
1598
def log(stats, opts, frmt, keys):
    """Prints statistics as reiterating key block, multiple value blocks.

    stats -- object whose get() returns the current values by key
    opts  -- options; log_to_file, set_delay and skip_zero_records are used
    frmt  -- formatter offering get_banner() and get_statline()
    keys  -- keys to report, in output order

    Writes to opts.log_to_file if set, to stdout otherwise. Runs until
    interrupted via KeyboardInterrupt. Whenever the flag signal_received is
    found set, the log file is closed and opened anew.

    """
    global signal_received
    line = 0
    banner_repeat = 20
    f = None

    def do_banner(opts):
        """Open the log file if required, and print the banner."""
        nonlocal f
        if opts.log_to_file:
            if not f:
                try:
                    f = open(opts.log_to_file, 'a')
                except (IOError, OSError):
                    sys.exit("Error: Could not open file: %s" %
                             opts.log_to_file)
                # a non-empty csv file is assumed to hold its banner already
                if isinstance(frmt, CSVFormat) and f.tell() != 0:
                    return
        print(frmt.get_banner(), file=f or sys.stdout)

    def do_statline(opts, values):
        """Print one timestamped line of values."""
        statline = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + \
            frmt.get_statline(keys, values)
        print(statline, file=f or sys.stdout)

    do_banner(opts)
    banner_printed = True
    while True:
        try:
            time.sleep(opts.set_delay)
            if signal_received:
                banner_printed = True
                line = 0
                if f:
                    f.close()
                    # a closed file object is still true - forget about it,
                    # or do_banner() would not reopen the file and all
                    # further output would hit the closed file
                    f = None
                do_banner(opts)
                signal_received = False
            if (line % banner_repeat == 0 and not banner_printed and
                    not (opts.log_to_file and isinstance(frmt, CSVFormat))):
                do_banner(opts)
                banner_printed = True
            values = stats.get()
            if (not opts.skip_zero_records or
                    any(values[k].delta != 0 for k in keys)):
                do_statline(opts, values)
                line += 1
                banner_printed = False
        except KeyboardInterrupt:
            break

    if f:
        f.close()
1650
1651
def handle_signal(sig, frame):
    """Signal handler: take note that a signal has arrived.

    Merely sets the module-level flag signal_received. Both arguments are
    ignored.

    """
    global signal_received
    signal_received = True
1658
1659
def is_delay_valid(delay):
    """Verify delay is in valid value range.

    delay -- the delay in seconds, as a float

    Returns None if delay is within MIN_DELAY and MAX_DELAY (inclusive),
    an error message to display otherwise. NaN is rejected, too: it
    compares false against both limits and would hence pass the plain
    range checks, only to make the consumers of the delay fail later on.

    """
    if delay < MIN_DELAY:
        return '"' + str(delay) + '": Delay must be >=%s' % MIN_DELAY
    if delay > MAX_DELAY:
        return '"' + str(delay) + '": Delay must be <=%s' % MAX_DELAY
    if delay != delay:
        # NaN is the only value that is unequal to itself
        return '"' + str(delay) + '": Delay must be a number'
    return None
1668
1669
def get_options():
    """Returns processed program arguments.

    Parses the command line and returns the resulting argparse namespace.
    Exits with an error message if options are combined in an unsupported
    way, if the guest given via -g/--guest cannot be mapped to exactly one
    pid, or if the fields filter is not a valid regular expression.

    """
    description_text = """
This script displays various statistics about VMs running under KVM.
The statistics are gathered from the KVM debugfs entries and / or the
currently available perf traces.

The monitoring takes additional cpu cycles and might affect the VM's
performance.

Requirements:
- Access to:
 %s
 %s/events/*
 /proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
 CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
 the large number of files that are possibly opened.

Interactive Commands:
 b toggle events by guests (debugfs only, honors filters)
 c clear filter
 f filter by regular expression
 g filter by guest name
 h display interactive commands reference
 o toggle sorting order (Total vs CurAvg/s)
 p filter by PID
 q quit
 r reset stats
 s set update interval (value range: 0.1-25.5 secs)
 x toggle reporting of stats for individual child trace events
Press any other key to refresh statistics immediately.
""" % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING)

    class Guest_to_pid(argparse.Action):
        """Resolves the guest name given to the pid stored as 'pid'."""
        def __call__(self, parser, namespace, values, option_string=None):
            try:
                pids = Tui.get_pid_from_gname(values)
            except Exception:
                # do not catch KeyboardInterrupt and SystemExit here, as a
                # bare except would
                sys.exit('Error while searching for guest "{}". Use "-p" to '
                         'specify a pid instead?'.format(values))
            if len(pids) == 0:
                sys.exit('Error: No guest by the name "{}" found'
                         .format(values))
            if len(pids) > 1:
                sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
                         ' to specify the desired pid'
                         .format(" ".join(map(str, pids))))
            namespace.pid = pids[0]

    argparser = argparse.ArgumentParser(description=description_text,
                                        formatter_class=argparse
                                        .RawTextHelpFormatter)
    argparser.add_argument('-1', '--once', '--batch',
                           action='store_true',
                           default=False,
                           help='run in batch mode for one second',
                           )
    argparser.add_argument('-c', '--csv',
                           action='store_true',
                           default=False,
                           help='log in csv format - requires option -l/-L',
                           )
    argparser.add_argument('-d', '--debugfs',
                           action='store_true',
                           default=False,
                           help='retrieve statistics from debugfs',
                           )
    argparser.add_argument('-f', '--fields',
                           default='',
                           help='''fields to display (regex)
"-f help" for a list of available events''',
                           )
    argparser.add_argument('-g', '--guest',
                           type=str,
                           help='restrict statistics to guest by name',
                           action=Guest_to_pid,
                           )
    argparser.add_argument('-i', '--debugfs-include-past',
                           action='store_true',
                           default=False,
                           help='include all available data on past events for'
                                ' debugfs',
                           )
    argparser.add_argument('-l', '--log',
                           action='store_true',
                           default=False,
                           help='run in logging mode (like vmstat)',
                           )
    argparser.add_argument('-L', '--log-to-file',
                           type=str,
                           metavar='FILE',
                           help="like '--log', but logging to a file"
                           )
    argparser.add_argument('-p', '--pid',
                           type=int,
                           default=0,
                           help='restrict statistics to pid',
                           )
    argparser.add_argument('-s', '--set-delay',
                           type=float,
                           default=DELAY_DEFAULT,
                           metavar='DELAY',
                           help='set delay between refreshs (value range: '
                                '%s-%s secs)' % (MIN_DELAY, MAX_DELAY),
                           )
    argparser.add_argument('-t', '--tracepoints',
                           action='store_true',
                           default=False,
                           help='retrieve statistics from tracepoints',
                           )
    argparser.add_argument('-z', '--skip-zero-records',
                           action='store_true',
                           default=False,
                           help='omit records with all zeros in logging mode',
                           )
    options = argparser.parse_args()
    # csv output and skipping of zero records only apply to logging mode
    if options.csv and not (options.log or options.log_to_file):
        sys.exit('Error: Option -c/--csv requires -l/--log')
    if options.skip_zero_records and not (options.log or options.log_to_file):
        sys.exit('Error: Option -z/--skip-zero-records requires -l/-L')
    try:
        # verify that we were passed a valid regex up front
        re.compile(options.fields)
    except re.error:
        sys.exit('Error: "' + options.fields + '" is not a valid regular '
                 'expression')

    return options
1800
1801
def check_access(options):
    """Verify that the tracing directory is there if it is needed.

    Tracing is needed if tracepoints were requested explicitly, or if
    debugfs was not requested. If the directory is missing, a hint is
    written to stderr; the program then exits if tracepoints were asked
    for, and falls back to debugfs statistics (after a pause of five
    seconds) otherwise.

    Returns the, possibly modified, options.

    """
    tracing_needed = options.tracepoints or not options.debugfs
    if os.path.exists(PATH_DEBUGFS_TRACING) or not tracing_needed:
        return options

    hint = ("Please enable CONFIG_TRACING in your kernel "
            "when using the option -t (default).\n"
            "If it is enabled, make {0} readable by the "
            "current user.\n")
    sys.stderr.write(hint.format(PATH_DEBUGFS_TRACING))
    if options.tracepoints:
        sys.exit(1)

    sys.stderr.write("Falling back to debugfs statistics!\n")
    options.debugfs = True
    time.sleep(5)
    return options
1819
1820
def assign_globals():
    """Locate debugfs and derive the module-level debugfs paths from it.

    Sets PATH_DEBUGFS_KVM and PATH_DEBUGFS_TRACING. Exits with status 1 if
    /proc/mounts lists no debugfs mount, or if there is no 'kvm' entry
    below the debugfs mount point.

    """
    global PATH_DEBUGFS_KVM
    global PATH_DEBUGFS_TRACING

    debugfs = ''
    # close the file explicitly instead of leaving that to garbage collection
    with open('/proc/mounts') as mounts:
        for line in mounts:
            # the mount point is the 2nd field, the filesystem type the 3rd
            fields = line.split(' ')
            if fields[2] == 'debugfs':
                debugfs = fields[1]
                break
    if debugfs == '':
        sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in "
                         "your kernel, mounted and\nreadable by the current "
                         "user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n")
        sys.exit(1)

    PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm')
    PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing')

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure that CONFIG_KVM is enabled in "
                         "your kernel and that the modules are loaded.\n")
        sys.exit(1)
1844
1845
def main():
    """Program entry point: evaluate the options, pick the output mode.

    Sets up the globals, validates pid and delay, then either lists the
    available events ('-f help'), logs, prints a single batch, or runs
    the interactive text ui.

    """
    assign_globals()
    options = check_access(get_options())

    if options.pid > 0:
        proc_dir = os.path.join('/proc/', str(options.pid))
        if not os.path.isdir(proc_dir):
            sys.stderr.write('Did you use a (unsupported) tid instead of a '
                             'pid?\n')
            sys.exit('Specified pid does not exist.')

    err = is_delay_valid(options.set_delay)
    if err is not None:
        sys.exit('Error: ' + err)

    stats = Stats(options)

    if options.fields == 'help':
        stats.fields_filter = None
        # strip anything from the first '(' on, and drop duplicates
        events = {key.split('(', 1)[0] for key in stats.get().keys()}
        sys.stdout.write(' ' + '\n '.join(sorted(events)) + '\n')
        sys.exit(0)

    if options.log or options.log_to_file:
        if options.log_to_file:
            signal.signal(signal.SIGHUP, handle_signal)
        keys = sorted(stats.get().keys())
        frmt = CSVFormat(keys) if options.csv else StdFormat(keys)
        log(stats, options, frmt, keys)
    elif options.once:
        batch(stats)
    else:
        with Tui(stats, options) as tui:
            tui.show_stats()
1885
1886
# only run main() when executed as a script, not when imported as a module
if __name__ == "__main__":
    main()
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0-only
3#
4# top-like utility for displaying kvm statistics
5#
6# Copyright 2006-2008 Qumranet Technologies
7# Copyright 2008-2011 Red Hat, Inc.
8#
9# Authors:
10# Avi Kivity <avi@redhat.com>
11#
12"""The kvm_stat module outputs statistics about running KVM VMs
13
14Three different ways of output formatting are available:
15- as a top-like text ui
16- in a key -> value format
17- in an all keys, all values format
18
19The data is sampled from the KVM's debugfs entries and its perf events.
20"""
21from __future__ import print_function
22
23import curses
24import sys
25import locale
26import os
27import time
28import argparse
29import ctypes
30import fcntl
31import resource
32import struct
33import re
34import subprocess
35import signal
36from collections import defaultdict, namedtuple
37from functools import reduce
38from datetime import datetime
39
# Maps VMX exit reason names to their numbers; handed to ArchX86 by
# Arch.get_arch() if the cpu flags contain 'vmx'.
VMX_EXIT_REASONS = {
    'EXCEPTION_NMI':        0,
    'EXTERNAL_INTERRUPT':   1,
    'TRIPLE_FAULT':         2,
    'PENDING_INTERRUPT':    7,
    'NMI_WINDOW':           8,
    'TASK_SWITCH':          9,
    'CPUID':                10,
    'HLT':                  12,
    'INVLPG':               14,
    'RDPMC':                15,
    'RDTSC':                16,
    'VMCALL':               18,
    'VMCLEAR':              19,
    'VMLAUNCH':             20,
    'VMPTRLD':              21,
    'VMPTRST':              22,
    'VMREAD':               23,
    'VMRESUME':             24,
    'VMWRITE':              25,
    'VMOFF':                26,
    'VMON':                 27,
    'CR_ACCESS':            28,
    'DR_ACCESS':            29,
    'IO_INSTRUCTION':       30,
    'MSR_READ':             31,
    'MSR_WRITE':            32,
    'INVALID_STATE':        33,
    'MWAIT_INSTRUCTION':    36,
    'MONITOR_INSTRUCTION':  39,
    'PAUSE_INSTRUCTION':    40,
    'MCE_DURING_VMENTRY':   41,
    'TPR_BELOW_THRESHOLD':  43,
    'APIC_ACCESS':          44,
    'EPT_VIOLATION':        48,
    'EPT_MISCONFIG':        49,
    'WBINVD':               54,
    'XSETBV':               55,
    'APIC_WRITE':           56,
    'INVPCID':              58,
}
81
# Maps SVM exit reason names to their numbers; handed to ArchX86 by
# Arch.get_arch() if the cpu flags contain 'svm'.
SVM_EXIT_REASONS = {
    'READ_CR0':       0x000,
    'READ_CR3':       0x003,
    'READ_CR4':       0x004,
    'READ_CR8':       0x008,
    'WRITE_CR0':      0x010,
    'WRITE_CR3':      0x013,
    'WRITE_CR4':      0x014,
    'WRITE_CR8':      0x018,
    'READ_DR0':       0x020,
    'READ_DR1':       0x021,
    'READ_DR2':       0x022,
    'READ_DR3':       0x023,
    'READ_DR4':       0x024,
    'READ_DR5':       0x025,
    'READ_DR6':       0x026,
    'READ_DR7':       0x027,
    'WRITE_DR0':      0x030,
    'WRITE_DR1':      0x031,
    'WRITE_DR2':      0x032,
    'WRITE_DR3':      0x033,
    'WRITE_DR4':      0x034,
    'WRITE_DR5':      0x035,
    'WRITE_DR6':      0x036,
    'WRITE_DR7':      0x037,
    'EXCP_BASE':      0x040,
    'INTR':           0x060,
    'NMI':            0x061,
    'SMI':            0x062,
    'INIT':           0x063,
    'VINTR':          0x064,
    'CR0_SEL_WRITE':  0x065,
    'IDTR_READ':      0x066,
    'GDTR_READ':      0x067,
    'LDTR_READ':      0x068,
    'TR_READ':        0x069,
    'IDTR_WRITE':     0x06a,
    'GDTR_WRITE':     0x06b,
    'LDTR_WRITE':     0x06c,
    'TR_WRITE':       0x06d,
    'RDTSC':          0x06e,
    'RDPMC':          0x06f,
    'PUSHF':          0x070,
    'POPF':           0x071,
    'CPUID':          0x072,
    'RSM':            0x073,
    'IRET':           0x074,
    'SWINT':          0x075,
    'INVD':           0x076,
    'PAUSE':          0x077,
    'HLT':            0x078,
    'INVLPG':         0x079,
    'INVLPGA':        0x07a,
    'IOIO':           0x07b,
    'MSR':            0x07c,
    'TASK_SWITCH':    0x07d,
    'FERR_FREEZE':    0x07e,
    'SHUTDOWN':       0x07f,
    'VMRUN':          0x080,
    'VMMCALL':        0x081,
    'VMLOAD':         0x082,
    'VMSAVE':         0x083,
    'STGI':           0x084,
    'CLGI':           0x085,
    'SKINIT':         0x086,
    'RDTSCP':         0x087,
    'ICEBP':          0x088,
    'WBINVD':         0x089,
    'MONITOR':        0x08a,
    'MWAIT':          0x08b,
    'MWAIT_COND':     0x08c,
    'XSETBV':         0x08d,
    'NPF':            0x400,
}
156
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
# Maps exit reason names to their numbers; used as exit reasons by ArchA64.
AARCH64_EXIT_REASONS = {
    'UNKNOWN':      0x00,
    'WFI':          0x01,
    'CP15_32':      0x03,
    'CP15_64':      0x04,
    'CP14_MR':      0x05,
    'CP14_LS':      0x06,
    'FP_ASIMD':     0x07,
    'CP10_ID':      0x08,
    'CP14_64':      0x0C,
    'ILL_ISS':      0x0E,
    'SVC32':        0x11,
    'HVC32':        0x12,
    'SMC32':        0x13,
    'SVC64':        0x15,
    'HVC64':        0x16,
    'SMC64':        0x17,
    'SYS64':        0x18,
    'IABT':         0x20,
    'IABT_HYP':     0x21,
    'PC_ALIGN':     0x22,
    'DABT':         0x24,
    'DABT_HYP':     0x25,
    'SP_ALIGN':     0x26,
    'FP_EXC32':     0x28,
    'FP_EXC64':     0x2C,
    'SERROR':       0x2F,
    'BREAKPT':      0x30,
    'BREAKPT_HYP':  0x31,
    'SOFTSTP':      0x32,
    'SOFTSTP_HYP':  0x33,
    'WATCHPT':      0x34,
    'WATCHPT_HYP':  0x35,
    'BKPT32':       0x38,
    'VECTOR32':     0x3A,
    'BRK64':        0x3C,
}
195
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# Maps the names (without the KVM_EXIT_ prefix) to their numbers.
USERSPACE_EXIT_REASONS = {
    'UNKNOWN':          0,
    'EXCEPTION':        1,
    'IO':               2,
    'HYPERCALL':        3,
    'DEBUG':            4,
    'HLT':              5,
    'MMIO':             6,
    'IRQ_WINDOW_OPEN':  7,
    'SHUTDOWN':         8,
    'FAIL_ENTRY':       9,
    'INTR':             10,
    'SET_TPR':          11,
    'TPR_ACCESS':       12,
    'S390_SIEIC':       13,
    'S390_RESET':       14,
    'DCR':              15,
    'NMI':              16,
    'INTERNAL_ERROR':   17,
    'OSI':              18,
    'PAPR_HCALL':       19,
    'S390_UCONTROL':    20,
    'WATCHDOG':         21,
    'S390_TSCH':        22,
    'EPR':              23,
    'SYSTEM_EVENT':     24,
}
224
# ioctl request numbers by operation, made available by the Arch classes
# as 'ioctl_numbers'; ArchPPC uses different values
IOCTL_NUMBERS = {
    'SET_FILTER':  0x40082406,
    'ENABLE':      0x00002400,
    'DISABLE':     0x00002401,
    'RESET':       0x00002403,
}
231
# flag set by handle_signal(), evaluated and cleared again by log()
signal_received = False

# encoding used to decode bytes, e.g. the input read from the curses screen
ENCODING = locale.getpreferredencoding(False)
# matches names containing no '(' at all; see Arch.tracepoint_is_child()
TRACE_FILTER = re.compile(r'^[^\(]*$')
236
237
class Arch(object):
    """Encapsulates global architecture specific data.

    Contains the performance event open syscall and ioctl numbers, as
    well as the VM exit reasons for the architecture it runs on.

    """
    @staticmethod
    def get_arch():
        """Return the Arch object matching the machine we are running on.

        The machine name reported by os.uname() selects ArchPPC, ArchA64 or
        ArchS390. Anything else is treated as x86, where the first 'flags'
        line in /proc/cpuinfo decides between VMX and SVM exit reasons.

        Returns None if that line names neither 'vmx' nor 'svm', or if
        there is no such line at all.

        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        elif machine.startswith('aarch64'):
            return ArchA64()
        elif machine.startswith('s390'):
            return ArchS390()
        else:
            # X86_64
            # the context manager closes the file on each of the returns
            with open('/proc/cpuinfo') as cpuinfo:
                for line in cpuinfo:
                    if not line.startswith('flags'):
                        continue

                    flags = line.split()
                    if 'vmx' in flags:
                        return ArchX86(VMX_EXIT_REASONS)
                    if 'svm' in flags:
                        return ArchX86(SVM_EXIT_REASONS)
                    return None
            return None

    def tracepoint_is_child(self, field):
        """Returns name of parent if 'field' is a child, None otherwise.

        A field counts as a child if it contains a '('; the parent's name
        is the part in front of the first '('.

        """
        if (TRACE_FILTER.match(field)):
            return None
        return field.split('(', 1)[0]
272
273
class ArchX86(Arch):
    """Architecture data for x86."""

    def __init__(self, exit_reasons):
        """Store the exit reasons given along with the x86 constants.

        exit_reasons -- dict mapping exit reason names to numbers

        """
        self.exit_reasons = exit_reasons
        self.exit_reason_field = 'exit_reason'
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 298

    def debugfs_is_child(self, field):
        """Always returns None: no debugfs field is treated as a child."""
        return None
284
285
class ArchPPC(Arch):
    """Architecture data for PPC."""

    def __init__(self):
        """Set up the PPC constants, including the PPC ioctl numbers."""
        self.sc_perf_evt_open = 319
        # work on a copy: assigning into IOCTL_NUMBERS itself would change
        # the module-level defaults shared with the other Arch classes
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        self.ioctl_numbers['SET_FILTER'] = (0x80002406 |
                                            (char_ptr_size << 16))
        self.exit_reason_field = 'exit_nr'
        self.exit_reasons = {}

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise """
        return None
304
305
class ArchA64(Arch):
    """Architecture data for aarch64."""

    def __init__(self):
        """Set up the aarch64 constants and exit reasons."""
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.exit_reason_field = 'esr_ec'
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 241

    def debugfs_is_child(self, field):
        """Always returns None: no debugfs field is treated as a child."""
        return None
316
317
class ArchS390(Arch):
    """Architecture data for s390."""

    def __init__(self):
        """Set up the s390 constants; no exit reasons are provided."""
        self.exit_reasons = None
        self.exit_reason_field = None
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 331

    def debugfs_is_child(self, field):
        """Return the parent's name if field is a child, None otherwise.

        All fields starting with 'instruction_' are children of
        'exit_instruction'.

        """
        return ('exit_instruction' if field.startswith('instruction_')
                else None)
329
330
# architecture specific data, determined once when the module is loaded;
# note that Arch.get_arch() may return None
ARCH = Arch.get_arch()
332
333
class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        """Initialize type, size and read_format."""
        # name the class explicitly: super(self.__class__, self) would
        # recurse endlessly as soon as this class gets subclassed
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP
361
362
# value perf_event_attr.__init__() assigns to the 'type' field
PERF_TYPE_TRACEPOINT = 2
# value perf_event_attr.__init__() assigns to the 'read_format' field
PERF_FORMAT_GROUP = 1 << 3
365
366
class Group(object):
    """Represents a perf event group."""

    def __init__(self):
        """Start out with an empty list of events."""
        self.events = []

    def add_event(self, event):
        """Append event to the group."""
        self.events.append(event)

    def read(self):
        """Returns a dict with 'event name: value' for all events in the
        group.

        All values are obtained by a single read from the file descriptor
        of the first event, which is the group leader. See
        perf_event_open(2) for details.

        The data read is laid out as follows:
        struct read_format {
            u64 nr;            /* The number of events */
            struct {
                u64 value;     /* The value of the event */
            } values[nr];
        };

        """
        names = [event.name for event in self.events]
        count = len(self.events)
        # skip the 8 bytes of 'nr', then unpack one u64 per event
        layout = 'xxxxxxxx' + 'Q' * count
        raw = os.read(self.events[0].fd, 8 * (1 + count))
        return dict(zip(names, struct.unpack(layout, raw)))
398
399
class Event(object):
    """Represents a performance event and manages its life cycle."""
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        # Set first, so that __del__() works even if set-up fails below.
        self.fd = None
        self.name = name
        self.libc = ctypes.CDLL('libc.so.6', use_errno=True)
        self.syscall = self.libc.syscall
        self._setup_event(group, trace_cpu, trace_pid, trace_point,
                          trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        # getattr() guards against a partially constructed object; the
        # 'is not None' test makes sure a descriptor of 0 is closed as well.
        fd = getattr(self, 'fd', None)
        if fd is not None:
            # Forget the descriptor first so it is never closed twice.
            self.fd = None
            os.close(fd)

    def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
        """Wrapper for the sys_perf_evt_open() syscall.

        Used to set up performance events, returns a file descriptor or -1
        on error.

        Attributes are:
        - syscall number
        - struct perf_event_attr *
        - pid or -1 to monitor all pids
        - cpu number or -1 to monitor all cpus
        - The file descriptor of the group leader or -1 to create a group.
        - flags

        """
        return self.syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
                            ctypes.c_int(pid), ctypes.c_int(cpu),
                            ctypes.c_int(group_fd), ctypes.c_long(flags))

    def _setup_event_attribute(self, trace_set, trace_point):
        """Returns an initialized ctype perf_event_attr struct.

        The tracepoint id is read from the 'id' file of the event's
        directory below the tracing debugfs.

        """
        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def _setup_event(self, group, trace_cpu, trace_pid, trace_point,
                     trace_filter, trace_set):
        """Sets up the perf event in Linux.

        Issues the syscall to register the event in the kernel and
        then sets the optional filter.

        Raises OSError if the syscall fails.

        """
        event_attr = self._setup_event_attribute(trace_set, trace_point)

        # First event will be group leader.
        group_leader = -1

        # All others have to pass the leader's descriptor instead.
        if group.events:
            group_leader = group.events[0].fd

        fd = self._perf_event_open(event_attr, trace_pid,
                                   trace_cpu, group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        # Take ownership before setting the filter: should the ioctl fail,
        # __del__() still closes the descriptor instead of leaking it.
        self.fd = fd

        if trace_filter:
            fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                        trace_filter)

    def enable(self):
        """Enables the trace event in the kernel.

        Enabling the group leader makes reading counters from it and the
        events under it possible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Disables the trace event in the kernel.

        Disabling the group leader makes reading all counters under it
        impossible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Resets the count of the trace event in the kernel."""
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
503
504
class Provider(object):
    """Base class bundling the helpers shared by all data providers."""
    def __init__(self, pid):
        self.child_events = False
        # Plain attribute here; subclasses may turn 'pid' into a property.
        self.pid = pid

    @staticmethod
    def is_field_wanted(fields_filter, field):
        """Tell whether 'field' passes the regex 'fields_filter'.

        An empty or missing filter accepts every field; otherwise the
        regex has to match at the beginning of the field name.
        """
        return (not fields_filter or
                re.match(fields_filter, field) is not None)

    @staticmethod
    def walkdir(path):
        """Returns the first os.walk() entry for the given directory.

        This is the usual 3-tuple of (dirpath, dirnames, filenames)
        describing 'path' itself.
        """
        walker = os.walk(path)
        return next(walker)
526
527
class TracepointProvider(Provider):
    """Data provider for the stats class.

    Manages the events/groups from which it acquires its data.

    """
    def __init__(self, pid, fields_filter):
        self.group_leaders = []
        self.filters = self._get_filters()
        self.update_fields(fields_filter)
        # Assigning the pid in the base class sets up the traces.
        super(TracepointProvider, self).__init__(pid)

    @staticmethod
    def _get_filters():
        """Returns a dict of trace events, their filter ids and
        the values that can be filtered.

        Trace events can be filtered for special values by setting a
        filter string via an ioctl. The string normally has the format
        identifier==value. For each filter a new event will be created, to
        be able to distinguish the events.

        """
        filters = {}
        filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
        if ARCH.exit_reason_field and ARCH.exit_reasons:
            filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons)
        return filters

    def _get_available_fields(self):
        """Returns a list of available events of format 'event name(filter
        name)'.

        All available events have directories under
        /sys/kernel/debug/tracing/events/ which export information
        about the specific event. Therefore, listing the dirs gives us
        a list of all available events.

        Some events like the vm exit reasons can be filtered for
        specific values. To take account for that, the routine below
        creates special fields with the following format:
        event name(filter name)

        """
        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = self.walkdir(path)[1]
        extra = []
        for field in fields:
            if field in self.filters:
                filter_name_, filter_dicts = self.filters[field]
                for name in filter_dicts:
                    extra.append(field + '(' + name + ')')
        fields += extra
        return fields

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        fields = [field for field in self._get_available_fields()
                  if self.is_field_wanted(fields_filter, field)]
        # add parents for child fields - otherwise we won't see any output!
        # (parents appended here are visited by this loop as well)
        for field in fields:
            parent = ARCH.tracepoint_is_child(field)
            if parent and parent not in fields:
                fields.append(parent)
        # Assign only once the list is complete: the setter enables the
        # listed events and disables the others, so a parent appended
        # afterwards would be left disabled in the kernel.
        self.fields = fields

    @staticmethod
    def _get_online_cpus():
        """Returns a list of cpu id integers."""
        def parse_int_list(list_string):
            """Returns an int list from a string of comma separated integers and
            integer ranges."""
            integers = []
            members = list_string.split(',')

            for member in members:
                if '-' not in member:
                    integers.append(int(member))
                else:
                    int_range = member.split('-')
                    integers.extend(range(int(int_range[0]),
                                          int(int_range[1]) + 1))

            return integers

        with open('/sys/devices/system/cpu/online') as cpu_list:
            cpu_string = cpu_list.readline()
            return parse_int_list(cpu_string)

    def _setup_traces(self):
        """Creates all event and group objects needed to be able to retrieve
        data.

        One group is created per thread of the monitored pid, or per
        online cpu when no pid is monitored. Exits the program if the
        NOFILE rlimit cannot be raised far enough.

        """
        fields = self._get_available_fields()
        if self._pid > 0:
            # Fetch list of all threads of the monitored pid, as qemu
            # starts a thread for each vcpu.
            path = os.path.join('/proc', str(self._pid), 'task')
            groupids = self.walkdir(path)[1]
        else:
            groupids = self._get_online_cpus()

        # The constant is needed as a buffer for python libs, std
        # streams and other files that the script opens.
        newlim = len(groupids) * len(fields) + 50
        try:
            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)

            if hardlim < newlim:
                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
            else:
                # Raising the soft limit is sufficient.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))

        except ValueError:
            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))

        for groupid in groupids:
            group = Group()
            for name in fields:
                tracepoint = name
                tracefilter = None
                # Fields of format 'event(filter value)' need a filter.
                match = re.match(r'(.*)\((.*)\)', name)
                if match:
                    tracepoint, sub = match.groups()
                    tracefilter = ('%s==%d\0' %
                                   (self.filters[tracepoint][0],
                                    self.filters[tracepoint][1][sub]))

                # From perf_event_open(2):
                # pid > 0 and cpu == -1
                # This measures the specified process/thread on any CPU.
                #
                # pid == -1 and cpu >= 0
                # This measures all processes/threads on the specified CPU.
                trace_cpu = groupid if self._pid == 0 else -1
                trace_pid = int(groupid) if self._pid != 0 else -1

                group.add_event(Event(name=name,
                                      group=group,
                                      trace_cpu=trace_cpu,
                                      trace_pid=trace_pid,
                                      trace_point=tracepoint,
                                      trace_filter=tracefilter))

            self.group_leaders.append(group)

    @property
    def fields(self):
        """The list of currently wanted fields."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Enables/disables the (un)wanted events"""
        self._fields = fields
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in fields:
                    event.reset()
                    event.enable()
                else:
                    # Do not disable the group leader.
                    # It would disable all of its events.
                    if index != 0:
                        event.disable()

    @property
    def pid(self):
        """The monitored pid (0 means all)."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Changes the monitored pid by setting new traces."""
        self._pid = pid
        # The garbage collector will get rid of all Event/Group
        # objects and open files after removing the references.
        self.group_leaders = []
        self._setup_traces()
        # Re-apply the field selection to the freshly created events.
        self.fields = self._fields

    def read(self, by_guest=0):
        """Returns 'event name: current value' for all enabled events.

        Values of the same event are summed up over all groups. Child
        events are reported under the key '<child> <parent>'. The
        'by_guest' parameter is accepted but not used by this provider.

        """
        ret = defaultdict(int)
        # Hoisted out of the loops: set lookup instead of a list scan
        # per event and group.
        wanted = set(self._fields)
        for group in self.group_leaders:
            for name, val in group.read().items():
                if name not in wanted:
                    continue
                parent = ARCH.tracepoint_is_child(name)
                if parent:
                    name += ' ' + parent
                ret[name] += val
        return ret

    def reset(self):
        """Reset all field counters"""
        for group in self.group_leaders:
            for event in group.events:
                event.reset()
725
726
class DebugfsProvider(Provider):
    """Provides data from the files that KVM creates in the kvm debugfs
    folder."""
    def __init__(self, pid, fields_filter, include_past):
        self.update_fields(fields_filter)
        self._baseline = {}
        self.do_read = True
        self.paths = []
        # Assigning the pid in the base class selects the VM directories.
        super(DebugfsProvider, self).__init__(pid)
        if include_past:
            self._restore()

    def _get_available_fields(self):
        """Returns a list of available fields.

        The fields are all available KVM debugfs files

        """
        return self.walkdir(PATH_DEBUGFS_KVM)[2]

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        fields = [field for field in self._get_available_fields()
                  if self.is_field_wanted(fields_filter, field)]
        # add parents for child fields - otherwise we won't see any output!
        # (parents appended here are visited by this loop as well)
        for field in fields:
            parent = ARCH.debugfs_is_child(field)
            if parent and parent not in fields:
                fields.append(parent)
        # Stored directly, i.e. without the reset done by the setter.
        self._fields = fields

    @property
    def fields(self):
        """The list of currently wanted fields."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Sets the wanted fields and resets the counters."""
        self._fields = fields
        self.reset()

    @property
    def pid(self):
        """The monitored pid (0 means all)."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Selects the debugfs VM directories belonging to 'pid'."""
        self._pid = pid
        if pid != 0:
            vms = self.walkdir(PATH_DEBUGFS_KVM)[1]
            if len(vms) == 0:
                self.do_read = False

            # VM directory names start with '<pid>-' (read() relies on
            # that, too). Anchor the match at the start, so that e.g.
            # pid 12 does not select the directory of pid 112.
            prefix = "{}-".format(pid)
            self.paths = [vm for vm in vms if vm.startswith(prefix)]

        else:
            self.paths = []
            self.do_read = True

    def _verify_paths(self):
        """Remove invalid paths"""
        # Build a new list rather than removing entries while iterating,
        # which would skip the entry following each removed one.
        self.paths = [path for path in self.paths
                      if os.path.exists(os.path.join(PATH_DEBUGFS_KVM, path))]

    def read(self, reset=0, by_guest=0):
        """Returns a dict with format:'file name / field -> current value'.

        Parameter 'reset':
          0   plain read
          1   reset field counts to 0
          2   restore the original field counts

        If 'by_guest' is set, values of fields without a parent are
        accumulated per guest, using the pid part of the VM directory
        name as key. Child fields are reported as '<child> <parent>'.

        """
        results = {}

        # If no debugfs filtering support is available, then don't read.
        if not self.do_read:
            return results
        self._verify_paths()

        paths = self.paths
        if self._pid == 0:
            # Only the directories directly below the KVM debugfs folder
            # are used as VM directories; anything nested deeper is left
            # out. The default keeps the result empty if the folder is
            # missing.
            paths = list(next(os.walk(PATH_DEBUGFS_KVM), (None, [], []))[1])
        for path in paths:
            for field in self._fields:
                value = self._read_field(field, path)
                key = path + field
                if reset == 1:
                    self._baseline[key] = value
                if reset == 2:
                    self._baseline[key] = 0
                # First time we see this key: start counting from now.
                if self._baseline.get(key, -1) == -1:
                    self._baseline[key] = value
                parent = ARCH.debugfs_is_child(field)
                if parent:
                    field = field + ' ' + parent
                else:
                    if by_guest:
                        field = key.split('-')[0]    # set 'field' to 'pid'
                increment = value - self._baseline.get(key, 0)
                results[field] = results.get(field, 0) + increment

        return results

    def _read_field(self, field, path):
        """Returns the value of a single field from a specific VM.

        Returns 0 if the file cannot be opened or read.

        """
        try:
            with open(os.path.join(PATH_DEBUGFS_KVM, path, field)) as f:
                return int(f.read())
        except IOError:
            return 0

    def reset(self):
        """Reset field counters"""
        self._baseline = {}
        self.read(1)

    def _restore(self):
        """Restore the original field counters"""
        self._baseline = {}
        self.read(2)
856
857
# Record kept per key in Stats.values: the value last read and its delta
# to the value read before (see Stats.get()).
EventStat = namedtuple('EventStat', ['value', 'delta'])
859
860
class Stats(object):
    """Front end to the data providers.

    Forwards the pid/fields filters to every provider and merges the
    data they deliver into one dict.

    """
    def __init__(self, options):
        self.providers = self._get_providers(options)
        self.values = {}
        self._fields_filter = options.fields
        self._pid_filter = options.pid
        self._child_events = False

    def _get_providers(self, options):
        """Builds the provider list requested via the options.

        The tracepoint provider is also used as fallback if nothing else
        was selected.
        """
        selected = []

        if options.debugfs:
            debugfs = DebugfsProvider(options.pid, options.fields,
                                      options.debugfs_include_past)
            selected.append(debugfs)
        if options.tracepoints or not selected:
            tracepoints = TracepointProvider(options.pid, options.fields)
            selected.append(tracepoints)

        return selected

    def _update_provider_filters(self):
        """Hands the current fields filter to every provider."""
        # Updating the fields resets the counters, hence the cached
        # values are stale and dropped as well.
        self.values = {}
        for provider in self.providers:
            provider.update_fields(self._fields_filter)

    def reset(self):
        """Drops the cached values and resets every provider."""
        self.values = {}
        for provider in self.providers:
            provider.reset()

    @property
    def fields_filter(self):
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        # Nothing to do unless the filter actually changes.
        if fields_filter == self._fields_filter:
            return
        self._fields_filter = fields_filter
        self._update_provider_filters()

    @property
    def pid_filter(self):
        return self._pid_filter

    @pid_filter.setter
    def pid_filter(self, pid):
        # Nothing to do unless the pid actually changes.
        if pid == self._pid_filter:
            return
        self._pid_filter = pid
        self.values = {}
        for provider in self.providers:
            provider.pid = self._pid_filter

    @property
    def child_events(self):
        return self._child_events

    @child_events.setter
    def child_events(self, val):
        self._child_events = val
        for provider in self.providers:
            provider.child_events = val

    def get(self, by_guest=0):
        """Reads all providers and returns field -> (value, delta).

        The delta is taken against the value stored by the previous call.
        Key formats:
          * plain: 'key' is event name
          * child-parent: 'key' is in format '<child> <parent>'
          * pid: 'key' is the pid of the guest, and the record contains the
               aggregated event data
        These formats are generated by the providers, and handled in class TUI.
        """
        no_history = EventStat(0, 0)
        for provider in self.providers:
            fresh = provider.read(by_guest=by_guest)
            for key, current in fresh.items():
                previous = self.values.get(key, no_history).value
                self.values[key] = EventStat(current, current - previous)
        return self.values

    def toggle_display_guests(self, to_pid):
        """Switch between stats per event and stats per guest pid.

        The keys reported by DebugfsProvider differ between the two
        modes, so the keys of the mode being left are purged from
        self.values. Returns 1 if a TracepointProvider is in use (the
        switch is not possible then), 0 otherwise.

        """
        for provider in self.providers:
            if isinstance(provider, TracepointProvider):
                return 1
        if to_pid:
            # Leaving event mode: forget the per-event records.
            for provider in self.providers:
                if not isinstance(provider, DebugfsProvider):
                    continue
                for key in provider.fields:
                    self.values.pop(key, None)
        else:
            # Leaving guest mode: forget the per-pid records.
            for key in [key for key in self.values if key.isdigit()]:
                del self.values[key]
        # Update oldval (see get())
        self.get(to_pid)
        return 0
976
977
# Refresh interval in seconds used when the user enters no value in the
# 'Set update interval' mask.
DELAY_DEFAULT = 3.0
# Guest names/regexes longer than this are cut and suffixed with '...' in
# the TUI header.
MAX_GUEST_NAME_LEN = 48
MAX_REGEX_LEN = 44
# Sorting mode in which events are ordered by (delta, value); any other
# mode orders by the overall value only.
SORT_DEFAULT = 0
# Delay range in seconds advertised in the interactive help.
# NOTE(review): the upper bound presumably stems from curses.halfdelay(),
# which takes at most 255 tenths of a second - confirm.
MIN_DELAY = 0.1
MAX_DELAY = 25.5
984
985
class Tui(object):
    """Instruments curses to draw a nice text ui."""
    def __init__(self, stats, opts):
        self.stats = stats
        self.screen = None
        self._delay_initial = 0.25
        self._delay_regular = opts.set_delay
        self._sorting = SORT_DEFAULT
        self._display_guests = 0
        # Both are maintained by _refresh_header(); initialised here so
        # that readers never hit a missing attribute.
        self._gname = ''
        self._headline = ''

    def __enter__(self):
        """Initialises curses for later use.  Based on curses.wrapper
           implementation from the Python standard library."""
        self.screen = curses.initscr()
        curses.noecho()
        curses.cbreak()

        # The try/catch works around a minor bit of
        # over-conscientiousness in the curses module, the error
        # return from C start_color() is ignorable.
        try:
            curses.start_color()
        except curses.error:
            pass

        # Hide cursor in extra statement as some monochrome terminals
        # might support hiding but not colors.
        try:
            curses.curs_set(0)
        except curses.error:
            pass

        curses.use_default_colors()
        return self

    def __exit__(self, *exception):
        """Resets the terminal to its normal state.  Based on curses.wrapper
           implementation from the Python standard library."""
        if self.screen:
            self.screen.keypad(0)
            curses.echo()
            curses.nocbreak()
            curses.endwin()

    @staticmethod
    def get_all_gnames():
        """Returns a list of (pid, gname) tuples of all running guests

        The list is derived from the output of 'ps': every process with
        a ' -name ' argument is considered a guest. Raises an exception
        (e.g. OSError) if 'ps' cannot be run.

        """
        res = []
        child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'],
                                 stdout=subprocess.PIPE)
        try:
            for line in child.stdout:
                line = line.decode(ENCODING).lstrip().split(' ', 1)
                # perform a sanity check before calling the more expensive
                # function to possibly extract the guest name
                if len(line) > 1 and ' -name ' in line[1]:
                    res.append((line[0], Tui.get_gname_from_pid(line[0])))
        finally:
            # Always release the pipe and reap the child, so that neither
            # a descriptor nor a zombie process is left behind.
            child.stdout.close()
            child.wait()

        return res

    def _print_all_gnames(self, row):
        """Print a list of all running guests along with their pids."""
        self.screen.addstr(row, 2, '%8s %-60s' %
                           ('Pid', 'Guest Name (fuzzy list, might be '
                            'inaccurate!)'),
                           curses.A_UNDERLINE)
        row += 1
        try:
            for line in self.get_all_gnames():
                self.screen.addstr(row, 2, '%8s %-60s' % (line[0], line[1]))
                row += 1
                if row >= self.screen.getmaxyx()[0]:
                    break
        except Exception:
            self.screen.addstr(row + 1, 2, 'Not available')

    @staticmethod
    def get_pid_from_gname(gname):
        """Fuzzy function to convert guest name to QEMU process pid.

        Returns a list of potential pids, can be empty if no match found.
        Throws an exception on processing errors.

        """
        pids = []
        for line in Tui.get_all_gnames():
            if gname == line[1]:
                pids.append(int(line[0]))

        return pids

    @staticmethod
    def get_gname_from_pid(pid):
        """Returns the guest name for a QEMU process pid.

        Extracts the guest name from the QEMU comma line by processing the
        '-name' option. Will also handle names specified out of sequence.
        Returns '' if no name could be determined.

        """
        name = ''
        try:
            with open('/proc/{}/cmdline'.format(pid), 'r') as cmdline:
                line = cmdline.read().split('\0')
            parms = line[line.index('-name') + 1].split(',')
            while '' in parms:
                # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results
                # in # ['foo', '', 'bar'], which we revert here
                idx = parms.index('')
                parms[idx - 1] += ',' + parms[idx + 1]
                del parms[idx:idx+2]
            # the '-name' switch allows for two ways to specify the guest name,
            # where the plain name overrides the name specified via 'guest='
            for arg in parms:
                if '=' not in arg:
                    name = arg
                    break
                if arg[:6] == 'guest=':
                    name = arg[6:]
        except (ValueError, IOError, IndexError):
            pass

        return name

    def _update_pid(self, pid):
        """Propagates pid selection to stats object."""
        self.screen.addstr(4, 1, 'Updating pid filter...')
        self.screen.refresh()
        self.stats.pid_filter = pid

    def _refresh_header(self, pid=None):
        """Refreshes the header.

        Uses the current pid filter of the stats object if no pid is
        given.

        """
        if pid is None:
            pid = self.stats.pid_filter
        self.screen.erase()
        gname = self.get_gname_from_pid(pid)
        # Remembered to be able to find the guest again should it restart.
        self._gname = gname
        if gname:
            gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
                                   if len(gname) > MAX_GUEST_NAME_LEN
                                   else gname))
        if pid > 0:
            self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname)
        else:
            self._headline = 'kvm statistics - summary'
        self.screen.addstr(0, 0, self._headline, curses.A_BOLD)
        if self.stats.fields_filter:
            regex = self.stats.fields_filter
            if len(regex) > MAX_REGEX_LEN:
                regex = regex[:MAX_REGEX_LEN] + '...'
            self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
        if self._display_guests:
            col_name = 'Guest Name'
        else:
            col_name = 'Event'
        self.screen.addstr(2, 1, '%-40s %10s%7s %8s' %
                           (col_name, 'Total', '%Total', 'CurAvg/s'),
                           curses.A_STANDOUT)
        self.screen.addstr(4, 1, 'Collecting data...')
        self.screen.refresh()

    def _refresh_body(self, sleeptime):
        """Redraws the list of events/guests below the header.

        'sleeptime' is the time in seconds elapsed since the previous
        refresh; it is used to turn deltas into per-second rates.

        """
        def insert_child(sorted_items, child, values, parent):
            num = len(sorted_items)
            for i in range(0, num):
                # only add child if parent is present
                if parent.startswith(sorted_items[i][0]):
                    sorted_items.insert(i + 1, (' ' + child, values))

        def get_sorted_events(self, stats):
            """ separate parent and child events """
            if self._sorting == SORT_DEFAULT:
                def sortkey(pair):
                    # sort by (delta value, overall value)
                    v = pair[1]
                    return (v.delta, v.value)
            else:
                def sortkey(pair):
                    # sort by overall value
                    v = pair[1]
                    return v.value

            childs = []
            sorted_items = []
            # we can't rule out child events to appear prior to parents even
            # when sorted - separate out all children first, and add in later
            for key, values in sorted(stats.items(), key=sortkey,
                                      reverse=True):
                if values == (0, 0):
                    continue
                if key.find(' ') != -1:
                    if not self.stats.child_events:
                        continue
                    childs.insert(0, (key, values))
                else:
                    sorted_items.append((key, values))
            if self.stats.child_events:
                for key, values in childs:
                    (child, parent) = key.split(' ')
                    insert_child(sorted_items, child, values, parent)

            return sorted_items

        if not self._is_running_guest(self.stats.pid_filter):
            if self._gname:
                try:  # ...to identify the guest by name in case it's back
                    pids = self.get_pid_from_gname(self._gname)
                    if len(pids) == 1:
                        self._refresh_header(pids[0])
                        self._update_pid(pids[0])
                        return
                except Exception:
                    # Best effort only; fall through and flag the guest
                    # as dead. KeyboardInterrupt is not swallowed.
                    pass
            self._display_guest_dead()
            # leave final data on screen
            return
        row = 3
        self.screen.move(row, 0)
        self.screen.clrtobot()
        stats = self.stats.get(self._display_guests)
        total = 0.
        ctotal = 0.
        for key, values in stats.items():
            if self._display_guests:
                if self.get_gname_from_pid(key):
                    total += values.value
                continue
            if ' ' not in key:
                total += values.value
            else:
                ctotal += values.value
        if total == 0.:
            # we don't have any fields, or all non-child events are filtered
            total = ctotal

        # print events
        tavg = 0
        tcur = 0
        guest_removed = False
        # Reference total for child rows; normally replaced by the value
        # of the parent row. The default covers the case of a child being
        # drawn although its parent row was skipped.
        ptotal = total
        for key, values in get_sorted_events(self, stats):
            if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
                break
            if self._display_guests:
                key = self.get_gname_from_pid(key)
                if not key:
                    continue
            cur = int(round(values.delta / sleeptime)) if values.delta else 0
            if cur < 0:
                guest_removed = True
                continue
            if key[0] != ' ':
                if values.delta:
                    tcur += values.delta
                ptotal = values.value
                ltotal = total
            else:
                # child rows are shown relative to their parent
                ltotal = ptotal
            self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key,
                               values.value,
                               values.value * 100 / float(ltotal), cur))
            row += 1
        if row == 3:
            if guest_removed:
                self.screen.addstr(4, 1, 'Guest removed, updating...')
            else:
                self.screen.addstr(4, 1, 'No matching events reported yet')
        if row > 4:
            tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
            self.screen.addstr(row, 1, '%-40s %10d %8s' %
                               ('Total', total, tavg), curses.A_BOLD)
        self.screen.refresh()

    def _display_guest_dead(self):
        """Flags the guest as dead next to the headline."""
        marker = ' Guest is DEAD '
        y = min(len(self._headline), 80 - len(marker))
        self.screen.addstr(0, y, marker, curses.A_BLINK | curses.A_STANDOUT)

    def _show_msg(self, text):
        """Display message centered text and exit on key press

        'text' is a sequence of lines.

        """
        hint = 'Press any key to continue'
        curses.cbreak()
        self.screen.erase()
        (height_, term_width) = self.screen.getmaxyx()
        row = 2
        for line in text:
            start = (term_width - len(line)) // 2
            self.screen.addstr(row, start, line)
            row += 1
        self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint,
                           curses.A_STANDOUT)
        self.screen.getkey()

    def _show_help_interactive(self):
        """Display help with list of interactive commands"""
        msg = (' b toggle events by guests (debugfs only, honors'
               ' filters)',
               ' c clear filter',
               ' f filter by regular expression',
               ' g filter by guest name/PID',
               ' h display interactive commands reference',
               ' o toggle sorting order (Total vs CurAvg/s)',
               ' p filter by guest name/PID',
               ' q quit',
               ' r reset stats',
               ' s set delay between refreshs (value range: '
               '%s-%s secs)' % (MIN_DELAY, MAX_DELAY),
               ' x toggle reporting of stats for individual child trace'
               ' events',
               'Any other key refreshes statistics immediately')
        curses.cbreak()
        self.screen.erase()
        self.screen.addstr(0, 0, "Interactive commands reference",
                           curses.A_BOLD)
        self.screen.addstr(2, 0, "Press any key to exit", curses.A_STANDOUT)
        row = 4
        for line in msg:
            self.screen.addstr(row, 0, line)
            row += 1
        self.screen.getkey()
        self._refresh_header()

    def _show_filter_selection(self):
        """Draws filter selection mask.

        Asks for a valid regex and sets the fields filter accordingly.

        """
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               "Show statistics for events matching a regex.",
                               curses.A_BOLD)
            self.screen.addstr(2, 0,
                               "Current regex: {0}"
                               .format(self.stats.fields_filter))
            self.screen.addstr(5, 0, msg)
            self.screen.addstr(3, 0, "New regex: ")
            curses.echo()
            regex = self.screen.getstr().decode(ENCODING)
            curses.noecho()
            if len(regex) == 0:
                self.stats.fields_filter = ''
                self._refresh_header()
                return
            try:
                re.compile(regex)
                self.stats.fields_filter = regex
                self._refresh_header()
                return
            except re.error:
                msg = '"' + regex + '": Not a valid regular expression'
                continue

    def _show_set_update_interval(self):
        """Draws update interval selection mask."""
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).'
                               % DELAY_DEFAULT, curses.A_BOLD)
            self.screen.addstr(4, 0, msg)
            self.screen.addstr(2, 0, 'Change delay from %.1fs to ' %
                               self._delay_regular)
            curses.echo()
            val = self.screen.getstr().decode(ENCODING)
            curses.noecho()

            try:
                if len(val) > 0:
                    delay = float(val)
                    err = is_delay_valid(delay)
                    if err is not None:
                        msg = err
                        continue
                else:
                    delay = DELAY_DEFAULT
                self._delay_regular = delay
                break

            except ValueError:
                msg = '"' + str(val) + '": Invalid value'
        self._refresh_header()

    def _is_running_guest(self, pid):
        """Check if pid is still a running process.

        A false pid (0, '' or None) counts as running.

        """
        if not pid:
            return True
        return os.path.isdir(os.path.join('/proc/', str(pid)))

    def _show_vm_selection_by_guest(self):
        """Draws guest selection mask.

        Asks for a guest name or pid until a valid guest name or '' is entered.

        """
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               'Show statistics for specific guest or pid.',
                               curses.A_BOLD)
            self.screen.addstr(1, 0,
                               'This might limit the shown data to the trace '
                               'statistics.')
            self.screen.addstr(5, 0, msg)
            self._print_all_gnames(7)
            curses.echo()
            curses.curs_set(1)
            self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ")
            guest = self.screen.getstr().decode(ENCODING)
            curses.noecho()

            pid = 0
            if not guest or guest == '0':
                break
            if guest.isdigit():
                if not self._is_running_guest(guest):
                    msg = '"' + guest + '": Not a running process'
                    continue
                pid = int(guest)
                break
            pids = []
            try:
                pids = self.get_pid_from_gname(guest)
            except Exception:
                msg = '"' + guest + '": Internal error while searching, ' \
                      'use pid filter instead'
                continue
            if len(pids) == 0:
                msg = '"' + guest + '": Not an active guest'
                continue
            if len(pids) > 1:
                msg = '"' + guest + '": Multiple matches found, use pid ' \
                      'filter instead'
                continue
            pid = pids[0]
            break
        curses.curs_set(0)
        self._refresh_header(pid)
        self._update_pid(pid)

    def show_stats(self):
        """Refreshes the screen and processes user input."""
        sleeptime = self._delay_initial
        self._refresh_header()
        start = 0.0  # result based on init value never appears on screen
        while True:
            self._refresh_body(time.time() - start)
            # getkey() below gives up after this many tenths of a second.
            curses.halfdelay(int(sleeptime * 10))
            start = time.time()
            sleeptime = self._delay_regular
            try:
                char = self.screen.getkey()
                if char == 'b':
                    self._display_guests = not self._display_guests
                    if self.stats.toggle_display_guests(self._display_guests):
                        self._show_msg(['Command not available with '
                                        'tracepoints enabled', 'Restart with '
                                        'debugfs only (see option \'-d\') and '
                                        'try again!'])
                        # toggling failed, switch back
                        self._display_guests = not self._display_guests
                    self._refresh_header()
                if char == 'c':
                    self.stats.fields_filter = ''
                    self._refresh_header(0)
                    self._update_pid(0)
                if char == 'f':
                    curses.curs_set(1)
                    self._show_filter_selection()
                    curses.curs_set(0)
                    sleeptime = self._delay_initial
                if char == 'g' or char == 'p':
                    self._show_vm_selection_by_guest()
                    sleeptime = self._delay_initial
                if char == 'h':
                    self._show_help_interactive()
                if char == 'o':
                    self._sorting = not self._sorting
                if char == 'q':
                    break
                if char == 'r':
                    self.stats.reset()
                if char == 's':
                    curses.curs_set(1)
                    self._show_set_update_interval()
                    curses.curs_set(0)
                    sleeptime = self._delay_initial
                if char == 'x':
                    self.stats.child_events = not self.stats.child_events
            except KeyboardInterrupt:
                break
            except curses.error:
                # raised by getkey() when no key was pressed in time
                continue
1482
1483
def batch(stats):
    """Prints one snapshot of all statistics, one 'key value delta' per line.

    The stats are read twice, one second apart, so that the printed
    deltas cover that second. Ctrl-C aborts silently.
    """
    try:
        # The first read only serves as reference for the deltas.
        stats.get()
        time.sleep(1)
        snapshot = stats.get()
        for key in sorted(snapshot):
            stat = snapshot[key]
            # For child events only the child part of the key is shown.
            event = key.split(' ')[0]
            print('%-42s%10d%10d' % (event, stat.value, stat.delta))
    except KeyboardInterrupt:
        pass
1495
1496
class StdFormat(object):
    """Formats log output as columns separated by blanks."""
    def __init__(self, keys):
        # For every key, only the part before the first blank is shown.
        self._banner = ''.join(key.split(' ')[0] + ' ' for key in keys)

    def get_banner(self):
        """Returns the header line built from the keys."""
        return self._banner

    def get_statline(self, keys, s):
        """Returns the deltas of 's' for 'keys', right-aligned in columns."""
        columns = [' %9d' % s[key].delta for key in keys]
        return ''.join(columns)
1511
1512
class CSVFormat(object):
    """Formatter for the comma separated log output.

    The banner is 'timestamp' followed by the first word of every key; a
    stat line holds the delta of every key. Each field is preceded by a
    comma, so a stat line starts with one.
    """
    def __init__(self, keys):
        banner = 'timestamp'
        for key in keys:
            banner = "{},{!s}".format(banner, key.split(' ')[0])
        self._banner = banner

    def get_banner(self):
        """Returns the header line built from the keys passed to __init__."""
        return self._banner

    def get_statline(self, keys, s):
        """Returns the deltas of 'keys', looked up in mapping 's'."""
        statline = ''
        for key in keys:
            statline = "{},{!s}".format(statline, s[key].delta)
        return statline
1525
1526
def log(stats, opts, frmt, keys):
    """Prints statistics as reiterating key block, multiple value blocks.

    Arguments:
    stats -- statistics provider; get() is called once per interval and
             must return a mapping whose values have a 'delta' attribute
    opts  -- parsed options; log_to_file, set_delay and skip_zero_records
             are read
    frmt  -- formatter offering get_banner() and get_statline(keys, values)
    keys  -- names of the events to report

    Output goes to opts.log_to_file (opened for appending) if set, else to
    stdout. The loop runs until a KeyboardInterrupt arrives. Whenever the
    module-level flag 'signal_received' is found set, the log file is
    closed and opened again by name, and the flag is cleared.
    """
    global signal_received
    line = 0
    banner_repeat = 20
    f = None

    def do_banner(opts):
        """Opens the log file if necessary and prints the banner."""
        nonlocal f
        if opts.log_to_file:
            if not f:
                try:
                    f = open(opts.log_to_file, 'a')
                except (IOError, OSError):
                    sys.exit("Error: Could not open file: %s" %
                             opts.log_to_file)
                # A csv file that is not empty gets no further banner.
                if isinstance(frmt, CSVFormat) and f.tell() != 0:
                    return
        print(frmt.get_banner(), file=f or sys.stdout)

    def do_statline(opts, values):
        """Prints one timestamped line holding the current values."""
        statline = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + \
            frmt.get_statline(keys, values)
        print(statline, file=f or sys.stdout)

    do_banner(opts)
    banner_printed = True
    while True:
        try:
            time.sleep(opts.set_delay)
            if signal_received:
                banner_printed = True
                line = 0
                if f:
                    f.close()
                    # Drop the closed handle: a closed file object is still
                    # truthy, so do_banner() would skip reopening it and
                    # the next print would fail on the closed file.
                    f = None
                do_banner(opts)
                signal_received = False
            # Repeat the banner every banner_repeat lines, but never in a
            # csv file.
            if (line % banner_repeat == 0 and not banner_printed and
                    not (opts.log_to_file and isinstance(frmt, CSVFormat))):
                do_banner(opts)
                banner_printed = True
            values = stats.get()
            if (not opts.skip_zero_records or
                    any(values[k].delta != 0 for k in keys)):
                do_statline(opts, values)
                line += 1
                banner_printed = False
        except KeyboardInterrupt:
            break

    if f:
        f.close()
1578
1579
def handle_signal(sig, frame):
    """Signal handler: notes the arrival of a signal for log() to act on.

    Merely sets the module-level flag 'signal_received'; both arguments
    are ignored.
    """
    global signal_received
    signal_received = True
1586
1587
def is_delay_valid(delay):
    """Verify delay is in valid value range.

    Returns None if MIN_DELAY <= delay <= MAX_DELAY holds, otherwise a
    message naming the violated bound. NaN is reported as invalid.
    """
    # The comparisons are negated on purpose: any comparison with NaN is
    # False, so 'delay < MIN_DELAY' would let NaN pass as a valid delay.
    if not delay >= MIN_DELAY:
        return '"' + str(delay) + '": Delay must be >=%s' % MIN_DELAY
    if not delay <= MAX_DELAY:
        return '"' + str(delay) + '": Delay must be <=%s' % MAX_DELAY
    return None
1596
1597
def get_options():
    """Returns processed program arguments.

    Parses the command line and returns the resulting argparse namespace.
    Exits with an error message if the guest name given with -g cannot be
    resolved to exactly one pid, if -c or -z are used without a logging
    option, or if the value of -f is not a valid regular expression.
    """
    description_text = """
This script displays various statistics about VMs running under KVM.
The statistics are gathered from the KVM debugfs entries and / or the
currently available perf traces.

The monitoring takes additional cpu cycles and might affect the VM's
performance.

Requirements:
- Access to:
    %s
    %s/events/*
    /proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
  CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
  the large number of files that are possibly opened.

Interactive Commands:
   b     toggle events by guests (debugfs only, honors filters)
   c     clear filter
   f     filter by regular expression
   g     filter by guest name
   h     display interactive commands reference
   o     toggle sorting order (Total vs CurAvg/s)
   p     filter by PID
   q     quit
   r     reset stats
   s     set update interval (value range: 0.1-25.5 secs)
   x     toggle reporting of stats for individual child trace events
Press any other key to refresh statistics immediately.
""" % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING)

    class Guest_to_pid(argparse.Action):
        """Stores the pid belonging to the guest name given with -g."""
        def __call__(self, parser, namespace, values, option_string=None):
            try:
                pids = Tui.get_pid_from_gname(values)
            except Exception:
                # Not a bare 'except': SystemExit and KeyboardInterrupt
                # must not be turned into a "guest not found" message.
                sys.exit('Error while searching for guest "{}". Use "-p" to '
                         'specify a pid instead?'.format(values))
            if not pids:
                sys.exit('Error: No guest by the name "{}" found'
                         .format(values))
            if len(pids) > 1:
                # str.join() accepts strings only; convert explicitly so
                # that numeric pids do not raise a TypeError here.
                sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
                         ' to specify the desired pid'
                         .format(" ".join(str(pid) for pid in pids)))
            namespace.pid = pids[0]

    argparser = argparse.ArgumentParser(description=description_text,
                                        formatter_class=argparse
                                        .RawTextHelpFormatter)
    argparser.add_argument('-1', '--once', '--batch',
                           action='store_true',
                           default=False,
                           help='run in batch mode for one second',
                           )
    argparser.add_argument('-c', '--csv',
                           action='store_true',
                           default=False,
                           help='log in csv format - requires option -l/-L',
                           )
    argparser.add_argument('-d', '--debugfs',
                           action='store_true',
                           default=False,
                           help='retrieve statistics from debugfs',
                           )
    argparser.add_argument('-f', '--fields',
                           default='',
                           help='''fields to display (regex)
"-f help" for a list of available events''',
                           )
    argparser.add_argument('-g', '--guest',
                           type=str,
                           help='restrict statistics to guest by name',
                           action=Guest_to_pid,
                           )
    argparser.add_argument('-i', '--debugfs-include-past',
                           action='store_true',
                           default=False,
                           help='include all available data on past events for'
                                ' debugfs',
                           )
    argparser.add_argument('-l', '--log',
                           action='store_true',
                           default=False,
                           help='run in logging mode (like vmstat)',
                           )
    argparser.add_argument('-L', '--log-to-file',
                           type=str,
                           metavar='FILE',
                           help="like '--log', but logging to a file"
                           )
    argparser.add_argument('-p', '--pid',
                           type=int,
                           default=0,
                           help='restrict statistics to pid',
                           )
    argparser.add_argument('-s', '--set-delay',
                           type=float,
                           default=DELAY_DEFAULT,
                           metavar='DELAY',
                           help='set delay between refreshs (value range: '
                                '%s-%s secs)' % (MIN_DELAY, MAX_DELAY),
                           )
    argparser.add_argument('-t', '--tracepoints',
                           action='store_true',
                           default=False,
                           help='retrieve statistics from tracepoints',
                           )
    argparser.add_argument('-z', '--skip-zero-records',
                           action='store_true',
                           default=False,
                           help='omit records with all zeros in logging mode',
                           )
    options = argparser.parse_args()
    if options.csv and not (options.log or options.log_to_file):
        sys.exit('Error: Option -c/--csv requires -l/--log')
    if options.skip_zero_records and not (options.log or options.log_to_file):
        sys.exit('Error: Option -z/--skip-zero-records requires -l/-L')
    try:
        # verify that we were passed a valid regex up front
        re.compile(options.fields)
    except re.error:
        sys.exit('Error: "' + options.fields + '" is not a valid regular '
                 'expression')

    return options
1727
1728
def check_access(options):
    """Checks that the tracing directory needed for tracepoints exists.

    Nothing happens if PATH_DEBUGFS_TRACING exists or if only debugfs
    statistics were requested. Otherwise a hint is written to stderr; the
    program then exits with status 1 if tracepoints were requested
    explicitly, else it announces the fallback, sets options.debugfs and
    pauses for five seconds. Returns the options.
    """
    tracing_present = os.path.exists(PATH_DEBUGFS_TRACING)
    if tracing_present or (not options.tracepoints and options.debugfs):
        return options

    hint = ("Please enable CONFIG_TRACING in your kernel "
            "when using the option -t (default).\n"
            "If it is enabled, make {0} readable by the "
            "current user.\n")
    sys.stderr.write(hint.format(PATH_DEBUGFS_TRACING))
    if options.tracepoints:
        sys.exit(1)

    sys.stderr.write("Falling back to debugfs statistics!\n")
    options.debugfs = True
    time.sleep(5)
    return options
1746
1747
def assign_globals():
    """Locates debugfs and derives the module-level kvm and tracing paths.

    Sets PATH_DEBUGFS_KVM and PATH_DEBUGFS_TRACING below the mount point of
    the first debugfs entry found in /proc/mounts. Exits with status 1 if
    there is no such entry or if the kvm path does not exist.
    """
    global PATH_DEBUGFS_KVM
    global PATH_DEBUGFS_TRACING

    debugfs = ''
    # 'with' makes sure /proc/mounts gets closed again.
    with open('/proc/mounts') as mounts:
        for line in mounts:
            # fields: device, mount point, filesystem type, options, ...
            fields = line.split(' ')
            # Besides the device name, accept the filesystem type, as
            # debugfs can be mounted under another device name ('none').
            if fields[0] == 'debugfs' or fields[2:3] == ['debugfs']:
                debugfs = fields[1]
                break
    if debugfs == '':
        sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in "
                         "your kernel, mounted and\nreadable by the current "
                         "user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n")
        sys.exit(1)

    PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm')
    PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing')

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure that CONFIG_KVM is enabled in "
                         "your kernel and that the modules are loaded.\n")
        sys.exit(1)
1771
1772
def main():
    """Script entry point: validates the options and runs the chosen mode.

    Depending on the options this lists the available events ('-f help'),
    logs to stdout or a file, runs the text ui, or prints a single batch.
    """
    assign_globals()
    options = check_access(get_options())

    pid_dir = os.path.join('/proc/', str(options.pid))
    if options.pid > 0 and not os.path.isdir(pid_dir):
        sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n')
        sys.exit('Specified pid does not exist.')

    delay_error = is_delay_valid(options.set_delay)
    if delay_error is not None:
        sys.exit('Error: ' + delay_error)

    stats = Stats(options)

    if options.fields == 'help':
        stats.fields_filter = None
        # Event names are the keys up to the first '(', without duplicates.
        event_names = {key.split('(', 1)[0] for key in stats.get().keys()}
        sys.stdout.write(' ' + '\n '.join(sorted(event_names)) + '\n')
        sys.exit(0)

    if options.log or options.log_to_file:
        if options.log_to_file:
            # handle_signal() flags SIGHUP for log() to pick up.
            signal.signal(signal.SIGHUP, handle_signal)
        keys = sorted(stats.get().keys())
        frmt = CSVFormat(keys) if options.csv else StdFormat(keys)
        log(stats, options, frmt, keys)
        return

    if options.once:
        batch(stats)
        return

    with Tui(stats, options) as tui:
        tui.show_stats()
1812
1813
# Run main() only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()