   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
   4 */
   5
   6#include <stdlib.h>
   7#include <errno.h>
   8#include "utils.h"
   9#include "osnoise.h"
  10#include "timerlat.h"
  11#include <unistd.h>
  12
  13enum timerlat_state {
  14	TIMERLAT_INIT = 0,
  15	TIMERLAT_WAITING_IRQ,
  16	TIMERLAT_WAITING_THREAD,
  17};
  18
  19#define MAX_COMM		24
  20
  21/*
  22 * Per-CPU statistics and data.
  23 */
  24struct timerlat_aa_data {
  25	/* Current CPU state */
  26	int			curr_state;
  27
  28	/* timerlat IRQ latency */
  29	unsigned long long	tlat_irq_seqnum;
  30	unsigned long long	tlat_irq_latency;
  31	unsigned long long	tlat_irq_timstamp;
  32
  33	/* timerlat Thread latency */
  34	unsigned long long	tlat_thread_seqnum;
  35	unsigned long long	tlat_thread_latency;
  36	unsigned long long	tlat_thread_timstamp;
  37
  38	/*
  39	 * Information about the thread running when the IRQ
  40	 * arrived.
  41	 *
  42	 * This can be blocking or interference, depending on the
  43	 * priority of the thread. Assuming timerlat is the highest
  44	 * prio, it is blocking. If timerlat has a lower prio, it is
  45	 * interference.
  46	 * Note: "unsigned long long" because they are fetched using tep_get_field_val().
  47	 */
  48	unsigned long long	run_thread_pid;
  49	char			run_thread_comm[MAX_COMM];
  50	unsigned long long	thread_blocking_duration;
  51	unsigned long long	max_exit_idle_latency;
  52
  53	/* Information about the timerlat timer irq */
  54	unsigned long long	timer_irq_start_time;
  55	unsigned long long	timer_irq_start_delay;
  56	unsigned long long	timer_irq_duration;
  57	unsigned long long	timer_exit_from_idle;
  58
  59	/*
  60	 * Information about the last IRQ before the timerlat irq
  61	 * arrived.
  62	 *
  63	 * If (now - timestamp) <= latency, it might have influenced
  64	 * the timerlat IRQ latency. Otherwise, ignore it.
  65	 */
  66	unsigned long long	prev_irq_duration;
  67	unsigned long long	prev_irq_timstamp;
  68
  69	/*
  70	 * Interference sum.
  71	 */
  72	unsigned long long	thread_nmi_sum;
  73	unsigned long long	thread_irq_sum;
  74	unsigned long long	thread_softirq_sum;
  75	unsigned long long	thread_thread_sum;
  76
  77	/*
  78	 * Interference task information.
  79	 */
  80	struct trace_seq	*prev_irqs_seq;
  81	struct trace_seq	*nmi_seq;
  82	struct trace_seq	*irqs_seq;
  83	struct trace_seq	*softirqs_seq;
  84	struct trace_seq	*threads_seq;
  85	struct trace_seq	*stack_seq;
  86
  87	/*
  88	 * Current thread.
  89	 */
  90	char			current_comm[MAX_COMM];
  91	unsigned long long	current_pid;
  92
  93	/*
  94	 * Is the system running a kworker?
  95	 */
  96	unsigned long long	kworker;
  97	unsigned long long	kworker_func;
  98};
  99
 100/*
 101 * The analysis context and system-wide view.
 102 */
 103struct timerlat_aa_context {
 104	int nr_cpus;
 105	int dump_tasks;
 106
 107	/* per CPU data */
 108	struct timerlat_aa_data *taa_data;
 109
 110	/*
 111	 * required to translate function names and register
 112	 * events.
 113	 */
 114	struct osnoise_tool *tool;
 115};
 116
 117/*
 118 * The context is stored in a file-scope static variable, accessed via a helper function.
 119 *
 120 * It could be stored inside the trace context, but every access would
 121 * require container_of() plus a series of pointers. Is that needed? Not sure.
 122 *
 123 * For now, keep it simple. If needed, store it in the tool, add the *context
 124 * as a parameter to timerlat_aa_get_ctx(), and do the magic there.
 125 */
 126static struct timerlat_aa_context *__timerlat_aa_ctx;
 127
 128static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
 129{
 130	return __timerlat_aa_ctx;
 131}
 132
 133/*
 134 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
 135 */
 136static struct timerlat_aa_data
 137*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
 138{
 139	return &taa_ctx->taa_data[cpu];
 140}
 141
 142/*
 143 * timerlat_aa_irq_latency - Handles timerlat IRQ event
 144 */
 145static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
 146				   struct trace_seq *s, struct tep_record *record,
 147				   struct tep_event *event)
 148{
 149	/*
 150	 * From this point on, start looking for interference: things that
 151	 * can delay the thread.
 152	 */
 153	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
 154	taa_data->tlat_irq_timstamp = record->ts;
 155
 156	/*
 157	 * Zero values.
 158	 */
 159	taa_data->thread_nmi_sum = 0;
 160	taa_data->thread_irq_sum = 0;
 161	taa_data->thread_softirq_sum = 0;
 162	taa_data->thread_thread_sum = 0;
 163	taa_data->thread_blocking_duration = 0;
 164	taa_data->timer_irq_start_time = 0;
 165	taa_data->timer_irq_duration = 0;
 166	taa_data->timer_exit_from_idle = 0;
 167
 168	/*
 169	 * Zero interference tasks.
 170	 */
 171	trace_seq_reset(taa_data->nmi_seq);
 172	trace_seq_reset(taa_data->irqs_seq);
 173	trace_seq_reset(taa_data->softirqs_seq);
 174	trace_seq_reset(taa_data->threads_seq);
 175
 176	/* IRQ latency values */
 177	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
 178	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);
 179
 180	/* The thread that can cause blocking */
 181	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);
 182
 183	/*
 184	 * Handle the exit-from-idle case.
 185	 *
 186	 * If the current thread is not the idle thread, this is not an exit from idle:
 187	 */
 188	if (taa_data->run_thread_pid)
 189		return 0;
 190
 191	/*
 192	 * If the latency is shorter than the largest known exit-from-idle latency:
 193	 */
 194	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
 195		return 0;
 196
 197	/*
 198	 * To be safe, ignore the cases in which an IRQ/NMI could have
 199	 * interfered with the timerlat IRQ.
 200	 */
 201	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
 202	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
 203		return 0;
 204
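	/*
	 * Otherwise, this is a clean exit-from-idle sample: remember the
	 * largest one seen so far for the summary.
	 */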
 205	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;
 206
 207	return 0;
 208}
 209
 210/*
 211 * timerlat_aa_thread_latency - Handles timerlat thread event
 212 */
 213static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
 214				      struct trace_seq *s, struct tep_record *record,
 215				      struct tep_event *event)
 216{
 217	/*
 218	 * From this point on, start looking for interference: things that
 219	 * can delay the IRQ of the next cycle.
 220	 */
 221	taa_data->curr_state = TIMERLAT_WAITING_IRQ;
 222	taa_data->tlat_thread_timstamp = record->ts;
 223
 224	/* Thread latency values */
 225	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
 226	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
 227
 228	return 0;
 229}
 230
 231/*
 232 * timerlat_aa_handler - Handle timerlat events
 233 *
 234 * This function is called to handle timerlat events, recording per-CPU statistics.
 235 *
 236 * Returns 0 on success, -1 otherwise.
 237 */
 238static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
 239			struct tep_event *event, void *context)
 240{
 241	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
 242	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
 243	unsigned long long thread;
 244
 245	if (!taa_data)
 246		return -1;
 247
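	/*
	 * The "context" field of the timerlat event tells whether this record
	 * was generated by the timerlat IRQ (0) or by the timerlat thread
	 * (non-zero).
	 */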
 248	tep_get_field_val(s, event, "context", record, &thread, 1);
 249	if (!thread)
 250		return timerlat_aa_irq_latency(taa_data, s, record, event);
 251	else
 252		return timerlat_aa_thread_latency(taa_data, s, record, event);
 253}
 254
 255/*
 256 * timerlat_aa_nmi_handler - Handles NMI noise
 257 *
 258 * It is used to collect information about interference from NMIs. It is
 259 * hooked to the osnoise:nmi_noise event.
 260 */
 261static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
 262				   struct tep_event *event, void *context)
 263{
 264	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
 265	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
 266	unsigned long long duration;
 267	unsigned long long start;
 268
 269	tep_get_field_val(s, event, "duration", record, &duration, 1);
 270	tep_get_field_val(s, event, "start", record, &start, 1);
 271
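	/*
	 * An NMI that runs while still waiting for the timerlat IRQ is a
	 * candidate source of the IRQ delay; once the IRQ has fired, NMI
	 * noise is accounted as interference to the thread.
	 */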
 272	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
 273		taa_data->prev_irq_duration = duration;
 274		taa_data->prev_irq_timstamp = start;
 275
 276		trace_seq_reset(taa_data->prev_irqs_seq);
 277		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s	\t\t\t%9.2f us\n",
 278			 "nmi", ns_to_usf(duration));
 279		return 0;
 280	}
 281
 282	taa_data->thread_nmi_sum += duration;
 283	trace_seq_printf(taa_data->nmi_seq, "	%24s	\t\t\t%9.2f us\n",
 284		 "nmi", ns_to_usf(duration));
 285
 286	return 0;
 287}
 288
 289/*
 290 * timerlat_aa_irq_handler - Handles IRQ noise
 291 *
 292 * It is used to collect information about interference from IRQs. It is
 293 * hooked to the osnoise:irq_noise event.
 294 *
 295 * It is a little more complex than the others because it measures:
 296 *	- The IRQs that can delay the timer IRQ before it fires.
 297 *	- The timerlat IRQ handler itself.
 298 *	- The IRQs that happen between the timerlat IRQ and the timerlat thread
 299 *	  (IRQ interference).
 300 */
 301static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
 302				   struct tep_event *event, void *context)
 303{
 304	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
 305	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
 306	unsigned long long expected_start;
 307	unsigned long long duration;
 308	unsigned long long vector;
 309	unsigned long long start;
 310	char *desc;
 311	int val;
 312
 313	tep_get_field_val(s, event, "duration", record, &duration, 1);
 314	tep_get_field_val(s, event, "start", record, &start, 1);
 315	tep_get_field_val(s, event, "vector", record, &vector, 1);
 316	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);
 317
 318	/*
 319	 * Before the timerlat IRQ.
 320	 */
 321	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
 322		taa_data->prev_irq_duration = duration;
 323		taa_data->prev_irq_timstamp = start;
 324
 325		trace_seq_reset(taa_data->prev_irqs_seq);
 326		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
 327				 desc, vector, ns_to_usf(duration));
 328		return 0;
 329	}
 330
 331	/*
 332	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed by
 333	 * the timerlat IRQ event handler, so a zero value means this is it.
 334	 */
 335	if (!taa_data->timer_irq_start_time) {
 336		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;
 337
 338		taa_data->timer_irq_start_time = start;
 339		taa_data->timer_irq_duration = duration;
 340
 341		/*
 342		 * We are dealing with two different clock sources: the
 343		 * external clock source that timerlat uses as a reference
 344		 * and the clock used by the tracer. There are also two
 345		 * moments: the moment the clock is read and the time at
 346		 * which the event is placed in the buffer (the trace
 347		 * event timestamp). If the processor is slow or there
 348		 * is some hardware noise, the difference between the
 349		 * timestamp and the external clock read can be longer
 350		 * than the IRQ handler delay, resulting in a negative
 351		 * time. If so, set the IRQ start delay to 0. In the end,
 352		 * it is less relevant than the noise.
 353		 */
 354		if (expected_start < taa_data->timer_irq_start_time)
 355			taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
 356		else
 357			taa_data->timer_irq_start_delay = 0;
 358
 359		/*
 360		 * Not an exit from idle: a thread other than the idle task was running.
 361		 */
 362		if (taa_data->run_thread_pid)
 363			return 0;
 364
 365		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
 366			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;
 367
 368		return 0;
 369	}
 370
 371	/*
 372	 * IRQ interference.
 373	 */
 374	taa_data->thread_irq_sum += duration;
 375	trace_seq_printf(taa_data->irqs_seq, "	%24s:%-3llu	\t	%9.2f us\n",
 376			 desc, vector, ns_to_usf(duration));
 377
 378	return 0;
 379}
 380
 381static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
 382				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
 383
 384
 385/*
 386 * timerlat_aa_softirq_handler - Handles Softirq noise
 387 *
 388 * It is used to collect information about interference from softirqs. It is
 389 * hooked to the osnoise:softirq_noise event.
 390 *
 391 * It is only relevant on non-RT kernels, as softirqs run as threads on PREEMPT_RT.
 392 */
 393static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
 394				       struct tep_event *event, void *context)
 395{
 396	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
 397	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
 398	unsigned long long duration;
 399	unsigned long long vector;
 400	unsigned long long start;
 401
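	/*
	 * Softirq noise is only accounted as interference between the
	 * timerlat IRQ and the timerlat thread activation.
	 */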
 402	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
 403		return 0;
 404
 405	tep_get_field_val(s, event, "duration", record, &duration, 1);
 406	tep_get_field_val(s, event, "start", record, &start, 1);
 407	tep_get_field_val(s, event, "vector", record, &vector, 1);
 408
 409	taa_data->thread_softirq_sum += duration;
 410
 411	trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu	\t	%9.2f us\n",
 412			 softirq_name[vector], vector, ns_to_usf(duration));
 413	return 0;
 414}
 415
 416/*
 417 * timerlat_aa_thread_handler - Handles thread noise
 418 *
 419 * It is used to collect information about interference from threads. It is
 420 * hooked to the osnoise:thread_noise event.
 421 *
 422 * Note: if you see thread noise, your timerlat thread was not the highest-priority one.
 423 */
 424static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
 425				      struct tep_event *event, void *context)
 426{
 427	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
 428	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
 429	unsigned long long duration;
 430	unsigned long long start;
 431	unsigned long long pid;
 432	const char *comm;
 433	int val;
 434
 435	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
 436		return 0;
 437
 438	tep_get_field_val(s, event, "duration", record, &duration, 1);
 439	tep_get_field_val(s, event, "start", record, &start, 1);
 440
 441	tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
 442	comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
 443
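	/*
	 * If this is the thread that was running when the timerlat IRQ hit,
	 * its noise is the blocking duration; any other thread is accounted
	 * as thread interference.
	 */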
 444	if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
 445		taa_data->thread_blocking_duration = duration;
 446
 447		if (comm)
 448			strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
 449		else
 450			sprintf(taa_data->run_thread_comm, "<...>");
 451
 452	} else {
 453		taa_data->thread_thread_sum += duration;
 454
 455		trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
 456			 comm, pid, ns_to_usf(duration));
 457	}
 458
 459	return 0;
 460}
 461
 462/*
 463 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
 464 *
 465 * Saves and parses the stack trace generated by the timerlat IRQ.
 466 */
 467static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
 468			      struct tep_event *event, void *context)
 469{
 470	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
 471	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
 472	unsigned long *caller;
 473	const char *function;
 474	int val, i;
 475
 476	trace_seq_reset(taa_data->stack_seq);
 477
 478	trace_seq_printf(taa_data->stack_seq, "    Blocking thread stack trace\n");
 479	caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
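	/*
	 * Walk the array of return addresses, resolving each one to a
	 * function name; stop at the first address that cannot be resolved.
	 */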
 480	if (caller) {
 481		for (i = 0; ; i++) {
 482			function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
 483			if (!function)
 484				break;
 485			trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
 486		}
 487	}
 488	return 0;
 489}
 490
 491/*
 492 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
 493 *
 494 * Handles the sched:sched_switch event to track the thread currently running on
 495 * each CPU. It is used to display the threads running on the other CPUs when the
 496 * trace stops.
 497 */
 498static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
 499					    struct tep_event *event, void *context)
 500{
 501	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
 502	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
 503	const char *comm;
 504	int val;
 505
 506	tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
 507	comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
 508
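	/*
	 * Record the task that is now running so it can be reported if the
	 * trace stops while it is on the CPU; guard against a failed
	 * "next_comm" read, as done for "comm" in timerlat_aa_thread_handler().
	 */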
 509	if (comm)
		strncpy(taa_data->current_comm, comm, MAX_COMM);
	else
		sprintf(taa_data->current_comm, "<...>");
 510
 511	/*
 512	 * A new thread is running: clear the information about the last kworker that ran.
 513	 */
 514	taa_data->kworker = 0;
 515	taa_data->kworker_func = 0;
 516
 517	return 0;
 518}
 519
 520/*
 521 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
 522 *
 523 * Handles workqueue:workqueue_execute_start event, keeping track of
 524 * the job that a kworker could be doing in the CPU.
 525 *
 526 * This has already caught hardware-related latencies caused by work queues
 527 * running driver code that stalls the hardware, for example, DRM drivers.
 528 */
 529static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
 530					     struct tep_event *event, void *context)
 531{
 532	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
 533	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
 534
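	/*
	 * Save the work struct and the function being executed; they are
	 * reported if the trace stops while this kworker is on the CPU.
	 */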
 535	tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
 536	tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
 537	return 0;
 538}
 539
 540/*
 541 * timerlat_thread_analysis - Prints the analysis of a CPU that hit the stop tracing condition
 542 *
 543 * This is the core of the analysis.
 544 */
 545static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
 546				     int irq_thresh, int thread_thresh)
 547{
 548	long long exp_irq_ts;
 549	int total;
 550	int irq;
 551
 552	/*
 553	 * IRQ latency or Thread latency?
 554	 */
 555	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
 556		irq = 1;
 557		total = taa_data->tlat_irq_latency;
 558	} else {
 559		irq = 0;
 560		total = taa_data->tlat_thread_latency;
 561	}
 562
 563	/*
 564	 * Expected IRQ arrival time using the trace clock as the base.
 565	 *
 566	 * TODO: Add a list of previous IRQs, and then walk the list backwards.
 567	 */
 568	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
 569	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
 570		if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
 571			printf("  Previous IRQ interference:	\t\t up to  %9.2f us\n",
 572				ns_to_usf(taa_data->prev_irq_duration));
 573	}
 574
 575	/*
 576	 * The delay that the IRQ suffered before starting.
 577	 */
 578	printf("  IRQ handler delay:		%16s	%9.2f us (%.2f %%)\n",
 579		(ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
 580		ns_to_usf(taa_data->timer_irq_start_delay),
 581		ns_to_per(total, taa_data->timer_irq_start_delay));
 582
 583	/*
 584	 * Timerlat IRQ.
 585	 */
 586	printf("  IRQ latency:	\t\t\t\t	%9.2f us\n",
 587		ns_to_usf(taa_data->tlat_irq_latency));
 588
 589	if (irq) {
 590		/*
 591		 * If the trace stopped due to the IRQ latency, the other events did
 592		 * not happen because... the trace stopped :-).
 593		 *
 594		 * That is all, folks: the stack trace was printed before the stop,
 595		 * so it will be displayed; it is the key information here.
 596		 */
 597		printf("  Blocking thread:\n");
 598		printf("	%24s:%-9llu\n",
 599			taa_data->run_thread_comm, taa_data->run_thread_pid);
 600	} else  {
 601		/*
 602		 * The duration of the IRQ handler that handled the timerlat IRQ.
 603		 */
 604		printf("  Timerlat IRQ duration:	\t\t	%9.2f us (%.2f %%)\n",
 605			ns_to_usf(taa_data->timer_irq_duration),
 606			ns_to_per(total, taa_data->timer_irq_duration));
 607
 608		/*
 609		 * The amount of time that the current thread postponed the scheduler.
 610		 *
 611		 * Recall that this value is already net of NMI/IRQ/softirq
 612		 * interference, so there is no need to subtract it here.
 613		 */
 614		printf("  Blocking thread:	\t\t\t	%9.2f us (%.2f %%)\n",
 615			ns_to_usf(taa_data->thread_blocking_duration),
 616			ns_to_per(total, taa_data->thread_blocking_duration));
 617
 618		printf("	%24s:%-9llu		%9.2f us\n",
 619			taa_data->run_thread_comm, taa_data->run_thread_pid,
 620			ns_to_usf(taa_data->thread_blocking_duration));
 621	}
 622
 623	/*
 624	 * Print the stack trace!
 625	 */
 626	trace_seq_do_printf(taa_data->stack_seq);
 627
 628	/*
 629	 * NMIs can happen during the IRQ, so they are always possible.
 630	 */
 631	if (taa_data->thread_nmi_sum)
 632		printf("  NMI interference	\t\t\t	%9.2f us (%.2f %%)\n",
 633			ns_to_usf(taa_data->thread_nmi_sum),
 634			ns_to_per(total, taa_data->thread_nmi_sum));
 635
 636	/*
 637	 * If it is an IRQ latency, the other factors can be skipped.
 638	 */
 639	if (irq)
 640		goto print_total;
 641
 642	/*
 643	 * Prints the interference caused by IRQs to the thread latency.
 644	 */
 645	if (taa_data->thread_irq_sum) {
 646		printf("  IRQ interference	\t\t\t	%9.2f us (%.2f %%)\n",
 647			ns_to_usf(taa_data->thread_irq_sum),
 648			ns_to_per(total, taa_data->thread_irq_sum));
 649
 650		trace_seq_do_printf(taa_data->irqs_seq);
 651	}
 652
 653	/*
 654	 * Prints the interference caused by Softirqs to the thread latency.
 655	 */
 656	if (taa_data->thread_softirq_sum) {
 657		printf("  Softirq interference	\t\t\t	%9.2f us (%.2f %%)\n",
 658			ns_to_usf(taa_data->thread_softirq_sum),
 659			ns_to_per(total, taa_data->thread_softirq_sum));
 660
 661		trace_seq_do_printf(taa_data->softirqs_seq);
 662	}
 663
 664	/*
 665	 * Prints the interference caused by other threads to the thread latency.
 666	 *
 667	 * If this happens, your timerlat thread is not the highest-priority one.
 668	 * OK, the migration thread can happen. But otherwise, you are no longer
 669	 * measuring only the "scheduling latency": this is the difference between
 670	 * scheduling latency and timer handling latency.
 671	 */
 672	if (taa_data->thread_thread_sum) {
 673		printf("  Thread interference	\t\t\t	%9.2f us (%.2f %%)\n",
 674			ns_to_usf(taa_data->thread_thread_sum),
 675			ns_to_per(total, taa_data->thread_thread_sum));
 676
 677		trace_seq_do_printf(taa_data->threads_seq);
 678	}
 679
 680	/*
 681	 * Done.
 682	 */
 683print_total:
 684	printf("------------------------------------------------------------------------\n");
 685	printf("  %s latency:	\t\t\t	%9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
 686		ns_to_usf(total));
 687}
 688
 689static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
 690{
 691	struct trace_instance *trace = &taa_ctx->tool->trace;
 692	int retval;
 693
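	/*
	 * Iterate over the raw events still in the trace buffer, dispatching
	 * them to the handlers registered for the auto-analysis.
	 */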
 694	retval = tracefs_iterate_raw_events(trace->tep,
 695					    trace->inst,
 696					    NULL,
 697					    0,
 698					    collect_registered_events,
 699					    trace);
 700	if (retval < 0) {
 701		err_msg("Error iterating on events\n");
 702		return 0;
 703	}
 704
 705	return 1;
 706}
 707
 708/**
 709 * timerlat_auto_analysis - Analyze the collected data
 710 */
 711void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
 712{
 713	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
 714	unsigned long long max_exit_from_idle = 0;
 715	struct timerlat_aa_data *taa_data;
 716	int max_exit_from_idle_cpu;
 717	struct tep_handle *tep;
 718	int cpu;
 719
 720	timerlat_auto_analysis_collect_trace(taa_ctx);
 721
 722	/* bring the stop tracing thresholds to the ns scale */
 723	irq_thresh = irq_thresh * 1000;
 724	thread_thresh = thread_thresh * 1000;
 725
 726	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
 727		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
 728
 729		if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
 730			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
 731			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
 732		} else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
 733			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
 734			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
 735		}
 736
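		/*
		 * Track the largest exit-from-idle IRQ latency across all
		 * CPUs for the summary printed below.
		 */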
 737		if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
 738			max_exit_from_idle = taa_data->max_exit_idle_latency;
 739			max_exit_from_idle_cpu = cpu;
 740		}
 741
 742	}
 743
 744	if (max_exit_from_idle) {
 745		printf("\n");
 746		printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
 747			ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
 748	}
 749	if (!taa_ctx->dump_tasks)
 750		return;
 751
 752	printf("\n");
 753	printf("Printing CPU tasks:\n");
 754	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
 755		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
 756		tep = taa_ctx->tool->trace.tep;
 757
 758		printf("    [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
 759
 760		if (taa_data->kworker_func)
 761			printf(" kworker:%s:%s",
 762				tep_find_function(tep, taa_data->kworker) ? : "<...>",
 763				tep_find_function(tep, taa_data->kworker_func));
 764		printf("\n");
 765	}
 766
 767}
 768
 769/*
 770 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
 771 */
 772static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
 773{
 774	struct timerlat_aa_data *taa_data;
 775	int i;
 776
 777	if (!taa_ctx->taa_data)
 778		return;
 779
 780	for (i = 0; i < taa_ctx->nr_cpus; i++) {
 781		taa_data = timerlat_aa_get_data(taa_ctx, i);
 782
 783		if (taa_data->prev_irqs_seq) {
 784			trace_seq_destroy(taa_data->prev_irqs_seq);
 785			free(taa_data->prev_irqs_seq);
 786		}
 787
 788		if (taa_data->nmi_seq) {
 789			trace_seq_destroy(taa_data->nmi_seq);
 790			free(taa_data->nmi_seq);
 791		}
 792
 793		if (taa_data->irqs_seq) {
 794			trace_seq_destroy(taa_data->irqs_seq);
 795			free(taa_data->irqs_seq);
 796		}
 797
 798		if (taa_data->softirqs_seq) {
 799			trace_seq_destroy(taa_data->softirqs_seq);
 800			free(taa_data->softirqs_seq);
 801		}
 802
 803		if (taa_data->threads_seq) {
 804			trace_seq_destroy(taa_data->threads_seq);
 805			free(taa_data->threads_seq);
 806		}
 807
 808		if (taa_data->stack_seq) {
 809			trace_seq_destroy(taa_data->stack_seq);
 810			free(taa_data->stack_seq);
 811		}
 812	}
 813}
 814
 815/*
 816 * timerlat_aa_init_seqs - Init seq files used to store parsed information
 817 *
 818 * Instead of keeping data structures to store raw data, use seq files to
 819 * store parsed data.
 820 *
 821 * Allocates and initializes the seq files.
 822 *
 823 * Returns 0 on success, -1 otherwise.
 824 */
 825static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
 826{
 827	struct timerlat_aa_data *taa_data;
 828	int i;
 829
 830	for (i = 0; i < taa_ctx->nr_cpus; i++) {
 831
 832		taa_data = timerlat_aa_get_data(taa_ctx, i);
 833
 834		taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
 835		if (!taa_data->prev_irqs_seq)
 836			goto out_err;
 837
 838		trace_seq_init(taa_data->prev_irqs_seq);
 839
 840		taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
 841		if (!taa_data->nmi_seq)
 842			goto out_err;
 843
 844		trace_seq_init(taa_data->nmi_seq);
 845
 846		taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
 847		if (!taa_data->irqs_seq)
 848			goto out_err;
 849
 850		trace_seq_init(taa_data->irqs_seq);
 851
 852		taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
 853		if (!taa_data->softirqs_seq)
 854			goto out_err;
 855
 856		trace_seq_init(taa_data->softirqs_seq);
 857
 858		taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
 859		if (!taa_data->threads_seq)
 860			goto out_err;
 861
 862		trace_seq_init(taa_data->threads_seq);
 863
 864		taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
 865		if (!taa_data->stack_seq)
 866			goto out_err;
 867
 868		trace_seq_init(taa_data->stack_seq);
 869	}
 870
 871	return 0;
 872
 873out_err:
 874	timerlat_aa_destroy_seqs(taa_ctx);
 875	return -1;
 876}
 877
 878/*
 879 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 880 */
 881static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
 882{
 883
 884	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
 885				     timerlat_aa_handler, tool);
 886
 887	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);
 888
 889	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
 890				     timerlat_aa_nmi_handler, tool);
 891
 892	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
 893				     timerlat_aa_irq_handler, tool);
 894
 895	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
 896				     timerlat_aa_softirq_handler, tool);
 897
 898	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
 899				     timerlat_aa_thread_handler, tool);
 900
 901	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
 902				     timerlat_aa_stack_handler, tool);
 903	if (!dump_tasks)
 904		return;
 905
 906	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
 907	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
 908				     timerlat_aa_sched_switch_handler, tool);
 909
 910	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
 911	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
 912				     timerlat_aa_kworker_start_handler, tool);
 913}
 914
 915/*
 916 * timerlat_aa_register_events - Register events used in the auto-analysis
 917 *
 918 * Returns 0 on success, -1 otherwise.
 919 */
 920static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
 921{
 922	int retval;
 923
 924	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
 925				timerlat_aa_handler, tool);
 926
 927
 928	/*
 929	 * Register the auto-analysis handlers.
 930	 */
 931	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
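	/*
	 * A failure without errno set is treated as "the osnoise events do
	 * not exist on this kernel"; other failures are not fatal here.
	 */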
 932	if (retval < 0 && !errno) {
 933		err_msg("Could not find osnoise events\n");
 934		goto out_err;
 935	}
 936
 937	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
 938				   timerlat_aa_nmi_handler, tool);
 939
 940	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
 941				   timerlat_aa_irq_handler, tool);
 942
 943	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
 944				   timerlat_aa_softirq_handler, tool);
 945
 946	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
 947				   timerlat_aa_thread_handler, tool);
 948
 949	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
 950				   timerlat_aa_stack_handler, tool);
 951
 952	if (!dump_tasks)
 953		return 0;
 954
 955	/*
 956	 * Dump task events.
 957	 */
 958	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
 959	if (retval < 0 && !errno) {
 960		err_msg("Could not find sched_switch\n");
 961		goto out_err;
 962	}
 963
 964	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
 965				   timerlat_aa_sched_switch_handler, tool);
 966
 967	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
 968	if (retval < 0 && !errno) {
 969		err_msg("Could not find workqueue_execute_start\n");
 970		goto out_err;
 971	}
 972
 973	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
 974				   timerlat_aa_kworker_start_handler, tool);
 975
 976	return 0;
 977
 978out_err:
 979	timerlat_aa_unregister_events(tool, dump_tasks);
 980	return -1;
 981}
 982
 983/**
 984 * timerlat_aa_destroy - Destroy timerlat auto-analysis
 985 */
 986void timerlat_aa_destroy(void)
 987{
 988	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
 989
 990	if (!taa_ctx)
 991		return;
 992
 993	if (!taa_ctx->taa_data)
 994		goto out_ctx;
 995
 996	timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
 997	timerlat_aa_destroy_seqs(taa_ctx);
 998	free(taa_ctx->taa_data);
 999out_ctx:
1000	free(taa_ctx);
1001}
1002
1003/**
1004 * timerlat_aa_init - Initialize timerlat auto-analysis
1005 *
1006 * Returns 0 on success, -1 otherwise.
1007 */
1008int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
1009{
1010	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
1011	struct timerlat_aa_context *taa_ctx;
1012	int retval;
1013
1014	taa_ctx = calloc(1, sizeof(*taa_ctx));
1015	if (!taa_ctx)
1016		return -1;
1017
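	/*
	 * Publish the context before anything else, so the event handlers
	 * (and timerlat_aa_destroy() on the error path) can find it via
	 * timerlat_aa_get_ctx().
	 */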
1018	__timerlat_aa_ctx = taa_ctx;
1019
1020	taa_ctx->nr_cpus = nr_cpus;
1021	taa_ctx->tool = tool;
1022	taa_ctx->dump_tasks = dump_tasks;
1023
1024	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
1025	if (!taa_ctx->taa_data)
1026		goto out_err;
1027
1028	retval = timerlat_aa_init_seqs(taa_ctx);
1029	if (retval)
1030		goto out_err;
1031
1032	retval = timerlat_aa_register_events(tool, dump_tasks);
1033	if (retval)
1034		goto out_err;
1035
1036	return 0;
1037
1038out_err:
1039	timerlat_aa_destroy();
1040	return -1;
1041}