// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;

	struct perf_tsc_conversion	tc;

	u8				timeless_decoding;
	u8				data_queued;

	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;

	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;

	u64				kernel_start;

	unsigned long			num_events;
};

struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
};

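/*
 * Pretty-print a block of raw SPE trace: decode one packet at a time, dump
 * its bytes alongside the packet description, and flag undecodable bytes
 * as bad packets.
 */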
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %zu bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

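/*
 * Decoder callback: feed the next auxtrace buffer for this queue to the
 * SPE decoder, loading its data from the perf data file on demand and
 * dropping the previous buffer once it is no longer needed.
 */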
static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}

static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
		unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	free(speq);

	return NULL;
}

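/* Addresses at or above the kernel start address are kernel space. */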
static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}

static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = 1;
	sample->cpu = speq->cpu;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}

static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

#define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
			 ARM_SPE_REMOTE_ACCESS)

static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
{
	if (type & SPE_MEM_TYPE)
		return true;

	return false;
}

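/*
 * Translate an SPE record's cache, remote-access and TLB event bits into a
 * perf_mem_data_src value: operation (load/store), memory hierarchy level
 * with hit/miss, remote cache access, and DTLB walk hit/miss.
 */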
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
{
	union perf_mem_data_src	data_src = { 0 };

	if (record->op == ARM_SPE_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else
		data_src.mem_op = PERF_MEM_OP_STORE;

	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L3;

		if (record->type & ARM_SPE_LLC_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L1;

		if (record->type & ARM_SPE_L1D_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}

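/*
 * Synthesize perf samples for the current SPE record, one per enabled
 * sample class (L1D, LLC, TLB, branch, remote access, memory) whose type
 * bits match the record.
 */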
static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	data_src = arm_spe__synth_data_source(record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	return 0;
}

static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic would be to decode the packets first and
		 * then synthesize a sample based on the record; here the
		 * flow is reversed: arm_spe_sample() is called to synthesize
		 * samples before arm_spe_decode().
		 *
		 * There are two reasons for this:
		 * 1. When the queue is set up in arm_spe__setup_queue(), the
		 * trace data has already been decoded and a record generated,
		 * but no sample has been synthesized for it yet; that pending
		 * record is handled here first.
		 * 2. After decoding trace data, the record timestamp must be
		 * compared with the timestamp of the incoming perf event. If
		 * the record is later, we bail out and push the record onto
		 * the auxtrace heap, deferring sample synthesis until the
		 * next call here. This keeps samples from Arm SPE trace data
		 * correctly time-ordered with other perf events.
		 */
		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * An error was detected while decoding the SPE trace data;
		 * continue with the next chunk of trace data to find more
		 * records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than the timestamp
		 * of the incoming perf event, bail out so the perf event can
		 * be processed first.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

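/*
 * Lazily allocate the per-queue decoder state. For timed decoding, prime
 * the queue by decoding its first record and adding the queue to the
 * auxtrace heap, keyed by that record's timestamp.
 */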
static int arm_spe__setup_queue(struct arm_spe *spe,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}

static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Cycle through the list of events: decoding is timeless only if
	 * none of them has the time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = speq->thread->pid_;
		if (queue->cpu == -1)
			speq->cpu = speq->thread->cpu;
	}
}

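/*
 * Process queues in timestamp order up to the given timestamp: repeatedly
 * pop the queue with the earliest pending record from the auxtrace heap,
 * run its decoder, and re-add the queue keyed by its next record's
 * timestamp.
 */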
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}

static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					    u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}

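/*
 * Main event callback: convert the event's sample time to the TSC domain,
 * then either drain the timeless queues for a task on PERF_RECORD_EXIT or
 * process the timed queues up to the event's timestamp.
 */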
static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
			struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
					event->fork.tid,
					sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
	}

	return err;
}

static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
				data_offset, &buffer);
		if (err)
			return err;

		/* Dump here, now that we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int arm_spe_flush(struct perf_session *session,
			 struct perf_tool *tool)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
			auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
				MAX_TIMESTAMP - 1);

	return arm_spe_process_queues(spe, MAX_TIMESTAMP);
}

static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
};

static void arm_spe_print_info(__u64 *arr)
{
	if (!dump_trace)
		return;

	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}

struct arm_spe_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int arm_spe_event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct arm_spe_synth *arm_spe_synth =
		      container_of(tool, struct arm_spe_synth, dummy_tool);

	return perf_session__deliver_synth_event(arm_spe_synth->session,
						 event, NULL);
}

static int arm_spe_synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct arm_spe_synth arm_spe_synth;

	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
	arm_spe_synth.session = session;

	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
					   &id, arm_spe_event_synth);
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				    const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}

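/*
 * Create a synthetic perf event attribute and id for each sample class
 * requested via the itrace synthesis options, derived from the SPE
 * evsel's attribute, and give each id a human-readable event name.
 */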
static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* Create a new id value at a fixed offset from the evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;

	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_miss_id = id;
		arm_spe_set_event_name(evlist, id, "branch-miss");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	if (spe->synth_opts.mem) {
		spe->sample_memory = true;

		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->memory_id = id;
		arm_spe_set_event_name(evlist, id, "memory");
	}

	return 0;
}

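/*
 * Entry point for PERF_RECORD_AUXTRACE_INFO: allocate and initialize the
 * per-session arm_spe state, wire up the auxtrace callbacks, set up the
 * synthetic events, and queue the trace data found in the file's auxtrace
 * index.
 */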
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
	struct perf_record_time_conv *tc = &session->time_conv;
	struct arm_spe *spe;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe)
		return -ENOMEM;

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has already been
	 * handled, and the parameters for the hardware clock are stored in
	 * the session context.  Pass these parameters to the struct
	 * perf_tsc_conversion in "spe->tc", which is used later to convert
	 * between the clock counter and timestamps.
	 *
	 * For backward compatibility, copy the fields starting from
	 * "time_cycles" only if they are present in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(&auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		spe->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&spe->synth_opts, false);

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
	return err;
}