v6.2
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Arm Statistical Profiling Extensions (SPE) support
   4 * Copyright (c) 2017-2018, Arm Ltd.
   5 */
   6
   7#include <byteswap.h>
   8#include <endian.h>
   9#include <errno.h>
  10#include <inttypes.h>
  11#include <linux/bitops.h>
  12#include <linux/kernel.h>
  13#include <linux/log2.h>
  14#include <linux/types.h>
  15#include <linux/zalloc.h>
  16#include <stdlib.h>
  17#include <unistd.h>
  18
  19#include "auxtrace.h"
  20#include "color.h"
  21#include "debug.h"
  22#include "evlist.h"
  23#include "evsel.h"
  24#include "machine.h"
  25#include "session.h"
  26#include "symbol.h"
  27#include "thread.h"
  28#include "thread-stack.h"
  29#include "tsc.h"
  30#include "tool.h"
  31#include "util/synthetic-events.h"
  32
  33#include "arm-spe.h"
  34#include "arm-spe-decoder/arm-spe-decoder.h"
  35#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
  36
  37#include "../../arch/arm64/include/asm/cputype.h"
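/*
 * Sentinel timestamp: at flush time all remaining queued records are
 * processed against this value so that nothing is left undecoded.
 */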
  38#define MAX_TIMESTAMP (~0ULL)
  39
  40struct arm_spe {
  41	struct auxtrace			auxtrace;
  42	struct auxtrace_queues		queues;
  43	struct auxtrace_heap		heap;
  44	struct itrace_synth_opts        synth_opts;
  45	u32				auxtrace_type;
  46	struct perf_session		*session;
  47	struct machine			*machine;
  48	u32				pmu_type;
  49	u64				midr;
  50
  51	struct perf_tsc_conversion	tc;
  52
  53	u8				timeless_decoding;
  54	u8				data_queued;
  55
  56	u64				sample_type;
  57	u8				sample_flc;
  58	u8				sample_llc;
  59	u8				sample_tlb;
  60	u8				sample_branch;
  61	u8				sample_remote_access;
  62	u8				sample_memory;
  63	u8				sample_instructions;
  64	u64				instructions_sample_period;
  65
  66	u64				l1d_miss_id;
  67	u64				l1d_access_id;
  68	u64				llc_miss_id;
  69	u64				llc_access_id;
  70	u64				tlb_miss_id;
  71	u64				tlb_access_id;
  72	u64				branch_miss_id;
  73	u64				remote_access_id;
  74	u64				memory_id;
  75	u64				instructions_id;
  76
  77	u64				kernel_start;
  78
  79	unsigned long			num_events;
  80	u8				use_ctx_pkt_for_pid;
  81};
  82
  83struct arm_spe_queue {
  84	struct arm_spe			*spe;
  85	unsigned int			queue_nr;
  86	struct auxtrace_buffer		*buffer;
  87	struct auxtrace_buffer		*old_buffer;
  88	union perf_event		*event_buf;
  89	bool				on_heap;
  90	bool				done;
  91	pid_t				pid;
  92	pid_t				tid;
  93	int				cpu;
  94	struct arm_spe_decoder		*decoder;
  95	u64				time;
  96	u64				timestamp;
  97	struct thread			*thread;
  98	u64				period_instructions;
  99};
 100
 101static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
 102			 unsigned char *buf, size_t len)
 103{
 104	struct arm_spe_pkt packet;
 105	size_t pos = 0;
 106	int ret, pkt_len, i;
 107	char desc[ARM_SPE_PKT_DESC_MAX];
 108	const char *color = PERF_COLOR_BLUE;
 109
 110	color_fprintf(stdout, color,
 111		      ". ... ARM SPE data: size %#zx bytes\n",
 112		      len);
 113
 114	while (len) {
 115		ret = arm_spe_get_packet(buf, len, &packet);
 116		if (ret > 0)
 117			pkt_len = ret;
 118		else
 119			pkt_len = 1;
 120		printf(".");
  121		color_fprintf(stdout, color, "  %08zx: ", pos);
 122		for (i = 0; i < pkt_len; i++)
 123			color_fprintf(stdout, color, " %02x", buf[i]);
 124		for (; i < 16; i++)
 125			color_fprintf(stdout, color, "   ");
 126		if (ret > 0) {
 127			ret = arm_spe_pkt_desc(&packet, desc,
 128					       ARM_SPE_PKT_DESC_MAX);
 129			if (!ret)
 130				color_fprintf(stdout, color, " %s\n", desc);
 131		} else {
 132			color_fprintf(stdout, color, " Bad packet!\n");
 133		}
 134		pos += pkt_len;
 135		buf += pkt_len;
 136		len -= pkt_len;
 137	}
 138}
 139
 140static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
 141			       size_t len)
 142{
 143	printf(".\n");
 144	arm_spe_dump(spe, buf, len);
 145}
 146
 147static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
 148{
 149	struct arm_spe_queue *speq = data;
 150	struct auxtrace_buffer *buffer = speq->buffer;
 151	struct auxtrace_buffer *old_buffer = speq->old_buffer;
 152	struct auxtrace_queue *queue;
 153
 154	queue = &speq->spe->queues.queue_array[speq->queue_nr];
 155
 156	buffer = auxtrace_buffer__next(queue, buffer);
 157	/* If no more data, drop the previous auxtrace_buffer and return */
 158	if (!buffer) {
 159		if (old_buffer)
 160			auxtrace_buffer__drop_data(old_buffer);
 161		b->len = 0;
 162		return 0;
 163	}
 164
 165	speq->buffer = buffer;
 166
 167	/* If the aux_buffer doesn't have data associated, try to load it */
 168	if (!buffer->data) {
 169		/* get the file desc associated with the perf data file */
 170		int fd = perf_data__fd(speq->spe->session->data);
 171
 172		buffer->data = auxtrace_buffer__get_data(buffer, fd);
 173		if (!buffer->data)
 174			return -ENOMEM;
 175	}
 176
 177	b->len = buffer->size;
 178	b->buf = buffer->data;
 179
 180	if (b->len) {
 181		if (old_buffer)
 182			auxtrace_buffer__drop_data(old_buffer);
 183		speq->old_buffer = buffer;
 184	} else {
 185		auxtrace_buffer__drop_data(buffer);
 186		return arm_spe_get_trace(b, data);
 187	}
 188
 189	return 0;
 190}
 191
 192static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
 193		unsigned int queue_nr)
 194{
 195	struct arm_spe_params params = { .get_trace = 0, };
 196	struct arm_spe_queue *speq;
 197
 198	speq = zalloc(sizeof(*speq));
 199	if (!speq)
 200		return NULL;
 201
 202	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
 203	if (!speq->event_buf)
 204		goto out_free;
 205
 206	speq->spe = spe;
 207	speq->queue_nr = queue_nr;
 208	speq->pid = -1;
 209	speq->tid = -1;
 210	speq->cpu = -1;
 211	speq->period_instructions = 0;
 212
 213	/* params set */
 214	params.get_trace = arm_spe_get_trace;
 215	params.data = speq;
 216
 217	/* create new decoder */
 218	speq->decoder = arm_spe_decoder_new(&params);
 219	if (!speq->decoder)
 220		goto out_free;
 221
 222	return speq;
 223
 224out_free:
 225	zfree(&speq->event_buf);
 226	free(speq);
 227
 228	return NULL;
 229}
 230
 231static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
 232{
 233	return ip >= spe->kernel_start ?
 234		PERF_RECORD_MISC_KERNEL :
 235		PERF_RECORD_MISC_USER;
 236}
 237
 238static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
 239				    struct auxtrace_queue *queue)
 240{
 241	struct arm_spe_queue *speq = queue->priv;
 242	pid_t tid;
 243
 244	tid = machine__get_current_tid(spe->machine, speq->cpu);
 245	if (tid != -1) {
 246		speq->tid = tid;
 247		thread__zput(speq->thread);
 248	} else
 249		speq->tid = queue->tid;
 250
 251	if ((!speq->thread) && (speq->tid != -1)) {
 252		speq->thread = machine__find_thread(spe->machine, -1,
 253						    speq->tid);
 254	}
 255
 256	if (speq->thread) {
 257		speq->pid = speq->thread->pid_;
 258		if (queue->cpu == -1)
 259			speq->cpu = speq->thread->cpu;
 260	}
 261}
 262
 263static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
 264{
 265	struct arm_spe *spe = speq->spe;
 266	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
 267
 268	if (err)
 269		return err;
 270
 271	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
 272
 273	return 0;
 274}
 275
 276static void arm_spe_prep_sample(struct arm_spe *spe,
 277				struct arm_spe_queue *speq,
 278				union perf_event *event,
 279				struct perf_sample *sample)
 280{
 281	struct arm_spe_record *record = &speq->decoder->record;
 282
 283	if (!spe->timeless_decoding)
 284		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
 285
 286	sample->ip = record->from_ip;
 287	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
 288	sample->pid = speq->pid;
 289	sample->tid = speq->tid;
 290	sample->period = 1;
 291	sample->cpu = speq->cpu;
 292
 293	event->sample.header.type = PERF_RECORD_SAMPLE;
 294	event->sample.header.misc = sample->cpumode;
 295	event->sample.header.size = sizeof(struct perf_event_header);
 296}
 297
 298static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
 299{
 300	event->header.size = perf_event__sample_event_size(sample, type, 0);
 301	return perf_event__synthesize_sample(event, type, 0, sample);
 302}
 303
 304static inline int
 305arm_spe_deliver_synth_event(struct arm_spe *spe,
 306			    struct arm_spe_queue *speq __maybe_unused,
 307			    union perf_event *event,
 308			    struct perf_sample *sample)
 309{
 310	int ret;
 311
 312	if (spe->synth_opts.inject) {
 313		ret = arm_spe__inject_event(event, sample, spe->sample_type);
 314		if (ret)
 315			return ret;
 316	}
 317
 318	ret = perf_session__deliver_synth_event(spe->session, event, sample);
 319	if (ret)
 320		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
 321
 322	return ret;
 323}
 324
 325static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
 326				     u64 spe_events_id, u64 data_src)
 327{
 328	struct arm_spe *spe = speq->spe;
 329	struct arm_spe_record *record = &speq->decoder->record;
 330	union perf_event *event = speq->event_buf;
 331	struct perf_sample sample = { .ip = 0, };
 332
 333	arm_spe_prep_sample(spe, speq, event, &sample);
 334
 335	sample.id = spe_events_id;
 336	sample.stream_id = spe_events_id;
 337	sample.addr = record->virt_addr;
 338	sample.phys_addr = record->phys_addr;
 339	sample.data_src = data_src;
 340	sample.weight = record->latency;
 341
 342	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 343}
 344
 345static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
 346					u64 spe_events_id)
 347{
 348	struct arm_spe *spe = speq->spe;
 349	struct arm_spe_record *record = &speq->decoder->record;
 350	union perf_event *event = speq->event_buf;
 351	struct perf_sample sample = { .ip = 0, };
 352
 353	arm_spe_prep_sample(spe, speq, event, &sample);
 354
 355	sample.id = spe_events_id;
 356	sample.stream_id = spe_events_id;
 357	sample.addr = record->to_ip;
 358	sample.weight = record->latency;
 359
 360	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 361}
 362
 363static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
 364					     u64 spe_events_id, u64 data_src)
 365{
 366	struct arm_spe *spe = speq->spe;
 367	struct arm_spe_record *record = &speq->decoder->record;
 368	union perf_event *event = speq->event_buf;
 369	struct perf_sample sample = { .ip = 0, };
 370
 371	/*
 372	 * Handles perf instruction sampling period.
 373	 */
 374	speq->period_instructions++;
 375	if (speq->period_instructions < spe->instructions_sample_period)
 376		return 0;
 377	speq->period_instructions = 0;
 378
 379	arm_spe_prep_sample(spe, speq, event, &sample);
 380
 381	sample.id = spe_events_id;
 382	sample.stream_id = spe_events_id;
 383	sample.addr = record->virt_addr;
 384	sample.phys_addr = record->phys_addr;
 385	sample.data_src = data_src;
 386	sample.period = spe->instructions_sample_period;
 387	sample.weight = record->latency;
 388
 389	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 390}
 391
 392static const struct midr_range neoverse_spe[] = {
 393	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
 394	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
 395	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
 396	{},
 397};
 398
 399static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
 400						union perf_mem_data_src *data_src)
 401{
 402	/*
 403	 * Even though four levels of cache hierarchy are possible, no known
 404	 * production Neoverse systems currently include more than three levels
 405	 * so for the time being we assume three exist. If a production system
  406	 * is built with four then this function would have to be changed to
 407	 * detect the number of levels for reporting.
 408	 */
 409
 410	/*
 411	 * We have no data on the hit level or data source for stores in the
 412	 * Neoverse SPE records.
 413	 */
 414	if (record->op & ARM_SPE_ST) {
 415		data_src->mem_lvl = PERF_MEM_LVL_NA;
 416		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
 417		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
 418		return;
 419	}
 420
 421	switch (record->source) {
 422	case ARM_SPE_NV_L1D:
 423		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
 424		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
 425		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
 426		break;
 427	case ARM_SPE_NV_L2:
 428		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
 429		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
 430		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
 431		break;
 432	case ARM_SPE_NV_PEER_CORE:
 433		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
 434		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
 435		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
 436		break;
 437	/*
  438	 * We don't know if this is L1 or L2, but we do know it was a
  439	 * cache-to-cache transfer, so set SNOOPX_PEER.
 440	 */
 441	case ARM_SPE_NV_LOCAL_CLUSTER:
 442	case ARM_SPE_NV_PEER_CLUSTER:
 443		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
 444		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
 445		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
 446		break;
 447	/*
 448	 * System cache is assumed to be L3
 449	 */
 450	case ARM_SPE_NV_SYS_CACHE:
 451		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
 452		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
 453		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
 454		break;
 455	/*
 456	 * We don't know what level it hit in, except it came from the other
 457	 * socket
 458	 */
 459	case ARM_SPE_NV_REMOTE:
 460		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
 461		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
 462		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
 463		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
 464		break;
 465	case ARM_SPE_NV_DRAM:
 466		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
 467		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
 468		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
 469		break;
 470	default:
 471		break;
 472	}
 473}
 474
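/*
 * Fallback used when the CPU's data source encoding is not known (i.e. not a
 * listed Neoverse core): derive the memory hierarchy level purely from the
 * access/miss event type bits of the record.
 */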
 475static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record,
 476					       union perf_mem_data_src *data_src)
 477{
 478	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
 479		data_src->mem_lvl = PERF_MEM_LVL_L3;
 480
 481		if (record->type & ARM_SPE_LLC_MISS)
 482			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
 483		else
 484			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
 485	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
 486		data_src->mem_lvl = PERF_MEM_LVL_L1;
 487
 488		if (record->type & ARM_SPE_L1D_MISS)
 489			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
 490		else
 491			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
 492	}
 493
 494	if (record->type & ARM_SPE_REMOTE_ACCESS)
 495		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
 496}
 497
 498static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
 499{
 500	union perf_mem_data_src	data_src = { 0 };
 501	bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
 502
 503	if (record->op == ARM_SPE_LD)
 504		data_src.mem_op = PERF_MEM_OP_LOAD;
 505	else if (record->op == ARM_SPE_ST)
 506		data_src.mem_op = PERF_MEM_OP_STORE;
 507	else
 508		return 0;
 509
 510	if (is_neoverse)
 511		arm_spe__synth_data_source_neoverse(record, &data_src);
 512	else
 513		arm_spe__synth_data_source_generic(record, &data_src);
 514
 515	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
 516		data_src.mem_dtlb = PERF_MEM_TLB_WK;
 517
 518		if (record->type & ARM_SPE_TLB_MISS)
 519			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
 520		else
 521			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
 522	}
 523
 524	return data_src.val;
 525}
 526
 527static int arm_spe_sample(struct arm_spe_queue *speq)
 528{
 529	const struct arm_spe_record *record = &speq->decoder->record;
 530	struct arm_spe *spe = speq->spe;
 531	u64 data_src;
 532	int err;
 533
 534	data_src = arm_spe__synth_data_source(record, spe->midr);
 535
 536	if (spe->sample_flc) {
 537		if (record->type & ARM_SPE_L1D_MISS) {
 538			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
 539							data_src);
 540			if (err)
 541				return err;
 542		}
 543
 544		if (record->type & ARM_SPE_L1D_ACCESS) {
 545			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
 546							data_src);
 547			if (err)
 548				return err;
 549		}
 550	}
 551
 552	if (spe->sample_llc) {
 553		if (record->type & ARM_SPE_LLC_MISS) {
 554			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
 555							data_src);
 556			if (err)
 557				return err;
 558		}
 559
 560		if (record->type & ARM_SPE_LLC_ACCESS) {
 561			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
 562							data_src);
 563			if (err)
 564				return err;
 565		}
 566	}
 567
 568	if (spe->sample_tlb) {
 569		if (record->type & ARM_SPE_TLB_MISS) {
 570			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
 571							data_src);
 572			if (err)
 573				return err;
 574		}
 575
 576		if (record->type & ARM_SPE_TLB_ACCESS) {
 577			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
 578							data_src);
 579			if (err)
 580				return err;
 581		}
 582	}
 583
 584	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
 585		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
 586		if (err)
 587			return err;
 588	}
 589
 590	if (spe->sample_remote_access &&
 591	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
 592		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
 593						data_src);
 594		if (err)
 595			return err;
 596	}
 597
 598	/*
 599	 * When data_src is zero it means the record is not a memory operation,
  600	 * skip synthesizing a memory sample in that case.
 601	 */
 602	if (spe->sample_memory && data_src) {
 603		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
 604		if (err)
 605			return err;
 606	}
 607
 608	if (spe->sample_instructions) {
 609		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
 610		if (err)
 611			return err;
 612	}
 613
 614	return 0;
 615}
 616
 617static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
 618{
 619	struct arm_spe *spe = speq->spe;
 620	struct arm_spe_record *record;
 621	int ret;
 622
 623	if (!spe->kernel_start)
 624		spe->kernel_start = machine__kernel_start(spe->machine);
 625
 626	while (1) {
 627		/*
  628		 * The usual logic is to decode the packets first, and then
  629		 * synthesize samples based on the records; but here the flow is
  630		 * reversed: arm_spe_sample() is called to synthesize samples
  631		 * prior to arm_spe_decode().
  632		 *
  633		 * There are two reasons for this code logic:
  634		 * 1. When the queue is set up in arm_spe__setup_queue(), the
  635		 * trace data has already been decoded and a record generated,
  636		 * but no sample has been synthesized for that record yet; that
  637		 * leftover record is the one synthesized here first.
  638		 * 2. After decoding trace data, the record timestamp must be
  639		 * compared with the timestamp of the incoming perf event; if
  640		 * the record timestamp is later, bail out and push the record
  641		 * onto the auxtrace heap, so that synthesizing its sample is
  642		 * deferred until the next call here. This correlates samples
  643		 * between Arm SPE trace data and other perf events with
  644		 * correct time ordering.
 645		 */
 646
 647		/*
 648		 * Update pid/tid info.
 649		 */
 650		record = &speq->decoder->record;
 651		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
 652			ret = arm_spe_set_tid(speq, record->context_id);
 653			if (ret)
 654				return ret;
 655
 656			spe->use_ctx_pkt_for_pid = true;
 657		}
 658
 659		ret = arm_spe_sample(speq);
 660		if (ret)
 661			return ret;
 662
 663		ret = arm_spe_decode(speq->decoder);
 664		if (!ret) {
 665			pr_debug("No data or all data has been processed.\n");
 666			return 1;
 667		}
 668
 669		/*
  670		 * If an error is detected while decoding the SPE trace data,
  671		 * continue to the next trace data and look for more records.
 672		 */
 673		if (ret < 0)
 674			continue;
 675
 676		record = &speq->decoder->record;
 677
 678		/* Update timestamp for the last record */
 679		if (record->timestamp > speq->timestamp)
 680			speq->timestamp = record->timestamp;
 681
 682		/*
  683		 * If the timestamp of the queue is later than the timestamp of
  684		 * the incoming perf event, bail out so that the perf event can
  685		 * be processed first.
 686		 */
 687		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
 688			*timestamp = speq->timestamp;
 689			return 0;
 690		}
 691	}
 692
 693	return 0;
 694}
 695
 696static int arm_spe__setup_queue(struct arm_spe *spe,
 697			       struct auxtrace_queue *queue,
 698			       unsigned int queue_nr)
 699{
 700	struct arm_spe_queue *speq = queue->priv;
 701	struct arm_spe_record *record;
 702
 703	if (list_empty(&queue->head) || speq)
 704		return 0;
 705
 706	speq = arm_spe__alloc_queue(spe, queue_nr);
 707
 708	if (!speq)
 709		return -ENOMEM;
 710
 711	queue->priv = speq;
 712
 713	if (queue->cpu != -1)
 714		speq->cpu = queue->cpu;
 715
 716	if (!speq->on_heap) {
 717		int ret;
 718
 719		if (spe->timeless_decoding)
 720			return 0;
 721
 722retry:
 723		ret = arm_spe_decode(speq->decoder);
 724
 725		if (!ret)
 726			return 0;
 727
 728		if (ret < 0)
 729			goto retry;
 730
 731		record = &speq->decoder->record;
 732
 733		speq->timestamp = record->timestamp;
 734		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
 735		if (ret)
 736			return ret;
 737		speq->on_heap = true;
 738	}
 739
 740	return 0;
 741}
 742
 743static int arm_spe__setup_queues(struct arm_spe *spe)
 744{
 745	unsigned int i;
 746	int ret;
 747
 748	for (i = 0; i < spe->queues.nr_queues; i++) {
 749		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
 750		if (ret)
 751			return ret;
 752	}
 753
 754	return 0;
 755}
 756
 757static int arm_spe__update_queues(struct arm_spe *spe)
 758{
 759	if (spe->queues.new_data) {
 760		spe->queues.new_data = false;
 761		return arm_spe__setup_queues(spe);
 762	}
 763
 764	return 0;
 765}
 766
 767static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
 768{
 769	struct evsel *evsel;
 770	struct evlist *evlist = spe->session->evlist;
 771	bool timeless_decoding = true;
 772
 773	/*
  774	 * Cycle through the list of events; if any of them has the time
  775	 * bit set, timestamps are available and decoding is not timeless.
 776	 */
 777	evlist__for_each_entry(evlist, evsel) {
 778		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
 779			timeless_decoding = false;
 780	}
 781
 782	return timeless_decoding;
 783}
 784
 785static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
 786{
 787	unsigned int queue_nr;
 788	u64 ts;
 789	int ret;
 790
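	/*
	 * Queues sit on a min-heap ordered by the timestamp of their next
	 * record: pop the earliest queue, decode it up to the target
	 * timestamp, then re-add it to the heap with its updated timestamp.
	 */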
 791	while (1) {
 792		struct auxtrace_queue *queue;
 793		struct arm_spe_queue *speq;
 794
 795		if (!spe->heap.heap_cnt)
 796			return 0;
 797
 798		if (spe->heap.heap_array[0].ordinal >= timestamp)
 799			return 0;
 800
 801		queue_nr = spe->heap.heap_array[0].queue_nr;
 802		queue = &spe->queues.queue_array[queue_nr];
 803		speq = queue->priv;
 804
 805		auxtrace_heap__pop(&spe->heap);
 806
 807		if (spe->heap.heap_cnt) {
 808			ts = spe->heap.heap_array[0].ordinal + 1;
 809			if (ts > timestamp)
 810				ts = timestamp;
 811		} else {
 812			ts = timestamp;
 813		}
 814
 815		/*
 816		 * A previous context-switch event has set pid/tid in the machine's context, so
 817		 * here we need to update the pid/tid in the thread and SPE queue.
 818		 */
 819		if (!spe->use_ctx_pkt_for_pid)
 820			arm_spe_set_pid_tid_cpu(spe, queue);
 821
 822		ret = arm_spe_run_decoder(speq, &ts);
 823		if (ret < 0) {
 824			auxtrace_heap__add(&spe->heap, queue_nr, ts);
 825			return ret;
 826		}
 827
 828		if (!ret) {
 829			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
 830			if (ret < 0)
 831				return ret;
 832		} else {
 833			speq->on_heap = false;
 834		}
 835	}
 836
 837	return 0;
 838}
 839
 840static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
 841					    u64 time_)
 842{
 843	struct auxtrace_queues *queues = &spe->queues;
 844	unsigned int i;
 845	u64 ts = 0;
 846
 847	for (i = 0; i < queues->nr_queues; i++) {
 848		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
 849		struct arm_spe_queue *speq = queue->priv;
 850
 851		if (speq && (tid == -1 || speq->tid == tid)) {
 852			speq->time = time_;
 853			arm_spe_set_pid_tid_cpu(spe, queue);
 854			arm_spe_run_decoder(speq, &ts);
 855		}
 856	}
 857	return 0;
 858}
 859
 860static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
 861				  struct perf_sample *sample)
 862{
 863	pid_t pid, tid;
 864	int cpu;
 865
 866	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
 867		return 0;
 868
 869	pid = event->context_switch.next_prev_pid;
 870	tid = event->context_switch.next_prev_tid;
 871	cpu = sample->cpu;
 872
 873	if (tid == -1)
 874		pr_warning("context_switch event has no tid\n");
 875
 876	return machine__set_current_tid(spe->machine, cpu, pid, tid);
 877}
 878
 879static int arm_spe_process_event(struct perf_session *session,
 880				 union perf_event *event,
 881				 struct perf_sample *sample,
 882				 struct perf_tool *tool)
 883{
 884	int err = 0;
 885	u64 timestamp;
 886	struct arm_spe *spe = container_of(session->auxtrace,
 887			struct arm_spe, auxtrace);
 888
 889	if (dump_trace)
 890		return 0;
 891
 892	if (!tool->ordered_events) {
 893		pr_err("SPE trace requires ordered events\n");
 894		return -EINVAL;
 895	}
 896
 897	if (sample->time && (sample->time != (u64) -1))
 898		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
 899	else
 900		timestamp = 0;
 901
 902	if (timestamp || spe->timeless_decoding) {
 903		err = arm_spe__update_queues(spe);
 904		if (err)
 905			return err;
 906	}
 907
 908	if (spe->timeless_decoding) {
 909		if (event->header.type == PERF_RECORD_EXIT) {
 910			err = arm_spe_process_timeless_queues(spe,
 911					event->fork.tid,
 912					sample->time);
 913		}
 914	} else if (timestamp) {
 915		err = arm_spe_process_queues(spe, timestamp);
 916		if (err)
 917			return err;
 918
 919		if (!spe->use_ctx_pkt_for_pid &&
 920		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
 921		    event->header.type == PERF_RECORD_SWITCH))
 922			err = arm_spe_context_switch(spe, event, sample);
 923	}
 924
 925	return err;
 926}
 927
 928static int arm_spe_process_auxtrace_event(struct perf_session *session,
 929					  union perf_event *event,
 930					  struct perf_tool *tool __maybe_unused)
 931{
 932	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
 933					     auxtrace);
 934
 935	if (!spe->data_queued) {
 936		struct auxtrace_buffer *buffer;
 937		off_t data_offset;
 938		int fd = perf_data__fd(session->data);
 939		int err;
 940
 941		if (perf_data__is_pipe(session->data)) {
 942			data_offset = 0;
 943		} else {
 944			data_offset = lseek(fd, 0, SEEK_CUR);
 945			if (data_offset == -1)
 946				return -errno;
 947		}
 948
 949		err = auxtrace_queues__add_event(&spe->queues, session, event,
 950				data_offset, &buffer);
 951		if (err)
 952			return err;
 953
  954		/* Dump here now that we have copied a piped trace out of the pipe */
 955		if (dump_trace) {
 956			if (auxtrace_buffer__get_data(buffer, fd)) {
 957				arm_spe_dump_event(spe, buffer->data,
 958						buffer->size);
 959				auxtrace_buffer__put_data(buffer);
 960			}
 961		}
 962	}
 963
 964	return 0;
 965}
 966
  967static int arm_spe_flush(struct perf_session *session,
  968			 struct perf_tool *tool)
 969{
 970	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
 971			auxtrace);
 972	int ret;
 973
 974	if (dump_trace)
 975		return 0;
 976
 977	if (!tool->ordered_events)
 978		return -EINVAL;
 979
 980	ret = arm_spe__update_queues(spe);
 981	if (ret < 0)
 982		return ret;
 983
 984	if (spe->timeless_decoding)
 985		return arm_spe_process_timeless_queues(spe, -1,
 986				MAX_TIMESTAMP - 1);
 987
 988	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
 989	if (ret)
 990		return ret;
 991
 992	if (!spe->use_ctx_pkt_for_pid)
 993		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
 994			    "Matching of TIDs to SPE events could be inaccurate.\n");
 995
 996	return 0;
 997}
 998
 999static void arm_spe_free_queue(void *priv)
1000{
1001	struct arm_spe_queue *speq = priv;
1002
1003	if (!speq)
1004		return;
1005	thread__zput(speq->thread);
1006	arm_spe_decoder_free(speq->decoder);
1007	zfree(&speq->event_buf);
1008	free(speq);
1009}
1010
1011static void arm_spe_free_events(struct perf_session *session)
1012{
1013	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1014					     auxtrace);
1015	struct auxtrace_queues *queues = &spe->queues;
1016	unsigned int i;
1017
1018	for (i = 0; i < queues->nr_queues; i++) {
1019		arm_spe_free_queue(queues->queue_array[i].priv);
1020		queues->queue_array[i].priv = NULL;
1021	}
1022	auxtrace_queues__free(queues);
1023}
1024
1025static void arm_spe_free(struct perf_session *session)
1026{
1027	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1028					     auxtrace);
1029
1030	auxtrace_heap__free(&spe->heap);
1031	arm_spe_free_events(session);
1032	session->auxtrace = NULL;
1033	free(spe);
1034}
1035
1036static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
1037				      struct evsel *evsel)
1038{
1039	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
1040
1041	return evsel->core.attr.type == spe->pmu_type;
1042}
1043
1044static const char * const arm_spe_info_fmts[] = {
1045	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
1046};
1047
1048static void arm_spe_print_info(__u64 *arr)
1049{
1050	if (!dump_trace)
1051		return;
1052
1053	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
1054}
1055
1056struct arm_spe_synth {
1057	struct perf_tool dummy_tool;
1058	struct perf_session *session;
1059};
1060
1061static int arm_spe_event_synth(struct perf_tool *tool,
1062			       union perf_event *event,
1063			       struct perf_sample *sample __maybe_unused,
1064			       struct machine *machine __maybe_unused)
1065{
1066	struct arm_spe_synth *arm_spe_synth =
1067		      container_of(tool, struct arm_spe_synth, dummy_tool);
1068
1069	return perf_session__deliver_synth_event(arm_spe_synth->session,
1070						 event, NULL);
1071}
1072
1073static int arm_spe_synth_event(struct perf_session *session,
1074			       struct perf_event_attr *attr, u64 id)
1075{
1076	struct arm_spe_synth arm_spe_synth;
1077
1078	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
1079	arm_spe_synth.session = session;
1080
1081	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
1082					   &id, arm_spe_event_synth);
1083}
1084
1085static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
1086				    const char *name)
1087{
1088	struct evsel *evsel;
1089
1090	evlist__for_each_entry(evlist, evsel) {
1091		if (evsel->core.id && evsel->core.id[0] == id) {
1092			if (evsel->name)
1093				zfree(&evsel->name);
1094			evsel->name = strdup(name);
1095			break;
1096		}
1097	}
1098}
1099
1100static int
1101arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
1102{
1103	struct evlist *evlist = session->evlist;
1104	struct evsel *evsel;
1105	struct perf_event_attr attr;
1106	bool found = false;
1107	u64 id;
1108	int err;
1109
1110	evlist__for_each_entry(evlist, evsel) {
1111		if (evsel->core.attr.type == spe->pmu_type) {
1112			found = true;
1113			break;
1114		}
1115	}
1116
1117	if (!found) {
1118		pr_debug("No selected events with SPE trace data\n");
1119		return 0;
1120	}
1121
1122	memset(&attr, 0, sizeof(struct perf_event_attr));
1123	attr.size = sizeof(struct perf_event_attr);
1124	attr.type = PERF_TYPE_HARDWARE;
1125	attr.sample_type = evsel->core.attr.sample_type &
1126				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
1127	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1128			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
1129			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
1130	if (spe->timeless_decoding)
1131		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1132	else
1133		attr.sample_type |= PERF_SAMPLE_TIME;
1134
1135	spe->sample_type = attr.sample_type;
1136
1137	attr.exclude_user = evsel->core.attr.exclude_user;
1138	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1139	attr.exclude_hv = evsel->core.attr.exclude_hv;
1140	attr.exclude_host = evsel->core.attr.exclude_host;
1141	attr.exclude_guest = evsel->core.attr.exclude_guest;
1142	attr.sample_id_all = evsel->core.attr.sample_id_all;
1143	attr.read_format = evsel->core.attr.read_format;
1144
1145	/* create new id val to be a fixed offset from evsel id */
1146	id = evsel->core.id[0] + 1000000000;
1147
1148	if (!id)
1149		id = 1;
1150
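	/*
	 * Each synthesized event type below takes the next id from this base
	 * and is given a human-readable name via arm_spe_set_event_name().
	 */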
1151	if (spe->synth_opts.flc) {
1152		spe->sample_flc = true;
1153
1154		/* Level 1 data cache miss */
1155		err = arm_spe_synth_event(session, &attr, id);
1156		if (err)
1157			return err;
1158		spe->l1d_miss_id = id;
1159		arm_spe_set_event_name(evlist, id, "l1d-miss");
1160		id += 1;
1161
1162		/* Level 1 data cache access */
1163		err = arm_spe_synth_event(session, &attr, id);
1164		if (err)
1165			return err;
1166		spe->l1d_access_id = id;
1167		arm_spe_set_event_name(evlist, id, "l1d-access");
1168		id += 1;
1169	}
1170
1171	if (spe->synth_opts.llc) {
1172		spe->sample_llc = true;
1173
1174		/* Last level cache miss */
1175		err = arm_spe_synth_event(session, &attr, id);
1176		if (err)
1177			return err;
1178		spe->llc_miss_id = id;
1179		arm_spe_set_event_name(evlist, id, "llc-miss");
1180		id += 1;
1181
1182		/* Last level cache access */
1183		err = arm_spe_synth_event(session, &attr, id);
1184		if (err)
1185			return err;
1186		spe->llc_access_id = id;
1187		arm_spe_set_event_name(evlist, id, "llc-access");
1188		id += 1;
1189	}
1190
1191	if (spe->synth_opts.tlb) {
1192		spe->sample_tlb = true;
1193
1194		/* TLB miss */
1195		err = arm_spe_synth_event(session, &attr, id);
1196		if (err)
1197			return err;
1198		spe->tlb_miss_id = id;
1199		arm_spe_set_event_name(evlist, id, "tlb-miss");
1200		id += 1;
1201
1202		/* TLB access */
1203		err = arm_spe_synth_event(session, &attr, id);
1204		if (err)
1205			return err;
1206		spe->tlb_access_id = id;
1207		arm_spe_set_event_name(evlist, id, "tlb-access");
1208		id += 1;
1209	}
1210
1211	if (spe->synth_opts.branches) {
1212		spe->sample_branch = true;
1213
1214		/* Branch miss */
1215		err = arm_spe_synth_event(session, &attr, id);
1216		if (err)
1217			return err;
1218		spe->branch_miss_id = id;
1219		arm_spe_set_event_name(evlist, id, "branch-miss");
1220		id += 1;
1221	}
1222
1223	if (spe->synth_opts.remote_access) {
1224		spe->sample_remote_access = true;
1225
1226		/* Remote access */
1227		err = arm_spe_synth_event(session, &attr, id);
1228		if (err)
1229			return err;
1230		spe->remote_access_id = id;
1231		arm_spe_set_event_name(evlist, id, "remote-access");
1232		id += 1;
1233	}
1234
1235	if (spe->synth_opts.mem) {
1236		spe->sample_memory = true;
1237
1238		err = arm_spe_synth_event(session, &attr, id);
1239		if (err)
1240			return err;
1241		spe->memory_id = id;
1242		arm_spe_set_event_name(evlist, id, "memory");
1243		id += 1;
1244	}
1245
1246	if (spe->synth_opts.instructions) {
1247		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
1248			pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
1249			goto synth_instructions_out;
1250		}
1251		if (spe->synth_opts.period > 1)
1252			pr_warning("Arm SPE has a hardware-based sample period.\n"
1253				   "Additional instruction events will be discarded by --itrace\n");
1254
1255		spe->sample_instructions = true;
1256		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1257		attr.sample_period = spe->synth_opts.period;
1258		spe->instructions_sample_period = attr.sample_period;
1259		err = arm_spe_synth_event(session, &attr, id);
1260		if (err)
1261			return err;
1262		spe->instructions_id = id;
1263		arm_spe_set_event_name(evlist, id, "instructions");
1264	}
1265synth_instructions_out:
1266
1267	return 0;
1268}
1269
1270int arm_spe_process_auxtrace_info(union perf_event *event,
1271				  struct perf_session *session)
1272{
1273	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1274	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
1275	struct perf_record_time_conv *tc = &session->time_conv;
1276	const char *cpuid = perf_env__cpuid(session->evlist->env);
1277	u64 midr = strtol(cpuid, NULL, 16);
1278	struct arm_spe *spe;
1279	int err;
1280
1281	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
1282					min_sz)
1283		return -EINVAL;
1284
1285	spe = zalloc(sizeof(struct arm_spe));
1286	if (!spe)
1287		return -ENOMEM;
1288
1289	err = auxtrace_queues__init(&spe->queues);
1290	if (err)
1291		goto err_free;
1292
1293	spe->session = session;
1294	spe->machine = &session->machines.host; /* No kvm support */
1295	spe->auxtrace_type = auxtrace_info->type;
1296	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
1297	spe->midr = midr;
1298
1299	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1300
1301	/*
 1302	 * The synthesized event PERF_RECORD_TIME_CONV has already been
 1303	 * handled and the hardware clock parameters are stored in the
 1304	 * session context.  Pass these parameters to the struct
 1305	 * perf_tsc_conversion in "spe->tc", which is used later to convert
 1306	 * between the clock counter and timestamps.
 1307	 *
 1308	 * For backward compatibility, copy the fields starting from
 1309	 * "time_cycles" only if they are contained in the event.
1310	 */
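	/*
	 * For reference, tsc_to_perf_time() in util/tsc.c computes (ignoring
	 * the cap_user_time_short adjustment):
	 *
	 *   time = time_zero + (cyc >> time_shift) * time_mult +
	 *          (((cyc & ((1ULL << time_shift) - 1)) * time_mult) >> time_shift)
	 */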
1311	spe->tc.time_shift = tc->time_shift;
1312	spe->tc.time_mult = tc->time_mult;
1313	spe->tc.time_zero = tc->time_zero;
1314
1315	if (event_contains(*tc, time_cycles)) {
1316		spe->tc.time_cycles = tc->time_cycles;
1317		spe->tc.time_mask = tc->time_mask;
1318		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
1319		spe->tc.cap_user_time_short = tc->cap_user_time_short;
1320	}
1321
1322	spe->auxtrace.process_event = arm_spe_process_event;
1323	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1324	spe->auxtrace.flush_events = arm_spe_flush;
1325	spe->auxtrace.free_events = arm_spe_free_events;
1326	spe->auxtrace.free = arm_spe_free;
1327	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
1328	session->auxtrace = &spe->auxtrace;
1329
1330	arm_spe_print_info(&auxtrace_info->priv[0]);
1331
1332	if (dump_trace)
1333		return 0;
1334
1335	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
1336		spe->synth_opts = *session->itrace_synth_opts;
1337	else
1338		itrace_synth_opts__set_default(&spe->synth_opts, false);
1339
1340	err = arm_spe_synth_events(spe, session);
1341	if (err)
1342		goto err_free_queues;
1343
1344	err = auxtrace_queues__process_index(&spe->queues, session);
1345	if (err)
1346		goto err_free_queues;
1347
1348	if (spe->queues.populated)
1349		spe->data_queued = true;
1350
1351	return 0;
1352
1353err_free_queues:
1354	auxtrace_queues__free(&spe->queues);
1355	session->auxtrace = NULL;
1356err_free:
1357	free(spe);
1358	return err;
1359}
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Arm Statistical Profiling Extensions (SPE) support
   4 * Copyright (c) 2017-2018, Arm Ltd.
   5 */
   6
   7#include <byteswap.h>
   8#include <endian.h>
   9#include <errno.h>
  10#include <inttypes.h>
  11#include <linux/bitops.h>
  12#include <linux/kernel.h>
  13#include <linux/log2.h>
  14#include <linux/types.h>
  15#include <linux/zalloc.h>
  16#include <stdlib.h>
  17#include <unistd.h>
  18
  19#include "auxtrace.h"
  20#include "color.h"
  21#include "debug.h"
  22#include "evlist.h"
  23#include "evsel.h"
  24#include "machine.h"
  25#include "session.h"
  26#include "symbol.h"
  27#include "thread.h"
  28#include "thread-stack.h"
  29#include "tsc.h"
  30#include "tool.h"
  31#include "util/synthetic-events.h"
  32
  33#include "arm-spe.h"
  34#include "arm-spe-decoder/arm-spe-decoder.h"
  35#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
  36
  37#include "../../arch/arm64/include/asm/cputype.h"
  38#define MAX_TIMESTAMP (~0ULL)
  39
  40struct arm_spe {
  41	struct auxtrace			auxtrace;
  42	struct auxtrace_queues		queues;
  43	struct auxtrace_heap		heap;
  44	struct itrace_synth_opts        synth_opts;
  45	u32				auxtrace_type;
  46	struct perf_session		*session;
  47	struct machine			*machine;
  48	u32				pmu_type;
  49
  50	struct perf_tsc_conversion	tc;
  51
  52	u8				timeless_decoding;
  53	u8				data_queued;
  54
  55	u64				sample_type;
  56	u8				sample_flc;
  57	u8				sample_llc;
  58	u8				sample_tlb;
  59	u8				sample_branch;
  60	u8				sample_remote_access;
  61	u8				sample_memory;
  62	u8				sample_instructions;
  63	u64				instructions_sample_period;
  64
  65	u64				l1d_miss_id;
  66	u64				l1d_access_id;
  67	u64				llc_miss_id;
  68	u64				llc_access_id;
  69	u64				tlb_miss_id;
  70	u64				tlb_access_id;
  71	u64				branch_id;
  72	u64				remote_access_id;
  73	u64				memory_id;
  74	u64				instructions_id;
  75
  76	u64				kernel_start;
  77
  78	unsigned long			num_events;
  79	u8				use_ctx_pkt_for_pid;
  80
  81	u64				**metadata;
  82	u64				metadata_ver;
  83	u64				metadata_nr_cpu;
  84	bool				is_homogeneous;
  85};
  86
  87struct arm_spe_queue {
  88	struct arm_spe			*spe;
  89	unsigned int			queue_nr;
  90	struct auxtrace_buffer		*buffer;
  91	struct auxtrace_buffer		*old_buffer;
  92	union perf_event		*event_buf;
  93	bool				on_heap;
  94	bool				done;
  95	pid_t				pid;
  96	pid_t				tid;
  97	int				cpu;
  98	struct arm_spe_decoder		*decoder;
  99	u64				time;
 100	u64				timestamp;
 101	struct thread			*thread;
 102	u64				period_instructions;
 103	u32				flags;
 104};
 105
 106static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
 107			 unsigned char *buf, size_t len)
 108{
 109	struct arm_spe_pkt packet;
 110	size_t pos = 0;
 111	int ret, pkt_len, i;
 112	char desc[ARM_SPE_PKT_DESC_MAX];
 113	const char *color = PERF_COLOR_BLUE;
 114
 115	color_fprintf(stdout, color,
 116		      ". ... ARM SPE data: size %#zx bytes\n",
 117		      len);
 118
 119	while (len) {
 120		ret = arm_spe_get_packet(buf, len, &packet);
 121		if (ret > 0)
 122			pkt_len = ret;
 123		else
 124			pkt_len = 1;
 125		printf(".");
 126		color_fprintf(stdout, color, "  %08zx: ", pos);
 127		for (i = 0; i < pkt_len; i++)
 128			color_fprintf(stdout, color, " %02x", buf[i]);
 129		for (; i < 16; i++)
 130			color_fprintf(stdout, color, "   ");
 131		if (ret > 0) {
 132			ret = arm_spe_pkt_desc(&packet, desc,
 133					       ARM_SPE_PKT_DESC_MAX);
 134			if (!ret)
 135				color_fprintf(stdout, color, " %s\n", desc);
 136		} else {
 137			color_fprintf(stdout, color, " Bad packet!\n");
 138		}
 139		pos += pkt_len;
 140		buf += pkt_len;
 141		len -= pkt_len;
 142	}
 143}
 144
 145static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
 146			       size_t len)
 147{
 148	printf(".\n");
 149	arm_spe_dump(spe, buf, len);
 150}
 151
 152static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
 153{
 154	struct arm_spe_queue *speq = data;
 155	struct auxtrace_buffer *buffer = speq->buffer;
 156	struct auxtrace_buffer *old_buffer = speq->old_buffer;
 157	struct auxtrace_queue *queue;
 158
 159	queue = &speq->spe->queues.queue_array[speq->queue_nr];
 160
 161	buffer = auxtrace_buffer__next(queue, buffer);
 162	/* If no more data, drop the previous auxtrace_buffer and return */
 163	if (!buffer) {
 164		if (old_buffer)
 165			auxtrace_buffer__drop_data(old_buffer);
 166		b->len = 0;
 167		return 0;
 168	}
 169
 170	speq->buffer = buffer;
 171
 172	/* If the aux_buffer doesn't have data associated, try to load it */
 173	if (!buffer->data) {
 174		/* get the file desc associated with the perf data file */
 175		int fd = perf_data__fd(speq->spe->session->data);
 176
 177		buffer->data = auxtrace_buffer__get_data(buffer, fd);
 178		if (!buffer->data)
 179			return -ENOMEM;
 180	}
 181
 182	b->len = buffer->size;
 183	b->buf = buffer->data;
 184
 185	if (b->len) {
 186		if (old_buffer)
 187			auxtrace_buffer__drop_data(old_buffer);
 188		speq->old_buffer = buffer;
 189	} else {
 190		auxtrace_buffer__drop_data(buffer);
 191		return arm_spe_get_trace(b, data);
 192	}
 193
 194	return 0;
 195}
 196
 197static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
 198		unsigned int queue_nr)
 199{
 200	struct arm_spe_params params = { .get_trace = 0, };
 201	struct arm_spe_queue *speq;
 202
 203	speq = zalloc(sizeof(*speq));
 204	if (!speq)
 205		return NULL;
 206
 207	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
 208	if (!speq->event_buf)
 209		goto out_free;
 210
 211	speq->spe = spe;
 212	speq->queue_nr = queue_nr;
 213	speq->pid = -1;
 214	speq->tid = -1;
 215	speq->cpu = -1;
 216	speq->period_instructions = 0;
 217
 218	/* params set */
 219	params.get_trace = arm_spe_get_trace;
 220	params.data = speq;
 221
 222	/* create new decoder */
 223	speq->decoder = arm_spe_decoder_new(&params);
 224	if (!speq->decoder)
 225		goto out_free;
 226
 227	return speq;
 228
 229out_free:
 230	zfree(&speq->event_buf);
 231	free(speq);
 232
 233	return NULL;
 234}
 235
 236static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
 237{
 238	return ip >= spe->kernel_start ?
 239		PERF_RECORD_MISC_KERNEL :
 240		PERF_RECORD_MISC_USER;
 241}
 242
 243static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
 244				    struct auxtrace_queue *queue)
 245{
 246	struct arm_spe_queue *speq = queue->priv;
 247	pid_t tid;
 248
 249	tid = machine__get_current_tid(spe->machine, speq->cpu);
 250	if (tid != -1) {
 251		speq->tid = tid;
 252		thread__zput(speq->thread);
 253	} else
 254		speq->tid = queue->tid;
 255
 256	if ((!speq->thread) && (speq->tid != -1)) {
 257		speq->thread = machine__find_thread(spe->machine, -1,
 258						    speq->tid);
 259	}
 260
 261	if (speq->thread) {
 262		speq->pid = thread__pid(speq->thread);
 263		if (queue->cpu == -1)
 264			speq->cpu = thread__cpu(speq->thread);
 265	}
 266}
 267
 268static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
 269{
 270	struct arm_spe *spe = speq->spe;
 271	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
 272
 273	if (err)
 274		return err;
 275
 276	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
 277
 278	return 0;
 279}
 280
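/*
 * Per-CPU metadata (u64 arrays containing at least ARM_SPE_CPU and
 * ARM_SPE_CPU_MIDR) is saved at record time; look up the entry for a given
 * CPU number, or return NULL if none was recorded.
 */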
 281static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu)
 282{
 283	u64 i;
 284
 285	if (!spe->metadata)
 286		return NULL;
 287
 288	for (i = 0; i < spe->metadata_nr_cpu; i++)
 289		if (spe->metadata[i][ARM_SPE_CPU] == cpu)
 290			return spe->metadata[i];
 291
 292	return NULL;
 293}
 294
 295static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
 296{
 297	struct simd_flags simd_flags = {};
 298
 299	if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
 300		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
 301
 302	if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
 303		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
 304
 305	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
 306		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;
 307
 308	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
 309		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;
 310
 311	return simd_flags;
 312}
 313
 314static void arm_spe_prep_sample(struct arm_spe *spe,
 315				struct arm_spe_queue *speq,
 316				union perf_event *event,
 317				struct perf_sample *sample)
 318{
 319	struct arm_spe_record *record = &speq->decoder->record;
 320
 321	if (!spe->timeless_decoding)
 322		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
 323
 324	sample->ip = record->from_ip;
 325	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
 326	sample->pid = speq->pid;
 327	sample->tid = speq->tid;
 328	sample->period = 1;
 329	sample->cpu = speq->cpu;
 330	sample->simd_flags = arm_spe__synth_simd_flags(record);
 331
 332	event->sample.header.type = PERF_RECORD_SAMPLE;
 333	event->sample.header.misc = sample->cpumode;
 334	event->sample.header.size = sizeof(struct perf_event_header);
 335}
 336
 337static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
 338{
 339	event->header.size = perf_event__sample_event_size(sample, type, 0);
 340	return perf_event__synthesize_sample(event, type, 0, sample);
 341}
 342
 343static inline int
 344arm_spe_deliver_synth_event(struct arm_spe *spe,
 345			    struct arm_spe_queue *speq __maybe_unused,
 346			    union perf_event *event,
 347			    struct perf_sample *sample)
 348{
 349	int ret;
 350
 351	if (spe->synth_opts.inject) {
 352		ret = arm_spe__inject_event(event, sample, spe->sample_type);
 353		if (ret)
 354			return ret;
 355	}
 356
 357	ret = perf_session__deliver_synth_event(spe->session, event, sample);
 358	if (ret)
 359		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
 360
 361	return ret;
 362}
 363
 364static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
 365				     u64 spe_events_id, u64 data_src)
 366{
 367	struct arm_spe *spe = speq->spe;
 368	struct arm_spe_record *record = &speq->decoder->record;
 369	union perf_event *event = speq->event_buf;
 370	struct perf_sample sample = { .ip = 0, };
 371
 372	arm_spe_prep_sample(spe, speq, event, &sample);
 373
 374	sample.id = spe_events_id;
 375	sample.stream_id = spe_events_id;
 376	sample.addr = record->virt_addr;
 377	sample.phys_addr = record->phys_addr;
 378	sample.data_src = data_src;
 379	sample.weight = record->latency;
 380
 381	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 382}
 383
 384static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
 385					u64 spe_events_id)
 386{
 387	struct arm_spe *spe = speq->spe;
 388	struct arm_spe_record *record = &speq->decoder->record;
 389	union perf_event *event = speq->event_buf;
 390	struct perf_sample sample = { .ip = 0, };
 391
 392	arm_spe_prep_sample(spe, speq, event, &sample);
 393
 394	sample.id = spe_events_id;
 395	sample.stream_id = spe_events_id;
 396	sample.addr = record->to_ip;
 397	sample.weight = record->latency;
 398	sample.flags = speq->flags;
 399
 400	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 401}
 402
 403static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
 404					     u64 spe_events_id, u64 data_src)
 405{
 406	struct arm_spe *spe = speq->spe;
 407	struct arm_spe_record *record = &speq->decoder->record;
 408	union perf_event *event = speq->event_buf;
 409	struct perf_sample sample = { .ip = 0, };
 410
 411	/*
 412	 * Handles perf instruction sampling period.
 413	 */
 414	speq->period_instructions++;
 415	if (speq->period_instructions < spe->instructions_sample_period)
 416		return 0;
 417	speq->period_instructions = 0;
 418
 419	arm_spe_prep_sample(spe, speq, event, &sample);
 420
 421	sample.id = spe_events_id;
 422	sample.stream_id = spe_events_id;
 423	sample.addr = record->to_ip;
 424	sample.phys_addr = record->phys_addr;
 425	sample.data_src = data_src;
 426	sample.period = spe->instructions_sample_period;
 427	sample.weight = record->latency;
 428	sample.flags = speq->flags;
 429
 430	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 431}
 432
 433static const struct midr_range common_ds_encoding_cpus[] = {
 434	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
 435	MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
 436	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
 437	MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
 438	MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
 439	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
 440	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
 441	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
 442	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
 443	{},
 444};
 445
 446static void arm_spe__sample_flags(struct arm_spe_queue *speq)
 447{
 448	const struct arm_spe_record *record = &speq->decoder->record;
 449
 450	speq->flags = 0;
 451	if (record->op & ARM_SPE_OP_BRANCH_ERET) {
 452		speq->flags = PERF_IP_FLAG_BRANCH;
 453
 454		if (record->type & ARM_SPE_BRANCH_MISS)
 455			speq->flags |= PERF_IP_FLAG_BRANCH_MISS;
 456	}
 457}
 458
 459static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
 460					      union perf_mem_data_src *data_src)
 461{
 462	/*
 463	 * Even though four levels of cache hierarchy are possible, no known
 464	 * production Neoverse systems currently include more than three levels
 465	 * so for the time being we assume three exist. If a production system
  466	 * is built with four then this function would have to be changed to
 467	 * detect the number of levels for reporting.
 468	 */
 469
 470	/*
 471	 * We have no data on the hit level or data source for stores in the
 472	 * Neoverse SPE records.
 473	 */
 474	if (record->op & ARM_SPE_OP_ST) {
 475		data_src->mem_lvl = PERF_MEM_LVL_NA;
 476		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
 477		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
 478		return;
 479	}
 480
 481	switch (record->source) {
 482	case ARM_SPE_COMMON_DS_L1D:
 483		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
 484		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
 485		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
 486		break;
 487	case ARM_SPE_COMMON_DS_L2:
 488		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
 489		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
 490		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
 491		break;
 492	case ARM_SPE_COMMON_DS_PEER_CORE:
 493		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
 494		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
 495		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
 496		break;
 497	/*
  498	 * We don't know if this is L1 or L2, but we do know it was a
  499	 * cache-to-cache transfer, so set SNOOPX_PEER.
 500	 */
 501	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
 502	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
 503		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
 504		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
 505		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
 506		break;
 507	/*
 508	 * System cache is assumed to be L3
 509	 */
 510	case ARM_SPE_COMMON_DS_SYS_CACHE:
 511		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
 512		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
 513		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
 514		break;
 515	/*
 516	 * We don't know what level it hit in, except it came from the other
 517	 * socket
 518	 */
 519	case ARM_SPE_COMMON_DS_REMOTE:
 520		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
 521		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
 522		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
 523		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
 524		break;
 525	case ARM_SPE_COMMON_DS_DRAM:
 526		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
 527		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
 528		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
 529		break;
 530	default:
 531		break;
 532	}
 533}
 534
 535static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
 536					union perf_mem_data_src *data_src)
 537{
 538	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
 539		data_src->mem_lvl = PERF_MEM_LVL_L3;
 540
 541		if (record->type & ARM_SPE_LLC_MISS)
 542			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
 543		else
 544			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
 545	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
 546		data_src->mem_lvl = PERF_MEM_LVL_L1;
 547
 548		if (record->type & ARM_SPE_L1D_MISS)
 549			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
 550		else
 551			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
 552	}
 553
 554	if (record->type & ARM_SPE_REMOTE_ACCESS)
 555		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
 556}
 557
 558static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq)
 559{
 560	struct arm_spe *spe = speq->spe;
 561	bool is_in_cpu_list;
 562	u64 *metadata = NULL;
 563	u64 midr = 0;
 564
 565	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
 566	if (spe->metadata_ver == 1) {
 567		const char *cpuid;
 568
 569		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
 570		cpuid = perf_env__cpuid(spe->session->evlist->env);
 571		midr = strtol(cpuid, NULL, 16);
 572	} else {
 573		/* CPU ID is -1 for per-thread mode */
 574		if (speq->cpu < 0) {
 575			/*
 576			 * On a heterogeneous system the CPU ID is -1, so we cannot
 577			 * confirm whether the data source encoding is supported.
 578			 */
 579			if (!spe->is_homogeneous)
 580				return false;
 581
 582			/* In a homogeneous system, simply use CPU0's metadata */
 583			if (spe->metadata)
 584				metadata = spe->metadata[0];
 585		} else {
 586			metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
 587		}
 588
 589		if (!metadata)
 590			return false;
 591
 592		midr = metadata[ARM_SPE_CPU_MIDR];
 593	}
 594
 595	is_in_cpu_list = is_midr_in_range_list(midr, common_ds_encoding_cpus);
 596	if (is_in_cpu_list)
 597		return true;
 598	else
 599		return false;
 600}
 601
 602static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
 603				      const struct arm_spe_record *record)
 604{
 605	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
 606	bool is_common = arm_spe__is_common_ds_encoding(speq);
 607
 608	if (record->op & ARM_SPE_OP_LD)
 609		data_src.mem_op = PERF_MEM_OP_LOAD;
 610	else if (record->op & ARM_SPE_OP_ST)
 611		data_src.mem_op = PERF_MEM_OP_STORE;
 612	else
 613		return 0;
 614
 615	if (is_common)
 616		arm_spe__synth_data_source_common(record, &data_src);
 617	else
 618		arm_spe__synth_memory_level(record, &data_src);
 619
 620	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
 621		data_src.mem_dtlb = PERF_MEM_TLB_WK;
 622
 623		if (record->type & ARM_SPE_TLB_MISS)
 624			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
 625		else
 626			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
 627	}
 628
 629	return data_src.val;
 630}
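/*
 * Editor's note: another illustrative sketch, not part of the original
 * file.  On a CPU without the common data source encoding,
 * arm_spe__synth_data_source() falls back to arm_spe__synth_memory_level()
 * and then ORs in the TLB bits.  For a load that hit in L1 but missed the
 * data TLB (record->type = ARM_SPE_L1D_ACCESS | ARM_SPE_TLB_MISS) the
 * resulting value would look like this.  The helper name is hypothetical.
 */
static inline u64 arm_spe_example_l1_hit_tlb_miss(void)
{
	union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_LOAD };

	/* Memory level only; no snoop or data source detail is available */
	data_src.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;

	/* TLB walk bits are added regardless of which encoding is in use */
	data_src.mem_dtlb = PERF_MEM_TLB_WK | PERF_MEM_TLB_MISS;

	return data_src.val;
}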
 631
 632static int arm_spe_sample(struct arm_spe_queue *speq)
 633{
 634	const struct arm_spe_record *record = &speq->decoder->record;
 635	struct arm_spe *spe = speq->spe;
 636	u64 data_src;
 637	int err;
 638
 639	arm_spe__sample_flags(speq);
 640	data_src = arm_spe__synth_data_source(speq, record);
 641
 642	if (spe->sample_flc) {
 643		if (record->type & ARM_SPE_L1D_MISS) {
 644			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
 645							data_src);
 646			if (err)
 647				return err;
 648		}
 649
 650		if (record->type & ARM_SPE_L1D_ACCESS) {
 651			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
 652							data_src);
 653			if (err)
 654				return err;
 655		}
 656	}
 657
 658	if (spe->sample_llc) {
 659		if (record->type & ARM_SPE_LLC_MISS) {
 660			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
 661							data_src);
 662			if (err)
 663				return err;
 664		}
 665
 666		if (record->type & ARM_SPE_LLC_ACCESS) {
 667			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
 668							data_src);
 669			if (err)
 670				return err;
 671		}
 672	}
 673
 674	if (spe->sample_tlb) {
 675		if (record->type & ARM_SPE_TLB_MISS) {
 676			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
 677							data_src);
 678			if (err)
 679				return err;
 680		}
 681
 682		if (record->type & ARM_SPE_TLB_ACCESS) {
 683			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
 684							data_src);
 685			if (err)
 686				return err;
 687		}
 688	}
 689
 690	if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
 691		err = arm_spe__synth_branch_sample(speq, spe->branch_id);
 692		if (err)
 693			return err;
 694	}
 695
 696	if (spe->sample_remote_access &&
 697	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
 698		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
 699						data_src);
 700		if (err)
 701			return err;
 702	}
 703
 704	/*
 705	 * A data_src of zero means the record is not a memory operation, so
 706	 * skip synthesizing a memory sample in that case.
 707	 */
 708	if (spe->sample_memory && data_src) {
 709		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
 710		if (err)
 711			return err;
 712	}
 713
 714	if (spe->sample_instructions) {
 715		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
 716		if (err)
 717			return err;
 718	}
 719
 720	return 0;
 721}
 722
 723static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
 724{
 725	struct arm_spe *spe = speq->spe;
 726	struct arm_spe_record *record;
 727	int ret;
 728
 729	if (!spe->kernel_start)
 730		spe->kernel_start = machine__kernel_start(spe->machine);
 731
 732	while (1) {
 733		/*
 734		 * The usual logic is first to decode the packets and then use the
 735		 * record to synthesize a sample; but here the flow is reversed:
 736		 * arm_spe_sample() is called to synthesize samples prior to
 737		 * arm_spe_decode().
 738		 *
 739		 * Two reasons for this code logic:
 740		 * 1. When the queue is set up in arm_spe__setup_queue(), it has
 741		 * already decoded trace data and generated a record, but that
 742		 * record is not turned into a sample until execution reaches this
 743		 * point, so it is correct to synthesize a sample for the leftover
 744		 * record here.
 745		 * 2. After decoding trace data, the record timestamp must be
 746		 * compared with the timestamp of the coming perf event; if the
 747		 * record timestamp is later, bail out and push the record onto the
 748		 * auxtrace heap, so synthesizing its sample is deferred until the
 749		 * next pass through this loop.  This keeps samples from the Arm SPE
 750		 * trace correlated with other perf events in correct time order.
 751		 */
 752
 753		/*
 754		 * Update pid/tid info.
 755		 */
 756		record = &speq->decoder->record;
 757		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
 758			ret = arm_spe_set_tid(speq, record->context_id);
 759			if (ret)
 760				return ret;
 761
 762			spe->use_ctx_pkt_for_pid = true;
 763		}
 764
 765		ret = arm_spe_sample(speq);
 766		if (ret)
 767			return ret;
 768
 769		ret = arm_spe_decode(speq->decoder);
 770		if (!ret) {
 771			pr_debug("No data or all data has been processed.\n");
 772			return 1;
 773		}
 774
 775		/*
 776		 * An error was detected while decoding the SPE trace data;
 777		 * continue with the next trace data to find more records.
 778		 */
 779		if (ret < 0)
 780			continue;
 781
 782		record = &speq->decoder->record;
 783
 784		/* Update timestamp for the last record */
 785		if (record->timestamp > speq->timestamp)
 786			speq->timestamp = record->timestamp;
 787
 788		/*
 789		 * If the timestamp of the queue is later than the timestamp of
 790		 * the coming perf event, bail out so the perf event can be
 791		 * processed first.
 792		 */
 793		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
 794			*timestamp = speq->timestamp;
 795			return 0;
 796		}
 797	}
 798
 799	return 0;
 800}
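/*
 * Editor's note: a hedged sketch of the calling convention, not part of
 * the original file.  It mirrors how arm_spe_process_queues() below
 * interprets arm_spe_run_decoder()'s return value; the function name is
 * hypothetical.
 */
static inline int arm_spe_example_drain_queue(struct arm_spe_queue *speq,
					      u64 limit)
{
	u64 ts = limit;
	int ret = arm_spe_run_decoder(speq, &ts);

	if (ret < 0)	/* error while synthesizing samples: propagate */
		return ret;
	if (ret > 0)	/* all trace data for this queue has been consumed */
		return 0;
	/*
	 * ret == 0: decoding paused because the next record is not older
	 * than 'limit'; 'ts' now holds that record's timestamp, ready to
	 * be pushed back onto the auxtrace heap.
	 */
	return 0;
}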
 801
 802static int arm_spe__setup_queue(struct arm_spe *spe,
 803			       struct auxtrace_queue *queue,
 804			       unsigned int queue_nr)
 805{
 806	struct arm_spe_queue *speq = queue->priv;
 807	struct arm_spe_record *record;
 808
 809	if (list_empty(&queue->head) || speq)
 810		return 0;
 811
 812	speq = arm_spe__alloc_queue(spe, queue_nr);
 813
 814	if (!speq)
 815		return -ENOMEM;
 816
 817	queue->priv = speq;
 818
 819	if (queue->cpu != -1)
 820		speq->cpu = queue->cpu;
 821
 822	if (!speq->on_heap) {
 823		int ret;
 824
 825		if (spe->timeless_decoding)
 826			return 0;
 827
 828retry:
 829		ret = arm_spe_decode(speq->decoder);
 830
 831		if (!ret)
 832			return 0;
 833
 834		if (ret < 0)
 835			goto retry;
 836
 837		record = &speq->decoder->record;
 838
 839		speq->timestamp = record->timestamp;
 840		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
 841		if (ret)
 842			return ret;
 843		speq->on_heap = true;
 844	}
 845
 846	return 0;
 847}
 848
 849static int arm_spe__setup_queues(struct arm_spe *spe)
 850{
 851	unsigned int i;
 852	int ret;
 853
 854	for (i = 0; i < spe->queues.nr_queues; i++) {
 855		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
 856		if (ret)
 857			return ret;
 858	}
 859
 860	return 0;
 861}
 862
 863static int arm_spe__update_queues(struct arm_spe *spe)
 864{
 865	if (spe->queues.new_data) {
 866		spe->queues.new_data = false;
 867		return arm_spe__setup_queues(spe);
 868	}
 869
 870	return 0;
 871}
 872
 873static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
 874{
 875	struct evsel *evsel;
 876	struct evlist *evlist = spe->session->evlist;
 877	bool timeless_decoding = true;
 878
 879	/*
 880	 * Cycle through the list of events; decoding is timeless only if
 881	 * none of the events has the time bit set.
 882	 */
 883	evlist__for_each_entry(evlist, evsel) {
 884		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
 885			timeless_decoding = false;
 886	}
 887
 888	return timeless_decoding;
 889}
 890
 891static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
 892{
 893	unsigned int queue_nr;
 894	u64 ts;
 895	int ret;
 896
 897	while (1) {
 898		struct auxtrace_queue *queue;
 899		struct arm_spe_queue *speq;
 900
 901		if (!spe->heap.heap_cnt)
 902			return 0;
 903
 904		if (spe->heap.heap_array[0].ordinal >= timestamp)
 905			return 0;
 906
 907		queue_nr = spe->heap.heap_array[0].queue_nr;
 908		queue = &spe->queues.queue_array[queue_nr];
 909		speq = queue->priv;
 910
 911		auxtrace_heap__pop(&spe->heap);
 912
 913		if (spe->heap.heap_cnt) {
 914			ts = spe->heap.heap_array[0].ordinal + 1;
 915			if (ts > timestamp)
 916				ts = timestamp;
 917		} else {
 918			ts = timestamp;
 919		}
 920
 921		/*
 922		 * A previous context-switch event has set pid/tid in the machine's context, so
 923		 * here we need to update the pid/tid in the thread and SPE queue.
 924		 */
 925		if (!spe->use_ctx_pkt_for_pid)
 926			arm_spe_set_pid_tid_cpu(spe, queue);
 927
 928		ret = arm_spe_run_decoder(speq, &ts);
 929		if (ret < 0) {
 930			auxtrace_heap__add(&spe->heap, queue_nr, ts);
 931			return ret;
 932		}
 933
 934		if (!ret) {
 935			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
 936			if (ret < 0)
 937				return ret;
 938		} else {
 939			speq->on_heap = false;
 940		}
 941	}
 942
 943	return 0;
 944}
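/*
 * Editor's note: a worked example of the clamp above, not part of the
 * original file.  With two queues on the heap at ordinals 100 and 150 and
 * a perf event at timestamp 200, the queue at 100 is decoded only up to
 * 150 + 1, so the second queue gets its turn before the perf event is
 * delivered.  The helper name is hypothetical.
 */
static inline u64 arm_spe_example_decode_limit(u64 next_queue_ts, u64 event_ts)
{
	u64 ts = next_queue_ts + 1;

	return ts > event_ts ? event_ts : ts;
}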
 945
 946static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
 947					    u64 time_)
 948{
 949	struct auxtrace_queues *queues = &spe->queues;
 950	unsigned int i;
 951	u64 ts = 0;
 952
 953	for (i = 0; i < queues->nr_queues; i++) {
 954		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
 955		struct arm_spe_queue *speq = queue->priv;
 956
 957		if (speq && (tid == -1 || speq->tid == tid)) {
 958			speq->time = time_;
 959			arm_spe_set_pid_tid_cpu(spe, queue);
 960			arm_spe_run_decoder(speq, &ts);
 961		}
 962	}
 963	return 0;
 964}
 965
 966static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
 967				  struct perf_sample *sample)
 968{
 969	pid_t pid, tid;
 970	int cpu;
 971
 972	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
 973		return 0;
 974
 975	pid = event->context_switch.next_prev_pid;
 976	tid = event->context_switch.next_prev_tid;
 977	cpu = sample->cpu;
 978
 979	if (tid == -1)
 980		pr_warning("context_switch event has no tid\n");
 981
 982	return machine__set_current_tid(spe->machine, cpu, pid, tid);
 983}
 984
 985static int arm_spe_process_event(struct perf_session *session,
 986				 union perf_event *event,
 987				 struct perf_sample *sample,
 988				 const struct perf_tool *tool)
 989{
 990	int err = 0;
 991	u64 timestamp;
 992	struct arm_spe *spe = container_of(session->auxtrace,
 993			struct arm_spe, auxtrace);
 994
 995	if (dump_trace)
 996		return 0;
 997
 998	if (!tool->ordered_events) {
 999		pr_err("SPE trace requires ordered events\n");
1000		return -EINVAL;
1001	}
1002
1003	if (sample->time && (sample->time != (u64) -1))
1004		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
1005	else
1006		timestamp = 0;
1007
1008	if (timestamp || spe->timeless_decoding) {
1009		err = arm_spe__update_queues(spe);
1010		if (err)
1011			return err;
1012	}
1013
1014	if (spe->timeless_decoding) {
1015		if (event->header.type == PERF_RECORD_EXIT) {
1016			err = arm_spe_process_timeless_queues(spe,
1017					event->fork.tid,
1018					sample->time);
1019		}
1020	} else if (timestamp) {
1021		err = arm_spe_process_queues(spe, timestamp);
1022		if (err)
1023			return err;
1024
1025		if (!spe->use_ctx_pkt_for_pid &&
1026		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
1027		    event->header.type == PERF_RECORD_SWITCH))
1028			err = arm_spe_context_switch(spe, event, sample);
1029	}
1030
1031	return err;
1032}
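/*
 * Editor's note: a hedged sketch, not part of the original file.  The
 * spe->tc parameters describe the usual perf clock relation, roughly
 *
 *	ns ~= time_zero + (counter * time_mult) >> time_shift
 *
 * so the perf_time_to_tsc() call above applies the inverse, turning
 * sample->time back into a counter value comparable with SPE record
 * timestamps on the auxtrace heap.  A rough rendition of that inverse,
 * under the assumption above (the helper name is hypothetical):
 */
static inline u64 arm_spe_example_ns_to_counter(u64 ns,
						const struct perf_tsc_conversion *tc)
{
	u64 t = ns - tc->time_zero;

	return ((t / tc->time_mult) << tc->time_shift) +
	       ((t % tc->time_mult) << tc->time_shift) / tc->time_mult;
}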
1033
1034static int arm_spe_process_auxtrace_event(struct perf_session *session,
1035					  union perf_event *event,
1036					  const struct perf_tool *tool __maybe_unused)
1037{
1038	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1039					     auxtrace);
1040
1041	if (!spe->data_queued) {
1042		struct auxtrace_buffer *buffer;
1043		off_t data_offset;
1044		int fd = perf_data__fd(session->data);
1045		int err;
1046
1047		if (perf_data__is_pipe(session->data)) {
1048			data_offset = 0;
1049		} else {
1050			data_offset = lseek(fd, 0, SEEK_CUR);
1051			if (data_offset == -1)
1052				return -errno;
1053		}
1054
1055		err = auxtrace_queues__add_event(&spe->queues, session, event,
1056				data_offset, &buffer);
1057		if (err)
1058			return err;
1059
1060		/* Dump here, now that we have copied a piped trace out of the pipe */
1061		if (dump_trace) {
1062			if (auxtrace_buffer__get_data(buffer, fd)) {
1063				arm_spe_dump_event(spe, buffer->data,
1064						buffer->size);
1065				auxtrace_buffer__put_data(buffer);
1066			}
1067		}
1068	}
1069
1070	return 0;
1071}
1072
1073static int arm_spe_flush(struct perf_session *session __maybe_unused,
1074			 const struct perf_tool *tool __maybe_unused)
1075{
1076	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1077			auxtrace);
1078	int ret;
1079
1080	if (dump_trace)
1081		return 0;
1082
1083	if (!tool->ordered_events)
1084		return -EINVAL;
1085
1086	ret = arm_spe__update_queues(spe);
1087	if (ret < 0)
1088		return ret;
1089
1090	if (spe->timeless_decoding)
1091		return arm_spe_process_timeless_queues(spe, -1,
1092				MAX_TIMESTAMP - 1);
1093
1094	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
1095	if (ret)
1096		return ret;
1097
1098	if (!spe->use_ctx_pkt_for_pid)
1099		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
1100			    "Matching of TIDs to SPE events could be inaccurate.\n");
1101
1102	return 0;
1103}
1104
1105static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size)
1106{
1107	u64 *metadata;
1108
1109	metadata = zalloc(per_cpu_size);
1110	if (!metadata)
1111		return NULL;
1112
1113	memcpy(metadata, buf, per_cpu_size);
1114	return metadata;
1115}
1116
1117static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
1118{
1119	int i;
1120
1121	for (i = 0; i < nr_cpu; i++)
1122		zfree(&metadata[i]);
1123	free(metadata);
1124}
1125
1126static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
1127				     u64 *ver, int *nr_cpu)
1128{
1129	u64 *ptr = (u64 *)info->priv;
1130	u64 metadata_size;
1131	u64 **metadata = NULL;
1132	int hdr_sz, per_cpu_sz, i;
1133
1134	metadata_size = info->header.size -
1135		sizeof(struct perf_record_auxtrace_info);
1136
1137	/* Metadata version 1 */
1138	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
1139		*ver = 1;
1140		*nr_cpu = 0;
1141		/* No per CPU metadata */
1142		return NULL;
1143	}
1144
1145	*ver = ptr[ARM_SPE_HEADER_VERSION];
1146	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
1147	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];
1148
1149	metadata = calloc(*nr_cpu, sizeof(*metadata));
1150	if (!metadata)
1151		return NULL;
1152
1153	/* Locate the start address of per CPU metadata */
1154	ptr += hdr_sz;
1155	per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);
1156
1157	for (i = 0; i < *nr_cpu; i++) {
1158		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
1159		if (!metadata[i])
1160			goto err_per_cpu_metadata;
1161
1162		ptr += per_cpu_sz / sizeof(u64);
1163	}
1164
1165	return metadata;
1166
1167err_per_cpu_metadata:
1168	arm_spe__free_metadata(metadata, *nr_cpu);
1169	return NULL;
1170}
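/*
 * Editor's note: a worked example with hypothetical numbers, not part of
 * the original file.  For metadata version 2 the priv area is a header
 * (version, header size, PMU type, CPU count) followed by one equally
 * sized block per CPU.  With hdr_sz = 4 header words, 2 CPUs and a priv
 * area of 16 u64 words (128 bytes), arm_spe__alloc_metadata() computes
 * per_cpu_sz = (128 - 4 * 8) / 2 = 48 bytes, i.e. 6 words per CPU,
 * matching the six per-CPU fields dumped by arm_spe_print_info() below.
 */
static inline u64 arm_spe_example_per_cpu_sz(u64 metadata_size, u64 hdr_sz,
					     u64 nr_cpu)
{
	/* Mirrors the computation in arm_spe__alloc_metadata() */
	return (metadata_size - hdr_sz * sizeof(u64)) / nr_cpu;
}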
1171
1172static void arm_spe_free_queue(void *priv)
1173{
1174	struct arm_spe_queue *speq = priv;
1175
1176	if (!speq)
1177		return;
1178	thread__zput(speq->thread);
1179	arm_spe_decoder_free(speq->decoder);
1180	zfree(&speq->event_buf);
1181	free(speq);
1182}
1183
1184static void arm_spe_free_events(struct perf_session *session)
1185{
1186	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1187					     auxtrace);
1188	struct auxtrace_queues *queues = &spe->queues;
1189	unsigned int i;
1190
1191	for (i = 0; i < queues->nr_queues; i++) {
1192		arm_spe_free_queue(queues->queue_array[i].priv);
1193		queues->queue_array[i].priv = NULL;
1194	}
1195	auxtrace_queues__free(queues);
1196}
1197
1198static void arm_spe_free(struct perf_session *session)
1199{
1200	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1201					     auxtrace);
1202
1203	auxtrace_heap__free(&spe->heap);
1204	arm_spe_free_events(session);
1205	session->auxtrace = NULL;
1206	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
1207	free(spe);
1208}
1209
1210static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
1211				      struct evsel *evsel)
1212{
1213	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
1214
1215	return evsel->core.attr.type == spe->pmu_type;
1216}
1217
1218static const char * const metadata_hdr_v1_fmts[] = {
1219	[ARM_SPE_PMU_TYPE]		= "  PMU Type           :%"PRId64"\n",
1220	[ARM_SPE_PER_CPU_MMAPS]		= "  Per CPU mmaps      :%"PRId64"\n",
1221};
1222
1223static const char * const metadata_hdr_fmts[] = {
1224	[ARM_SPE_HEADER_VERSION]	= "  Header version     :%"PRId64"\n",
1225	[ARM_SPE_HEADER_SIZE]		= "  Header size        :%"PRId64"\n",
1226	[ARM_SPE_PMU_TYPE_V2]		= "  PMU type v2        :%"PRId64"\n",
1227	[ARM_SPE_CPUS_NUM]		= "  CPU number         :%"PRId64"\n",
1228};
1229
1230static const char * const metadata_per_cpu_fmts[] = {
1231	[ARM_SPE_MAGIC]			= "    Magic            :0x%"PRIx64"\n",
1232	[ARM_SPE_CPU]			= "    CPU #            :%"PRId64"\n",
1233	[ARM_SPE_CPU_NR_PARAMS]		= "    Num of params    :%"PRId64"\n",
1234	[ARM_SPE_CPU_MIDR]		= "    MIDR             :0x%"PRIx64"\n",
1235	[ARM_SPE_CPU_PMU_TYPE]		= "    PMU Type         :%"PRId64"\n",
1236	[ARM_SPE_CAP_MIN_IVAL]		= "    Min Interval     :%"PRId64"\n",
1237};
1238
1239static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
1240{
1241	unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
1242	const char * const *hdr_fmts;
1243
1244	if (!dump_trace)
1245		return;
1246
1247	if (spe->metadata_ver == 1) {
1248		cpu_num = 0;
1249		hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
1250		hdr_fmts = metadata_hdr_v1_fmts;
1251	} else {
1252		cpu_num = arr[ARM_SPE_CPUS_NUM];
1253		hdr_size = arr[ARM_SPE_HEADER_SIZE];
1254		hdr_fmts = metadata_hdr_fmts;
1255	}
1256
1257	for (i = 0; i < hdr_size; i++)
1258		fprintf(stdout, hdr_fmts[i], arr[i]);
1259
1260	arr += hdr_size;
1261	for (cpu = 0; cpu < cpu_num; cpu++) {
1262		/*
1263		 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
1264		 * are fixed. The number of trailing parameters is given by the
1265		 * field 'ARM_SPE_CPU_NR_PARAMS'.
1266		 */
1267		cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
1268		for (i = 0; i < cpu_size; i++)
1269			fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
1270		arr += cpu_size;
1271	}
1272}
1273
1274static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
1275				    const char *name)
1276{
1277	struct evsel *evsel;
1278
1279	evlist__for_each_entry(evlist, evsel) {
1280		if (evsel->core.id && evsel->core.id[0] == id) {
1281			if (evsel->name)
1282				zfree(&evsel->name);
1283			evsel->name = strdup(name);
1284			break;
1285		}
1286	}
1287}
1288
1289static int
1290arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
1291{
1292	struct evlist *evlist = session->evlist;
1293	struct evsel *evsel;
1294	struct perf_event_attr attr;
1295	bool found = false;
1296	u64 id;
1297	int err;
1298
1299	evlist__for_each_entry(evlist, evsel) {
1300		if (evsel->core.attr.type == spe->pmu_type) {
1301			found = true;
1302			break;
1303		}
1304	}
1305
1306	if (!found) {
1307		pr_debug("No selected events with SPE trace data\n");
1308		return 0;
1309	}
1310
1311	memset(&attr, 0, sizeof(struct perf_event_attr));
1312	attr.size = sizeof(struct perf_event_attr);
1313	attr.type = PERF_TYPE_HARDWARE;
1314	attr.sample_type = evsel->core.attr.sample_type &
1315				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
1316	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1317			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
1318			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
1319	if (spe->timeless_decoding)
1320		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1321	else
1322		attr.sample_type |= PERF_SAMPLE_TIME;
1323
1324	spe->sample_type = attr.sample_type;
1325
1326	attr.exclude_user = evsel->core.attr.exclude_user;
1327	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1328	attr.exclude_hv = evsel->core.attr.exclude_hv;
1329	attr.exclude_host = evsel->core.attr.exclude_host;
1330	attr.exclude_guest = evsel->core.attr.exclude_guest;
1331	attr.sample_id_all = evsel->core.attr.sample_id_all;
1332	attr.read_format = evsel->core.attr.read_format;
1333
1334	/* Create a new id value at a fixed offset from the evsel id */
1335	id = evsel->core.id[0] + 1000000000;
1336
1337	if (!id)
1338		id = 1;
1339
1340	if (spe->synth_opts.flc) {
1341		spe->sample_flc = true;
1342
1343		/* Level 1 data cache miss */
1344		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1345		if (err)
1346			return err;
1347		spe->l1d_miss_id = id;
1348		arm_spe_set_event_name(evlist, id, "l1d-miss");
1349		id += 1;
1350
1351		/* Level 1 data cache access */
1352		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1353		if (err)
1354			return err;
1355		spe->l1d_access_id = id;
1356		arm_spe_set_event_name(evlist, id, "l1d-access");
1357		id += 1;
1358	}
1359
1360	if (spe->synth_opts.llc) {
1361		spe->sample_llc = true;
1362
1363		/* Last level cache miss */
1364		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1365		if (err)
1366			return err;
1367		spe->llc_miss_id = id;
1368		arm_spe_set_event_name(evlist, id, "llc-miss");
1369		id += 1;
1370
1371		/* Last level cache access */
1372		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1373		if (err)
1374			return err;
1375		spe->llc_access_id = id;
1376		arm_spe_set_event_name(evlist, id, "llc-access");
1377		id += 1;
1378	}
1379
1380	if (spe->synth_opts.tlb) {
1381		spe->sample_tlb = true;
1382
1383		/* TLB miss */
1384		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1385		if (err)
1386			return err;
1387		spe->tlb_miss_id = id;
1388		arm_spe_set_event_name(evlist, id, "tlb-miss");
1389		id += 1;
1390
1391		/* TLB access */
1392		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1393		if (err)
1394			return err;
1395		spe->tlb_access_id = id;
1396		arm_spe_set_event_name(evlist, id, "tlb-access");
1397		id += 1;
1398	}
1399
1400	if (spe->synth_opts.branches) {
1401		spe->sample_branch = true;
1402
1403		/* Branch */
1404		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1405		if (err)
1406			return err;
1407		spe->branch_id = id;
1408		arm_spe_set_event_name(evlist, id, "branch");
1409		id += 1;
1410	}
1411
1412	if (spe->synth_opts.remote_access) {
1413		spe->sample_remote_access = true;
1414
1415		/* Remote access */
1416		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1417		if (err)
1418			return err;
1419		spe->remote_access_id = id;
1420		arm_spe_set_event_name(evlist, id, "remote-access");
1421		id += 1;
1422	}
1423
1424	if (spe->synth_opts.mem) {
1425		spe->sample_memory = true;
1426
1427		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1428		if (err)
1429			return err;
1430		spe->memory_id = id;
1431		arm_spe_set_event_name(evlist, id, "memory");
1432		id += 1;
1433	}
1434
1435	if (spe->synth_opts.instructions) {
1436		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
1437			pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
1438			goto synth_instructions_out;
1439		}
1440		if (spe->synth_opts.period > 1)
1441			pr_warning("Arm SPE has a hardware-based sample period.\n"
1442				   "Additional instruction events will be discarded by --itrace\n");
1443
1444		spe->sample_instructions = true;
1445		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1446		attr.sample_period = spe->synth_opts.period;
1447		spe->instructions_sample_period = attr.sample_period;
1448		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1449		if (err)
1450			return err;
1451		spe->instructions_id = id;
1452		arm_spe_set_event_name(evlist, id, "instructions");
1453	}
1454synth_instructions_out:
1455
1456	return 0;
1457}
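/*
 * Editor's note: a worked example, not part of the original file.  With
 * every class above enabled and an SPE evsel whose first sample id is,
 * hypothetically, 42, arm_spe_synth_events() hands out sequential ids
 * starting at the fixed offset:
 *
 *	base = 42 + 1000000000	-> "l1d-miss"
 *	base + 1		-> "l1d-access"
 *	base + 2		-> "llc-miss"
 *	base + 3		-> "llc-access"
 *	base + 4		-> "tlb-miss"
 *	base + 5		-> "tlb-access"
 *	base + 6		-> "branch"
 *	base + 7		-> "remote-access"
 *	base + 8		-> "memory"
 *	base + 9		-> "instructions"
 *
 * Each id is announced via perf_session__deliver_synth_attr_event() and
 * later used as the sample id when the corresponding samples are
 * synthesized in arm_spe_sample().
 */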
1458
1459static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
1460{
1461	u64 midr;
1462	int i;
1463
1464	if (!nr_cpu)
1465		return false;
1466
1467	for (i = 0; i < nr_cpu; i++) {
1468		if (!metadata[i])
1469			return false;
1470
1471		if (i == 0) {
1472			midr = metadata[i][ARM_SPE_CPU_MIDR];
1473			continue;
1474		}
1475
1476		if (midr != metadata[i][ARM_SPE_CPU_MIDR])
1477			return false;
1478	}
1479
1480	return true;
1481}
1482
1483int arm_spe_process_auxtrace_info(union perf_event *event,
1484				  struct perf_session *session)
1485{
1486	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1487	size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
1488	struct perf_record_time_conv *tc = &session->time_conv;
1489	struct arm_spe *spe;
1490	u64 **metadata = NULL;
1491	u64 metadata_ver;
1492	int nr_cpu, err;
1493
1494	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
1495					min_sz)
1496		return -EINVAL;
1497
1498	metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
1499					   &nr_cpu);
1500	if (!metadata && metadata_ver != 1) {
1501		pr_err("Failed to parse Arm SPE metadata.\n");
1502		return -EINVAL;
1503	}
1504
1505	spe = zalloc(sizeof(struct arm_spe));
1506	if (!spe) {
1507		err = -ENOMEM;
1508		goto err_free_metadata;
1509	}
1510
1511	err = auxtrace_queues__init(&spe->queues);
1512	if (err)
1513		goto err_free;
1514
1515	spe->session = session;
1516	spe->machine = &session->machines.host; /* No kvm support */
1517	spe->auxtrace_type = auxtrace_info->type;
1518	if (metadata_ver == 1)
1519		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
1520	else
1521		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
1522	spe->metadata = metadata;
1523	spe->metadata_ver = metadata_ver;
1524	spe->metadata_nr_cpu = nr_cpu;
1525	spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);
1526
1527	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1528
1529	/*
1530	 * The synthesized event PERF_RECORD_TIME_CONV was handled earlier and
1531	 * the parameters for the hardware clock are stored in the session
1532	 * context.  Copy these parameters into the struct perf_tsc_conversion
1533	 * in "spe->tc", which is used later to convert between the clock
1534	 * counter and timestamps.
1535	 *
1536	 * For backward compatibility, copy the fields starting from
1537	 * "time_cycles" only if they are contained in the event.
1538	 */
1539	spe->tc.time_shift = tc->time_shift;
1540	spe->tc.time_mult = tc->time_mult;
1541	spe->tc.time_zero = tc->time_zero;
1542
1543	if (event_contains(*tc, time_cycles)) {
1544		spe->tc.time_cycles = tc->time_cycles;
1545		spe->tc.time_mask = tc->time_mask;
1546		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
1547		spe->tc.cap_user_time_short = tc->cap_user_time_short;
1548	}
1549
1550	spe->auxtrace.process_event = arm_spe_process_event;
1551	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1552	spe->auxtrace.flush_events = arm_spe_flush;
1553	spe->auxtrace.free_events = arm_spe_free_events;
1554	spe->auxtrace.free = arm_spe_free;
1555	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
1556	session->auxtrace = &spe->auxtrace;
1557
1558	arm_spe_print_info(spe, &auxtrace_info->priv[0]);
1559
1560	if (dump_trace)
1561		return 0;
1562
1563	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
1564		spe->synth_opts = *session->itrace_synth_opts;
1565	else
1566		itrace_synth_opts__set_default(&spe->synth_opts, false);
1567
1568	err = arm_spe_synth_events(spe, session);
1569	if (err)
1570		goto err_free_queues;
1571
1572	err = auxtrace_queues__process_index(&spe->queues, session);
1573	if (err)
1574		goto err_free_queues;
1575
1576	if (spe->queues.populated)
1577		spe->data_queued = true;
1578
1579	return 0;
1580
1581err_free_queues:
1582	auxtrace_queues__free(&spe->queues);
1583	session->auxtrace = NULL;
1584err_free:
1585	free(spe);
1586err_free_metadata:
1587	arm_spe__free_metadata(metadata, nr_cpu);
1588	return err;
1589}