v5.4
// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <endian.h>
#include <errno.h>
#include <byteswap.h>
#include <inttypes.h>
#include <unistd.h>
#include <stdlib.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/zalloc.h>

#include "color.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "debug.h"
#include "auxtrace.h"
#include "arm-spe.h"
#include "arm-spe-pkt-decoder.h"

struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;
};

struct arm_spe_queue {
	struct arm_spe		*spe;
	unsigned int		queue_nr;
	struct auxtrace_buffer	*buffer;
	bool			on_heap;
	bool			done;
	pid_t			pid;
	pid_t			tid;
	int			cpu;
};

static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %zu bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08x: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (ret > 0)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

static int arm_spe_process_event(struct perf_session *session __maybe_unused,
				 union perf_event *event __maybe_unused,
				 struct perf_sample *sample __maybe_unused,
				 struct perf_tool *tool __maybe_unused)
{
	return 0;
}

static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);
	struct auxtrace_buffer *buffer;
	off_t data_offset;
	int fd = perf_data__fd(session->data);
	int err;

	if (perf_data__is_pipe(session->data)) {
		data_offset = 0;
	} else {
		data_offset = lseek(fd, 0, SEEK_CUR);
		if (data_offset == -1)
			return -errno;
	}

	err = auxtrace_queues__add_event(&spe->queues, session, event,
					 data_offset, &buffer);
	if (err)
		return err;

	/* Dump here now we have copied a piped trace out of the pipe */
	if (dump_trace) {
		if (auxtrace_buffer__get_data(buffer, fd)) {
			arm_spe_dump_event(spe, buffer->data,
					     buffer->size);
			auxtrace_buffer__put_data(buffer);
		}
	}

	return 0;
}

static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 struct perf_tool *tool __maybe_unused)
{
	return 0;
}

static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	free(spe);
}

static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
};

static void arm_spe_print_info(__u64 *arr)
{
	if (!dump_trace)
		return;

	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}

int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE;
	struct arm_spe *spe;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe)
		return -ENOMEM;

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(&auxtrace_info->priv[0]);

	return 0;

err_free:
	free(spe);
	return err;
}
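
At v5.4 this file is essentially a skeleton: it registers the auxtrace callbacks and can hex-dump raw SPE packets via the dump_trace path, but arm_spe_process_event() is still a stub and no samples are synthesized. One pattern worth noting before the much larger v6.8 version below: every callback recovers the private struct arm_spe from the generic session->auxtrace pointer with container_of(). A minimal, self-contained illustration of why that works (a standalone sketch with hypothetical names, not part of the file):

#include <stddef.h>
#include <stdio.h>

struct auxtrace { int placeholder; };

struct outer {				/* stands in for struct arm_spe */
	int pmu_type;
	struct auxtrace auxtrace;	/* embedded member, any offset works */
};

/* Subtract the member's offset to get back from the embedded
 * struct to the object that contains it. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct outer o = { .pmu_type = 42 };
	struct auxtrace *a = &o.auxtrace;	/* what the session stores */

	printf("%d\n", container_of(a, struct outer, auxtrace)->pmu_type);
	return 0;
}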
v6.8
// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#include "../../arch/arm64/include/asm/cputype.h"
#define MAX_TIMESTAMP (~0ULL)
struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts        synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;
	u64				midr;

	struct perf_tsc_conversion	tc;

	u8				timeless_decoding;
	u8				data_queued;

	u64				sample_type;
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;
	u8				sample_instructions;
	u64				instructions_sample_period;

	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;
	u64				instructions_id;

	u64				kernel_start;

	unsigned long			num_events;
	u8				use_ctx_pkt_for_pid;
};

struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
	u64				period_instructions;
};
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %#zx bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08x: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}
static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}
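
The function above is the decoder's pull-style data callback: the decoder invokes it whenever it runs out of bytes, a zero b->len signals end of trace, and the buffer/old_buffer pair keeps the previous mapped chunk alive until the next one is in place. A hedged sketch of a consumer loop under that contract (drain_queue() and consume() are hypothetical stand-ins; the real caller is the decoder in arm-spe-decoder.c):

/* Hypothetical driver loop for the callback contract above. */
static int drain_queue(struct arm_spe_queue *speq)
{
	struct arm_spe_buffer b = { .buf = NULL, .len = 0 };
	int err;

	for (;;) {
		err = arm_spe_get_trace(&b, speq);
		if (err)
			return err;		/* negative errno on failure */
		if (!b.len)
			return 0;		/* queue fully drained */
		consume(b.buf, b.len);		/* hypothetical packet consumer */
	}
}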

static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
		unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;
	speq->period_instructions = 0;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	free(speq);

	return NULL;
}
static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}

static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = thread__pid(speq->thread);
		if (queue->cpu == -1)
			speq->cpu = thread__cpu(speq->thread);
	}
}

static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
{
	struct arm_spe *spe = speq->spe;
	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);

	if (err)
		return err;

	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);

	return 0;
}

static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
{
	struct simd_flags simd_flags = {};

	if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;

	if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;

	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;

	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;

	return simd_flags;
}
static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = 1;
	sample->cpu = speq->cpu;
	sample->simd_flags = arm_spe__synth_simd_flags(record);

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}

static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	if (spe->synth_opts.inject) {
		ret = arm_spe__inject_event(event, sample, spe->sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}
static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;
	sample.weight = record->latency;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;
	sample.weight = record->latency;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
					     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	/*
	 * Handle the perf instruction sampling period.
	 */
	speq->period_instructions++;
	if (speq->period_instructions < spe->instructions_sample_period)
		return 0;
	speq->period_instructions = 0;

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;
	sample.period = spe->instructions_sample_period;
	sample.weight = record->latency;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
static const struct midr_range neoverse_spe[] = {
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
	{},
};

static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
						union perf_mem_data_src *data_src)
{
	/*
	 * Even though four levels of cache hierarchy are possible, no known
	 * production Neoverse systems currently include more than three levels,
	 * so for the time being we assume three exist. If a production system
	 * is built with four, then this function would have to be changed to
	 * detect the number of levels for reporting.
	 */

	/*
	 * We have no data on the hit level or data source for stores in the
	 * Neoverse SPE records.
	 */
	if (record->op & ARM_SPE_OP_ST) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
		return;
	}

	switch (record->source) {
	case ARM_SPE_NV_L1D:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_NV_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_NV_PEER_CORE:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * We don't know whether this hit in L1 or L2, but we do know it was
	 * a cache-to-cache transfer, so set SNOOPX_PEER.
	 */
	case ARM_SPE_NV_LOCAL_CLUSTER:
	case ARM_SPE_NV_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * The system cache is assumed to be L3.
	 */
	case ARM_SPE_NV_SYS_CACHE:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	/*
	 * We don't know what level it hit in, except that it came from the
	 * other socket.
	 */
	case ARM_SPE_NV_REMOTE:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_NV_DRAM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		break;
	}
}

static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record,
					       union perf_mem_data_src *data_src)
{
	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3;

		if (record->type & ARM_SPE_LLC_MISS)
			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1;

		if (record->type & ARM_SPE_L1D_MISS)
			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
}

static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
{
	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
	bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);

	if (record->op & ARM_SPE_OP_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else if (record->op & ARM_SPE_OP_ST)
		data_src.mem_op = PERF_MEM_OP_STORE;
	else
		return 0;

	if (is_neoverse)
		arm_spe__synth_data_source_neoverse(record, &data_src);
	else
		arm_spe__synth_data_source_generic(record, &data_src);

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}
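
An aside on what the return value above actually is: union perf_mem_data_src packs all of these bitfields into a single u64 (data_src.val) that perf report and perf mem later unpack. A small hedged illustration of the encoding the generic path would produce for a load that hit in L1 and in the TLB (the field and constant names come from perf's UAPI header; the record itself is made up):

#include <linux/perf_event.h>	/* union perf_mem_data_src, PERF_MEM_* */

static __u64 example_l1_load_hit(void)
{
	union perf_mem_data_src ds = { .mem_op = PERF_MEM_OP_LOAD };

	ds.mem_lvl  = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;	/* generic path: L1 hit */
	ds.mem_dtlb = PERF_MEM_TLB_WK | PERF_MEM_TLB_HIT;	/* TLB walk that hit */

	return ds.val;	/* the packed u64 stored in sample.data_src */
}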

static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	data_src = arm_spe__synth_data_source(record, spe->midr);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	/*
	 * When data_src is zero, the record is not a memory operation, so
	 * skip synthesizing a memory sample in that case.
	 */
	if (spe->sample_memory && data_src) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	if (spe->sample_instructions) {
		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
		if (err)
			return err;
	}

	return 0;
}
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic is to first decode the packets and then
		 * synthesize a sample based on the resulting record; here the
		 * flow is reversed: arm_spe_sample() is called to synthesize
		 * samples before arm_spe_decode().
		 *
		 * There are two reasons for this:
		 * 1. When the queue is set up in arm_spe__setup_queue(), the
		 * trace data has already been decoded and a record generated,
		 * but no sample has been synthesized for it yet; so it is
		 * correct to synthesize a sample for that leftover record
		 * here.
		 * 2. After decoding trace data, the record's timestamp must be
		 * compared with that of the incoming perf event; if the record
		 * is later, we bail out and push the record onto the auxtrace
		 * heap, deferring its sample synthesis to the next call here.
		 * This keeps samples from the Arm SPE trace correctly time
		 * ordered with other perf events.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * If an error is detected while decoding the SPE trace data,
		 * continue with the next chunk of trace data to find more
		 * records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than that of the
		 * incoming perf event, bail out so the perf event can be
		 * processed first.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}
static int arm_spe__setup_queue(struct arm_spe *spe,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}
static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Cycle through the list of events and check whether any has the
	 * time bit set; decoding is timeless only if none does.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		/*
		 * A previous context-switch event has set pid/tid in the
		 * machine's context, so here we need to update the pid/tid
		 * in the thread and SPE queue.
		 */
		if (!spe->use_ctx_pkt_for_pid)
			arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}
static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					    u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}

static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
				  struct perf_sample *sample)
{
	pid_t pid, tid;
	int cpu;

	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
		return 0;

	pid = event->context_switch.next_prev_pid;
	tid = event->context_switch.next_prev_tid;
	cpu = sample->cpu;

	if (tid == -1)
		pr_warning("context_switch event has no tid\n");

	return machine__set_current_tid(spe->machine, cpu, pid, tid);
}
static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
			struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
					event->fork.tid,
					sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
		if (err)
			return err;

		if (!spe->use_ctx_pkt_for_pid &&
		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
		    event->header.type == PERF_RECORD_SWITCH))
			err = arm_spe_context_switch(spe, event, sample);
	}

	return err;
}
static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
				data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}
static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
			auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
				MAX_TIMESTAMP - 1);

	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
	if (ret)
		return ret;

	if (!spe->use_ctx_pkt_for_pid)
		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
			    "Matching of TIDs to SPE events could be inaccurate.\n");

	return 0;
}
static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
};

static void arm_spe_print_info(__u64 *arr)
{
	if (!dump_trace)
		return;

	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}
struct arm_spe_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int arm_spe_event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct arm_spe_synth *arm_spe_synth =
		      container_of(tool, struct arm_spe_synth, dummy_tool);

	return perf_session__deliver_synth_event(arm_spe_synth->session,
						 event, NULL);
}

static int arm_spe_synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct arm_spe_synth arm_spe_synth;

	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
	arm_spe_synth.session = session;

	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
					   &id, arm_spe_event_synth);
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				    const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}
static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type &
				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	spe->sample_type = attr.sample_type;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;

	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_miss_id = id;
		arm_spe_set_event_name(evlist, id, "branch-miss");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	if (spe->synth_opts.mem) {
		spe->sample_memory = true;

		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->memory_id = id;
		arm_spe_set_event_name(evlist, id, "memory");
		id += 1;
	}

	if (spe->synth_opts.instructions) {
		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
			pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
			goto synth_instructions_out;
		}
		if (spe->synth_opts.period > 1)
			pr_warning("Arm SPE has a hardware-based sample period.\n"
				   "Additional instruction events will be discarded by --itrace\n");

		spe->sample_instructions = true;
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = spe->synth_opts.period;
		spe->instructions_sample_period = attr.sample_period;
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->instructions_id = id;
		arm_spe_set_event_name(evlist, id, "instructions");
	}
synth_instructions_out:

	return 0;
}
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
	struct perf_record_time_conv *tc = &session->time_conv;
	const char *cpuid = perf_env__cpuid(session->evlist->env);
	u64 midr = strtol(cpuid, NULL, 16);
	struct arm_spe *spe;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe)
		return -ENOMEM;

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
	spe->midr = midr;

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has already been
	 * handled, and the parameters for the hardware clock are stored in
	 * the session context.  Pass these parameters to the struct
	 * perf_tsc_conversion in "spe->tc", which is used later to convert
	 * between the clock counter and timestamps.
	 *
	 * For backward compatibility, copy the fields starting from
	 * "time_cycles" only if they are contained in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(&auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		spe->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&spe->synth_opts, false);

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
	return err;
}
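
For reference, the clock conversion wired up just above in "spe->tc" is perf's standard TSC conversion: arm_spe_prep_sample() uses tsc_to_perf_time() and arm_spe_process_event() uses perf_time_to_tsc(). A simplified sketch of what tsc_to_perf_time() computes from these fields (the real helper lives in tools/perf/util/tsc.c and additionally handles the time_cycles/time_mask wrap-around when cap_user_time_short is set):

/* Simplified: timestamp = time_zero + (cyc * time_mult) >> time_shift,
 * computed in two parts to avoid overflowing the 64-bit multiply. */
static u64 sketch_tsc_to_perf_time(u64 cyc, const struct perf_tsc_conversion *tc)
{
	u64 quot = cyc >> tc->time_shift;
	u64 rem  = cyc & (((u64)1 << tc->time_shift) - 1);

	return tc->time_zero + quot * tc->time_mult +
	       ((rem * tc->time_mult) >> tc->time_shift);
}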