v4.17
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Arm Statistical Profiling Extensions (SPE) support
  4 * Copyright (c) 2017-2018, Arm Ltd.
  5 */
  6
  7#include <endian.h>
  8#include <errno.h>
  9#include <byteswap.h>
 10#include <inttypes.h>
 11#include <linux/kernel.h>
 12#include <linux/types.h>
 13#include <linux/bitops.h>
 14#include <linux/log2.h>
 15
 16#include "cpumap.h"
 17#include "color.h"
 18#include "evsel.h"
 19#include "evlist.h"
 20#include "machine.h"
 21#include "session.h"
 22#include "util.h"
 23#include "thread.h"
 24#include "debug.h"
 25#include "auxtrace.h"
 26#include "arm-spe.h"
 27#include "arm-spe-pkt-decoder.h"
 28
 29struct arm_spe {
 30	struct auxtrace			auxtrace;
 31	struct auxtrace_queues		queues;
 32	struct auxtrace_heap		heap;
 33	u32				auxtrace_type;
 34	struct perf_session		*session;
 35	struct machine			*machine;
 36	u32				pmu_type;
 37};
 38
 39struct arm_spe_queue {
 40	struct arm_spe		*spe;
 41	unsigned int		queue_nr;
 42	struct auxtrace_buffer	*buffer;
 43	bool			on_heap;
 44	bool			done;
 45	pid_t			pid;
 46	pid_t			tid;
 47	int			cpu;
 48};
 49
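/*
 * Hex-dump decoded SPE packets: each line shows the buffer offset, up to
 * 16 raw bytes, and the decoded packet description; an undecodable byte
 * is flagged as a bad packet and the walk advances by one byte.
 */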
 50static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
 51			 unsigned char *buf, size_t len)
 52{
 53	struct arm_spe_pkt packet;
 54	size_t pos = 0;
 55	int ret, pkt_len, i;
 56	char desc[ARM_SPE_PKT_DESC_MAX];
 57	const char *color = PERF_COLOR_BLUE;
 58
 59	color_fprintf(stdout, color,
 60		      ". ... ARM SPE data: size %zu bytes\n",
 61		      len);
 62
 63	while (len) {
 64		ret = arm_spe_get_packet(buf, len, &packet);
 65		if (ret > 0)
 66			pkt_len = ret;
 67		else
 68			pkt_len = 1;
 69		printf(".");
 70		color_fprintf(stdout, color, "  %08x: ", pos);
 71		for (i = 0; i < pkt_len; i++)
 72			color_fprintf(stdout, color, " %02x", buf[i]);
 73		for (; i < 16; i++)
 74			color_fprintf(stdout, color, "   ");
 75		if (ret > 0) {
 76			ret = arm_spe_pkt_desc(&packet, desc,
 77					       ARM_SPE_PKT_DESC_MAX);
 78			if (ret > 0)
 79				color_fprintf(stdout, color, " %s\n", desc);
 80		} else {
 81			color_fprintf(stdout, color, " Bad packet!\n");
 82		}
 83		pos += pkt_len;
 84		buf += pkt_len;
 85		len -= pkt_len;
 86	}
 87}
 88
 89static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
 90			       size_t len)
 91{
 92	printf(".\n");
 93	arm_spe_dump(spe, buf, len);
 94}
 95
 96static int arm_spe_process_event(struct perf_session *session __maybe_unused,
 97				 union perf_event *event __maybe_unused,
 98				 struct perf_sample *sample __maybe_unused,
 99				 struct perf_tool *tool __maybe_unused)
100{
101	return 0;
102}
103
104static int arm_spe_process_auxtrace_event(struct perf_session *session,
105					  union perf_event *event,
106					  struct perf_tool *tool __maybe_unused)
107{
108	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
109					     auxtrace);
110	struct auxtrace_buffer *buffer;
111	off_t data_offset;
112	int fd = perf_data__fd(session->data);
113	int err;
114
115	if (perf_data__is_pipe(session->data)) {
116		data_offset = 0;
117	} else {
118		data_offset = lseek(fd, 0, SEEK_CUR);
119		if (data_offset == -1)
120			return -errno;
121	}
122
123	err = auxtrace_queues__add_event(&spe->queues, session, event,
124					 data_offset, &buffer);
125	if (err)
126		return err;
127
128	/* Dump here now that we have copied a piped trace out of the pipe */
129	if (dump_trace) {
130		if (auxtrace_buffer__get_data(buffer, fd)) {
131			arm_spe_dump_event(spe, buffer->data,
132					     buffer->size);
133			auxtrace_buffer__put_data(buffer);
134		}
135	}
136
137	return 0;
138}
139
140static int arm_spe_flush(struct perf_session *session __maybe_unused,
141			 struct perf_tool *tool __maybe_unused)
142{
143	return 0;
144}
145
146static void arm_spe_free_queue(void *priv)
147{
148	struct arm_spe_queue *speq = priv;
149
150	if (!speq)
151		return;
152	free(speq);
153}
154
155static void arm_spe_free_events(struct perf_session *session)
156{
157	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
158					     auxtrace);
159	struct auxtrace_queues *queues = &spe->queues;
160	unsigned int i;
161
162	for (i = 0; i < queues->nr_queues; i++) {
163		arm_spe_free_queue(queues->queue_array[i].priv);
164		queues->queue_array[i].priv = NULL;
165	}
166	auxtrace_queues__free(queues);
167}
168
169static void arm_spe_free(struct perf_session *session)
170{
171	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
172					     auxtrace);
173
174	auxtrace_heap__free(&spe->heap);
175	arm_spe_free_events(session);
176	session->auxtrace = NULL;
177	free(spe);
178}
179
180static const char * const arm_spe_info_fmts[] = {
181	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
182};
183
184static void arm_spe_print_info(u64 *arr)
185{
186	if (!dump_trace)
187		return;
188
189	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
190}
191
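/*
 * Entry point for the PERF_RECORD_AUXTRACE_INFO event: validate the
 * metadata size, allocate the arm_spe context, and hook its callbacks
 * into session->auxtrace so that later SPE events are routed here.
 */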
192int arm_spe_process_auxtrace_info(union perf_event *event,
193				  struct perf_session *session)
194{
195	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
196	size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE;
197	struct arm_spe *spe;
198	int err;
199
200	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
201					min_sz)
202		return -EINVAL;
203
204	spe = zalloc(sizeof(struct arm_spe));
205	if (!spe)
206		return -ENOMEM;
207
208	err = auxtrace_queues__init(&spe->queues);
209	if (err)
210		goto err_free;
211
212	spe->session = session;
213	spe->machine = &session->machines.host; /* No kvm support */
214	spe->auxtrace_type = auxtrace_info->type;
215	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
216
217	spe->auxtrace.process_event = arm_spe_process_event;
218	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
219	spe->auxtrace.flush_events = arm_spe_flush;
220	spe->auxtrace.free_events = arm_spe_free_events;
221	spe->auxtrace.free = arm_spe_free;
222	session->auxtrace = &spe->auxtrace;
223
224	arm_spe_print_info(&auxtrace_info->priv[0]);
225
226	return 0;
227
228err_free:
229	free(spe);
230	return err;
231}
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Arm Statistical Profiling Extensions (SPE) support
   4 * Copyright (c) 2017-2018, Arm Ltd.
   5 */
   6
   7#include <byteswap.h>
   8#include <endian.h>
   9#include <errno.h>
  10#include <inttypes.h>
  11#include <linux/bitops.h>
  12#include <linux/kernel.h>
  13#include <linux/log2.h>
  14#include <linux/types.h>
  15#include <linux/zalloc.h>
  16#include <stdlib.h>
  17#include <unistd.h>
  18
  19#include "auxtrace.h"
  20#include "color.h"
  21#include "debug.h"
  22#include "evlist.h"
  23#include "evsel.h"
  24#include "machine.h"
  25#include "session.h"
  26#include "symbol.h"
  27#include "thread.h"
  28#include "thread-stack.h"
  29#include "tsc.h"
  30#include "tool.h"
  31#include "util/synthetic-events.h"
  32
  33#include "arm-spe.h"
  34#include "arm-spe-decoder/arm-spe-decoder.h"
  35#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
  36
  37#include "../../arch/arm64/include/asm/cputype.h"
  38#define MAX_TIMESTAMP (~0ULL)
  39
  40struct arm_spe {
  41	struct auxtrace			auxtrace;
  42	struct auxtrace_queues		queues;
  43	struct auxtrace_heap		heap;
  44	struct itrace_synth_opts        synth_opts;
  45	u32				auxtrace_type;
  46	struct perf_session		*session;
  47	struct machine			*machine;
  48	u32				pmu_type;
  49
  50	struct perf_tsc_conversion	tc;
  51
  52	u8				timeless_decoding;
  53	u8				data_queued;
  54
  55	u64				sample_type;
  56	u8				sample_flc;
  57	u8				sample_llc;
  58	u8				sample_tlb;
  59	u8				sample_branch;
  60	u8				sample_remote_access;
  61	u8				sample_memory;
  62	u8				sample_instructions;
  63	u64				instructions_sample_period;
  64
  65	u64				l1d_miss_id;
  66	u64				l1d_access_id;
  67	u64				llc_miss_id;
  68	u64				llc_access_id;
  69	u64				tlb_miss_id;
  70	u64				tlb_access_id;
  71	u64				branch_id;
  72	u64				remote_access_id;
  73	u64				memory_id;
  74	u64				instructions_id;
  75
  76	u64				kernel_start;
  77
  78	unsigned long			num_events;
  79	u8				use_ctx_pkt_for_pid;
  80
  81	u64				**metadata;
  82	u64				metadata_ver;
  83	u64				metadata_nr_cpu;
  84	bool				is_homogeneous;
  85};
  86
  87struct arm_spe_queue {
  88	struct arm_spe			*spe;
  89	unsigned int			queue_nr;
  90	struct auxtrace_buffer		*buffer;
  91	struct auxtrace_buffer		*old_buffer;
  92	union perf_event		*event_buf;
  93	bool				on_heap;
  94	bool				done;
  95	pid_t				pid;
  96	pid_t				tid;
  97	int				cpu;
  98	struct arm_spe_decoder		*decoder;
  99	u64				time;
 100	u64				timestamp;
 101	struct thread			*thread;
 102	u64				period_instructions;
 103	u32				flags;
 104};
 105
 106static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
 107			 unsigned char *buf, size_t len)
 108{
 109	struct arm_spe_pkt packet;
 110	size_t pos = 0;
 111	int ret, pkt_len, i;
 112	char desc[ARM_SPE_PKT_DESC_MAX];
 113	const char *color = PERF_COLOR_BLUE;
 114
 115	color_fprintf(stdout, color,
 116		      ". ... ARM SPE data: size %#zx bytes\n",
 117		      len);
 118
 119	while (len) {
 120		ret = arm_spe_get_packet(buf, len, &packet);
 121		if (ret > 0)
 122			pkt_len = ret;
 123		else
 124			pkt_len = 1;
 125		printf(".");
 126		color_fprintf(stdout, color, "  %08zx: ", pos);
 127		for (i = 0; i < pkt_len; i++)
 128			color_fprintf(stdout, color, " %02x", buf[i]);
 129		for (; i < 16; i++)
 130			color_fprintf(stdout, color, "   ");
 131		if (ret > 0) {
 132			ret = arm_spe_pkt_desc(&packet, desc,
 133					       ARM_SPE_PKT_DESC_MAX);
 134			if (!ret)
 135				color_fprintf(stdout, color, " %s\n", desc);
 136		} else {
 137			color_fprintf(stdout, color, " Bad packet!\n");
 138		}
 139		pos += pkt_len;
 140		buf += pkt_len;
 141		len -= pkt_len;
 142	}
 143}
 144
 145static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
 146			       size_t len)
 147{
 148	printf(".\n");
 149	arm_spe_dump(spe, buf, len);
 150}
 151
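/*
 * Data-fetch callback invoked by the SPE decoder.  It walks the queue's
 * auxtrace buffers, mapping each one on demand; the previous buffer is
 * kept in speq->old_buffer until the next one is loaded, then dropped,
 * so the decoder never reads from an unmapped buffer.
 */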
 152static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
 153{
 154	struct arm_spe_queue *speq = data;
 155	struct auxtrace_buffer *buffer = speq->buffer;
 156	struct auxtrace_buffer *old_buffer = speq->old_buffer;
 157	struct auxtrace_queue *queue;
 158
 159	queue = &speq->spe->queues.queue_array[speq->queue_nr];
 160
 161	buffer = auxtrace_buffer__next(queue, buffer);
 162	/* If no more data, drop the previous auxtrace_buffer and return */
 163	if (!buffer) {
 164		if (old_buffer)
 165			auxtrace_buffer__drop_data(old_buffer);
 166		b->len = 0;
 167		return 0;
 168	}
 169
 170	speq->buffer = buffer;
 171
 172	/* If the aux_buffer doesn't have data associated, try to load it */
 173	if (!buffer->data) {
 174		/* get the file desc associated with the perf data file */
 175		int fd = perf_data__fd(speq->spe->session->data);
 176
 177		buffer->data = auxtrace_buffer__get_data(buffer, fd);
 178		if (!buffer->data)
 179			return -ENOMEM;
 180	}
 181
 182	b->len = buffer->size;
 183	b->buf = buffer->data;
 184
 185	if (b->len) {
 186		if (old_buffer)
 187			auxtrace_buffer__drop_data(old_buffer);
 188		speq->old_buffer = buffer;
 189	} else {
 190		auxtrace_buffer__drop_data(buffer);
 191		return arm_spe_get_trace(b, data);
 192	}
 193
 194	return 0;
 195}
 196
 197static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
 198		unsigned int queue_nr)
 199{
 200	struct arm_spe_params params = { .get_trace = 0, };
 201	struct arm_spe_queue *speq;
 202
 203	speq = zalloc(sizeof(*speq));
 204	if (!speq)
 205		return NULL;
 206
 207	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
 208	if (!speq->event_buf)
 209		goto out_free;
 210
 211	speq->spe = spe;
 212	speq->queue_nr = queue_nr;
 213	speq->pid = -1;
 214	speq->tid = -1;
 215	speq->cpu = -1;
 216	speq->period_instructions = 0;
 217
 218	/* params set */
 219	params.get_trace = arm_spe_get_trace;
 220	params.data = speq;
 221
 222	/* create new decoder */
 223	speq->decoder = arm_spe_decoder_new(&params);
 224	if (!speq->decoder)
 225		goto out_free;
 226
 227	return speq;
 228
 229out_free:
 230	zfree(&speq->event_buf);
 231	free(speq);
 232
 233	return NULL;
 234}
 235
 236static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
 237{
 238	return ip >= spe->kernel_start ?
 239		PERF_RECORD_MISC_KERNEL :
 240		PERF_RECORD_MISC_USER;
 241}
 242
 243static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
 244				    struct auxtrace_queue *queue)
 245{
 246	struct arm_spe_queue *speq = queue->priv;
 247	pid_t tid;
 248
 249	tid = machine__get_current_tid(spe->machine, speq->cpu);
 250	if (tid != -1) {
 251		speq->tid = tid;
 252		thread__zput(speq->thread);
 253	} else
 254		speq->tid = queue->tid;
 255
 256	if ((!speq->thread) && (speq->tid != -1)) {
 257		speq->thread = machine__find_thread(spe->machine, -1,
 258						    speq->tid);
 259	}
 260
 261	if (speq->thread) {
 262		speq->pid = thread__pid(speq->thread);
 263		if (queue->cpu == -1)
 264			speq->cpu = thread__cpu(speq->thread);
 265	}
 266}
 267
 268static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
 269{
 270	struct arm_spe *spe = speq->spe;
 271	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
 272
 273	if (err)
 274		return err;
 275
 276	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
 277
 278	return 0;
 279}
 280
 281static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu)
 282{
 283	u64 i;
 284
 285	if (!spe->metadata)
 286		return NULL;
 287
 288	for (i = 0; i < spe->metadata_nr_cpu; i++)
 289		if (spe->metadata[i][ARM_SPE_CPU] == cpu)
 290			return spe->metadata[i];
 291
 292	return NULL;
 293}
 294
 295static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
 296{
 297	struct simd_flags simd_flags = {};
 298
 299	if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
 300		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
 301
 302	if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
 303		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
 304
 305	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
 306		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;
 307
 308	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
 309		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;
 310
 311	return simd_flags;
 312}
 313
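/*
 * Fill the sample fields common to all synthesized event types.  The
 * record's TSC timestamp is converted to perf time unless decoding is
 * timeless (i.e. no event in the session sampled PERF_SAMPLE_TIME).
 */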
 314static void arm_spe_prep_sample(struct arm_spe *spe,
 315				struct arm_spe_queue *speq,
 316				union perf_event *event,
 317				struct perf_sample *sample)
 318{
 319	struct arm_spe_record *record = &speq->decoder->record;
 320
 321	if (!spe->timeless_decoding)
 322		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
 323
 324	sample->ip = record->from_ip;
 325	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
 326	sample->pid = speq->pid;
 327	sample->tid = speq->tid;
 328	sample->period = 1;
 329	sample->cpu = speq->cpu;
 330	sample->simd_flags = arm_spe__synth_simd_flags(record);
 331
 332	event->sample.header.type = PERF_RECORD_SAMPLE;
 333	event->sample.header.misc = sample->cpumode;
 334	event->sample.header.size = sizeof(struct perf_event_header);
 335}
 336
 337static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
 338{
 339	event->header.size = perf_event__sample_event_size(sample, type, 0);
 340	return perf_event__synthesize_sample(event, type, 0, sample);
 341}
 342
 343static inline int
 344arm_spe_deliver_synth_event(struct arm_spe *spe,
 345			    struct arm_spe_queue *speq __maybe_unused,
 346			    union perf_event *event,
 347			    struct perf_sample *sample)
 348{
 349	int ret;
 350
 351	if (spe->synth_opts.inject) {
 352		ret = arm_spe__inject_event(event, sample, spe->sample_type);
 353		if (ret)
 354			return ret;
 355	}
 356
 357	ret = perf_session__deliver_synth_event(spe->session, event, sample);
 358	if (ret)
 359		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
 360
 361	return ret;
 362}
 363
 364static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
 365				     u64 spe_events_id, u64 data_src)
 366{
 367	struct arm_spe *spe = speq->spe;
 368	struct arm_spe_record *record = &speq->decoder->record;
 369	union perf_event *event = speq->event_buf;
 370	struct perf_sample sample = { .ip = 0, };
 371
 372	arm_spe_prep_sample(spe, speq, event, &sample);
 373
 374	sample.id = spe_events_id;
 375	sample.stream_id = spe_events_id;
 376	sample.addr = record->virt_addr;
 377	sample.phys_addr = record->phys_addr;
 378	sample.data_src = data_src;
 379	sample.weight = record->latency;
 380
 381	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 382}
 383
 384static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
 385					u64 spe_events_id)
 386{
 387	struct arm_spe *spe = speq->spe;
 388	struct arm_spe_record *record = &speq->decoder->record;
 389	union perf_event *event = speq->event_buf;
 390	struct perf_sample sample = { .ip = 0, };
 391
 392	arm_spe_prep_sample(spe, speq, event, &sample);
 393
 394	sample.id = spe_events_id;
 395	sample.stream_id = spe_events_id;
 396	sample.addr = record->to_ip;
 397	sample.weight = record->latency;
 398	sample.flags = speq->flags;
 399
 400	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 401}
 402
 403static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
 404					     u64 spe_events_id, u64 data_src)
 405{
 406	struct arm_spe *spe = speq->spe;
 407	struct arm_spe_record *record = &speq->decoder->record;
 408	union perf_event *event = speq->event_buf;
 409	struct perf_sample sample = { .ip = 0, };
 410
 411	/*
 412	 * Handle the perf instruction sampling period.
 413	 */
 414	speq->period_instructions++;
 415	if (speq->period_instructions < spe->instructions_sample_period)
 416		return 0;
 417	speq->period_instructions = 0;
 418
 419	arm_spe_prep_sample(spe, speq, event, &sample);
 420
 421	sample.id = spe_events_id;
 422	sample.stream_id = spe_events_id;
 423	sample.addr = record->to_ip;
 424	sample.phys_addr = record->phys_addr;
 425	sample.data_src = data_src;
 426	sample.period = spe->instructions_sample_period;
 427	sample.weight = record->latency;
 428	sample.flags = speq->flags;
 429
 430	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 431}
 432
 433static const struct midr_range common_ds_encoding_cpus[] = {
 434	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
 435	MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
 436	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
 437	MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
 438	MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
 439	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
 440	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
 441	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
 442	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
 443	{},
 444};
 445
 446static void arm_spe__sample_flags(struct arm_spe_queue *speq)
 447{
 448	const struct arm_spe_record *record = &speq->decoder->record;
 449
 450	speq->flags = 0;
 451	if (record->op & ARM_SPE_OP_BRANCH_ERET) {
 452		speq->flags = PERF_IP_FLAG_BRANCH;
 453
 454		if (record->type & ARM_SPE_BRANCH_MISS)
 455			speq->flags |= PERF_IP_FLAG_BRANCH_MISS;
 456	}
 457}
 458
 459static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
 460					      union perf_mem_data_src *data_src)
 461{
 462	/*
 463	 * Even though four levels of cache hierarchy are possible, no known
 464	 * production Neoverse systems currently include more than three levels
 465	 * so for the time being we assume three exist. If a production system
 466	 * is built with four, this function would have to be changed to
 467	 * detect the number of levels for reporting.
 468	 */
 469
 470	/*
 471	 * We have no data on the hit level or data source for stores in the
 472	 * Neoverse SPE records.
 473	 */
 474	if (record->op & ARM_SPE_OP_ST) {
 475		data_src->mem_lvl = PERF_MEM_LVL_NA;
 476		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
 477		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
 478		return;
 479	}
 480
 481	switch (record->source) {
 482	case ARM_SPE_COMMON_DS_L1D:
 483		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
 484		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
 485		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
 486		break;
 487	case ARM_SPE_COMMON_DS_L2:
 488		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
 489		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
 490		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
 491		break;
 492	case ARM_SPE_COMMON_DS_PEER_CORE:
 493		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
 494		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
 495		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
 496		break;
 497	/*
 498	 * We don't know if this is L1 or L2, but we do know it was a
 499	 * cache-to-cache transfer, so set SNOOPX_PEER
 500	 */
 501	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
 502	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
 503		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
 504		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
 505		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
 506		break;
 507	/*
 508	 * System cache is assumed to be L3
 509	 */
 510	case ARM_SPE_COMMON_DS_SYS_CACHE:
 511		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
 512		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
 513		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
 514		break;
 515	/*
 516	 * We don't know what level it hit in, except it came from the other
 517	 * socket
 518	 */
 519	case ARM_SPE_COMMON_DS_REMOTE:
 520		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
 521		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
 522		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
 523		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
 524		break;
 525	case ARM_SPE_COMMON_DS_DRAM:
 526		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
 527		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
 528		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
 529		break;
 530	default:
 531		break;
 532	}
 533}
 534
 535static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
 536					union perf_mem_data_src *data_src)
 537{
 538	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
 539		data_src->mem_lvl = PERF_MEM_LVL_L3;
 540
 541		if (record->type & ARM_SPE_LLC_MISS)
 542			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
 543		else
 544			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
 545	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
 546		data_src->mem_lvl = PERF_MEM_LVL_L1;
 547
 548		if (record->type & ARM_SPE_L1D_MISS)
 549			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
 550		else
 551			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
 552	}
 553
 554	if (record->type & ARM_SPE_REMOTE_ACCESS)
 555		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
 556}
 557
 558static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq)
 559{
 560	struct arm_spe *spe = speq->spe;
 561	bool is_in_cpu_list;
 562	u64 *metadata = NULL;
 563	u64 midr = 0;
 564
 565	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
 566	if (spe->metadata_ver == 1) {
 567		const char *cpuid;
 568
 569		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
 570		cpuid = perf_env__cpuid(spe->session->evlist->env);
 571		midr = strtol(cpuid, NULL, 16);
 572	} else {
 573		/* CPU ID is -1 for per-thread mode */
 574		if (speq->cpu < 0) {
 575			/*
 576			 * On a heterogeneous system, since the CPU ID is -1 we
 577			 * cannot confirm whether the data source packet is supported.
 578			 */
 579			if (!spe->is_homogeneous)
 580				return false;
 581
 582			/* On a homogeneous system, simply use CPU0's metadata */
 583			if (spe->metadata)
 584				metadata = spe->metadata[0];
 585		} else {
 586			metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
 587		}
 588
 589		if (!metadata)
 590			return false;
 591
 592		midr = metadata[ARM_SPE_CPU_MIDR];
 593	}
 594
 595	is_in_cpu_list = is_midr_in_range_list(midr, common_ds_encoding_cpus);
 596	if (is_in_cpu_list)
 597		return true;
 598	else
 599		return false;
 600}
 601
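/*
 * Build the perf_mem_data_src descriptor for a load/store record.  CPUs
 * using the common Neoverse data source encoding report a precise hit
 * level (see common_ds_encoding_cpus above); otherwise the level is
 * derived from the coarser L1D/LLC access and miss event bits.  Returns
 * 0 for records that are not memory operations.
 */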
 602static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
 603				      const struct arm_spe_record *record)
 604{
 605	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
 606	bool is_common = arm_spe__is_common_ds_encoding(speq);
 607
 608	if (record->op & ARM_SPE_OP_LD)
 609		data_src.mem_op = PERF_MEM_OP_LOAD;
 610	else if (record->op & ARM_SPE_OP_ST)
 611		data_src.mem_op = PERF_MEM_OP_STORE;
 612	else
 613		return 0;
 614
 615	if (is_common)
 616		arm_spe__synth_data_source_common(record, &data_src);
 617	else
 618		arm_spe__synth_memory_level(record, &data_src);
 619
 620	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
 621		data_src.mem_dtlb = PERF_MEM_TLB_WK;
 622
 623		if (record->type & ARM_SPE_TLB_MISS)
 624			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
 625		else
 626			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
 627	}
 628
 629	return data_src.val;
 630}
 631
 632static int arm_spe_sample(struct arm_spe_queue *speq)
 633{
 634	const struct arm_spe_record *record = &speq->decoder->record;
 635	struct arm_spe *spe = speq->spe;
 636	u64 data_src;
 637	int err;
 638
 639	arm_spe__sample_flags(speq);
 640	data_src = arm_spe__synth_data_source(speq, record);
 641
 642	if (spe->sample_flc) {
 643		if (record->type & ARM_SPE_L1D_MISS) {
 644			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
 645							data_src);
 646			if (err)
 647				return err;
 648		}
 649
 650		if (record->type & ARM_SPE_L1D_ACCESS) {
 651			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
 652							data_src);
 653			if (err)
 654				return err;
 655		}
 656	}
 657
 658	if (spe->sample_llc) {
 659		if (record->type & ARM_SPE_LLC_MISS) {
 660			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
 661							data_src);
 662			if (err)
 663				return err;
 664		}
 665
 666		if (record->type & ARM_SPE_LLC_ACCESS) {
 667			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
 668							data_src);
 669			if (err)
 670				return err;
 671		}
 672	}
 673
 674	if (spe->sample_tlb) {
 675		if (record->type & ARM_SPE_TLB_MISS) {
 676			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
 677							data_src);
 678			if (err)
 679				return err;
 680		}
 681
 682		if (record->type & ARM_SPE_TLB_ACCESS) {
 683			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
 684							data_src);
 685			if (err)
 686				return err;
 687		}
 688	}
 689
 690	if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
 691		err = arm_spe__synth_branch_sample(speq, spe->branch_id);
 692		if (err)
 693			return err;
 694	}
 695
 696	if (spe->sample_remote_access &&
 697	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
 698		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
 699						data_src);
 700		if (err)
 701			return err;
 702	}
 703
 704	/*
 705	 * When data_src is zero, the record is not a memory operation, so
 706	 * skip synthesizing a memory sample in that case.
 707	 */
 708	if (spe->sample_memory && data_src) {
 709		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
 710		if (err)
 711			return err;
 712	}
 713
 714	if (spe->sample_instructions) {
 715		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
 716		if (err)
 717			return err;
 718	}
 719
 720	return 0;
 721}
 722
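/*
 * Main decode loop for one queue.  Returns 1 when the trace data is
 * exhausted, 0 when bailing out because the queue's timestamp has
 * caught up with the coming perf event (preserving ordered delivery),
 * or a negative error code.
 */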
 723static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
 724{
 725	struct arm_spe *spe = speq->spe;
 726	struct arm_spe_record *record;
 727	int ret;
 728
 729	if (!spe->kernel_start)
 730		spe->kernel_start = machine__kernel_start(spe->machine);
 731
 732	while (1) {
 733		/*
 734		 * The usual logic is first to decode the packets and then
 735		 * synthesize samples from the records; but here the flow is
 736		 * reversed: arm_spe_sample() is called to synthesize samples
 737		 * prior to arm_spe_decode().
 738		 *
 739		 * There are two reasons for this logic:
 740		 * 1. When the queue is set up in arm_spe__setup_queue(), it
 741		 * has already decoded trace data and generated a record, but
 742		 * that record is not turned into a sample until we get here,
 743		 * so it is correct to synthesize a sample for the leftover
 744		 * record first.
 745		 * 2. After decoding trace data, the record timestamp must be
 746		 * compared with the timestamp of the coming perf event; if the
 747		 * record is later, we bail out and push the record onto the
 748		 * auxtrace heap, deferring its sample until the next time we
 749		 * get here. This keeps samples from Arm SPE trace data and
 750		 * other perf events in correct time order.
 751		 */
 752
 753		/*
 754		 * Update pid/tid info.
 755		 */
 756		record = &speq->decoder->record;
 757		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
 758			ret = arm_spe_set_tid(speq, record->context_id);
 759			if (ret)
 760				return ret;
 761
 762			spe->use_ctx_pkt_for_pid = true;
 763		}
 764
 765		ret = arm_spe_sample(speq);
 766		if (ret)
 767			return ret;
 768
 769		ret = arm_spe_decode(speq->decoder);
 770		if (!ret) {
 771			pr_debug("No data or all data has been processed.\n");
 772			return 1;
 773		}
 774
 775		/*
 776		 * If an error is detected while decoding SPE trace data,
 777		 * continue with the next trace data to find more records.
 778		 */
 779		if (ret < 0)
 780			continue;
 781
 782		record = &speq->decoder->record;
 783
 784		/* Update timestamp for the last record */
 785		if (record->timestamp > speq->timestamp)
 786			speq->timestamp = record->timestamp;
 787
 788		/*
 789		 * If the timestamp of the queue is later than the timestamp of
 790		 * the coming perf event, bail out so that the perf event can
 791		 * be processed first.
 792		 */
 793		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
 794			*timestamp = speq->timestamp;
 795			return 0;
 796		}
 797	}
 798
 799	return 0;
 800}
 801
 802static int arm_spe__setup_queue(struct arm_spe *spe,
 803			       struct auxtrace_queue *queue,
 804			       unsigned int queue_nr)
 805{
 806	struct arm_spe_queue *speq = queue->priv;
 807	struct arm_spe_record *record;
 808
 809	if (list_empty(&queue->head) || speq)
 810		return 0;
 811
 812	speq = arm_spe__alloc_queue(spe, queue_nr);
 813
 814	if (!speq)
 815		return -ENOMEM;
 816
 817	queue->priv = speq;
 818
 819	if (queue->cpu != -1)
 820		speq->cpu = queue->cpu;
 821
 822	if (!speq->on_heap) {
 823		int ret;
 824
 825		if (spe->timeless_decoding)
 826			return 0;
 827
 828retry:
 829		ret = arm_spe_decode(speq->decoder);
 830
 831		if (!ret)
 832			return 0;
 833
 834		if (ret < 0)
 835			goto retry;
 836
 837		record = &speq->decoder->record;
 838
 839		speq->timestamp = record->timestamp;
 840		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
 841		if (ret)
 842			return ret;
 843		speq->on_heap = true;
 844	}
 845
 846	return 0;
 847}
 848
 849static int arm_spe__setup_queues(struct arm_spe *spe)
 850{
 851	unsigned int i;
 852	int ret;
 853
 854	for (i = 0; i < spe->queues.nr_queues; i++) {
 855		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
 856		if (ret)
 857			return ret;
 858	}
 859
 860	return 0;
 861}
 862
 863static int arm_spe__update_queues(struct arm_spe *spe)
 864{
 865	if (spe->queues.new_data) {
 866		spe->queues.new_data = false;
 867		return arm_spe__setup_queues(spe);
 868	}
 869
 870	return 0;
 871}
 872
 873static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
 874{
 875	struct evsel *evsel;
 876	struct evlist *evlist = spe->session->evlist;
 877	bool timeless_decoding = true;
 878
 879	/*
 880	 * Cycle through the list of events; if any one has the time bit
 881	 * set, decoding is not timeless.
 882	 */
 883	evlist__for_each_entry(evlist, evsel) {
 884		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
 885			timeless_decoding = false;
 886	}
 887
 888	return timeless_decoding;
 889}
 890
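/*
 * Drain the auxtrace heap up to 'timestamp'.  The heap holds one entry
 * per active queue, ordered by the timestamp of that queue's next
 * record, so records are delivered in global time order.
 */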
 891static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
 892{
 893	unsigned int queue_nr;
 894	u64 ts;
 895	int ret;
 896
 897	while (1) {
 898		struct auxtrace_queue *queue;
 899		struct arm_spe_queue *speq;
 900
 901		if (!spe->heap.heap_cnt)
 902			return 0;
 903
 904		if (spe->heap.heap_array[0].ordinal >= timestamp)
 905			return 0;
 906
 907		queue_nr = spe->heap.heap_array[0].queue_nr;
 908		queue = &spe->queues.queue_array[queue_nr];
 909		speq = queue->priv;
 910
 911		auxtrace_heap__pop(&spe->heap);
 912
 913		if (spe->heap.heap_cnt) {
 914			ts = spe->heap.heap_array[0].ordinal + 1;
 915			if (ts > timestamp)
 916				ts = timestamp;
 917		} else {
 918			ts = timestamp;
 919		}
 920
 921		/*
 922		 * A previous context-switch event has set pid/tid in the machine's context, so
 923		 * here we need to update the pid/tid in the thread and SPE queue.
 924		 */
 925		if (!spe->use_ctx_pkt_for_pid)
 926			arm_spe_set_pid_tid_cpu(spe, queue);
 927
 928		ret = arm_spe_run_decoder(speq, &ts);
 929		if (ret < 0) {
 930			auxtrace_heap__add(&spe->heap, queue_nr, ts);
 931			return ret;
 932		}
 933
 934		if (!ret) {
 935			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
 936			if (ret < 0)
 937				return ret;
 938		} else {
 939			speq->on_heap = false;
 940		}
 941	}
 942
 943	return 0;
 944}
 945
 946static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
 947					    u64 time_)
 948{
 949	struct auxtrace_queues *queues = &spe->queues;
 950	unsigned int i;
 951	u64 ts = 0;
 952
 953	for (i = 0; i < queues->nr_queues; i++) {
 954		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
 955		struct arm_spe_queue *speq = queue->priv;
 956
 957		if (speq && (tid == -1 || speq->tid == tid)) {
 958			speq->time = time_;
 959			arm_spe_set_pid_tid_cpu(spe, queue);
 960			arm_spe_run_decoder(speq, &ts);
 961		}
 962	}
 963	return 0;
 964}
 965
 966static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
 967				  struct perf_sample *sample)
 968{
 969	pid_t pid, tid;
 970	int cpu;
 971
 972	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
 973		return 0;
 974
 975	pid = event->context_switch.next_prev_pid;
 976	tid = event->context_switch.next_prev_tid;
 977	cpu = sample->cpu;
 978
 979	if (tid == -1)
 980		pr_warning("context_switch event has no tid\n");
 981
 982	return machine__set_current_tid(spe->machine, cpu, pid, tid);
 983}
 984
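/*
 * Per-event hook called by the session for non-auxtrace events.  Timed
 * traces are decoded incrementally up to each event's timestamp, with
 * context-switch events standing in for PID tracking when the trace
 * carries no CONTEXT packets; timeless traces are only flushed on
 * PERF_RECORD_EXIT.
 */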
 985static int arm_spe_process_event(struct perf_session *session,
 986				 union perf_event *event,
 987				 struct perf_sample *sample,
 988				 const struct perf_tool *tool)
 989{
 990	int err = 0;
 991	u64 timestamp;
 992	struct arm_spe *spe = container_of(session->auxtrace,
 993			struct arm_spe, auxtrace);
 994
 995	if (dump_trace)
 996		return 0;
 997
 998	if (!tool->ordered_events) {
 999		pr_err("SPE trace requires ordered events\n");
1000		return -EINVAL;
1001	}
1002
1003	if (sample->time && (sample->time != (u64) -1))
1004		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
1005	else
1006		timestamp = 0;
1007
1008	if (timestamp || spe->timeless_decoding) {
1009		err = arm_spe__update_queues(spe);
1010		if (err)
1011			return err;
1012	}
1013
1014	if (spe->timeless_decoding) {
1015		if (event->header.type == PERF_RECORD_EXIT) {
1016			err = arm_spe_process_timeless_queues(spe,
1017					event->fork.tid,
1018					sample->time);
1019		}
1020	} else if (timestamp) {
1021		err = arm_spe_process_queues(spe, timestamp);
1022		if (err)
1023			return err;
1024
1025		if (!spe->use_ctx_pkt_for_pid &&
1026		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
1027		    event->header.type == PERF_RECORD_SWITCH))
1028			err = arm_spe_context_switch(spe, event, sample);
1029	}
1030
1031	return err;
1032}
1033
1034static int arm_spe_process_auxtrace_event(struct perf_session *session,
1035					  union perf_event *event,
1036					  const struct perf_tool *tool __maybe_unused)
1037{
1038	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1039					     auxtrace);
1040
1041	if (!spe->data_queued) {
1042		struct auxtrace_buffer *buffer;
1043		off_t data_offset;
1044		int fd = perf_data__fd(session->data);
1045		int err;
1046
1047		if (perf_data__is_pipe(session->data)) {
1048			data_offset = 0;
1049		} else {
1050			data_offset = lseek(fd, 0, SEEK_CUR);
1051			if (data_offset == -1)
1052				return -errno;
1053		}
1054
1055		err = auxtrace_queues__add_event(&spe->queues, session, event,
1056				data_offset, &buffer);
1057		if (err)
1058			return err;
1059
1060		/* Dump here now that we have copied a piped trace out of the pipe */
1061		if (dump_trace) {
1062			if (auxtrace_buffer__get_data(buffer, fd)) {
1063				arm_spe_dump_event(spe, buffer->data,
1064						buffer->size);
1065				auxtrace_buffer__put_data(buffer);
1066			}
1067		}
1068	}
1069
1070	return 0;
1071}
1072
1073static int arm_spe_flush(struct perf_session *session __maybe_unused,
1074			 const struct perf_tool *tool __maybe_unused)
1075{
1076	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1077			auxtrace);
1078	int ret;
1079
1080	if (dump_trace)
1081		return 0;
1082
1083	if (!tool->ordered_events)
1084		return -EINVAL;
1085
1086	ret = arm_spe__update_queues(spe);
1087	if (ret < 0)
1088		return ret;
1089
1090	if (spe->timeless_decoding)
1091		return arm_spe_process_timeless_queues(spe, -1,
1092				MAX_TIMESTAMP - 1);
1093
1094	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
1095	if (ret)
1096		return ret;
1097
1098	if (!spe->use_ctx_pkt_for_pid)
1099		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
1100			    "Matching of TIDs to SPE events could be inaccurate.\n");
1101
1102	return 0;
1103}
1104
1105static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size)
1106{
1107	u64 *metadata;
1108
1109	metadata = zalloc(per_cpu_size);
1110	if (!metadata)
1111		return NULL;
1112
1113	memcpy(metadata, buf, per_cpu_size);
1114	return metadata;
1115}
1116
1117static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
1118{
1119	int i;
1120
1121	for (i = 0; i < nr_cpu; i++)
1122		zfree(&metadata[i]);
1123	free(metadata);
1124}
1125
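/*
 * Parse the auxtrace_info private area.  Its layout, as inferred from
 * the accessors below, is:
 *
 *   header:  ARM_SPE_HEADER_VERSION, ARM_SPE_HEADER_SIZE,
 *            ARM_SPE_PMU_TYPE_V2, ARM_SPE_CPUS_NUM, ...
 *   per CPU: ARM_SPE_MAGIC, ARM_SPE_CPU, ARM_SPE_CPU_NR_PARAMS,
 *            ARM_SPE_CPU_MIDR, ARM_SPE_CPU_PMU_TYPE, ...
 *
 * A version 1 blob (fixed size ARM_SPE_AUXTRACE_V1_PRIV_SIZE) carries
 * no per-CPU metadata, so this function returns NULL with *ver = 1.
 */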
1126static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
1127				     u64 *ver, int *nr_cpu)
1128{
1129	u64 *ptr = (u64 *)info->priv;
1130	u64 metadata_size;
1131	u64 **metadata = NULL;
1132	int hdr_sz, per_cpu_sz, i;
1133
1134	metadata_size = info->header.size -
1135		sizeof(struct perf_record_auxtrace_info);
1136
1137	/* Metadata version 1 */
1138	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
1139		*ver = 1;
1140		*nr_cpu = 0;
1141		/* No per CPU metadata */
1142		return NULL;
1143	}
1144
1145	*ver = ptr[ARM_SPE_HEADER_VERSION];
1146	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
1147	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];
1148
1149	metadata = calloc(*nr_cpu, sizeof(*metadata));
1150	if (!metadata)
1151		return NULL;
1152
1153	/* Locate the start address of per CPU metadata */
1154	ptr += hdr_sz;
1155	per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);
1156
1157	for (i = 0; i < *nr_cpu; i++) {
1158		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
1159		if (!metadata[i])
1160			goto err_per_cpu_metadata;
1161
1162		ptr += per_cpu_sz / sizeof(u64);
1163	}
1164
1165	return metadata;
1166
1167err_per_cpu_metadata:
1168	arm_spe__free_metadata(metadata, *nr_cpu);
1169	return NULL;
1170}
1171
1172static void arm_spe_free_queue(void *priv)
1173{
1174	struct arm_spe_queue *speq = priv;
1175
1176	if (!speq)
1177		return;
1178	thread__zput(speq->thread);
1179	arm_spe_decoder_free(speq->decoder);
1180	zfree(&speq->event_buf);
1181	free(speq);
1182}
1183
1184static void arm_spe_free_events(struct perf_session *session)
1185{
1186	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1187					     auxtrace);
1188	struct auxtrace_queues *queues = &spe->queues;
1189	unsigned int i;
1190
1191	for (i = 0; i < queues->nr_queues; i++) {
1192		arm_spe_free_queue(queues->queue_array[i].priv);
1193		queues->queue_array[i].priv = NULL;
1194	}
1195	auxtrace_queues__free(queues);
1196}
1197
1198static void arm_spe_free(struct perf_session *session)
1199{
1200	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1201					     auxtrace);
1202
1203	auxtrace_heap__free(&spe->heap);
1204	arm_spe_free_events(session);
1205	session->auxtrace = NULL;
1206	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
1207	free(spe);
1208}
1209
1210static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
1211				      struct evsel *evsel)
1212{
1213	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
1214
1215	return evsel->core.attr.type == spe->pmu_type;
1216}
1217
1218static const char * const metadata_hdr_v1_fmts[] = {
1219	[ARM_SPE_PMU_TYPE]		= "  PMU Type           :%"PRId64"\n",
1220	[ARM_SPE_PER_CPU_MMAPS]		= "  Per CPU mmaps      :%"PRId64"\n",
1221};
1222
1223static const char * const metadata_hdr_fmts[] = {
1224	[ARM_SPE_HEADER_VERSION]	= "  Header version     :%"PRId64"\n",
1225	[ARM_SPE_HEADER_SIZE]		= "  Header size        :%"PRId64"\n",
1226	[ARM_SPE_PMU_TYPE_V2]		= "  PMU type v2        :%"PRId64"\n",
1227	[ARM_SPE_CPUS_NUM]		= "  CPU number         :%"PRId64"\n",
1228};
1229
1230static const char * const metadata_per_cpu_fmts[] = {
1231	[ARM_SPE_MAGIC]			= "    Magic            :0x%"PRIx64"\n",
1232	[ARM_SPE_CPU]			= "    CPU #            :%"PRId64"\n",
1233	[ARM_SPE_CPU_NR_PARAMS]		= "    Num of params    :%"PRId64"\n",
1234	[ARM_SPE_CPU_MIDR]		= "    MIDR             :0x%"PRIx64"\n",
1235	[ARM_SPE_CPU_PMU_TYPE]		= "    PMU Type         :%"PRId64"\n",
1236	[ARM_SPE_CAP_MIN_IVAL]		= "    Min Interval     :%"PRId64"\n",
1237};
1238
1239static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
1240{
1241	unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
1242	const char * const *hdr_fmts;
1243
1244	if (!dump_trace)
1245		return;
1246
1247	if (spe->metadata_ver == 1) {
1248		cpu_num = 0;
1249		hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
1250		hdr_fmts = metadata_hdr_v1_fmts;
1251	} else {
1252		cpu_num = arr[ARM_SPE_CPUS_NUM];
1253		hdr_size = arr[ARM_SPE_HEADER_SIZE];
1254		hdr_fmts = metadata_hdr_fmts;
1255	}
1256
1257	for (i = 0; i < hdr_size; i++)
1258		fprintf(stdout, hdr_fmts[i], arr[i]);
1259
1260	arr += hdr_size;
1261	for (cpu = 0; cpu < cpu_num; cpu++) {
1262		/*
1263		 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
1264		 * are fixed. The sequential parameter size is decided by the
1265		 * field 'ARM_SPE_CPU_NR_PARAMS'.
1266		 */
1267		cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
1268		for (i = 0; i < cpu_size; i++)
1269			fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
1270		arr += cpu_size;
1271	}
1272}
1273
1274static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
1275				    const char *name)
1276{
1277	struct evsel *evsel;
1278
1279	evlist__for_each_entry(evlist, evsel) {
1280		if (evsel->core.id && evsel->core.id[0] == id) {
1281			if (evsel->name)
1282				zfree(&evsel->name);
1283			evsel->name = strdup(name);
1284			break;
1285		}
1286	}
1287}
1288
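/*
 * Create one synthetic event attribute per enabled --itrace option and
 * register it with the session.  Synthetic sample ids start at a fixed
 * offset (1000000000) above the SPE evsel's first id to avoid colliding
 * with existing ids.
 */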
1289static int
1290arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
1291{
1292	struct evlist *evlist = session->evlist;
1293	struct evsel *evsel;
1294	struct perf_event_attr attr;
1295	bool found = false;
1296	u64 id;
1297	int err;
1298
1299	evlist__for_each_entry(evlist, evsel) {
1300		if (evsel->core.attr.type == spe->pmu_type) {
1301			found = true;
1302			break;
1303		}
1304	}
1305
1306	if (!found) {
1307		pr_debug("No selected events with SPE trace data\n");
1308		return 0;
1309	}
1310
1311	memset(&attr, 0, sizeof(struct perf_event_attr));
1312	attr.size = sizeof(struct perf_event_attr);
1313	attr.type = PERF_TYPE_HARDWARE;
1314	attr.sample_type = evsel->core.attr.sample_type &
1315				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
1316	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1317			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
1318			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
1319	if (spe->timeless_decoding)
1320		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1321	else
1322		attr.sample_type |= PERF_SAMPLE_TIME;
1323
1324	spe->sample_type = attr.sample_type;
1325
1326	attr.exclude_user = evsel->core.attr.exclude_user;
1327	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1328	attr.exclude_hv = evsel->core.attr.exclude_hv;
1329	attr.exclude_host = evsel->core.attr.exclude_host;
1330	attr.exclude_guest = evsel->core.attr.exclude_guest;
1331	attr.sample_id_all = evsel->core.attr.sample_id_all;
1332	attr.read_format = evsel->core.attr.read_format;
1333
1334	/* create new id val to be a fixed offset from evsel id */
1335	id = evsel->core.id[0] + 1000000000;
1336
1337	if (!id)
1338		id = 1;
1339
1340	if (spe->synth_opts.flc) {
1341		spe->sample_flc = true;
1342
1343		/* Level 1 data cache miss */
1344		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1345		if (err)
1346			return err;
1347		spe->l1d_miss_id = id;
1348		arm_spe_set_event_name(evlist, id, "l1d-miss");
1349		id += 1;
1350
1351		/* Level 1 data cache access */
1352		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1353		if (err)
1354			return err;
1355		spe->l1d_access_id = id;
1356		arm_spe_set_event_name(evlist, id, "l1d-access");
1357		id += 1;
1358	}
1359
1360	if (spe->synth_opts.llc) {
1361		spe->sample_llc = true;
1362
1363		/* Last level cache miss */
1364		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1365		if (err)
1366			return err;
1367		spe->llc_miss_id = id;
1368		arm_spe_set_event_name(evlist, id, "llc-miss");
1369		id += 1;
1370
1371		/* Last level cache access */
1372		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1373		if (err)
1374			return err;
1375		spe->llc_access_id = id;
1376		arm_spe_set_event_name(evlist, id, "llc-access");
1377		id += 1;
1378	}
1379
1380	if (spe->synth_opts.tlb) {
1381		spe->sample_tlb = true;
1382
1383		/* TLB miss */
1384		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1385		if (err)
1386			return err;
1387		spe->tlb_miss_id = id;
1388		arm_spe_set_event_name(evlist, id, "tlb-miss");
1389		id += 1;
1390
1391		/* TLB access */
1392		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1393		if (err)
1394			return err;
1395		spe->tlb_access_id = id;
1396		arm_spe_set_event_name(evlist, id, "tlb-access");
1397		id += 1;
1398	}
1399
1400	if (spe->synth_opts.branches) {
1401		spe->sample_branch = true;
1402
1403		/* Branch */
1404		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1405		if (err)
1406			return err;
1407		spe->branch_id = id;
1408		arm_spe_set_event_name(evlist, id, "branch");
1409		id += 1;
1410	}
1411
1412	if (spe->synth_opts.remote_access) {
1413		spe->sample_remote_access = true;
1414
1415		/* Remote access */
1416		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1417		if (err)
1418			return err;
1419		spe->remote_access_id = id;
1420		arm_spe_set_event_name(evlist, id, "remote-access");
1421		id += 1;
1422	}
1423
1424	if (spe->synth_opts.mem) {
1425		spe->sample_memory = true;
1426
1427		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1428		if (err)
1429			return err;
1430		spe->memory_id = id;
1431		arm_spe_set_event_name(evlist, id, "memory");
1432		id += 1;
1433	}
1434
1435	if (spe->synth_opts.instructions) {
1436		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
1437			pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
1438			goto synth_instructions_out;
1439		}
1440		if (spe->synth_opts.period > 1)
1441			pr_warning("Arm SPE has a hardware-based sample period.\n"
1442				   "Additional instruction events will be discarded by --itrace\n");
1443
1444		spe->sample_instructions = true;
1445		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1446		attr.sample_period = spe->synth_opts.period;
1447		spe->instructions_sample_period = attr.sample_period;
1448		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1449		if (err)
1450			return err;
1451		spe->instructions_id = id;
1452		arm_spe_set_event_name(evlist, id, "instructions");
1453	}
1454synth_instructions_out:
1455
1456	return 0;
1457}
1458
1459static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
1460{
1461	u64 midr;
1462	int i;
1463
1464	if (!nr_cpu)
1465		return false;
1466
1467	for (i = 0; i < nr_cpu; i++) {
1468		if (!metadata[i])
1469			return false;
1470
1471		if (i == 0) {
1472			midr = metadata[i][ARM_SPE_CPU_MIDR];
1473			continue;
1474		}
1475
1476		if (midr != metadata[i][ARM_SPE_CPU_MIDR])
1477			return false;
1478	}
1479
1480	return true;
1481}
1482
1483int arm_spe_process_auxtrace_info(union perf_event *event,
1484				  struct perf_session *session)
1485{
1486	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1487	size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
1488	struct perf_record_time_conv *tc = &session->time_conv;
1489	struct arm_spe *spe;
1490	u64 **metadata = NULL;
1491	u64 metadata_ver;
1492	int nr_cpu, err;
1493
1494	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
1495					min_sz)
1496		return -EINVAL;
1497
1498	metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
1499					   &nr_cpu);
1500	if (!metadata && metadata_ver != 1) {
1501		pr_err("Failed to parse Arm SPE metadata.\n");
1502		return -EINVAL;
1503	}
1504
1505	spe = zalloc(sizeof(struct arm_spe));
1506	if (!spe) {
1507		err = -ENOMEM;
1508		goto err_free_metadata;
1509	}
1510
1511	err = auxtrace_queues__init(&spe->queues);
1512	if (err)
1513		goto err_free;
1514
1515	spe->session = session;
1516	spe->machine = &session->machines.host; /* No kvm support */
1517	spe->auxtrace_type = auxtrace_info->type;
1518	if (metadata_ver == 1)
1519		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
1520	else
1521		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
1522	spe->metadata = metadata;
1523	spe->metadata_ver = metadata_ver;
1524	spe->metadata_nr_cpu = nr_cpu;
1525	spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);
1526
1527	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1528
1529	/*
1530	 * The synthesized event PERF_RECORD_TIME_CONV has been handled earlier
1531	 * and the parameters for the hardware clock are stored in the session
1532	 * context.  Pass these parameters to the struct perf_tsc_conversion
1533	 * in "spe->tc", which is used later for conversion between the clock
1534	 * counter and timestamps.
1535	 *
1536	 * For backward compatibility, copy the fields starting from
1537	 * "time_cycles" only if they are contained in the event.
1538	 */
1539	spe->tc.time_shift = tc->time_shift;
1540	spe->tc.time_mult = tc->time_mult;
1541	spe->tc.time_zero = tc->time_zero;
1542
1543	if (event_contains(*tc, time_cycles)) {
1544		spe->tc.time_cycles = tc->time_cycles;
1545		spe->tc.time_mask = tc->time_mask;
1546		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
1547		spe->tc.cap_user_time_short = tc->cap_user_time_short;
1548	}
1549
1550	spe->auxtrace.process_event = arm_spe_process_event;
1551	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1552	spe->auxtrace.flush_events = arm_spe_flush;
1553	spe->auxtrace.free_events = arm_spe_free_events;
1554	spe->auxtrace.free = arm_spe_free;
1555	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
1556	session->auxtrace = &spe->auxtrace;
1557
1558	arm_spe_print_info(spe, &auxtrace_info->priv[0]);
1559
1560	if (dump_trace)
1561		return 0;
1562
1563	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
1564		spe->synth_opts = *session->itrace_synth_opts;
1565	else
1566		itrace_synth_opts__set_default(&spe->synth_opts, false);
1567
1568	err = arm_spe_synth_events(spe, session);
1569	if (err)
1570		goto err_free_queues;
1571
1572	err = auxtrace_queues__process_index(&spe->queues, session);
1573	if (err)
1574		goto err_free_queues;
1575
1576	if (spe->queues.populated)
1577		spe->data_queued = true;
1578
1579	return 0;
1580
1581err_free_queues:
1582	auxtrace_queues__free(&spe->queues);
1583	session->auxtrace = NULL;
1584err_free:
1585	free(spe);
1586err_free_metadata:
1587	arm_spe__free_metadata(metadata, nr_cpu);
1588	return err;
1589}
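/*
 * A minimal usage sketch for this decoder (assuming an SPE-capable
 * arm64 system; the PMU instance name, here arm_spe_0, and the
 * supported --itrace flags are documented in
 * tools/perf/Documentation/itrace.txt):
 *
 *   perf record -e arm_spe_0// -a -- sleep 1
 *   perf report --itrace=m    # synthesize last-level cache events
 *   perf report -D            # raw dump path, see arm_spe_dump()
 */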