Loading...
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Arm Statistical Profiling Extensions (SPE) support
4 * Copyright (c) 2017-2018, Arm Ltd.
5 */
6
7#include <endian.h>
8#include <errno.h>
9#include <byteswap.h>
10#include <inttypes.h>
11#include <unistd.h>
12#include <stdlib.h>
13#include <linux/kernel.h>
14#include <linux/types.h>
15#include <linux/bitops.h>
16#include <linux/log2.h>
17#include <linux/zalloc.h>
18
19#include "color.h"
20#include "evsel.h"
21#include "machine.h"
22#include "session.h"
23#include "debug.h"
24#include "auxtrace.h"
25#include "arm-spe.h"
26#include "arm-spe-pkt-decoder.h"
27
/*
 * Per-session Arm SPE state. The session stores a pointer to the embedded
 * 'auxtrace' member and the callbacks in this file recover the enclosing
 * structure from it with container_of().
 */
struct arm_spe {
	struct auxtrace auxtrace;	/* callback table installed in session->auxtrace */
	struct auxtrace_queues queues;	/* AUX trace buffers added by auxtrace_queues__add_event() */
	struct auxtrace_heap heap;	/* released by arm_spe_free() */
	u32 auxtrace_type;		/* copied from auxtrace_info->type */
	struct perf_session *session;	/* owning session */
	struct machine *machine;	/* host machine of the session */
	u32 pmu_type;			/* copied from priv[ARM_SPE_PMU_TYPE] */
};
37
/*
 * Private data hung off an auxtrace queue (queue_array[i].priv) and released
 * by arm_spe_free_queue().
 * NOTE(review): nothing in this revision of the file fills in these fields;
 * the per-field meanings below are inferred from the names only.
 */
struct arm_spe_queue {
	struct arm_spe *spe;		/* presumably the owning decoder state */
	unsigned int queue_nr;		/* presumably the index into queue_array */
	struct auxtrace_buffer *buffer;
	bool on_heap;
	bool done;
	pid_t pid;
	pid_t tid;
	int cpu;
};
48
49static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
50 unsigned char *buf, size_t len)
51{
52 struct arm_spe_pkt packet;
53 size_t pos = 0;
54 int ret, pkt_len, i;
55 char desc[ARM_SPE_PKT_DESC_MAX];
56 const char *color = PERF_COLOR_BLUE;
57
58 color_fprintf(stdout, color,
59 ". ... ARM SPE data: size %zu bytes\n",
60 len);
61
62 while (len) {
63 ret = arm_spe_get_packet(buf, len, &packet);
64 if (ret > 0)
65 pkt_len = ret;
66 else
67 pkt_len = 1;
68 printf(".");
69 color_fprintf(stdout, color, " %08x: ", pos);
70 for (i = 0; i < pkt_len; i++)
71 color_fprintf(stdout, color, " %02x", buf[i]);
72 for (; i < 16; i++)
73 color_fprintf(stdout, color, " ");
74 if (ret > 0) {
75 ret = arm_spe_pkt_desc(&packet, desc,
76 ARM_SPE_PKT_DESC_MAX);
77 if (ret > 0)
78 color_fprintf(stdout, color, " %s\n", desc);
79 } else {
80 color_fprintf(stdout, color, " Bad packet!\n");
81 }
82 pos += pkt_len;
83 buf += pkt_len;
84 len -= pkt_len;
85 }
86}
87
/* Print a "." separator line and then dump the @len bytes of trace at @buf. */
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	fputs(".\n", stdout);
	arm_spe_dump(spe, buf, len);
}
94
95static int arm_spe_process_event(struct perf_session *session __maybe_unused,
96 union perf_event *event __maybe_unused,
97 struct perf_sample *sample __maybe_unused,
98 struct perf_tool *tool __maybe_unused)
99{
100 return 0;
101}
102
103static int arm_spe_process_auxtrace_event(struct perf_session *session,
104 union perf_event *event,
105 struct perf_tool *tool __maybe_unused)
106{
107 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
108 auxtrace);
109 struct auxtrace_buffer *buffer;
110 off_t data_offset;
111 int fd = perf_data__fd(session->data);
112 int err;
113
114 if (perf_data__is_pipe(session->data)) {
115 data_offset = 0;
116 } else {
117 data_offset = lseek(fd, 0, SEEK_CUR);
118 if (data_offset == -1)
119 return -errno;
120 }
121
122 err = auxtrace_queues__add_event(&spe->queues, session, event,
123 data_offset, &buffer);
124 if (err)
125 return err;
126
127 /* Dump here now we have copied a piped trace out of the pipe */
128 if (dump_trace) {
129 if (auxtrace_buffer__get_data(buffer, fd)) {
130 arm_spe_dump_event(spe, buffer->data,
131 buffer->size);
132 auxtrace_buffer__put_data(buffer);
133 }
134 }
135
136 return 0;
137}
138
139static int arm_spe_flush(struct perf_session *session __maybe_unused,
140 struct perf_tool *tool __maybe_unused)
141{
142 return 0;
143}
144
/* Release the per-queue private data; @priv may be NULL. */
static void arm_spe_free_queue(void *priv)
{
	/* free() accepts NULL, so no separate guard is required */
	free(priv);
}
153
154static void arm_spe_free_events(struct perf_session *session)
155{
156 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
157 auxtrace);
158 struct auxtrace_queues *queues = &spe->queues;
159 unsigned int i;
160
161 for (i = 0; i < queues->nr_queues; i++) {
162 arm_spe_free_queue(queues->queue_array[i].priv);
163 queues->queue_array[i].priv = NULL;
164 }
165 auxtrace_queues__free(queues);
166}
167
168static void arm_spe_free(struct perf_session *session)
169{
170 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
171 auxtrace);
172
173 auxtrace_heap__free(&spe->heap);
174 arm_spe_free_events(session);
175 session->auxtrace = NULL;
176 free(spe);
177}
178
179static const char * const arm_spe_info_fmts[] = {
180 [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n",
181};
182
183static void arm_spe_print_info(__u64 *arr)
184{
185 if (!dump_trace)
186 return;
187
188 fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
189}
190
191int arm_spe_process_auxtrace_info(union perf_event *event,
192 struct perf_session *session)
193{
194 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
195 size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE;
196 struct arm_spe *spe;
197 int err;
198
199 if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
200 min_sz)
201 return -EINVAL;
202
203 spe = zalloc(sizeof(struct arm_spe));
204 if (!spe)
205 return -ENOMEM;
206
207 err = auxtrace_queues__init(&spe->queues);
208 if (err)
209 goto err_free;
210
211 spe->session = session;
212 spe->machine = &session->machines.host; /* No kvm support */
213 spe->auxtrace_type = auxtrace_info->type;
214 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
215
216 spe->auxtrace.process_event = arm_spe_process_event;
217 spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
218 spe->auxtrace.flush_events = arm_spe_flush;
219 spe->auxtrace.free_events = arm_spe_free_events;
220 spe->auxtrace.free = arm_spe_free;
221 session->auxtrace = &spe->auxtrace;
222
223 arm_spe_print_info(&auxtrace_info->priv[0]);
224
225 return 0;
226
227err_free:
228 free(spe);
229 return err;
230}
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Arm Statistical Profiling Extensions (SPE) support
4 * Copyright (c) 2017-2018, Arm Ltd.
5 */
6
7#include <byteswap.h>
8#include <endian.h>
9#include <errno.h>
10#include <inttypes.h>
11#include <linux/bitops.h>
12#include <linux/kernel.h>
13#include <linux/log2.h>
14#include <linux/types.h>
15#include <linux/zalloc.h>
16#include <stdlib.h>
17#include <unistd.h>
18
19#include "auxtrace.h"
20#include "color.h"
21#include "debug.h"
22#include "evlist.h"
23#include "evsel.h"
24#include "machine.h"
25#include "session.h"
26#include "symbol.h"
27#include "thread.h"
28#include "thread-stack.h"
29#include "tsc.h"
30#include "tool.h"
31#include "util/synthetic-events.h"
32
33#include "arm-spe.h"
34#include "arm-spe-decoder/arm-spe-decoder.h"
35#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
36
/* All-ones 64-bit value; arm_spe_flush() uses it as the "process everything" limit */
#define MAX_TIMESTAMP (~0ULL)
38
/*
 * Per-session Arm SPE decoding state. The session stores a pointer to the
 * embedded 'auxtrace' member and the callbacks in this file recover the
 * enclosing structure from it with container_of().
 */
struct arm_spe {
	struct auxtrace auxtrace;		/* callback table installed in session->auxtrace */
	struct auxtrace_queues queues;		/* queued AUX trace buffers */
	struct auxtrace_heap heap;		/* queues ordered by their next record timestamp */
	struct itrace_synth_opts synth_opts;	/* session's itrace options, or the defaults */
	u32 auxtrace_type;			/* copied from auxtrace_info->type */
	struct perf_session *session;		/* owning session */
	struct machine *machine;		/* host machine of the session */
	u32 pmu_type;				/* copied from priv[ARM_SPE_PMU_TYPE] */

	/* clock conversion parameters copied from session->time_conv */
	struct perf_tsc_conversion tc;

	u8 timeless_decoding;	/* set when no evsel samples PERF_SAMPLE_TIME */
	u8 data_queued;		/* set when the queues were populated from the index */

	/* which kinds of samples to synthesize; set from synth_opts */
	u8 sample_flc;
	u8 sample_llc;
	u8 sample_tlb;
	u8 sample_branch;
	u8 sample_remote_access;
	u8 sample_memory;

	/* sample ids assigned to the synthesized events */
	u64 l1d_miss_id;
	u64 l1d_access_id;
	u64 llc_miss_id;
	u64 llc_access_id;
	u64 tlb_miss_id;
	u64 tlb_access_id;
	u64 branch_miss_id;
	u64 remote_access_id;
	u64 memory_id;

	/* looked up on first use; addresses at or above it count as kernel */
	u64 kernel_start;

	/* NOTE(review): not referenced anywhere in this file */
	unsigned long num_events;
};
75
/* Decoding state for one auxtrace queue, stored in the queue's 'priv' pointer */
struct arm_spe_queue {
	struct arm_spe *spe;			/* owning decoder state */
	unsigned int queue_nr;			/* index of the queue in spe->queues.queue_array */
	struct auxtrace_buffer *buffer;		/* buffer most recently fetched for the decoder */
	struct auxtrace_buffer *old_buffer;	/* last non-empty buffer; its data is dropped when the next one is handed out */
	union perf_event *event_buf;		/* PERF_SAMPLE_MAX_SIZE scratch event for synthesized samples */
	bool on_heap;				/* queue currently has an entry in spe->heap */
	bool done;				/* NOTE(review): not referenced in this file */
	pid_t pid;				/* -1 until a thread has been resolved */
	pid_t tid;				/* -1 until known */
	int cpu;				/* -1 until known */
	struct arm_spe_decoder *decoder;	/* packet decoder fed by arm_spe_get_trace() */
	u64 time;				/* set by arm_spe_process_timeless_queues() */
	u64 timestamp;				/* latest record timestamp seen on this queue */
	struct thread *thread;			/* thread found for 'tid', if any */
};
92
93static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
94 unsigned char *buf, size_t len)
95{
96 struct arm_spe_pkt packet;
97 size_t pos = 0;
98 int ret, pkt_len, i;
99 char desc[ARM_SPE_PKT_DESC_MAX];
100 const char *color = PERF_COLOR_BLUE;
101
102 color_fprintf(stdout, color,
103 ". ... ARM SPE data: size %zu bytes\n",
104 len);
105
106 while (len) {
107 ret = arm_spe_get_packet(buf, len, &packet);
108 if (ret > 0)
109 pkt_len = ret;
110 else
111 pkt_len = 1;
112 printf(".");
113 color_fprintf(stdout, color, " %08x: ", pos);
114 for (i = 0; i < pkt_len; i++)
115 color_fprintf(stdout, color, " %02x", buf[i]);
116 for (; i < 16; i++)
117 color_fprintf(stdout, color, " ");
118 if (ret > 0) {
119 ret = arm_spe_pkt_desc(&packet, desc,
120 ARM_SPE_PKT_DESC_MAX);
121 if (!ret)
122 color_fprintf(stdout, color, " %s\n", desc);
123 } else {
124 color_fprintf(stdout, color, " Bad packet!\n");
125 }
126 pos += pkt_len;
127 buf += pkt_len;
128 len -= pkt_len;
129 }
130}
131
/* Print a "." separator line and then dump the @len bytes of trace at @buf. */
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	fputs(".\n", stdout);
	arm_spe_dump(spe, buf, len);
}
138
139static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
140{
141 struct arm_spe_queue *speq = data;
142 struct auxtrace_buffer *buffer = speq->buffer;
143 struct auxtrace_buffer *old_buffer = speq->old_buffer;
144 struct auxtrace_queue *queue;
145
146 queue = &speq->spe->queues.queue_array[speq->queue_nr];
147
148 buffer = auxtrace_buffer__next(queue, buffer);
149 /* If no more data, drop the previous auxtrace_buffer and return */
150 if (!buffer) {
151 if (old_buffer)
152 auxtrace_buffer__drop_data(old_buffer);
153 b->len = 0;
154 return 0;
155 }
156
157 speq->buffer = buffer;
158
159 /* If the aux_buffer doesn't have data associated, try to load it */
160 if (!buffer->data) {
161 /* get the file desc associated with the perf data file */
162 int fd = perf_data__fd(speq->spe->session->data);
163
164 buffer->data = auxtrace_buffer__get_data(buffer, fd);
165 if (!buffer->data)
166 return -ENOMEM;
167 }
168
169 b->len = buffer->size;
170 b->buf = buffer->data;
171
172 if (b->len) {
173 if (old_buffer)
174 auxtrace_buffer__drop_data(old_buffer);
175 speq->old_buffer = buffer;
176 } else {
177 auxtrace_buffer__drop_data(buffer);
178 return arm_spe_get_trace(b, data);
179 }
180
181 return 0;
182}
183
184static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
185 unsigned int queue_nr)
186{
187 struct arm_spe_params params = { .get_trace = 0, };
188 struct arm_spe_queue *speq;
189
190 speq = zalloc(sizeof(*speq));
191 if (!speq)
192 return NULL;
193
194 speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
195 if (!speq->event_buf)
196 goto out_free;
197
198 speq->spe = spe;
199 speq->queue_nr = queue_nr;
200 speq->pid = -1;
201 speq->tid = -1;
202 speq->cpu = -1;
203
204 /* params set */
205 params.get_trace = arm_spe_get_trace;
206 params.data = speq;
207
208 /* create new decoder */
209 speq->decoder = arm_spe_decoder_new(¶ms);
210 if (!speq->decoder)
211 goto out_free;
212
213 return speq;
214
215out_free:
216 zfree(&speq->event_buf);
217 free(speq);
218
219 return NULL;
220}
221
222static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
223{
224 return ip >= spe->kernel_start ?
225 PERF_RECORD_MISC_KERNEL :
226 PERF_RECORD_MISC_USER;
227}
228
229static void arm_spe_prep_sample(struct arm_spe *spe,
230 struct arm_spe_queue *speq,
231 union perf_event *event,
232 struct perf_sample *sample)
233{
234 struct arm_spe_record *record = &speq->decoder->record;
235
236 if (!spe->timeless_decoding)
237 sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
238
239 sample->ip = record->from_ip;
240 sample->cpumode = arm_spe_cpumode(spe, sample->ip);
241 sample->pid = speq->pid;
242 sample->tid = speq->tid;
243 sample->period = 1;
244 sample->cpu = speq->cpu;
245
246 event->sample.header.type = PERF_RECORD_SAMPLE;
247 event->sample.header.misc = sample->cpumode;
248 event->sample.header.size = sizeof(struct perf_event_header);
249}
250
251static inline int
252arm_spe_deliver_synth_event(struct arm_spe *spe,
253 struct arm_spe_queue *speq __maybe_unused,
254 union perf_event *event,
255 struct perf_sample *sample)
256{
257 int ret;
258
259 ret = perf_session__deliver_synth_event(spe->session, event, sample);
260 if (ret)
261 pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
262
263 return ret;
264}
265
266static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
267 u64 spe_events_id, u64 data_src)
268{
269 struct arm_spe *spe = speq->spe;
270 struct arm_spe_record *record = &speq->decoder->record;
271 union perf_event *event = speq->event_buf;
272 struct perf_sample sample = { .ip = 0, };
273
274 arm_spe_prep_sample(spe, speq, event, &sample);
275
276 sample.id = spe_events_id;
277 sample.stream_id = spe_events_id;
278 sample.addr = record->virt_addr;
279 sample.phys_addr = record->phys_addr;
280 sample.data_src = data_src;
281
282 return arm_spe_deliver_synth_event(spe, speq, event, &sample);
283}
284
285static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
286 u64 spe_events_id)
287{
288 struct arm_spe *spe = speq->spe;
289 struct arm_spe_record *record = &speq->decoder->record;
290 union perf_event *event = speq->event_buf;
291 struct perf_sample sample = { .ip = 0, };
292
293 arm_spe_prep_sample(spe, speq, event, &sample);
294
295 sample.id = spe_events_id;
296 sample.stream_id = spe_events_id;
297 sample.addr = record->to_ip;
298
299 return arm_spe_deliver_synth_event(spe, speq, event, &sample);
300}
301
302#define SPE_MEM_TYPE (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
303 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
304 ARM_SPE_REMOTE_ACCESS)
305
306static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
307{
308 if (type & SPE_MEM_TYPE)
309 return true;
310
311 return false;
312}
313
314static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
315{
316 union perf_mem_data_src data_src = { 0 };
317
318 if (record->op == ARM_SPE_LD)
319 data_src.mem_op = PERF_MEM_OP_LOAD;
320 else
321 data_src.mem_op = PERF_MEM_OP_STORE;
322
323 if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
324 data_src.mem_lvl = PERF_MEM_LVL_L3;
325
326 if (record->type & ARM_SPE_LLC_MISS)
327 data_src.mem_lvl |= PERF_MEM_LVL_MISS;
328 else
329 data_src.mem_lvl |= PERF_MEM_LVL_HIT;
330 } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
331 data_src.mem_lvl = PERF_MEM_LVL_L1;
332
333 if (record->type & ARM_SPE_L1D_MISS)
334 data_src.mem_lvl |= PERF_MEM_LVL_MISS;
335 else
336 data_src.mem_lvl |= PERF_MEM_LVL_HIT;
337 }
338
339 if (record->type & ARM_SPE_REMOTE_ACCESS)
340 data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;
341
342 if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
343 data_src.mem_dtlb = PERF_MEM_TLB_WK;
344
345 if (record->type & ARM_SPE_TLB_MISS)
346 data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
347 else
348 data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
349 }
350
351 return data_src.val;
352}
353
354static int arm_spe_sample(struct arm_spe_queue *speq)
355{
356 const struct arm_spe_record *record = &speq->decoder->record;
357 struct arm_spe *spe = speq->spe;
358 u64 data_src;
359 int err;
360
361 data_src = arm_spe__synth_data_source(record);
362
363 if (spe->sample_flc) {
364 if (record->type & ARM_SPE_L1D_MISS) {
365 err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
366 data_src);
367 if (err)
368 return err;
369 }
370
371 if (record->type & ARM_SPE_L1D_ACCESS) {
372 err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
373 data_src);
374 if (err)
375 return err;
376 }
377 }
378
379 if (spe->sample_llc) {
380 if (record->type & ARM_SPE_LLC_MISS) {
381 err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
382 data_src);
383 if (err)
384 return err;
385 }
386
387 if (record->type & ARM_SPE_LLC_ACCESS) {
388 err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
389 data_src);
390 if (err)
391 return err;
392 }
393 }
394
395 if (spe->sample_tlb) {
396 if (record->type & ARM_SPE_TLB_MISS) {
397 err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
398 data_src);
399 if (err)
400 return err;
401 }
402
403 if (record->type & ARM_SPE_TLB_ACCESS) {
404 err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
405 data_src);
406 if (err)
407 return err;
408 }
409 }
410
411 if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
412 err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
413 if (err)
414 return err;
415 }
416
417 if (spe->sample_remote_access &&
418 (record->type & ARM_SPE_REMOTE_ACCESS)) {
419 err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
420 data_src);
421 if (err)
422 return err;
423 }
424
425 if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
426 err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
427 if (err)
428 return err;
429 }
430
431 return 0;
432}
433
/*
 * Decode one queue and synthesize samples until the queue runs out of data
 * or catches up with the perf event that is currently being processed.
 *
 * @speq:      queue to decode
 * @timestamp: in: limit to decode up to; out: the queue's timestamp when
 *             the function stops because that limit was reached
 *
 * Returns a negative error if synthesizing a sample fails, 1 when the
 * decoder reports that there is no more data, and 0 when decoding stopped
 * because the queue's timestamp reached *timestamp.
 */
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	/* resolve the kernel start address the first time it is needed */
	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual flow is to decode the packets first and then
		 * synthesize samples from the resulting record. Here the
		 * order is reversed: arm_spe_sample() is called before
		 * arm_spe_decode().
		 *
		 * There are two reasons for this:
		 * 1. arm_spe__setup_queue() has already decoded trace data
		 *    and produced a record, but no sample has been generated
		 *    for it yet; that pending record is handled here first.
		 * 2. After decoding, the record's timestamp is compared with
		 *    the incoming perf event. If the record is later than
		 *    the event, this function bails out and the queue is
		 *    pushed back onto the auxtrace heap, so the sample for
		 *    that record is deferred until the next call. This keeps
		 *    Arm SPE samples and other perf events in correct time
		 *    order.
		 */
		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * An error was detected while decoding the SPE trace data;
		 * carry on with the following trace data to find more
		 * records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than the timestamp
		 * of the incoming perf event, bail out so that the perf event
		 * can be processed first.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}
499
500static int arm_spe__setup_queue(struct arm_spe *spe,
501 struct auxtrace_queue *queue,
502 unsigned int queue_nr)
503{
504 struct arm_spe_queue *speq = queue->priv;
505 struct arm_spe_record *record;
506
507 if (list_empty(&queue->head) || speq)
508 return 0;
509
510 speq = arm_spe__alloc_queue(spe, queue_nr);
511
512 if (!speq)
513 return -ENOMEM;
514
515 queue->priv = speq;
516
517 if (queue->cpu != -1)
518 speq->cpu = queue->cpu;
519
520 if (!speq->on_heap) {
521 int ret;
522
523 if (spe->timeless_decoding)
524 return 0;
525
526retry:
527 ret = arm_spe_decode(speq->decoder);
528
529 if (!ret)
530 return 0;
531
532 if (ret < 0)
533 goto retry;
534
535 record = &speq->decoder->record;
536
537 speq->timestamp = record->timestamp;
538 ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
539 if (ret)
540 return ret;
541 speq->on_heap = true;
542 }
543
544 return 0;
545}
546
547static int arm_spe__setup_queues(struct arm_spe *spe)
548{
549 unsigned int i;
550 int ret;
551
552 for (i = 0; i < spe->queues.nr_queues; i++) {
553 ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
554 if (ret)
555 return ret;
556 }
557
558 return 0;
559}
560
561static int arm_spe__update_queues(struct arm_spe *spe)
562{
563 if (spe->queues.new_data) {
564 spe->queues.new_data = false;
565 return arm_spe__setup_queues(spe);
566 }
567
568 return 0;
569}
570
571static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
572{
573 struct evsel *evsel;
574 struct evlist *evlist = spe->session->evlist;
575 bool timeless_decoding = true;
576
577 /*
578 * Circle through the list of event and complain if we find one
579 * with the time bit set.
580 */
581 evlist__for_each_entry(evlist, evsel) {
582 if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
583 timeless_decoding = false;
584 }
585
586 return timeless_decoding;
587}
588
589static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
590 struct auxtrace_queue *queue)
591{
592 struct arm_spe_queue *speq = queue->priv;
593 pid_t tid;
594
595 tid = machine__get_current_tid(spe->machine, speq->cpu);
596 if (tid != -1) {
597 speq->tid = tid;
598 thread__zput(speq->thread);
599 } else
600 speq->tid = queue->tid;
601
602 if ((!speq->thread) && (speq->tid != -1)) {
603 speq->thread = machine__find_thread(spe->machine, -1,
604 speq->tid);
605 }
606
607 if (speq->thread) {
608 speq->pid = speq->thread->pid_;
609 if (queue->cpu == -1)
610 speq->cpu = speq->thread->cpu;
611 }
612}
613
/*
 * Decode queued trace in timestamp order up to @timestamp.
 *
 * The queue at the top of the heap is popped and decoded until it either
 * runs out of data or reaches the limit computed below; in the latter case
 * it is put back on the heap under its new timestamp. This repeats until
 * the heap is empty or its top entry is at or beyond @timestamp.
 *
 * Returns 0 on success or a negative error from decoding or from
 * re-adding a queue to the heap.
 */
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		/* the earliest queue is not older than the limit: done */
		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		/*
		 * Decode this queue only until just past the timestamp of the
		 * next queue on the heap, capped at the caller's limit.
		 */
		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			/* put the queue back before reporting the error */
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			/* more data pending: requeue under the updated timestamp */
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			/* queue is exhausted and stays off the heap */
			speq->on_heap = false;
		}
	}

	return 0;
}
663
664static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
665 u64 time_)
666{
667 struct auxtrace_queues *queues = &spe->queues;
668 unsigned int i;
669 u64 ts = 0;
670
671 for (i = 0; i < queues->nr_queues; i++) {
672 struct auxtrace_queue *queue = &spe->queues.queue_array[i];
673 struct arm_spe_queue *speq = queue->priv;
674
675 if (speq && (tid == -1 || speq->tid == tid)) {
676 speq->time = time_;
677 arm_spe_set_pid_tid_cpu(spe, queue);
678 arm_spe_run_decoder(speq, &ts);
679 }
680 }
681 return 0;
682}
683
684static int arm_spe_process_event(struct perf_session *session,
685 union perf_event *event,
686 struct perf_sample *sample,
687 struct perf_tool *tool)
688{
689 int err = 0;
690 u64 timestamp;
691 struct arm_spe *spe = container_of(session->auxtrace,
692 struct arm_spe, auxtrace);
693
694 if (dump_trace)
695 return 0;
696
697 if (!tool->ordered_events) {
698 pr_err("SPE trace requires ordered events\n");
699 return -EINVAL;
700 }
701
702 if (sample->time && (sample->time != (u64) -1))
703 timestamp = perf_time_to_tsc(sample->time, &spe->tc);
704 else
705 timestamp = 0;
706
707 if (timestamp || spe->timeless_decoding) {
708 err = arm_spe__update_queues(spe);
709 if (err)
710 return err;
711 }
712
713 if (spe->timeless_decoding) {
714 if (event->header.type == PERF_RECORD_EXIT) {
715 err = arm_spe_process_timeless_queues(spe,
716 event->fork.tid,
717 sample->time);
718 }
719 } else if (timestamp) {
720 err = arm_spe_process_queues(spe, timestamp);
721 }
722
723 return err;
724}
725
726static int arm_spe_process_auxtrace_event(struct perf_session *session,
727 union perf_event *event,
728 struct perf_tool *tool __maybe_unused)
729{
730 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
731 auxtrace);
732
733 if (!spe->data_queued) {
734 struct auxtrace_buffer *buffer;
735 off_t data_offset;
736 int fd = perf_data__fd(session->data);
737 int err;
738
739 if (perf_data__is_pipe(session->data)) {
740 data_offset = 0;
741 } else {
742 data_offset = lseek(fd, 0, SEEK_CUR);
743 if (data_offset == -1)
744 return -errno;
745 }
746
747 err = auxtrace_queues__add_event(&spe->queues, session, event,
748 data_offset, &buffer);
749 if (err)
750 return err;
751
752 /* Dump here now we have copied a piped trace out of the pipe */
753 if (dump_trace) {
754 if (auxtrace_buffer__get_data(buffer, fd)) {
755 arm_spe_dump_event(spe, buffer->data,
756 buffer->size);
757 auxtrace_buffer__put_data(buffer);
758 }
759 }
760 }
761
762 return 0;
763}
764
765static int arm_spe_flush(struct perf_session *session __maybe_unused,
766 struct perf_tool *tool __maybe_unused)
767{
768 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
769 auxtrace);
770 int ret;
771
772 if (dump_trace)
773 return 0;
774
775 if (!tool->ordered_events)
776 return -EINVAL;
777
778 ret = arm_spe__update_queues(spe);
779 if (ret < 0)
780 return ret;
781
782 if (spe->timeless_decoding)
783 return arm_spe_process_timeless_queues(spe, -1,
784 MAX_TIMESTAMP - 1);
785
786 return arm_spe_process_queues(spe, MAX_TIMESTAMP);
787}
788
789static void arm_spe_free_queue(void *priv)
790{
791 struct arm_spe_queue *speq = priv;
792
793 if (!speq)
794 return;
795 thread__zput(speq->thread);
796 arm_spe_decoder_free(speq->decoder);
797 zfree(&speq->event_buf);
798 free(speq);
799}
800
801static void arm_spe_free_events(struct perf_session *session)
802{
803 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
804 auxtrace);
805 struct auxtrace_queues *queues = &spe->queues;
806 unsigned int i;
807
808 for (i = 0; i < queues->nr_queues; i++) {
809 arm_spe_free_queue(queues->queue_array[i].priv);
810 queues->queue_array[i].priv = NULL;
811 }
812 auxtrace_queues__free(queues);
813}
814
815static void arm_spe_free(struct perf_session *session)
816{
817 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
818 auxtrace);
819
820 auxtrace_heap__free(&spe->heap);
821 arm_spe_free_events(session);
822 session->auxtrace = NULL;
823 free(spe);
824}
825
826static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
827 struct evsel *evsel)
828{
829 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
830
831 return evsel->core.attr.type == spe->pmu_type;
832}
833
834static const char * const arm_spe_info_fmts[] = {
835 [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n",
836};
837
838static void arm_spe_print_info(__u64 *arr)
839{
840 if (!dump_trace)
841 return;
842
843 fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
844}
845
/*
 * Context for synthesizing attribute events: the tool pointer handed to the
 * synthesize callback is the embedded dummy_tool, from which the callback
 * recovers this structure (and so the session) with container_of().
 */
struct arm_spe_synth {
	struct perf_tool dummy_tool;	/* passed as the 'tool' of the callback */
	struct perf_session *session;	/* session the event is delivered to */
};
850
851static int arm_spe_event_synth(struct perf_tool *tool,
852 union perf_event *event,
853 struct perf_sample *sample __maybe_unused,
854 struct machine *machine __maybe_unused)
855{
856 struct arm_spe_synth *arm_spe_synth =
857 container_of(tool, struct arm_spe_synth, dummy_tool);
858
859 return perf_session__deliver_synth_event(arm_spe_synth->session,
860 event, NULL);
861}
862
863static int arm_spe_synth_event(struct perf_session *session,
864 struct perf_event_attr *attr, u64 id)
865{
866 struct arm_spe_synth arm_spe_synth;
867
868 memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
869 arm_spe_synth.session = session;
870
871 return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
872 &id, arm_spe_event_synth);
873}
874
875static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
876 const char *name)
877{
878 struct evsel *evsel;
879
880 evlist__for_each_entry(evlist, evsel) {
881 if (evsel->core.id && evsel->core.id[0] == id) {
882 if (evsel->name)
883 zfree(&evsel->name);
884 evsel->name = strdup(name);
885 break;
886 }
887 }
888}
889
890static int
891arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
892{
893 struct evlist *evlist = session->evlist;
894 struct evsel *evsel;
895 struct perf_event_attr attr;
896 bool found = false;
897 u64 id;
898 int err;
899
900 evlist__for_each_entry(evlist, evsel) {
901 if (evsel->core.attr.type == spe->pmu_type) {
902 found = true;
903 break;
904 }
905 }
906
907 if (!found) {
908 pr_debug("No selected events with SPE trace data\n");
909 return 0;
910 }
911
912 memset(&attr, 0, sizeof(struct perf_event_attr));
913 attr.size = sizeof(struct perf_event_attr);
914 attr.type = PERF_TYPE_HARDWARE;
915 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
916 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
917 PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
918 if (spe->timeless_decoding)
919 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
920 else
921 attr.sample_type |= PERF_SAMPLE_TIME;
922
923 attr.exclude_user = evsel->core.attr.exclude_user;
924 attr.exclude_kernel = evsel->core.attr.exclude_kernel;
925 attr.exclude_hv = evsel->core.attr.exclude_hv;
926 attr.exclude_host = evsel->core.attr.exclude_host;
927 attr.exclude_guest = evsel->core.attr.exclude_guest;
928 attr.sample_id_all = evsel->core.attr.sample_id_all;
929 attr.read_format = evsel->core.attr.read_format;
930
931 /* create new id val to be a fixed offset from evsel id */
932 id = evsel->core.id[0] + 1000000000;
933
934 if (!id)
935 id = 1;
936
937 if (spe->synth_opts.flc) {
938 spe->sample_flc = true;
939
940 /* Level 1 data cache miss */
941 err = arm_spe_synth_event(session, &attr, id);
942 if (err)
943 return err;
944 spe->l1d_miss_id = id;
945 arm_spe_set_event_name(evlist, id, "l1d-miss");
946 id += 1;
947
948 /* Level 1 data cache access */
949 err = arm_spe_synth_event(session, &attr, id);
950 if (err)
951 return err;
952 spe->l1d_access_id = id;
953 arm_spe_set_event_name(evlist, id, "l1d-access");
954 id += 1;
955 }
956
957 if (spe->synth_opts.llc) {
958 spe->sample_llc = true;
959
960 /* Last level cache miss */
961 err = arm_spe_synth_event(session, &attr, id);
962 if (err)
963 return err;
964 spe->llc_miss_id = id;
965 arm_spe_set_event_name(evlist, id, "llc-miss");
966 id += 1;
967
968 /* Last level cache access */
969 err = arm_spe_synth_event(session, &attr, id);
970 if (err)
971 return err;
972 spe->llc_access_id = id;
973 arm_spe_set_event_name(evlist, id, "llc-access");
974 id += 1;
975 }
976
977 if (spe->synth_opts.tlb) {
978 spe->sample_tlb = true;
979
980 /* TLB miss */
981 err = arm_spe_synth_event(session, &attr, id);
982 if (err)
983 return err;
984 spe->tlb_miss_id = id;
985 arm_spe_set_event_name(evlist, id, "tlb-miss");
986 id += 1;
987
988 /* TLB access */
989 err = arm_spe_synth_event(session, &attr, id);
990 if (err)
991 return err;
992 spe->tlb_access_id = id;
993 arm_spe_set_event_name(evlist, id, "tlb-access");
994 id += 1;
995 }
996
997 if (spe->synth_opts.branches) {
998 spe->sample_branch = true;
999
1000 /* Branch miss */
1001 err = arm_spe_synth_event(session, &attr, id);
1002 if (err)
1003 return err;
1004 spe->branch_miss_id = id;
1005 arm_spe_set_event_name(evlist, id, "branch-miss");
1006 id += 1;
1007 }
1008
1009 if (spe->synth_opts.remote_access) {
1010 spe->sample_remote_access = true;
1011
1012 /* Remote access */
1013 err = arm_spe_synth_event(session, &attr, id);
1014 if (err)
1015 return err;
1016 spe->remote_access_id = id;
1017 arm_spe_set_event_name(evlist, id, "remote-access");
1018 id += 1;
1019 }
1020
1021 if (spe->synth_opts.mem) {
1022 spe->sample_memory = true;
1023
1024 err = arm_spe_synth_event(session, &attr, id);
1025 if (err)
1026 return err;
1027 spe->memory_id = id;
1028 arm_spe_set_event_name(evlist, id, "memory");
1029 }
1030
1031 return 0;
1032}
1033
/*
 * Set up Arm SPE decoding for a session from its AUXTRACE_INFO event.
 *
 * @event:   the auxtrace info event
 * @session: session that will own the decoder state
 *
 * Allocates the decoder state, records the PMU type and clock conversion
 * parameters, installs the auxtrace callbacks and, unless trace is only
 * being dumped, synthesizes the sample events and queues the indexed trace
 * data.
 *
 * Returns 0 on success, -EINVAL if the event is too small, -ENOMEM if the
 * state cannot be allocated, or the error from initialising the queues,
 * synthesizing events or processing the index.
 */
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	/* the event must carry ARM_SPE_AUXTRACE_PRIV_MAX private words */
	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
	struct perf_record_time_conv *tc = &session->time_conv;
	struct arm_spe *spe;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe)
		return -ENOMEM;

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized PERF_RECORD_TIME_CONV event has already been
	 * handled and the hardware clock parameters are stored in the
	 * session. Copy them into the struct perf_tsc_conversion in
	 * "spe->tc", which is used later to convert between clock counter
	 * values and timestamps.
	 *
	 * For backward compatibility, the fields from "time_cycles" onwards
	 * are copied only if the event contains them.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(&auxtrace_info->priv[0]);

	/* when only dumping trace no samples are synthesized */
	if (dump_trace)
		return 0;

	/* use the session's itrace options if they were set, else defaults */
	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		spe->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&spe->synth_opts, false);

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	/* remember that the data is queued so auxtrace events do not queue it again */
	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	/* the callbacks were already installed; detach before freeing */
	session->auxtrace = NULL;
err_free:
	free(spe);
	return err;
}