1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright(C) 2015-2018 Linaro Limited.
4 *
5 * Author: Tor Jeremiassen <tor@ti.com>
6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7 */
8
9#include <linux/kernel.h>
10#include <linux/bitfield.h>
11#include <linux/bitops.h>
12#include <linux/coresight-pmu.h>
13#include <linux/err.h>
14#include <linux/log2.h>
15#include <linux/types.h>
16#include <linux/zalloc.h>
17
18#include <stdlib.h>
19
20#include "auxtrace.h"
21#include "color.h"
22#include "cs-etm.h"
23#include "cs-etm-decoder/cs-etm-decoder.h"
24#include "debug.h"
25#include "dso.h"
26#include "evlist.h"
27#include "intlist.h"
28#include "machine.h"
29#include "map.h"
30#include "perf.h"
31#include "session.h"
32#include "map_symbol.h"
33#include "branch.h"
34#include "symbol.h"
35#include "tool.h"
36#include "thread.h"
37#include "thread-stack.h"
38#include "tsc.h"
39#include <tools/libc_compat.h>
40#include "util/synthetic-events.h"
41#include "util/util.h"
42
43struct cs_etm_auxtrace {
44 struct auxtrace auxtrace;
45 struct auxtrace_queues queues;
46 struct auxtrace_heap heap;
47 struct itrace_synth_opts synth_opts;
48 struct perf_session *session;
49 struct perf_tsc_conversion tc;
50
51 /*
52	 * Timeless decoding has no timestamps in the trace, so overlapping mmap
53	 * lookups are less accurate, but the trace data is smaller. We use context IDs
54 * in the trace instead of matching timestamps with fork records so
55 * they're not really needed in the general case. Overlapping mmaps
56 * happen in cases like between a fork and an exec.
57 */
58 bool timeless_decoding;
59
60 /*
61 * Per-thread ignores the trace channel ID and instead assumes that
62 * everything in a buffer comes from the same process regardless of
63 * which CPU it ran on. It also implies no context IDs so the TID is
64 * taken from the auxtrace buffer.
65 */
66 bool per_thread_decoding;
67 bool snapshot_mode;
68 bool data_queued;
69 bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
70
71 int num_cpu;
72 u64 latest_kernel_timestamp;
73 u32 auxtrace_type;
74 u64 branches_sample_type;
75 u64 branches_id;
76 u64 instructions_sample_type;
77 u64 instructions_sample_period;
78 u64 instructions_id;
79 u64 **metadata;
80 unsigned int pmu_type;
81 enum cs_etm_pid_fmt pid_fmt;
82};
83
84struct cs_etm_traceid_queue {
85 u8 trace_chan_id;
86 u64 period_instructions;
87 size_t last_branch_pos;
88 union perf_event *event_buf;
89 struct thread *thread;
90 struct thread *prev_packet_thread;
91 ocsd_ex_level prev_packet_el;
92 ocsd_ex_level el;
93 struct branch_stack *last_branch;
94 struct branch_stack *last_branch_rb;
95 struct cs_etm_packet *prev_packet;
96 struct cs_etm_packet *packet;
97 struct cs_etm_packet_queue packet_queue;
98};
99
100struct cs_etm_queue {
101 struct cs_etm_auxtrace *etm;
102 struct cs_etm_decoder *decoder;
103 struct auxtrace_buffer *buffer;
104 unsigned int queue_nr;
105 u8 pending_timestamp_chan_id;
106 u64 offset;
107 const unsigned char *buf;
108 size_t buf_len, buf_used;
109 /* Conversion between traceID and index in traceid_queues array */
110 struct intlist *traceid_queues_list;
111 struct cs_etm_traceid_queue **traceid_queues;
112};
113
114/* RB tree for quick conversion between traceID and metadata pointers */
115static struct intlist *traceid_list;
116
117static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
118static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
119 pid_t tid);
120static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
121static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
122
123/* PTM's ETMIDR [11:8] set to b0011 */
124#define ETMIDR_PTM_VERSION 0x00000300
125
126/*
127 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
128 * work with. One option is to modify the auxtrace_heap_XYZ() API or simply
129 * encode the etm queue number in the upper 16 bits and the channel in
130 * the lower 16 bits.
131 */
132#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \
133 (queue_nr << 16 | trace_chan_id)
134#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
135#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
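/*
 * Worked example of the encoding above: with queue_nr = 2 and
 * trace_chan_id = 0x10, TO_CS_QUEUE_NR() yields (2 << 16) | 0x10 = 0x20010;
 * TO_QUEUE_NR(0x20010) and TO_TRACE_CHAN_ID(0x20010) recover 2 and 0x10.
 */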
136
137static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
138{
139 etmidr &= ETMIDR_PTM_VERSION;
140
141 if (etmidr == ETMIDR_PTM_VERSION)
142 return CS_ETM_PROTO_PTM;
143
144 return CS_ETM_PROTO_ETMV3;
145}
146
147static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
148{
149 struct int_node *inode;
150 u64 *metadata;
151
152 inode = intlist__find(traceid_list, trace_chan_id);
153 if (!inode)
154 return -EINVAL;
155
156 metadata = inode->priv;
157 *magic = metadata[CS_ETM_MAGIC];
158 return 0;
159}
160
161int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
162{
163 struct int_node *inode;
164 u64 *metadata;
165
166 inode = intlist__find(traceid_list, trace_chan_id);
167 if (!inode)
168 return -EINVAL;
169
170 metadata = inode->priv;
171 *cpu = (int)metadata[CS_ETM_CPU];
172 return 0;
173}
174
175/*
176 * The returned PID format is presented as an enum:
177 *
178 * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
179 * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
180 * CS_ETM_PIDFMT_NONE: No context IDs
181 *
182 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
183 * are enabled at the same time when the session runs on an EL2 kernel.
184 * This means that both CONTEXTIDR_EL1 and CONTEXTIDR_EL2 will be
185 * recorded in the trace data; in that case the tool will use
186 * CONTEXTIDR_EL2 as the PID.
187 *
188 * The result is cached in etm->pid_fmt so this function only needs to be called
189 * when processing the aux info.
190 */
191static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
192{
193 u64 val;
194
195 if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
196 val = metadata[CS_ETM_ETMCR];
197 /* CONTEXTIDR is traced */
198 if (val & BIT(ETM_OPT_CTXTID))
199 return CS_ETM_PIDFMT_CTXTID;
200 } else {
201 val = metadata[CS_ETMV4_TRCCONFIGR];
202 /* CONTEXTIDR_EL2 is traced */
203 if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
204 return CS_ETM_PIDFMT_CTXTID2;
205 /* CONTEXTIDR_EL1 is traced */
206 else if (val & BIT(ETM4_CFG_BIT_CTXTID))
207 return CS_ETM_PIDFMT_CTXTID;
208 }
209
210 return CS_ETM_PIDFMT_NONE;
211}
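/*
 * For example, for an ETMv4/ETE session recorded on an EL2 (VHE) kernel the
 * TRCCONFIGR value may have ETM4_CFG_BIT_VMID and ETM4_CFG_BIT_VMID_OPT set
 * in addition to ETM4_CFG_BIT_CTXTID; the checks above then return
 * CS_ETM_PIDFMT_CTXTID2 so that CONTEXTIDR_EL2 is used as the PID.
 */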
212
213enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
214{
215 return etmq->etm->pid_fmt;
216}
217
218static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
219{
220 struct int_node *inode;
221
222 /* Get an RB node for this CPU */
223 inode = intlist__findnew(traceid_list, trace_chan_id);
224
225 /* Something went wrong, no need to continue */
226 if (!inode)
227 return -ENOMEM;
228
229 /*
230 * The node for that CPU should not be taken.
231 * Back out if that's the case.
232 */
233 if (inode->priv)
234 return -EINVAL;
235
236 /* All good, associate the traceID with the metadata pointer */
237 inode->priv = cpu_metadata;
238
239 return 0;
240}
241
242static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
243{
244 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
245
246 switch (cs_etm_magic) {
247 case __perf_cs_etmv3_magic:
248 *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
249 CORESIGHT_TRACE_ID_VAL_MASK);
250 break;
251 case __perf_cs_etmv4_magic:
252 case __perf_cs_ete_magic:
253 *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
254 CORESIGHT_TRACE_ID_VAL_MASK);
255 break;
256 default:
257 return -EINVAL;
258 }
259 return 0;
260}
261
262/*
263 * update metadata trace ID from the value found in the AUX_HW_INFO packet.
264 * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
265 */
266static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
267{
268 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
269
270 switch (cs_etm_magic) {
271 case __perf_cs_etmv3_magic:
272 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
273 break;
274 case __perf_cs_etmv4_magic:
275 case __perf_cs_ete_magic:
276 cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
277 break;
278
279 default:
280 return -EINVAL;
281 }
282 return 0;
283}
284
285/*
286 * Get a metadata index for a specific cpu from an array.
287 *
288 */
289static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
290{
291 int i;
292
293 for (i = 0; i < etm->num_cpu; i++) {
294 if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
295 return i;
296 }
297 }
298
299 return -1;
300}
301
302/*
303 * Get a metadata for a specific cpu from an array.
304 *
305 */
306static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
307{
308 int idx = get_cpu_data_idx(etm, cpu);
309
310 return (idx != -1) ? etm->metadata[idx] : NULL;
311}
312
313/*
314 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
315 *
316 * The payload associates the Trace ID and the CPU.
317 * The routine is tolerant of seeing multiple packets with the same association,
318 * but a CPU / Trace ID association changing during a session is an error.
319 */
320static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
321 union perf_event *event)
322{
323 struct cs_etm_auxtrace *etm;
324 struct perf_sample sample;
325 struct int_node *inode;
326 struct evsel *evsel;
327 u64 *cpu_data;
328 u64 hw_id;
329 int cpu, version, err;
330 u8 trace_chan_id, curr_chan_id;
331
332 /* extract and parse the HW ID */
333 hw_id = event->aux_output_hw_id.hw_id;
334 version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
335 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
336
337 /* check that we can handle this version */
338 if (version > CS_AUX_HW_ID_CURR_VERSION)
339 return -EINVAL;
340
341 /* get access to the etm metadata */
342 etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
343 if (!etm || !etm->metadata)
344 return -EINVAL;
345
346 /* parse the sample to get the CPU */
347 evsel = evlist__event2evsel(session->evlist, event);
348 if (!evsel)
349 return -EINVAL;
350 err = evsel__parse_sample(evsel, event, &sample);
351 if (err)
352 return err;
353 cpu = sample.cpu;
354 if (cpu == -1) {
355 /* no CPU in the sample - possibly recorded with an old version of perf */
356		pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.\n");
357 return -EINVAL;
358 }
359
360 /* See if the ID is mapped to a CPU, and it matches the current CPU */
361 inode = intlist__find(traceid_list, trace_chan_id);
362 if (inode) {
363 cpu_data = inode->priv;
364 if ((int)cpu_data[CS_ETM_CPU] != cpu) {
365 pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
366 return -EINVAL;
367 }
368
369 /* check that the mapped ID matches */
370 err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
371 if (err)
372 return err;
373 if (curr_chan_id != trace_chan_id) {
374 pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
375 return -EINVAL;
376 }
377
378 /* mapped and matched - return OK */
379 return 0;
380 }
381
382 cpu_data = get_cpu_data(etm, cpu);
383 if (cpu_data == NULL)
384		return -EINVAL;
385
386	/* not one we've seen before - let's map it */
387 err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
388 if (err)
389 return err;
390
391 /*
392 * if we are picking up the association from the packet, need to plug
393 * the correct trace ID into the metadata for setting up decoders later.
394 */
395 err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
396 return err;
397}
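/*
 * Example flow for the function above: a HW_ID payload with version 0 and
 * trace ID 0x10, sampled on CPU 2, maps 0x10 to CPU 2's metadata the first
 * time it is seen; identical packets later return early once the existing
 * mapping is verified, while a packet pairing 0x10 with a different CPU is
 * rejected with -EINVAL.
 */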
398
399void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
400 u8 trace_chan_id)
401{
402 /*
403 * When a timestamp packet is encountered the backend code
404 * is stopped so that the front end has time to process packets
405 * that were accumulated in the traceID queue. Since there can
406 * be more than one channel per cs_etm_queue, we need to specify
407 * what traceID queue needs servicing.
408 */
409 etmq->pending_timestamp_chan_id = trace_chan_id;
410}
411
412static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
413 u8 *trace_chan_id)
414{
415 struct cs_etm_packet_queue *packet_queue;
416
417 if (!etmq->pending_timestamp_chan_id)
418 return 0;
419
420 if (trace_chan_id)
421 *trace_chan_id = etmq->pending_timestamp_chan_id;
422
423 packet_queue = cs_etm__etmq_get_packet_queue(etmq,
424 etmq->pending_timestamp_chan_id);
425 if (!packet_queue)
426 return 0;
427
428 /* Acknowledge pending status */
429 etmq->pending_timestamp_chan_id = 0;
430
431 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
432 return packet_queue->cs_timestamp;
433}
434
435static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
436{
437 int i;
438
439 queue->head = 0;
440 queue->tail = 0;
441 queue->packet_count = 0;
442 for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
443 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
444 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
445 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
446 queue->packet_buffer[i].instr_count = 0;
447 queue->packet_buffer[i].last_instr_taken_branch = false;
448 queue->packet_buffer[i].last_instr_size = 0;
449 queue->packet_buffer[i].last_instr_type = 0;
450 queue->packet_buffer[i].last_instr_subtype = 0;
451 queue->packet_buffer[i].last_instr_cond = 0;
452 queue->packet_buffer[i].flags = 0;
453 queue->packet_buffer[i].exception_number = UINT32_MAX;
454 queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
455 queue->packet_buffer[i].cpu = INT_MIN;
456 }
457}
458
459static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
460{
461 int idx;
462 struct int_node *inode;
463 struct cs_etm_traceid_queue *tidq;
464 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
465
466 intlist__for_each_entry(inode, traceid_queues_list) {
467 idx = (int)(intptr_t)inode->priv;
468 tidq = etmq->traceid_queues[idx];
469 cs_etm__clear_packet_queue(&tidq->packet_queue);
470 }
471}
472
473static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
474 struct cs_etm_traceid_queue *tidq,
475 u8 trace_chan_id)
476{
477 int rc = -ENOMEM;
478 struct auxtrace_queue *queue;
479 struct cs_etm_auxtrace *etm = etmq->etm;
480
481 cs_etm__clear_packet_queue(&tidq->packet_queue);
482
483 queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
484 tidq->trace_chan_id = trace_chan_id;
485 tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
486 tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
487 queue->tid);
488 tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);
489
490 tidq->packet = zalloc(sizeof(struct cs_etm_packet));
491 if (!tidq->packet)
492 goto out;
493
494 tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
495 if (!tidq->prev_packet)
496 goto out_free;
497
498 if (etm->synth_opts.last_branch) {
499 size_t sz = sizeof(struct branch_stack);
500
501 sz += etm->synth_opts.last_branch_sz *
502 sizeof(struct branch_entry);
503 tidq->last_branch = zalloc(sz);
504 if (!tidq->last_branch)
505 goto out_free;
506 tidq->last_branch_rb = zalloc(sz);
507 if (!tidq->last_branch_rb)
508 goto out_free;
509 }
510
511 tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
512 if (!tidq->event_buf)
513 goto out_free;
514
515 return 0;
516
517out_free:
518 zfree(&tidq->last_branch_rb);
519 zfree(&tidq->last_branch);
520 zfree(&tidq->prev_packet);
521 zfree(&tidq->packet);
522out:
523 return rc;
524}
525
526static struct cs_etm_traceid_queue
527*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
528{
529 int idx;
530 struct int_node *inode;
531 struct intlist *traceid_queues_list;
532 struct cs_etm_traceid_queue *tidq, **traceid_queues;
533 struct cs_etm_auxtrace *etm = etmq->etm;
534
535 if (etm->per_thread_decoding)
536 trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
537
538 traceid_queues_list = etmq->traceid_queues_list;
539
540 /*
541	 * Check if the traceid_queue exists for this traceID by looking
542 * in the queue list.
543 */
544 inode = intlist__find(traceid_queues_list, trace_chan_id);
545 if (inode) {
546 idx = (int)(intptr_t)inode->priv;
547 return etmq->traceid_queues[idx];
548 }
549
550 /* We couldn't find a traceid_queue for this traceID, allocate one */
551 tidq = malloc(sizeof(*tidq));
552 if (!tidq)
553 return NULL;
554
555 memset(tidq, 0, sizeof(*tidq));
556
557 /* Get a valid index for the new traceid_queue */
558 idx = intlist__nr_entries(traceid_queues_list);
559	/* Memory for the inode is freed in cs_etm__free_traceid_queues() */
560 inode = intlist__findnew(traceid_queues_list, trace_chan_id);
561 if (!inode)
562 goto out_free;
563
564 /* Associate this traceID with this index */
565 inode->priv = (void *)(intptr_t)idx;
566
567 if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
568 goto out_free;
569
570 /* Grow the traceid_queues array by one unit */
571 traceid_queues = etmq->traceid_queues;
572 traceid_queues = reallocarray(traceid_queues,
573 idx + 1,
574 sizeof(*traceid_queues));
575
576 /*
577 * On failure reallocarray() returns NULL and the original block of
578 * memory is left untouched.
579 */
580 if (!traceid_queues)
581 goto out_free;
582
583 traceid_queues[idx] = tidq;
584 etmq->traceid_queues = traceid_queues;
585
586 return etmq->traceid_queues[idx];
587
588out_free:
589 /*
590 * Function intlist__remove() removes the inode from the list
591	 * and deletes the memory associated with it.
592 */
593 intlist__remove(traceid_queues_list, inode);
594 free(tidq);
595
596 return NULL;
597}
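/*
 * Example of the two-level lookup above: the first trace ID seen on this
 * queue (say 0x10) gets index 0 and a second one (say 0x12) gets index 1;
 * traceid_queues_list maps trace ID -> index, and traceid_queues[index]
 * holds the corresponding struct cs_etm_traceid_queue.
 */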
598
599struct cs_etm_packet_queue
600*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
601{
602 struct cs_etm_traceid_queue *tidq;
603
604 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
605 if (tidq)
606 return &tidq->packet_queue;
607
608 return NULL;
609}
610
611static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
612 struct cs_etm_traceid_queue *tidq)
613{
614 struct cs_etm_packet *tmp;
615
616 if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
617 etm->synth_opts.instructions) {
618 /*
619 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
620 * the next incoming packet.
621 *
622 * Threads and exception levels are also tracked for both the
623 * previous and current packets. This is because the previous
624 * packet is used for the 'from' IP for branch samples, so the
625 * thread at that time must also be assigned to that sample.
626 * Across discontinuity packets the thread can change, so by
627 * tracking the thread for the previous packet the branch sample
628 * will have the correct info.
629 */
630 tmp = tidq->packet;
631 tidq->packet = tidq->prev_packet;
632 tidq->prev_packet = tmp;
633 tidq->prev_packet_el = tidq->el;
634 thread__put(tidq->prev_packet_thread);
635 tidq->prev_packet_thread = thread__get(tidq->thread);
636 }
637}
638
639static void cs_etm__packet_dump(const char *pkt_string)
640{
641 const char *color = PERF_COLOR_BLUE;
642 int len = strlen(pkt_string);
643
644 if (len && (pkt_string[len-1] == '\n'))
645 color_fprintf(stdout, color, " %s", pkt_string);
646 else
647 color_fprintf(stdout, color, " %s\n", pkt_string);
648
649 fflush(stdout);
650}
651
652static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
653 struct cs_etm_auxtrace *etm, int t_idx,
654 int m_idx, u32 etmidr)
655{
656 u64 **metadata = etm->metadata;
657
658 t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
659 t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR];
660 t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR];
661}
662
663static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
664 struct cs_etm_auxtrace *etm, int t_idx,
665 int m_idx)
666{
667 u64 **metadata = etm->metadata;
668
669 t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i;
670 t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0];
671 t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1];
672 t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2];
673 t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8];
674 t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR];
675 t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR];
676}
677
678static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
679 struct cs_etm_auxtrace *etm, int t_idx,
680 int m_idx)
681{
682 u64 **metadata = etm->metadata;
683
684 t_params[t_idx].protocol = CS_ETM_PROTO_ETE;
685 t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0];
686 t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1];
687 t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2];
688 t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8];
689 t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR];
690 t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR];
691 t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH];
692}
693
694static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
695 struct cs_etm_auxtrace *etm,
696 bool formatted,
697 int sample_cpu,
698 int decoders)
699{
700 int t_idx, m_idx;
701 u32 etmidr;
702 u64 architecture;
703
704 for (t_idx = 0; t_idx < decoders; t_idx++) {
705 if (formatted)
706 m_idx = t_idx;
707 else {
708 m_idx = get_cpu_data_idx(etm, sample_cpu);
709 if (m_idx == -1) {
710 pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n");
711 m_idx = 0;
712 }
713 }
714
715 architecture = etm->metadata[m_idx][CS_ETM_MAGIC];
716
717 switch (architecture) {
718 case __perf_cs_etmv3_magic:
719 etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR];
720 cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr);
721 break;
722 case __perf_cs_etmv4_magic:
723 cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx);
724 break;
725 case __perf_cs_ete_magic:
726 cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx);
727 break;
728 default:
729 return -EINVAL;
730 }
731 }
732
733 return 0;
734}
735
736static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
737 struct cs_etm_queue *etmq,
738 enum cs_etm_decoder_operation mode,
739 bool formatted)
740{
741 int ret = -EINVAL;
742
743 if (!(mode < CS_ETM_OPERATION_MAX))
744 goto out;
745
746 d_params->packet_printer = cs_etm__packet_dump;
747 d_params->operation = mode;
748 d_params->data = etmq;
749 d_params->formatted = formatted;
750 d_params->fsyncs = false;
751 d_params->hsyncs = false;
752 d_params->frame_aligned = true;
753
754 ret = 0;
755out:
756 return ret;
757}
758
759static void cs_etm__dump_event(struct cs_etm_queue *etmq,
760 struct auxtrace_buffer *buffer)
761{
762 int ret;
763 const char *color = PERF_COLOR_BLUE;
764 size_t buffer_used = 0;
765
766 fprintf(stdout, "\n");
767 color_fprintf(stdout, color,
768 ". ... CoreSight %s Trace data: size %#zx bytes\n",
769 cs_etm_decoder__get_name(etmq->decoder), buffer->size);
770
771 do {
772 size_t consumed;
773
774 ret = cs_etm_decoder__process_data_block(
775 etmq->decoder, buffer->offset,
776 &((u8 *)buffer->data)[buffer_used],
777 buffer->size - buffer_used, &consumed);
778 if (ret)
779 break;
780
781 buffer_used += consumed;
782 } while (buffer_used < buffer->size);
783
784 cs_etm_decoder__reset(etmq->decoder);
785}
786
787static int cs_etm__flush_events(struct perf_session *session,
788 struct perf_tool *tool)
789{
790 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
791 struct cs_etm_auxtrace,
792 auxtrace);
793 if (dump_trace)
794 return 0;
795
796 if (!tool->ordered_events)
797 return -EINVAL;
798
799 if (etm->timeless_decoding) {
800 /*
801 * Pass tid = -1 to process all queues. But likely they will have
802 * already been processed on PERF_RECORD_EXIT anyway.
803 */
804 return cs_etm__process_timeless_queues(etm, -1);
805 }
806
807 return cs_etm__process_timestamped_queues(etm);
808}
809
810static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
811{
812 int idx;
813 uintptr_t priv;
814 struct int_node *inode, *tmp;
815 struct cs_etm_traceid_queue *tidq;
816 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
817
818 intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
819 priv = (uintptr_t)inode->priv;
820 idx = priv;
821
822 /* Free this traceid_queue from the array */
823 tidq = etmq->traceid_queues[idx];
824 thread__zput(tidq->thread);
825 thread__zput(tidq->prev_packet_thread);
826 zfree(&tidq->event_buf);
827 zfree(&tidq->last_branch);
828 zfree(&tidq->last_branch_rb);
829 zfree(&tidq->prev_packet);
830 zfree(&tidq->packet);
831 zfree(&tidq);
832
833 /*
834 * Function intlist__remove() removes the inode from the list
835		 * and deletes the memory associated with it.
836 */
837 intlist__remove(traceid_queues_list, inode);
838 }
839
840 /* Then the RB tree itself */
841 intlist__delete(traceid_queues_list);
842 etmq->traceid_queues_list = NULL;
843
844 /* finally free the traceid_queues array */
845 zfree(&etmq->traceid_queues);
846}
847
848static void cs_etm__free_queue(void *priv)
849{
850 struct cs_etm_queue *etmq = priv;
851
852 if (!etmq)
853 return;
854
855 cs_etm_decoder__free(etmq->decoder);
856 cs_etm__free_traceid_queues(etmq);
857 free(etmq);
858}
859
860static void cs_etm__free_events(struct perf_session *session)
861{
862 unsigned int i;
863 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
864 struct cs_etm_auxtrace,
865 auxtrace);
866 struct auxtrace_queues *queues = &aux->queues;
867
868 for (i = 0; i < queues->nr_queues; i++) {
869 cs_etm__free_queue(queues->queue_array[i].priv);
870 queues->queue_array[i].priv = NULL;
871 }
872
873 auxtrace_queues__free(queues);
874}
875
876static void cs_etm__free(struct perf_session *session)
877{
878 int i;
879 struct int_node *inode, *tmp;
880 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
881 struct cs_etm_auxtrace,
882 auxtrace);
883 cs_etm__free_events(session);
884 session->auxtrace = NULL;
885
886 /* First remove all traceID/metadata nodes for the RB tree */
887 intlist__for_each_entry_safe(inode, tmp, traceid_list)
888 intlist__remove(traceid_list, inode);
889 /* Then the RB tree itself */
890 intlist__delete(traceid_list);
891
892 for (i = 0; i < aux->num_cpu; i++)
893 zfree(&aux->metadata[i]);
894
895 zfree(&aux->metadata);
896 zfree(&aux);
897}
898
899static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
900 struct evsel *evsel)
901{
902 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
903 struct cs_etm_auxtrace,
904 auxtrace);
905
906 return evsel->core.attr.type == aux->pmu_type;
907}
908
909static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
910 ocsd_ex_level el)
911{
912 enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
913
914 /*
915 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
916 * running at EL1 assume everything is the host.
917 */
918 if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
919 return &etmq->etm->session->machines.host;
920
921 /*
922 * Not perfect, but otherwise assume anything in EL1 is the default
923 * guest, and everything else is the host. Distinguishing between guest
924 * and host userspaces isn't currently supported either. Neither is
925	 * multiple guest support. All this does is reduce the likelihood of
926 * decode errors where we look into the host kernel maps when it should
927 * have been the guest maps.
928 */
929 switch (el) {
930 case ocsd_EL1:
931 return machines__find_guest(&etmq->etm->session->machines,
932 DEFAULT_GUEST_KERNEL_ID);
933 case ocsd_EL3:
934 case ocsd_EL2:
935 case ocsd_EL0:
936 case ocsd_EL_unknown:
937 default:
938 return &etmq->etm->session->machines.host;
939 }
940}
941
942static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
943 ocsd_ex_level el)
944{
945 struct machine *machine = cs_etm__get_machine(etmq, el);
946
947 if (address >= machine__kernel_start(machine)) {
948 if (machine__is_host(machine))
949 return PERF_RECORD_MISC_KERNEL;
950 else
951 return PERF_RECORD_MISC_GUEST_KERNEL;
952 } else {
953 if (machine__is_host(machine))
954 return PERF_RECORD_MISC_USER;
955 else {
956 /*
957 * Can't really happen at the moment because
958 * cs_etm__get_machine() will always return
959 * machines.host for any non EL1 trace.
960 */
961 return PERF_RECORD_MISC_GUEST_USER;
962 }
963 }
964}
965
966static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
967 u64 address, size_t size, u8 *buffer,
968 const ocsd_mem_space_acc_t mem_space)
969{
970 u8 cpumode;
971 u64 offset;
972 int len;
973 struct addr_location al;
974 struct dso *dso;
975 struct cs_etm_traceid_queue *tidq;
976 int ret = 0;
977
978 if (!etmq)
979 return 0;
980
981 addr_location__init(&al);
982 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
983 if (!tidq)
984 goto out;
985
986 /*
987	 * We've already tracked EL alongside the PID in cs_etm__set_thread()
988 * so double check that it matches what OpenCSD thinks as well. It
989 * doesn't distinguish between EL0 and EL1 for this mem access callback
990 * so we had to do the extra tracking. Skip validation if it's any of
991 * the 'any' values.
992 */
993 if (!(mem_space == OCSD_MEM_SPACE_ANY ||
994 mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
995 if (mem_space & OCSD_MEM_SPACE_EL1N) {
996 /* Includes both non secure EL1 and EL0 */
997 assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
998 } else if (mem_space & OCSD_MEM_SPACE_EL2)
999 assert(tidq->el == ocsd_EL2);
1000 else if (mem_space & OCSD_MEM_SPACE_EL3)
1001 assert(tidq->el == ocsd_EL3);
1002 }
1003
1004 cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);
1005
1006 if (!thread__find_map(tidq->thread, cpumode, address, &al))
1007 goto out;
1008
1009 dso = map__dso(al.map);
1010 if (!dso)
1011 goto out;
1012
1013 if (dso->data.status == DSO_DATA_STATUS_ERROR &&
1014 dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
1015 goto out;
1016
1017 offset = map__map_ip(al.map, address);
1018
1019 map__load(al.map);
1020
1021 len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
1022 offset, buffer, size);
1023
1024 if (len <= 0) {
1025 ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
1026 " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
1027 if (!dso->auxtrace_warned) {
1028 pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
1029 address,
1030 dso->long_name ? dso->long_name : "Unknown");
1031 dso->auxtrace_warned = true;
1032 }
1033 goto out;
1034 }
1035 ret = len;
1036out:
1037 addr_location__exit(&al);
1038 return ret;
1039}
1040
1041static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
1042 bool formatted, int sample_cpu)
1043{
1044 struct cs_etm_decoder_params d_params;
1045 struct cs_etm_trace_params *t_params = NULL;
1046 struct cs_etm_queue *etmq;
1047 /*
1048 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
1049 * needed.
1050 */
1051 int decoders = formatted ? etm->num_cpu : 1;
1052
1053 etmq = zalloc(sizeof(*etmq));
1054 if (!etmq)
1055 return NULL;
1056
1057 etmq->traceid_queues_list = intlist__new(NULL);
1058 if (!etmq->traceid_queues_list)
1059 goto out_free;
1060
1061 /* Use metadata to fill in trace parameters for trace decoder */
1062 t_params = zalloc(sizeof(*t_params) * decoders);
1063
1064 if (!t_params)
1065 goto out_free;
1066
1067 if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders))
1068 goto out_free;
1069
1070 /* Set decoder parameters to decode trace packets */
1071 if (cs_etm__init_decoder_params(&d_params, etmq,
1072 dump_trace ? CS_ETM_OPERATION_PRINT :
1073 CS_ETM_OPERATION_DECODE,
1074 formatted))
1075 goto out_free;
1076
1077 etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
1078 t_params);
1079
1080 if (!etmq->decoder)
1081 goto out_free;
1082
1083 /*
1084 * Register a function to handle all memory accesses required by
1085 * the trace decoder library.
1086 */
1087 if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
1088 0x0L, ((u64) -1L),
1089 cs_etm__mem_access))
1090 goto out_free_decoder;
1091
1092 zfree(&t_params);
1093 return etmq;
1094
1095out_free_decoder:
1096 cs_etm_decoder__free(etmq->decoder);
1097out_free:
1098 intlist__delete(etmq->traceid_queues_list);
1099 free(etmq);
1100
1101 return NULL;
1102}
1103
1104static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1105 struct auxtrace_queue *queue,
1106 unsigned int queue_nr,
1107 bool formatted,
1108 int sample_cpu)
1109{
1110 struct cs_etm_queue *etmq = queue->priv;
1111
1112 if (list_empty(&queue->head) || etmq)
1113 return 0;
1114
1115 etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu);
1116
1117 if (!etmq)
1118 return -ENOMEM;
1119
1120 queue->priv = etmq;
1121 etmq->etm = etm;
1122 etmq->queue_nr = queue_nr;
1123 etmq->offset = 0;
1124
1125 return 0;
1126}
1127
1128static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
1129 struct cs_etm_queue *etmq,
1130 unsigned int queue_nr)
1131{
1132 int ret = 0;
1133 unsigned int cs_queue_nr;
1134 u8 trace_chan_id;
1135 u64 cs_timestamp;
1136
1137 /*
1138 * We are under a CPU-wide trace scenario. As such we need to know
1139 * when the code that generated the traces started to execute so that
1140 * it can be correlated with execution on other CPUs. So we get a
1141 * handle on the beginning of traces and decode until we find a
1142 * timestamp. The timestamp is then added to the auxtrace min heap
1143	 * in order to know which queue (of all the etmqs) to decode first.
1144 */
1145 while (1) {
1146 /*
1147 * Fetch an aux_buffer from this etmq. Bail if no more
1148 * blocks or an error has been encountered.
1149 */
1150 ret = cs_etm__get_data_block(etmq);
1151 if (ret <= 0)
1152 goto out;
1153
1154 /*
1155 * Run decoder on the trace block. The decoder will stop when
1156 * encountering a CS timestamp, a full packet queue or the end of
1157 * trace for that block.
1158 */
1159 ret = cs_etm__decode_data_block(etmq);
1160 if (ret)
1161 goto out;
1162
1163 /*
1164 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
1165 * the timestamp calculation for us.
1166 */
1167 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
1168
1169 /* We found a timestamp, no need to continue. */
1170 if (cs_timestamp)
1171 break;
1172
1173 /*
1174 * We didn't find a timestamp so empty all the traceid packet
1175 * queues before looking for another timestamp packet, either
1176 * in the current data block or a new one. Packets that were
1177 * just decoded are useless since no timestamp has been
1178 * associated with them. As such simply discard them.
1179 */
1180 cs_etm__clear_all_packet_queues(etmq);
1181 }
1182
1183 /*
1184 * We have a timestamp. Add it to the min heap to reflect when
1185 * instructions conveyed by the range packets of this traceID queue
1186 * started to execute. Once the same has been done for all the traceID
1187	 * queues of each etmq, rendering and decoding can start in
1188 * chronological order.
1189 *
1190 * Note that packets decoded above are still in the traceID's packet
1191 * queue and will be processed in cs_etm__process_timestamped_queues().
1192 */
1193 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
1194 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
1195out:
1196 return ret;
1197}
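/*
 * Each etmq handled above ends up in the min heap keyed by the first
 * timestamp found in its trace, with the queue number and the channel that
 * produced the timestamp encoded together by TO_CS_QUEUE_NR(). For example,
 * an etmq whose first timestamp is 1000 will be picked for decode before one
 * whose first timestamp is 2500, keeping cross-CPU decode in order.
 */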
1198
1199static inline
1200void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
1201 struct cs_etm_traceid_queue *tidq)
1202{
1203 struct branch_stack *bs_src = tidq->last_branch_rb;
1204 struct branch_stack *bs_dst = tidq->last_branch;
1205 size_t nr = 0;
1206
1207 /*
1208 * Set the number of records before early exit: ->nr is used to
1209 * determine how many branches to copy from ->entries.
1210 */
1211 bs_dst->nr = bs_src->nr;
1212
1213 /*
1214 * Early exit when there is nothing to copy.
1215 */
1216 if (!bs_src->nr)
1217 return;
1218
1219 /*
1220 * As bs_src->entries is a circular buffer, we need to copy from it in
1221 * two steps. First, copy the branches from the most recently inserted
1222 * branch ->last_branch_pos until the end of bs_src->entries buffer.
1223 */
1224 nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
1225 memcpy(&bs_dst->entries[0],
1226 &bs_src->entries[tidq->last_branch_pos],
1227 sizeof(struct branch_entry) * nr);
1228
1229 /*
1230 * If we wrapped around at least once, the branches from the beginning
1231 * of the bs_src->entries buffer and until the ->last_branch_pos element
1232 * are older valid branches: copy them over. The total number of
1233 * branches copied over will be equal to the number of branches asked by
1234 * the user in last_branch_sz.
1235 */
1236 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
1237 memcpy(&bs_dst->entries[nr],
1238 &bs_src->entries[0],
1239 sizeof(struct branch_entry) * tidq->last_branch_pos);
1240 }
1241}
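/*
 * Worked example of the two-step copy above: with last_branch_sz = 4,
 * last_branch_pos = 1 and a full ring (bs_src->nr >= 4), step one copies
 * entries[1..3] (3 branches) and step two copies entries[0], leaving the
 * destination with 4 branches ordered from most to least recent.
 */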
1242
1243static inline
1244void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1245{
1246 tidq->last_branch_pos = 0;
1247 tidq->last_branch_rb->nr = 0;
1248}
1249
1250static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1251 u8 trace_chan_id, u64 addr)
1252{
1253 u8 instrBytes[2];
1254
1255 cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1256 instrBytes, 0);
1257 /*
1258 * T32 instruction size is indicated by bits[15:11] of the first
1259 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1260 * denote a 32-bit instruction.
1261 */
1262 return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
1263}
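/*
 * For example, in the size check above a first halfword whose upper byte is
 * 0xF0 (0xF0 & 0xF8 = 0xF0, which is >= 0xE8) denotes a 32-bit T32
 * instruction, while an upper byte of 0xB5 (0xB5 & 0xF8 = 0xB0) denotes a
 * 16-bit one.
 */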
1264
1265static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1266{
1267 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1268 if (packet->sample_type == CS_ETM_DISCONTINUITY)
1269 return 0;
1270
1271 return packet->start_addr;
1272}
1273
1274static inline
1275u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1276{
1277 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1278 if (packet->sample_type == CS_ETM_DISCONTINUITY)
1279 return 0;
1280
1281 return packet->end_addr - packet->last_instr_size;
1282}
1283
1284static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1285 u64 trace_chan_id,
1286 const struct cs_etm_packet *packet,
1287 u64 offset)
1288{
1289 if (packet->isa == CS_ETM_ISA_T32) {
1290 u64 addr = packet->start_addr;
1291
1292 while (offset) {
1293 addr += cs_etm__t32_instr_size(etmq,
1294 trace_chan_id, addr);
1295 offset--;
1296 }
1297 return addr;
1298 }
1299
1300 /* Assume a 4 byte instruction size (A32/A64) */
1301 return packet->start_addr + offset * 4;
1302}
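/*
 * For example, for an A64 range packet starting at 0x400000 an offset of 3
 * instructions yields 0x40000c, while for T32 the walk above adds 2 or 4
 * bytes per instruction depending on each instruction's size.
 */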
1303
1304static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1305 struct cs_etm_traceid_queue *tidq)
1306{
1307 struct branch_stack *bs = tidq->last_branch_rb;
1308 struct branch_entry *be;
1309
1310 /*
1311 * The branches are recorded in a circular buffer in reverse
1312 * chronological order: we start recording from the last element of the
1313 * buffer down. After writing the first element of the stack, move the
1314 * insert position back to the end of the buffer.
1315 */
1316 if (!tidq->last_branch_pos)
1317 tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1318
1319 tidq->last_branch_pos -= 1;
1320
1321 be = &bs->entries[tidq->last_branch_pos];
1322 be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1323 be->to = cs_etm__first_executed_instr(tidq->packet);
1324 /* No support for mispredict */
1325 be->flags.mispred = 0;
1326 be->flags.predicted = 1;
1327
1328 /*
1329 * Increment bs->nr until reaching the number of last branches asked by
1330 * the user on the command line.
1331 */
1332 if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1333 bs->nr += 1;
1334}
1335
1336static int cs_etm__inject_event(union perf_event *event,
1337 struct perf_sample *sample, u64 type)
1338{
1339 event->header.size = perf_event__sample_event_size(sample, type, 0);
1340 return perf_event__synthesize_sample(event, type, 0, sample);
1341}
1342
1343
1344static int
1345cs_etm__get_trace(struct cs_etm_queue *etmq)
1346{
1347 struct auxtrace_buffer *aux_buffer = etmq->buffer;
1348 struct auxtrace_buffer *old_buffer = aux_buffer;
1349 struct auxtrace_queue *queue;
1350
1351 queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1352
1353 aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1354
1355 /* If no more data, drop the previous auxtrace_buffer and return */
1356 if (!aux_buffer) {
1357 if (old_buffer)
1358 auxtrace_buffer__drop_data(old_buffer);
1359 etmq->buf_len = 0;
1360 return 0;
1361 }
1362
1363 etmq->buffer = aux_buffer;
1364
1365 /* If the aux_buffer doesn't have data associated, try to load it */
1366 if (!aux_buffer->data) {
1367 /* get the file desc associated with the perf data file */
1368 int fd = perf_data__fd(etmq->etm->session->data);
1369
1370 aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1371 if (!aux_buffer->data)
1372 return -ENOMEM;
1373 }
1374
1375 /* If valid, drop the previous buffer */
1376 if (old_buffer)
1377 auxtrace_buffer__drop_data(old_buffer);
1378
1379 etmq->buf_used = 0;
1380 etmq->buf_len = aux_buffer->size;
1381 etmq->buf = aux_buffer->data;
1382
1383 return etmq->buf_len;
1384}
1385
1386static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1387 struct cs_etm_traceid_queue *tidq, pid_t tid,
1388 ocsd_ex_level el)
1389{
1390 struct machine *machine = cs_etm__get_machine(etmq, el);
1391
1392 if (tid != -1) {
1393 thread__zput(tidq->thread);
1394 tidq->thread = machine__find_thread(machine, -1, tid);
1395 }
1396
1397 /* Couldn't find a known thread */
1398 if (!tidq->thread)
1399 tidq->thread = machine__idle_thread(machine);
1400
1401 tidq->el = el;
1402}
1403
1404int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1405 u8 trace_chan_id, ocsd_ex_level el)
1406{
1407 struct cs_etm_traceid_queue *tidq;
1408
1409 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1410 if (!tidq)
1411 return -EINVAL;
1412
1413 cs_etm__set_thread(etmq, tidq, tid, el);
1414 return 0;
1415}
1416
1417bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1418{
1419 return !!etmq->etm->timeless_decoding;
1420}
1421
1422static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1423 u64 trace_chan_id,
1424 const struct cs_etm_packet *packet,
1425 struct perf_sample *sample)
1426{
1427 /*
1428 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1429 * packet, so directly bail out with 'insn_len' = 0.
1430 */
1431 if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1432 sample->insn_len = 0;
1433 return;
1434 }
1435
1436 /*
1437 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1438 * cs_etm__t32_instr_size().
1439 */
1440 if (packet->isa == CS_ETM_ISA_T32)
1441 sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1442 sample->ip);
1443 /* Otherwise, A64 and A32 instruction size are always 32-bit. */
1444 else
1445 sample->insn_len = 4;
1446
1447 cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1448 (void *)sample->insn, 0);
1449}
1450
1451u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1452{
1453 struct cs_etm_auxtrace *etm = etmq->etm;
1454
1455 if (etm->has_virtual_ts)
1456 return tsc_to_perf_time(cs_timestamp, &etm->tc);
1457 else
1458 return cs_timestamp;
1459}
1460
1461static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1462 struct cs_etm_traceid_queue *tidq)
1463{
1464 struct cs_etm_auxtrace *etm = etmq->etm;
1465 struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1466
1467 if (!etm->timeless_decoding && etm->has_virtual_ts)
1468 return packet_queue->cs_timestamp;
1469 else
1470 return etm->latest_kernel_timestamp;
1471}
1472
1473static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1474 struct cs_etm_traceid_queue *tidq,
1475 u64 addr, u64 period)
1476{
1477 int ret = 0;
1478 struct cs_etm_auxtrace *etm = etmq->etm;
1479 union perf_event *event = tidq->event_buf;
1480 struct perf_sample sample = {.ip = 0,};
1481
1482 event->sample.header.type = PERF_RECORD_SAMPLE;
1483 event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1484 event->sample.header.size = sizeof(struct perf_event_header);
1485
1486 /* Set time field based on etm auxtrace config. */
1487 sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1488
1489 sample.ip = addr;
1490 sample.pid = thread__pid(tidq->thread);
1491 sample.tid = thread__tid(tidq->thread);
1492 sample.id = etmq->etm->instructions_id;
1493 sample.stream_id = etmq->etm->instructions_id;
1494 sample.period = period;
1495 sample.cpu = tidq->packet->cpu;
1496 sample.flags = tidq->prev_packet->flags;
1497 sample.cpumode = event->sample.header.misc;
1498
1499 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1500
1501 if (etm->synth_opts.last_branch)
1502 sample.branch_stack = tidq->last_branch;
1503
1504 if (etm->synth_opts.inject) {
1505 ret = cs_etm__inject_event(event, &sample,
1506 etm->instructions_sample_type);
1507 if (ret)
1508 return ret;
1509 }
1510
1511 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1512
1513 if (ret)
1514 pr_err(
1515 "CS ETM Trace: failed to deliver instruction event, error %d\n",
1516 ret);
1517
1518 return ret;
1519}
1520
1521/*
1522 * The cs etm packet encodes an instruction range between a branch target
1523 * and the next taken branch. Generate a branch sample accordingly.
1524 */
1525static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1526 struct cs_etm_traceid_queue *tidq)
1527{
1528 int ret = 0;
1529 struct cs_etm_auxtrace *etm = etmq->etm;
1530 struct perf_sample sample = {.ip = 0,};
1531 union perf_event *event = tidq->event_buf;
1532 struct dummy_branch_stack {
1533 u64 nr;
1534 u64 hw_idx;
1535 struct branch_entry entries;
1536 } dummy_bs;
1537 u64 ip;
1538
1539 ip = cs_etm__last_executed_instr(tidq->prev_packet);
1540
1541 event->sample.header.type = PERF_RECORD_SAMPLE;
1542 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1543 tidq->prev_packet_el);
1544 event->sample.header.size = sizeof(struct perf_event_header);
1545
1546 /* Set time field based on etm auxtrace config. */
1547 sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1548
1549 sample.ip = ip;
1550 sample.pid = thread__pid(tidq->prev_packet_thread);
1551 sample.tid = thread__tid(tidq->prev_packet_thread);
1552 sample.addr = cs_etm__first_executed_instr(tidq->packet);
1553 sample.id = etmq->etm->branches_id;
1554 sample.stream_id = etmq->etm->branches_id;
1555 sample.period = 1;
1556 sample.cpu = tidq->packet->cpu;
1557 sample.flags = tidq->prev_packet->flags;
1558 sample.cpumode = event->sample.header.misc;
1559
1560 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1561 &sample);
1562
1563 /*
1564 * perf report cannot handle events without a branch stack
1565 */
1566 if (etm->synth_opts.last_branch) {
1567 dummy_bs = (struct dummy_branch_stack){
1568 .nr = 1,
1569 .hw_idx = -1ULL,
1570 .entries = {
1571 .from = sample.ip,
1572 .to = sample.addr,
1573 },
1574 };
1575 sample.branch_stack = (struct branch_stack *)&dummy_bs;
1576 }
1577
1578 if (etm->synth_opts.inject) {
1579 ret = cs_etm__inject_event(event, &sample,
1580 etm->branches_sample_type);
1581 if (ret)
1582 return ret;
1583 }
1584
1585 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1586
1587 if (ret)
1588 pr_err(
1589 "CS ETM Trace: failed to deliver instruction event, error %d\n",
1590 ret);
1591
1592 return ret;
1593}
1594
1595struct cs_etm_synth {
1596 struct perf_tool dummy_tool;
1597 struct perf_session *session;
1598};
1599
1600static int cs_etm__event_synth(struct perf_tool *tool,
1601 union perf_event *event,
1602 struct perf_sample *sample __maybe_unused,
1603 struct machine *machine __maybe_unused)
1604{
1605 struct cs_etm_synth *cs_etm_synth =
1606 container_of(tool, struct cs_etm_synth, dummy_tool);
1607
1608 return perf_session__deliver_synth_event(cs_etm_synth->session,
1609 event, NULL);
1610}
1611
1612static int cs_etm__synth_event(struct perf_session *session,
1613 struct perf_event_attr *attr, u64 id)
1614{
1615 struct cs_etm_synth cs_etm_synth;
1616
1617 memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
1618 cs_etm_synth.session = session;
1619
1620 return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
1621 &id, cs_etm__event_synth);
1622}
1623
1624static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1625 struct perf_session *session)
1626{
1627 struct evlist *evlist = session->evlist;
1628 struct evsel *evsel;
1629 struct perf_event_attr attr;
1630 bool found = false;
1631 u64 id;
1632 int err;
1633
1634 evlist__for_each_entry(evlist, evsel) {
1635 if (evsel->core.attr.type == etm->pmu_type) {
1636 found = true;
1637 break;
1638 }
1639 }
1640
1641 if (!found) {
1642 pr_debug("No selected events with CoreSight Trace data\n");
1643 return 0;
1644 }
1645
1646 memset(&attr, 0, sizeof(struct perf_event_attr));
1647 attr.size = sizeof(struct perf_event_attr);
1648 attr.type = PERF_TYPE_HARDWARE;
1649 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1650 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1651 PERF_SAMPLE_PERIOD;
1652 if (etm->timeless_decoding)
1653 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1654 else
1655 attr.sample_type |= PERF_SAMPLE_TIME;
1656
1657 attr.exclude_user = evsel->core.attr.exclude_user;
1658 attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1659 attr.exclude_hv = evsel->core.attr.exclude_hv;
1660 attr.exclude_host = evsel->core.attr.exclude_host;
1661 attr.exclude_guest = evsel->core.attr.exclude_guest;
1662 attr.sample_id_all = evsel->core.attr.sample_id_all;
1663 attr.read_format = evsel->core.attr.read_format;
1664
1665 /* create new id val to be a fixed offset from evsel id */
1666 id = evsel->core.id[0] + 1000000000;
1667
1668 if (!id)
1669 id = 1;
1670
1671 if (etm->synth_opts.branches) {
1672 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1673 attr.sample_period = 1;
1674 attr.sample_type |= PERF_SAMPLE_ADDR;
1675 err = cs_etm__synth_event(session, &attr, id);
1676 if (err)
1677 return err;
1678 etm->branches_sample_type = attr.sample_type;
1679 etm->branches_id = id;
1680 id += 1;
1681 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1682 }
1683
1684 if (etm->synth_opts.last_branch) {
1685 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1686 /*
1687 * We don't use the hardware index, but the sample generation
1688 * code uses the new format branch_stack with this field,
1689 * so the event attributes must indicate that it's present.
1690 */
1691 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1692 }
1693
1694 if (etm->synth_opts.instructions) {
1695 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1696 attr.sample_period = etm->synth_opts.period;
1697 etm->instructions_sample_period = attr.sample_period;
1698 err = cs_etm__synth_event(session, &attr, id);
1699 if (err)
1700 return err;
1701 etm->instructions_sample_type = attr.sample_type;
1702 etm->instructions_id = id;
1703 id += 1;
1704 }
1705
1706 return 0;
1707}
1708
1709static int cs_etm__sample(struct cs_etm_queue *etmq,
1710 struct cs_etm_traceid_queue *tidq)
1711{
1712 struct cs_etm_auxtrace *etm = etmq->etm;
1713 int ret;
1714 u8 trace_chan_id = tidq->trace_chan_id;
1715 u64 instrs_prev;
1716
1717 /* Get instructions remainder from previous packet */
1718 instrs_prev = tidq->period_instructions;
1719
1720 tidq->period_instructions += tidq->packet->instr_count;
1721
1722 /*
1723 * Record a branch when the last instruction in
1724 * PREV_PACKET is a branch.
1725 */
1726 if (etm->synth_opts.last_branch &&
1727 tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1728 tidq->prev_packet->last_instr_taken_branch)
1729 cs_etm__update_last_branch_rb(etmq, tidq);
1730
1731 if (etm->synth_opts.instructions &&
1732 tidq->period_instructions >= etm->instructions_sample_period) {
1733 /*
1734 * Emit instruction sample periodically
1735 * TODO: allow period to be defined in cycles and clock time
1736 */
1737
1738 /*
1739 * Below diagram demonstrates the instruction samples
1740 * generation flows:
1741 *
1742		 *   Instrs       Instrs       Instrs       Instrs
1743		 *   Sample(n)    Sample(n+1)  Sample(n+2)  Sample(n+3)
1744		 *    |            |            |            |
1745		 *    V            V            V            V
1746		 *   --------------------------------------------------
1747		 *            ^                                  ^
1748		 *            |                                  |
1749		 *         Period                             Period
1750		 *      instructions(Pi)                 instructions(Pi')
1751		 *
1752		 *            |                                  |
1753		 *            \---------------- -----------------/
1754		 *                             V
1755		 *                 tidq->packet->instr_count
1756 *
1757 * Instrs Sample(n...) are the synthesised samples occurring
1758 * every etm->instructions_sample_period instructions - as
1759		 * defined on the perf command line. Sample(n) is the last
1760		 * sample before the current etm packet; samples n+1 to n+3
1761		 * are generated from the current etm packet.
1762 *
1763 * tidq->packet->instr_count represents the number of
1764 * instructions in the current etm packet.
1765 *
1766 * Period instructions (Pi) contains the number of
1767 * instructions executed after the sample point(n) from the
1768 * previous etm packet. This will always be less than
1769 * etm->instructions_sample_period.
1770 *
1771		 * When generating new samples, instructions from two parts are
1772		 * combined: the tail of the old packet and the head of the
1773		 * incoming packet together produce sample(n+1); sample(n+2) and
1774		 * sample(n+3) each consume a full sample period of
1775		 * instructions. After sample(n+3), the remaining instructions
1776		 * are left for the next packet and are assigned to
1777		 * tidq->period_instructions for the next round of calculation.
1778 */
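		/*
		 * Numeric example of the scheme above (illustrative values):
		 * with instructions_sample_period = 100, instrs_prev = 30 and
		 * instr_count = 250, the first sample lands 70 instructions
		 * into this packet, a second one 100 instructions later, and
		 * the remaining 80 instructions are carried over in
		 * tidq->period_instructions for the next packet.
		 */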
1779
1780 /*
1781 * Get the initial offset into the current packet instructions;
1782 * entry conditions ensure that instrs_prev is less than
1783 * etm->instructions_sample_period.
1784 */
1785 u64 offset = etm->instructions_sample_period - instrs_prev;
1786 u64 addr;
1787
1788 /* Prepare last branches for instruction sample */
1789 if (etm->synth_opts.last_branch)
1790 cs_etm__copy_last_branch_rb(etmq, tidq);
1791
1792 while (tidq->period_instructions >=
1793 etm->instructions_sample_period) {
1794 /*
1795 * Calculate the address of the sampled instruction (-1
1796 * as sample is reported as though instruction has just
1797 * been executed, but PC has not advanced to next
1798 * instruction)
1799 */
1800 addr = cs_etm__instr_addr(etmq, trace_chan_id,
1801 tidq->packet, offset - 1);
1802 ret = cs_etm__synth_instruction_sample(
1803 etmq, tidq, addr,
1804 etm->instructions_sample_period);
1805 if (ret)
1806 return ret;
1807
1808 offset += etm->instructions_sample_period;
1809 tidq->period_instructions -=
1810 etm->instructions_sample_period;
1811 }
1812 }
1813
1814 if (etm->synth_opts.branches) {
1815 bool generate_sample = false;
1816
1817 /* Generate sample for tracing on packet */
1818 if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1819 generate_sample = true;
1820
1821 /* Generate sample for branch taken packet */
1822 if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1823 tidq->prev_packet->last_instr_taken_branch)
1824 generate_sample = true;
1825
1826 if (generate_sample) {
1827 ret = cs_etm__synth_branch_sample(etmq, tidq);
1828 if (ret)
1829 return ret;
1830 }
1831 }
1832
1833 cs_etm__packet_swap(etm, tidq);
1834
1835 return 0;
1836}
1837
1838static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1839{
1840 /*
1841	 * When the exception packet is inserted, regardless of whether the
1842	 * last instruction in the previous range packet was a taken branch,
1843	 * force 'prev_packet->last_instr_taken_branch' to true. This ensures
1844	 * a branch sample is generated for the instruction range before the
1845	 * exception is trapped to the kernel or before the exception returns.
1846	 *
1847	 * The exception packet contains dummy address values, so don't
1848	 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET useful
1849	 * for generating instruction and branch samples.
1850 */
1851 if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1852 tidq->prev_packet->last_instr_taken_branch = true;
1853
1854 return 0;
1855}
1856
1857static int cs_etm__flush(struct cs_etm_queue *etmq,
1858 struct cs_etm_traceid_queue *tidq)
1859{
1860 int err = 0;
1861 struct cs_etm_auxtrace *etm = etmq->etm;
1862
1863 /* Handle start tracing packet */
1864 if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1865 goto swap_packet;
1866
1867 if (etmq->etm->synth_opts.last_branch &&
1868 etmq->etm->synth_opts.instructions &&
1869 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1870 u64 addr;
1871
1872 /* Prepare last branches for instruction sample */
1873 cs_etm__copy_last_branch_rb(etmq, tidq);
1874
1875 /*
1876 * Generate a last branch event for the branches left in the
1877 * circular buffer at the end of the trace.
1878 *
1879 * Use the address of the end of the last reported execution
1880 * range
1881 */
1882 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1883
1884 err = cs_etm__synth_instruction_sample(
1885 etmq, tidq, addr,
1886 tidq->period_instructions);
1887 if (err)
1888 return err;
1889
1890 tidq->period_instructions = 0;
1891
1892 }
1893
1894 if (etm->synth_opts.branches &&
1895 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1896 err = cs_etm__synth_branch_sample(etmq, tidq);
1897 if (err)
1898 return err;
1899 }
1900
1901swap_packet:
1902 cs_etm__packet_swap(etm, tidq);
1903
1904 /* Reset last branches after flush the trace */
1905 if (etm->synth_opts.last_branch)
1906 cs_etm__reset_last_branch_rb(tidq);
1907
1908 return err;
1909}
1910
1911static int cs_etm__end_block(struct cs_etm_queue *etmq,
1912 struct cs_etm_traceid_queue *tidq)
1913{
1914 int err;
1915
1916 /*
1917	 * No new packet is coming and 'etmq->packet' still holds the stale
1918	 * packet left over from the previous packet swap, so skip generating a
1919	 * branch sample from it.
1920	 *
1921	 * Instead, only flush the branch stack and generate a last branch
1922	 * event for the branches left in the circular buffer at the end of
1923	 * the trace.
1924 */
1925 if (etmq->etm->synth_opts.last_branch &&
1926 etmq->etm->synth_opts.instructions &&
1927 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1928 u64 addr;
1929
1930 /* Prepare last branches for instruction sample */
1931 cs_etm__copy_last_branch_rb(etmq, tidq);
1932
1933 /*
1934 * Use the address of the end of the last reported execution
1935 * range.
1936 */
1937 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1938
1939 err = cs_etm__synth_instruction_sample(
1940 etmq, tidq, addr,
1941 tidq->period_instructions);
1942 if (err)
1943 return err;
1944
1945 tidq->period_instructions = 0;
1946 }
1947
1948 return 0;
1949}
1950/*
1951 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
1952 * if need be.
1953 * Returns: < 0 if error
1954 * = 0 if no more auxtrace_buffer to read
1955 * > 0 if the current buffer isn't empty yet
1956 */
1957static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1958{
1959 int ret;
1960
1961 if (!etmq->buf_len) {
1962 ret = cs_etm__get_trace(etmq);
1963 if (ret <= 0)
1964 return ret;
1965 /*
1966 * We cannot assume consecutive blocks in the data file
1967 * are contiguous, reset the decoder to force re-sync.
1968 */
1969 ret = cs_etm_decoder__reset(etmq->decoder);
1970 if (ret)
1971 return ret;
1972 }
1973
1974 return etmq->buf_len;
1975}
1976
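/*
 * Read back the instruction that ends at @end_addr from the traced program's
 * memory and check whether it is an SVC, using the instruction size implied
 * by the ISA recorded in @packet.
 */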
1977static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
1978 struct cs_etm_packet *packet,
1979 u64 end_addr)
1980{
1981 /* Initialise to keep compiler happy */
1982 u16 instr16 = 0;
1983 u32 instr32 = 0;
1984 u64 addr;
1985
1986 switch (packet->isa) {
1987 case CS_ETM_ISA_T32:
1988 /*
1989 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
1990 *
1991 * b'15 b'8
1992 * +-----------------+--------+
1993 * | 1 1 0 1 1 1 1 1 | imm8 |
1994 * +-----------------+--------+
1995 *
1996		 * The specification only defines a 16-bit SVC encoding for
1997		 * T32 and has no 32-bit form, so read just 2 bytes here as
1998		 * the instruction size for T32.
1999 */
2000 addr = end_addr - 2;
2001 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2002 (u8 *)&instr16, 0);
2003 if ((instr16 & 0xFF00) == 0xDF00)
2004 return true;
2005
2006 break;
2007 case CS_ETM_ISA_A32:
2008 /*
2009 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2010 *
2011 * b'31 b'28 b'27 b'24
2012 * +---------+---------+-------------------------+
2013 * | !1111 | 1 1 1 1 | imm24 |
2014 * +---------+---------+-------------------------+
2015 */
2016 addr = end_addr - 4;
2017 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2018 (u8 *)&instr32, 0);
2019 if ((instr32 & 0x0F000000) == 0x0F000000 &&
2020 (instr32 & 0xF0000000) != 0xF0000000)
2021 return true;
2022
2023 break;
2024 case CS_ETM_ISA_A64:
2025 /*
2026 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2027 *
2028 * b'31 b'21 b'4 b'0
2029 * +-----------------------+---------+-----------+
2030 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 |
2031 * +-----------------------+---------+-----------+
2032 */
2033 addr = end_addr - 4;
2034 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2035 (u8 *)&instr32, 0);
2036 if ((instr32 & 0xFFE0001F) == 0xd4000001)
2037 return true;
2038
2039 break;
2040 case CS_ETM_ISA_UNKNOWN:
2041 default:
2042 break;
2043 }
2044
2045 return false;
2046}
2047
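/*
 * Check whether an exception packet corresponds to a system call: ETMv3
 * reports SVC directly, while the ETMv4 CS_ETMV4_EXC_CALL type also covers
 * SMC/HVC, so the instruction at the end of the previous range packet is
 * inspected to tell them apart.
 */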
2048static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2049 struct cs_etm_traceid_queue *tidq, u64 magic)
2050{
2051 u8 trace_chan_id = tidq->trace_chan_id;
2052 struct cs_etm_packet *packet = tidq->packet;
2053 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2054
2055 if (magic == __perf_cs_etmv3_magic)
2056 if (packet->exception_number == CS_ETMV3_EXC_SVC)
2057 return true;
2058
2059 /*
2060 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2061	 * HVC cases; check whether it is an SVC instruction based on the
2062	 * previous packet's end address.
2063 */
2064 if (magic == __perf_cs_etmv4_magic) {
2065 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2066 cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2067 prev_packet->end_addr))
2068 return true;
2069 }
2070
2071 return false;
2072}
2073
2074static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2075 u64 magic)
2076{
2077 struct cs_etm_packet *packet = tidq->packet;
2078
2079 if (magic == __perf_cs_etmv3_magic)
2080 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2081 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2082 packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2083 packet->exception_number == CS_ETMV3_EXC_IRQ ||
2084 packet->exception_number == CS_ETMV3_EXC_FIQ)
2085 return true;
2086
2087 if (magic == __perf_cs_etmv4_magic)
2088 if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2089 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2090 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2091 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2092 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2093 packet->exception_number == CS_ETMV4_EXC_IRQ ||
2094 packet->exception_number == CS_ETMV4_EXC_FIQ)
2095 return true;
2096
2097 return false;
2098}
2099
2100static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2101 struct cs_etm_traceid_queue *tidq,
2102 u64 magic)
2103{
2104 u8 trace_chan_id = tidq->trace_chan_id;
2105 struct cs_etm_packet *packet = tidq->packet;
2106 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2107
2108 if (magic == __perf_cs_etmv3_magic)
2109 if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2110 packet->exception_number == CS_ETMV3_EXC_HYP ||
2111 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2112 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2113 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2114 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2115 packet->exception_number == CS_ETMV3_EXC_GENERIC)
2116 return true;
2117
2118 if (magic == __perf_cs_etmv4_magic) {
2119 if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2120 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2121 packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2122 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2123 return true;
2124
2125 /*
2126		 * For CS_ETMV4_EXC_CALL, instructions other than SVC (i.e. SMC
2127		 * and HVC) are treated as synchronous exceptions.
2128 */
2129 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2130 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2131 prev_packet->end_addr))
2132 return true;
2133
2134 /*
2135		 * ETMv4 has 5 bits for the exception number; numbers in the
2136		 * range (CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END] are
2137		 * implementation defined exceptions.
2138		 *
2139		 * Simply treat these as synchronous exceptions.
2140 */
2141 if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2142 packet->exception_number <= CS_ETMV4_EXC_END)
2143 return true;
2144 }
2145
2146 return false;
2147}
2148
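/*
 * Derive perf branch sample flags (branch, call, return, syscall, interrupt,
 * trace begin/end) for the current packet, and fix up the previous packet's
 * flags where the current packet provides the missing context.
 */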
2149static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2150 struct cs_etm_traceid_queue *tidq)
2151{
2152 struct cs_etm_packet *packet = tidq->packet;
2153 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2154 u8 trace_chan_id = tidq->trace_chan_id;
2155 u64 magic;
2156 int ret;
2157
2158 switch (packet->sample_type) {
2159 case CS_ETM_RANGE:
2160 /*
2161		 * Immediate branch instruction with neither link nor
2162		 * return flag: a normal branch instruction within
2163		 * the function.
2164 */
2165 if (packet->last_instr_type == OCSD_INSTR_BR &&
2166 packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2167 packet->flags = PERF_IP_FLAG_BRANCH;
2168
2169 if (packet->last_instr_cond)
2170 packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2171 }
2172
2173 /*
2174 * Immediate branch instruction with link (e.g. BL), this is
2175		 * a branch instruction used for a function call.
2176 */
2177 if (packet->last_instr_type == OCSD_INSTR_BR &&
2178 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2179 packet->flags = PERF_IP_FLAG_BRANCH |
2180 PERF_IP_FLAG_CALL;
2181
2182 /*
2183 * Indirect branch instruction with link (e.g. BLR), this is
2184		 * a branch instruction used for a function call.
2185 */
2186 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2187 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2188 packet->flags = PERF_IP_FLAG_BRANCH |
2189 PERF_IP_FLAG_CALL;
2190
2191 /*
2192 * Indirect branch instruction with subtype of
2193		 * OCSD_S_INSTR_V7_IMPLIED_RET, an explicit hint of a
2194		 * function return for A32/T32.
2195 */
2196 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2197 packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2198 packet->flags = PERF_IP_FLAG_BRANCH |
2199 PERF_IP_FLAG_RETURN;
2200
2201 /*
2202 * Indirect branch instruction without link (e.g. BR), usually
2203		 * this is used for a function return, especially for functions
2204		 * within dynamically linked libraries.
2205 */
2206 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2207 packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2208 packet->flags = PERF_IP_FLAG_BRANCH |
2209 PERF_IP_FLAG_RETURN;
2210
2211 /* Return instruction for function return. */
2212 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2213 packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2214 packet->flags = PERF_IP_FLAG_BRANCH |
2215 PERF_IP_FLAG_RETURN;
2216
2217 /*
2218		 * The decoder might insert a discontinuity in the middle of
2219		 * instruction packets; fix up prev_packet with the flag
2220		 * PERF_IP_FLAG_TRACE_BEGIN to indicate that tracing restarted.
2221 */
2222 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2223 prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2224 PERF_IP_FLAG_TRACE_BEGIN;
2225
2226 /*
2227 * If the previous packet is an exception return packet
2228		 * and the return address immediately follows an SVC
2229		 * instruction, calibrate the previous packet's sample flags
2230		 * to PERF_IP_FLAG_SYSCALLRET.
2231 */
2232 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2233 PERF_IP_FLAG_RETURN |
2234 PERF_IP_FLAG_INTERRUPT) &&
2235 cs_etm__is_svc_instr(etmq, trace_chan_id,
2236 packet, packet->start_addr))
2237 prev_packet->flags = PERF_IP_FLAG_BRANCH |
2238 PERF_IP_FLAG_RETURN |
2239 PERF_IP_FLAG_SYSCALLRET;
2240 break;
2241 case CS_ETM_DISCONTINUITY:
2242 /*
2243		 * The trace is discontinuous: if the previous packet is an
2244		 * instruction range packet, set the flag PERF_IP_FLAG_TRACE_END
2245		 * on it.
2246 */
2247 if (prev_packet->sample_type == CS_ETM_RANGE)
2248 prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2249 PERF_IP_FLAG_TRACE_END;
2250 break;
2251 case CS_ETM_EXCEPTION:
2252 ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
2253 if (ret)
2254 return ret;
2255
2256		/* The exception is for a system call. */
2257 if (cs_etm__is_syscall(etmq, tidq, magic))
2258 packet->flags = PERF_IP_FLAG_BRANCH |
2259 PERF_IP_FLAG_CALL |
2260 PERF_IP_FLAG_SYSCALLRET;
2261 /*
2262		 * These exceptions are triggered by external signals: from the bus,
2263		 * the interrupt controller, the debug module, or a PE reset or halt.
2264 */
2265 else if (cs_etm__is_async_exception(tidq, magic))
2266 packet->flags = PERF_IP_FLAG_BRANCH |
2267 PERF_IP_FLAG_CALL |
2268 PERF_IP_FLAG_ASYNC |
2269 PERF_IP_FLAG_INTERRUPT;
2270 /*
2271		 * Otherwise, the exception is caused by a trap, an instruction
2272		 * or data fault, or an alignment error.
2273 */
2274 else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2275 packet->flags = PERF_IP_FLAG_BRANCH |
2276 PERF_IP_FLAG_CALL |
2277 PERF_IP_FLAG_INTERRUPT;
2278
2279 /*
2280		 * When the exception packet is inserted: since an exception
2281		 * packet is not used on its own for generating samples and is
2282		 * affiliated with the previous instruction range packet, set
2283		 * the previous range packet's flags to tell perf it is a
2284		 * branch taken to an exception.
2285 */
2286 if (prev_packet->sample_type == CS_ETM_RANGE)
2287 prev_packet->flags = packet->flags;
2288 break;
2289 case CS_ETM_EXCEPTION_RET:
2290 /*
2291		 * When the exception return packet is inserted: since an
2292		 * exception return packet is not used on its own for
2293		 * generating samples and is affiliated with the previous
2294		 * instruction range packet, set the previous range packet's
2295		 * flags to tell perf it is an exception return branch.
2296		 *
2297		 * The exception return can be for either a system call or
2298		 * another exception type; unfortunately the packet doesn't
2299		 * carry exception type information, so we cannot determine
2300		 * the type from the exception return packet alone.
2301		 * Recording the exception number from the exception packet
2302		 * and reusing it for the exception return packet is not
2303		 * reliable either, because the trace can be discontinuous or
2304		 * the interrupt can be nested; in both cases the recorded
2305		 * number would not match the exception return packet.
2306		 *
2307		 * For the exception return packet we only need to know
2308		 * whether it is for a system call or for another type. That
2309		 * decision can therefore be deferred until the next packet,
2310		 * which contains the return address: from that address we can
2311		 * read back the previous instruction, check whether it is a
2312		 * system call instruction, and calibrate the sample flags
2313		 * as needed.
2314 */
2315 if (prev_packet->sample_type == CS_ETM_RANGE)
2316 prev_packet->flags = PERF_IP_FLAG_BRANCH |
2317 PERF_IP_FLAG_RETURN |
2318 PERF_IP_FLAG_INTERRUPT;
2319 break;
2320 case CS_ETM_EMPTY:
2321 default:
2322 break;
2323 }
2324
2325 return 0;
2326}
2327
2328static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2329{
2330 int ret = 0;
2331 size_t processed = 0;
2332
2333 /*
2334 * Packets are decoded and added to the decoder's packet queue
2335 * until the decoder packet processing callback has requested that
2336 * processing stops or there is nothing left in the buffer. Normal
2337 * operations that stop processing are a timestamp packet or a full
2338 * decoder buffer queue.
2339 */
2340 ret = cs_etm_decoder__process_data_block(etmq->decoder,
2341 etmq->offset,
2342 &etmq->buf[etmq->buf_used],
2343 etmq->buf_len,
2344 &processed);
2345 if (ret)
2346 goto out;
2347
2348 etmq->offset += processed;
2349 etmq->buf_used += processed;
2350 etmq->buf_len -= processed;
2351
2352out:
2353 return ret;
2354}
2355
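/*
 * Drain the packet queue of a traceid queue, synthesizing samples according
 * to each packet type until the queue is empty or an error occurs.
 */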
2356static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2357 struct cs_etm_traceid_queue *tidq)
2358{
2359 int ret;
2360 struct cs_etm_packet_queue *packet_queue;
2361
2362 packet_queue = &tidq->packet_queue;
2363
2364 /* Process each packet in this chunk */
2365 while (1) {
2366 ret = cs_etm_decoder__get_packet(packet_queue,
2367 tidq->packet);
2368 if (ret <= 0)
2369 /*
2370 * Stop processing this chunk on
2371 * end of data or error
2372 */
2373 break;
2374
2375 /*
2376		 * Packet addresses are swapped by the packet
2377		 * handling in the switch() statement below, so
2378		 * the sample flags must be set before the
2379		 * switch() statement, while the address
2380		 * information is still valid.
2381 */
2382 ret = cs_etm__set_sample_flags(etmq, tidq);
2383 if (ret < 0)
2384 break;
2385
2386 switch (tidq->packet->sample_type) {
2387 case CS_ETM_RANGE:
2388 /*
2389 * If the packet contains an instruction
2390 * range, generate instruction sequence
2391 * events.
2392 */
2393 cs_etm__sample(etmq, tidq);
2394 break;
2395 case CS_ETM_EXCEPTION:
2396 case CS_ETM_EXCEPTION_RET:
2397 /*
2398			 * On an exception or exception return
2399			 * packet, make sure the previous instruction
2400			 * range packet is handled properly.
2401 */
2402 cs_etm__exception(tidq);
2403 break;
2404 case CS_ETM_DISCONTINUITY:
2405 /*
2406 * Discontinuity in trace, flush
2407 * previous branch stack
2408 */
2409 cs_etm__flush(etmq, tidq);
2410 break;
2411 case CS_ETM_EMPTY:
2412 /*
2413 * Should not receive empty packet,
2414 * report error.
2415 */
2416 pr_err("CS ETM Trace: empty packet\n");
2417 return -EINVAL;
2418 default:
2419 break;
2420 }
2421 }
2422
2423 return ret;
2424}
2425
2426static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2427{
2428 int idx;
2429 struct int_node *inode;
2430 struct cs_etm_traceid_queue *tidq;
2431 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2432
2433 intlist__for_each_entry(inode, traceid_queues_list) {
2434 idx = (int)(intptr_t)inode->priv;
2435 tidq = etmq->traceid_queues[idx];
2436
2437 /* Ignore return value */
2438 cs_etm__process_traceid_queue(etmq, tidq);
2439
2440 /*
2441 * Generate an instruction sample with the remaining
2442 * branchstack entries.
2443 */
2444 cs_etm__flush(etmq, tidq);
2445 }
2446}
2447
2448static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2449{
2450 int err = 0;
2451 struct cs_etm_traceid_queue *tidq;
2452
2453 tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2454 if (!tidq)
2455 return -EINVAL;
2456
2457 /* Go through each buffer in the queue and decode them one by one */
2458 while (1) {
2459 err = cs_etm__get_data_block(etmq);
2460 if (err <= 0)
2461 return err;
2462
2463 /* Run trace decoder until buffer consumed or end of trace */
2464 do {
2465 err = cs_etm__decode_data_block(etmq);
2466 if (err)
2467 return err;
2468
2469 /*
2470 * Process each packet in this chunk, nothing to do if
2471 * an error occurs other than hoping the next one will
2472 * be better.
2473 */
2474 err = cs_etm__process_traceid_queue(etmq, tidq);
2475
2476 } while (etmq->buf_len);
2477
2478 if (err == 0)
2479 /* Flush any remaining branch stack entries */
2480 err = cs_etm__end_block(etmq, tidq);
2481 }
2482
2483 return err;
2484}
2485
2486static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2487{
2488 int idx, err = 0;
2489 struct cs_etm_traceid_queue *tidq;
2490 struct int_node *inode;
2491
2492 /* Go through each buffer in the queue and decode them one by one */
2493 while (1) {
2494 err = cs_etm__get_data_block(etmq);
2495 if (err <= 0)
2496 return err;
2497
2498 /* Run trace decoder until buffer consumed or end of trace */
2499 do {
2500 err = cs_etm__decode_data_block(etmq);
2501 if (err)
2502 return err;
2503
2504 /*
2505 * cs_etm__run_per_thread_timeless_decoder() runs on a
2506 * single traceID queue because each TID has a separate
2507 * buffer. But here in per-cpu mode we need to iterate
2508 * over each channel instead.
2509 */
2510 intlist__for_each_entry(inode,
2511 etmq->traceid_queues_list) {
2512 idx = (int)(intptr_t)inode->priv;
2513 tidq = etmq->traceid_queues[idx];
2514 cs_etm__process_traceid_queue(etmq, tidq);
2515 }
2516 } while (etmq->buf_len);
2517
2518 intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2519 idx = (int)(intptr_t)inode->priv;
2520 tidq = etmq->traceid_queues[idx];
2521 /* Flush any remaining branch stack entries */
2522 err = cs_etm__end_block(etmq, tidq);
2523 if (err)
2524 return err;
2525 }
2526 }
2527
2528 return err;
2529}
2530
2531static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2532 pid_t tid)
2533{
2534 unsigned int i;
2535 struct auxtrace_queues *queues = &etm->queues;
2536
2537 for (i = 0; i < queues->nr_queues; i++) {
2538 struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2539 struct cs_etm_queue *etmq = queue->priv;
2540 struct cs_etm_traceid_queue *tidq;
2541
2542 if (!etmq)
2543 continue;
2544
2545 if (etm->per_thread_decoding) {
2546 tidq = cs_etm__etmq_get_traceid_queue(
2547 etmq, CS_ETM_PER_THREAD_TRACEID);
2548
2549 if (!tidq)
2550 continue;
2551
2552 if (tid == -1 || thread__tid(tidq->thread) == tid)
2553 cs_etm__run_per_thread_timeless_decoder(etmq);
2554 } else
2555 cs_etm__run_per_cpu_timeless_decoder(etmq);
2556 }
2557
2558 return 0;
2559}
2560
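/*
 * Decode all queues in timestamp order: a min heap keyed by CoreSight
 * timestamp holds one entry per queue/traceID, and the queue with the
 * oldest pending packets is always processed next.
 */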
2561static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2562{
2563 int ret = 0;
2564 unsigned int cs_queue_nr, queue_nr, i;
2565 u8 trace_chan_id;
2566 u64 cs_timestamp;
2567 struct auxtrace_queue *queue;
2568 struct cs_etm_queue *etmq;
2569 struct cs_etm_traceid_queue *tidq;
2570
2571 /*
2572 * Pre-populate the heap with one entry from each queue so that we can
2573 * start processing in time order across all queues.
2574 */
2575 for (i = 0; i < etm->queues.nr_queues; i++) {
2576 etmq = etm->queues.queue_array[i].priv;
2577 if (!etmq)
2578 continue;
2579
2580 ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2581 if (ret)
2582 return ret;
2583 }
2584
2585 while (1) {
2586 if (!etm->heap.heap_cnt)
2587 goto out;
2588
2589 /* Take the entry at the top of the min heap */
2590 cs_queue_nr = etm->heap.heap_array[0].queue_nr;
2591 queue_nr = TO_QUEUE_NR(cs_queue_nr);
2592 trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2593 queue = &etm->queues.queue_array[queue_nr];
2594 etmq = queue->priv;
2595
2596 /*
2597 * Remove the top entry from the heap since we are about
2598 * to process it.
2599 */
2600 auxtrace_heap__pop(&etm->heap);
2601
2602 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2603 if (!tidq) {
2604 /*
2605 * No traceID queue has been allocated for this traceID,
2606 * which means something somewhere went very wrong. No
2607 * other choice than simply exit.
2608 */
2609 ret = -EINVAL;
2610 goto out;
2611 }
2612
2613 /*
2614 * Packets associated with this timestamp are already in
2615 * the etmq's traceID queue, so process them.
2616 */
2617 ret = cs_etm__process_traceid_queue(etmq, tidq);
2618 if (ret < 0)
2619 goto out;
2620
2621 /*
2622 * Packets for this timestamp have been processed, time to
2623 * move on to the next timestamp, fetching a new auxtrace_buffer
2624 * if need be.
2625 */
2626refetch:
2627 ret = cs_etm__get_data_block(etmq);
2628 if (ret < 0)
2629 goto out;
2630
2631 /*
2632 * No more auxtrace_buffers to process in this etmq, simply
2633 * move on to another entry in the auxtrace_heap.
2634 */
2635 if (!ret)
2636 continue;
2637
2638 ret = cs_etm__decode_data_block(etmq);
2639 if (ret)
2640 goto out;
2641
2642 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2643
2644 if (!cs_timestamp) {
2645 /*
2646			 * Function cs_etm__decode_data_block() returns when
2647			 * there is no more trace to decode in the current
2648			 * auxtrace_buffer OR when a timestamp has been
2649			 * encountered on any of the traceID queues. Since we
2650			 * did not get a timestamp, there is no more trace to
2651			 * process in this auxtrace_buffer. As such, empty and
2652			 * flush all traceID queues.
2653 */
2654 cs_etm__clear_all_traceid_queues(etmq);
2655
2656 /* Fetch another auxtrace_buffer for this etmq */
2657 goto refetch;
2658 }
2659
2660 /*
2661 * Add to the min heap the timestamp for packets that have
2662 * just been decoded. They will be processed and synthesized
2663 * during the next call to cs_etm__process_traceid_queue() for
2664 * this queue/traceID.
2665 */
2666 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2667 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
2668 }
2669
2670out:
2671 return ret;
2672}
2673
2674static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2675 union perf_event *event)
2676{
2677 struct thread *th;
2678
2679 if (etm->timeless_decoding)
2680 return 0;
2681
2682 /*
2683 * Add the tid/pid to the log so that we can get a match when we get a
2684 * contextID from the decoder. Only track for the host: only kernel
2685 * trace is supported for guests which wouldn't need pids so this should
2686 * be fine.
2687 */
2688 th = machine__findnew_thread(&etm->session->machines.host,
2689 event->itrace_start.pid,
2690 event->itrace_start.tid);
2691 if (!th)
2692 return -ENOMEM;
2693
2694 thread__put(th);
2695
2696 return 0;
2697}
2698
2699static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2700 union perf_event *event)
2701{
2702 struct thread *th;
2703 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2704
2705 /*
2706	 * Context switches in per-thread mode are irrelevant since perf
2707 * will start/stop tracing as the process is scheduled.
2708 */
2709 if (etm->timeless_decoding)
2710 return 0;
2711
2712 /*
2713 * SWITCH_IN events carry the next process to be switched out while
2714 * SWITCH_OUT events carry the process to be switched in. As such
2715 * we don't care about IN events.
2716 */
2717 if (!out)
2718 return 0;
2719
2720 /*
2721 * Add the tid/pid to the log so that we can get a match when we get a
2722 * contextID from the decoder. Only track for the host: only kernel
2723 * trace is supported for guests which wouldn't need pids so this should
2724 * be fine.
2725 */
2726 th = machine__findnew_thread(&etm->session->machines.host,
2727 event->context_switch.next_prev_pid,
2728 event->context_switch.next_prev_tid);
2729 if (!th)
2730 return -ENOMEM;
2731
2732 thread__put(th);
2733
2734 return 0;
2735}
2736
2737static int cs_etm__process_event(struct perf_session *session,
2738 union perf_event *event,
2739 struct perf_sample *sample,
2740 struct perf_tool *tool)
2741{
2742 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2743 struct cs_etm_auxtrace,
2744 auxtrace);
2745
2746 if (dump_trace)
2747 return 0;
2748
2749 if (!tool->ordered_events) {
2750 pr_err("CoreSight ETM Trace requires ordered events\n");
2751 return -EINVAL;
2752 }
2753
2754 switch (event->header.type) {
2755 case PERF_RECORD_EXIT:
2756 /*
2757 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2758 * start the decode because we know there will be no more trace from
2759 * this thread. All this does is emit samples earlier than waiting for
2760 * the flush in other modes, but with timestamps it makes sense to wait
2761 * for flush so that events from different threads are interleaved
2762 * properly.
2763 */
2764 if (etm->per_thread_decoding && etm->timeless_decoding)
2765 return cs_etm__process_timeless_queues(etm,
2766 event->fork.tid);
2767 break;
2768
2769 case PERF_RECORD_ITRACE_START:
2770 return cs_etm__process_itrace_start(etm, event);
2771
2772 case PERF_RECORD_SWITCH_CPU_WIDE:
2773 return cs_etm__process_switch_cpu_wide(etm, event);
2774
2775 case PERF_RECORD_AUX:
2776 /*
2777 * Record the latest kernel timestamp available in the header
2778 * for samples so that synthesised samples occur from this point
2779 * onwards.
2780 */
2781 if (sample->time && (sample->time != (u64)-1))
2782 etm->latest_kernel_timestamp = sample->time;
2783 break;
2784
2785 default:
2786 break;
2787 }
2788
2789 return 0;
2790}
2791
2792static void dump_queued_data(struct cs_etm_auxtrace *etm,
2793 struct perf_record_auxtrace *event)
2794{
2795 struct auxtrace_buffer *buf;
2796 unsigned int i;
2797 /*
2798	 * Find all buffers with the same reference in the queues and dump them.
2799 * This is because the queues can contain multiple entries of the same
2800 * buffer that were split on aux records.
2801 */
2802 for (i = 0; i < etm->queues.nr_queues; ++i)
2803 list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2804 if (buf->reference == event->reference)
2805 cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2806}
2807
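/*
 * Handle PERF_RECORD_AUXTRACE events. If the trace data hasn't already been
 * queued from the auxtrace index (e.g. piped input), queue the buffer and
 * set up its decode queue here; in dump mode the raw trace is also dumped.
 */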
2808static int cs_etm__process_auxtrace_event(struct perf_session *session,
2809 union perf_event *event,
2810 struct perf_tool *tool __maybe_unused)
2811{
2812 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2813 struct cs_etm_auxtrace,
2814 auxtrace);
2815 if (!etm->data_queued) {
2816 struct auxtrace_buffer *buffer;
2817 off_t data_offset;
2818 int fd = perf_data__fd(session->data);
2819 bool is_pipe = perf_data__is_pipe(session->data);
2820 int err;
2821 int idx = event->auxtrace.idx;
2822
2823 if (is_pipe)
2824 data_offset = 0;
2825 else {
2826 data_offset = lseek(fd, 0, SEEK_CUR);
2827 if (data_offset == -1)
2828 return -errno;
2829 }
2830
2831 err = auxtrace_queues__add_event(&etm->queues, session,
2832 event, data_offset, &buffer);
2833 if (err)
2834 return err;
2835
2836 /*
2837 * Knowing if the trace is formatted or not requires a lookup of
2838 * the aux record so only works in non-piped mode where data is
2839 * queued in cs_etm__queue_aux_records(). Always assume
2840 * formatted in piped mode (true).
2841 */
2842 err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
2843 idx, true, -1);
2844 if (err)
2845 return err;
2846
2847 if (dump_trace)
2848 if (auxtrace_buffer__get_data(buffer, fd)) {
2849 cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2850 auxtrace_buffer__put_data(buffer);
2851 }
2852 } else if (dump_trace)
2853 dump_queued_data(etm, &event->auxtrace);
2854
2855 return 0;
2856}
2857
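/*
 * Decide whether to decode without timestamps: forced by the user with
 * --itrace=Z, or implied when the cs_etm event was recorded without the
 * timestamp (TS) option.
 */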
2858static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2859{
2860 struct evsel *evsel;
2861 struct evlist *evlist = etm->session->evlist;
2862
2863 /* Override timeless mode with user input from --itrace=Z */
2864 if (etm->synth_opts.timeless_decoding) {
2865 etm->timeless_decoding = true;
2866 return 0;
2867 }
2868
2869 /*
2870 * Find the cs_etm evsel and look at what its timestamp setting was
2871 */
2872 evlist__for_each_entry(evlist, evsel)
2873 if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2874 etm->timeless_decoding =
2875 !(evsel->core.attr.config & BIT(ETM_OPT_TS));
2876 return 0;
2877 }
2878
2879 pr_err("CS ETM: Couldn't find ETM evsel\n");
2880 return -EINVAL;
2881}
2882
2883/*
2884 * Read a single cpu parameter block from the auxtrace_info priv block.
2885 *
2886 * For version 1 there is a per cpu nr_params entry. If we are handling a
2887 * version 1 file, this value may indicate fewer, the same, or more params
2888 * than the compile time number we understand.
2889 *
2890 * For a version 0 info block, there are a fixed number, and we need to
2891 * fill out the nr_param value in the metadata we create.
2892 */
2893static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2894 int out_blk_size, int nr_params_v0)
2895{
2896 u64 *metadata = NULL;
2897 int hdr_version;
2898 int nr_in_params, nr_out_params, nr_cmn_params;
2899 int i, k;
2900
2901 metadata = zalloc(sizeof(*metadata) * out_blk_size);
2902 if (!metadata)
2903 return NULL;
2904
2905 /* read block current index & version */
2906 i = *buff_in_offset;
2907 hdr_version = buff_in[CS_HEADER_VERSION];
2908
2909 if (!hdr_version) {
2910 /* read version 0 info block into a version 1 metadata block */
2911 nr_in_params = nr_params_v0;
2912 metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2913 metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2914 metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2915 /* remaining block params at offset +1 from source */
2916 for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2917 metadata[k + 1] = buff_in[i + k];
2918 /* version 0 has 2 common params */
2919 nr_cmn_params = 2;
2920 } else {
2921 /* read version 1 info block - input and output nr_params may differ */
2922 /* version 1 has 3 common params */
2923 nr_cmn_params = 3;
2924 nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2925
2926 /* if input has more params than output - skip excess */
2927 nr_out_params = nr_in_params + nr_cmn_params;
2928 if (nr_out_params > out_blk_size)
2929 nr_out_params = out_blk_size;
2930
2931 for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2932 metadata[k] = buff_in[i + k];
2933
2934 /* record the actual nr params we copied */
2935 metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2936 }
2937
2938 /* adjust in offset by number of in params used */
2939 i += nr_in_params + nr_cmn_params;
2940 *buff_in_offset = i;
2941 return metadata;
2942}
2943
2944/**
2945 * Puts a fragment of an auxtrace buffer into the auxtrace queues based
2946 * on the bounds of aux_event, if it matches with the buffer that's at
2947 * file_offset.
2948 *
2949 * Normally, whole auxtrace buffers would be added to the queue. But we
2950 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
2951 * is reset across each buffer, so splitting the buffers up in advance has
2952 * the same effect.
2953 */
2954static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
2955 struct perf_record_aux *aux_event, struct perf_sample *sample)
2956{
2957 int err;
2958 char buf[PERF_SAMPLE_MAX_SIZE];
2959 union perf_event *auxtrace_event_union;
2960 struct perf_record_auxtrace *auxtrace_event;
2961 union perf_event auxtrace_fragment;
2962 __u64 aux_offset, aux_size;
2963 __u32 idx;
2964 bool formatted;
2965
2966 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2967 struct cs_etm_auxtrace,
2968 auxtrace);
2969
2970 /*
2971 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
2972 * from looping through the auxtrace index.
2973 */
2974 err = perf_session__peek_event(session, file_offset, buf,
2975 PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
2976 if (err)
2977 return err;
2978 auxtrace_event = &auxtrace_event_union->auxtrace;
2979 if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
2980 return -EINVAL;
2981
2982 if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
2983 auxtrace_event->header.size != sz) {
2984 return -EINVAL;
2985 }
2986
2987 /*
2988 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
2989 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
2990 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
2991 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
2992 * Return 'not found' if mismatch.
2993 */
2994 if (auxtrace_event->cpu == (__u32) -1) {
2995 etm->per_thread_decoding = true;
2996 if (auxtrace_event->tid != sample->tid)
2997 return 1;
2998 } else if (auxtrace_event->cpu != sample->cpu) {
2999 if (etm->per_thread_decoding) {
3000 /*
3001 * Found a per-cpu buffer after a per-thread one was
3002 * already found
3003 */
3004 pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3005 return -EINVAL;
3006 }
3007 return 1;
3008 }
3009
3010 if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
3011 /*
3012 * Clamp size in snapshot mode. The buffer size is clamped in
3013 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3014 * the buffer size.
3015 */
3016 aux_size = min(aux_event->aux_size, auxtrace_event->size);
3017
3018 /*
3019 * In this mode, the head also points to the end of the buffer so aux_offset
3020		 * needs to have the size subtracted so that it points to the beginning, as in normal mode.
3021 */
3022 aux_offset = aux_event->aux_offset - aux_size;
3023 } else {
3024 aux_size = aux_event->aux_size;
3025 aux_offset = aux_event->aux_offset;
3026 }
3027
3028 if (aux_offset >= auxtrace_event->offset &&
3029 aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3030 /*
3031 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3032 * based on the sizes of the aux event, and queue that fragment.
3033 */
3034 auxtrace_fragment.auxtrace = *auxtrace_event;
3035 auxtrace_fragment.auxtrace.size = aux_size;
3036 auxtrace_fragment.auxtrace.offset = aux_offset;
3037 file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3038
3039 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3040 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3041 err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3042 file_offset, NULL);
3043 if (err)
3044 return err;
3045
3046 idx = auxtrace_event->idx;
3047 formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
3048 return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
3049 idx, formatted, sample->cpu);
3050 }
3051
3052 /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3053 return 1;
3054}
3055
3056static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3057 u64 offset __maybe_unused, void *data __maybe_unused)
3058{
3059 /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3060 if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3061 (*(int *)data)++; /* increment found count */
3062 return cs_etm__process_aux_output_hw_id(session, event);
3063 }
3064 return 0;
3065}
3066
3067static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3068 u64 offset __maybe_unused, void *data __maybe_unused)
3069{
3070 struct perf_sample sample;
3071 int ret;
3072 struct auxtrace_index_entry *ent;
3073 struct auxtrace_index *auxtrace_index;
3074 struct evsel *evsel;
3075 size_t i;
3076
3077 /* Don't care about any other events, we're only queuing buffers for AUX events */
3078 if (event->header.type != PERF_RECORD_AUX)
3079 return 0;
3080
3081 if (event->header.size < sizeof(struct perf_record_aux))
3082 return -EINVAL;
3083
3084 /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3085 if (!event->aux.aux_size)
3086 return 0;
3087
3088 /*
3089 * Parse the sample, we need the sample_id_all data that comes after the event so that the
3090 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3091 */
3092 evsel = evlist__event2evsel(session->evlist, event);
3093 if (!evsel)
3094 return -EINVAL;
3095 ret = evsel__parse_sample(evsel, event, &sample);
3096 if (ret)
3097 return ret;
3098
3099 /*
3100 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3101 */
3102 list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3103 for (i = 0; i < auxtrace_index->nr; i++) {
3104 ent = &auxtrace_index->entries[i];
3105 ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3106 ent->sz, &event->aux, &sample);
3107 /*
3108 * Stop search on error or successful values. Continue search on
3109 * 1 ('not found')
3110 */
3111 if (ret != 1)
3112 return ret;
3113 }
3114 }
3115
3116 /*
3117 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3118 * don't exit with an error because it will still be possible to decode other aux records.
3119 */
3120 pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3121 " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3122 return 0;
3123}
3124
3125static int cs_etm__queue_aux_records(struct perf_session *session)
3126{
3127 struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3128 struct auxtrace_index, list);
3129 if (index && index->nr > 0)
3130 return perf_session__peek_events(session, session->header.data_offset,
3131 session->header.data_size,
3132 cs_etm__queue_aux_records_cb, NULL);
3133
3134 /*
3135 * We would get here if there are no entries in the index (either no auxtrace
3136 * buffers or no index at all). Fail silently as there is the possibility of
3137 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3138 * false.
3139 *
3140 * In that scenario, buffers will not be split by AUX records.
3141 */
3142 return 0;
3143}
3144
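/*
 * Note: despite its name, HAS_PARAM() evaluates to true when the metadata
 * block for CPU @j does *not* contain @param, i.e. the recorded number of
 * trace parameters is too small to include it.
 */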
3145#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3146 (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3147
3148/*
3149 * Loop through the ETMs and return false if we find at least one where ts_source != 1
3150 * (i.e. virtual timestamps are not in use); the caller warns the user in that case.
3151 */
3152static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3153{
3154 int j;
3155
3156 for (j = 0; j < num_cpu; j++) {
3157 switch (metadata[j][CS_ETM_MAGIC]) {
3158 case __perf_cs_etmv4_magic:
3159 if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3160 return false;
3161 break;
3162 case __perf_cs_ete_magic:
3163 if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3164 return false;
3165 break;
3166 default:
3167 /* Unknown / unsupported magic number. */
3168 return false;
3169 }
3170 }
3171 return true;
3172}
3173
3174/* map trace ids to correct metadata block, from information in metadata */
3175static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
3176{
3177 u64 cs_etm_magic;
3178 u8 trace_chan_id;
3179 int i, err;
3180
3181 for (i = 0; i < num_cpu; i++) {
3182 cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3183 switch (cs_etm_magic) {
3184 case __perf_cs_etmv3_magic:
3185 metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3186 trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3187 break;
3188 case __perf_cs_etmv4_magic:
3189 case __perf_cs_ete_magic:
3190 metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3191 trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3192 break;
3193 default:
3194 /* unknown magic number */
3195 return -EINVAL;
3196 }
3197 err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
3198 if (err)
3199 return err;
3200 }
3201 return 0;
3202}
3203
3204/*
3205 * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
3206 * unused value to reduce the number of unneeded decoders created.
3207 */
3208static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
3209{
3210 u64 cs_etm_magic;
3211 int i;
3212
3213 for (i = 0; i < num_cpu; i++) {
3214 cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3215 switch (cs_etm_magic) {
3216 case __perf_cs_etmv3_magic:
3217 if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3218 metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3219 break;
3220 case __perf_cs_etmv4_magic:
3221 case __perf_cs_ete_magic:
3222 if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3223 metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3224 break;
3225 default:
3226 /* unknown magic number */
3227 return -EINVAL;
3228 }
3229 }
3230 return 0;
3231}
3232
3233int cs_etm__process_auxtrace_info_full(union perf_event *event,
3234 struct perf_session *session)
3235{
3236 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3237 struct cs_etm_auxtrace *etm = NULL;
3238 struct perf_record_time_conv *tc = &session->time_conv;
3239 int event_header_size = sizeof(struct perf_event_header);
3240 int total_size = auxtrace_info->header.size;
3241 int priv_size = 0;
3242 int num_cpu;
3243 int err = 0;
3244 int aux_hw_id_found;
3245 int i, j;
3246 u64 *ptr = NULL;
3247 u64 **metadata = NULL;
3248
3249 /*
3250 * Create an RB tree for traceID-metadata tuple. Since the conversion
3251 * has to be made for each packet that gets decoded, optimizing access
3252	 * has to be made for each packet that gets decoded, it is worth
3253	 * optimizing access with something faster than a sequential array.
3254 traceid_list = intlist__new(NULL);
3255 if (!traceid_list)
3256 return -ENOMEM;
3257
3258 /* First the global part */
3259 ptr = (u64 *) auxtrace_info->priv;
3260 num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3261 metadata = zalloc(sizeof(*metadata) * num_cpu);
3262 if (!metadata) {
3263 err = -ENOMEM;
3264 goto err_free_traceid_list;
3265 }
3266
3267 /* Start parsing after the common part of the header */
3268 i = CS_HEADER_VERSION_MAX;
3269
3270 /*
3271 * The metadata is stored in the auxtrace_info section and encodes
3272 * the configuration of the ARM embedded trace macrocell which is
3273 * required by the trace decoder to properly decode the trace due
3274 * to its highly compressed nature.
3275 */
3276 for (j = 0; j < num_cpu; j++) {
3277 if (ptr[i] == __perf_cs_etmv3_magic) {
3278 metadata[j] =
3279 cs_etm__create_meta_blk(ptr, &i,
3280 CS_ETM_PRIV_MAX,
3281 CS_ETM_NR_TRC_PARAMS_V0);
3282 } else if (ptr[i] == __perf_cs_etmv4_magic) {
3283 metadata[j] =
3284 cs_etm__create_meta_blk(ptr, &i,
3285 CS_ETMV4_PRIV_MAX,
3286 CS_ETMV4_NR_TRC_PARAMS_V0);
3287 } else if (ptr[i] == __perf_cs_ete_magic) {
3288 metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3289 } else {
3290 ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3291 ptr[i]);
3292 err = -EINVAL;
3293 goto err_free_metadata;
3294 }
3295
3296 if (!metadata[j]) {
3297 err = -ENOMEM;
3298 goto err_free_metadata;
3299 }
3300 }
3301
3302 /*
3303 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3304	 * CS_ETMV4_PRIV_MAX marks how many double words are in the
3305	 * global metadata and in each cpu's metadata respectively.
3306 * The following tests if the correct number of double words was
3307 * present in the auxtrace info section.
3308 */
3309 priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3310 if (i * 8 != priv_size) {
3311 err = -EINVAL;
3312 goto err_free_metadata;
3313 }
3314
3315 etm = zalloc(sizeof(*etm));
3316
3317 if (!etm) {
3318 err = -ENOMEM;
3319 goto err_free_metadata;
3320 }
3321
3322 /*
3323 * As all the ETMs run at the same exception level, the system should
3324	 * have the same PID format across CPUs. So cache the PID format
3325 * and reuse it for sequential decoding.
3326 */
3327 etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3328
3329 err = auxtrace_queues__init(&etm->queues);
3330 if (err)
3331 goto err_free_etm;
3332
3333 if (session->itrace_synth_opts->set) {
3334 etm->synth_opts = *session->itrace_synth_opts;
3335 } else {
3336 itrace_synth_opts__set_default(&etm->synth_opts,
3337 session->itrace_synth_opts->default_no_sample);
3338 etm->synth_opts.callchain = false;
3339 }
3340
3341 etm->session = session;
3342
3343 etm->num_cpu = num_cpu;
3344 etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3345 etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3346 etm->metadata = metadata;
3347 etm->auxtrace_type = auxtrace_info->type;
3348
3349 if (etm->synth_opts.use_timestamp)
3350 /*
3351		 * Prior to Armv8.4, Arm CPUs don't support the FEAT_TRF feature,
3352		 * so the decoder cannot know whether the traced timestamps are
3353		 * the same as kernel time.
3354		 *
3355		 * A user who knows the platform can pass the itrace option 'T'
3356		 * to tell the decoder to use the traced timestamps as kernel
3357		 * time anyway.
3358 */
3359 etm->has_virtual_ts = true;
3360 else
3361 /* Use virtual timestamps if all ETMs report ts_source = 1 */
3362 etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3363
3364 if (!etm->has_virtual_ts)
3365 ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3366 "The time field of the samples will not be set accurately.\n"
3367			    "For Arm CPUs prior to Armv8.4 or without FEAT_TRF support,\n"
3368			    "you can specify the itrace option 'T' for timestamp decoding\n"
3369			    "if the CoreSight timestamp on the platform is the same as the kernel time.\n\n");
3370
3371 etm->auxtrace.process_event = cs_etm__process_event;
3372 etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3373 etm->auxtrace.flush_events = cs_etm__flush_events;
3374 etm->auxtrace.free_events = cs_etm__free_events;
3375 etm->auxtrace.free = cs_etm__free;
3376 etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3377 session->auxtrace = &etm->auxtrace;
3378
3379 err = cs_etm__setup_timeless_decoding(etm);
3380 if (err)
3381 return err;
3382
3383 etm->tc.time_shift = tc->time_shift;
3384 etm->tc.time_mult = tc->time_mult;
3385 etm->tc.time_zero = tc->time_zero;
3386 if (event_contains(*tc, time_cycles)) {
3387 etm->tc.time_cycles = tc->time_cycles;
3388 etm->tc.time_mask = tc->time_mask;
3389 etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3390 etm->tc.cap_user_time_short = tc->cap_user_time_short;
3391 }
3392 err = cs_etm__synth_events(etm, session);
3393 if (err)
3394 goto err_free_queues;
3395
3396 /*
3397 * Map Trace ID values to CPU metadata.
3398 *
3399 * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
3400 * file has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
3401 * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
3402 *
3403 * The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use
3404 * the same IDs as the old algorithm as far as is possible, unless there are clashes,
3405 * in which case a different value will be used. This means an older perf may still
3406 * be able to record and read files generated on a newer system.
3407 *
3408 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3409 * those packets. If they are there then the values will be mapped and plugged into
3410 * the metadata. We then set any remaining metadata values still marked as unused to the
3411 * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
3412 *
3413 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3414 * then we map Trace ID values to CPU directly from the metadata - clearing any unused
3415 * flags if present.
3416 */
3417
3418 /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3419 aux_hw_id_found = 0;
3420 err = perf_session__peek_events(session, session->header.data_offset,
3421 session->header.data_size,
3422 cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3423 if (err)
3424 goto err_free_queues;
3425
3426 /* if HW ID found then clear any unused metadata ID values */
3427 if (aux_hw_id_found)
3428 err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
3429 /* otherwise, this is a file with metadata values only, map from metadata */
3430 else
3431 err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
3432
3433 if (err)
3434 goto err_free_queues;
3435
3436 err = cs_etm__queue_aux_records(session);
3437 if (err)
3438 goto err_free_queues;
3439
3440 etm->data_queued = etm->queues.populated;
3441 return 0;
3442
3443err_free_queues:
3444 auxtrace_queues__free(&etm->queues);
3445 session->auxtrace = NULL;
3446err_free_etm:
3447 zfree(&etm);
3448err_free_metadata:
3449 /* No need to check @metadata[j], free(NULL) is supported */
3450 for (j = 0; j < num_cpu; j++)
3451 zfree(&metadata[j]);
3452 zfree(&metadata);
3453err_free_traceid_list:
3454 intlist__delete(traceid_list);
3455 return err;
3456}
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright(C) 2015-2018 Linaro Limited.
4 *
5 * Author: Tor Jeremiassen <tor@ti.com>
6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7 */
8
9#include <linux/bitops.h>
10#include <linux/coresight-pmu.h>
11#include <linux/err.h>
12#include <linux/kernel.h>
13#include <linux/log2.h>
14#include <linux/types.h>
15#include <linux/zalloc.h>
16
17#include <opencsd/ocsd_if_types.h>
18#include <stdlib.h>
19
20#include "auxtrace.h"
21#include "color.h"
22#include "cs-etm.h"
23#include "cs-etm-decoder/cs-etm-decoder.h"
24#include "debug.h"
25#include "dso.h"
26#include "evlist.h"
27#include "intlist.h"
28#include "machine.h"
29#include "map.h"
30#include "perf.h"
31#include "session.h"
32#include "map_symbol.h"
33#include "branch.h"
34#include "symbol.h"
35#include "tool.h"
36#include "thread.h"
37#include "thread-stack.h"
38#include <tools/libc_compat.h>
39#include "util/synthetic-events.h"
40
41struct cs_etm_auxtrace {
42 struct auxtrace auxtrace;
43 struct auxtrace_queues queues;
44 struct auxtrace_heap heap;
45 struct itrace_synth_opts synth_opts;
46 struct perf_session *session;
47 struct machine *machine;
48 struct thread *unknown_thread;
49
50 u8 timeless_decoding;
51 u8 snapshot_mode;
52 u8 data_queued;
53
54 int num_cpu;
55 u64 latest_kernel_timestamp;
56 u32 auxtrace_type;
57 u64 branches_sample_type;
58 u64 branches_id;
59 u64 instructions_sample_type;
60 u64 instructions_sample_period;
61 u64 instructions_id;
62 u64 **metadata;
63 unsigned int pmu_type;
64};
65
66struct cs_etm_traceid_queue {
67 u8 trace_chan_id;
68 pid_t pid, tid;
69 u64 period_instructions;
70 size_t last_branch_pos;
71 union perf_event *event_buf;
72 struct thread *thread;
73 struct branch_stack *last_branch;
74 struct branch_stack *last_branch_rb;
75 struct cs_etm_packet *prev_packet;
76 struct cs_etm_packet *packet;
77 struct cs_etm_packet_queue packet_queue;
78};
79
80struct cs_etm_queue {
81 struct cs_etm_auxtrace *etm;
82 struct cs_etm_decoder *decoder;
83 struct auxtrace_buffer *buffer;
84 unsigned int queue_nr;
85 u8 pending_timestamp_chan_id;
86 u64 offset;
87 const unsigned char *buf;
88 size_t buf_len, buf_used;
89 /* Conversion between traceID and index in traceid_queues array */
90 struct intlist *traceid_queues_list;
91 struct cs_etm_traceid_queue **traceid_queues;
92};
93
94/* RB tree for quick conversion between traceID and metadata pointers */
95static struct intlist *traceid_list;
96
97static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
98static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
99 pid_t tid);
100static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
101static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
102
103/* PTMs ETMIDR [11:8] set to b0011 */
104#define ETMIDR_PTM_VERSION 0x00000300
105
106/*
107 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
108 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply
109 * encode the etm queue number as the upper 16 bit and the channel as
110 * the lower 16 bit.
111 */
112#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \
113 (queue_nr << 16 | trace_chan_id)
114#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
115#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
116
117static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
118{
119 etmidr &= ETMIDR_PTM_VERSION;
120
121 if (etmidr == ETMIDR_PTM_VERSION)
122 return CS_ETM_PROTO_PTM;
123
124 return CS_ETM_PROTO_ETMV3;
125}
126
127static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
128{
129 struct int_node *inode;
130 u64 *metadata;
131
132 inode = intlist__find(traceid_list, trace_chan_id);
133 if (!inode)
134 return -EINVAL;
135
136 metadata = inode->priv;
137 *magic = metadata[CS_ETM_MAGIC];
138 return 0;
139}
140
141int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
142{
143 struct int_node *inode;
144 u64 *metadata;
145
146 inode = intlist__find(traceid_list, trace_chan_id);
147 if (!inode)
148 return -EINVAL;
149
150 metadata = inode->priv;
151 *cpu = (int)metadata[CS_ETM_CPU];
152 return 0;
153}
154
155/*
156 * The returned PID format is presented by two bits:
157 *
158 * Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced;
159 * Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced.
160 *
161 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
162 * are enabled at the same time when the session runs on an EL2 kernel.
163 * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
164 * recorded in the trace data, the tool will selectively use
165 * CONTEXTIDR_EL2 as PID.
166 */
167int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt)
168{
169 struct int_node *inode;
170 u64 *metadata, val;
171
172 inode = intlist__find(traceid_list, trace_chan_id);
173 if (!inode)
174 return -EINVAL;
175
176 metadata = inode->priv;
177
178 if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
179 val = metadata[CS_ETM_ETMCR];
180 /* CONTEXTIDR is traced */
181 if (val & BIT(ETM_OPT_CTXTID))
182 *pid_fmt = BIT(ETM_OPT_CTXTID);
183 } else {
184 val = metadata[CS_ETMV4_TRCCONFIGR];
185 /* CONTEXTIDR_EL2 is traced */
186 if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
187 *pid_fmt = BIT(ETM_OPT_CTXTID2);
188 /* CONTEXTIDR_EL1 is traced */
189 else if (val & BIT(ETM4_CFG_BIT_CTXTID))
190 *pid_fmt = BIT(ETM_OPT_CTXTID);
191 }
192
193 return 0;
194}
195
196void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
197 u8 trace_chan_id)
198{
199 /*
200 * When a timestamp packet is encountered the backend code
201 * is stopped so that the front end has time to process packets
202 * that were accumulated in the traceID queue. Since there can
203 * be more than one channel per cs_etm_queue, we need to specify
204 * what traceID queue needs servicing.
205 */
206 etmq->pending_timestamp_chan_id = trace_chan_id;
207}
208
209static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
210 u8 *trace_chan_id)
211{
212 struct cs_etm_packet_queue *packet_queue;
213
214 if (!etmq->pending_timestamp_chan_id)
215 return 0;
216
217 if (trace_chan_id)
218 *trace_chan_id = etmq->pending_timestamp_chan_id;
219
220 packet_queue = cs_etm__etmq_get_packet_queue(etmq,
221 etmq->pending_timestamp_chan_id);
222 if (!packet_queue)
223 return 0;
224
225 /* Acknowledge pending status */
226 etmq->pending_timestamp_chan_id = 0;
227
228 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
229 return packet_queue->cs_timestamp;
230}
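
/*
 * Illustrative flow, not an additional API: the decoder backend calls
 * cs_etm__etmq_set_traceid_queue_timestamp() when it hits a timestamp
 * packet; the front end then calls cs_etm__etmq_get_timestamp(), which
 * returns that queue's cs_timestamp once and clears the pending state,
 * so a second call returns 0 until a new timestamp packet is seen.
 */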
231
232static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
233{
234 int i;
235
236 queue->head = 0;
237 queue->tail = 0;
238 queue->packet_count = 0;
239 for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
240 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
241 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
242 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
243 queue->packet_buffer[i].instr_count = 0;
244 queue->packet_buffer[i].last_instr_taken_branch = false;
245 queue->packet_buffer[i].last_instr_size = 0;
246 queue->packet_buffer[i].last_instr_type = 0;
247 queue->packet_buffer[i].last_instr_subtype = 0;
248 queue->packet_buffer[i].last_instr_cond = 0;
249 queue->packet_buffer[i].flags = 0;
250 queue->packet_buffer[i].exception_number = UINT32_MAX;
251 queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
252 queue->packet_buffer[i].cpu = INT_MIN;
253 }
254}
255
256static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
257{
258 int idx;
259 struct int_node *inode;
260 struct cs_etm_traceid_queue *tidq;
261 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
262
263 intlist__for_each_entry(inode, traceid_queues_list) {
264 idx = (int)(intptr_t)inode->priv;
265 tidq = etmq->traceid_queues[idx];
266 cs_etm__clear_packet_queue(&tidq->packet_queue);
267 }
268}
269
270static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
271 struct cs_etm_traceid_queue *tidq,
272 u8 trace_chan_id)
273{
274 int rc = -ENOMEM;
275 struct auxtrace_queue *queue;
276 struct cs_etm_auxtrace *etm = etmq->etm;
277
278 cs_etm__clear_packet_queue(&tidq->packet_queue);
279
280 queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
281 tidq->tid = queue->tid;
282 tidq->pid = -1;
283 tidq->trace_chan_id = trace_chan_id;
284
285 tidq->packet = zalloc(sizeof(struct cs_etm_packet));
286 if (!tidq->packet)
287 goto out;
288
289 tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
290 if (!tidq->prev_packet)
291 goto out_free;
292
293 if (etm->synth_opts.last_branch) {
294 size_t sz = sizeof(struct branch_stack);
295
296 sz += etm->synth_opts.last_branch_sz *
297 sizeof(struct branch_entry);
298 tidq->last_branch = zalloc(sz);
299 if (!tidq->last_branch)
300 goto out_free;
301 tidq->last_branch_rb = zalloc(sz);
302 if (!tidq->last_branch_rb)
303 goto out_free;
304 }
305
306 tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
307 if (!tidq->event_buf)
308 goto out_free;
309
310 return 0;
311
312out_free:
313 zfree(&tidq->last_branch_rb);
314 zfree(&tidq->last_branch);
315 zfree(&tidq->prev_packet);
316 zfree(&tidq->packet);
317out:
318 return rc;
319}
320
321static struct cs_etm_traceid_queue
322*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
323{
324 int idx;
325 struct int_node *inode;
326 struct intlist *traceid_queues_list;
327 struct cs_etm_traceid_queue *tidq, **traceid_queues;
328 struct cs_etm_auxtrace *etm = etmq->etm;
329
330 if (etm->timeless_decoding)
331 trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
332
333 traceid_queues_list = etmq->traceid_queues_list;
334
335 /*
336 * Check if the traceid_queue exists for this traceID by looking
337 * in the queue list.
338 */
339 inode = intlist__find(traceid_queues_list, trace_chan_id);
340 if (inode) {
341 idx = (int)(intptr_t)inode->priv;
342 return etmq->traceid_queues[idx];
343 }
344
345 /* We couldn't find a traceid_queue for this traceID, allocate one */
346 tidq = malloc(sizeof(*tidq));
347 if (!tidq)
348 return NULL;
349
350 memset(tidq, 0, sizeof(*tidq));
351
352 /* Get a valid index for the new traceid_queue */
353 idx = intlist__nr_entries(traceid_queues_list);
354 /* Memory for the inode is freed in cs_etm__free_traceid_queues() */
355 inode = intlist__findnew(traceid_queues_list, trace_chan_id);
356 if (!inode)
357 goto out_free;
358
359 /* Associate this traceID with this index */
360 inode->priv = (void *)(intptr_t)idx;
361
362 if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
363 goto out_free;
364
365 /* Grow the traceid_queues array by one unit */
366 traceid_queues = etmq->traceid_queues;
367 traceid_queues = reallocarray(traceid_queues,
368 idx + 1,
369 sizeof(*traceid_queues));
370
371 /*
372 * On failure reallocarray() returns NULL and the original block of
373 * memory is left untouched.
374 */
375 if (!traceid_queues)
376 goto out_free;
377
378 traceid_queues[idx] = tidq;
379 etmq->traceid_queues = traceid_queues;
380
381 return etmq->traceid_queues[idx];
382
383out_free:
384 /*
385 * Function intlist__remove() removes the inode from the list
386 * and deletes the memory associated with it.
387 */
388 intlist__remove(traceid_queues_list, inode);
389 free(tidq);
390
391 return NULL;
392}
393
394struct cs_etm_packet_queue
395*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
396{
397 struct cs_etm_traceid_queue *tidq;
398
399 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
400 if (tidq)
401 return &tidq->packet_queue;
402
403 return NULL;
404}
405
406static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
407 struct cs_etm_traceid_queue *tidq)
408{
409 struct cs_etm_packet *tmp;
410
411 if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
412 etm->synth_opts.instructions) {
413 /*
414 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
415 * the next incoming packet.
416 */
417 tmp = tidq->packet;
418 tidq->packet = tidq->prev_packet;
419 tidq->prev_packet = tmp;
420 }
421}
422
423static void cs_etm__packet_dump(const char *pkt_string)
424{
425 const char *color = PERF_COLOR_BLUE;
426 int len = strlen(pkt_string);
427
428 if (len && (pkt_string[len-1] == '\n'))
429 color_fprintf(stdout, color, " %s", pkt_string);
430 else
431 color_fprintf(stdout, color, " %s\n", pkt_string);
432
433 fflush(stdout);
434}
435
436static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
437 struct cs_etm_auxtrace *etm, int idx,
438 u32 etmidr)
439{
440 u64 **metadata = etm->metadata;
441
442 t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
443 t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR];
444 t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR];
445}
446
447static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
448 struct cs_etm_auxtrace *etm, int idx)
449{
450 u64 **metadata = etm->metadata;
451
452 t_params[idx].protocol = CS_ETM_PROTO_ETMV4i;
453 t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
454 t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
455 t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
456 t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
457 t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
458 t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
459}
460
461static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
462 struct cs_etm_auxtrace *etm, int idx)
463{
464 u64 **metadata = etm->metadata;
465
466 t_params[idx].protocol = CS_ETM_PROTO_ETE;
467 t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
468 t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
469 t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
470 t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
471 t_params[idx].ete.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
472 t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
473 t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH];
474}
475
476static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
477 struct cs_etm_auxtrace *etm,
478 int decoders)
479{
480 int i;
481 u32 etmidr;
482 u64 architecture;
483
484 for (i = 0; i < decoders; i++) {
485 architecture = etm->metadata[i][CS_ETM_MAGIC];
486
487 switch (architecture) {
488 case __perf_cs_etmv3_magic:
489 etmidr = etm->metadata[i][CS_ETM_ETMIDR];
490 cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr);
491 break;
492 case __perf_cs_etmv4_magic:
493 cs_etm__set_trace_param_etmv4(t_params, etm, i);
494 break;
495 case __perf_cs_ete_magic:
496 cs_etm__set_trace_param_ete(t_params, etm, i);
497 break;
498 default:
499 return -EINVAL;
500 }
501 }
502
503 return 0;
504}
505
506static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
507 struct cs_etm_queue *etmq,
508 enum cs_etm_decoder_operation mode,
509 bool formatted)
510{
511 int ret = -EINVAL;
512
513 if (!(mode < CS_ETM_OPERATION_MAX))
514 goto out;
515
516 d_params->packet_printer = cs_etm__packet_dump;
517 d_params->operation = mode;
518 d_params->data = etmq;
519 d_params->formatted = formatted;
520 d_params->fsyncs = false;
521 d_params->hsyncs = false;
522 d_params->frame_aligned = true;
523
524 ret = 0;
525out:
526 return ret;
527}
528
529static void cs_etm__dump_event(struct cs_etm_queue *etmq,
530 struct auxtrace_buffer *buffer)
531{
532 int ret;
533 const char *color = PERF_COLOR_BLUE;
534 size_t buffer_used = 0;
535
536 fprintf(stdout, "\n");
537 color_fprintf(stdout, color,
538 ". ... CoreSight %s Trace data: size %#zx bytes\n",
539 cs_etm_decoder__get_name(etmq->decoder), buffer->size);
540
541 do {
542 size_t consumed;
543
544 ret = cs_etm_decoder__process_data_block(
545 etmq->decoder, buffer->offset,
546 &((u8 *)buffer->data)[buffer_used],
547 buffer->size - buffer_used, &consumed);
548 if (ret)
549 break;
550
551 buffer_used += consumed;
552 } while (buffer_used < buffer->size);
553
554 cs_etm_decoder__reset(etmq->decoder);
555}
556
557static int cs_etm__flush_events(struct perf_session *session,
558 struct perf_tool *tool)
559{
560 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
561 struct cs_etm_auxtrace,
562 auxtrace);
563 if (dump_trace)
564 return 0;
565
566 if (!tool->ordered_events)
567 return -EINVAL;
568
569 if (etm->timeless_decoding)
570 return cs_etm__process_timeless_queues(etm, -1);
571
572 return cs_etm__process_queues(etm);
573}
574
575static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
576{
577 int idx;
578 uintptr_t priv;
579 struct int_node *inode, *tmp;
580 struct cs_etm_traceid_queue *tidq;
581 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
582
583 intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
584 priv = (uintptr_t)inode->priv;
585 idx = priv;
586
587 /* Free this traceid_queue from the array */
588 tidq = etmq->traceid_queues[idx];
589 thread__zput(tidq->thread);
590 zfree(&tidq->event_buf);
591 zfree(&tidq->last_branch);
592 zfree(&tidq->last_branch_rb);
593 zfree(&tidq->prev_packet);
594 zfree(&tidq->packet);
595 zfree(&tidq);
596
597 /*
598 * Function intlist__remove() removes the inode from the list
599 * and deletes the memory associated with it.
600 */
601 intlist__remove(traceid_queues_list, inode);
602 }
603
604 /* Then the RB tree itself */
605 intlist__delete(traceid_queues_list);
606 etmq->traceid_queues_list = NULL;
607
608 /* Finally free the traceid_queues array */
609 zfree(&etmq->traceid_queues);
610}
611
612static void cs_etm__free_queue(void *priv)
613{
614 struct cs_etm_queue *etmq = priv;
615
616 if (!etmq)
617 return;
618
619 cs_etm_decoder__free(etmq->decoder);
620 cs_etm__free_traceid_queues(etmq);
621 free(etmq);
622}
623
624static void cs_etm__free_events(struct perf_session *session)
625{
626 unsigned int i;
627 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
628 struct cs_etm_auxtrace,
629 auxtrace);
630 struct auxtrace_queues *queues = &aux->queues;
631
632 for (i = 0; i < queues->nr_queues; i++) {
633 cs_etm__free_queue(queues->queue_array[i].priv);
634 queues->queue_array[i].priv = NULL;
635 }
636
637 auxtrace_queues__free(queues);
638}
639
640static void cs_etm__free(struct perf_session *session)
641{
642 int i;
643 struct int_node *inode, *tmp;
644 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
645 struct cs_etm_auxtrace,
646 auxtrace);
647 cs_etm__free_events(session);
648 session->auxtrace = NULL;
649
650 /* First remove all traceID/metadata nodes for the RB tree */
651 intlist__for_each_entry_safe(inode, tmp, traceid_list)
652 intlist__remove(traceid_list, inode);
653 /* Then the RB tree itself */
654 intlist__delete(traceid_list);
655
656 for (i = 0; i < aux->num_cpu; i++)
657 zfree(&aux->metadata[i]);
658
659 thread__zput(aux->unknown_thread);
660 zfree(&aux->metadata);
661 zfree(&aux);
662}
663
664static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
665 struct evsel *evsel)
666{
667 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
668 struct cs_etm_auxtrace,
669 auxtrace);
670
671 return evsel->core.attr.type == aux->pmu_type;
672}
673
674static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
675{
676 struct machine *machine;
677
678 machine = etmq->etm->machine;
679
680 if (address >= machine__kernel_start(machine)) {
681 if (machine__is_host(machine))
682 return PERF_RECORD_MISC_KERNEL;
683 else
684 return PERF_RECORD_MISC_GUEST_KERNEL;
685 } else {
686 if (machine__is_host(machine))
687 return PERF_RECORD_MISC_USER;
688 else if (perf_guest)
689 return PERF_RECORD_MISC_GUEST_USER;
690 else
691 return PERF_RECORD_MISC_HYPERVISOR;
692 }
693}
694
695static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
696 u64 address, size_t size, u8 *buffer)
697{
698 u8 cpumode;
699 u64 offset;
700 int len;
701 struct thread *thread;
702 struct machine *machine;
703 struct addr_location al;
704 struct cs_etm_traceid_queue *tidq;
705
706 if (!etmq)
707 return 0;
708
709 machine = etmq->etm->machine;
710 cpumode = cs_etm__cpu_mode(etmq, address);
711 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
712 if (!tidq)
713 return 0;
714
715 thread = tidq->thread;
716 if (!thread) {
717 if (cpumode != PERF_RECORD_MISC_KERNEL)
718 return 0;
719 thread = etmq->etm->unknown_thread;
720 }
721
722 if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso)
723 return 0;
724
725 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
726 dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE))
727 return 0;
728
729 offset = al.map->map_ip(al.map, address);
730
731 map__load(al.map);
732
733 len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size);
734
735 if (len <= 0) {
736 ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
737 " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
738 if (!al.map->dso->auxtrace_warned) {
739 pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
740 address,
741 al.map->dso->long_name ? al.map->dso->long_name : "Unknown");
742 al.map->dso->auxtrace_warned = true;
743 }
744 return 0;
745 }
746
747 return len;
748}
749
750static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
751 bool formatted)
752{
753 struct cs_etm_decoder_params d_params;
754 struct cs_etm_trace_params *t_params = NULL;
755 struct cs_etm_queue *etmq;
756 /*
757 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
758 * needed.
759 */
760 int decoders = formatted ? etm->num_cpu : 1;
761
762 etmq = zalloc(sizeof(*etmq));
763 if (!etmq)
764 return NULL;
765
766 etmq->traceid_queues_list = intlist__new(NULL);
767 if (!etmq->traceid_queues_list)
768 goto out_free;
769
770 /* Use metadata to fill in trace parameters for trace decoder */
771 t_params = zalloc(sizeof(*t_params) * decoders);
772
773 if (!t_params)
774 goto out_free;
775
776 if (cs_etm__init_trace_params(t_params, etm, decoders))
777 goto out_free;
778
779 /* Set decoder parameters to decode trace packets */
780 if (cs_etm__init_decoder_params(&d_params, etmq,
781 dump_trace ? CS_ETM_OPERATION_PRINT :
782 CS_ETM_OPERATION_DECODE,
783 formatted))
784 goto out_free;
785
786 etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
787 t_params);
788
789 if (!etmq->decoder)
790 goto out_free;
791
792 /*
793 * Register a function to handle all memory accesses required by
794 * the trace decoder library.
795 */
796 if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
797 0x0L, ((u64) -1L),
798 cs_etm__mem_access))
799 goto out_free_decoder;
800
801 zfree(&t_params);
802 return etmq;
803
804out_free_decoder:
805 cs_etm_decoder__free(etmq->decoder);
806out_free:
807 intlist__delete(etmq->traceid_queues_list);
808 free(etmq);
809
810 return NULL;
811}
812
813static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
814 struct auxtrace_queue *queue,
815 unsigned int queue_nr,
816 bool formatted)
817{
818 struct cs_etm_queue *etmq = queue->priv;
819
820 if (list_empty(&queue->head) || etmq)
821 return 0;
822
823 etmq = cs_etm__alloc_queue(etm, formatted);
824
825 if (!etmq)
826 return -ENOMEM;
827
828 queue->priv = etmq;
829 etmq->etm = etm;
830 etmq->queue_nr = queue_nr;
831 etmq->offset = 0;
832
833 return 0;
834}
835
836static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
837 struct cs_etm_queue *etmq,
838 unsigned int queue_nr)
839{
840 int ret = 0;
841 unsigned int cs_queue_nr;
842 u8 trace_chan_id;
843 u64 cs_timestamp;
844
845 /*
846 * We are under a CPU-wide trace scenario. As such we need to know
847 * when the code that generated the traces started to execute so that
848 * it can be correlated with execution on other CPUs. So we get a
849 * handle on the beginning of traces and decode until we find a
850 * timestamp. The timestamp is then added to the auxtrace min heap
851 * in order to know which of all the etmqs to decode first.
852 */
853 while (1) {
854 /*
855 * Fetch an aux_buffer from this etmq. Bail if no more
856 * blocks or an error has been encountered.
857 */
858 ret = cs_etm__get_data_block(etmq);
859 if (ret <= 0)
860 goto out;
861
862 /*
863 * Run decoder on the trace block. The decoder will stop when
864 * encountering a CS timestamp, a full packet queue or the end of
865 * trace for that block.
866 */
867 ret = cs_etm__decode_data_block(etmq);
868 if (ret)
869 goto out;
870
871 /*
872 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
873 * the timestamp calculation for us.
874 */
875 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
876
877 /* We found a timestamp, no need to continue. */
878 if (cs_timestamp)
879 break;
880
881 /*
882 * We didn't find a timestamp so empty all the traceid packet
883 * queues before looking for another timestamp packet, either
884 * in the current data block or a new one. Packets that were
885 * just decoded are useless since no timestamp has been
886 * associated with them. As such simply discard them.
887 */
888 cs_etm__clear_all_packet_queues(etmq);
889 }
890
891 /*
892 * We have a timestamp. Add it to the min heap to reflect when
893 * instructions conveyed by the range packets of this traceID queue
894 * started to execute. Once the same has been done for all the traceID
895 * queues of each etmq, rendering and decoding can start in
896 * chronological order.
897 *
898 * Note that packets decoded above are still in the traceID's packet
899 * queue and will be processed in cs_etm__process_queues().
900 */
901 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
902 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
903out:
904 return ret;
905}
906
907static inline
908void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
909 struct cs_etm_traceid_queue *tidq)
910{
911 struct branch_stack *bs_src = tidq->last_branch_rb;
912 struct branch_stack *bs_dst = tidq->last_branch;
913 size_t nr = 0;
914
915 /*
916 * Set the number of records before early exit: ->nr is used to
917 * determine how many branches to copy from ->entries.
918 */
919 bs_dst->nr = bs_src->nr;
920
921 /*
922 * Early exit when there is nothing to copy.
923 */
924 if (!bs_src->nr)
925 return;
926
927 /*
928 * As bs_src->entries is a circular buffer, we need to copy from it in
929 * two steps. First, copy the branches from the most recently inserted
930 * branch ->last_branch_pos until the end of bs_src->entries buffer.
931 */
932 nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
933 memcpy(&bs_dst->entries[0],
934 &bs_src->entries[tidq->last_branch_pos],
935 sizeof(struct branch_entry) * nr);
936
937 /*
938 * If we wrapped around at least once, the branches from the beginning
939 * of the bs_src->entries buffer and until the ->last_branch_pos element
940 * are older valid branches: copy them over. The total number of
941 * branches copied over will be equal to the number of branches asked by
942 * the user in last_branch_sz.
943 */
944 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
945 memcpy(&bs_dst->entries[nr],
946 &bs_src->entries[0],
947 sizeof(struct branch_entry) * tidq->last_branch_pos);
948 }
949}
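
/*
 * Worked example of the two-step copy above, assuming last_branch_sz = 4:
 * with last_branch_pos = 1 and bs_src->nr >= 4, the first memcpy() copies
 * entries [1], [2] and [3] (newest first) and the second copies entry [0],
 * so bs_dst ends up ordered from most to least recent.
 */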
950
951static inline
952void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
953{
954 tidq->last_branch_pos = 0;
955 tidq->last_branch_rb->nr = 0;
956}
957
958static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
959 u8 trace_chan_id, u64 addr)
960{
961 u8 instrBytes[2];
962
963 cs_etm__mem_access(etmq, trace_chan_id, addr,
964 ARRAY_SIZE(instrBytes), instrBytes);
965 /*
966 * T32 instruction size is indicated by bits[15:11] of the first
967 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
968 * denote a 32-bit instruction.
969 */
970 return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
971}
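
/*
 * Worked example for the size check above (instruction words are
 * illustrative): a first halfword of 0xf7ff has bits [15:11] == 0b11110,
 * so (instrBytes[1] & 0xF8) == 0xF0 >= 0xE8 and the instruction is 32-bit;
 * 0x4770 (BX LR) gives 0x40, i.e. a 16-bit instruction.
 */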
972
973static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
974{
975 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
976 if (packet->sample_type == CS_ETM_DISCONTINUITY)
977 return 0;
978
979 return packet->start_addr;
980}
981
982static inline
983u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
984{
985 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
986 if (packet->sample_type == CS_ETM_DISCONTINUITY)
987 return 0;
988
989 return packet->end_addr - packet->last_instr_size;
990}
991
992static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
993 u64 trace_chan_id,
994 const struct cs_etm_packet *packet,
995 u64 offset)
996{
997 if (packet->isa == CS_ETM_ISA_T32) {
998 u64 addr = packet->start_addr;
999
1000 while (offset) {
1001 addr += cs_etm__t32_instr_size(etmq,
1002 trace_chan_id, addr);
1003 offset--;
1004 }
1005 return addr;
1006 }
1007
1008 /* Assume a 4 byte instruction size (A32/A64) */
1009 return packet->start_addr + offset * 4;
1010}
1011
1012static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1013 struct cs_etm_traceid_queue *tidq)
1014{
1015 struct branch_stack *bs = tidq->last_branch_rb;
1016 struct branch_entry *be;
1017
1018 /*
1019 * The branches are recorded in a circular buffer in reverse
1020 * chronological order: we start recording from the last element of the
1021 * buffer down. After writing the first element of the stack, move the
1022 * insert position back to the end of the buffer.
1023 */
1024 if (!tidq->last_branch_pos)
1025 tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1026
1027 tidq->last_branch_pos -= 1;
1028
1029 be = &bs->entries[tidq->last_branch_pos];
1030 be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1031 be->to = cs_etm__first_executed_instr(tidq->packet);
1032 /* No support for mispredict */
1033 be->flags.mispred = 0;
1034 be->flags.predicted = 1;
1035
1036 /*
1037 * Increment bs->nr until reaching the number of last branches asked by
1038 * the user on the command line.
1039 */
1040 if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1041 bs->nr += 1;
1042}
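
/*
 * Worked insertion example, assuming last_branch_sz = 4: starting from an
 * empty buffer the first branch is written at index 3, the next at index 2,
 * and so on; once the buffer has wrapped, bs->nr stays at 4 and new
 * branches keep overwriting the oldest slot.
 */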
1043
1044static int cs_etm__inject_event(union perf_event *event,
1045 struct perf_sample *sample, u64 type)
1046{
1047 event->header.size = perf_event__sample_event_size(sample, type, 0);
1048 return perf_event__synthesize_sample(event, type, 0, sample);
1049}
1050
1051
1052static int
1053cs_etm__get_trace(struct cs_etm_queue *etmq)
1054{
1055 struct auxtrace_buffer *aux_buffer = etmq->buffer;
1056 struct auxtrace_buffer *old_buffer = aux_buffer;
1057 struct auxtrace_queue *queue;
1058
1059 queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1060
1061 aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1062
1063 /* If no more data, drop the previous auxtrace_buffer and return */
1064 if (!aux_buffer) {
1065 if (old_buffer)
1066 auxtrace_buffer__drop_data(old_buffer);
1067 etmq->buf_len = 0;
1068 return 0;
1069 }
1070
1071 etmq->buffer = aux_buffer;
1072
1073 /* If the aux_buffer doesn't have data associated, try to load it */
1074 if (!aux_buffer->data) {
1075 /* get the file desc associated with the perf data file */
1076 int fd = perf_data__fd(etmq->etm->session->data);
1077
1078 aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1079 if (!aux_buffer->data)
1080 return -ENOMEM;
1081 }
1082
1083 /* If valid, drop the previous buffer */
1084 if (old_buffer)
1085 auxtrace_buffer__drop_data(old_buffer);
1086
1087 etmq->buf_used = 0;
1088 etmq->buf_len = aux_buffer->size;
1089 etmq->buf = aux_buffer->data;
1090
1091 return etmq->buf_len;
1092}
1093
1094static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
1095 struct cs_etm_traceid_queue *tidq)
1096{
1097 if ((!tidq->thread) && (tidq->tid != -1))
1098 tidq->thread = machine__find_thread(etm->machine, -1,
1099 tidq->tid);
1100
1101 if (tidq->thread)
1102 tidq->pid = tidq->thread->pid_;
1103}
1104
1105int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
1106 pid_t tid, u8 trace_chan_id)
1107{
1108 int cpu, err = -EINVAL;
1109 struct cs_etm_auxtrace *etm = etmq->etm;
1110 struct cs_etm_traceid_queue *tidq;
1111
1112 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1113 if (!tidq)
1114 return err;
1115
1116 if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
1117 return err;
1118
1119 err = machine__set_current_tid(etm->machine, cpu, tid, tid);
1120 if (err)
1121 return err;
1122
1123 tidq->tid = tid;
1124 thread__zput(tidq->thread);
1125
1126 cs_etm__set_pid_tid_cpu(etm, tidq);
1127 return 0;
1128}
1129
1130bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1131{
1132 return !!etmq->etm->timeless_decoding;
1133}
1134
1135static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1136 u64 trace_chan_id,
1137 const struct cs_etm_packet *packet,
1138 struct perf_sample *sample)
1139{
1140 /*
1141 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1142 * packet, so directly bail out with 'insn_len' = 0.
1143 */
1144 if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1145 sample->insn_len = 0;
1146 return;
1147 }
1148
1149 /*
1150 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1151 * cs_etm__t32_instr_size().
1152 */
1153 if (packet->isa == CS_ETM_ISA_T32)
1154 sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1155 sample->ip);
1156 /* Otherwise, A64 and A32 instruction size are always 32-bit. */
1157 else
1158 sample->insn_len = 4;
1159
1160 cs_etm__mem_access(etmq, trace_chan_id, sample->ip,
1161 sample->insn_len, (void *)sample->insn);
1162}
1163
1164static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1165 struct cs_etm_traceid_queue *tidq,
1166 u64 addr, u64 period)
1167{
1168 int ret = 0;
1169 struct cs_etm_auxtrace *etm = etmq->etm;
1170 union perf_event *event = tidq->event_buf;
1171 struct perf_sample sample = {.ip = 0,};
1172
1173 event->sample.header.type = PERF_RECORD_SAMPLE;
1174 event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
1175 event->sample.header.size = sizeof(struct perf_event_header);
1176
1177 if (!etm->timeless_decoding)
1178 sample.time = etm->latest_kernel_timestamp;
1179 sample.ip = addr;
1180 sample.pid = tidq->pid;
1181 sample.tid = tidq->tid;
1182 sample.id = etmq->etm->instructions_id;
1183 sample.stream_id = etmq->etm->instructions_id;
1184 sample.period = period;
1185 sample.cpu = tidq->packet->cpu;
1186 sample.flags = tidq->prev_packet->flags;
1187 sample.cpumode = event->sample.header.misc;
1188
1189 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1190
1191 if (etm->synth_opts.last_branch)
1192 sample.branch_stack = tidq->last_branch;
1193
1194 if (etm->synth_opts.inject) {
1195 ret = cs_etm__inject_event(event, &sample,
1196 etm->instructions_sample_type);
1197 if (ret)
1198 return ret;
1199 }
1200
1201 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1202
1203 if (ret)
1204 pr_err(
1205 "CS ETM Trace: failed to deliver instruction event, error %d\n",
1206 ret);
1207
1208 return ret;
1209}
1210
1211/*
1212 * The cs etm packet encodes an instruction range between a branch target
1213 * and the next taken branch. Generate sample accordingly.
1214 */
1215static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1216 struct cs_etm_traceid_queue *tidq)
1217{
1218 int ret = 0;
1219 struct cs_etm_auxtrace *etm = etmq->etm;
1220 struct perf_sample sample = {.ip = 0,};
1221 union perf_event *event = tidq->event_buf;
1222 struct dummy_branch_stack {
1223 u64 nr;
1224 u64 hw_idx;
1225 struct branch_entry entries;
1226 } dummy_bs;
1227 u64 ip;
1228
1229 ip = cs_etm__last_executed_instr(tidq->prev_packet);
1230
1231 event->sample.header.type = PERF_RECORD_SAMPLE;
1232 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
1233 event->sample.header.size = sizeof(struct perf_event_header);
1234
1235 if (!etm->timeless_decoding)
1236 sample.time = etm->latest_kernel_timestamp;
1237 sample.ip = ip;
1238 sample.pid = tidq->pid;
1239 sample.tid = tidq->tid;
1240 sample.addr = cs_etm__first_executed_instr(tidq->packet);
1241 sample.id = etmq->etm->branches_id;
1242 sample.stream_id = etmq->etm->branches_id;
1243 sample.period = 1;
1244 sample.cpu = tidq->packet->cpu;
1245 sample.flags = tidq->prev_packet->flags;
1246 sample.cpumode = event->sample.header.misc;
1247
1248 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1249 &sample);
1250
1251 /*
1252 * perf report cannot handle events without a branch stack
1253 */
1254 if (etm->synth_opts.last_branch) {
1255 dummy_bs = (struct dummy_branch_stack){
1256 .nr = 1,
1257 .hw_idx = -1ULL,
1258 .entries = {
1259 .from = sample.ip,
1260 .to = sample.addr,
1261 },
1262 };
1263 sample.branch_stack = (struct branch_stack *)&dummy_bs;
1264 }
1265
1266 if (etm->synth_opts.inject) {
1267 ret = cs_etm__inject_event(event, &sample,
1268 etm->branches_sample_type);
1269 if (ret)
1270 return ret;
1271 }
1272
1273 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1274
1275 if (ret)
1276 pr_err(
1277 "CS ETM Trace: failed to deliver instruction event, error %d\n",
1278 ret);
1279
1280 return ret;
1281}
1282
1283struct cs_etm_synth {
1284 struct perf_tool dummy_tool;
1285 struct perf_session *session;
1286};
1287
1288static int cs_etm__event_synth(struct perf_tool *tool,
1289 union perf_event *event,
1290 struct perf_sample *sample __maybe_unused,
1291 struct machine *machine __maybe_unused)
1292{
1293 struct cs_etm_synth *cs_etm_synth =
1294 container_of(tool, struct cs_etm_synth, dummy_tool);
1295
1296 return perf_session__deliver_synth_event(cs_etm_synth->session,
1297 event, NULL);
1298}
1299
1300static int cs_etm__synth_event(struct perf_session *session,
1301 struct perf_event_attr *attr, u64 id)
1302{
1303 struct cs_etm_synth cs_etm_synth;
1304
1305 memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
1306 cs_etm_synth.session = session;
1307
1308 return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
1309 &id, cs_etm__event_synth);
1310}
1311
1312static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1313 struct perf_session *session)
1314{
1315 struct evlist *evlist = session->evlist;
1316 struct evsel *evsel;
1317 struct perf_event_attr attr;
1318 bool found = false;
1319 u64 id;
1320 int err;
1321
1322 evlist__for_each_entry(evlist, evsel) {
1323 if (evsel->core.attr.type == etm->pmu_type) {
1324 found = true;
1325 break;
1326 }
1327 }
1328
1329 if (!found) {
1330 pr_debug("No selected events with CoreSight Trace data\n");
1331 return 0;
1332 }
1333
1334 memset(&attr, 0, sizeof(struct perf_event_attr));
1335 attr.size = sizeof(struct perf_event_attr);
1336 attr.type = PERF_TYPE_HARDWARE;
1337 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1338 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1339 PERF_SAMPLE_PERIOD;
1340 if (etm->timeless_decoding)
1341 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1342 else
1343 attr.sample_type |= PERF_SAMPLE_TIME;
1344
1345 attr.exclude_user = evsel->core.attr.exclude_user;
1346 attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1347 attr.exclude_hv = evsel->core.attr.exclude_hv;
1348 attr.exclude_host = evsel->core.attr.exclude_host;
1349 attr.exclude_guest = evsel->core.attr.exclude_guest;
1350 attr.sample_id_all = evsel->core.attr.sample_id_all;
1351 attr.read_format = evsel->core.attr.read_format;
1352
1353 /* create new id val to be a fixed offset from evsel id */
1354 id = evsel->core.id[0] + 1000000000;
1355
1356 if (!id)
1357 id = 1;
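
/*
 * Illustrative example: with an evsel id of 42 the synthesized branches
 * event (if enabled) gets id 1000000042 and the instructions event gets
 * the next value, 1000000043.
 */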
1358
1359 if (etm->synth_opts.branches) {
1360 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1361 attr.sample_period = 1;
1362 attr.sample_type |= PERF_SAMPLE_ADDR;
1363 err = cs_etm__synth_event(session, &attr, id);
1364 if (err)
1365 return err;
1366 etm->branches_sample_type = attr.sample_type;
1367 etm->branches_id = id;
1368 id += 1;
1369 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1370 }
1371
1372 if (etm->synth_opts.last_branch) {
1373 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1374 /*
1375 * We don't use the hardware index, but the sample generation
1376 * code uses the new format branch_stack with this field,
1377 * so the event attributes must indicate that it's present.
1378 */
1379 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1380 }
1381
1382 if (etm->synth_opts.instructions) {
1383 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1384 attr.sample_period = etm->synth_opts.period;
1385 etm->instructions_sample_period = attr.sample_period;
1386 err = cs_etm__synth_event(session, &attr, id);
1387 if (err)
1388 return err;
1389 etm->instructions_sample_type = attr.sample_type;
1390 etm->instructions_id = id;
1391 id += 1;
1392 }
1393
1394 return 0;
1395}
1396
1397static int cs_etm__sample(struct cs_etm_queue *etmq,
1398 struct cs_etm_traceid_queue *tidq)
1399{
1400 struct cs_etm_auxtrace *etm = etmq->etm;
1401 int ret;
1402 u8 trace_chan_id = tidq->trace_chan_id;
1403 u64 instrs_prev;
1404
1405 /* Get instructions remainder from previous packet */
1406 instrs_prev = tidq->period_instructions;
1407
1408 tidq->period_instructions += tidq->packet->instr_count;
1409
1410 /*
1411 * Record a branch when the last instruction in
1412 * PREV_PACKET is a branch.
1413 */
1414 if (etm->synth_opts.last_branch &&
1415 tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1416 tidq->prev_packet->last_instr_taken_branch)
1417 cs_etm__update_last_branch_rb(etmq, tidq);
1418
1419 if (etm->synth_opts.instructions &&
1420 tidq->period_instructions >= etm->instructions_sample_period) {
1421 /*
1422 * Emit instruction sample periodically
1423 * TODO: allow period to be defined in cycles and clock time
1424 */
1425
1426 /*
1427 * The diagram below demonstrates the instruction sample
1428 * generation flow:
1429 *
1430 * Instrs Instrs Instrs Instrs
1431 * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3)
1432 * | | | |
1433 * V V V V
1434 * --------------------------------------------------
1435 * ^ ^
1436 * | |
1437 * Period Period
1438 * instructions(Pi) instructions(Pi')
1439 *
1440 * | |
1441 * \---------------- -----------------/
1442 * V
1443 * tidq->packet->instr_count
1444 *
1445 * Instrs Sample(n...) are the synthesised samples occurring
1446 * every etm->instructions_sample_period instructions - as
1447 * defined on the perf command line. Sample(n) is the last
1448 * sample before the current etm packet; samples n+1 to n+3
1449 * are generated from the current etm packet.
1450 *
1451 * tidq->packet->instr_count represents the number of
1452 * instructions in the current etm packet.
1453 *
1454 * Period instructions (Pi) contains the number of
1455 * instructions executed after the sample point(n) from the
1456 * previous etm packet. This will always be less than
1457 * etm->instructions_sample_period.
1458 *
1459 * When generating new samples, sample(n+1) combines instructions
1460 * from two parts: the tail of the old packet and the head of the
1461 * incoming packet. Samples (n+2) and (n+3) then each consume a
1462 * full sample period of instructions. After sample(n+3), the
1463 * remaining instructions belong to the next packet and are
1464 * carried over in tidq->period_instructions for the next round
1465 * of calculation.
1466 */
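
/*
 * Worked numeric example (numbers are illustrative): with a sample period
 * of 1000, instrs_prev = 700 and a packet of 2500 instructions, offset
 * starts at 300, samples are emitted at offsets 299, 1299 and 2299, and
 * the remaining 200 instructions stay in tidq->period_instructions.
 */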
1467
1468 /*
1469 * Get the initial offset into the current packet instructions;
1470 * entry conditions ensure that instrs_prev is less than
1471 * etm->instructions_sample_period.
1472 */
1473 u64 offset = etm->instructions_sample_period - instrs_prev;
1474 u64 addr;
1475
1476 /* Prepare last branches for instruction sample */
1477 if (etm->synth_opts.last_branch)
1478 cs_etm__copy_last_branch_rb(etmq, tidq);
1479
1480 while (tidq->period_instructions >=
1481 etm->instructions_sample_period) {
1482 /*
1483 * Calculate the address of the sampled instruction (-1
1484 * as sample is reported as though instruction has just
1485 * been executed, but PC has not advanced to next
1486 * instruction)
1487 */
1488 addr = cs_etm__instr_addr(etmq, trace_chan_id,
1489 tidq->packet, offset - 1);
1490 ret = cs_etm__synth_instruction_sample(
1491 etmq, tidq, addr,
1492 etm->instructions_sample_period);
1493 if (ret)
1494 return ret;
1495
1496 offset += etm->instructions_sample_period;
1497 tidq->period_instructions -=
1498 etm->instructions_sample_period;
1499 }
1500 }
1501
1502 if (etm->synth_opts.branches) {
1503 bool generate_sample = false;
1504
1505 /* Generate sample for tracing on packet */
1506 if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1507 generate_sample = true;
1508
1509 /* Generate sample for branch taken packet */
1510 if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1511 tidq->prev_packet->last_instr_taken_branch)
1512 generate_sample = true;
1513
1514 if (generate_sample) {
1515 ret = cs_etm__synth_branch_sample(etmq, tidq);
1516 if (ret)
1517 return ret;
1518 }
1519 }
1520
1521 cs_etm__packet_swap(etm, tidq);
1522
1523 return 0;
1524}
1525
1526static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1527{
1528 /*
1529 * When an exception packet is inserted, force
1530 * 'prev_packet->last_instr_taken_branch' to true regardless of
1531 * whether the last instruction in the previous range packet was
1532 * a taken branch. This ensures a branch sample is generated for
1533 * the instruction range executed before the exception is trapped
1534 * to the kernel or before the exception returns.
1535 *
1536 * The exception packet includes dummy address values, so don't
1537 * swap PACKET with PREV_PACKET; PREV_PACKET remains usable for samples.
1538 */
1539 if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1540 tidq->prev_packet->last_instr_taken_branch = true;
1541
1542 return 0;
1543}
1544
1545static int cs_etm__flush(struct cs_etm_queue *etmq,
1546 struct cs_etm_traceid_queue *tidq)
1547{
1548 int err = 0;
1549 struct cs_etm_auxtrace *etm = etmq->etm;
1550
1551 /* Handle start tracing packet */
1552 if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1553 goto swap_packet;
1554
1555 if (etmq->etm->synth_opts.last_branch &&
1556 etmq->etm->synth_opts.instructions &&
1557 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1558 u64 addr;
1559
1560 /* Prepare last branches for instruction sample */
1561 cs_etm__copy_last_branch_rb(etmq, tidq);
1562
1563 /*
1564 * Generate a last branch event for the branches left in the
1565 * circular buffer at the end of the trace.
1566 *
1567 * Use the address of the end of the last reported execution
1568 * range
1569 */
1570 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1571
1572 err = cs_etm__synth_instruction_sample(
1573 etmq, tidq, addr,
1574 tidq->period_instructions);
1575 if (err)
1576 return err;
1577
1578 tidq->period_instructions = 0;
1579
1580 }
1581
1582 if (etm->synth_opts.branches &&
1583 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1584 err = cs_etm__synth_branch_sample(etmq, tidq);
1585 if (err)
1586 return err;
1587 }
1588
1589swap_packet:
1590 cs_etm__packet_swap(etm, tidq);
1591
1592 /* Reset last branches after flushing the trace */
1593 if (etm->synth_opts.last_branch)
1594 cs_etm__reset_last_branch_rb(tidq);
1595
1596 return err;
1597}
1598
1599static int cs_etm__end_block(struct cs_etm_queue *etmq,
1600 struct cs_etm_traceid_queue *tidq)
1601{
1602 int err;
1603
1604 /*
1605 * No new packet is coming and 'tidq->packet' contains the stale
1606 * packet left over from the previous packet swap, so skip
1607 * generating a branch sample to avoid using the stale packet.
1608 *
1609 * In this case only flush the branch stack and generate a last
1610 * branch event for the branches left in the circular buffer at
1611 * the end of the trace.
1612 */
1613 if (etmq->etm->synth_opts.last_branch &&
1614 etmq->etm->synth_opts.instructions &&
1615 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1616 u64 addr;
1617
1618 /* Prepare last branches for instruction sample */
1619 cs_etm__copy_last_branch_rb(etmq, tidq);
1620
1621 /*
1622 * Use the address of the end of the last reported execution
1623 * range.
1624 */
1625 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1626
1627 err = cs_etm__synth_instruction_sample(
1628 etmq, tidq, addr,
1629 tidq->period_instructions);
1630 if (err)
1631 return err;
1632
1633 tidq->period_instructions = 0;
1634 }
1635
1636 return 0;
1637}
1638/*
1639 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
1640 * if need be.
1641 * Returns: < 0 if error
1642 * = 0 if no more auxtrace_buffer to read
1643 * > 0 if the current buffer isn't empty yet
1644 */
1645static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1646{
1647 int ret;
1648
1649 if (!etmq->buf_len) {
1650 ret = cs_etm__get_trace(etmq);
1651 if (ret <= 0)
1652 return ret;
1653 /*
1654 * We cannot assume consecutive blocks in the data file
1655 * are contiguous, reset the decoder to force re-sync.
1656 */
1657 ret = cs_etm_decoder__reset(etmq->decoder);
1658 if (ret)
1659 return ret;
1660 }
1661
1662 return etmq->buf_len;
1663}
1664
1665static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
1666 struct cs_etm_packet *packet,
1667 u64 end_addr)
1668{
1669 /* Initialise to keep compiler happy */
1670 u16 instr16 = 0;
1671 u32 instr32 = 0;
1672 u64 addr;
1673
1674 switch (packet->isa) {
1675 case CS_ETM_ISA_T32:
1676 /*
1677 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
1678 *
1679 * b'15 b'8
1680 * +-----------------+--------+
1681 * | 1 1 0 1 1 1 1 1 | imm8 |
1682 * +-----------------+--------+
1683 *
1684 * According to the specification, SVC is only defined as a
1685 * 16-bit T32 instruction and has no 32-bit encoding, so only
1686 * read 2 bytes below for the T32 instruction size.
1687 */
1688 addr = end_addr - 2;
1689 cs_etm__mem_access(etmq, trace_chan_id, addr,
1690 sizeof(instr16), (u8 *)&instr16);
1691 if ((instr16 & 0xFF00) == 0xDF00)
1692 return true;
1693
1694 break;
1695 case CS_ETM_ISA_A32:
1696 /*
1697 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
1698 *
1699 * b'31 b'28 b'27 b'24
1700 * +---------+---------+-------------------------+
1701 * | !1111 | 1 1 1 1 | imm24 |
1702 * +---------+---------+-------------------------+
1703 */
1704 addr = end_addr - 4;
1705 cs_etm__mem_access(etmq, trace_chan_id, addr,
1706 sizeof(instr32), (u8 *)&instr32);
1707 if ((instr32 & 0x0F000000) == 0x0F000000 &&
1708 (instr32 & 0xF0000000) != 0xF0000000)
1709 return true;
1710
1711 break;
1712 case CS_ETM_ISA_A64:
1713 /*
1714 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
1715 *
1716 * b'31 b'21 b'4 b'0
1717 * +-----------------------+---------+-----------+
1718 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 |
1719 * +-----------------------+---------+-----------+
1720 */
1721 addr = end_addr - 4;
1722 cs_etm__mem_access(etmq, trace_chan_id, addr,
1723 sizeof(instr32), (u8 *)&instr32);
1724 if ((instr32 & 0xFFE0001F) == 0xd4000001)
1725 return true;
1726
1727 break;
1728 case CS_ETM_ISA_UNKNOWN:
1729 default:
1730 break;
1731 }
1732
1733 return false;
1734}
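
/*
 * Worked examples for the encodings above (instruction words are
 * illustrative): 0xdf01 matches the T32 check (SVC #1), 0xef000000 matches
 * the A32 check (SVC #0 with the AL condition), and 0xd4000021 matches the
 * A64 check (SVC #1).
 */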
1735
1736static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
1737 struct cs_etm_traceid_queue *tidq, u64 magic)
1738{
1739 u8 trace_chan_id = tidq->trace_chan_id;
1740 struct cs_etm_packet *packet = tidq->packet;
1741 struct cs_etm_packet *prev_packet = tidq->prev_packet;
1742
1743 if (magic == __perf_cs_etmv3_magic)
1744 if (packet->exception_number == CS_ETMV3_EXC_SVC)
1745 return true;
1746
1747 /*
1748 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
1749 * HVC cases; we need to check whether it's an SVC instruction
1750 * based on the packet address.
1751 */
1752 if (magic == __perf_cs_etmv4_magic) {
1753 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
1754 cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
1755 prev_packet->end_addr))
1756 return true;
1757 }
1758
1759 return false;
1760}
1761
1762static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
1763 u64 magic)
1764{
1765 struct cs_etm_packet *packet = tidq->packet;
1766
1767 if (magic == __perf_cs_etmv3_magic)
1768 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
1769 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
1770 packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
1771 packet->exception_number == CS_ETMV3_EXC_IRQ ||
1772 packet->exception_number == CS_ETMV3_EXC_FIQ)
1773 return true;
1774
1775 if (magic == __perf_cs_etmv4_magic)
1776 if (packet->exception_number == CS_ETMV4_EXC_RESET ||
1777 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
1778 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
1779 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
1780 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
1781 packet->exception_number == CS_ETMV4_EXC_IRQ ||
1782 packet->exception_number == CS_ETMV4_EXC_FIQ)
1783 return true;
1784
1785 return false;
1786}
1787
1788static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
1789 struct cs_etm_traceid_queue *tidq,
1790 u64 magic)
1791{
1792 u8 trace_chan_id = tidq->trace_chan_id;
1793 struct cs_etm_packet *packet = tidq->packet;
1794 struct cs_etm_packet *prev_packet = tidq->prev_packet;
1795
1796 if (magic == __perf_cs_etmv3_magic)
1797 if (packet->exception_number == CS_ETMV3_EXC_SMC ||
1798 packet->exception_number == CS_ETMV3_EXC_HYP ||
1799 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
1800 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
1801 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
1802 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
1803 packet->exception_number == CS_ETMV3_EXC_GENERIC)
1804 return true;
1805
1806 if (magic == __perf_cs_etmv4_magic) {
1807 if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
1808 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
1809 packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
1810 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
1811 return true;
1812
1813 /*
1814 * For CS_ETMV4_EXC_CALL, instructions other than SVC
1815 * (i.e. SMC, HVC) are treated as sync exceptions.
1816 */
1817 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
1818 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
1819 prev_packet->end_addr))
1820 return true;
1821
1822 /*
1823 * ETMv4 has 5 bits for exception number; if the numbers
1824 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
1825 * they are implementation defined exceptions.
1826 *
1827 * In this case, simply treat them as sync exceptions.
1828 */
1829 if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
1830 packet->exception_number <= CS_ETMV4_EXC_END)
1831 return true;
1832 }
1833
1834 return false;
1835}
1836
1837static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
1838 struct cs_etm_traceid_queue *tidq)
1839{
1840 struct cs_etm_packet *packet = tidq->packet;
1841 struct cs_etm_packet *prev_packet = tidq->prev_packet;
1842 u8 trace_chan_id = tidq->trace_chan_id;
1843 u64 magic;
1844 int ret;
1845
1846 switch (packet->sample_type) {
1847 case CS_ETM_RANGE:
1848 /*
1849 * Immediate branch instruction with neither link nor
1850 * return flag: it's a normal branch instruction within
1851 * the function.
1852 */
1853 if (packet->last_instr_type == OCSD_INSTR_BR &&
1854 packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
1855 packet->flags = PERF_IP_FLAG_BRANCH;
1856
1857 if (packet->last_instr_cond)
1858 packet->flags |= PERF_IP_FLAG_CONDITIONAL;
1859 }
1860
1861 /*
1862 * Immediate branch instruction with link (e.g. BL): this is a
1863 * branch instruction for a function call.
1864 */
1865 if (packet->last_instr_type == OCSD_INSTR_BR &&
1866 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
1867 packet->flags = PERF_IP_FLAG_BRANCH |
1868 PERF_IP_FLAG_CALL;
1869
1870 /*
1871 * Indirect branch instruction with link (e.g. BLR): this is a
1872 * branch instruction for a function call.
1873 */
1874 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
1875 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
1876 packet->flags = PERF_IP_FLAG_BRANCH |
1877 PERF_IP_FLAG_CALL;
1878
1879 /*
1880 * Indirect branch instruction with subtype of
1881 * OCSD_S_INSTR_V7_IMPLIED_RET: this is an explicit hint of a
1882 * function return for A32/T32.
1883 */
1884 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
1885 packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
1886 packet->flags = PERF_IP_FLAG_BRANCH |
1887 PERF_IP_FLAG_RETURN;
1888
1889 /*
1890 * Indirect branch instruction without link (e.g. BR), usually
1891 * this is used for function return, especially for functions
1892 * within dynamically linked libraries.
1893 */
1894 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
1895 packet->last_instr_subtype == OCSD_S_INSTR_NONE)
1896 packet->flags = PERF_IP_FLAG_BRANCH |
1897 PERF_IP_FLAG_RETURN;
1898
1899 /* Return instruction for function return. */
1900 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
1901 packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
1902 packet->flags = PERF_IP_FLAG_BRANCH |
1903 PERF_IP_FLAG_RETURN;
1904
1905 /*
1906 * Decoder might insert a discontinuity in the middle of
1907 * instruction packets, fixup prev_packet with flag
1908 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
1909 */
1910 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1911 prev_packet->flags |= PERF_IP_FLAG_BRANCH |
1912 PERF_IP_FLAG_TRACE_BEGIN;
1913
1914 /*
1915 * If the previous packet is an exception return packet
1916 * and the return address immediately follows an SVC instruction,
1917 * we need to adjust the previous packet's sample flags to
1918 * PERF_IP_FLAG_SYSCALLRET.
1919 */
1920 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
1921 PERF_IP_FLAG_RETURN |
1922 PERF_IP_FLAG_INTERRUPT) &&
1923 cs_etm__is_svc_instr(etmq, trace_chan_id,
1924 packet, packet->start_addr))
1925 prev_packet->flags = PERF_IP_FLAG_BRANCH |
1926 PERF_IP_FLAG_RETURN |
1927 PERF_IP_FLAG_SYSCALLRET;
1928 break;
1929 case CS_ETM_DISCONTINUITY:
1930 /*
1931 * The trace is discontinuous, if the previous packet is
1932 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
1933 * for previous packet.
1934 */
1935 if (prev_packet->sample_type == CS_ETM_RANGE)
1936 prev_packet->flags |= PERF_IP_FLAG_BRANCH |
1937 PERF_IP_FLAG_TRACE_END;
1938 break;
1939 case CS_ETM_EXCEPTION:
1940 ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
1941 if (ret)
1942 return ret;
1943
1944 /* The exception is for system call. */
1945 if (cs_etm__is_syscall(etmq, tidq, magic))
1946 packet->flags = PERF_IP_FLAG_BRANCH |
1947 PERF_IP_FLAG_CALL |
1948 PERF_IP_FLAG_SYSCALLRET;
1949 /*
1950 * The exceptions are triggered by external signals from bus,
1951 * interrupt controller, debug module, PE reset or halt.
1952 */
1953 else if (cs_etm__is_async_exception(tidq, magic))
1954 packet->flags = PERF_IP_FLAG_BRANCH |
1955 PERF_IP_FLAG_CALL |
1956 PERF_IP_FLAG_ASYNC |
1957 PERF_IP_FLAG_INTERRUPT;
1958 /*
1959 * Otherwise, exception is caused by trap, instruction &
1960 * data fault, or alignment errors.
1961 */
1962 else if (cs_etm__is_sync_exception(etmq, tidq, magic))
1963 packet->flags = PERF_IP_FLAG_BRANCH |
1964 PERF_IP_FLAG_CALL |
1965 PERF_IP_FLAG_INTERRUPT;
1966
1967 /*
1968 * An exception packet is not used standalone for generating
1969 * samples; it is affiliated with the previous instruction range
1970 * packet. So when an exception packet is inserted, set the
1971 * previous range packet's flags to tell perf it is an exception
1972 * taken branch.
1973 */
1974 if (prev_packet->sample_type == CS_ETM_RANGE)
1975 prev_packet->flags = packet->flags;
1976 break;
1977 case CS_ETM_EXCEPTION_RET:
1978 /*
1979 * An exception return packet is not used standalone for
1980 * generating samples; it is affiliated with the previous
1981 * instruction range packet. So when an exception return packet
1982 * is inserted, set the previous range packet's flags to tell
1983 * perf it is an exception return branch.
1984 *
1985 * The exception return can be for either a system call or
1986 * another exception type; unfortunately the packet doesn't
1987 * contain exception type related info, so we cannot determine
1988 * the exception type purely from the exception return packet.
1989 * Recording the exception number from the exception packet and
1990 * reusing it for the exception return packet is not reliable
1991 * either: the trace can be discontinuous or the interrupt can
1992 * be nested, and in both cases the recorded exception number
1993 * cannot be used for the exception return packet.
1994 *
1995 * For the exception return packet we only need to distinguish
1996 * whether it is for a system call or for another type. The
1997 * decision can therefore be deferred until the next packet,
1998 * which contains the return address; based on the return
1999 * address we can read out the previous instruction, check
2000 * whether it is a system call instruction, and then calibrate
2001 * the sample flags as needed.
2002 */
2003 if (prev_packet->sample_type == CS_ETM_RANGE)
2004 prev_packet->flags = PERF_IP_FLAG_BRANCH |
2005 PERF_IP_FLAG_RETURN |
2006 PERF_IP_FLAG_INTERRUPT;
2007 break;
2008 case CS_ETM_EMPTY:
2009 default:
2010 break;
2011 }
2012
2013 return 0;
2014}
2015
2016static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2017{
2018 int ret = 0;
2019 size_t processed = 0;
2020
2021 /*
2022 * Packets are decoded and added to the decoder's packet queue
2023 * until the decoder packet processing callback has requested that
2024 * processing stops or there is nothing left in the buffer. Normal
2025 * operations that stop processing are a timestamp packet or a full
2026 * decoder buffer queue.
2027 */
2028 ret = cs_etm_decoder__process_data_block(etmq->decoder,
2029 etmq->offset,
2030 &etmq->buf[etmq->buf_used],
2031 etmq->buf_len,
2032 &processed);
2033 if (ret)
2034 goto out;
2035
2036 etmq->offset += processed;
2037 etmq->buf_used += processed;
2038 etmq->buf_len -= processed;
2039
2040out:
2041 return ret;
2042}
2043
2044static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2045 struct cs_etm_traceid_queue *tidq)
2046{
2047 int ret;
2048 struct cs_etm_packet_queue *packet_queue;
2049
2050 packet_queue = &tidq->packet_queue;
2051
2052 /* Process each packet in this chunk */
2053 while (1) {
2054 ret = cs_etm_decoder__get_packet(packet_queue,
2055 tidq->packet);
2056 if (ret <= 0)
2057 /*
2058 * Stop processing this chunk on
2059 * end of data or error
2060 */
2061 break;
2062
2063 /*
2064 * Packet addresses are swapped during packet
2065 * handling in the switch() statement below, so
2066 * the sample flags must be set before the
2067 * switch() statement, while the address
2068 * information is still valid.
2069 */
2070 ret = cs_etm__set_sample_flags(etmq, tidq);
2071 if (ret < 0)
2072 break;
2073
2074 switch (tidq->packet->sample_type) {
2075 case CS_ETM_RANGE:
2076 /*
2077 * If the packet contains an instruction
2078 * range, generate instruction sequence
2079 * events.
2080 */
2081 cs_etm__sample(etmq, tidq);
2082 break;
2083 case CS_ETM_EXCEPTION:
2084 case CS_ETM_EXCEPTION_RET:
			/*
			 * When an exception packet arrives, make sure the
			 * previous instruction range packet is handled
			 * properly.
			 */
2090 cs_etm__exception(tidq);
2091 break;
2092 case CS_ETM_DISCONTINUITY:
2093 /*
2094 * Discontinuity in trace, flush
2095 * previous branch stack
2096 */
2097 cs_etm__flush(etmq, tidq);
2098 break;
2099 case CS_ETM_EMPTY:
			/*
			 * An empty packet should never be received here;
			 * report an error.
			 */
2104 pr_err("CS ETM Trace: empty packet\n");
2105 return -EINVAL;
2106 default:
2107 break;
2108 }
2109 }
2110
2111 return ret;
2112}
2113
2114static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2115{
2116 int idx;
2117 struct int_node *inode;
2118 struct cs_etm_traceid_queue *tidq;
2119 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2120
2121 intlist__for_each_entry(inode, traceid_queues_list) {
2122 idx = (int)(intptr_t)inode->priv;
2123 tidq = etmq->traceid_queues[idx];
2124
2125 /* Ignore return value */
2126 cs_etm__process_traceid_queue(etmq, tidq);
2127
		/*
		 * Generate an instruction sample with the remaining
		 * branch stack entries.
		 */
2132 cs_etm__flush(etmq, tidq);
2133 }
2134}
2135
2136static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
2137{
2138 int err = 0;
2139 struct cs_etm_traceid_queue *tidq;
2140
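	/*
	 * This is the timeless decode path (see
	 * cs_etm__process_timeless_queues()): a single traceID queue per
	 * etmq, keyed by CS_ETM_PER_THREAD_TRACEID, is assumed.
	 */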
2141 tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2142 if (!tidq)
2143 return -EINVAL;
2144
2145 /* Go through each buffer in the queue and decode them one by one */
2146 while (1) {
2147 err = cs_etm__get_data_block(etmq);
2148 if (err <= 0)
2149 return err;
2150
2151 /* Run trace decoder until buffer consumed or end of trace */
2152 do {
2153 err = cs_etm__decode_data_block(etmq);
2154 if (err)
2155 return err;
2156
			/*
			 * Process each packet in this chunk; if an error
			 * occurs there is nothing to do other than hope the
			 * next chunk will be better.
			 */
2162 err = cs_etm__process_traceid_queue(etmq, tidq);
2163
2164 } while (etmq->buf_len);
2165
2166 if (err == 0)
2167 /* Flush any remaining branch stack entries */
2168 err = cs_etm__end_block(etmq, tidq);
2169 }
2170
2171 return err;
2172}
2173
2174static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2175 pid_t tid)
2176{
2177 unsigned int i;
2178 struct auxtrace_queues *queues = &etm->queues;
2179
2180 for (i = 0; i < queues->nr_queues; i++) {
2181 struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2182 struct cs_etm_queue *etmq = queue->priv;
2183 struct cs_etm_traceid_queue *tidq;
2184
2185 if (!etmq)
2186 continue;
2187
2188 tidq = cs_etm__etmq_get_traceid_queue(etmq,
2189 CS_ETM_PER_THREAD_TRACEID);
2190
2191 if (!tidq)
2192 continue;
2193
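		/* A tid of -1 means decode the queues of every thread */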
2194 if ((tid == -1) || (tidq->tid == tid)) {
2195 cs_etm__set_pid_tid_cpu(etm, tidq);
2196 cs_etm__run_decoder(etmq);
2197 }
2198 }
2199
2200 return 0;
2201}
2202
2203static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
2204{
2205 int ret = 0;
2206 unsigned int cs_queue_nr, queue_nr, i;
2207 u8 trace_chan_id;
2208 u64 cs_timestamp;
2209 struct auxtrace_queue *queue;
2210 struct cs_etm_queue *etmq;
2211 struct cs_etm_traceid_queue *tidq;
2212
2213 /*
2214 * Pre-populate the heap with one entry from each queue so that we can
2215 * start processing in time order across all queues.
2216 */
2217 for (i = 0; i < etm->queues.nr_queues; i++) {
2218 etmq = etm->queues.queue_array[i].priv;
2219 if (!etmq)
2220 continue;
2221
2222 ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2223 if (ret)
2224 return ret;
2225 }
2226
2227 while (1) {
2228 if (!etm->heap.heap_cnt)
2229 goto out;
2230
2231 /* Take the entry at the top of the min heap */
2232 cs_queue_nr = etm->heap.heap_array[0].queue_nr;
2233 queue_nr = TO_QUEUE_NR(cs_queue_nr);
2234 trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2235 queue = &etm->queues.queue_array[queue_nr];
2236 etmq = queue->priv;
2237
2238 /*
2239 * Remove the top entry from the heap since we are about
2240 * to process it.
2241 */
2242 auxtrace_heap__pop(&etm->heap);
2243
2244 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2245 if (!tidq) {
			/*
			 * No traceID queue has been allocated for this
			 * traceID, which means something somewhere went very
			 * wrong. There is no other choice than to simply
			 * exit.
			 */
2251 ret = -EINVAL;
2252 goto out;
2253 }
2254
2255 /*
2256 * Packets associated with this timestamp are already in
2257 * the etmq's traceID queue, so process them.
2258 */
2259 ret = cs_etm__process_traceid_queue(etmq, tidq);
2260 if (ret < 0)
2261 goto out;
2262
2263 /*
2264 * Packets for this timestamp have been processed, time to
2265 * move on to the next timestamp, fetching a new auxtrace_buffer
2266 * if need be.
2267 */
2268refetch:
2269 ret = cs_etm__get_data_block(etmq);
2270 if (ret < 0)
2271 goto out;
2272
2273 /*
2274 * No more auxtrace_buffers to process in this etmq, simply
2275 * move on to another entry in the auxtrace_heap.
2276 */
2277 if (!ret)
2278 continue;
2279
2280 ret = cs_etm__decode_data_block(etmq);
2281 if (ret)
2282 goto out;
2283
2284 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2285
2286 if (!cs_timestamp) {
			/*
			 * Function cs_etm__decode_data_block() returns when
			 * there are no more traces to decode in the current
			 * auxtrace_buffer OR when a timestamp has been
			 * encountered on any of the traceID queues. Since we
			 * did not get a timestamp, there are no more traces to
			 * process in this auxtrace_buffer. As such, empty and
			 * flush all traceID queues.
			 */
2296 cs_etm__clear_all_traceid_queues(etmq);
2297
2298 /* Fetch another auxtrace_buffer for this etmq */
2299 goto refetch;
2300 }
2301
2302 /*
2303 * Add to the min heap the timestamp for packets that have
2304 * just been decoded. They will be processed and synthesized
2305 * during the next call to cs_etm__process_traceid_queue() for
2306 * this queue/traceID.
2307 */
2308 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2309 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
2310 }
2311
2312out:
2313 return ret;
2314}
2315
2316static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2317 union perf_event *event)
2318{
2319 struct thread *th;
2320
2321 if (etm->timeless_decoding)
2322 return 0;
2323
	/*
	 * Add the tid/pid to the machine's thread list so that it can be
	 * matched up when a contextID is received from the decoder.
	 */
2328 th = machine__findnew_thread(etm->machine,
2329 event->itrace_start.pid,
2330 event->itrace_start.tid);
2331 if (!th)
2332 return -ENOMEM;
2333
2334 thread__put(th);
2335
2336 return 0;
2337}
2338
2339static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2340 union perf_event *event)
2341{
2342 struct thread *th;
2343 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2344
	/*
	 * Context switches in per-thread mode are irrelevant since perf
	 * will start/stop tracing as the process is scheduled.
	 */
2349 if (etm->timeless_decoding)
2350 return 0;
2351
	/*
	 * SWITCH_IN events carry the previous process (the one being switched
	 * out) while SWITCH_OUT events carry the next process (the one being
	 * switched in). As such we don't care about IN events.
	 */
2357 if (!out)
2358 return 0;
2359
	/*
	 * Add the tid/pid to the machine's thread list so that it can be
	 * matched up when a contextID is received from the decoder.
	 */
2364 th = machine__findnew_thread(etm->machine,
2365 event->context_switch.next_prev_pid,
2366 event->context_switch.next_prev_tid);
2367 if (!th)
2368 return -ENOMEM;
2369
2370 thread__put(th);
2371
2372 return 0;
2373}
2374
2375static int cs_etm__process_event(struct perf_session *session,
2376 union perf_event *event,
2377 struct perf_sample *sample,
2378 struct perf_tool *tool)
2379{
2380 u64 sample_kernel_timestamp;
2381 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2382 struct cs_etm_auxtrace,
2383 auxtrace);
2384
2385 if (dump_trace)
2386 return 0;
2387
2388 if (!tool->ordered_events) {
2389 pr_err("CoreSight ETM Trace requires ordered events\n");
2390 return -EINVAL;
2391 }
2392
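	/* A sample time of 0 or (u64)-1 means there is no usable timestamp */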
2393 if (sample->time && (sample->time != (u64) -1))
2394 sample_kernel_timestamp = sample->time;
2395 else
2396 sample_kernel_timestamp = 0;
2397
2398 /*
2399 * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We
2400 * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because
2401 * ETM_OPT_CTXTID is not enabled.
2402 */
2403 if (etm->timeless_decoding &&
2404 event->header.type == PERF_RECORD_EXIT)
2405 return cs_etm__process_timeless_queues(etm,
2406 event->fork.tid);
2407
2408 if (event->header.type == PERF_RECORD_ITRACE_START)
2409 return cs_etm__process_itrace_start(etm, event);
2410 else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
2411 return cs_etm__process_switch_cpu_wide(etm, event);
2412
2413 if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) {
2414 /*
2415 * Record the latest kernel timestamp available in the header
2416 * for samples so that synthesised samples occur from this point
2417 * onwards.
2418 */
2419 etm->latest_kernel_timestamp = sample_kernel_timestamp;
2420 }
2421
2422 return 0;
2423}
2424
2425static void dump_queued_data(struct cs_etm_auxtrace *etm,
2426 struct perf_record_auxtrace *event)
2427{
2428 struct auxtrace_buffer *buf;
2429 unsigned int i;
	/*
	 * Find all buffers with the same reference in the queues and dump
	 * them. This is because the queues can contain multiple entries of
	 * the same buffer that were split on aux records.
	 */
2435 for (i = 0; i < etm->queues.nr_queues; ++i)
2436 list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2437 if (buf->reference == event->reference)
2438 cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2439}
2440
2441static int cs_etm__process_auxtrace_event(struct perf_session *session,
2442 union perf_event *event,
2443 struct perf_tool *tool __maybe_unused)
2444{
2445 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2446 struct cs_etm_auxtrace,
2447 auxtrace);
2448 if (!etm->data_queued) {
2449 struct auxtrace_buffer *buffer;
2450 off_t data_offset;
2451 int fd = perf_data__fd(session->data);
2452 bool is_pipe = perf_data__is_pipe(session->data);
2453 int err;
2454 int idx = event->auxtrace.idx;
2455
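		/*
		 * Pipes can't be seeked, so use offset 0; otherwise record the
		 * current file position, which is where this buffer's trace
		 * data starts.
		 */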
2456 if (is_pipe)
2457 data_offset = 0;
2458 else {
2459 data_offset = lseek(fd, 0, SEEK_CUR);
2460 if (data_offset == -1)
2461 return -errno;
2462 }
2463
2464 err = auxtrace_queues__add_event(&etm->queues, session,
2465 event, data_offset, &buffer);
2466 if (err)
2467 return err;
2468
		/*
		 * Knowing whether the trace is formatted or not requires a
		 * lookup of the aux record, so this only works in non-piped
		 * mode where data is queued in cs_etm__queue_aux_records().
		 * Always assume formatted in piped mode (true).
		 */
2475 err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
2476 idx, true);
2477 if (err)
2478 return err;
2479
2480 if (dump_trace)
2481 if (auxtrace_buffer__get_data(buffer, fd)) {
2482 cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2483 auxtrace_buffer__put_data(buffer);
2484 }
2485 } else if (dump_trace)
2486 dump_queued_data(etm, &event->auxtrace);
2487
2488 return 0;
2489}
2490
2491static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm)
2492{
2493 struct evsel *evsel;
2494 struct evlist *evlist = etm->session->evlist;
2495 bool timeless_decoding = true;
2496
2497 /* Override timeless mode with user input from --itrace=Z */
2498 if (etm->synth_opts.timeless_decoding)
2499 return true;
2500
	/*
	 * Iterate through the list of events; if any of them has the time
	 * bit set then decoding is not timeless.
	 */
2505 evlist__for_each_entry(evlist, evsel) {
2506 if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
2507 timeless_decoding = false;
2508 }
2509
2510 return timeless_decoding;
2511}
2512
/*
 * Read a single cpu parameter block from the auxtrace_info priv block.
 *
 * For version 1 there is a per-cpu nr_params entry. When handling a
 * version 1 file, the number of params indicated by this value may be
 * fewer than, the same as, or more than the compile-time number we
 * understand.
 *
 * For a version 0 info block there is a fixed number of params, and we
 * need to fill out the nr_params value in the metadata we create.
 */
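/*
 * A rough sketch of the per-cpu layouts handled below, assuming MAGIC, CPU
 * and NR_TRC_PARAMS are the first entries of a block in the priv area:
 *
 *   version 0: [MAGIC][CPU][param 0] ... [param nr_params_v0 - 1]
 *   version 1: [MAGIC][CPU][NR_TRC_PARAMS][param 0] ... [param N - 1]
 *
 * The metadata block created here always uses the version 1 layout, so for
 * a version 0 input the NR_TRC_PARAMS entry is synthesized.
 */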
2523static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2524 int out_blk_size, int nr_params_v0)
2525{
2526 u64 *metadata = NULL;
2527 int hdr_version;
2528 int nr_in_params, nr_out_params, nr_cmn_params;
2529 int i, k;
2530
2531 metadata = zalloc(sizeof(*metadata) * out_blk_size);
2532 if (!metadata)
2533 return NULL;
2534
2535 /* read block current index & version */
2536 i = *buff_in_offset;
2537 hdr_version = buff_in[CS_HEADER_VERSION];
2538
2539 if (!hdr_version) {
2540 /* read version 0 info block into a version 1 metadata block */
2541 nr_in_params = nr_params_v0;
2542 metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2543 metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2544 metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2545 /* remaining block params at offset +1 from source */
2546 for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2547 metadata[k + 1] = buff_in[i + k];
2548 /* version 0 has 2 common params */
2549 nr_cmn_params = 2;
2550 } else {
2551 /* read version 1 info block - input and output nr_params may differ */
2552 /* version 1 has 3 common params */
2553 nr_cmn_params = 3;
2554 nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2555
2556 /* if input has more params than output - skip excess */
2557 nr_out_params = nr_in_params + nr_cmn_params;
2558 if (nr_out_params > out_blk_size)
2559 nr_out_params = out_blk_size;
2560
2561 for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2562 metadata[k] = buff_in[i + k];
2563
2564 /* record the actual nr params we copied */
2565 metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2566 }
2567
2568 /* adjust in offset by number of in params used */
2569 i += nr_in_params + nr_cmn_params;
2570 *buff_in_offset = i;
2571 return metadata;
2572}
2573
2574/**
2575 * Puts a fragment of an auxtrace buffer into the auxtrace queues based
2576 * on the bounds of aux_event, if it matches with the buffer that's at
2577 * file_offset.
2578 *
 * Normally, whole auxtrace buffers would be added to the queue. But we
 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
 * is reset at each buffer boundary, so splitting the buffers up in advance
 * has the same effect.
2583 */
2584static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
2585 struct perf_record_aux *aux_event, struct perf_sample *sample)
2586{
2587 int err;
2588 char buf[PERF_SAMPLE_MAX_SIZE];
2589 union perf_event *auxtrace_event_union;
2590 struct perf_record_auxtrace *auxtrace_event;
2591 union perf_event auxtrace_fragment;
2592 __u64 aux_offset, aux_size;
2593 __u32 idx;
2594 bool formatted;
2595
2596 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2597 struct cs_etm_auxtrace,
2598 auxtrace);
2599
2600 /*
2601 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
2602 * from looping through the auxtrace index.
2603 */
2604 err = perf_session__peek_event(session, file_offset, buf,
2605 PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
2606 if (err)
2607 return err;
2608 auxtrace_event = &auxtrace_event_union->auxtrace;
2609 if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
2610 return -EINVAL;
2611
2612 if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
2613 auxtrace_event->header.size != sz) {
2614 return -EINVAL;
2615 }
2616
2617 /*
2618 * In per-thread mode, CPU is set to -1, but TID will be set instead. See
2619 * auxtrace_mmap_params__set_idx(). Return 'not found' if neither CPU nor TID match.
2620 */
2621 if ((auxtrace_event->cpu == (__u32) -1 && auxtrace_event->tid != sample->tid) ||
2622 auxtrace_event->cpu != sample->cpu)
2623 return 1;
2624
2625 if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
2626 /*
2627 * Clamp size in snapshot mode. The buffer size is clamped in
2628 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
2629 * the buffer size.
2630 */
2631 aux_size = min(aux_event->aux_size, auxtrace_event->size);
2632
		/*
		 * In this mode, the head also points to the end of the buffer,
		 * so aux_offset needs to have the size subtracted in order to
		 * point to the beginning, as in normal mode.
		 */
2637 aux_offset = aux_event->aux_offset - aux_size;
2638 } else {
2639 aux_size = aux_event->aux_size;
2640 aux_offset = aux_event->aux_offset;
2641 }
2642
2643 if (aux_offset >= auxtrace_event->offset &&
2644 aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
2645 /*
2646 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
2647 * based on the sizes of the aux event, and queue that fragment.
2648 */
2649 auxtrace_fragment.auxtrace = *auxtrace_event;
2650 auxtrace_fragment.auxtrace.size = aux_size;
2651 auxtrace_fragment.auxtrace.offset = aux_offset;
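		/*
		 * Point file_offset at the fragment's data: skip the AUXTRACE
		 * event header plus the part of the buffer before aux_offset.
		 */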
2652 file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
2653
2654 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
2655 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
2656 err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
2657 file_offset, NULL);
2658 if (err)
2659 return err;
2660
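		/*
		 * The CORESIGHT_FORMAT_RAW flag on the AUX record means the
		 * data is a single unformatted trace stream (e.g. TRBE), so
		 * tell the queue setup whether frame decoding is needed.
		 */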
2661 idx = auxtrace_event->idx;
2662 formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
2663 return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
2664 idx, formatted);
2665 }
2666
2667 /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
2668 return 1;
2669}
2670
2671static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
2672 u64 offset __maybe_unused, void *data __maybe_unused)
2673{
2674 struct perf_sample sample;
2675 int ret;
2676 struct auxtrace_index_entry *ent;
2677 struct auxtrace_index *auxtrace_index;
2678 struct evsel *evsel;
2679 size_t i;
2680
2681 /* Don't care about any other events, we're only queuing buffers for AUX events */
2682 if (event->header.type != PERF_RECORD_AUX)
2683 return 0;
2684
2685 if (event->header.size < sizeof(struct perf_record_aux))
2686 return -EINVAL;
2687
2688 /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
2689 if (!event->aux.aux_size)
2690 return 0;
2691
2692 /*
2693 * Parse the sample, we need the sample_id_all data that comes after the event so that the
2694 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
2695 */
2696 evsel = evlist__event2evsel(session->evlist, event);
2697 if (!evsel)
2698 return -EINVAL;
2699 ret = evsel__parse_sample(evsel, event, &sample);
2700 if (ret)
2701 return ret;
2702
2703 /*
2704 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
2705 */
2706 list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
2707 for (i = 0; i < auxtrace_index->nr; i++) {
2708 ent = &auxtrace_index->entries[i];
2709 ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
2710 ent->sz, &event->aux, &sample);
2711 /*
2712 * Stop search on error or successful values. Continue search on
2713 * 1 ('not found')
2714 */
2715 if (ret != 1)
2716 return ret;
2717 }
2718 }
2719
2720 /*
2721 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
2722 * don't exit with an error because it will still be possible to decode other aux records.
2723 */
2724 pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
2725 " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
2726 return 0;
2727}
2728
2729static int cs_etm__queue_aux_records(struct perf_session *session)
2730{
2731 struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
2732 struct auxtrace_index, list);
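	/*
	 * perf_session__peek_events() walks every event in the data section;
	 * the callback picks out PERF_RECORD_AUX events and queues the
	 * matching buffer fragments.
	 */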
2733 if (index && index->nr > 0)
2734 return perf_session__peek_events(session, session->header.data_offset,
2735 session->header.data_size,
2736 cs_etm__queue_aux_records_cb, NULL);
2737
	/*
	 * We would get here if there are no entries in the index (either no
	 * auxtrace buffers or no index at all). Return silently, as it may
	 * still be possible to queue the buffers in
	 * cs_etm__process_auxtrace_event() if etm->data_queued is still false.
	 *
	 * In that scenario, buffers will not be split by AUX records.
	 */
2746 return 0;
2747}
2748
2749int cs_etm__process_auxtrace_info_full(union perf_event *event,
2750 struct perf_session *session)
2751{
2752 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
2753 struct cs_etm_auxtrace *etm = NULL;
2754 struct int_node *inode;
2755 int event_header_size = sizeof(struct perf_event_header);
2756 int total_size = auxtrace_info->header.size;
2757 int priv_size = 0;
2758 int num_cpu, trcidr_idx;
2759 int err = 0;
2760 int i, j;
2761 u64 *ptr = NULL;
2762 u64 **metadata = NULL;
2763
	/*
	 * Create an RB tree of traceID-metadata tuples. Since the conversion
	 * has to be made for each packet that gets decoded, optimizing the
	 * lookup beyond a sequential array search is worth doing.
	 */
2769 traceid_list = intlist__new(NULL);
2770 if (!traceid_list)
2771 return -ENOMEM;
2772
2773 /* First the global part */
2774 ptr = (u64 *) auxtrace_info->priv;
2775 num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
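	/*
	 * CS_PMU_TYPE_CPUS packs the number of cpus in the lower 32 bits and
	 * the PMU type in the upper 32 bits (the latter is extracted further
	 * down).
	 */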
2776 metadata = zalloc(sizeof(*metadata) * num_cpu);
2777 if (!metadata) {
2778 err = -ENOMEM;
2779 goto err_free_traceid_list;
2780 }
2781
2782 /* Start parsing after the common part of the header */
2783 i = CS_HEADER_VERSION_MAX;
2784
2785 /*
2786 * The metadata is stored in the auxtrace_info section and encodes
2787 * the configuration of the ARM embedded trace macrocell which is
2788 * required by the trace decoder to properly decode the trace due
2789 * to its highly compressed nature.
2790 */
2791 for (j = 0; j < num_cpu; j++) {
2792 if (ptr[i] == __perf_cs_etmv3_magic) {
2793 metadata[j] =
2794 cs_etm__create_meta_blk(ptr, &i,
2795 CS_ETM_PRIV_MAX,
2796 CS_ETM_NR_TRC_PARAMS_V0);
2797
2798 /* The traceID is our handle */
2799 trcidr_idx = CS_ETM_ETMTRACEIDR;
2800
2801 } else if (ptr[i] == __perf_cs_etmv4_magic) {
2802 metadata[j] =
2803 cs_etm__create_meta_blk(ptr, &i,
2804 CS_ETMV4_PRIV_MAX,
2805 CS_ETMV4_NR_TRC_PARAMS_V0);
2806
2807 /* The traceID is our handle */
2808 trcidr_idx = CS_ETMV4_TRCTRACEIDR;
2809 } else if (ptr[i] == __perf_cs_ete_magic) {
2810 metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
2811
2812 /* ETE shares first part of metadata with ETMv4 */
2813 trcidr_idx = CS_ETMV4_TRCTRACEIDR;
2814 } else {
2815 ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
2816 ptr[i]);
2817 err = -EINVAL;
2818 goto err_free_metadata;
2819 }
2820
2821 if (!metadata[j]) {
2822 err = -ENOMEM;
2823 goto err_free_metadata;
2824 }
2825
2826 /* Get an RB node for this CPU */
2827 inode = intlist__findnew(traceid_list, metadata[j][trcidr_idx]);
2828
2829 /* Something went wrong, no need to continue */
2830 if (!inode) {
2831 err = -ENOMEM;
2832 goto err_free_metadata;
2833 }
2834
		/*
		 * The node for that traceID should not have been claimed
		 * already; back out if that's the case.
		 */
2839 if (inode->priv) {
2840 err = -EINVAL;
2841 goto err_free_metadata;
2842 }
2843 /* All good, associate the traceID with the metadata pointer */
2844 inode->priv = metadata[j];
2845 }
2846
	/*
	 * CS_HEADER_VERSION_MAX marks how many double words are in the global
	 * metadata, while CS_ETM_PRIV_MAX and CS_ETMV4_PRIV_MAX mark how many
	 * are in each cpu's metadata. The following tests if the correct
	 * number of double words was present in the auxtrace info section.
	 */
2854 priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
2855 if (i * 8 != priv_size) {
2856 err = -EINVAL;
2857 goto err_free_metadata;
2858 }
2859
2860 etm = zalloc(sizeof(*etm));
2861
2862 if (!etm) {
2863 err = -ENOMEM;
2864 goto err_free_metadata;
2865 }
2866
2867 err = auxtrace_queues__init(&etm->queues);
2868 if (err)
2869 goto err_free_etm;
2870
2871 if (session->itrace_synth_opts->set) {
2872 etm->synth_opts = *session->itrace_synth_opts;
2873 } else {
2874 itrace_synth_opts__set_default(&etm->synth_opts,
2875 session->itrace_synth_opts->default_no_sample);
2876 etm->synth_opts.callchain = false;
2877 }
2878
2879 etm->session = session;
2880 etm->machine = &session->machines.host;
2881
2882 etm->num_cpu = num_cpu;
2883 etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
2884 etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
2885 etm->metadata = metadata;
2886 etm->auxtrace_type = auxtrace_info->type;
2887 etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);
2888
2889 etm->auxtrace.process_event = cs_etm__process_event;
2890 etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
2891 etm->auxtrace.flush_events = cs_etm__flush_events;
2892 etm->auxtrace.free_events = cs_etm__free_events;
2893 etm->auxtrace.free = cs_etm__free;
2894 etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
2895 session->auxtrace = &etm->auxtrace;
2896
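	/*
	 * Catch-all thread used when trace data can't be attributed to a
	 * known thread, e.g. when no context ID is present in the trace.
	 */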
2897 etm->unknown_thread = thread__new(999999999, 999999999);
2898 if (!etm->unknown_thread) {
2899 err = -ENOMEM;
2900 goto err_free_queues;
2901 }
2902
2903 /*
2904 * Initialize list node so that at thread__zput() we can avoid
2905 * segmentation fault at list_del_init().
2906 */
2907 INIT_LIST_HEAD(&etm->unknown_thread->node);
2908
2909 err = thread__set_comm(etm->unknown_thread, "unknown", 0);
2910 if (err)
2911 goto err_delete_thread;
2912
2913 if (thread__init_maps(etm->unknown_thread, etm->machine)) {
2914 err = -ENOMEM;
2915 goto err_delete_thread;
2916 }
2917
2918 err = cs_etm__synth_events(etm, session);
2919 if (err)
2920 goto err_delete_thread;
2921
2922 err = cs_etm__queue_aux_records(session);
2923 if (err)
2924 goto err_delete_thread;
2925
2926 etm->data_queued = etm->queues.populated;
2927 /*
2928 * Print warning in pipe mode, see cs_etm__process_auxtrace_event() and
2929 * cs_etm__queue_aux_fragment() for details relating to limitations.
2930 */
2931 if (!etm->data_queued)
2932 pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n"
2933 "Continuing with best effort decoding in piped mode.\n\n");
2934
2935 return 0;
2936
2937err_delete_thread:
2938 thread__zput(etm->unknown_thread);
2939err_free_queues:
2940 auxtrace_queues__free(&etm->queues);
2941 session->auxtrace = NULL;
2942err_free_etm:
2943 zfree(&etm);
2944err_free_metadata:
2945 /* No need to check @metadata[j], free(NULL) is supported */
2946 for (j = 0; j < num_cpu; j++)
2947 zfree(&metadata[j]);
2948 zfree(&metadata);
2949err_free_traceid_list:
2950 intlist__delete(traceid_list);
2951 return err;
2952}