v5.4
// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel-bts.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 */

#include <endian.h>
#include <errno.h>
#include <byteswap.h>
#include <inttypes.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/zalloc.h>

#include "color.h"
#include "evsel.h"
#include "evlist.h"
#include "machine.h"
#include "symbol.h"
#include "session.h"
#include "tool.h"
#include "thread.h"
#include "thread-stack.h"
#include "debug.h"
#include "tsc.h"
#include "auxtrace.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-bts.h"
#include "util/synthetic-events.h"

#define MAX_TIMESTAMP (~0ULL)

#define INTEL_BTS_ERR_NOINSN  5
#define INTEL_BTS_ERR_LOST    9

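/*
 * BTS records are always written little-endian.  On a big-endian host
 * each 64-bit field must be byte-swapped before use; on a little-endian
 * host le64_to_cpu expands to nothing and the value is used as-is.
 */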
#if __BYTE_ORDER == __BIG_ENDIAN
#define le64_to_cpu bswap_64
#else
#define le64_to_cpu
#endif

struct intel_bts {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	bool				sampling_mode;
	bool				snapshot_mode;
	bool				data_queued;
	u32				pmu_type;
	struct perf_tsc_conversion	tc;
	bool				cap_user_time_zero;
	struct itrace_synth_opts	synth_opts;
	bool				sample_branches;
	u32				branches_filter;
	u64				branches_sample_type;
	u64				branches_id;
	size_t				branches_event_size;
	unsigned long			num_events;
};

struct intel_bts_queue {
	struct intel_bts	*bts;
	unsigned int		queue_nr;
	struct auxtrace_buffer	*buffer;
	bool			on_heap;
	bool			done;
	pid_t			pid;
	pid_t			tid;
	int			cpu;
	u64			time;
	struct intel_pt_insn	intel_pt_insn;
	u32			sample_flags;
};

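/*
 * One hardware BTS record: branch source, branch target, and a misc
 * field in which the dump code below treats bit 4 (0x10) as the
 * "branch predicted" flag.
 */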
struct branch {
	u64 from;
	u64 to;
	u64 misc;
};

static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
			   unsigned char *buf, size_t len)
{
	struct branch *branch;
	size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... Intel BTS data: size %zu bytes\n",
		      len);

	while (len) {
		if (len >= br_sz)
			sz = br_sz;
		else
			sz = len;
		printf(".");
		color_fprintf(stdout, color, "  %08x: ", pos);
		for (i = 0; i < sz; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < br_sz; i++)
			color_fprintf(stdout, color, "   ");
		if (len >= br_sz) {
			branch = (struct branch *)buf;
			color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
				      le64_to_cpu(branch->from),
				      le64_to_cpu(branch->to),
				      le64_to_cpu(branch->misc) & 0x10 ?
							"pred" : "miss");
		} else {
			color_fprintf(stdout, color, " Bad record!\n");
		}
		pos += sz;
		buf += sz;
		len -= sz;
	}
}

static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
				 size_t len)
{
	printf(".\n");
	intel_bts_dump(bts, buf, len);
}

static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
{
	union perf_event event;
	int err;

	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
			     sample->tid, 0, "Lost trace data", sample->time);

	err = perf_session__deliver_synth_event(bts->session, &event, NULL);
	if (err)
		pr_err("Intel BTS: failed to deliver error event, error %d\n",
		       err);

	return err;
}

static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
						     unsigned int queue_nr)
{
	struct intel_bts_queue *btsq;

	btsq = zalloc(sizeof(struct intel_bts_queue));
	if (!btsq)
		return NULL;

	btsq->bts = bts;
	btsq->queue_nr = queue_nr;
	btsq->pid = -1;
	btsq->tid = -1;
	btsq->cpu = -1;

	return btsq;
}

static int intel_bts_setup_queue(struct intel_bts *bts,
				 struct auxtrace_queue *queue,
				 unsigned int queue_nr)
{
	struct intel_bts_queue *btsq = queue->priv;

	if (list_empty(&queue->head))
		return 0;

	if (!btsq) {
		btsq = intel_bts_alloc_queue(bts, queue_nr);
		if (!btsq)
			return -ENOMEM;
		queue->priv = btsq;

		if (queue->cpu != -1)
			btsq->cpu = queue->cpu;
		btsq->tid = queue->tid;
	}

	if (bts->sampling_mode)
		return 0;

	if (!btsq->on_heap && !btsq->buffer) {
		int ret;

		btsq->buffer = auxtrace_buffer__next(queue, NULL);
		if (!btsq->buffer)
			return 0;

		ret = auxtrace_heap__add(&bts->heap, queue_nr,
					 btsq->buffer->reference);
		if (ret)
			return ret;
		btsq->on_heap = true;
	}

	return 0;
}

static int intel_bts_setup_queues(struct intel_bts *bts)
{
	unsigned int i;
	int ret;

	for (i = 0; i < bts->queues.nr_queues; i++) {
		ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
					    i);
		if (ret)
			return ret;
	}
	return 0;
}

static inline int intel_bts_update_queues(struct intel_bts *bts)
{
	if (bts->queues.new_data) {
		bts->queues.new_data = false;
		return intel_bts_setup_queues(bts);
	}
	return 0;
}

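/*
 * Consecutive snapshots of the same AUX area can overlap: the start of
 * buffer b may repeat the tail of buffer a.  Walk forward through a,
 * one record at a time, looking for a suffix of a that matches a prefix
 * of b, and return a pointer to the first byte of b that is new data.
 */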
static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
					     unsigned char *buf_b, size_t len_b)
{
	size_t offs, len;

	if (len_a > len_b)
		offs = len_a - len_b;
	else
		offs = 0;

	for (; offs < len_a; offs += sizeof(struct branch)) {
		len = len_a - offs;
		if (!memcmp(buf_a + offs, buf_b, len))
			return buf_b + len;
	}

	return buf_b;
}

static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
				    struct auxtrace_buffer *b)
{
	struct auxtrace_buffer *a;
	void *start;

	if (b->list.prev == &queue->head)
		return 0;
	a = list_entry(b->list.prev, struct auxtrace_buffer, list);
	start = intel_bts_find_overlap(a->data, a->size, b->data, b->size);
	if (!start)
		return -EINVAL;
	b->use_size = b->data + b->size - start;
	b->use_data = start;
	return 0;
}

static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip)
{
	return machine__kernel_ip(bts->machine, ip) ?
	       PERF_RECORD_MISC_KERNEL :
	       PERF_RECORD_MISC_USER;
}

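/*
 * Synthesize one PERF_RECORD_SAMPLE for a decoded branch and deliver it
 * to the session as if it had been read from the perf.data file.
 */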
static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
					 struct branch *branch)
{
	int ret;
	struct intel_bts *bts = btsq->bts;
	union perf_event event;
	struct perf_sample sample = { .ip = 0, };

	if (bts->synth_opts.initial_skip &&
	    bts->num_events++ <= bts->synth_opts.initial_skip)
		return 0;

	sample.ip = le64_to_cpu(branch->from);
	sample.cpumode = intel_bts_cpumode(bts, sample.ip);
	sample.pid = btsq->pid;
	sample.tid = btsq->tid;
	sample.addr = le64_to_cpu(branch->to);
	sample.id = btsq->bts->branches_id;
	sample.stream_id = btsq->bts->branches_id;
	sample.period = 1;
	sample.cpu = btsq->cpu;
	sample.flags = btsq->sample_flags;
	sample.insn_len = btsq->intel_pt_insn.length;
	memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);

	event.sample.header.type = PERF_RECORD_SAMPLE;
	event.sample.header.misc = sample.cpumode;
	event.sample.header.size = sizeof(struct perf_event_header);

	if (bts->synth_opts.inject) {
		event.sample.header.size = bts->branches_event_size;
		ret = perf_event__synthesize_sample(&event,
						    bts->branches_sample_type,
						    0, &sample);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
	if (ret)
		pr_err("Intel BTS: failed to deliver branch event, error %d\n",
		       ret);

	return ret;
}

static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
{
	struct machine *machine = btsq->bts->machine;
	struct thread *thread;
	unsigned char buf[INTEL_PT_INSN_BUF_SZ];
	ssize_t len;
	bool x86_64;
	int err = -1;

	thread = machine__find_thread(machine, -1, btsq->tid);
	if (!thread)
		return -1;

	len = thread__memcpy(thread, machine, buf, ip, INTEL_PT_INSN_BUF_SZ, &x86_64);
	if (len <= 0)
		goto out_put;

	if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
		goto out_put;

	err = 0;
out_put:
	thread__put(thread);
	return err;
}

static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
				 pid_t tid, u64 ip)
{
	union perf_event event;
	int err;

	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
			     "Failed to get instruction", 0);

	err = perf_session__deliver_synth_event(bts->session, &event, NULL);
	if (err)
		pr_err("Intel BTS: failed to deliver error event, error %d\n",
		       err);

	return err;
}

static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
				     struct branch *branch)
{
	int err;

	if (!branch->from) {
		if (branch->to)
			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_TRACE_BEGIN;
		else
			btsq->sample_flags = 0;
		btsq->intel_pt_insn.length = 0;
	} else if (!branch->to) {
		btsq->sample_flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_END;
		btsq->intel_pt_insn.length = 0;
	} else {
		err = intel_bts_get_next_insn(btsq, branch->from);
		if (err) {
			btsq->sample_flags = 0;
			btsq->intel_pt_insn.length = 0;
			if (!btsq->bts->synth_opts.errors)
				return 0;
			err = intel_bts_synth_error(btsq->bts, btsq->cpu,
						    btsq->pid, btsq->tid,
						    branch->from);
			return err;
		}
		btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
		/* Check for an async branch into the kernel */
		if (!machine__kernel_ip(btsq->bts->machine, branch->from) &&
		    machine__kernel_ip(btsq->bts->machine, branch->to) &&
		    btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_CALL |
					   PERF_IP_FLAG_SYSCALLRET))
			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_CALL |
					     PERF_IP_FLAG_ASYNC |
					     PERF_IP_FLAG_INTERRUPT;
	}

	return 0;
}

static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
				    struct auxtrace_buffer *buffer,
				    struct thread *thread)
{
	struct branch *branch;
	size_t sz, bsz = sizeof(struct branch);
	u32 filter = btsq->bts->branches_filter;
	int err = 0;

	if (buffer->use_data) {
		sz = buffer->use_size;
		branch = buffer->use_data;
	} else {
		sz = buffer->size;
		branch = buffer->data;
	}

	if (!btsq->bts->sample_branches)
		return 0;

	for (; sz > bsz; branch += 1, sz -= bsz) {
		if (!branch->from && !branch->to)
			continue;
		intel_bts_get_branch_type(btsq, branch);
		if (btsq->bts->synth_opts.thread_stack)
			thread_stack__event(thread, btsq->cpu, btsq->sample_flags,
					    le64_to_cpu(branch->from),
					    le64_to_cpu(branch->to),
					    btsq->intel_pt_insn.length,
					    buffer->buffer_nr + 1);
		if (filter && !(filter & btsq->sample_flags))
			continue;
		err = intel_bts_synth_branch_sample(btsq, branch);
		if (err)
			break;
	}
	return err;
}

static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
{
	struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
	struct auxtrace_queue *queue;
	struct thread *thread;
	int err;

	if (btsq->done)
		return 1;

	if (btsq->pid == -1) {
		thread = machine__find_thread(btsq->bts->machine, -1,
					      btsq->tid);
		if (thread)
			btsq->pid = thread->pid_;
	} else {
		thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
						 btsq->tid);
	}

	queue = &btsq->bts->queues.queue_array[btsq->queue_nr];

	if (!buffer)
		buffer = auxtrace_buffer__next(queue, NULL);

	if (!buffer) {
		if (!btsq->bts->sampling_mode)
			btsq->done = 1;
		err = 1;
		goto out_put;
	}

	/* Currently there is no support for split buffers */
	if (buffer->consecutive) {
		err = -EINVAL;
		goto out_put;
	}

	if (!buffer->data) {
		int fd = perf_data__fd(btsq->bts->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data) {
			err = -ENOMEM;
			goto out_put;
		}
	}

	if (btsq->bts->snapshot_mode && !buffer->consecutive &&
	    intel_bts_do_fix_overlap(queue, buffer)) {
		err = -ENOMEM;
		goto out_put;
	}

	if (!btsq->bts->synth_opts.callchain &&
	    !btsq->bts->synth_opts.thread_stack && thread &&
	    (!old_buffer || btsq->bts->sampling_mode ||
	     (btsq->bts->snapshot_mode && !buffer->consecutive)))
		thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1);

	err = intel_bts_process_buffer(btsq, buffer, thread);

	auxtrace_buffer__drop_data(buffer);

	btsq->buffer = auxtrace_buffer__next(queue, buffer);
	if (btsq->buffer) {
		if (timestamp)
			*timestamp = btsq->buffer->reference;
	} else {
		if (!btsq->bts->sampling_mode)
			btsq->done = 1;
	}
out_put:
	thread__put(thread);
	return err;
}

static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
{
	u64 ts = 0;
	int ret;

	while (1) {
		ret = intel_bts_process_queue(btsq, &ts);
		if (ret < 0)
			return ret;
		if (ret)
			break;
	}
	return 0;
}

static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
{
	struct auxtrace_queues *queues = &bts->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &bts->queues.queue_array[i];
		struct intel_bts_queue *btsq = queue->priv;

		if (btsq && btsq->tid == tid)
			return intel_bts_flush_queue(btsq);
	}
	return 0;
}

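/*
 * Drain queued data in timestamp order.  The auxtrace min-heap is keyed
 * by each queue's next buffer reference, so the queue with the oldest
 * pending data is always popped first; a queue is pushed back with its
 * next buffer's timestamp until everything at or before 'timestamp' has
 * been consumed.
 */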
static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
{
	while (1) {
		unsigned int queue_nr;
		struct auxtrace_queue *queue;
		struct intel_bts_queue *btsq;
		u64 ts = 0;
		int ret;

		if (!bts->heap.heap_cnt)
			return 0;

		if (bts->heap.heap_array[0].ordinal > timestamp)
			return 0;

		queue_nr = bts->heap.heap_array[0].queue_nr;
		queue = &bts->queues.queue_array[queue_nr];
		btsq = queue->priv;

		auxtrace_heap__pop(&bts->heap);

		ret = intel_bts_process_queue(btsq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&bts->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&bts->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			btsq->on_heap = false;
		}
	}

	return 0;
}

static int intel_bts_process_event(struct perf_session *session,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct perf_tool *tool)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);
	u64 timestamp;
	int err;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("Intel BTS requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &bts->tc);
	else
		timestamp = 0;

	err = intel_bts_update_queues(bts);
	if (err)
		return err;

	err = intel_bts_process_queues(bts, timestamp);
	if (err)
		return err;
	if (event->header.type == PERF_RECORD_EXIT) {
		err = intel_bts_process_tid_exit(bts, event->fork.tid);
		if (err)
			return err;
	}

	if (event->header.type == PERF_RECORD_AUX &&
	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
	    bts->synth_opts.errors)
		err = intel_bts_lost(bts, sample);

	return err;
}

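/*
 * Queue PERF_RECORD_AUXTRACE data for later processing.  When reading
 * from a file the payload stays on disk and only its offset is
 * recorded; when reading from a pipe the payload has already been
 * copied, so the offset is zero.
 */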
static int intel_bts_process_auxtrace_event(struct perf_session *session,
					    union perf_event *event,
					    struct perf_tool *tool __maybe_unused)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);

	if (bts->sampling_mode)
		return 0;

	if (!bts->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&bts->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				intel_bts_dump_event(bts, buffer->data,
						     buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int intel_bts_flush(struct perf_session *session,
			   struct perf_tool *tool __maybe_unused)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);
	int ret;

	if (dump_trace || bts->sampling_mode)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = intel_bts_update_queues(bts);
	if (ret < 0)
		return ret;

	return intel_bts_process_queues(bts, MAX_TIMESTAMP);
}

static void intel_bts_free_queue(void *priv)
{
	struct intel_bts_queue *btsq = priv;

	if (!btsq)
		return;
	free(btsq);
}

static void intel_bts_free_events(struct perf_session *session)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);
	struct auxtrace_queues *queues = &bts->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		intel_bts_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void intel_bts_free(struct perf_session *session)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);

	auxtrace_heap__free(&bts->heap);
	intel_bts_free_events(session);
	session->auxtrace = NULL;
	free(bts);
}

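/*
 * perf_event__synthesize_attr() delivers the synthesized attr event
 * through a perf_tool callback, so wrap the session in a dummy tool
 * whose callback just feeds the event back to this session.
 */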
struct intel_bts_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int intel_bts_event_synth(struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample __maybe_unused,
				 struct machine *machine __maybe_unused)
{
	struct intel_bts_synth *intel_bts_synth =
			container_of(tool, struct intel_bts_synth, dummy_tool);

	return perf_session__deliver_synth_event(intel_bts_synth->session,
						 event, NULL);
}

static int intel_bts_synth_event(struct perf_session *session,
				 struct perf_event_attr *attr, u64 id)
{
	struct intel_bts_synth intel_bts_synth;

	memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
	intel_bts_synth.session = session;

	return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
					   &id, intel_bts_event_synth);
}

static int intel_bts_synth_events(struct intel_bts *bts,
				  struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == bts->pmu_type && evsel->core.ids) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("There are no selected events with Intel BTS data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

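	/* Derive a sample id unlikely to clash with the ids already in use */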
	id = evsel->core.id[0] + 1000000000;
	if (!id)
		id = 1;

	if (bts->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
			 id, (u64)attr.sample_type);
		err = intel_bts_synth_event(session, &attr, id);
		if (err) {
			pr_err("%s: failed to synthesize 'branches' event type\n",
			       __func__);
			return err;
		}
		bts->sample_branches = true;
		bts->branches_sample_type = attr.sample_type;
		bts->branches_id = id;
		/*
		 * We only use sample types from PERF_SAMPLE_MASK so we can use
		 * __perf_evsel__sample_size() here.
		 */
		bts->branches_event_size = sizeof(struct perf_record_sample) +
				__perf_evsel__sample_size(attr.sample_type);
	}

	return 0;
}

static const char * const intel_bts_info_fmts[] = {
	[INTEL_BTS_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
	[INTEL_BTS_TIME_SHIFT]		= "  Time Shift         %"PRIu64"\n",
	[INTEL_BTS_TIME_MULT]		= "  Time Multiplier    %"PRIu64"\n",
	[INTEL_BTS_TIME_ZERO]		= "  Time Zero          %"PRIu64"\n",
	[INTEL_BTS_CAP_USER_TIME_ZERO]	= "  Cap Time Zero      %"PRId64"\n",
	[INTEL_BTS_SNAPSHOT_MODE]	= "  Snapshot mode      %"PRId64"\n",
};

static void intel_bts_print_info(__u64 *arr, int start, int finish)
{
	int i;

	if (!dump_trace)
		return;

	for (i = start; i <= finish; i++)
		fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
}

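/*
 * Set up Intel BTS decoding for a session: validate the
 * PERF_RECORD_AUXTRACE_INFO payload, pull the PMU type, TSC conversion
 * parameters and snapshot mode out of the priv[] array, and register
 * the intel_bts auxtrace callbacks with the session.
 */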
int intel_bts_process_auxtrace_info(union perf_event *event,
				    struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE;
	struct intel_bts *bts;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	bts = zalloc(sizeof(struct intel_bts));
	if (!bts)
		return -ENOMEM;

	err = auxtrace_queues__init(&bts->queues);
	if (err)
		goto err_free;

	bts->session = session;
	bts->machine = &session->machines.host; /* No kvm support */
	bts->auxtrace_type = auxtrace_info->type;
	bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
	bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
	bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
	bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
	bts->cap_user_time_zero =
			auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
	bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];

	bts->sampling_mode = false;

	bts->auxtrace.process_event = intel_bts_process_event;
	bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
	bts->auxtrace.flush_events = intel_bts_flush;
	bts->auxtrace.free_events = intel_bts_free_events;
	bts->auxtrace.free = intel_bts_free;
	session->auxtrace = &bts->auxtrace;

	intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
			     INTEL_BTS_SNAPSHOT_MODE);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts->set) {
		bts->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&bts->synth_opts,
				session->itrace_synth_opts->default_no_sample);
		bts->synth_opts.thread_stack =
				session->itrace_synth_opts->thread_stack;
	}

	if (bts->synth_opts.calls)
		bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_TRACE_END;
	if (bts->synth_opts.returns)
		bts->branches_filter |= PERF_IP_FLAG_RETURN |
					PERF_IP_FLAG_TRACE_BEGIN;

	err = intel_bts_synth_events(bts, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&bts->queues, session);
	if (err)
		goto err_free_queues;

	if (bts->queues.populated)
		bts->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&bts->queues);
	session->auxtrace = NULL;
err_free:
	free(bts);
	return err;
}
v4.10.11
 
/*
 * intel-bts.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <endian.h>
#include <byteswap.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>

#include "cpumap.h"
#include "color.h"
#include "evsel.h"
#include "evlist.h"
#include "machine.h"
#include "session.h"
#include "util.h"
#include "thread.h"
#include "thread-stack.h"
#include "debug.h"
#include "tsc.h"
#include "auxtrace.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-bts.h"

#define MAX_TIMESTAMP (~0ULL)

#define INTEL_BTS_ERR_NOINSN  5
#define INTEL_BTS_ERR_LOST    9

#if __BYTE_ORDER == __BIG_ENDIAN
#define le64_to_cpu bswap_64
#else
#define le64_to_cpu
#endif

struct intel_bts {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	bool				sampling_mode;
	bool				snapshot_mode;
	bool				data_queued;
	u32				pmu_type;
	struct perf_tsc_conversion	tc;
	bool				cap_user_time_zero;
	struct itrace_synth_opts	synth_opts;
	bool				sample_branches;
	u32				branches_filter;
	u64				branches_sample_type;
	u64				branches_id;
	size_t				branches_event_size;
	bool				synth_needs_swap;
	unsigned long			num_events;
};

struct intel_bts_queue {
	struct intel_bts	*bts;
	unsigned int		queue_nr;
	struct auxtrace_buffer	*buffer;
	bool			on_heap;
	bool			done;
	pid_t			pid;
	pid_t			tid;
	int			cpu;
	u64			time;
	struct intel_pt_insn	intel_pt_insn;
	u32			sample_flags;
};

struct branch {
	u64 from;
	u64 to;
	u64 misc;
};

static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
			   unsigned char *buf, size_t len)
{
	struct branch *branch;
	size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... Intel BTS data: size %zu bytes\n",
		      len);

	while (len) {
		if (len >= br_sz)
			sz = br_sz;
		else
			sz = len;
		printf(".");
		color_fprintf(stdout, color, "  %08x: ", pos);
		for (i = 0; i < sz; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < br_sz; i++)
			color_fprintf(stdout, color, "   ");
		if (len >= br_sz) {
			branch = (struct branch *)buf;
			color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
				      le64_to_cpu(branch->from),
				      le64_to_cpu(branch->to),
				      le64_to_cpu(branch->misc) & 0x10 ?
							"pred" : "miss");
		} else {
			color_fprintf(stdout, color, " Bad record!\n");
		}
		pos += sz;
		buf += sz;
		len -= sz;
	}
}

static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
				 size_t len)
{
	printf(".\n");
	intel_bts_dump(bts, buf, len);
}

static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
{
	union perf_event event;
	int err;

	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
			     sample->tid, 0, "Lost trace data");

	err = perf_session__deliver_synth_event(bts->session, &event, NULL);
	if (err)
		pr_err("Intel BTS: failed to deliver error event, error %d\n",
		       err);

	return err;
}

static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
						     unsigned int queue_nr)
{
	struct intel_bts_queue *btsq;

	btsq = zalloc(sizeof(struct intel_bts_queue));
	if (!btsq)
		return NULL;

	btsq->bts = bts;
	btsq->queue_nr = queue_nr;
	btsq->pid = -1;
	btsq->tid = -1;
	btsq->cpu = -1;

	return btsq;
}

static int intel_bts_setup_queue(struct intel_bts *bts,
				 struct auxtrace_queue *queue,
				 unsigned int queue_nr)
{
	struct intel_bts_queue *btsq = queue->priv;

	if (list_empty(&queue->head))
		return 0;

	if (!btsq) {
		btsq = intel_bts_alloc_queue(bts, queue_nr);
		if (!btsq)
			return -ENOMEM;
		queue->priv = btsq;

		if (queue->cpu != -1)
			btsq->cpu = queue->cpu;
		btsq->tid = queue->tid;
	}

	if (bts->sampling_mode)
		return 0;

	if (!btsq->on_heap && !btsq->buffer) {
		int ret;

		btsq->buffer = auxtrace_buffer__next(queue, NULL);
		if (!btsq->buffer)
			return 0;

		ret = auxtrace_heap__add(&bts->heap, queue_nr,
					 btsq->buffer->reference);
		if (ret)
			return ret;
		btsq->on_heap = true;
	}

	return 0;
}

static int intel_bts_setup_queues(struct intel_bts *bts)
{
	unsigned int i;
	int ret;

	for (i = 0; i < bts->queues.nr_queues; i++) {
		ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
					    i);
		if (ret)
			return ret;
	}
	return 0;
}

static inline int intel_bts_update_queues(struct intel_bts *bts)
{
	if (bts->queues.new_data) {
		bts->queues.new_data = false;
		return intel_bts_setup_queues(bts);
	}
	return 0;
}

static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
					     unsigned char *buf_b, size_t len_b)
{
	size_t offs, len;

	if (len_a > len_b)
		offs = len_a - len_b;
	else
		offs = 0;

	for (; offs < len_a; offs += sizeof(struct branch)) {
		len = len_a - offs;
		if (!memcmp(buf_a + offs, buf_b, len))
			return buf_b + len;
	}

	return buf_b;
}

static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
				    struct auxtrace_buffer *b)
{
	struct auxtrace_buffer *a;
	void *start;

	if (b->list.prev == &queue->head)
		return 0;
	a = list_entry(b->list.prev, struct auxtrace_buffer, list);
	start = intel_bts_find_overlap(a->data, a->size, b->data, b->size);
	if (!start)
		return -EINVAL;
	b->use_size = b->data + b->size - start;
	b->use_data = start;
	return 0;
}

static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
					 struct branch *branch)
{
	int ret;
	struct intel_bts *bts = btsq->bts;
	union perf_event event;
	struct perf_sample sample = { .ip = 0, };

	if (bts->synth_opts.initial_skip &&
	    bts->num_events++ <= bts->synth_opts.initial_skip)
		return 0;

	event.sample.header.type = PERF_RECORD_SAMPLE;
	event.sample.header.misc = PERF_RECORD_MISC_USER;
	event.sample.header.size = sizeof(struct perf_event_header);

	sample.cpumode = PERF_RECORD_MISC_USER;
	sample.ip = le64_to_cpu(branch->from);
	sample.pid = btsq->pid;
	sample.tid = btsq->tid;
	sample.addr = le64_to_cpu(branch->to);
	sample.id = btsq->bts->branches_id;
	sample.stream_id = btsq->bts->branches_id;
	sample.period = 1;
	sample.cpu = btsq->cpu;
	sample.flags = btsq->sample_flags;
	sample.insn_len = btsq->intel_pt_insn.length;
	memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);

	if (bts->synth_opts.inject) {
		event.sample.header.size = bts->branches_event_size;
		ret = perf_event__synthesize_sample(&event,
						    bts->branches_sample_type,
						    0, &sample,
						    bts->synth_needs_swap);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
	if (ret)
		pr_err("Intel BTS: failed to deliver branch event, error %d\n",
		       ret);

	return ret;
}

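/*
 * Fetch the instruction bytes at 'ip' by resolving the address to a map
 * and DSO and reading from the DSO image; the v5.4 version above does
 * the same job with a single thread__memcpy() call.
 */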
static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
{
	struct machine *machine = btsq->bts->machine;
	struct thread *thread;
	struct addr_location al;
	unsigned char buf[INTEL_PT_INSN_BUF_SZ];
	ssize_t len;
	int x86_64;
	uint8_t cpumode;
	int err = -1;

	if (machine__kernel_ip(machine, ip))
		cpumode = PERF_RECORD_MISC_KERNEL;
	else
		cpumode = PERF_RECORD_MISC_USER;

	thread = machine__find_thread(machine, -1, btsq->tid);
	if (!thread)
		return -1;

	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
	if (!al.map || !al.map->dso)
		goto out_put;

	len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf,
				  INTEL_PT_INSN_BUF_SZ);
	if (len <= 0)
		goto out_put;

	/* Load maps to ensure dso->is_64_bit has been updated */
	map__load(al.map);

	x86_64 = al.map->dso->is_64_bit;

	if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
		goto out_put;

	err = 0;
out_put:
	thread__put(thread);
	return err;
}

static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
				 pid_t tid, u64 ip)
{
	union perf_event event;
	int err;

	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
			     "Failed to get instruction");

	err = perf_session__deliver_synth_event(bts->session, &event, NULL);
	if (err)
		pr_err("Intel BTS: failed to deliver error event, error %d\n",
		       err);

	return err;
}

static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
				     struct branch *branch)
{
	int err;

	if (!branch->from) {
		if (branch->to)
			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_TRACE_BEGIN;
		else
			btsq->sample_flags = 0;
		btsq->intel_pt_insn.length = 0;
	} else if (!branch->to) {
		btsq->sample_flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_END;
		btsq->intel_pt_insn.length = 0;
	} else {
		err = intel_bts_get_next_insn(btsq, branch->from);
		if (err) {
			btsq->sample_flags = 0;
			btsq->intel_pt_insn.length = 0;
			if (!btsq->bts->synth_opts.errors)
				return 0;
			err = intel_bts_synth_error(btsq->bts, btsq->cpu,
						    btsq->pid, btsq->tid,
						    branch->from);
			return err;
		}
		btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
		/* Check for an async branch into the kernel */
		if (!machine__kernel_ip(btsq->bts->machine, branch->from) &&
		    machine__kernel_ip(btsq->bts->machine, branch->to) &&
		    btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_CALL |
					   PERF_IP_FLAG_SYSCALLRET))
			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_CALL |
					     PERF_IP_FLAG_ASYNC |
					     PERF_IP_FLAG_INTERRUPT;
	}

	return 0;
}

static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
				    struct auxtrace_buffer *buffer,
				    struct thread *thread)
{
	struct branch *branch;
	size_t sz, bsz = sizeof(struct branch);
	u32 filter = btsq->bts->branches_filter;
	int err = 0;

	if (buffer->use_data) {
		sz = buffer->use_size;
		branch = buffer->use_data;
	} else {
		sz = buffer->size;
		branch = buffer->data;
	}

	if (!btsq->bts->sample_branches)
		return 0;

	for (; sz > bsz; branch += 1, sz -= bsz) {
		if (!branch->from && !branch->to)
			continue;
		intel_bts_get_branch_type(btsq, branch);
		if (btsq->bts->synth_opts.thread_stack)
			thread_stack__event(thread, btsq->sample_flags,
					    le64_to_cpu(branch->from),
					    le64_to_cpu(branch->to),
					    btsq->intel_pt_insn.length,
					    buffer->buffer_nr + 1);
		if (filter && !(filter & btsq->sample_flags))
			continue;
		err = intel_bts_synth_branch_sample(btsq, branch);
		if (err)
			break;
	}
	return err;
}

static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
{
	struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
	struct auxtrace_queue *queue;
	struct thread *thread;
	int err;

	if (btsq->done)
		return 1;

	if (btsq->pid == -1) {
		thread = machine__find_thread(btsq->bts->machine, -1,
					      btsq->tid);
		if (thread)
			btsq->pid = thread->pid_;
	} else {
		thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
						 btsq->tid);
	}

	queue = &btsq->bts->queues.queue_array[btsq->queue_nr];

	if (!buffer)
		buffer = auxtrace_buffer__next(queue, NULL);

	if (!buffer) {
		if (!btsq->bts->sampling_mode)
			btsq->done = 1;
		err = 1;
		goto out_put;
	}

	/* Currently there is no support for split buffers */
	if (buffer->consecutive) {
		err = -EINVAL;
		goto out_put;
	}

	if (!buffer->data) {
		int fd = perf_data_file__fd(btsq->bts->session->file);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data) {
			err = -ENOMEM;
			goto out_put;
		}
	}

	if (btsq->bts->snapshot_mode && !buffer->consecutive &&
	    intel_bts_do_fix_overlap(queue, buffer)) {
		err = -ENOMEM;
		goto out_put;
	}

	if (!btsq->bts->synth_opts.callchain &&
	    !btsq->bts->synth_opts.thread_stack && thread &&
	    (!old_buffer || btsq->bts->sampling_mode ||
	     (btsq->bts->snapshot_mode && !buffer->consecutive)))
		thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);

	err = intel_bts_process_buffer(btsq, buffer, thread);

	auxtrace_buffer__drop_data(buffer);

	btsq->buffer = auxtrace_buffer__next(queue, buffer);
	if (btsq->buffer) {
		if (timestamp)
			*timestamp = btsq->buffer->reference;
	} else {
		if (!btsq->bts->sampling_mode)
			btsq->done = 1;
	}
out_put:
	thread__put(thread);
	return err;
}

static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
{
	u64 ts = 0;
	int ret;

	while (1) {
		ret = intel_bts_process_queue(btsq, &ts);
		if (ret < 0)
			return ret;
		if (ret)
			break;
	}
	return 0;
}

static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
{
	struct auxtrace_queues *queues = &bts->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &bts->queues.queue_array[i];
		struct intel_bts_queue *btsq = queue->priv;

		if (btsq && btsq->tid == tid)
			return intel_bts_flush_queue(btsq);
	}
	return 0;
}

static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
{
	while (1) {
		unsigned int queue_nr;
		struct auxtrace_queue *queue;
		struct intel_bts_queue *btsq;
		u64 ts = 0;
		int ret;

		if (!bts->heap.heap_cnt)
			return 0;

		if (bts->heap.heap_array[0].ordinal > timestamp)
			return 0;

		queue_nr = bts->heap.heap_array[0].queue_nr;
		queue = &bts->queues.queue_array[queue_nr];
		btsq = queue->priv;

		auxtrace_heap__pop(&bts->heap);

		ret = intel_bts_process_queue(btsq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&bts->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&bts->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			btsq->on_heap = false;
		}
	}

	return 0;
}

static int intel_bts_process_event(struct perf_session *session,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct perf_tool *tool)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);
	u64 timestamp;
	int err;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("Intel BTS requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &bts->tc);
	else
		timestamp = 0;

	err = intel_bts_update_queues(bts);
	if (err)
		return err;

	err = intel_bts_process_queues(bts, timestamp);
	if (err)
		return err;
	if (event->header.type == PERF_RECORD_EXIT) {
		err = intel_bts_process_tid_exit(bts, event->fork.tid);
		if (err)
			return err;
	}

	if (event->header.type == PERF_RECORD_AUX &&
	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
	    bts->synth_opts.errors)
		err = intel_bts_lost(bts, sample);

	return err;
}

static int intel_bts_process_auxtrace_event(struct perf_session *session,
					    union perf_event *event,
					    struct perf_tool *tool __maybe_unused)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);

	if (bts->sampling_mode)
		return 0;

	if (!bts->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data_file__fd(session->file);
		int err;

		if (perf_data_file__is_pipe(session->file)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&bts->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				intel_bts_dump_event(bts, buffer->data,
						     buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int intel_bts_flush(struct perf_session *session,
			   struct perf_tool *tool __maybe_unused)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);
	int ret;

	if (dump_trace || bts->sampling_mode)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = intel_bts_update_queues(bts);
	if (ret < 0)
		return ret;

	return intel_bts_process_queues(bts, MAX_TIMESTAMP);
}

static void intel_bts_free_queue(void *priv)
{
	struct intel_bts_queue *btsq = priv;

	if (!btsq)
		return;
	free(btsq);
}

static void intel_bts_free_events(struct perf_session *session)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);
	struct auxtrace_queues *queues = &bts->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		intel_bts_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void intel_bts_free(struct perf_session *session)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);

	auxtrace_heap__free(&bts->heap);
	intel_bts_free_events(session);
	session->auxtrace = NULL;
	free(bts);
}

struct intel_bts_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int intel_bts_event_synth(struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample __maybe_unused,
				 struct machine *machine __maybe_unused)
{
	struct intel_bts_synth *intel_bts_synth =
			container_of(tool, struct intel_bts_synth, dummy_tool);

	return perf_session__deliver_synth_event(intel_bts_synth->session,
						 event, NULL);
}

static int intel_bts_synth_event(struct perf_session *session,
				 struct perf_event_attr *attr, u64 id)
{
	struct intel_bts_synth intel_bts_synth;

	memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
	intel_bts_synth.session = session;

	return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
					   &id, intel_bts_event_synth);
}

static int intel_bts_synth_events(struct intel_bts *bts,
				  struct perf_session *session)
{
	struct perf_evlist *evlist = session->evlist;
	struct perf_evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == bts->pmu_type && evsel->ids) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("There are no selected events with Intel BTS data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
	attr.exclude_user = evsel->attr.exclude_user;
	attr.exclude_kernel = evsel->attr.exclude_kernel;
	attr.exclude_hv = evsel->attr.exclude_hv;
	attr.exclude_host = evsel->attr.exclude_host;
	attr.exclude_guest = evsel->attr.exclude_guest;
	attr.sample_id_all = evsel->attr.sample_id_all;
	attr.read_format = evsel->attr.read_format;

	id = evsel->id[0] + 1000000000;
	if (!id)
		id = 1;

	if (bts->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
			 id, (u64)attr.sample_type);
		err = intel_bts_synth_event(session, &attr, id);
		if (err) {
			pr_err("%s: failed to synthesize 'branches' event type\n",
			       __func__);
			return err;
		}
		bts->sample_branches = true;
		bts->branches_sample_type = attr.sample_type;
		bts->branches_id = id;
		/*
		 * We only use sample types from PERF_SAMPLE_MASK so we can use
		 * __perf_evsel__sample_size() here.
		 */
		bts->branches_event_size = sizeof(struct sample_event) +
				__perf_evsel__sample_size(attr.sample_type);
	}

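	/*
	 * Record whether synthesized samples need byte-swapping to match
	 * the recorded data; the v5.4 listing above has dropped this along
	 * with the swap parameter of perf_event__synthesize_sample().
	 */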
	bts->synth_needs_swap = evsel->needs_swap;

	return 0;
}

static const char * const intel_bts_info_fmts[] = {
	[INTEL_BTS_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
	[INTEL_BTS_TIME_SHIFT]		= "  Time Shift         %"PRIu64"\n",
	[INTEL_BTS_TIME_MULT]		= "  Time Multiplier    %"PRIu64"\n",
	[INTEL_BTS_TIME_ZERO]		= "  Time Zero          %"PRIu64"\n",
	[INTEL_BTS_CAP_USER_TIME_ZERO]	= "  Cap Time Zero      %"PRId64"\n",
	[INTEL_BTS_SNAPSHOT_MODE]	= "  Snapshot mode      %"PRId64"\n",
};

static void intel_bts_print_info(u64 *arr, int start, int finish)
{
	int i;

	if (!dump_trace)
		return;

	for (i = start; i <= finish; i++)
		fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
}

u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE];

int intel_bts_process_auxtrace_info(union perf_event *event,
				    struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE;
	struct intel_bts *bts;
	int err;

	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
					min_sz)
		return -EINVAL;

	bts = zalloc(sizeof(struct intel_bts));
	if (!bts)
		return -ENOMEM;

	err = auxtrace_queues__init(&bts->queues);
	if (err)
		goto err_free;

	bts->session = session;
	bts->machine = &session->machines.host; /* No kvm support */
	bts->auxtrace_type = auxtrace_info->type;
	bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
	bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
	bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
	bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
	bts->cap_user_time_zero =
			auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
	bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];

	bts->sampling_mode = false;

	bts->auxtrace.process_event = intel_bts_process_event;
	bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
	bts->auxtrace.flush_events = intel_bts_flush;
	bts->auxtrace.free_events = intel_bts_free_events;
	bts->auxtrace.free = intel_bts_free;
	session->auxtrace = &bts->auxtrace;

	intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
			     INTEL_BTS_SNAPSHOT_MODE);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
		bts->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&bts->synth_opts);
		if (session->itrace_synth_opts)
			bts->synth_opts.thread_stack =
				session->itrace_synth_opts->thread_stack;
	}

	if (bts->synth_opts.calls)
		bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_TRACE_END;
	if (bts->synth_opts.returns)
		bts->branches_filter |= PERF_IP_FLAG_RETURN |
					PERF_IP_FLAG_TRACE_BEGIN;

	err = intel_bts_synth_events(bts, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&bts->queues, session);
	if (err)
		goto err_free_queues;

	if (bts->queues.populated)
		bts->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&bts->queues);
	session->auxtrace = NULL;
err_free:
	free(bts);
	return err;
}