v4.17
/*
 * intel-bts.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <endian.h>
#include <errno.h>
#include <byteswap.h>
#include <inttypes.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>

#include "cpumap.h"
#include "color.h"
#include "evsel.h"
#include "evlist.h"
#include "machine.h"
#include "session.h"
#include "util.h"
#include "thread.h"
#include "thread-stack.h"
#include "debug.h"
#include "tsc.h"
#include "auxtrace.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-bts.h"

#define MAX_TIMESTAMP (~0ULL)

#define INTEL_BTS_ERR_NOINSN  5
#define INTEL_BTS_ERR_LOST    9

#if __BYTE_ORDER == __BIG_ENDIAN
#define le64_to_cpu bswap_64
#else
#define le64_to_cpu
#endif

struct intel_bts {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	bool				sampling_mode;
	bool				snapshot_mode;
	bool				data_queued;
	u32				pmu_type;
	struct perf_tsc_conversion	tc;
	bool				cap_user_time_zero;
	struct itrace_synth_opts	synth_opts;
	bool				sample_branches;
	u32				branches_filter;
	u64				branches_sample_type;
	u64				branches_id;
	size_t				branches_event_size;
	unsigned long			num_events;
};

struct intel_bts_queue {
	struct intel_bts	*bts;
	unsigned int		queue_nr;
	struct auxtrace_buffer	*buffer;
	bool			on_heap;
	bool			done;
	pid_t			pid;
	pid_t			tid;
	int			cpu;
	u64			time;
	struct intel_pt_insn	intel_pt_insn;
	u32			sample_flags;
};

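/*
 * One BTS record: three little-endian u64s giving the branch source,
 * the branch destination and a flags word.  The dump below reads bit
 * 0x10 of the flags as "branch predicted".
 */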
struct branch {
	u64 from;
	u64 to;
	u64 misc;
};

static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
			   unsigned char *buf, size_t len)
{
	struct branch *branch;
	size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... Intel BTS data: size %zu bytes\n",
		      len);

	while (len) {
		if (len >= br_sz)
			sz = br_sz;
		else
			sz = len;
		printf(".");
		color_fprintf(stdout, color, "  %08x: ", pos);
		for (i = 0; i < sz; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < br_sz; i++)
			color_fprintf(stdout, color, "   ");
		if (len >= br_sz) {
			branch = (struct branch *)buf;
			color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
				      le64_to_cpu(branch->from),
				      le64_to_cpu(branch->to),
				      le64_to_cpu(branch->misc) & 0x10 ?
							"pred" : "miss");
		} else {
			color_fprintf(stdout, color, " Bad record!\n");
		}
		pos += sz;
		buf += sz;
		len -= sz;
	}
}

static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
				 size_t len)
{
	printf(".\n");
	intel_bts_dump(bts, buf, len);
}

static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
{
	union perf_event event;
	int err;

	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
			     sample->tid, 0, "Lost trace data");

	err = perf_session__deliver_synth_event(bts->session, &event, NULL);
	if (err)
		pr_err("Intel BTS: failed to deliver error event, error %d\n",
		       err);

	return err;
}

static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
						     unsigned int queue_nr)
{
	struct intel_bts_queue *btsq;

	btsq = zalloc(sizeof(struct intel_bts_queue));
	if (!btsq)
		return NULL;

	btsq->bts = bts;
	btsq->queue_nr = queue_nr;
	btsq->pid = -1;
	btsq->tid = -1;
	btsq->cpu = -1;

	return btsq;
}

static int intel_bts_setup_queue(struct intel_bts *bts,
				 struct auxtrace_queue *queue,
				 unsigned int queue_nr)
{
	struct intel_bts_queue *btsq = queue->priv;

	if (list_empty(&queue->head))
		return 0;

	if (!btsq) {
		btsq = intel_bts_alloc_queue(bts, queue_nr);
		if (!btsq)
			return -ENOMEM;
		queue->priv = btsq;

		if (queue->cpu != -1)
			btsq->cpu = queue->cpu;
		btsq->tid = queue->tid;
	}

	if (bts->sampling_mode)
		return 0;

	if (!btsq->on_heap && !btsq->buffer) {
		int ret;

		btsq->buffer = auxtrace_buffer__next(queue, NULL);
		if (!btsq->buffer)
			return 0;

		ret = auxtrace_heap__add(&bts->heap, queue_nr,
					 btsq->buffer->reference);
		if (ret)
			return ret;
		btsq->on_heap = true;
	}

	return 0;
}

static int intel_bts_setup_queues(struct intel_bts *bts)
{
	unsigned int i;
	int ret;

	for (i = 0; i < bts->queues.nr_queues; i++) {
		ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
					    i);
		if (ret)
			return ret;
	}
	return 0;
}

static inline int intel_bts_update_queues(struct intel_bts *bts)
{
	if (bts->queues.new_data) {
		bts->queues.new_data = false;
		return intel_bts_setup_queues(bts);
	}
	return 0;
}

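/*
 * In snapshot mode successive buffers can overlap: the head of the new
 * buffer may repeat the tail of the previous one.  Records are fixed
 * size, so step through record-aligned offsets of buf_a looking for a
 * tail that matches the start of buf_b, and return the first byte of
 * buf_b holding genuinely new data (or buf_b itself if there is no
 * overlap).
 */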
static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
					     unsigned char *buf_b, size_t len_b)
{
	size_t offs, len;

	if (len_a > len_b)
		offs = len_a - len_b;
	else
		offs = 0;

	for (; offs < len_a; offs += sizeof(struct branch)) {
		len = len_a - offs;
		if (!memcmp(buf_a + offs, buf_b, len))
			return buf_b + len;
	}

	return buf_b;
}

static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
				    struct auxtrace_buffer *b)
{
	struct auxtrace_buffer *a;
	void *start;

	if (b->list.prev == &queue->head)
		return 0;
	a = list_entry(b->list.prev, struct auxtrace_buffer, list);
	start = intel_bts_find_overlap(a->data, a->size, b->data, b->size);
	if (!start)
		return -EINVAL;
	b->use_size = b->data + b->size - start;
	b->use_data = start;
	return 0;
}

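/*
 * Synthesize one PERF_RECORD_SAMPLE branch event from a BTS record.
 * If the samples are being prepared for 'perf inject', the sample is
 * also packed into the event itself at the size precomputed in
 * intel_bts_synth_events().
 */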
static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
					 struct branch *branch)
{
	int ret;
	struct intel_bts *bts = btsq->bts;
	union perf_event event;
	struct perf_sample sample = { .ip = 0, };

	if (bts->synth_opts.initial_skip &&
	    bts->num_events++ <= bts->synth_opts.initial_skip)
		return 0;

	event.sample.header.type = PERF_RECORD_SAMPLE;
	event.sample.header.misc = PERF_RECORD_MISC_USER;
	event.sample.header.size = sizeof(struct perf_event_header);

	sample.cpumode = PERF_RECORD_MISC_USER;
	sample.ip = le64_to_cpu(branch->from);
	sample.pid = btsq->pid;
	sample.tid = btsq->tid;
	sample.addr = le64_to_cpu(branch->to);
	sample.id = btsq->bts->branches_id;
	sample.stream_id = btsq->bts->branches_id;
	sample.period = 1;
	sample.cpu = btsq->cpu;
	sample.flags = btsq->sample_flags;
	sample.insn_len = btsq->intel_pt_insn.length;
	memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);

	if (bts->synth_opts.inject) {
		event.sample.header.size = bts->branches_event_size;
		ret = perf_event__synthesize_sample(&event,
						    bts->branches_sample_type,
						    0, &sample);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
	if (ret)
		pr_err("Intel BTS: failed to deliver branch event, error %d\n",
		       ret);

	return ret;
}

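/*
 * Decode the instruction at the branch source: resolve the ip to a
 * map/dso for the thread, read the instruction bytes from the dso and
 * classify them with the Intel PT instruction decoder.
 */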
static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
{
	struct machine *machine = btsq->bts->machine;
	struct thread *thread;
	struct addr_location al;
	unsigned char buf[INTEL_PT_INSN_BUF_SZ];
	ssize_t len;
	int x86_64;
	uint8_t cpumode;
	int err = -1;

	if (machine__kernel_ip(machine, ip))
		cpumode = PERF_RECORD_MISC_KERNEL;
	else
		cpumode = PERF_RECORD_MISC_USER;

	thread = machine__find_thread(machine, -1, btsq->tid);
	if (!thread)
		return -1;

	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
	if (!al.map || !al.map->dso)
		goto out_put;

	len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf,
				  INTEL_PT_INSN_BUF_SZ);
	if (len <= 0)
		goto out_put;

	/* Load maps to ensure dso->is_64_bit has been updated */
	map__load(al.map);

	x86_64 = al.map->dso->is_64_bit;

	if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
		goto out_put;

	err = 0;
out_put:
	thread__put(thread);
	return err;
}

static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
				 pid_t tid, u64 ip)
{
	union perf_event event;
	int err;

	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
			     "Failed to get instruction");

	err = perf_session__deliver_synth_event(bts->session, &event, NULL);
	if (err)
		pr_err("Intel BTS: failed to deliver error event, error %d\n",
		       err);

	return err;
}

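/*
 * Work out sample flags for a record.  A zero 'from' with a non-zero
 * 'to' marks the start of a trace and a zero 'to' marks the end;
 * otherwise the branch type comes from decoding the source instruction,
 * with one fix-up: a user-to-kernel branch that is not a syscall must
 * have been asynchronous, i.e. an interrupt.
 */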
static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
				     struct branch *branch)
{
	int err;

	if (!branch->from) {
		if (branch->to)
			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_TRACE_BEGIN;
		else
			btsq->sample_flags = 0;
		btsq->intel_pt_insn.length = 0;
	} else if (!branch->to) {
		btsq->sample_flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_END;
		btsq->intel_pt_insn.length = 0;
	} else {
		err = intel_bts_get_next_insn(btsq, branch->from);
		if (err) {
			btsq->sample_flags = 0;
			btsq->intel_pt_insn.length = 0;
			if (!btsq->bts->synth_opts.errors)
				return 0;
			err = intel_bts_synth_error(btsq->bts, btsq->cpu,
						    btsq->pid, btsq->tid,
						    branch->from);
			return err;
		}
		btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
		/* Check for an async branch into the kernel */
		if (!machine__kernel_ip(btsq->bts->machine, branch->from) &&
		    machine__kernel_ip(btsq->bts->machine, branch->to) &&
		    btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_CALL |
					   PERF_IP_FLAG_SYSCALLRET))
			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_CALL |
					     PERF_IP_FLAG_ASYNC |
					     PERF_IP_FLAG_INTERRUPT;
	}

	return 0;
}

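/*
 * Walk the fixed-size records in a buffer: skip empty records, classify
 * each branch, feed it to the thread stack if requested, and synthesize
 * a sample unless the branch filter excludes it.
 */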
static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
				    struct auxtrace_buffer *buffer,
				    struct thread *thread)
{
	struct branch *branch;
	size_t sz, bsz = sizeof(struct branch);
	u32 filter = btsq->bts->branches_filter;
	int err = 0;

	if (buffer->use_data) {
		sz = buffer->use_size;
		branch = buffer->use_data;
	} else {
		sz = buffer->size;
		branch = buffer->data;
	}

	if (!btsq->bts->sample_branches)
		return 0;

	for (; sz > bsz; branch += 1, sz -= bsz) {
		if (!branch->from && !branch->to)
			continue;
		intel_bts_get_branch_type(btsq, branch);
		if (btsq->bts->synth_opts.thread_stack)
			thread_stack__event(thread, btsq->sample_flags,
					    le64_to_cpu(branch->from),
					    le64_to_cpu(branch->to),
					    btsq->intel_pt_insn.length,
					    buffer->buffer_nr + 1);
		if (filter && !(filter & btsq->sample_flags))
			continue;
		err = intel_bts_synth_branch_sample(btsq, branch);
		if (err)
			break;
	}
	return err;
}

static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
{
	struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
	struct auxtrace_queue *queue;
	struct thread *thread;
	int err;

	if (btsq->done)
		return 1;

	if (btsq->pid == -1) {
		thread = machine__find_thread(btsq->bts->machine, -1,
					      btsq->tid);
		if (thread)
			btsq->pid = thread->pid_;
	} else {
		thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
						 btsq->tid);
	}

	queue = &btsq->bts->queues.queue_array[btsq->queue_nr];

	if (!buffer)
		buffer = auxtrace_buffer__next(queue, NULL);

	if (!buffer) {
		if (!btsq->bts->sampling_mode)
			btsq->done = 1;
		err = 1;
		goto out_put;
	}

	/* Currently there is no support for split buffers */
	if (buffer->consecutive) {
		err = -EINVAL;
		goto out_put;
	}

	if (!buffer->data) {
		int fd = perf_data__fd(btsq->bts->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data) {
			err = -ENOMEM;
			goto out_put;
		}
	}

	if (btsq->bts->snapshot_mode && !buffer->consecutive &&
	    intel_bts_do_fix_overlap(queue, buffer)) {
		err = -ENOMEM;
		goto out_put;
	}

	if (!btsq->bts->synth_opts.callchain &&
	    !btsq->bts->synth_opts.thread_stack && thread &&
	    (!old_buffer || btsq->bts->sampling_mode ||
	     (btsq->bts->snapshot_mode && !buffer->consecutive)))
		thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);

	err = intel_bts_process_buffer(btsq, buffer, thread);

	auxtrace_buffer__drop_data(buffer);

	btsq->buffer = auxtrace_buffer__next(queue, buffer);
	if (btsq->buffer) {
		if (timestamp)
			*timestamp = btsq->buffer->reference;
	} else {
		if (!btsq->bts->sampling_mode)
			btsq->done = 1;
	}
out_put:
	thread__put(thread);
	return err;
}

static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
{
	u64 ts = 0;
	int ret;

	while (1) {
		ret = intel_bts_process_queue(btsq, &ts);
		if (ret < 0)
			return ret;
		if (ret)
			break;
	}
	return 0;
}

static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
{
	struct auxtrace_queues *queues = &bts->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &bts->queues.queue_array[i];
		struct intel_bts_queue *btsq = queue->priv;

		if (btsq && btsq->tid == tid)
			return intel_bts_flush_queue(btsq);
	}
	return 0;
}

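/*
 * Process queues in timestamp order.  The heap is keyed by each queue's
 * next buffer timestamp, so the queue with the oldest pending data is
 * always on top: pop it, process one buffer, then re-add the queue with
 * its new timestamp unless it is finished.
 */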
static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
{
	while (1) {
		unsigned int queue_nr;
		struct auxtrace_queue *queue;
		struct intel_bts_queue *btsq;
		u64 ts = 0;
		int ret;

		if (!bts->heap.heap_cnt)
			return 0;

		if (bts->heap.heap_array[0].ordinal > timestamp)
			return 0;

		queue_nr = bts->heap.heap_array[0].queue_nr;
		queue = &bts->queues.queue_array[queue_nr];
		btsq = queue->priv;

		auxtrace_heap__pop(&bts->heap);

		ret = intel_bts_process_queue(btsq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&bts->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&bts->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			btsq->on_heap = false;
		}
	}

	return 0;
}

static int intel_bts_process_event(struct perf_session *session,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct perf_tool *tool)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);
	u64 timestamp;
	int err;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("Intel BTS requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &bts->tc);
	else
		timestamp = 0;

	err = intel_bts_update_queues(bts);
	if (err)
		return err;

	err = intel_bts_process_queues(bts, timestamp);
	if (err)
		return err;
	if (event->header.type == PERF_RECORD_EXIT) {
		err = intel_bts_process_tid_exit(bts, event->fork.tid);
		if (err)
			return err;
	}

	if (event->header.type == PERF_RECORD_AUX &&
	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
	    bts->synth_opts.errors)
		err = intel_bts_lost(bts, sample);

	return err;
}

static int intel_bts_process_auxtrace_event(struct perf_session *session,
					    union perf_event *event,
					    struct perf_tool *tool __maybe_unused)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);

	if (bts->sampling_mode)
		return 0;

	if (!bts->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&bts->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				intel_bts_dump_event(bts, buffer->data,
						     buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int intel_bts_flush(struct perf_session *session,
			   struct perf_tool *tool __maybe_unused)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);
	int ret;

	if (dump_trace || bts->sampling_mode)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = intel_bts_update_queues(bts);
	if (ret < 0)
		return ret;

	return intel_bts_process_queues(bts, MAX_TIMESTAMP);
}

static void intel_bts_free_queue(void *priv)
{
	struct intel_bts_queue *btsq = priv;

	if (!btsq)
		return;
	free(btsq);
}

static void intel_bts_free_events(struct perf_session *session)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);
	struct auxtrace_queues *queues = &bts->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		intel_bts_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void intel_bts_free(struct perf_session *session)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);

	auxtrace_heap__free(&bts->heap);
	intel_bts_free_events(session);
	session->auxtrace = NULL;
	free(bts);
}

struct intel_bts_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int intel_bts_event_synth(struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample __maybe_unused,
				 struct machine *machine __maybe_unused)
{
	struct intel_bts_synth *intel_bts_synth =
			container_of(tool, struct intel_bts_synth, dummy_tool);

	return perf_session__deliver_synth_event(intel_bts_synth->session,
						 event, NULL);
}

static int intel_bts_synth_event(struct perf_session *session,
				 struct perf_event_attr *attr, u64 id)
{
	struct intel_bts_synth intel_bts_synth;

	memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
	intel_bts_synth.session = session;

	return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
					   &id, intel_bts_event_synth);
}

static int intel_bts_synth_events(struct intel_bts *bts,
				  struct perf_session *session)
{
	struct perf_evlist *evlist = session->evlist;
	struct perf_evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == bts->pmu_type && evsel->ids) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("There are no selected events with Intel BTS data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
	attr.exclude_user = evsel->attr.exclude_user;
	attr.exclude_kernel = evsel->attr.exclude_kernel;
	attr.exclude_hv = evsel->attr.exclude_hv;
	attr.exclude_host = evsel->attr.exclude_host;
	attr.exclude_guest = evsel->attr.exclude_guest;
	attr.sample_id_all = evsel->attr.sample_id_all;
	attr.read_format = evsel->attr.read_format;

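	/* Pick a sample id well clear of the ids already in use */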
	id = evsel->id[0] + 1000000000;
	if (!id)
		id = 1;

	if (bts->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
			 id, (u64)attr.sample_type);
		err = intel_bts_synth_event(session, &attr, id);
		if (err) {
			pr_err("%s: failed to synthesize 'branches' event type\n",
			       __func__);
			return err;
		}
		bts->sample_branches = true;
		bts->branches_sample_type = attr.sample_type;
		bts->branches_id = id;
		/*
		 * We only use sample types from PERF_SAMPLE_MASK so we can use
		 * __perf_evsel__sample_size() here.
		 */
		bts->branches_event_size = sizeof(struct sample_event) +
				__perf_evsel__sample_size(attr.sample_type);
	}

	return 0;
}

static const char * const intel_bts_info_fmts[] = {
	[INTEL_BTS_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
	[INTEL_BTS_TIME_SHIFT]		= "  Time Shift         %"PRIu64"\n",
	[INTEL_BTS_TIME_MULT]		= "  Time Multiplier    %"PRIu64"\n",
	[INTEL_BTS_TIME_ZERO]		= "  Time Zero          %"PRIu64"\n",
	[INTEL_BTS_CAP_USER_TIME_ZERO]	= "  Cap Time Zero      %"PRId64"\n",
	[INTEL_BTS_SNAPSHOT_MODE]	= "  Snapshot mode      %"PRId64"\n",
};

static void intel_bts_print_info(u64 *arr, int start, int finish)
{
	int i;

	if (!dump_trace)
		return;

	for (i = start; i <= finish; i++)
		fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
}

int intel_bts_process_auxtrace_info(union perf_event *event,
				    struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE;
	struct intel_bts *bts;
	int err;

	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
					min_sz)
		return -EINVAL;

	bts = zalloc(sizeof(struct intel_bts));
	if (!bts)
		return -ENOMEM;

	err = auxtrace_queues__init(&bts->queues);
	if (err)
		goto err_free;

	bts->session = session;
	bts->machine = &session->machines.host; /* No kvm support */
	bts->auxtrace_type = auxtrace_info->type;
	bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
	bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
	bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
	bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
	bts->cap_user_time_zero =
			auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
	bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];

	bts->sampling_mode = false;

	bts->auxtrace.process_event = intel_bts_process_event;
	bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
	bts->auxtrace.flush_events = intel_bts_flush;
	bts->auxtrace.free_events = intel_bts_free_events;
	bts->auxtrace.free = intel_bts_free;
	session->auxtrace = &bts->auxtrace;

	intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
			     INTEL_BTS_SNAPSHOT_MODE);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
		bts->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&bts->synth_opts);
		if (session->itrace_synth_opts)
			bts->synth_opts.thread_stack =
				session->itrace_synth_opts->thread_stack;
	}

	if (bts->synth_opts.calls)
		bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_TRACE_END;
	if (bts->synth_opts.returns)
		bts->branches_filter |= PERF_IP_FLAG_RETURN |
					PERF_IP_FLAG_TRACE_BEGIN;

	err = intel_bts_synth_events(bts, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&bts->queues, session);
	if (err)
		goto err_free_queues;

	if (bts->queues.populated)
		bts->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&bts->queues);
	session->auxtrace = NULL;
err_free:
	free(bts);
	return err;
}
v4.6
/*
 * intel-bts.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <endian.h>
#include <byteswap.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>

#include "cpumap.h"
#include "color.h"
#include "evsel.h"
#include "evlist.h"
#include "machine.h"
#include "session.h"
#include "util.h"
#include "thread.h"
#include "thread-stack.h"
#include "debug.h"
#include "tsc.h"
#include "auxtrace.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-bts.h"

#define MAX_TIMESTAMP (~0ULL)

#define INTEL_BTS_ERR_NOINSN  5
#define INTEL_BTS_ERR_LOST    9

#if __BYTE_ORDER == __BIG_ENDIAN
#define le64_to_cpu bswap_64
#else
#define le64_to_cpu
#endif

struct intel_bts {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	bool				sampling_mode;
	bool				snapshot_mode;
	bool				data_queued;
	u32				pmu_type;
	struct perf_tsc_conversion	tc;
	bool				cap_user_time_zero;
	struct itrace_synth_opts	synth_opts;
	bool				sample_branches;
	u32				branches_filter;
	u64				branches_sample_type;
	u64				branches_id;
	size_t				branches_event_size;
	bool				synth_needs_swap;
};

struct intel_bts_queue {
	struct intel_bts	*bts;
	unsigned int		queue_nr;
	struct auxtrace_buffer	*buffer;
	bool			on_heap;
	bool			done;
	pid_t			pid;
	pid_t			tid;
	int			cpu;
	u64			time;
	struct intel_pt_insn	intel_pt_insn;
	u32			sample_flags;
};

struct branch {
	u64 from;
	u64 to;
	u64 misc;
};

static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
			   unsigned char *buf, size_t len)
{
	struct branch *branch;
	size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... Intel BTS data: size %zu bytes\n",
		      len);

	while (len) {
		if (len >= br_sz)
			sz = br_sz;
		else
			sz = len;
		printf(".");
		color_fprintf(stdout, color, "  %08x: ", pos);
		for (i = 0; i < sz; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < br_sz; i++)
			color_fprintf(stdout, color, "   ");
		if (len >= br_sz) {
			branch = (struct branch *)buf;
			color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
				      le64_to_cpu(branch->from),
				      le64_to_cpu(branch->to),
				      le64_to_cpu(branch->misc) & 0x10 ?
							"pred" : "miss");
		} else {
			color_fprintf(stdout, color, " Bad record!\n");
		}
		pos += sz;
		buf += sz;
		len -= sz;
	}
}

static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
				 size_t len)
{
	printf(".\n");
	intel_bts_dump(bts, buf, len);
}

static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
{
	union perf_event event;
	int err;

	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
			     sample->tid, 0, "Lost trace data");

	err = perf_session__deliver_synth_event(bts->session, &event, NULL);
	if (err)
		pr_err("Intel BTS: failed to deliver error event, error %d\n",
		       err);

	return err;
}

static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
						     unsigned int queue_nr)
{
	struct intel_bts_queue *btsq;

	btsq = zalloc(sizeof(struct intel_bts_queue));
	if (!btsq)
		return NULL;

	btsq->bts = bts;
	btsq->queue_nr = queue_nr;
	btsq->pid = -1;
	btsq->tid = -1;
	btsq->cpu = -1;

	return btsq;
}

static int intel_bts_setup_queue(struct intel_bts *bts,
				 struct auxtrace_queue *queue,
				 unsigned int queue_nr)
{
	struct intel_bts_queue *btsq = queue->priv;

	if (list_empty(&queue->head))
		return 0;

	if (!btsq) {
		btsq = intel_bts_alloc_queue(bts, queue_nr);
		if (!btsq)
			return -ENOMEM;
		queue->priv = btsq;

		if (queue->cpu != -1)
			btsq->cpu = queue->cpu;
		btsq->tid = queue->tid;
	}

	if (bts->sampling_mode)
		return 0;

	if (!btsq->on_heap && !btsq->buffer) {
		int ret;

		btsq->buffer = auxtrace_buffer__next(queue, NULL);
		if (!btsq->buffer)
			return 0;

		ret = auxtrace_heap__add(&bts->heap, queue_nr,
					 btsq->buffer->reference);
		if (ret)
			return ret;
		btsq->on_heap = true;
	}

	return 0;
}

static int intel_bts_setup_queues(struct intel_bts *bts)
{
	unsigned int i;
	int ret;

	for (i = 0; i < bts->queues.nr_queues; i++) {
		ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
					    i);
		if (ret)
			return ret;
	}
	return 0;
}

static inline int intel_bts_update_queues(struct intel_bts *bts)
{
	if (bts->queues.new_data) {
		bts->queues.new_data = false;
		return intel_bts_setup_queues(bts);
	}
	return 0;
}

static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
					     unsigned char *buf_b, size_t len_b)
{
	size_t offs, len;

	if (len_a > len_b)
		offs = len_a - len_b;
	else
		offs = 0;

	for (; offs < len_a; offs += sizeof(struct branch)) {
		len = len_a - offs;
		if (!memcmp(buf_a + offs, buf_b, len))
			return buf_b + len;
	}

	return buf_b;
}

static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
				    struct auxtrace_buffer *b)
{
	struct auxtrace_buffer *a;
	void *start;

	if (b->list.prev == &queue->head)
		return 0;
	a = list_entry(b->list.prev, struct auxtrace_buffer, list);
	start = intel_bts_find_overlap(a->data, a->size, b->data, b->size);
	if (!start)
		return -EINVAL;
	b->use_size = b->data + b->size - start;
	b->use_data = start;
	return 0;
}

static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
					 struct branch *branch)
{
	int ret;
	struct intel_bts *bts = btsq->bts;
	union perf_event event;
	struct perf_sample sample = { .ip = 0, };

	event.sample.header.type = PERF_RECORD_SAMPLE;
	event.sample.header.misc = PERF_RECORD_MISC_USER;
	event.sample.header.size = sizeof(struct perf_event_header);

	sample.cpumode = PERF_RECORD_MISC_USER;
	sample.ip = le64_to_cpu(branch->from);
	sample.pid = btsq->pid;
	sample.tid = btsq->tid;
	sample.addr = le64_to_cpu(branch->to);
	sample.id = btsq->bts->branches_id;
	sample.stream_id = btsq->bts->branches_id;
	sample.period = 1;
	sample.cpu = btsq->cpu;
	sample.flags = btsq->sample_flags;
	sample.insn_len = btsq->intel_pt_insn.length;

	if (bts->synth_opts.inject) {
		event.sample.header.size = bts->branches_event_size;
		ret = perf_event__synthesize_sample(&event,
						    bts->branches_sample_type,
						    0, &sample,
						    bts->synth_needs_swap);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
	if (ret)
		pr_err("Intel BTS: failed to deliver branch event, error %d\n",
		       ret);

	return ret;
}

static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
{
	struct machine *machine = btsq->bts->machine;
	struct thread *thread;
	struct addr_location al;
	unsigned char buf[1024];
	size_t bufsz;
	ssize_t len;
	int x86_64;
	uint8_t cpumode;
	int err = -1;

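	/* Read no more than the longest instruction the decoder handles */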
	bufsz = intel_pt_insn_max_size();

	if (machine__kernel_ip(machine, ip))
		cpumode = PERF_RECORD_MISC_KERNEL;
	else
		cpumode = PERF_RECORD_MISC_USER;

	thread = machine__find_thread(machine, -1, btsq->tid);
	if (!thread)
		return -1;

	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
	if (!al.map || !al.map->dso)
		goto out_put;

	len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz);
	if (len <= 0)
		goto out_put;

	/* Load maps to ensure dso->is_64_bit has been updated */
	map__load(al.map, machine->symbol_filter);

	x86_64 = al.map->dso->is_64_bit;

	if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
		goto out_put;

	err = 0;
out_put:
	thread__put(thread);
	return err;
}

static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
				 pid_t tid, u64 ip)
{
	union perf_event event;
	int err;

	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
			     "Failed to get instruction");

	err = perf_session__deliver_synth_event(bts->session, &event, NULL);
	if (err)
		pr_err("Intel BTS: failed to deliver error event, error %d\n",
		       err);

	return err;
}

static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
				     struct branch *branch)
{
	int err;

	if (!branch->from) {
		if (branch->to)
			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_TRACE_BEGIN;
		else
			btsq->sample_flags = 0;
		btsq->intel_pt_insn.length = 0;
	} else if (!branch->to) {
		btsq->sample_flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_END;
		btsq->intel_pt_insn.length = 0;
	} else {
		err = intel_bts_get_next_insn(btsq, branch->from);
		if (err) {
			btsq->sample_flags = 0;
			btsq->intel_pt_insn.length = 0;
			if (!btsq->bts->synth_opts.errors)
				return 0;
			err = intel_bts_synth_error(btsq->bts, btsq->cpu,
						    btsq->pid, btsq->tid,
						    branch->from);
			return err;
		}
		btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
		/* Check for an async branch into the kernel */
		if (!machine__kernel_ip(btsq->bts->machine, branch->from) &&
		    machine__kernel_ip(btsq->bts->machine, branch->to) &&
		    btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_CALL |
					   PERF_IP_FLAG_SYSCALLRET))
			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_CALL |
					     PERF_IP_FLAG_ASYNC |
					     PERF_IP_FLAG_INTERRUPT;
	}

	return 0;
}

static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
				    struct auxtrace_buffer *buffer)
{
	struct branch *branch;
	size_t sz, bsz = sizeof(struct branch);
	u32 filter = btsq->bts->branches_filter;
	int err = 0;

	if (buffer->use_data) {
		sz = buffer->use_size;
		branch = buffer->use_data;
	} else {
		sz = buffer->size;
		branch = buffer->data;
	}

	if (!btsq->bts->sample_branches)
		return 0;

	for (; sz > bsz; branch += 1, sz -= bsz) {
		if (!branch->from && !branch->to)
			continue;
		intel_bts_get_branch_type(btsq, branch);
		if (filter && !(filter & btsq->sample_flags))
			continue;
		err = intel_bts_synth_branch_sample(btsq, branch);
		if (err)
			break;
	}
	return err;
}

static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
{
	struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
	struct auxtrace_queue *queue;
	struct thread *thread;
	int err;

	if (btsq->done)
		return 1;

	if (btsq->pid == -1) {
		thread = machine__find_thread(btsq->bts->machine, -1,
					      btsq->tid);
		if (thread)
			btsq->pid = thread->pid_;
	} else {
		thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
						 btsq->tid);
	}

	queue = &btsq->bts->queues.queue_array[btsq->queue_nr];

	if (!buffer)
		buffer = auxtrace_buffer__next(queue, NULL);

	if (!buffer) {
		if (!btsq->bts->sampling_mode)
			btsq->done = 1;
		err = 1;
		goto out_put;
	}

	/* Currently there is no support for split buffers */
	if (buffer->consecutive) {
		err = -EINVAL;
		goto out_put;
	}

	if (!buffer->data) {
		int fd = perf_data_file__fd(btsq->bts->session->file);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data) {
			err = -ENOMEM;
			goto out_put;
		}
	}

	if (btsq->bts->snapshot_mode && !buffer->consecutive &&
	    intel_bts_do_fix_overlap(queue, buffer)) {
		err = -ENOMEM;
		goto out_put;
	}

	if (!btsq->bts->synth_opts.callchain && thread &&
	    (!old_buffer || btsq->bts->sampling_mode ||
	     (btsq->bts->snapshot_mode && !buffer->consecutive)))
		thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);

	err = intel_bts_process_buffer(btsq, buffer);

	auxtrace_buffer__drop_data(buffer);

	btsq->buffer = auxtrace_buffer__next(queue, buffer);
	if (btsq->buffer) {
		if (timestamp)
			*timestamp = btsq->buffer->reference;
	} else {
		if (!btsq->bts->sampling_mode)
			btsq->done = 1;
	}
out_put:
	thread__put(thread);
	return err;
}

static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
{
	u64 ts = 0;
	int ret;

	while (1) {
		ret = intel_bts_process_queue(btsq, &ts);
		if (ret < 0)
			return ret;
		if (ret)
			break;
	}
	return 0;
}

static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
{
	struct auxtrace_queues *queues = &bts->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &bts->queues.queue_array[i];
		struct intel_bts_queue *btsq = queue->priv;

		if (btsq && btsq->tid == tid)
			return intel_bts_flush_queue(btsq);
	}
	return 0;
}

static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
{
	while (1) {
		unsigned int queue_nr;
		struct auxtrace_queue *queue;
		struct intel_bts_queue *btsq;
		u64 ts = 0;
		int ret;

		if (!bts->heap.heap_cnt)
			return 0;

		if (bts->heap.heap_array[0].ordinal > timestamp)
			return 0;

		queue_nr = bts->heap.heap_array[0].queue_nr;
		queue = &bts->queues.queue_array[queue_nr];
		btsq = queue->priv;

		auxtrace_heap__pop(&bts->heap);

		ret = intel_bts_process_queue(btsq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&bts->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&bts->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			btsq->on_heap = false;
		}
	}

	return 0;
}

static int intel_bts_process_event(struct perf_session *session,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct perf_tool *tool)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);
	u64 timestamp;
	int err;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("Intel BTS requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &bts->tc);
	else
		timestamp = 0;

	err = intel_bts_update_queues(bts);
	if (err)
		return err;

	err = intel_bts_process_queues(bts, timestamp);
	if (err)
		return err;
	if (event->header.type == PERF_RECORD_EXIT) {
		err = intel_bts_process_tid_exit(bts, event->fork.tid);
		if (err)
			return err;
	}

	if (event->header.type == PERF_RECORD_AUX &&
	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
	    bts->synth_opts.errors)
		err = intel_bts_lost(bts, sample);

	return err;
}

static int intel_bts_process_auxtrace_event(struct perf_session *session,
					    union perf_event *event,
					    struct perf_tool *tool __maybe_unused)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);

	if (bts->sampling_mode)
		return 0;

	if (!bts->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data_file__fd(session->file);
		int err;

		if (perf_data_file__is_pipe(session->file)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&bts->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				intel_bts_dump_event(bts, buffer->data,
						     buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int intel_bts_flush(struct perf_session *session,
			   struct perf_tool *tool __maybe_unused)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);
	int ret;

	if (dump_trace || bts->sampling_mode)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = intel_bts_update_queues(bts);
	if (ret < 0)
		return ret;

	return intel_bts_process_queues(bts, MAX_TIMESTAMP);
}

static void intel_bts_free_queue(void *priv)
{
	struct intel_bts_queue *btsq = priv;

	if (!btsq)
		return;
	free(btsq);
}

static void intel_bts_free_events(struct perf_session *session)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);
	struct auxtrace_queues *queues = &bts->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		intel_bts_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void intel_bts_free(struct perf_session *session)
{
	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
					     auxtrace);

	auxtrace_heap__free(&bts->heap);
	intel_bts_free_events(session);
	session->auxtrace = NULL;
	free(bts);
}

struct intel_bts_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int intel_bts_event_synth(struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample __maybe_unused,
				 struct machine *machine __maybe_unused)
{
	struct intel_bts_synth *intel_bts_synth =
			container_of(tool, struct intel_bts_synth, dummy_tool);

	return perf_session__deliver_synth_event(intel_bts_synth->session,
						 event, NULL);
}

static int intel_bts_synth_event(struct perf_session *session,
				 struct perf_event_attr *attr, u64 id)
{
	struct intel_bts_synth intel_bts_synth;

	memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
	intel_bts_synth.session = session;

	return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
					   &id, intel_bts_event_synth);
}

static int intel_bts_synth_events(struct intel_bts *bts,
				  struct perf_session *session)
{
	struct perf_evlist *evlist = session->evlist;
	struct perf_evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each(evlist, evsel) {
		if (evsel->attr.type == bts->pmu_type && evsel->ids) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("There are no selected events with Intel BTS data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
	attr.exclude_user = evsel->attr.exclude_user;
	attr.exclude_kernel = evsel->attr.exclude_kernel;
	attr.exclude_hv = evsel->attr.exclude_hv;
	attr.exclude_host = evsel->attr.exclude_host;
	attr.exclude_guest = evsel->attr.exclude_guest;
	attr.sample_id_all = evsel->attr.sample_id_all;
	attr.read_format = evsel->attr.read_format;

	id = evsel->id[0] + 1000000000;
	if (!id)
		id = 1;

	if (bts->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
			 id, (u64)attr.sample_type);
		err = intel_bts_synth_event(session, &attr, id);
		if (err) {
			pr_err("%s: failed to synthesize 'branches' event type\n",
			       __func__);
			return err;
		}
		bts->sample_branches = true;
		bts->branches_sample_type = attr.sample_type;
		bts->branches_id = id;
		/*
		 * We only use sample types from PERF_SAMPLE_MASK so we can use
		 * __perf_evsel__sample_size() here.
		 */
		bts->branches_event_size = sizeof(struct sample_event) +
				__perf_evsel__sample_size(attr.sample_type);
	}

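	/*
	 * Record whether samples synthesized for 'perf inject' must be
	 * byte-swapped to match the endianness of the perf.data file.
	 */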
	bts->synth_needs_swap = evsel->needs_swap;

	return 0;
}

static const char * const intel_bts_info_fmts[] = {
	[INTEL_BTS_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
	[INTEL_BTS_TIME_SHIFT]		= "  Time Shift         %"PRIu64"\n",
	[INTEL_BTS_TIME_MULT]		= "  Time Multiplier    %"PRIu64"\n",
	[INTEL_BTS_TIME_ZERO]		= "  Time Zero          %"PRIu64"\n",
	[INTEL_BTS_CAP_USER_TIME_ZERO]	= "  Cap Time Zero      %"PRId64"\n",
	[INTEL_BTS_SNAPSHOT_MODE]	= "  Snapshot mode      %"PRId64"\n",
};

static void intel_bts_print_info(u64 *arr, int start, int finish)
{
	int i;

	if (!dump_trace)
		return;

	for (i = start; i <= finish; i++)
		fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
}

u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE];

int intel_bts_process_auxtrace_info(union perf_event *event,
				    struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE;
	struct intel_bts *bts;
	int err;

	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
					min_sz)
		return -EINVAL;

	bts = zalloc(sizeof(struct intel_bts));
	if (!bts)
		return -ENOMEM;

	err = auxtrace_queues__init(&bts->queues);
	if (err)
		goto err_free;

	bts->session = session;
	bts->machine = &session->machines.host; /* No kvm support */
	bts->auxtrace_type = auxtrace_info->type;
	bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
	bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
	bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
	bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
	bts->cap_user_time_zero =
			auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
	bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];

	bts->sampling_mode = false;

	bts->auxtrace.process_event = intel_bts_process_event;
	bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
	bts->auxtrace.flush_events = intel_bts_flush;
	bts->auxtrace.free_events = intel_bts_free_events;
	bts->auxtrace.free = intel_bts_free;
	session->auxtrace = &bts->auxtrace;

	intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
			     INTEL_BTS_SNAPSHOT_MODE);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		bts->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&bts->synth_opts);

	if (bts->synth_opts.calls)
		bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_TRACE_END;
	if (bts->synth_opts.returns)
		bts->branches_filter |= PERF_IP_FLAG_RETURN |
					PERF_IP_FLAG_TRACE_BEGIN;

	err = intel_bts_synth_events(bts, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&bts->queues, session);
	if (err)
		goto err_free_queues;

	if (bts->queues.populated)
		bts->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&bts->queues);
	session->auxtrace = NULL;
err_free:
	free(bts);
	return err;
}