v3.5.6
#include "builtin.h"
#include "perf.h"

#include "util/util.h"
#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"

#include "util/parse-options.h"
#include "util/trace-event.h"

#include "util/debug.h"

#include <linux/rbtree.h>

struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);

static const char		*input_name;

static int			alloc_flag;
static int			caller_flag;

static int			alloc_lines = -1;
static int			caller_lines = -1;

static bool			raw_ip;

static char			default_sort_order[] = "frag,hit,bytes";

static int			*cpunode_map;
static int			max_cpu_num;

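/* One statistics node, used for both the per-pointer and per-callsite trees. */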
struct alloc_stat {
	u64	call_site;
	u64	ptr;
	u64	bytes_req;
	u64	bytes_alloc;
	u32	hit;
	u32	pingpong;

	short	alloc_cpu;

	struct rb_node node;
};

static struct rb_root root_alloc_stat;
static struct rb_root root_alloc_sorted;
static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;

static unsigned long total_requested, total_allocated;
static unsigned long nr_allocs, nr_cross_allocs;

#define PATH_SYS_NODE	"/sys/devices/system/node"

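/*
 * Size and initialize the cpu -> NUMA-node map. Note that when
 * 'kernel_max' cannot be opened, this returns early with cpunode_map
 * still NULL.
 */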
static void init_cpunode_map(void)
{
	FILE *fp;
	int i;

	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
	if (!fp) {
		max_cpu_num = 4096;
		return;
	}

	if (fscanf(fp, "%d", &max_cpu_num) < 1)
		die("Failed to read 'kernel_max' from sysfs");
	max_cpu_num++;

	cpunode_map = calloc(max_cpu_num, sizeof(int));
	if (!cpunode_map)
		die("calloc");
	for (i = 0; i < max_cpu_num; i++)
		cpunode_map[i] = -1;
	fclose(fp);
}

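/* Walk /sys/devices/system/node/nodeN/cpuM entries to fill cpunode_map. */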
static void setup_cpunode_map(void)
{
	struct dirent *dent1, *dent2;
	DIR *dir1, *dir2;
	unsigned int cpu, mem;
	char buf[PATH_MAX];

	init_cpunode_map();

	dir1 = opendir(PATH_SYS_NODE);
	if (!dir1)
		return;

	while ((dent1 = readdir(dir1)) != NULL) {
		if (dent1->d_type != DT_DIR ||
		    sscanf(dent1->d_name, "node%u", &mem) < 1)
			continue;

		snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
		dir2 = opendir(buf);
		if (!dir2)
			continue;
		while ((dent2 = readdir(dir2)) != NULL) {
			if (dent2->d_type != DT_LNK ||
			    sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
				continue;
			cpunode_map[cpu] = mem;
		}
		closedir(dir2);
	}
	closedir(dir1);
}

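/*
 * Track one allocation in the per-pointer rbtree: bump the counters if
 * the pointer is already known, otherwise link a fresh node.
 */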
static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
			      int bytes_req, int bytes_alloc, int cpu)
{
	struct rb_node **node = &root_alloc_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (ptr > data->ptr)
			node = &(*node)->rb_right;
		else if (ptr < data->ptr)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->ptr == ptr) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data)
			die("malloc");
		data->ptr = ptr;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_alloc_stat);
	}
	data->call_site = call_site;
	data->alloc_cpu = cpu;
}

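/* Same bookkeeping as above, keyed by call site instead of pointer. */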
static void insert_caller_stat(unsigned long call_site,
			      int bytes_req, int bytes_alloc)
{
	struct rb_node **node = &root_caller_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (call_site > data->call_site)
			node = &(*node)->rb_right;
		else if (call_site < data->call_site)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->call_site == call_site) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data)
			die("malloc");
		data->call_site = call_site;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_caller_stat);
	}
}

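/*
 * Record one kmalloc/kmem_cache_alloc sample; when 'node' is set, also
 * compare the event's NUMA node with the allocating CPU's node to count
 * cross-node allocations.
 */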
static void process_alloc_event(void *data,
				struct event_format *event,
				int cpu,
				u64 timestamp __used,
				struct thread *thread __used,
				int node)
{
	unsigned long call_site;
	unsigned long ptr;
	int bytes_req;
	int bytes_alloc;
	int node1, node2;

	ptr = raw_field_value(event, "ptr", data);
	call_site = raw_field_value(event, "call_site", data);
	bytes_req = raw_field_value(event, "bytes_req", data);
	bytes_alloc = raw_field_value(event, "bytes_alloc", data);

	insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
	insert_caller_stat(call_site, bytes_req, bytes_alloc);

	total_requested += bytes_req;
	total_allocated += bytes_alloc;

	if (node) {
		node1 = cpunode_map[cpu];
		node2 = raw_field_value(event, "node", data);
		if (node1 != node2)
			nr_cross_allocs++;
	}
	nr_allocs++;
}

static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);

static struct alloc_stat *search_alloc_stat(unsigned long ptr,
					    unsigned long call_site,
					    struct rb_root *root,
					    sort_fn_t sort_fn)
{
	struct rb_node *node = root->rb_node;
	struct alloc_stat key = { .ptr = ptr, .call_site = call_site };

	while (node) {
		struct alloc_stat *data;
		int cmp;

		data = rb_entry(node, struct alloc_stat, node);

		cmp = sort_fn(&key, data);
		if (cmp < 0)
			node = node->rb_left;
		else if (cmp > 0)
			node = node->rb_right;
		else
			return data;
	}
	return NULL;
}

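/*
 * A free on a different CPU than the allocation is counted as a
 * "ping-pong" against both the pointer and its call site.
 */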
static void process_free_event(void *data,
			       struct event_format *event,
			       int cpu,
			       u64 timestamp __used,
			       struct thread *thread __used)
{
	unsigned long ptr;
	struct alloc_stat *s_alloc, *s_caller;

	ptr = raw_field_value(event, "ptr", data);

	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
	if (!s_alloc)
		return;

	if (cpu != s_alloc->alloc_cpu) {
		s_alloc->pingpong++;

		s_caller = search_alloc_stat(0, s_alloc->call_site,
					     &root_caller_stat, callsite_cmp);
		assert(s_caller);
		s_caller->pingpong++;
	}
	s_alloc->alloc_cpu = -1;
}

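/* Dispatch a raw tracepoint sample to the matching kmem event handler. */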
static void process_raw_event(union perf_event *raw_event __used, void *data,
			      int cpu, u64 timestamp, struct thread *thread)
{
	struct event_format *event;
	int type;

	type = trace_parse_common_type(data);
	event = trace_find_event(type);

	if (!strcmp(event->name, "kmalloc") ||
	    !strcmp(event->name, "kmem_cache_alloc")) {
		process_alloc_event(data, event, cpu, timestamp, thread, 0);
		return;
	}

	if (!strcmp(event->name, "kmalloc_node") ||
	    !strcmp(event->name, "kmem_cache_alloc_node")) {
		process_alloc_event(data, event, cpu, timestamp, thread, 1);
		return;
	}

	if (!strcmp(event->name, "kfree") ||
	    !strcmp(event->name, "kmem_cache_free")) {
		process_free_event(data, event, cpu, timestamp, thread);
		return;
	}
}

static int process_sample_event(struct perf_tool *tool __used,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel __used,
				struct machine *machine)
{
	struct thread *thread = machine__findnew_thread(machine, event->ip.pid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);

	process_raw_event(event, sample->raw_data, sample->cpu,
			  sample->time, thread);

	return 0;
}

static struct perf_tool perf_kmem = {
	.sample			= process_sample_event,
	.comm			= perf_event__process_comm,
	.ordered_samples	= true,
};

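/* Internal fragmentation as a percentage: 100 * (1 - n_req / n_alloc). */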
static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
	if (n_alloc == 0)
		return 0.0;
	else
		return 100.0 - (100.0 * n_req / n_alloc);
}

static void __print_result(struct rb_root *root, struct perf_session *session,
			   int n_lines, int is_caller)
{
	struct rb_node *next;
	struct machine *machine;

	printf("%.102s\n", graph_dotted_line);
	printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
	printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
	printf("%.102s\n", graph_dotted_line);

	next = rb_first(root);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("__print_result: couldn't find kernel information\n");
		return;
	}
	while (next && n_lines--) {
		struct alloc_stat *data = rb_entry(next, struct alloc_stat,
						   node);
		struct symbol *sym = NULL;
		struct map *map;
		char buf[BUFSIZ];
		u64 addr;

		if (is_caller) {
			addr = data->call_site;
			if (!raw_ip)
				sym = machine__find_kernel_function(machine, addr, &map, NULL);
		} else
			addr = data->ptr;

		if (sym != NULL)
			snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
				 addr - map->unmap_ip(map, sym->start));
		else
			snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
		printf(" %-34s |", buf);

		printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n",
		       (unsigned long long)data->bytes_alloc,
		       (unsigned long)data->bytes_alloc / data->hit,
		       (unsigned long long)data->bytes_req,
		       (unsigned long)data->bytes_req / data->hit,
		       (unsigned long)data->hit,
		       (unsigned long)data->pingpong,
		       fragmentation(data->bytes_req, data->bytes_alloc));

		next = rb_next(next);
	}

	if (n_lines == -1)
		printf(" ...                                | ...             | ...             | ...    | ...      | ...   \n");

	printf("%.102s\n", graph_dotted_line);
}

static void print_summary(void)
{
	printf("\nSUMMARY\n=======\n");
	printf("Total bytes requested: %lu\n", total_requested);
	printf("Total bytes allocated: %lu\n", total_allocated);
	printf("Total bytes wasted on internal fragmentation: %lu\n",
	       total_allocated - total_requested);
	printf("Internal fragmentation: %f%%\n",
	       fragmentation(total_requested, total_allocated));
	printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
}

static void print_result(struct perf_session *session)
{
	if (caller_flag)
		__print_result(&root_caller_sorted, session, caller_lines, 1);
	if (alloc_flag)
		__print_result(&root_alloc_sorted, session, alloc_lines, 0);
	print_summary();
}

struct sort_dimension {
	const char		name[20];
	sort_fn_t		cmp;
	struct list_head	list;
};

static LIST_HEAD(caller_sort);
static LIST_HEAD(alloc_sort);

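/*
 * Insert into the sorted tree using the first sort key that
 * differentiates the two entries; higher values sort to the left.
 */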
static void sort_insert(struct rb_root *root, struct alloc_stat *data,
			struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node);
	struct rb_node *parent = NULL;
	struct sort_dimension *sort;

	while (*new) {
		struct alloc_stat *this;
		int cmp = 0;

		this = rb_entry(*new, struct alloc_stat, node);
		parent = *new;

		list_for_each_entry(sort, sort_list, list) {
			cmp = sort->cmp(data, this);
			if (cmp)
				break;
		}

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
			  struct list_head *sort_list)
{
	struct rb_node *node;
	struct alloc_stat *data;

	for (;;) {
		node = rb_first(root);
		if (!node)
			break;

		rb_erase(node, root);
		data = rb_entry(node, struct alloc_stat, node);
		sort_insert(root_sorted, data, sort_list);
	}
}

static void sort_result(void)
{
	__sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
	__sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
}

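/* Open the perf.data session, process all events, then sort and print. */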
static int __cmd_kmem(void)
{
	int err = -EINVAL;
	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
							 0, false, &perf_kmem);
	if (session == NULL)
		return -ENOMEM;

	if (perf_session__create_kernel_maps(session) < 0)
		goto out_delete;

	if (!perf_session__has_traces(session, "kmem record"))
		goto out_delete;

	setup_pager();
	err = perf_session__process_events(session, &perf_kmem);
	if (err != 0)
		goto out_delete;
	sort_result();
	print_result(session);
out_delete:
	perf_session__delete(session);
	return err;
}

static const char * const kmem_usage[] = {
	"perf kmem [<options>] {record|stat}",
	NULL
};

static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->ptr < r->ptr)
		return -1;
	else if (l->ptr > r->ptr)
		return 1;
	return 0;
}

static struct sort_dimension ptr_sort_dimension = {
	.name	= "ptr",
	.cmp	= ptr_cmp,
};

static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->call_site < r->call_site)
		return -1;
	else if (l->call_site > r->call_site)
		return 1;
	return 0;
}

static struct sort_dimension callsite_sort_dimension = {
	.name	= "callsite",
	.cmp	= callsite_cmp,
};

static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->hit < r->hit)
		return -1;
	else if (l->hit > r->hit)
		return 1;
	return 0;
}

static struct sort_dimension hit_sort_dimension = {
	.name	= "hit",
	.cmp	= hit_cmp,
};

static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->bytes_alloc < r->bytes_alloc)
		return -1;
	else if (l->bytes_alloc > r->bytes_alloc)
		return 1;
	return 0;
}

static struct sort_dimension bytes_sort_dimension = {
	.name	= "bytes",
	.cmp	= bytes_cmp,
};

static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	double x, y;

	x = fragmentation(l->bytes_req, l->bytes_alloc);
	y = fragmentation(r->bytes_req, r->bytes_alloc);

	if (x < y)
		return -1;
	else if (x > y)
		return 1;
	return 0;
}

static struct sort_dimension frag_sort_dimension = {
	.name	= "frag",
	.cmp	= frag_cmp,
};

static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->pingpong < r->pingpong)
		return -1;
	else if (l->pingpong > r->pingpong)
		return 1;
	return 0;
}

static struct sort_dimension pingpong_sort_dimension = {
	.name	= "pingpong",
	.cmp	= pingpong_cmp,
};

static struct sort_dimension *avail_sorts[] = {
	&ptr_sort_dimension,
	&callsite_sort_dimension,
	&hit_sort_dimension,
	&bytes_sort_dimension,
	&frag_sort_dimension,
	&pingpong_sort_dimension,
};

#define NUM_AVAIL_SORTS	\
	(int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))

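/* Look up a sort key by name and append a copy to the given sort list. */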
static int sort_dimension__add(const char *tok, struct list_head *list)
{
	struct sort_dimension *sort;
	int i;

	for (i = 0; i < NUM_AVAIL_SORTS; i++) {
		if (!strcmp(avail_sorts[i]->name, tok)) {
			sort = malloc(sizeof(*sort));
			if (!sort)
				die("malloc");
			memcpy(sort, avail_sorts[i], sizeof(*sort));
			list_add_tail(&sort->list, list);
			return 0;
		}
	}

	return -1;
}

static int setup_sorting(struct list_head *sort_list, const char *arg)
{
	char *tok;
	char *str = strdup(arg);

	if (!str)
		die("strdup");

	while (true) {
		tok = strsep(&str, ",");
		if (!tok)
			break;
		if (sort_dimension__add(tok, sort_list) < 0) {
			error("Unknown --sort key: '%s'", tok);
			free(str);
			return -1;
		}
	}

	free(str);
	return 0;
}

static int parse_sort_opt(const struct option *opt __used,
			  const char *arg, int unset __used)
{
	if (!arg)
		return -1;

	if (caller_flag > alloc_flag)
		return setup_sorting(&caller_sort, arg);
	else
		return setup_sorting(&alloc_sort, arg);

	return 0;
}

static int parse_caller_opt(const struct option *opt __used,
			  const char *arg __used, int unset __used)
{
	caller_flag = (alloc_flag + 1);
	return 0;
}

static int parse_alloc_opt(const struct option *opt __used,
			  const char *arg __used, int unset __used)
{
	alloc_flag = (caller_flag + 1);
	return 0;
}

static int parse_line_opt(const struct option *opt __used,
			  const char *arg, int unset __used)
{
	int lines;

	if (!arg)
		return -1;

	lines = strtoul(arg, NULL, 10);

	if (caller_flag > alloc_flag)
		caller_lines = lines;
	else
		alloc_lines = lines;

	return 0;
}

static const struct option kmem_options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		   "input file name"),
	OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
			   "show per-callsite statistics",
			   parse_caller_opt),
	OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
			   "show per-allocation statistics",
			   parse_alloc_opt),
	OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
		     "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
		     parse_sort_opt),
	OPT_CALLBACK('l', "line", NULL, "num",
		     "show n lines",
		     parse_line_opt),
	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
	OPT_END()
};

static const char *record_args[] = {
	"record",
	"-a",
	"-R",
	"-f",
	"-c", "1",
	"-e", "kmem:kmalloc",
	"-e", "kmem:kmalloc_node",
	"-e", "kmem:kfree",
	"-e", "kmem:kmem_cache_alloc",
	"-e", "kmem:kmem_cache_alloc_node",
	"-e", "kmem:kmem_cache_free",
};

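/*
 * 'perf kmem record' is 'perf record' with the kmem tracepoints
 * appended; user-supplied arguments follow the fixed ones.
 */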
static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}

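/* Entry point: dispatch to 'record' or 'stat' and apply default sorting. */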
int cmd_kmem(int argc, const char **argv, const char *prefix __used)
{
	argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);

	if (!argc)
		usage_with_options(kmem_usage, kmem_options);

	symbol__init();

	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strcmp(argv[0], "stat")) {
		setup_cpunode_map();

		if (list_empty(&caller_sort))
			setup_sorting(&caller_sort, default_sort_order);
		if (list_empty(&alloc_sort))
			setup_sorting(&alloc_sort, default_sort_order);

		return __cmd_kmem();
	} else
		usage_with_options(kmem_usage, kmem_options);

	return 0;
}

v3.15
#include "builtin.h"
#include "perf.h"

#include "util/evlist.h"
#include "util/evsel.h"
#include "util/util.h"
#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"

#include "util/parse-options.h"
#include "util/trace-event.h"
#include "util/data.h"

#include "util/debug.h"

#include <linux/rbtree.h>
#include <linux/string.h>

struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);

static int			alloc_flag;
static int			caller_flag;

static int			alloc_lines = -1;
static int			caller_lines = -1;

static bool			raw_ip;

static int			*cpunode_map;
static int			max_cpu_num;

struct alloc_stat {
	u64	call_site;
	u64	ptr;
	u64	bytes_req;
	u64	bytes_alloc;
	u32	hit;
	u32	pingpong;

	short	alloc_cpu;

	struct rb_node node;
};

static struct rb_root root_alloc_stat;
static struct rb_root root_alloc_sorted;
static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;

static unsigned long total_requested, total_allocated;
static unsigned long nr_allocs, nr_cross_allocs;

#define PATH_SYS_NODE	"/sys/devices/system/node"

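/*
 * Same cpu -> node mapping as in v3.5.6, but errors now propagate as
 * return codes instead of calling die().
 */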
static int init_cpunode_map(void)
{
	FILE *fp;
	int i, err = -1;

	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
	if (!fp) {
		max_cpu_num = 4096;
		return 0;
	}

	if (fscanf(fp, "%d", &max_cpu_num) < 1) {
		pr_err("Failed to read 'kernel_max' from sysfs");
		goto out_close;
	}

	max_cpu_num++;

	cpunode_map = calloc(max_cpu_num, sizeof(int));
	if (!cpunode_map) {
		pr_err("%s: calloc failed\n", __func__);
		goto out_close;
	}

	for (i = 0; i < max_cpu_num; i++)
		cpunode_map[i] = -1;

	err = 0;
out_close:
	fclose(fp);
	return err;
}

static int setup_cpunode_map(void)
{
	struct dirent *dent1, *dent2;
	DIR *dir1, *dir2;
	unsigned int cpu, mem;
	char buf[PATH_MAX];

	if (init_cpunode_map())
		return -1;

	dir1 = opendir(PATH_SYS_NODE);
	if (!dir1)
		return 0;

	while ((dent1 = readdir(dir1)) != NULL) {
		if (dent1->d_type != DT_DIR ||
		    sscanf(dent1->d_name, "node%u", &mem) < 1)
			continue;

		snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
		dir2 = opendir(buf);
		if (!dir2)
			continue;
		while ((dent2 = readdir(dir2)) != NULL) {
			if (dent2->d_type != DT_LNK ||
			    sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
				continue;
			cpunode_map[cpu] = mem;
		}
		closedir(dir2);
	}
	closedir(dir1);
	return 0;
}

static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
			     int bytes_req, int bytes_alloc, int cpu)
{
	struct rb_node **node = &root_alloc_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (ptr > data->ptr)
			node = &(*node)->rb_right;
		else if (ptr < data->ptr)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->ptr == ptr) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->ptr = ptr;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_alloc_stat);
	}
	data->call_site = call_site;
	data->alloc_cpu = cpu;
	return 0;
}

static int insert_caller_stat(unsigned long call_site,
			      int bytes_req, int bytes_alloc)
{
	struct rb_node **node = &root_caller_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (call_site > data->call_site)
			node = &(*node)->rb_right;
		else if (call_site < data->call_site)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->call_site == call_site) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->call_site = call_site;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_caller_stat);
	}

	return 0;
}

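/*
 * Tracepoint handlers: field values are now read through
 * perf_evsel__intval() instead of raw_field_value().
 */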
static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
					   struct perf_sample *sample)
{
	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
		      call_site = perf_evsel__intval(evsel, sample, "call_site");
	int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
	    bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");

	if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
	    insert_caller_stat(call_site, bytes_req, bytes_alloc))
		return -1;

	total_requested += bytes_req;
	total_allocated += bytes_alloc;

	nr_allocs++;
	return 0;
}

static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
						struct perf_sample *sample)
{
	int ret = perf_evsel__process_alloc_event(evsel, sample);

	if (!ret) {
		int node1 = cpunode_map[sample->cpu],
		    node2 = perf_evsel__intval(evsel, sample, "node");

		if (node1 != node2)
			nr_cross_allocs++;
	}

	return ret;
}

static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);

static struct alloc_stat *search_alloc_stat(unsigned long ptr,
					    unsigned long call_site,
					    struct rb_root *root,
					    sort_fn_t sort_fn)
{
	struct rb_node *node = root->rb_node;
	struct alloc_stat key = { .ptr = ptr, .call_site = call_site };

	while (node) {
		struct alloc_stat *data;
		int cmp;

		data = rb_entry(node, struct alloc_stat, node);

		cmp = sort_fn(&key, data);
		if (cmp < 0)
			node = node->rb_left;
		else if (cmp > 0)
			node = node->rb_right;
		else
			return data;
	}
	return NULL;
}

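/*
 * As in v3.5.6, a free on a CPU other than the allocating one counts as
 * a ping-pong; the assert() on the caller lookup is now an error return.
 */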
static int perf_evsel__process_free_event(struct perf_evsel *evsel,
					  struct perf_sample *sample)
{
	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
	struct alloc_stat *s_alloc, *s_caller;

	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
	if (!s_alloc)
		return 0;

	if ((short)sample->cpu != s_alloc->alloc_cpu) {
		s_alloc->pingpong++;

		s_caller = search_alloc_stat(0, s_alloc->call_site,
					     &root_caller_stat, callsite_cmp);
		if (!s_caller)
			return -1;
		s_caller->pingpong++;
	}
	s_alloc->alloc_cpu = -1;

	return 0;
}

typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
				  struct perf_sample *sample);

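/* Resolve the thread for a sample and dispatch to the evsel's handler. */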
static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->pid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		return f(evsel, sample);
	}

	return 0;
}

static struct perf_tool perf_kmem = {
	.sample		 = process_sample_event,
	.comm		 = perf_event__process_comm,
	.ordered_samples = true,
};

static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
	if (n_alloc == 0)
		return 0.0;
	else
		return 100.0 - (100.0 * n_req / n_alloc);
}

static void __print_result(struct rb_root *root, struct perf_session *session,
			   int n_lines, int is_caller)
{
	struct rb_node *next;
	struct machine *machine = &session->machines.host;

	printf("%.102s\n", graph_dotted_line);
	printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
	printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
	printf("%.102s\n", graph_dotted_line);

	next = rb_first(root);

	while (next && n_lines--) {
		struct alloc_stat *data = rb_entry(next, struct alloc_stat,
						   node);
		struct symbol *sym = NULL;
		struct map *map;
		char buf[BUFSIZ];
		u64 addr;

		if (is_caller) {
			addr = data->call_site;
			if (!raw_ip)
				sym = machine__find_kernel_function(machine, addr, &map, NULL);
		} else
			addr = data->ptr;

		if (sym != NULL)
			snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
				 addr - map->unmap_ip(map, sym->start));
		else
			snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
		printf(" %-34s |", buf);

		printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n",
		       (unsigned long long)data->bytes_alloc,
		       (unsigned long)data->bytes_alloc / data->hit,
		       (unsigned long long)data->bytes_req,
		       (unsigned long)data->bytes_req / data->hit,
		       (unsigned long)data->hit,
		       (unsigned long)data->pingpong,
		       fragmentation(data->bytes_req, data->bytes_alloc));

		next = rb_next(next);
	}

	if (n_lines == -1)
		printf(" ...                                | ...             | ...             | ...    | ...      | ...   \n");

	printf("%.102s\n", graph_dotted_line);
}

static void print_summary(void)
{
	printf("\nSUMMARY\n=======\n");
	printf("Total bytes requested: %lu\n", total_requested);
	printf("Total bytes allocated: %lu\n", total_allocated);
	printf("Total bytes wasted on internal fragmentation: %lu\n",
	       total_allocated - total_requested);
	printf("Internal fragmentation: %f%%\n",
	       fragmentation(total_requested, total_allocated));
	printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
}

static void print_result(struct perf_session *session)
{
	if (caller_flag)
		__print_result(&root_caller_sorted, session, caller_lines, 1);
	if (alloc_flag)
		__print_result(&root_alloc_sorted, session, alloc_lines, 0);
	print_summary();
}

struct sort_dimension {
	const char		name[20];
	sort_fn_t		cmp;
	struct list_head	list;
};

static LIST_HEAD(caller_sort);
static LIST_HEAD(alloc_sort);

static void sort_insert(struct rb_root *root, struct alloc_stat *data,
			struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node);
	struct rb_node *parent = NULL;
	struct sort_dimension *sort;

	while (*new) {
		struct alloc_stat *this;
		int cmp = 0;

		this = rb_entry(*new, struct alloc_stat, node);
		parent = *new;

		list_for_each_entry(sort, sort_list, list) {
			cmp = sort->cmp(data, this);
			if (cmp)
				break;
		}

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
			  struct list_head *sort_list)
{
	struct rb_node *node;
	struct alloc_stat *data;

	for (;;) {
		node = rb_first(root);
		if (!node)
			break;

		rb_erase(node, root);
		data = rb_entry(node, struct alloc_stat, node);
		sort_insert(root_sorted, data, sort_list);
	}
}

static void sort_result(void)
{
	__sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
	__sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
}

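/*
 * Open the perf.data session, register one handler per kmem tracepoint,
 * process all events, then sort and print.
 */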
static int __cmd_kmem(void)
{
	int err = -EINVAL;
	struct perf_session *session;
	const struct perf_evsel_str_handler kmem_tracepoints[] = {
		{ "kmem:kmalloc",		perf_evsel__process_alloc_event, },
		{ "kmem:kmem_cache_alloc",	perf_evsel__process_alloc_event, },
		{ "kmem:kmalloc_node",		perf_evsel__process_alloc_node_event, },
		{ "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
		{ "kmem:kfree",			perf_evsel__process_free_event, },
		{ "kmem:kmem_cache_free",	perf_evsel__process_free_event, },
	};
	struct perf_data_file file = {
		.path = input_name,
		.mode = PERF_DATA_MODE_READ,
	};

	session = perf_session__new(&file, false, &perf_kmem);
	if (session == NULL)
		return -ENOMEM;

	if (perf_session__create_kernel_maps(session) < 0)
		goto out_delete;

	if (!perf_session__has_traces(session, "kmem record"))
		goto out_delete;

	if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
		pr_err("Initializing perf session tracepoint handlers failed\n");
		return -1;
	}

	setup_pager();
	err = perf_session__process_events(session, &perf_kmem);
	if (err != 0)
		goto out_delete;
	sort_result();
	print_result(session);
out_delete:
	perf_session__delete(session);
	return err;
}

static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->ptr < r->ptr)
		return -1;
	else if (l->ptr > r->ptr)
		return 1;
	return 0;
}

static struct sort_dimension ptr_sort_dimension = {
	.name	= "ptr",
	.cmp	= ptr_cmp,
};

static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->call_site < r->call_site)
		return -1;
	else if (l->call_site > r->call_site)
		return 1;
	return 0;
}

static struct sort_dimension callsite_sort_dimension = {
	.name	= "callsite",
	.cmp	= callsite_cmp,
};

static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->hit < r->hit)
		return -1;
	else if (l->hit > r->hit)
		return 1;
	return 0;
}

static struct sort_dimension hit_sort_dimension = {
	.name	= "hit",
	.cmp	= hit_cmp,
};

static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->bytes_alloc < r->bytes_alloc)
		return -1;
	else if (l->bytes_alloc > r->bytes_alloc)
		return 1;
	return 0;
}

static struct sort_dimension bytes_sort_dimension = {
	.name	= "bytes",
	.cmp	= bytes_cmp,
};

static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	double x, y;

	x = fragmentation(l->bytes_req, l->bytes_alloc);
	y = fragmentation(r->bytes_req, r->bytes_alloc);

	if (x < y)
		return -1;
	else if (x > y)
		return 1;
	return 0;
}

static struct sort_dimension frag_sort_dimension = {
	.name	= "frag",
	.cmp	= frag_cmp,
};

static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->pingpong < r->pingpong)
		return -1;
	else if (l->pingpong > r->pingpong)
		return 1;
	return 0;
}

static struct sort_dimension pingpong_sort_dimension = {
	.name	= "pingpong",
	.cmp	= pingpong_cmp,
};

static struct sort_dimension *avail_sorts[] = {
	&ptr_sort_dimension,
	&callsite_sort_dimension,
	&hit_sort_dimension,
	&bytes_sort_dimension,
	&frag_sort_dimension,
	&pingpong_sort_dimension,
};

#define NUM_AVAIL_SORTS	((int)ARRAY_SIZE(avail_sorts))

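/* Look up a sort key by name; the match is duplicated with memdup(). */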
static int sort_dimension__add(const char *tok, struct list_head *list)
{
	struct sort_dimension *sort;
	int i;

	for (i = 0; i < NUM_AVAIL_SORTS; i++) {
		if (!strcmp(avail_sorts[i]->name, tok)) {
			sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
			if (!sort) {
				pr_err("%s: memdup failed\n", __func__);
				return -1;
			}
			list_add_tail(&sort->list, list);
			return 0;
		}
	}

	return -1;
}

static int setup_sorting(struct list_head *sort_list, const char *arg)
{
	char *tok;
	char *str = strdup(arg);

	if (!str) {
		pr_err("%s: strdup failed\n", __func__);
		return -1;
	}

	while (true) {
		tok = strsep(&str, ",");
		if (!tok)
			break;
		if (sort_dimension__add(tok, sort_list) < 0) {
			error("Unknown --sort key: '%s'", tok);
			free(str);
			return -1;
		}
	}

	free(str);
	return 0;
}

static int parse_sort_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	if (!arg)
		return -1;

	if (caller_flag > alloc_flag)
		return setup_sorting(&caller_sort, arg);
	else
		return setup_sorting(&alloc_sort, arg);

	return 0;
}

static int parse_caller_opt(const struct option *opt __maybe_unused,
			    const char *arg __maybe_unused,
			    int unset __maybe_unused)
{
	caller_flag = (alloc_flag + 1);
	return 0;
}

static int parse_alloc_opt(const struct option *opt __maybe_unused,
			   const char *arg __maybe_unused,
			   int unset __maybe_unused)
{
	alloc_flag = (caller_flag + 1);
	return 0;
}

static int parse_line_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	int lines;

	if (!arg)
		return -1;

	lines = strtoul(arg, NULL, 10);

	if (caller_flag > alloc_flag)
		caller_lines = lines;
	else
		alloc_lines = lines;

	return 0;
}

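/*
 * 'perf kmem record': the fixed record arguments are now a function
 * local; user-supplied arguments still follow them.
 */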
static int __cmd_record(int argc, const char **argv)
{
	const char * const record_args[] = {
	"record", "-a", "-R", "-c", "1",
	"-e", "kmem:kmalloc",
	"-e", "kmem:kmalloc_node",
	"-e", "kmem:kfree",
	"-e", "kmem:kmem_cache_alloc",
	"-e", "kmem:kmem_cache_alloc_node",
	"-e", "kmem:kmem_cache_free",
	};
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}

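/*
 * Entry point: options, usage, and the default sort order are now local
 * to cmd_kmem(), and setup_cpunode_map() failures abort the command.
 */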
int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char * const default_sort_order = "frag,hit,bytes";
	const struct option kmem_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
			   "show per-callsite statistics", parse_caller_opt),
	OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
			   "show per-allocation statistics", parse_alloc_opt),
	OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
		     "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
		     parse_sort_opt),
	OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
	OPT_END()
	};
	const char * const kmem_usage[] = {
		"perf kmem [<options>] {record|stat}",
		NULL
	};
	argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);

	if (!argc)
		usage_with_options(kmem_usage, kmem_options);

	symbol__init();

	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strcmp(argv[0], "stat")) {
		if (setup_cpunode_map())
			return -1;

		if (list_empty(&caller_sort))
			setup_sorting(&caller_sort, default_sort_order);
		if (list_empty(&alloc_sort))
			setup_sorting(&alloc_sort, default_sort_order);

		return __cmd_kmem();
	} else
		usage_with_options(kmem_usage, kmem_options);

	return 0;
}