Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.15.
  1/*
  2 * mem-memcpy.c
  3 *
  4 * Simple memcpy() and memset() benchmarks
  5 *
  6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  7 */
  8
  9#include "debug.h"
 10#include "../perf.h"
 11#include "../util/util.h"
 12#include <subcmd/parse-options.h>
 13#include "../util/header.h"
 14#include "../util/cloexec.h"
 15#include "bench.h"
 16#include "mem-memcpy-arch.h"
 17#include "mem-memset-arch.h"
 18
 19#include <stdio.h>
 20#include <stdlib.h>
 21#include <string.h>
 22#include <sys/time.h>
 23#include <errno.h>
 24#include <linux/time64.h>
 25
 26#define K 1024
 27
 28static const char	*size_str	= "1MB";
 29static const char	*function_str	= "all";
 30static int		nr_loops	= 1;
 31static bool		use_cycles;
 32static int		cycles_fd;
 33
 34static const struct option options[] = {
 35	OPT_STRING('s', "size", &size_str, "1MB",
 36		    "Specify the size of the memory buffers. "
 37		    "Available units: B, KB, MB, GB and TB (case insensitive)"),
 38
 39	OPT_STRING('f', "function", &function_str, "all",
 40		    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
 41
 42	OPT_INTEGER('l', "nr_loops", &nr_loops,
 43		    "Specify the number of loops to run. (default: 1)"),
 44
 45	OPT_BOOLEAN('c', "cycles", &use_cycles,
 46		    "Use a cycles event instead of gettimeofday() to measure performance"),
 47
 48	OPT_END()
 49};
 50
 51typedef void *(*memcpy_t)(void *, const void *, size_t);
 52typedef void *(*memset_t)(void *, int, size_t);
 53
 54struct function {
 55	const char *name;
 56	const char *desc;
 57	union {
 58		memcpy_t memcpy;
 59		memset_t memset;
 60	} fn;
 61};
 62
 63static struct perf_event_attr cycle_attr = {
 64	.type		= PERF_TYPE_HARDWARE,
 65	.config		= PERF_COUNT_HW_CPU_CYCLES
 66};
 67
 68static int init_cycles(void)
 69{
 70	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
 71
 72	if (cycles_fd < 0 && errno == ENOSYS) {
 73		pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
 74		return -1;
 75	}
 76
 77	return cycles_fd;
 78}
 79
 80static u64 get_cycles(void)
 81{
 82	int ret;
 83	u64 clk;
 84
 85	ret = read(cycles_fd, &clk, sizeof(u64));
 86	BUG_ON(ret != sizeof(u64));
 87
 88	return clk;
 89}
 90
 91static double timeval2double(struct timeval *ts)
 92{
 93	return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
 94}
 95
 96#define print_bps(x) do {						\
 97		if (x < K)						\
 98			printf(" %14lf bytes/sec\n", x);		\
 99		else if (x < K * K)					\
100			printf(" %14lfd KB/sec\n", x / K);		\
101		else if (x < K * K * K)					\
102			printf(" %14lf MB/sec\n", x / K / K);		\
103		else							\
104			printf(" %14lf GB/sec\n", x / K / K / K);	\
105	} while (0)
106
107struct bench_mem_info {
108	const struct function *functions;
109	u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
110	double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
111	const char *const *usage;
112	bool alloc_src;
113};
114
115static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
116{
117	const struct function *r = &info->functions[r_idx];
118	double result_bps = 0.0;
119	u64 result_cycles = 0;
120	void *src = NULL, *dst = zalloc(size);
121
122	printf("# function '%s' (%s)\n", r->name, r->desc);
123
124	if (dst == NULL)
125		goto out_alloc_failed;
126
127	if (info->alloc_src) {
128		src = zalloc(size);
129		if (src == NULL)
130			goto out_alloc_failed;
131	}
132
133	if (bench_format == BENCH_FORMAT_DEFAULT)
134		printf("# Copying %s bytes ...\n\n", size_str);
135
136	if (use_cycles) {
137		result_cycles = info->do_cycles(r, size, src, dst);
138	} else {
139		result_bps = info->do_gettimeofday(r, size, src, dst);
140	}
141
142	switch (bench_format) {
143	case BENCH_FORMAT_DEFAULT:
144		if (use_cycles) {
145			printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
146		} else {
147			print_bps(result_bps);
148		}
149		break;
150
151	case BENCH_FORMAT_SIMPLE:
152		if (use_cycles) {
153			printf("%lf\n", (double)result_cycles/size_total);
154		} else {
155			printf("%lf\n", result_bps);
156		}
157		break;
158
159	default:
160		BUG_ON(1);
161		break;
162	}
163
164out_free:
165	free(src);
166	free(dst);
167	return;
168out_alloc_failed:
169	printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
170	goto out_free;
171}
172
173static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
174{
175	int i;
176	size_t size;
177	double size_total;
178
179	argc = parse_options(argc, argv, options, info->usage, 0);
180
181	if (use_cycles) {
182		i = init_cycles();
183		if (i < 0) {
184			fprintf(stderr, "Failed to open cycles counter\n");
185			return i;
186		}
187	}
188
189	size = (size_t)perf_atoll((char *)size_str);
190	size_total = (double)size * nr_loops;
191
192	if ((s64)size <= 0) {
193		fprintf(stderr, "Invalid size:%s\n", size_str);
194		return 1;
195	}
196
197	if (!strncmp(function_str, "all", 3)) {
198		for (i = 0; info->functions[i].name; i++)
199			__bench_mem_function(info, i, size, size_total);
200		return 0;
201	}
202
203	for (i = 0; info->functions[i].name; i++) {
204		if (!strcmp(info->functions[i].name, function_str))
205			break;
206	}
207	if (!info->functions[i].name) {
208		if (strcmp(function_str, "help") && strcmp(function_str, "h"))
209			printf("Unknown function: %s\n", function_str);
210		printf("Available functions:\n");
211		for (i = 0; info->functions[i].name; i++) {
212			printf("\t%s ... %s\n",
213			       info->functions[i].name, info->functions[i].desc);
214		}
215		return 1;
216	}
217
218	__bench_mem_function(info, i, size, size_total);
219
220	return 0;
221}
222
223static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
224{
225	u64 cycle_start = 0ULL, cycle_end = 0ULL;
226	memcpy_t fn = r->fn.memcpy;
227	int i;
228
229	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
230	memset(src, 0, size);
231
232	/*
233	 * We prefault the freshly allocated memory range here,
234	 * to not measure page fault overhead:
235	 */
236	fn(dst, src, size);
237
238	cycle_start = get_cycles();
239	for (i = 0; i < nr_loops; ++i)
240		fn(dst, src, size);
241	cycle_end = get_cycles();
242
243	return cycle_end - cycle_start;
244}
245
246static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
247{
248	struct timeval tv_start, tv_end, tv_diff;
249	memcpy_t fn = r->fn.memcpy;
250	int i;
251
252	/*
253	 * We prefault the freshly allocated memory range here,
254	 * to not measure page fault overhead:
255	 */
256	fn(dst, src, size);
257
258	BUG_ON(gettimeofday(&tv_start, NULL));
259	for (i = 0; i < nr_loops; ++i)
260		fn(dst, src, size);
261	BUG_ON(gettimeofday(&tv_end, NULL));
262
263	timersub(&tv_end, &tv_start, &tv_diff);
264
265	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
266}
267
268struct function memcpy_functions[] = {
269	{ .name		= "default",
270	  .desc		= "Default memcpy() provided by glibc",
271	  .fn.memcpy	= memcpy },
272
273#ifdef HAVE_ARCH_X86_64_SUPPORT
274# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
275# include "mem-memcpy-x86-64-asm-def.h"
276# undef MEMCPY_FN
277#endif
278
279	{ .name = NULL, }
280};
281
/* Usage strings for "perf bench mem memcpy"; NULL-terminated. */
static const char * const bench_mem_memcpy_usage[] = { "perf bench mem memcpy <options>", NULL };
286
287int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused)
288{
289	struct bench_mem_info info = {
290		.functions		= memcpy_functions,
291		.do_cycles		= do_memcpy_cycles,
292		.do_gettimeofday	= do_memcpy_gettimeofday,
293		.usage			= bench_mem_memcpy_usage,
294		.alloc_src              = true,
295	};
296
297	return bench_mem_common(argc, argv, &info);
298}
299
300static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
301{
302	u64 cycle_start = 0ULL, cycle_end = 0ULL;
303	memset_t fn = r->fn.memset;
304	int i;
305
306	/*
307	 * We prefault the freshly allocated memory range here,
308	 * to not measure page fault overhead:
309	 */
310	fn(dst, -1, size);
311
312	cycle_start = get_cycles();
313	for (i = 0; i < nr_loops; ++i)
314		fn(dst, i, size);
315	cycle_end = get_cycles();
316
317	return cycle_end - cycle_start;
318}
319
320static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
321{
322	struct timeval tv_start, tv_end, tv_diff;
323	memset_t fn = r->fn.memset;
324	int i;
325
326	/*
327	 * We prefault the freshly allocated memory range here,
328	 * to not measure page fault overhead:
329	 */
330	fn(dst, -1, size);
331
332	BUG_ON(gettimeofday(&tv_start, NULL));
333	for (i = 0; i < nr_loops; ++i)
334		fn(dst, i, size);
335	BUG_ON(gettimeofday(&tv_end, NULL));
336
337	timersub(&tv_end, &tv_start, &tv_diff);
338
339	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
340}
341
/* Usage strings for "perf bench mem memset"; NULL-terminated. */
static const char * const bench_mem_memset_usage[] = { "perf bench mem memset <options>", NULL };
346
347static const struct function memset_functions[] = {
348	{ .name		= "default",
349	  .desc		= "Default memset() provided by glibc",
350	  .fn.memset	= memset },
351
352#ifdef HAVE_ARCH_X86_64_SUPPORT
353# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
354# include "mem-memset-x86-64-asm-def.h"
355# undef MEMSET_FN
356#endif
357
358	{ .name = NULL, }
359};
360
361int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused)
362{
363	struct bench_mem_info info = {
364		.functions		= memset_functions,
365		.do_cycles		= do_memset_cycles,
366		.do_gettimeofday	= do_memset_gettimeofday,
367		.usage			= bench_mem_memset_usage,
368	};
369
370	return bench_mem_common(argc, argv, &info);
371}