Linux Audio

Check our new training course

Loading...
v5.4
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * mem-memcpy.c
  4 *
  5 * Simple memcpy() and memset() benchmarks
  6 *
  7 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  8 */
  9
 10#include "debug.h"
 11#include "../perf-sys.h"
 
 12#include <subcmd/parse-options.h>
 13#include "../util/header.h"
 14#include "../util/cloexec.h"
 15#include "../util/string2.h"
 16#include "bench.h"
 17#include "mem-memcpy-arch.h"
 18#include "mem-memset-arch.h"
 19
 20#include <stdio.h>
 21#include <stdlib.h>
 22#include <string.h>
 23#include <unistd.h>
 24#include <sys/time.h>
 25#include <errno.h>
 26#include <linux/time64.h>
 27#include <linux/zalloc.h>
 28
 29#define K 1024
 30
 31static const char	*size_str	= "1MB";
 32static const char	*function_str	= "all";
 33static int		nr_loops	= 1;
 34static bool		use_cycles;
 35static int		cycles_fd;
 36
 37static const struct option options[] = {
 38	OPT_STRING('s', "size", &size_str, "1MB",
 39		    "Specify the size of the memory buffers. "
 40		    "Available units: B, KB, MB, GB and TB (case insensitive)"),
 41
 42	OPT_STRING('f', "function", &function_str, "all",
 43		    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
 44
 45	OPT_INTEGER('l', "nr_loops", &nr_loops,
 46		    "Specify the number of loops to run. (default: 1)"),
 47
 48	OPT_BOOLEAN('c', "cycles", &use_cycles,
 49		    "Use a cycles event instead of gettimeofday() to measure performance"),
 50
 51	OPT_END()
 52};
 53
 54typedef void *(*memcpy_t)(void *, const void *, size_t);
 55typedef void *(*memset_t)(void *, int, size_t);
 56
 57struct function {
 58	const char *name;
 59	const char *desc;
 60	union {
 61		memcpy_t memcpy;
 62		memset_t memset;
 63	} fn;
 64};
 65
 66static struct perf_event_attr cycle_attr = {
 67	.type		= PERF_TYPE_HARDWARE,
 68	.config		= PERF_COUNT_HW_CPU_CYCLES
 69};
 70
 71static int init_cycles(void)
 72{
 73	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
 74
 75	if (cycles_fd < 0 && errno == ENOSYS) {
 76		pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
 77		return -1;
 78	}
 79
 80	return cycles_fd;
 81}
 82
 83static u64 get_cycles(void)
 84{
 85	int ret;
 86	u64 clk;
 87
 88	ret = read(cycles_fd, &clk, sizeof(u64));
 89	BUG_ON(ret != sizeof(u64));
 90
 91	return clk;
 92}
 93
 94static double timeval2double(struct timeval *ts)
 95{
 96	return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
 97}
 98
 99#define print_bps(x) do {						\
100		if (x < K)						\
101			printf(" %14lf bytes/sec\n", x);		\
102		else if (x < K * K)					\
103			printf(" %14lfd KB/sec\n", x / K);		\
104		else if (x < K * K * K)					\
105			printf(" %14lf MB/sec\n", x / K / K);		\
106		else							\
107			printf(" %14lf GB/sec\n", x / K / K / K);	\
108	} while (0)
109
110struct bench_mem_info {
111	const struct function *functions;
112	u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
113	double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
114	const char *const *usage;
115	bool alloc_src;
116};
117
118static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
119{
120	const struct function *r = &info->functions[r_idx];
121	double result_bps = 0.0;
122	u64 result_cycles = 0;
123	void *src = NULL, *dst = zalloc(size);
124
125	printf("# function '%s' (%s)\n", r->name, r->desc);
126
127	if (dst == NULL)
128		goto out_alloc_failed;
129
130	if (info->alloc_src) {
131		src = zalloc(size);
132		if (src == NULL)
133			goto out_alloc_failed;
134	}
135
136	if (bench_format == BENCH_FORMAT_DEFAULT)
137		printf("# Copying %s bytes ...\n\n", size_str);
138
139	if (use_cycles) {
140		result_cycles = info->do_cycles(r, size, src, dst);
141	} else {
142		result_bps = info->do_gettimeofday(r, size, src, dst);
143	}
144
145	switch (bench_format) {
146	case BENCH_FORMAT_DEFAULT:
147		if (use_cycles) {
148			printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
149		} else {
150			print_bps(result_bps);
151		}
152		break;
153
154	case BENCH_FORMAT_SIMPLE:
155		if (use_cycles) {
156			printf("%lf\n", (double)result_cycles/size_total);
157		} else {
158			printf("%lf\n", result_bps);
159		}
160		break;
161
162	default:
163		BUG_ON(1);
164		break;
165	}
166
167out_free:
168	free(src);
169	free(dst);
170	return;
171out_alloc_failed:
172	printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
173	goto out_free;
174}
175
176static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
177{
178	int i;
179	size_t size;
180	double size_total;
181
182	argc = parse_options(argc, argv, options, info->usage, 0);
183
184	if (use_cycles) {
185		i = init_cycles();
186		if (i < 0) {
187			fprintf(stderr, "Failed to open cycles counter\n");
188			return i;
189		}
190	}
191
192	size = (size_t)perf_atoll((char *)size_str);
193	size_total = (double)size * nr_loops;
194
195	if ((s64)size <= 0) {
196		fprintf(stderr, "Invalid size:%s\n", size_str);
197		return 1;
198	}
199
200	if (!strncmp(function_str, "all", 3)) {
201		for (i = 0; info->functions[i].name; i++)
202			__bench_mem_function(info, i, size, size_total);
203		return 0;
204	}
205
206	for (i = 0; info->functions[i].name; i++) {
207		if (!strcmp(info->functions[i].name, function_str))
208			break;
209	}
210	if (!info->functions[i].name) {
211		if (strcmp(function_str, "help") && strcmp(function_str, "h"))
212			printf("Unknown function: %s\n", function_str);
213		printf("Available functions:\n");
214		for (i = 0; info->functions[i].name; i++) {
215			printf("\t%s ... %s\n",
216			       info->functions[i].name, info->functions[i].desc);
217		}
218		return 1;
219	}
220
221	__bench_mem_function(info, i, size, size_total);
222
223	return 0;
224}
225
226static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
227{
228	u64 cycle_start = 0ULL, cycle_end = 0ULL;
229	memcpy_t fn = r->fn.memcpy;
230	int i;
231
232	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
233	memset(src, 0, size);
234
235	/*
236	 * We prefault the freshly allocated memory range here,
237	 * to not measure page fault overhead:
238	 */
239	fn(dst, src, size);
240
241	cycle_start = get_cycles();
242	for (i = 0; i < nr_loops; ++i)
243		fn(dst, src, size);
244	cycle_end = get_cycles();
245
246	return cycle_end - cycle_start;
247}
248
249static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
250{
251	struct timeval tv_start, tv_end, tv_diff;
252	memcpy_t fn = r->fn.memcpy;
253	int i;
254
255	/*
256	 * We prefault the freshly allocated memory range here,
257	 * to not measure page fault overhead:
258	 */
259	fn(dst, src, size);
260
261	BUG_ON(gettimeofday(&tv_start, NULL));
262	for (i = 0; i < nr_loops; ++i)
263		fn(dst, src, size);
264	BUG_ON(gettimeofday(&tv_end, NULL));
265
266	timersub(&tv_end, &tv_start, &tv_diff);
267
268	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
269}
270
271struct function memcpy_functions[] = {
272	{ .name		= "default",
273	  .desc		= "Default memcpy() provided by glibc",
274	  .fn.memcpy	= memcpy },
275
276#ifdef HAVE_ARCH_X86_64_SUPPORT
277# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
278# include "mem-memcpy-x86-64-asm-def.h"
279# undef MEMCPY_FN
280#endif
281
282	{ .name = NULL, }
283};
284
285static const char * const bench_mem_memcpy_usage[] = {
286	"perf bench mem memcpy <options>",
287	NULL
288};
289
290int bench_mem_memcpy(int argc, const char **argv)
291{
292	struct bench_mem_info info = {
293		.functions		= memcpy_functions,
294		.do_cycles		= do_memcpy_cycles,
295		.do_gettimeofday	= do_memcpy_gettimeofday,
296		.usage			= bench_mem_memcpy_usage,
297		.alloc_src              = true,
298	};
299
300	return bench_mem_common(argc, argv, &info);
301}
302
303static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
304{
305	u64 cycle_start = 0ULL, cycle_end = 0ULL;
306	memset_t fn = r->fn.memset;
307	int i;
308
309	/*
310	 * We prefault the freshly allocated memory range here,
311	 * to not measure page fault overhead:
312	 */
313	fn(dst, -1, size);
314
315	cycle_start = get_cycles();
316	for (i = 0; i < nr_loops; ++i)
317		fn(dst, i, size);
318	cycle_end = get_cycles();
319
320	return cycle_end - cycle_start;
321}
322
323static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
324{
325	struct timeval tv_start, tv_end, tv_diff;
326	memset_t fn = r->fn.memset;
327	int i;
328
329	/*
330	 * We prefault the freshly allocated memory range here,
331	 * to not measure page fault overhead:
332	 */
333	fn(dst, -1, size);
334
335	BUG_ON(gettimeofday(&tv_start, NULL));
336	for (i = 0; i < nr_loops; ++i)
337		fn(dst, i, size);
338	BUG_ON(gettimeofday(&tv_end, NULL));
339
340	timersub(&tv_end, &tv_start, &tv_diff);
341
342	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
343}
344
345static const char * const bench_mem_memset_usage[] = {
346	"perf bench mem memset <options>",
347	NULL
348};
349
350static const struct function memset_functions[] = {
351	{ .name		= "default",
352	  .desc		= "Default memset() provided by glibc",
353	  .fn.memset	= memset },
354
355#ifdef HAVE_ARCH_X86_64_SUPPORT
356# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
357# include "mem-memset-x86-64-asm-def.h"
358# undef MEMSET_FN
359#endif
360
361	{ .name = NULL, }
362};
363
364int bench_mem_memset(int argc, const char **argv)
365{
366	struct bench_mem_info info = {
367		.functions		= memset_functions,
368		.do_cycles		= do_memset_cycles,
369		.do_gettimeofday	= do_memset_gettimeofday,
370		.usage			= bench_mem_memset_usage,
371	};
372
373	return bench_mem_common(argc, argv, &info);
374}
v4.17
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * mem-memcpy.c
  4 *
  5 * Simple memcpy() and memset() benchmarks
  6 *
  7 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  8 */
  9
 10#include "debug.h"
 11#include "../perf.h"
 12#include "../util/util.h"
 13#include <subcmd/parse-options.h>
 14#include "../util/header.h"
 15#include "../util/cloexec.h"
 16#include "../util/string2.h"
 17#include "bench.h"
 18#include "mem-memcpy-arch.h"
 19#include "mem-memset-arch.h"
 20
 21#include <stdio.h>
 22#include <stdlib.h>
 23#include <string.h>
 
 24#include <sys/time.h>
 25#include <errno.h>
 26#include <linux/time64.h>
 
 27
 28#define K 1024
 29
 30static const char	*size_str	= "1MB";
 31static const char	*function_str	= "all";
 32static int		nr_loops	= 1;
 33static bool		use_cycles;
 34static int		cycles_fd;
 35
 36static const struct option options[] = {
 37	OPT_STRING('s', "size", &size_str, "1MB",
 38		    "Specify the size of the memory buffers. "
 39		    "Available units: B, KB, MB, GB and TB (case insensitive)"),
 40
 41	OPT_STRING('f', "function", &function_str, "all",
 42		    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
 43
 44	OPT_INTEGER('l', "nr_loops", &nr_loops,
 45		    "Specify the number of loops to run. (default: 1)"),
 46
 47	OPT_BOOLEAN('c', "cycles", &use_cycles,
 48		    "Use a cycles event instead of gettimeofday() to measure performance"),
 49
 50	OPT_END()
 51};
 52
 53typedef void *(*memcpy_t)(void *, const void *, size_t);
 54typedef void *(*memset_t)(void *, int, size_t);
 55
 56struct function {
 57	const char *name;
 58	const char *desc;
 59	union {
 60		memcpy_t memcpy;
 61		memset_t memset;
 62	} fn;
 63};
 64
 65static struct perf_event_attr cycle_attr = {
 66	.type		= PERF_TYPE_HARDWARE,
 67	.config		= PERF_COUNT_HW_CPU_CYCLES
 68};
 69
 70static int init_cycles(void)
 71{
 72	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
 73
 74	if (cycles_fd < 0 && errno == ENOSYS) {
 75		pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
 76		return -1;
 77	}
 78
 79	return cycles_fd;
 80}
 81
 82static u64 get_cycles(void)
 83{
 84	int ret;
 85	u64 clk;
 86
 87	ret = read(cycles_fd, &clk, sizeof(u64));
 88	BUG_ON(ret != sizeof(u64));
 89
 90	return clk;
 91}
 92
 93static double timeval2double(struct timeval *ts)
 94{
 95	return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
 96}
 97
 98#define print_bps(x) do {						\
 99		if (x < K)						\
100			printf(" %14lf bytes/sec\n", x);		\
101		else if (x < K * K)					\
102			printf(" %14lfd KB/sec\n", x / K);		\
103		else if (x < K * K * K)					\
104			printf(" %14lf MB/sec\n", x / K / K);		\
105		else							\
106			printf(" %14lf GB/sec\n", x / K / K / K);	\
107	} while (0)
108
109struct bench_mem_info {
110	const struct function *functions;
111	u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
112	double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
113	const char *const *usage;
114	bool alloc_src;
115};
116
117static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
118{
119	const struct function *r = &info->functions[r_idx];
120	double result_bps = 0.0;
121	u64 result_cycles = 0;
122	void *src = NULL, *dst = zalloc(size);
123
124	printf("# function '%s' (%s)\n", r->name, r->desc);
125
126	if (dst == NULL)
127		goto out_alloc_failed;
128
129	if (info->alloc_src) {
130		src = zalloc(size);
131		if (src == NULL)
132			goto out_alloc_failed;
133	}
134
135	if (bench_format == BENCH_FORMAT_DEFAULT)
136		printf("# Copying %s bytes ...\n\n", size_str);
137
138	if (use_cycles) {
139		result_cycles = info->do_cycles(r, size, src, dst);
140	} else {
141		result_bps = info->do_gettimeofday(r, size, src, dst);
142	}
143
144	switch (bench_format) {
145	case BENCH_FORMAT_DEFAULT:
146		if (use_cycles) {
147			printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
148		} else {
149			print_bps(result_bps);
150		}
151		break;
152
153	case BENCH_FORMAT_SIMPLE:
154		if (use_cycles) {
155			printf("%lf\n", (double)result_cycles/size_total);
156		} else {
157			printf("%lf\n", result_bps);
158		}
159		break;
160
161	default:
162		BUG_ON(1);
163		break;
164	}
165
166out_free:
167	free(src);
168	free(dst);
169	return;
170out_alloc_failed:
171	printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
172	goto out_free;
173}
174
175static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
176{
177	int i;
178	size_t size;
179	double size_total;
180
181	argc = parse_options(argc, argv, options, info->usage, 0);
182
183	if (use_cycles) {
184		i = init_cycles();
185		if (i < 0) {
186			fprintf(stderr, "Failed to open cycles counter\n");
187			return i;
188		}
189	}
190
191	size = (size_t)perf_atoll((char *)size_str);
192	size_total = (double)size * nr_loops;
193
194	if ((s64)size <= 0) {
195		fprintf(stderr, "Invalid size:%s\n", size_str);
196		return 1;
197	}
198
199	if (!strncmp(function_str, "all", 3)) {
200		for (i = 0; info->functions[i].name; i++)
201			__bench_mem_function(info, i, size, size_total);
202		return 0;
203	}
204
205	for (i = 0; info->functions[i].name; i++) {
206		if (!strcmp(info->functions[i].name, function_str))
207			break;
208	}
209	if (!info->functions[i].name) {
210		if (strcmp(function_str, "help") && strcmp(function_str, "h"))
211			printf("Unknown function: %s\n", function_str);
212		printf("Available functions:\n");
213		for (i = 0; info->functions[i].name; i++) {
214			printf("\t%s ... %s\n",
215			       info->functions[i].name, info->functions[i].desc);
216		}
217		return 1;
218	}
219
220	__bench_mem_function(info, i, size, size_total);
221
222	return 0;
223}
224
225static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
226{
227	u64 cycle_start = 0ULL, cycle_end = 0ULL;
228	memcpy_t fn = r->fn.memcpy;
229	int i;
230
231	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
232	memset(src, 0, size);
233
234	/*
235	 * We prefault the freshly allocated memory range here,
236	 * to not measure page fault overhead:
237	 */
238	fn(dst, src, size);
239
240	cycle_start = get_cycles();
241	for (i = 0; i < nr_loops; ++i)
242		fn(dst, src, size);
243	cycle_end = get_cycles();
244
245	return cycle_end - cycle_start;
246}
247
248static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
249{
250	struct timeval tv_start, tv_end, tv_diff;
251	memcpy_t fn = r->fn.memcpy;
252	int i;
253
254	/*
255	 * We prefault the freshly allocated memory range here,
256	 * to not measure page fault overhead:
257	 */
258	fn(dst, src, size);
259
260	BUG_ON(gettimeofday(&tv_start, NULL));
261	for (i = 0; i < nr_loops; ++i)
262		fn(dst, src, size);
263	BUG_ON(gettimeofday(&tv_end, NULL));
264
265	timersub(&tv_end, &tv_start, &tv_diff);
266
267	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
268}
269
270struct function memcpy_functions[] = {
271	{ .name		= "default",
272	  .desc		= "Default memcpy() provided by glibc",
273	  .fn.memcpy	= memcpy },
274
275#ifdef HAVE_ARCH_X86_64_SUPPORT
276# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
277# include "mem-memcpy-x86-64-asm-def.h"
278# undef MEMCPY_FN
279#endif
280
281	{ .name = NULL, }
282};
283
284static const char * const bench_mem_memcpy_usage[] = {
285	"perf bench mem memcpy <options>",
286	NULL
287};
288
289int bench_mem_memcpy(int argc, const char **argv)
290{
291	struct bench_mem_info info = {
292		.functions		= memcpy_functions,
293		.do_cycles		= do_memcpy_cycles,
294		.do_gettimeofday	= do_memcpy_gettimeofday,
295		.usage			= bench_mem_memcpy_usage,
296		.alloc_src              = true,
297	};
298
299	return bench_mem_common(argc, argv, &info);
300}
301
302static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
303{
304	u64 cycle_start = 0ULL, cycle_end = 0ULL;
305	memset_t fn = r->fn.memset;
306	int i;
307
308	/*
309	 * We prefault the freshly allocated memory range here,
310	 * to not measure page fault overhead:
311	 */
312	fn(dst, -1, size);
313
314	cycle_start = get_cycles();
315	for (i = 0; i < nr_loops; ++i)
316		fn(dst, i, size);
317	cycle_end = get_cycles();
318
319	return cycle_end - cycle_start;
320}
321
322static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
323{
324	struct timeval tv_start, tv_end, tv_diff;
325	memset_t fn = r->fn.memset;
326	int i;
327
328	/*
329	 * We prefault the freshly allocated memory range here,
330	 * to not measure page fault overhead:
331	 */
332	fn(dst, -1, size);
333
334	BUG_ON(gettimeofday(&tv_start, NULL));
335	for (i = 0; i < nr_loops; ++i)
336		fn(dst, i, size);
337	BUG_ON(gettimeofday(&tv_end, NULL));
338
339	timersub(&tv_end, &tv_start, &tv_diff);
340
341	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
342}
343
344static const char * const bench_mem_memset_usage[] = {
345	"perf bench mem memset <options>",
346	NULL
347};
348
349static const struct function memset_functions[] = {
350	{ .name		= "default",
351	  .desc		= "Default memset() provided by glibc",
352	  .fn.memset	= memset },
353
354#ifdef HAVE_ARCH_X86_64_SUPPORT
355# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
356# include "mem-memset-x86-64-asm-def.h"
357# undef MEMSET_FN
358#endif
359
360	{ .name = NULL, }
361};
362
363int bench_mem_memset(int argc, const char **argv)
364{
365	struct bench_mem_info info = {
366		.functions		= memset_functions,
367		.do_cycles		= do_memset_cycles,
368		.do_gettimeofday	= do_memset_gettimeofday,
369		.usage			= bench_mem_memset_usage,
370	};
371
372	return bench_mem_common(argc, argv, &info);
373}