Linux Audio

Check our new training course

Loading...
v4.6
 
  1/*
  2 * mem-memcpy.c
  3 *
  4 * Simple memcpy() and memset() benchmarks
  5 *
  6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  7 */
  8
  9#include "../perf.h"
 10#include "../util/util.h"
 11#include <subcmd/parse-options.h>
 12#include "../util/header.h"
 13#include "../util/cloexec.h"
 
 14#include "bench.h"
 15#include "mem-memcpy-arch.h"
 16#include "mem-memset-arch.h"
 17
 18#include <stdio.h>
 19#include <stdlib.h>
 20#include <string.h>
 
 21#include <sys/time.h>
 22#include <errno.h>
 
 
 23
 24#define K 1024
 25
 /*
  * Benchmark settings. The initializers are the defaults; the variables are
  * bound to the command-line switches declared in options[].
  */
static const char *size_str = "1MB";		/* -s/--size */
static const char *function_str = "all";	/* -f/--function */
static int nr_loops = 1;			/* -l/--nr_loops */
static bool use_cycles;				/* -c/--cycles */
static int cycles_fd;				/* set by init_cycles(), read by get_cycles() */
 31
 32static const struct option options[] = {
 33	OPT_STRING('s', "size", &size_str, "1MB",
 34		    "Specify the size of the memory buffers. "
 35		    "Available units: B, KB, MB, GB and TB (case insensitive)"),
 36
 37	OPT_STRING('f', "function", &function_str, "all",
 38		    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
 39
 40	OPT_INTEGER('l', "nr_loops", &nr_loops,
 41		    "Specify the number of loops to run. (default: 1)"),
 42
 43	OPT_BOOLEAN('c', "cycles", &use_cycles,
 44		    "Use a cycles event instead of gettimeofday() to measure performance"),
 45
 46	OPT_END()
 47};
 48
 /* Signatures of the routines under test, shaped like memcpy() and memset(). */
typedef void *(*memcpy_t)(void *dst, const void *src, size_t len);
typedef void *(*memset_t)(void *dst, int value, size_t len);

/* One entry of a benchmark table. */
struct function {
	const char *name;	/* compared against the -f/--function argument */
	const char *desc;	/* text printed next to the name */
	union {
		memcpy_t memcpy;	/* used by the memcpy benchmark */
		memset_t memset;	/* used by the memset benchmark */
	} fn;
};
 60
 61static struct perf_event_attr cycle_attr = {
 62	.type		= PERF_TYPE_HARDWARE,
 63	.config		= PERF_COUNT_HW_CPU_CYCLES
 64};
 65
 66static void init_cycles(void)
 67{
 68	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
 69
 70	if (cycles_fd < 0 && errno == ENOSYS)
 71		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
 72	else
 73		BUG_ON(cycles_fd < 0);
 
 
 74}
 75
 76static u64 get_cycles(void)
 77{
 78	int ret;
 79	u64 clk;
 80
 81	ret = read(cycles_fd, &clk, sizeof(u64));
 82	BUG_ON(ret != sizeof(u64));
 83
 84	return clk;
 85}
 86
 /* Convert a struct timeval to seconds, with microseconds as the fraction. */
static double timeval2double(struct timeval *ts)
{
	const double whole = (double)ts->tv_sec;
	const double micros = (double)ts->tv_usec;

	return whole + micros / 1000000.0;
}
 91
 /*
  * Print a throughput given in bytes/sec, scaled to the largest unit
  * (bytes, KB, MB or GB, in steps of K) that the value reaches.
  *
  * The argument is converted to double and evaluated exactly once, so it
  * may be any arithmetic expression.
  */
#define print_bps(x) do {						\
		const double bps_ = (x);				\
									\
		if (bps_ < K)						\
			printf(" %14lf bytes/sec\n", bps_);		\
		else if (bps_ < K * K)					\
			printf(" %14lf KB/sec\n", bps_ / K);		\
		else if (bps_ < K * K * K)				\
			printf(" %14lf MB/sec\n", bps_ / K / K);	\
		else							\
			printf(" %14lf GB/sec\n", bps_ / K / K / K);	\
	} while (0)
102
103struct bench_mem_info {
104	const struct function *functions;
105	u64 (*do_cycles)(const struct function *r, size_t size);
106	double (*do_gettimeofday)(const struct function *r, size_t size);
107	const char *const *usage;
 
108};
109
110static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
111{
112	const struct function *r = &info->functions[r_idx];
113	double result_bps = 0.0;
114	u64 result_cycles = 0;
 
115
116	printf("# function '%s' (%s)\n", r->name, r->desc);
117
 
 
 
 
 
 
 
 
 
118	if (bench_format == BENCH_FORMAT_DEFAULT)
119		printf("# Copying %s bytes ...\n\n", size_str);
120
121	if (use_cycles) {
122		result_cycles = info->do_cycles(r, size);
123	} else {
124		result_bps = info->do_gettimeofday(r, size);
125	}
126
127	switch (bench_format) {
128	case BENCH_FORMAT_DEFAULT:
129		if (use_cycles) {
130			printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
131		} else {
132			print_bps(result_bps);
133		}
134		break;
135
136	case BENCH_FORMAT_SIMPLE:
137		if (use_cycles) {
138			printf("%lf\n", (double)result_cycles/size_total);
139		} else {
140			printf("%lf\n", result_bps);
141		}
142		break;
143
144	default:
145		BUG_ON(1);
146		break;
147	}
 
 
 
 
 
 
 
 
148}
149
150static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
151{
152	int i;
153	size_t size;
154	double size_total;
155
156	argc = parse_options(argc, argv, options, info->usage, 0);
157
158	if (use_cycles)
159		init_cycles();
 
 
 
 
 
160
161	size = (size_t)perf_atoll((char *)size_str);
162	size_total = (double)size * nr_loops;
163
164	if ((s64)size <= 0) {
165		fprintf(stderr, "Invalid size:%s\n", size_str);
166		return 1;
167	}
168
169	if (!strncmp(function_str, "all", 3)) {
170		for (i = 0; info->functions[i].name; i++)
171			__bench_mem_function(info, i, size, size_total);
172		return 0;
173	}
174
175	for (i = 0; info->functions[i].name; i++) {
176		if (!strcmp(info->functions[i].name, function_str))
177			break;
178	}
179	if (!info->functions[i].name) {
180		if (strcmp(function_str, "help") && strcmp(function_str, "h"))
181			printf("Unknown function: %s\n", function_str);
182		printf("Available functions:\n");
183		for (i = 0; info->functions[i].name; i++) {
184			printf("\t%s ... %s\n",
185			       info->functions[i].name, info->functions[i].desc);
186		}
187		return 1;
188	}
189
190	__bench_mem_function(info, i, size, size_total);
191
192	return 0;
193}
194
/*
 * Allocate the destination and source buffers for a memcpy run, @size
 * bytes each, and store them through @dst and @src.
 * A failed allocation is reported through die().
 */
static void memcpy_alloc_mem(void **dst, void **src, size_t size)
{
	void *buf;

	buf = zalloc(size);
	*dst = buf;
	if (buf == NULL)
		die("memory allocation failed - maybe size is too large?\n");

	buf = zalloc(size);
	*src = buf;
	if (buf == NULL)
		die("memory allocation failed - maybe size is too large?\n");

	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
	memset(buf, 0, size);
}
208
209static u64 do_memcpy_cycles(const struct function *r, size_t size)
210{
211	u64 cycle_start = 0ULL, cycle_end = 0ULL;
212	void *src = NULL, *dst = NULL;
213	memcpy_t fn = r->fn.memcpy;
214	int i;
215
216	memcpy_alloc_mem(&dst, &src, size);
217
218	/*
219	 * We prefault the freshly allocated memory range here,
220	 * to not measure page fault overhead:
221	 */
222	fn(dst, src, size);
223
224	cycle_start = get_cycles();
225	for (i = 0; i < nr_loops; ++i)
226		fn(dst, src, size);
227	cycle_end = get_cycles();
228
229	free(src);
230	free(dst);
231	return cycle_end - cycle_start;
232}
233
234static double do_memcpy_gettimeofday(const struct function *r, size_t size)
235{
236	struct timeval tv_start, tv_end, tv_diff;
237	memcpy_t fn = r->fn.memcpy;
238	void *src = NULL, *dst = NULL;
239	int i;
240
241	memcpy_alloc_mem(&dst, &src, size);
242
243	/*
244	 * We prefault the freshly allocated memory range here,
245	 * to not measure page fault overhead:
246	 */
247	fn(dst, src, size);
248
249	BUG_ON(gettimeofday(&tv_start, NULL));
250	for (i = 0; i < nr_loops; ++i)
251		fn(dst, src, size);
252	BUG_ON(gettimeofday(&tv_end, NULL));
253
254	timersub(&tv_end, &tv_start, &tv_diff);
255
256	free(src);
257	free(dst);
258
259	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
260}
261
262struct function memcpy_functions[] = {
263	{ .name		= "default",
264	  .desc		= "Default memcpy() provided by glibc",
265	  .fn.memcpy	= memcpy },
266
267#ifdef HAVE_ARCH_X86_64_SUPPORT
268# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
269# include "mem-memcpy-x86-64-asm-def.h"
270# undef MEMCPY_FN
271#endif
272
273	{ .name = NULL, }
274};
275
/* NULL-terminated usage strings for the memcpy benchmark. */
static const char *const bench_mem_memcpy_usage[] = { "perf bench mem memcpy <options>", NULL };
280
281int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused)
282{
283	struct bench_mem_info info = {
284		.functions		= memcpy_functions,
285		.do_cycles		= do_memcpy_cycles,
286		.do_gettimeofday	= do_memcpy_gettimeofday,
287		.usage			= bench_mem_memcpy_usage,
 
288	};
289
290	return bench_mem_common(argc, argv, &info);
291}
292
/*
 * Allocate the @size-byte destination buffer for a memset run and store it
 * through @dst. A failed allocation is reported through die().
 */
static void memset_alloc_mem(void **dst, size_t size)
{
	void *buf = zalloc(size);

	*dst = buf;
	if (buf == NULL)
		die("memory allocation failed - maybe size is too large?\n");
}
299
300static u64 do_memset_cycles(const struct function *r, size_t size)
301{
302	u64 cycle_start = 0ULL, cycle_end = 0ULL;
303	memset_t fn = r->fn.memset;
304	void *dst = NULL;
305	int i;
306
307	memset_alloc_mem(&dst, size);
308
309	/*
310	 * We prefault the freshly allocated memory range here,
311	 * to not measure page fault overhead:
312	 */
313	fn(dst, -1, size);
314
315	cycle_start = get_cycles();
316	for (i = 0; i < nr_loops; ++i)
317		fn(dst, i, size);
318	cycle_end = get_cycles();
319
320	free(dst);
321	return cycle_end - cycle_start;
322}
323
324static double do_memset_gettimeofday(const struct function *r, size_t size)
325{
326	struct timeval tv_start, tv_end, tv_diff;
327	memset_t fn = r->fn.memset;
328	void *dst = NULL;
329	int i;
330
331	memset_alloc_mem(&dst, size);
332
333	/*
334	 * We prefault the freshly allocated memory range here,
335	 * to not measure page fault overhead:
336	 */
337	fn(dst, -1, size);
338
339	BUG_ON(gettimeofday(&tv_start, NULL));
340	for (i = 0; i < nr_loops; ++i)
341		fn(dst, i, size);
342	BUG_ON(gettimeofday(&tv_end, NULL));
343
344	timersub(&tv_end, &tv_start, &tv_diff);
345
346	free(dst);
347	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
348}
349
/* NULL-terminated usage strings for the memset benchmark. */
static const char *const bench_mem_memset_usage[] = { "perf bench mem memset <options>", NULL };
354
355static const struct function memset_functions[] = {
356	{ .name		= "default",
357	  .desc		= "Default memset() provided by glibc",
358	  .fn.memset	= memset },
359
360#ifdef HAVE_ARCH_X86_64_SUPPORT
361# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
362# include "mem-memset-x86-64-asm-def.h"
363# undef MEMSET_FN
364#endif
365
366	{ .name = NULL, }
367};
368
369int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused)
370{
371	struct bench_mem_info info = {
372		.functions		= memset_functions,
373		.do_cycles		= do_memset_cycles,
374		.do_gettimeofday	= do_memset_gettimeofday,
375		.usage			= bench_mem_memset_usage,
376	};
377
378	return bench_mem_common(argc, argv, &info);
379}
v5.14.15
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * mem-memcpy.c
  4 *
  5 * Simple memcpy() and memset() benchmarks
  6 *
  7 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  8 */
  9
 10#include "debug.h"
 11#include "../perf-sys.h"
 12#include <subcmd/parse-options.h>
 13#include "../util/header.h"
 14#include "../util/cloexec.h"
 15#include "../util/string2.h"
 16#include "bench.h"
 17#include "mem-memcpy-arch.h"
 18#include "mem-memset-arch.h"
 19
 20#include <stdio.h>
 21#include <stdlib.h>
 22#include <string.h>
 23#include <unistd.h>
 24#include <sys/time.h>
 25#include <errno.h>
 26#include <linux/time64.h>
 27#include <linux/zalloc.h>
 28
 29#define K 1024
 30
 /*
  * Benchmark settings. The initializers are the defaults; the variables are
  * bound to the command-line switches declared in options[].
  */
static const char *size_str = "1MB";		/* -s/--size */
static const char *function_str = "all";	/* -f/--function */
static int nr_loops = 1;			/* -l/--nr_loops */
static bool use_cycles;				/* -c/--cycles */
static int cycles_fd;				/* set by init_cycles(), read by get_cycles() */
 36
 37static const struct option options[] = {
 38	OPT_STRING('s', "size", &size_str, "1MB",
 39		    "Specify the size of the memory buffers. "
 40		    "Available units: B, KB, MB, GB and TB (case insensitive)"),
 41
 42	OPT_STRING('f', "function", &function_str, "all",
 43		    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
 44
 45	OPT_INTEGER('l', "nr_loops", &nr_loops,
 46		    "Specify the number of loops to run. (default: 1)"),
 47
 48	OPT_BOOLEAN('c', "cycles", &use_cycles,
 49		    "Use a cycles event instead of gettimeofday() to measure performance"),
 50
 51	OPT_END()
 52};
 53
 /* Signatures of the routines under test, shaped like memcpy() and memset(). */
typedef void *(*memcpy_t)(void *dst, const void *src, size_t len);
typedef void *(*memset_t)(void *dst, int value, size_t len);

/* One entry of a benchmark table. */
struct function {
	const char *name;	/* compared against the -f/--function argument */
	const char *desc;	/* text printed next to the name */
	union {
		memcpy_t memcpy;	/* used by the memcpy benchmark */
		memset_t memset;	/* used by the memset benchmark */
	} fn;
};
 65
 66static struct perf_event_attr cycle_attr = {
 67	.type		= PERF_TYPE_HARDWARE,
 68	.config		= PERF_COUNT_HW_CPU_CYCLES
 69};
 70
 71static int init_cycles(void)
 72{
 73	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
 74
 75	if (cycles_fd < 0 && errno == ENOSYS) {
 76		pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
 77		return -1;
 78	}
 79
 80	return cycles_fd;
 81}
 82
 83static u64 get_cycles(void)
 84{
 85	int ret;
 86	u64 clk;
 87
 88	ret = read(cycles_fd, &clk, sizeof(u64));
 89	BUG_ON(ret != sizeof(u64));
 90
 91	return clk;
 92}
 93
 94static double timeval2double(struct timeval *ts)
 95{
 96	return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
 97}
 98
 /*
  * Print a throughput given in bytes/sec, scaled to the largest unit
  * (bytes, KB, MB or GB, in steps of K) that the value reaches.
  *
  * The argument is converted to double and evaluated exactly once, so it
  * may be any arithmetic expression.
  */
#define print_bps(x) do {						\
		const double bps_ = (x);				\
									\
		if (bps_ < K)						\
			printf(" %14lf bytes/sec\n", bps_);		\
		else if (bps_ < K * K)					\
			printf(" %14lf KB/sec\n", bps_ / K);		\
		else if (bps_ < K * K * K)				\
			printf(" %14lf MB/sec\n", bps_ / K / K);	\
		else							\
			printf(" %14lf GB/sec\n", bps_ / K / K / K);	\
	} while (0)
109
110struct bench_mem_info {
111	const struct function *functions;
112	u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
113	double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
114	const char *const *usage;
115	bool alloc_src;
116};
117
118static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
119{
120	const struct function *r = &info->functions[r_idx];
121	double result_bps = 0.0;
122	u64 result_cycles = 0;
123	void *src = NULL, *dst = zalloc(size);
124
125	printf("# function '%s' (%s)\n", r->name, r->desc);
126
127	if (dst == NULL)
128		goto out_alloc_failed;
129
130	if (info->alloc_src) {
131		src = zalloc(size);
132		if (src == NULL)
133			goto out_alloc_failed;
134	}
135
136	if (bench_format == BENCH_FORMAT_DEFAULT)
137		printf("# Copying %s bytes ...\n\n", size_str);
138
139	if (use_cycles) {
140		result_cycles = info->do_cycles(r, size, src, dst);
141	} else {
142		result_bps = info->do_gettimeofday(r, size, src, dst);
143	}
144
145	switch (bench_format) {
146	case BENCH_FORMAT_DEFAULT:
147		if (use_cycles) {
148			printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
149		} else {
150			print_bps(result_bps);
151		}
152		break;
153
154	case BENCH_FORMAT_SIMPLE:
155		if (use_cycles) {
156			printf("%lf\n", (double)result_cycles/size_total);
157		} else {
158			printf("%lf\n", result_bps);
159		}
160		break;
161
162	default:
163		BUG_ON(1);
164		break;
165	}
166
167out_free:
168	free(src);
169	free(dst);
170	return;
171out_alloc_failed:
172	printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
173	goto out_free;
174}
175
176static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
177{
178	int i;
179	size_t size;
180	double size_total;
181
182	argc = parse_options(argc, argv, options, info->usage, 0);
183
184	if (use_cycles) {
185		i = init_cycles();
186		if (i < 0) {
187			fprintf(stderr, "Failed to open cycles counter\n");
188			return i;
189		}
190	}
191
192	size = (size_t)perf_atoll((char *)size_str);
193	size_total = (double)size * nr_loops;
194
195	if ((s64)size <= 0) {
196		fprintf(stderr, "Invalid size:%s\n", size_str);
197		return 1;
198	}
199
200	if (!strncmp(function_str, "all", 3)) {
201		for (i = 0; info->functions[i].name; i++)
202			__bench_mem_function(info, i, size, size_total);
203		return 0;
204	}
205
206	for (i = 0; info->functions[i].name; i++) {
207		if (!strcmp(info->functions[i].name, function_str))
208			break;
209	}
210	if (!info->functions[i].name) {
211		if (strcmp(function_str, "help") && strcmp(function_str, "h"))
212			printf("Unknown function: %s\n", function_str);
213		printf("Available functions:\n");
214		for (i = 0; info->functions[i].name; i++) {
215			printf("\t%s ... %s\n",
216			       info->functions[i].name, info->functions[i].desc);
217		}
218		return 1;
219	}
220
221	__bench_mem_function(info, i, size, size_total);
222
223	return 0;
224}
225
226static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
227{
 
 
 
 
 
 
 
 
228	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
229	memset(src, 0, size);
230
231	/*
232	 * We prefault the freshly allocated memory range here,
233	 * to not measure page fault overhead:
234	 */
235	fn(dst, src, size);
236}
237
238static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
239{
240	u64 cycle_start = 0ULL, cycle_end = 0ULL;
 
241	memcpy_t fn = r->fn.memcpy;
242	int i;
243
244	memcpy_prefault(fn, size, src, dst);
 
 
 
 
 
 
245
246	cycle_start = get_cycles();
247	for (i = 0; i < nr_loops; ++i)
248		fn(dst, src, size);
249	cycle_end = get_cycles();
250
 
 
251	return cycle_end - cycle_start;
252}
253
254static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
255{
256	struct timeval tv_start, tv_end, tv_diff;
257	memcpy_t fn = r->fn.memcpy;
 
258	int i;
259
260	memcpy_prefault(fn, size, src, dst);
 
 
 
 
 
 
261
262	BUG_ON(gettimeofday(&tv_start, NULL));
263	for (i = 0; i < nr_loops; ++i)
264		fn(dst, src, size);
265	BUG_ON(gettimeofday(&tv_end, NULL));
266
267	timersub(&tv_end, &tv_start, &tv_diff);
268
 
 
 
269	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
270}
271
272struct function memcpy_functions[] = {
273	{ .name		= "default",
274	  .desc		= "Default memcpy() provided by glibc",
275	  .fn.memcpy	= memcpy },
276
277#ifdef HAVE_ARCH_X86_64_SUPPORT
278# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
279# include "mem-memcpy-x86-64-asm-def.h"
280# undef MEMCPY_FN
281#endif
282
283	{ .name = NULL, }
284};
285
/* NULL-terminated usage strings for the memcpy benchmark. */
static const char *const bench_mem_memcpy_usage[] = { "perf bench mem memcpy <options>", NULL };
290
291int bench_mem_memcpy(int argc, const char **argv)
292{
293	struct bench_mem_info info = {
294		.functions		= memcpy_functions,
295		.do_cycles		= do_memcpy_cycles,
296		.do_gettimeofday	= do_memcpy_gettimeofday,
297		.usage			= bench_mem_memcpy_usage,
298		.alloc_src              = true,
299	};
300
301	return bench_mem_common(argc, argv, &info);
302}
303
304static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
 
 
 
 
 
 
 
305{
306	u64 cycle_start = 0ULL, cycle_end = 0ULL;
307	memset_t fn = r->fn.memset;
 
308	int i;
309
 
 
310	/*
311	 * We prefault the freshly allocated memory range here,
312	 * to not measure page fault overhead:
313	 */
314	fn(dst, -1, size);
315
316	cycle_start = get_cycles();
317	for (i = 0; i < nr_loops; ++i)
318		fn(dst, i, size);
319	cycle_end = get_cycles();
320
 
321	return cycle_end - cycle_start;
322}
323
324static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
325{
326	struct timeval tv_start, tv_end, tv_diff;
327	memset_t fn = r->fn.memset;
 
328	int i;
329
 
 
330	/*
331	 * We prefault the freshly allocated memory range here,
332	 * to not measure page fault overhead:
333	 */
334	fn(dst, -1, size);
335
336	BUG_ON(gettimeofday(&tv_start, NULL));
337	for (i = 0; i < nr_loops; ++i)
338		fn(dst, i, size);
339	BUG_ON(gettimeofday(&tv_end, NULL));
340
341	timersub(&tv_end, &tv_start, &tv_diff);
342
 
343	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
344}
345
/* NULL-terminated usage strings for the memset benchmark. */
static const char *const bench_mem_memset_usage[] = { "perf bench mem memset <options>", NULL };
350
351static const struct function memset_functions[] = {
352	{ .name		= "default",
353	  .desc		= "Default memset() provided by glibc",
354	  .fn.memset	= memset },
355
356#ifdef HAVE_ARCH_X86_64_SUPPORT
357# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
358# include "mem-memset-x86-64-asm-def.h"
359# undef MEMSET_FN
360#endif
361
362	{ .name = NULL, }
363};
364
365int bench_mem_memset(int argc, const char **argv)
366{
367	struct bench_mem_info info = {
368		.functions		= memset_functions,
369		.do_cycles		= do_memset_cycles,
370		.do_gettimeofday	= do_memset_gettimeofday,
371		.usage			= bench_mem_memset_usage,
372	};
373
374	return bench_mem_common(argc, argv, &info);
375}