v6.13.7 — tools/testing/selftests/resctrl/cat_test.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Cache Allocation Technology (CAT) test
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Authors:
 *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
 *    Fenghua Yu <fenghua.yu@intel.com>
 */
#include "resctrl.h"
#include <unistd.h>

#define RESULT_FILE_NAME	"result_cat"
#define NUM_OF_RUNS		5

/*
 * The minimum difference in LLC misses between a test with an (n+1)-bit
 * CBM and a test with an n-bit CBM is MIN_DIFF_PERCENT_PER_BIT * (n - 1).
 * With e.g. 5 vs 4 bits in the CBM mask, the minimum difference must be
 * at least MIN_DIFF_PERCENT_PER_BIT * (4 - 1) = 3 percent.
 *
 * The relationship between the number of used CBM bits and the difference
 * in LLC misses is not expected to be linear. With a small number of bits,
 * the margin is smaller than with a larger number of bits. For selftest
 * purposes, however, a linear approach is enough because ultimately only
 * a pass/fail decision has to be made and the distinction between a strong
 * and a stronger signal is irrelevant.
 */
#define MIN_DIFF_PERCENT_PER_BIT	1UL
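
/*
 * Worked example (illustrative): with bits = 4, the threshold handed to
 * show_results_info() by check_results() below is
 * MIN_DIFF_PERCENT_PER_BIT * (4 - 1) = 3, i.e. LLC misses must grow by at
 * least 3% when the CBM shrinks from 5 to 4 bits for the Intel check to pass.
 */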

static int show_results_info(__u64 sum_llc_val, int no_of_bits,
			     unsigned long cache_span,
			     unsigned long min_diff_percent,
			     unsigned long num_of_runs, bool platform,
			     __s64 *prev_avg_llc_val)
{
	__u64 avg_llc_val = 0;
	float avg_diff;
	int ret = 0;

	avg_llc_val = sum_llc_val / num_of_runs;
	if (*prev_avg_llc_val) {
		float delta = (__s64)(avg_llc_val - *prev_avg_llc_val);

		avg_diff = delta / *prev_avg_llc_val;
		ret = platform && (avg_diff * 100) < (float)min_diff_percent;

		ksft_print_msg("%s Check cache miss rate changed more than %.1f%%\n",
			       ret ? "Fail:" : "Pass:", (float)min_diff_percent);

		ksft_print_msg("Percent diff=%.1f\n", avg_diff * 100);
	}
	*prev_avg_llc_val = avg_llc_val;

	show_cache_info(no_of_bits, avg_llc_val, cache_span, true);

	return ret;
}

/* Remove the highest bit from CBM */
static unsigned long next_mask(unsigned long current_mask)
{
	return current_mask & (current_mask >> 1);
}
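
/*
 * Illustration: for the contiguous masks this test uses, masking with the
 * value shifted right by one clears exactly the top set bit, so repeated
 * calls walk e.g. 0xf -> 0x7 -> 0x3 -> 0x1 -> 0.
 */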

static int check_results(struct resctrl_val_param *param, const char *cache_type,
			 unsigned long cache_total_size, unsigned long full_cache_mask,
			 unsigned long current_mask)
{
	char *token_array[8], temp[512];
	__u64 sum_llc_perf_miss = 0;
	__s64 prev_avg_llc_val = 0;
	unsigned long alloc_size;
	int runs = 0;
	int fail = 0;
	int ret;
	FILE *fp;

	ksft_print_msg("Checking for pass/fail\n");
	fp = fopen(param->filename, "r");
	if (!fp) {
		ksft_perror("Cannot open file");

		return -1;
	}

	while (fgets(temp, sizeof(temp), fp)) {
		char *token = strtok(temp, ":\t");
		int fields = 0;
		int bits;

		while (token) {
			token_array[fields++] = token;
			token = strtok(NULL, ":\t");
		}

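		/*
		 * The fourth ':'/tab-separated token on each result line is
		 * the LLC miss count that perf_event_measure() recorded for
		 * one run.
		 */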
		sum_llc_perf_miss += strtoull(token_array[3], NULL, 0);
		runs++;

		if (runs < NUM_OF_RUNS)
			continue;

		if (!current_mask) {
			ksft_print_msg("Unexpected empty cache mask\n");
			break;
		}

		alloc_size = cache_portion_size(cache_total_size, current_mask, full_cache_mask);

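		/*
		 * cache_portion_size() scales the total cache size by the
		 * fraction of CBM bits selected (a sketch, assuming it
		 * computes cache_total_size * count_bits(portion_mask) /
		 * count_bits(full_cache_mask)); dividing by 64 below then
		 * approximates the span in 64-byte cache lines.
		 */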
		bits = count_bits(current_mask);

		ret = show_results_info(sum_llc_perf_miss, bits,
					alloc_size / 64,
					MIN_DIFF_PERCENT_PER_BIT * (bits - 1),
					runs, get_vendor() == ARCH_INTEL,
					&prev_avg_llc_val);
		if (ret)
			fail = 1;

		runs = 0;
		sum_llc_perf_miss = 0;
		current_mask = next_mask(current_mask);
	}

	fclose(fp);

	return fail;
}

static void cat_test_cleanup(void)
{
	remove(RESULT_FILE_NAME);
}

/*
 * cat_test - Execute CAT benchmark and measure cache misses
 * @test:		Test information structure
 * @uparams:		User supplied parameters
 * @param:		Parameters passed to cat_test()
 * @span:		Buffer size for the benchmark
 * @current_mask:	Start mask for the first iteration
 *
 * Run the CAT selftest by varying the allocated cache portion and comparing
 * the impact on cache misses (the result analysis is done in check_results()
 * and show_results_info(), not in this function).
 *
 * One bit is removed from the CAT allocation bit mask (in current_mask) for
 * each subsequent test, which keeps reducing the size of the allocated cache
 * portion. A single test flushes the buffer, reads it to warm up the cache,
 * and reads the buffer again. The cache misses are measured during the last
 * read pass.
 *
 * Return:		0 when the test was run, < 0 on error.
 */
static int cat_test(const struct resctrl_test *test,
		    const struct user_params *uparams,
		    struct resctrl_val_param *param,
		    size_t span, unsigned long current_mask)
{
	struct perf_event_read pe_read;
	struct perf_event_attr pea;
	cpu_set_t old_affinity;
	unsigned char *buf;
	char schemata[64];
	int ret, i, pe_fd;
	pid_t bm_pid;

	if (strcmp(param->filename, "") == 0)
		sprintf(param->filename, "stdio");

	bm_pid = getpid();

	/* Taskset benchmark to the specified cpu */
	ret = taskset_benchmark(bm_pid, uparams->cpu, &old_affinity);
	if (ret)
		return ret;

	/* Write benchmark to the specified con_mon grp, mon_grp in resctrl FS */
	ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp);
	if (ret)
		goto reset_affinity;

	perf_event_attr_initialize(&pea, PERF_COUNT_HW_CACHE_MISSES);
	perf_event_initialize_read_format(&pe_read);
	pe_fd = perf_open(&pea, bm_pid, uparams->cpu);
	if (pe_fd < 0) {
		ret = -1;
		goto reset_affinity;
	}

	buf = alloc_buffer(span, 1);
	if (!buf) {
		ret = -1;
		goto pe_close;
	}

	while (current_mask) {
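		/*
		 * Point the default group at the complement of the portion
		 * under test, so the bits in current_mask are effectively
		 * exclusive to the benchmark's control group.
		 */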
		snprintf(schemata, sizeof(schemata), "%lx", param->mask & ~current_mask);
		ret = write_schemata("", schemata, uparams->cpu, test->resource);
		if (ret)
			goto free_buf;
		snprintf(schemata, sizeof(schemata), "%lx", current_mask);
		ret = write_schemata(param->ctrlgrp, schemata, uparams->cpu, test->resource);
		if (ret)
			goto free_buf;

		for (i = 0; i < NUM_OF_RUNS; i++) {
			mem_flush(buf, span);
			fill_cache_read(buf, span, true);

			ret = perf_event_reset_enable(pe_fd);
			if (ret)
				goto free_buf;

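			/*
			 * Measured pass: LLC misses accumulate only between
			 * the counter reset above and perf_event_measure()
			 * below.
			 */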
			fill_cache_read(buf, span, true);

			ret = perf_event_measure(pe_fd, &pe_read, param->filename, bm_pid);
			if (ret)
				goto free_buf;
		}
		current_mask = next_mask(current_mask);
	}

free_buf:
	free(buf);
pe_close:
	close(pe_fd);
reset_affinity:
	taskset_restore(bm_pid, &old_affinity);

	return ret;
}

static int cat_run_test(const struct resctrl_test *test, const struct user_params *uparams)
{
	unsigned long long_mask, start_mask, full_cache_mask;
	unsigned long cache_total_size = 0;
	int n = uparams->bits;
	unsigned int start;
	int count_of_bits;
	size_t span;
	int ret;

	ret = get_full_cbm(test->resource, &full_cache_mask);
	if (ret)
		return ret;
	/* Get the largest contiguous exclusive portion of the cache */
	ret = get_mask_no_shareable(test->resource, &long_mask);
	if (ret)
		return ret;

	/* Get L3/L2 cache size */
	ret = get_cache_size(uparams->cpu, test->resource, &cache_total_size);
	if (ret)
		return ret;
	ksft_print_msg("Cache size :%lu\n", cache_total_size);

	count_of_bits = count_contiguous_bits(long_mask, &start);

	if (!n)
		n = count_of_bits / 2;

	if (n > count_of_bits - 1) {
		ksft_print_msg("Invalid input value for no_of_bits n!\n");
		ksft_print_msg("Please enter value in range 1 to %d\n",
			       count_of_bits - 1);
		return -1;
	}
	start_mask = create_bit_mask(start, n);

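	/*
	 * The benchmark buffer below is sized to the portion of the cache
	 * that the n-bit start_mask covers, so the first iteration's working
	 * set matches its allocation; later iterations shrink the mask via
	 * next_mask() without resizing the buffer.
	 */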
	struct resctrl_val_param param = {
		.ctrlgrp	= "c1",
		.filename	= RESULT_FILE_NAME,
		.num_of_runs	= 0,
	};
	param.mask = long_mask;
	span = cache_portion_size(cache_total_size, start_mask, full_cache_mask);

	remove(param.filename);

	ret = cat_test(test, uparams, &param, span, start_mask);
	if (ret)
		return ret;

	ret = check_results(&param, test->resource,
			    cache_total_size, full_cache_mask, start_mask);
	return ret;
}

static bool arch_supports_noncont_cat(const struct resctrl_test *test)
{
	/* AMD always supports non-contiguous CBM. */
	if (get_vendor() == ARCH_AMD)
		return true;

#if defined(__i386__) || defined(__x86_64__) /* arch */
	unsigned int eax, ebx, ecx, edx;
	/* Intel support for non-contiguous CBM needs to be discovered. */
	if (!strcmp(test->resource, "L3"))
		__cpuid_count(0x10, 1, eax, ebx, ecx, edx);
	else if (!strcmp(test->resource, "L2"))
		__cpuid_count(0x10, 2, eax, ebx, ecx, edx);
	else
		return false;

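	/*
	 * CPUID leaf 0x10, subleaf 1 (L3) / 2 (L2): ECX bit 3 advertises
	 * support for non-contiguous 1s in the CBM.
	 */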
	return ((ecx >> 3) & 1);
#endif /* end arch */

	return false;
}

static int noncont_cat_run_test(const struct resctrl_test *test,
				const struct user_params *uparams)
{
	unsigned long full_cache_mask, cont_mask, noncont_mask;
	unsigned int sparse_masks;
	int bit_center, ret;
	char schemata[64];

	/* Check to compare sparse_masks content to CPUID output. */
	ret = resource_info_unsigned_get(test->resource, "sparse_masks", &sparse_masks);
	if (ret)
		return ret;

	if (arch_supports_noncont_cat(test) != sparse_masks) {
		ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n");
		return 1;
	}

	/* Write checks initialization. */
	ret = get_full_cbm(test->resource, &full_cache_mask);
	if (ret < 0)
		return ret;
	bit_center = count_bits(full_cache_mask) / 2;

	/*
	 * The bit_center needs to be at least 3 to properly calculate the CBM
	 * hole in the noncont_mask. If it's smaller, return an error since the
	 * cache mask is too short and that shouldn't happen.
	 */
	if (bit_center < 3)
		return -EINVAL;
	cont_mask = full_cache_mask >> bit_center;

	/* Contiguous mask write check. */
	snprintf(schemata, sizeof(schemata), "%lx", cont_mask);
	ret = write_schemata("", schemata, uparams->cpu, test->resource);
	if (ret) {
		ksft_print_msg("Write of contiguous CBM failed\n");
		return 1;
	}

	/*
	 * Non-contiguous mask write check. The CBM has a 0xf hole approximately
	 * in the middle. Output is compared with support information to catch
	 * any edge case errors.
	 */
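	/*
	 * Worked example: with a 16-bit full CBM (0xffff), bit_center is 8,
	 * the hole is 0xf << 6 = 0x3c0, and noncont_mask becomes 0xfc3f.
	 */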
	noncont_mask = ~(0xfUL << (bit_center - 2)) & full_cache_mask;
	snprintf(schemata, sizeof(schemata), "%lx", noncont_mask);
	ret = write_schemata("", schemata, uparams->cpu, test->resource);
	if (ret && sparse_masks)
		ksft_print_msg("Non-contiguous CBMs supported but write of non-contiguous CBM failed\n");
	else if (ret && !sparse_masks)
		ksft_print_msg("Non-contiguous CBMs not supported and write of non-contiguous CBM failed as expected\n");
	else if (!ret && !sparse_masks)
		ksft_print_msg("Non-contiguous CBMs not supported but write of non-contiguous CBM succeeded\n");

	return !ret == !sparse_masks;
}

static bool noncont_cat_feature_check(const struct resctrl_test *test)
{
	if (!resctrl_resource_exists(test->resource))
		return false;

	return resource_info_file_exists(test->resource, "sparse_masks");
}

struct resctrl_test l3_cat_test = {
	.name = "L3_CAT",
	.group = "CAT",
	.resource = "L3",
	.feature_check = test_resource_feature_check,
	.run_test = cat_run_test,
	.cleanup = cat_test_cleanup,
};

struct resctrl_test l3_noncont_cat_test = {
	.name = "L3_NONCONT_CAT",
	.group = "CAT",
	.resource = "L3",
	.feature_check = noncont_cat_feature_check,
	.run_test = noncont_cat_run_test,
};

struct resctrl_test l2_noncont_cat_test = {
	.name = "L2_NONCONT_CAT",
	.group = "CAT",
	.resource = "L2",
	.feature_check = noncont_cat_feature_check,
	.run_test = noncont_cat_run_test,
};

v6.8 — tools/testing/selftests/resctrl/cat_test.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Cache Allocation Technology (CAT) test
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Authors:
 *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
 *    Fenghua Yu <fenghua.yu@intel.com>
 */
#include "resctrl.h"
#include <unistd.h>

#define RESULT_FILE_NAME1	"result_cat1"
#define RESULT_FILE_NAME2	"result_cat2"
#define NUM_OF_RUNS		5
#define MAX_DIFF_PERCENT	4
#define MAX_DIFF		1000000

/*
 * Change schemata. Write schemata to the specified
 * con_mon grp, mon_grp in resctrl FS.
 * Run 5 times in order to get average values.
 */
static int cat_setup(struct resctrl_val_param *p)
{
	char schemata[64];
	int ret = 0;

	/* Run NUM_OF_RUNS times */
	if (p->num_of_runs >= NUM_OF_RUNS)
		return END_OF_TESTS;

	if (p->num_of_runs == 0) {
		sprintf(schemata, "%lx", p->mask);
		ret = write_schemata(p->ctrlgrp, schemata, p->cpu_no,
				     p->resctrl_val);
	}
	p->num_of_runs++;

	return ret;
}
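
/*
 * Note: the schemata is written only on the first of the NUM_OF_RUNS
 * invocations; later calls just count runs until END_OF_TESTS stops the
 * measurement loop.
 */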

static int check_results(struct resctrl_val_param *param, size_t span)
{
	char *token_array[8], temp[512];
	unsigned long sum_llc_perf_miss = 0;
	int runs = 0, no_of_bits = 0;
	FILE *fp;

	ksft_print_msg("Checking for pass/fail\n");
	fp = fopen(param->filename, "r");
	if (!fp) {
		perror("# Cannot open file");

		return errno;
	}

	while (fgets(temp, sizeof(temp), fp)) {
		char *token = strtok(temp, ":\t");
		int fields = 0;

		while (token) {
			token_array[fields++] = token;
			token = strtok(NULL, ":\t");
		}
		/*
		 * Discard the first value, which is inaccurate due to the
		 * monitoring setup transition phase.
		 */
		if (runs > 0)
			sum_llc_perf_miss += strtoul(token_array[3], NULL, 0);
		runs++;
	}

	fclose(fp);
	no_of_bits = count_bits(param->mask);

	return show_cache_info(sum_llc_perf_miss, no_of_bits, span / 64,
			       MAX_DIFF, MAX_DIFF_PERCENT, runs - 1,
			       get_vendor() == ARCH_INTEL, false);
}

void cat_test_cleanup(void)
{
	remove(RESULT_FILE_NAME1);
	remove(RESULT_FILE_NAME2);
}

int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
{
	unsigned long l_mask, l_mask_1;
	int ret, pipefd[2], sibling_cpu_no;
	unsigned long cache_size = 0;
	unsigned long long_mask;
	char cbm_mask[256];
	int count_of_bits;
	char pipe_message;
	size_t span;

	/* Get default cbm mask for L3/L2 cache */
	ret = get_cbm_mask(cache_type, cbm_mask);
	if (ret)
		return ret;

	long_mask = strtoul(cbm_mask, NULL, 16);

	/* Get L3/L2 cache size */
	ret = get_cache_size(cpu_no, cache_type, &cache_size);
	if (ret)
		return ret;
	ksft_print_msg("Cache size :%lu\n", cache_size);

	/* Get max number of bits from the default CBM mask */
	count_of_bits = count_bits(long_mask);

	if (!n)
		n = count_of_bits / 2;

	if (n > count_of_bits - 1) {
		ksft_print_msg("Invalid input value for no_of_bits n!\n");
		ksft_print_msg("Please enter value in range 1 to %d\n",
			       count_of_bits - 1);
		return -1;
	}

	/* Get core id from same socket for running another thread */
	sibling_cpu_no = get_core_sibling(cpu_no);
	if (sibling_cpu_no < 0)
		return -1;

	struct resctrl_val_param param = {
		.resctrl_val	= CAT_STR,
		.cpu_no		= cpu_no,
		.setup		= cat_setup,
	};

	l_mask = long_mask >> n;
	l_mask_1 = ~l_mask & long_mask;
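	/*
	 * Worked example: with long_mask = 0xfff and n = 6, l_mask is 0x3f
	 * (the low 6 bits, parent) and l_mask_1 is 0xfc0 (the high 6 bits,
	 * child), so the two processes get disjoint cache portions.
	 */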

	/* Set param values for parent thread which will be allocated bitmask
	 * with (max_bits - n) bits
	 */
	span = cache_size * (count_of_bits - n) / count_of_bits;
	strcpy(param.ctrlgrp, "c2");
	strcpy(param.mongrp, "m2");
	strcpy(param.filename, RESULT_FILE_NAME2);
	param.mask = l_mask;
	param.num_of_runs = 0;

	if (pipe(pipefd)) {
		perror("# Unable to create pipe");
		return errno;
	}

	fflush(stdout);
	bm_pid = fork();

	/* Set param values for child thread which will be allocated bitmask
	 * with n bits
	 */
	if (bm_pid == 0) {
		param.mask = l_mask_1;
		strcpy(param.ctrlgrp, "c1");
		strcpy(param.mongrp, "m1");
		span = cache_size * n / count_of_bits;
		strcpy(param.filename, RESULT_FILE_NAME1);
		param.num_of_runs = 0;
		param.cpu_no = sibling_cpu_no;
	}
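
	/*
	 * Note: bm_pid is not declared locally; in this version of the
	 * selftests it is a global declared in resctrl.h, so both parent
	 * and child reuse it after fork().
	 */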

	remove(param.filename);

	ret = cat_val(&param, span);
	if (ret == 0)
		ret = check_results(&param, span);

	if (bm_pid == 0) {
		/* Tell parent that child is ready */
		close(pipefd[0]);
		pipe_message = 1;
		if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
		    sizeof(pipe_message))
			/*
			 * Just print the error message.
			 * Let while(1) run and wait for itself to be killed.
			 */
			perror("# failed signaling parent process");

		close(pipefd[1]);
		while (1)
			;
	} else {
		/* Parent waits for child to be ready. */
		close(pipefd[1]);
		pipe_message = 0;
		while (pipe_message != 1) {
			if (read(pipefd[0], &pipe_message,
				 sizeof(pipe_message)) < sizeof(pipe_message)) {
				perror("# failed reading from child process");
				break;
			}
		}
		close(pipefd[0]);
		kill(bm_pid, SIGKILL);
	}

	cat_test_cleanup();

	return ret;
}