/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE

#include <linux/limits.h>
#include <linux/oom.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
#include <sys/mman.h>

#include "../kselftest.h"
#include "cgroup_util.h"

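/*
 * Set in main() from the cgroup2 mount options (see the
 * proc_mount_contains() calls below); tests that depend on
 * memory_localevents / memory_recursiveprot consult these.
 */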
static bool has_localevents;
static bool has_recursiveprot;

/*
 * This test creates two nested cgroups with and without enabling
 * the memory controller.
 */
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup_free;

	if (cg_create(parent))
		goto cleanup_free;

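	/*
	 * Enabling a controller in the parent's cgroup.subtree_control is
	 * what makes it show up in each child's cgroup.controllers.
	 */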
	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup_parent;

	if (cg_create(child))
		goto cleanup_parent;

	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup_child;

	/* Create two nested cgroups without enabling memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup_free2;

	if (cg_create(parent2))
		goto cleanup_free2;

	if (cg_create(child2))
		goto cleanup_parent2;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup_all;

	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup_all;

	ret = KSFT_PASS;

cleanup_all:
	cg_destroy(child2);
cleanup_parent2:
	cg_destroy(parent2);
cleanup_free2:
	free(parent2);
	free(child2);
cleanup_child:
	cg_destroy(child);
cleanup_parent:
	cg_destroy(parent);
cleanup_free:
	free(parent);
	free(child);

	return ret;
}

static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	char *buf, *ptr;
	long anon, current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

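	/*
	 * Touch one byte in each page: malloc() only reserves address
	 * space, and the pages are charged to the cgroup only once they
	 * are actually faulted in.
	 */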
	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

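	/*
	 * values_close(a, b, err) (from cgroup_util) checks that the two
	 * values differ by no more than err percent.
	 */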
	if (!values_close(size, current, 3))
		goto cleanup;

	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
	if (anon < 0)
		goto cleanup;

	if (!values_close(anon, current, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, file;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	file = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file < 0)
		goto cleanup;

	if (!values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test creates a memory cgroup, allocates some anonymous memory
 * and some pagecache, and checks memory.current, memory.peak, and some
 * memory.stat values.
 */
static int test_memcg_current_peak(const char *root)
{
	int ret = KSFT_FAIL;
	long current, peak, peak_reset;
	char *memcg;
	bool fd2_closed = false, fd3_closed = false, fd4_closed = false;
	int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1;
	struct stat ss;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak != 0)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/*
	 * We'll open a few FDs for the same memory.peak file to exercise
	 * the free path. At least three of them must be closed in a
	 * different order than the writes occurred, to exercise the
	 * linked-list handling.
	 */
	peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd == -1) {
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.peak's fd, try to figure out whether
	 * this kernel supports writing to that file in the first place. (by
	 * checking the writable bit on the file's st_mode)
	 */
	if (fstat(peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd2 == -1)
		goto cleanup;

	peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd3 == -1)
		goto cleanup;

	/* any non-empty string resets, but make it clear */
	static const char reset_string[] = "reset\n";

	peak_reset = write(peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd2, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd3, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	/* Make sure a completely independent read isn't affected by our FD-local reset above */
	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	fd2_closed = true;
	if (close(peak_fd2))
		goto cleanup;

	peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd4 == -1)
		goto cleanup;

	peak_reset = write(peak_fd4, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak = cg_read_long_fd(peak_fd);
	if (peak > MB(30) || peak < 0)
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/* Make sure everything is back to normal */
	peak = cg_read_long_fd(peak_fd);
	if (peak < MB(50))
		goto cleanup;

	peak = cg_read_long_fd(peak_fd4);
	if (peak < MB(50))
		goto cleanup;

	fd3_closed = true;
	if (close(peak_fd3))
		goto cleanup;

	fd4_closed = true;
	if (close(peak_fd4))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	close(peak_fd);
	if (!fd2_closed)
		close(peak_fd2);
	if (!fd3_closed)
		close(peak_fd3);
	if (!fd4_closed)
		close(peak_fd4);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int ppid = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

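	/*
	 * Park until the parent exits and we get reparented; this keeps
	 * the allocation alive (and charged to the cgroup) for as long as
	 * the test needs it.
	 */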
	while (getppid() == ppid)
		sleep(1);

	return 0;
}

static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	int ppid = getppid();
	size_t size = (unsigned long)arg;
	char *buf, *ptr;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	while (getppid() == ppid)
		sleep(1);

	free(buf);
	return 0;
}

/*
 * Wait until processes are killed asynchronously by the OOM killer.
 * If we exceed the ~1 second timeout (10 polls, 100ms apart), fail.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int limit;

	for (limit = 10; limit > 0; limit--) {
		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
			return 0;

		usleep(100000);
	}
	return -1;
}

static bool reclaim_until(const char *memcg, long goal);

/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 0,    memory.max = 200M
 * A/B     memory.min = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 0,    memory.current = 50M
 * A/B/F   memory.min = 500M, memory.current = 0
 *
 * (or memory.low if we test soft protection)
 *
 * Usages are pagecache and the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates significant
 * memory pressure in A.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 29M
 * A/B/D  memory.current ~= 21M
 * A/B/E  memory.current ~= 0
 * A/B/F  memory.current  = 0
 * (for the origin of the numbers, see the model in memcg_protection.m.)
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available, and checks that:
 * a) memory.min protects pagecache even in this case,
 * b) memory.low allows reclaiming page cache with low events.
 *
 * Then we try to reclaim from A/B/C using memory.reclaim until its
 * usage reaches 10M.
 * This makes sure that:
 * (a) We ignore the protection of the reclaim target memcg.
 * (b) The previously calculated emin value (~29M) should be dismissed.
 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	long current;
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[1],   attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;
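
	/*
	 * The children claim more protection (75M + 25M + 0 + 500M) than
	 * B itself is granted (50M), so the kernel scales the children's
	 * effective protection down to fit B's share; that is why C and D
	 * settle near 29M/21M rather than at their nominal values (see
	 * memcg_protection.m for the exact model).
	 */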

	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(29), 10))
		goto cleanup;

	if (!values_close(c[1], MB(21), 10))
		goto cleanup;

	if (c[3] != 0)
		goto cleanup;

	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents allocating anon memory\n");
		goto cleanup;
	}

	current = min ? MB(50) : MB(30);
	if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
		goto cleanup;

	if (!reclaim_until(children[0], MB(10)))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		if (i > no_low_events_index && low)
			goto cleanup;
	}

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}

static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}

static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, high, max;
	int fd;

	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (!values_close(current, MB(30), 5))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
		goto cleanup;

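	/*
	 * memory.high throttles and reclaims rather than returning ENOMEM,
	 * so a 50M pagecache allocation should go through, but the check
	 * that 50M stays resident must fail under the 30M high limit.
	 */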
	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

static int alloc_anon_mlock(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	void *buf;

	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
		   0, 0);
	if (buf == MAP_FAILED)
		return -1;

	mlock(buf, size);
	munmap(buf, size);
	return 0;
}

/*
 * This test checks that memory.high is able to throttle a big single-shot
 * allocation, i.e. a large allocation within one kernel entry.
 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;
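
	/*
	 * With high = 30M and max = 140M, the 200M mlocked allocation below
	 * is expected to trip memory.high events while the synchronous
	 * throttling keeps usage from ever hitting memory.max.
	 */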

	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * Reclaim from @memcg until usage reaches @goal by writing to
 * memory.reclaim.
 *
 * This function will return false if the usage is already below the
 * goal.
 *
 * This function assumes that writing to memory.reclaim is the only
 * source of change in memory.current (no concurrent allocations or
 * reclaim).
 *
 * This function makes sure memory.reclaim is sane. It will return
 * false if memory.reclaim's error codes do not make sense, even if
 * the usage goal was satisfied.
 */
static bool reclaim_until(const char *memcg, long goal)
{
	char buf[64];
	int retries, err;
	long current, to_reclaim;
	bool reclaimed = false;

	for (retries = 5; retries > 0; retries--) {
		current = cg_read_long(memcg, "memory.current");

		if (current < goal || values_close(current, goal, 3))
			break;
		/* Did memory.reclaim return 0 incorrectly? */
		else if (reclaimed)
			return false;

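		/*
		 * Request exactly the remaining delta; a write to
		 * memory.reclaim fails with EAGAIN when the kernel could
		 * not reclaim the full requested amount.
		 */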
		to_reclaim = current - goal;
		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err)
			reclaimed = true;
		else if (err != -EAGAIN)
			return false;
	}
	return reclaimed;
}

/*
 * This test checks that memory.reclaim reclaims the given
 * amount of memory (from both anon and file, if possible).
 */
static int test_memcg_reclaim(const char *root)
{
	int ret = KSFT_FAIL;
	int fd = -1;
	int retries;
	char *memcg;
	long current, expected_usage;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);

	/*
	 * If swap is enabled, try to reclaim from both anon and file, else try
	 * to reclaim from file only.
	 */
	if (is_swap_enabled()) {
		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
		expected_usage = MB(100);
	} else {
		expected_usage = MB(50);
	}

	/*
	 * Wait until current usage reaches the expected usage (or we run out of
	 * retries).
	 */
	retries = 5;
	while (!values_close(cg_read_long(memcg, "memory.current"),
			    expected_usage, 10)) {
		if (retries--) {
			sleep(1);
			continue;
		} else {
			fprintf(stderr,
				"failed to allocate %ld for memcg reclaim test\n",
				expected_usage);
			goto cleanup;
		}
	}

	/*
	 * Reclaim until current reaches 30M, this makes sure we hit both anon
	 * and file if swap is enabled.
	 */
	if (!reclaim_until(memcg, MB(30)))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	cg_destroy(memcg);
	free(memcg);
	close(fd);

	return ret;
}

static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
	long mem_max = (long)arg;
	size_t size = MB(50);
	char *buf, *ptr;
	long mem_current, swap_current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	mem_current = cg_read_long(cgroup, "memory.current");
	if (!mem_current || !values_close(mem_current, mem_max, 3))
		goto cleanup;

	swap_current = cg_read_long(cgroup, "memory.swap.current");
	if (!swap_current ||
	    !values_close(mem_current + swap_current, size, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out. Additionally, it verifies that
 * memory.swap.peak reflects the high watermark and can be reset.
 */
static int test_memcg_swap_max_peak(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max, peak;
	struct stat ss;
	int swap_peak_fd = -1, mem_peak_fd = -1;

	/* any non-empty string resets */
	static const char reset_string[] = "foobarbaz";

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	swap_peak_fd = cg_open(memcg, "memory.swap.peak",
			       O_RDWR | O_APPEND | O_CLOEXEC);

	if (swap_peak_fd == -1) {
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.swap.peak's fd, try to figure out
	 * whether this kernel supports writing to that file in the first
	 * place. (by checking the writable bit on the file's st_mode)
	 */
	if (fstat(swap_peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (mem_peak_fd == -1)
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak"))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	/* switch the swap and mem fds into local-peak tracking mode */
	int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));

	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	if (cg_read_long(memcg, "memory.peak"))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	/*
	 * open, reset and close the peak swap on another FD to make sure
	 * multiple extant fds don't corrupt the linked-list
	 */
	peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	/* actually reset on the fds */
	peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak > MB(10))
		goto cleanup;

	/*
	 * The cgroup is now empty, but there may be a page or two associated
	 * with the open FD accounted to it.
	 */
	peak = cg_read_long_fd(mem_peak_fd);
	if (peak > MB(1))
		goto cleanup;

	if (cg_read_long(memcg, "memory.peak") < MB(29))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak") < MB(29))
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(19))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (mem_peak_fd != -1 && close(mem_peak_fd))
		ret = KSFT_FAIL;
	if (swap_peak_fd != -1 && close(swap_peak_fd))
		ret = KSFT_FAIL;
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM. Then it checks for oom and oom_kill events in
 * memory.events.
 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

struct tcp_server_args {
	unsigned short port;
	int ctl[2];
};

static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
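	/*
	 * Stream 1MB chunks at the client until it goes away; an
	 * ECONNRESET on write is the expected, successful way out once
	 * the client closes its end.
	 */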
	for (;;) {
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}

static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;
	long allocated;

	allocated = cg_read_long(cgroup, "memory.current");
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		/* exclude the memory not related to socket connection */
		if (values_close(current - allocated, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}

/*
 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between
 * 1000 and 61000. Once it gets a client connection, it starts writing
 * to its socket.
 * The TCP client interleaves reads from the socket with checks that
 * memory.current and memory.stat's "sock" counter stay close to each
 * other.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		waitpid(pid, NULL, 0);
	}

	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the leaf were killed. It also checks that OOM events
 * were propagated to the parent level.
 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long parent_oom_events;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

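	/*
	 * With memory.oom.group set, the OOM killer is expected to take
	 * down every process in the cgroup as a single unit instead of
	 * picking one victim.
	 */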
	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	parent_oom_events = cg_read_key_long(
			parent, "memory.events", "oom_kill ");
	/*
	 * If memory_localevents is not enabled (the default), the parent should
	 * count OOM events in its children groups. Otherwise, it should not
	 * have observed any events.
	 */
	if (has_localevents && parent_oom_events != 0)
		goto cleanup;
	else if (!has_localevents && parent_oom_events <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the parent and leaf were killed.
 */
static int test_memcg_oom_group_parent_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "memory.max", "80M"))
		goto cleanup;

	if (cg_write(parent, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(parent, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;
	if (cg_test_proc_killed(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes were killed except those set with OOM_SCORE_ADJ_MIN.
 */
static int test_memcg_oom_group_score_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	int safe_pid;

	memcg = cg_name(root, "memcg_test_0");

	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.oom.group", "1"))
		goto cleanup;

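	/*
	 * A task with oom_score_adj == OOM_SCORE_ADJ_MIN is never OOM
	 * killed, even as part of a group kill, so it must be cleaned up
	 * explicitly with kill() below.
	 */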
	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
		goto cleanup;

	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
		goto cleanup;

	if (kill(safe_pid, SIGKILL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}

#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current_peak),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max_peak),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T

int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, proc_status, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root), NULL))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	proc_status = proc_mount_contains("memory_recursiveprot");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_recursiveprot = proc_status;

	proc_status = proc_mount_contains("memory_localevents");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_localevents = proc_status;

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}