Linux Audio

Check our new training course

Linux BSP upgrade and security maintenance

Need help to get security updates for your Linux BSP?
Loading...
Note: File does not exist in v4.10.11.
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#define _GNU_SOURCE
   3
#include <linux/limits.h>
#include <linux/oom.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
  20
  21#include "../kselftest.h"
  22#include "cgroup_util.h"
  23
  24static bool has_localevents;
  25static bool has_recursiveprot;
  26
  27/*
  28 * This test creates two nested cgroups with and without enabling
  29 * the memory controller.
  30 */
  31static int test_memcg_subtree_control(const char *root)
  32{
  33	char *parent, *child, *parent2 = NULL, *child2 = NULL;
  34	int ret = KSFT_FAIL;
  35	char buf[PAGE_SIZE];
  36
  37	/* Create two nested cgroups with the memory controller enabled */
  38	parent = cg_name(root, "memcg_test_0");
  39	child = cg_name(root, "memcg_test_0/memcg_test_1");
  40	if (!parent || !child)
  41		goto cleanup_free;
  42
  43	if (cg_create(parent))
  44		goto cleanup_free;
  45
  46	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
  47		goto cleanup_parent;
  48
  49	if (cg_create(child))
  50		goto cleanup_parent;
  51
  52	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
  53		goto cleanup_child;
  54
  55	/* Create two nested cgroups without enabling memory controller */
  56	parent2 = cg_name(root, "memcg_test_1");
  57	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
  58	if (!parent2 || !child2)
  59		goto cleanup_free2;
  60
  61	if (cg_create(parent2))
  62		goto cleanup_free2;
  63
  64	if (cg_create(child2))
  65		goto cleanup_parent2;
  66
  67	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
  68		goto cleanup_all;
  69
  70	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
  71		goto cleanup_all;
  72
  73	ret = KSFT_PASS;
  74
  75cleanup_all:
  76	cg_destroy(child2);
  77cleanup_parent2:
  78	cg_destroy(parent2);
  79cleanup_free2:
  80	free(parent2);
  81	free(child2);
  82cleanup_child:
  83	cg_destroy(child);
  84cleanup_parent:
  85	cg_destroy(parent);
  86cleanup_free:
  87	free(parent);
  88	free(child);
  89
  90	return ret;
  91}
  92
  93static int alloc_anon_50M_check(const char *cgroup, void *arg)
  94{
  95	size_t size = MB(50);
  96	char *buf, *ptr;
  97	long anon, current;
  98	int ret = -1;
  99
 100	buf = malloc(size);
 101	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
 102		*ptr = 0;
 103
 104	current = cg_read_long(cgroup, "memory.current");
 105	if (current < size)
 106		goto cleanup;
 107
 108	if (!values_close(size, current, 3))
 109		goto cleanup;
 110
 111	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
 112	if (anon < 0)
 113		goto cleanup;
 114
 115	if (!values_close(anon, current, 3))
 116		goto cleanup;
 117
 118	ret = 0;
 119cleanup:
 120	free(buf);
 121	return ret;
 122}
 123
 124static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
 125{
 126	size_t size = MB(50);
 127	int ret = -1;
 128	long current, file;
 129	int fd;
 130
 131	fd = get_temp_fd();
 132	if (fd < 0)
 133		return -1;
 134
 135	if (alloc_pagecache(fd, size))
 136		goto cleanup;
 137
 138	current = cg_read_long(cgroup, "memory.current");
 139	if (current < size)
 140		goto cleanup;
 141
 142	file = cg_read_key_long(cgroup, "memory.stat", "file ");
 143	if (file < 0)
 144		goto cleanup;
 145
 146	if (!values_close(file, current, 10))
 147		goto cleanup;
 148
 149	ret = 0;
 150
 151cleanup:
 152	close(fd);
 153	return ret;
 154}
 155
 156/*
 157 * This test create a memory cgroup, allocates
 158 * some anonymous memory and some pagecache
 159 * and check memory.current and some memory.stat values.
 160 */
 161static int test_memcg_current(const char *root)
 162{
 163	int ret = KSFT_FAIL;
 164	long current;
 165	char *memcg;
 166
 167	memcg = cg_name(root, "memcg_test");
 168	if (!memcg)
 169		goto cleanup;
 170
 171	if (cg_create(memcg))
 172		goto cleanup;
 173
 174	current = cg_read_long(memcg, "memory.current");
 175	if (current != 0)
 176		goto cleanup;
 177
 178	if (cg_run(memcg, alloc_anon_50M_check, NULL))
 179		goto cleanup;
 180
 181	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
 182		goto cleanup;
 183
 184	ret = KSFT_PASS;
 185
 186cleanup:
 187	cg_destroy(memcg);
 188	free(memcg);
 189
 190	return ret;
 191}
 192
 193static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
 194{
 195	int fd = (long)arg;
 196	int ppid = getppid();
 197
 198	if (alloc_pagecache(fd, MB(50)))
 199		return -1;
 200
 201	while (getppid() == ppid)
 202		sleep(1);
 203
 204	return 0;
 205}
 206
 207static int alloc_anon_noexit(const char *cgroup, void *arg)
 208{
 209	int ppid = getppid();
 210	size_t size = (unsigned long)arg;
 211	char *buf, *ptr;
 212
 213	buf = malloc(size);
 214	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
 215		*ptr = 0;
 216
 217	while (getppid() == ppid)
 218		sleep(1);
 219
 220	free(buf);
 221	return 0;
 222}
 223
 224/*
 225 * Wait until processes are killed asynchronously by the OOM killer
 226 * If we exceed a timeout, fail.
 227 */
 228static int cg_test_proc_killed(const char *cgroup)
 229{
 230	int limit;
 231
 232	for (limit = 10; limit > 0; limit--) {
 233		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
 234			return 0;
 235
 236		usleep(100000);
 237	}
 238	return -1;
 239}
 240
 241static bool reclaim_until(const char *memcg, long goal);
 242
 243/*
 244 * First, this test creates the following hierarchy:
 245 * A       memory.min = 0,    memory.max = 200M
 246 * A/B     memory.min = 50M
 247 * A/B/C   memory.min = 75M,  memory.current = 50M
 248 * A/B/D   memory.min = 25M,  memory.current = 50M
 249 * A/B/E   memory.min = 0,    memory.current = 50M
 250 * A/B/F   memory.min = 500M, memory.current = 0
 251 *
 252 * (or memory.low if we test soft protection)
 253 *
 254 * Usages are pagecache and the test keeps a running
 255 * process in every leaf cgroup.
 256 * Then it creates A/G and creates a significant
 257 * memory pressure in A.
 258 *
 259 * Then it checks actual memory usages and expects that:
 260 * A/B    memory.current ~= 50M
 261 * A/B/C  memory.current ~= 29M
 262 * A/B/D  memory.current ~= 21M
 263 * A/B/E  memory.current ~= 0
 264 * A/B/F  memory.current  = 0
 265 * (for origin of the numbers, see model in memcg_protection.m.)
 266 *
 267 * After that it tries to allocate more than there is
 268 * unprotected memory in A available, and checks that:
 269 * a) memory.min protects pagecache even in this case,
 270 * b) memory.low allows reclaiming page cache with low events.
 271 *
 272 * Then we try to reclaim from A/B/C using memory.reclaim until its
 273 * usage reaches 10M.
 274 * This makes sure that:
 275 * (a) We ignore the protection of the reclaim target memcg.
 276 * (b) The previously calculated emin value (~29M) should be dismissed.
 277 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};	/* A, A/B, A/G from the diagram above */
	char *children[4] = {NULL};	/* A/B/{C,D,E,F} */
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];			/* memory.current snapshot per child */
	int i, attempts;
	int fd;				/* shared tmpfile backing all pagecache */

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	/* A nonzero/unreadable default means the attribute is unusable here */
	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	/* No swap: reclaim pressure must act on pagecache only */
	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		/* The last child (F) gets protection but no usage */
		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	/* Protection values from the diagram in the comment above */
	if (cg_write(parent[1],   attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

	/* Wait (up to ~6s) for the three 50M workers to charge ~150M */
	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	/* Create memory pressure in A by allocating most of its headroom */
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	/* Expected distribution per the memcg_protection.m model */
	if (!values_close(c[0], MB(29), 10))
		goto cleanup;

	if (!values_close(c[1], MB(21), 10))
		goto cleanup;

	/* F never allocated anything, so protection must not "create" usage */
	if (c[3] != 0)
		goto cleanup;

	/* Over-commit: more than the unprotected memory in A */
	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;	/* memory.min should have caused an OOM */
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents from allocating anon memory\n");
		goto cleanup;
	}

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	/* Targeted reclaim must ignore the target's own protection */
	if (!reclaim_until(children[0], MB(10)))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	/* memory.low only: C and D must have seen "low" events, E/F none */
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		if (i > no_low_events_index && low)
			goto cleanup;

	}

	ret = KSFT_PASS;

cleanup:
	/* Destroy children before parents; NULL entries were never created */
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	/* NOTE(review): reached with fd == -1 if get_temp_fd() failed */
	close(fd);
	return ret;
}
 441
/* memory.min (hard protection) variant of test_memcg_protection(). */
static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}
 446
/* memory.low (best-effort protection) variant of test_memcg_protection(). */
static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}
 451
 452static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
 453{
 454	size_t size = MB(50);
 455	int ret = -1;
 456	long current, high, max;
 457	int fd;
 458
 459	high = cg_read_long(cgroup, "memory.high");
 460	max = cg_read_long(cgroup, "memory.max");
 461	if (high != MB(30) && max != MB(30))
 462		return -1;
 463
 464	fd = get_temp_fd();
 465	if (fd < 0)
 466		return -1;
 467
 468	if (alloc_pagecache(fd, size))
 469		goto cleanup;
 470
 471	current = cg_read_long(cgroup, "memory.current");
 472	if (!values_close(current, MB(30), 5))
 473		goto cleanup;
 474
 475	ret = 0;
 476
 477cleanup:
 478	close(fd);
 479	return ret;
 480
 481}
 482
 483/*
 484 * This test checks that memory.high limits the amount of
 485 * memory which can be consumed by either anonymous memory
 486 * or pagecache.
 487 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* A fresh cgroup starts with no high limit */
	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	/* Disable swap so all charges stay in memory.current */
	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	/* memory.high throttles but does not kill: 31M anon must succeed */
	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
		goto cleanup;

	/* 50M pagecache gets reclaimed below the limit, so the check fails */
	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	/* ...and usage must settle near 30M */
	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	/* Breaching the limit must have generated "high" events */
	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
 531
 532static int alloc_anon_mlock(const char *cgroup, void *arg)
 533{
 534	size_t size = (size_t)arg;
 535	void *buf;
 536
 537	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
 538		   0, 0);
 539	if (buf == MAP_FAILED)
 540		return -1;
 541
 542	mlock(buf, size);
 543	munmap(buf, size);
 544	return 0;
 545}
 546
 547/*
 548 * This test checks that memory.high is able to throttle big single shot
 549 * allocation i.e. large allocation within one kernel entry.
 550 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Snapshot the event counters before the allocation */
	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

	/* Arm the memory.events notification before starting the allocator */
	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	/* One big mlocked mapping: a 200M single-shot allocation */
	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	/* "high" must have fired; "max" must not have been touched */
	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
 607
 608/*
 609 * This test checks that memory.max limits the amount of
 610 * memory which can be consumed by either anonymous memory
 611 * or pagecache.
 612 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* A fresh cgroup starts with no max limit */
	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	/* Disable swap so the limit cannot be dodged by swapping out */
	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* Pagecache is reclaimed instead; usage must stay near the limit */
	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	/* Hitting the limit must have generated "max" events */
	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
 658
 659/*
 660 * Reclaim from @memcg until usage reaches @goal by writing to
 661 * memory.reclaim.
 662 *
 663 * This function will return false if the usage is already below the
 664 * goal.
 665 *
 666 * This function assumes that writing to memory.reclaim is the only
 667 * source of change in memory.current (no concurrent allocations or
 668 * reclaim).
 669 *
 670 * This function makes sure memory.reclaim is sane. It will return
 671 * false if memory.reclaim's error codes do not make sense, even if
 672 * the usage goal was satisfied.
 673 */
static bool reclaim_until(const char *memcg, long goal)
{
	char buf[64];
	int retries, err;
	long current, to_reclaim;
	bool reclaimed = false;	/* did memory.reclaim ever report success? */

	for (retries = 5; retries > 0; retries--) {
		current = cg_read_long(memcg, "memory.current");

		/* Done once usage is at/below goal (within 3%) */
		if (current < goal || values_close(current, goal, 3))
			break;
		/* Did memory.reclaim return 0 incorrectly? */
		else if (reclaimed)
			return false;

		/* Ask the kernel to reclaim exactly the remaining delta */
		to_reclaim = current - goal;
		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err)
			reclaimed = true;
		else if (err != -EAGAIN)
			return false;	/* only EAGAIN is a sane retry code */
	}
	return reclaimed;
}
 700
 701/*
 702 * This test checks that memory.reclaim reclaims the given
 703 * amount of memory (from both anon and file, if possible).
 704 */
 705static int test_memcg_reclaim(const char *root)
 706{
 707	int ret = KSFT_FAIL, fd, retries;
 708	char *memcg;
 709	long current, expected_usage;
 710
 711	memcg = cg_name(root, "memcg_test");
 712	if (!memcg)
 713		goto cleanup;
 714
 715	if (cg_create(memcg))
 716		goto cleanup;
 717
 718	current = cg_read_long(memcg, "memory.current");
 719	if (current != 0)
 720		goto cleanup;
 721
 722	fd = get_temp_fd();
 723	if (fd < 0)
 724		goto cleanup;
 725
 726	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
 727
 728	/*
 729	 * If swap is enabled, try to reclaim from both anon and file, else try
 730	 * to reclaim from file only.
 731	 */
 732	if (is_swap_enabled()) {
 733		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
 734		expected_usage = MB(100);
 735	} else
 736		expected_usage = MB(50);
 737
 738	/*
 739	 * Wait until current usage reaches the expected usage (or we run out of
 740	 * retries).
 741	 */
 742	retries = 5;
 743	while (!values_close(cg_read_long(memcg, "memory.current"),
 744			    expected_usage, 10)) {
 745		if (retries--) {
 746			sleep(1);
 747			continue;
 748		} else {
 749			fprintf(stderr,
 750				"failed to allocate %ld for memcg reclaim test\n",
 751				expected_usage);
 752			goto cleanup;
 753		}
 754	}
 755
 756	/*
 757	 * Reclaim until current reaches 30M, this makes sure we hit both anon
 758	 * and file if swap is enabled.
 759	 */
 760	if (!reclaim_until(memcg, MB(30)))
 761		goto cleanup;
 762
 763	ret = KSFT_PASS;
 764cleanup:
 765	cg_destroy(memcg);
 766	free(memcg);
 767	close(fd);
 768
 769	return ret;
 770}
 771
 772static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
 773{
 774	long mem_max = (long)arg;
 775	size_t size = MB(50);
 776	char *buf, *ptr;
 777	long mem_current, swap_current;
 778	int ret = -1;
 779
 780	buf = malloc(size);
 781	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
 782		*ptr = 0;
 783
 784	mem_current = cg_read_long(cgroup, "memory.current");
 785	if (!mem_current || !values_close(mem_current, mem_max, 3))
 786		goto cleanup;
 787
 788	swap_current = cg_read_long(cgroup, "memory.swap.current");
 789	if (!swap_current ||
 790	    !values_close(mem_current + swap_current, size, 3))
 791		goto cleanup;
 792
 793	ret = 0;
 794cleanup:
 795	free(buf);
 796	return ret;
 797}
 798
 799/*
 800 * This test checks that memory.swap.max limits the amount of
 801 * anonymous memory which can be swapped out.
 802 */
static int test_memcg_swap_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max;

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Pre-existing swap usage would skew the checks below */
	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	/* Both limits must start at "max" */
	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* Exactly one OOM and one kill expected from the run above */
	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	/* 50M fits in 30M mem + 30M swap, so this one must succeed */
	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
 861
 862/*
 863 * This test disables swapping and tries to allocate anonymous memory
 864 * up to OOM. Then it checks for oom and oom_kill events in
 865 * memory.events.
 866 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* No swap: the 100M allocation below must trigger an OOM kill */
	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* The killed process must be gone from cgroup.procs */
	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	/* Exactly one oom and one oom_kill event expected */
	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
 905
/* Arguments handed to the forked tcp_server() child. */
struct tcp_server_args {
	unsigned short port;	/* TCP port to bind and listen on */
	int ctl[2];		/* control pipe: [0] read by parent, [1] written by server */
};
 910
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	/* Keep only the write end of the control pipe */
	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	/* On bind failure report errno (e.g. EADDRINUSE) to the parent */
	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	/* Signal readiness to the parent by writing a zero */
	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	/*
	 * Stream 1M chunks until the client disconnects; buf content is
	 * never inspected (deliberately uninitialized payload).
	 */
	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;	/* client hung up: expected */
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}
 967
 968static int tcp_client(const char *cgroup, unsigned short port)
 969{
 970	const char server[] = "localhost";
 971	struct addrinfo *ai;
 972	char servport[6];
 973	int retries = 0x10; /* nice round number */
 974	int sk, ret;
 975
 976	snprintf(servport, sizeof(servport), "%hd", port);
 977	ret = getaddrinfo(server, servport, NULL, &ai);
 978	if (ret)
 979		return ret;
 980
 981	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
 982	if (sk < 0)
 983		goto free_ainfo;
 984
 985	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
 986	if (ret < 0)
 987		goto close_sk;
 988
 989	ret = KSFT_FAIL;
 990	while (retries--) {
 991		uint8_t buf[0x100000];
 992		long current, sock;
 993
 994		if (read(sk, buf, sizeof(buf)) <= 0)
 995			goto close_sk;
 996
 997		current = cg_read_long(cgroup, "memory.current");
 998		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");
 999
1000		if (current < 0 || sock < 0)
1001			goto close_sk;
1002
1003		if (values_close(current, sock, 10)) {
1004			ret = KSFT_PASS;
1005			break;
1006		}
1007	}
1008
1009close_sk:
1010	close(sk);
1011free_ainfo:
1012	freeaddrinfo(ai);
1013	return ret;
1014}
1015
1016/*
1017 * This test checks socket memory accounting.
1018 * The test forks a TCP server listens on a random port between 1000
1019 * and 61000. Once it gets a client connection, it starts writing to
1020 * its socket.
1021 * The TCP client interleaves reads from the socket with check whether
1022 * memory.current and memory.stat.sock are similar.
1023 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Retry with fresh random ports until bind succeeds (max 5 tries) */
	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		/* Server writes 0 when listening, or errno on bind failure */
		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		waitpid(pid, NULL, 0);
	}

	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	/* With all sockets closed, the "sock" counter must be back to 0 */
	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
1088
1089/*
1090 * This test disables swapping and tries to allocate anonymous memory
1091 * up to OOM with memory.group.oom set. Then it checks that all
1092 * processes in the leaf were killed. It also checks that oom_events
1093 * were propagated to the parent level.
1094 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long parent_oom_events;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	/* No swap, so the 100M allocation below must OOM the child */
	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	/* Group kill: one OOM takes out every process in the child */
	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* Every process in the child leaf must be gone */
	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	parent_oom_events = cg_read_key_long(
			parent, "memory.events", "oom_kill ");
	/*
	 * If memory_localevents is not enabled (the default), the parent should
	 * count OOM events in its children groups. Otherwise, it should not
	 * have observed any events.
	 */
	if (has_localevents && parent_oom_events != 0)
		goto cleanup;
	else if (!has_localevents && parent_oom_events <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}
1161
1162/*
1163 * This test disables swapping and tries to allocate anonymous memory
1164 * up to OOM with memory.group.oom set. Then it checks that all
1165 * processes in the parent and leaf were killed.
1166 */
static int test_memcg_oom_group_parent_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	/* Limit and group-kill flag on the PARENT this time */
	if (cg_write(parent, "memory.max", "80M"))
		goto cleanup;

	if (cg_write(parent, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(parent, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* Group kill on the parent must empty both levels */
	if (cg_test_proc_killed(child))
		goto cleanup;
	if (cg_test_proc_killed(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}
1217
1218/*
1219 * This test disables swapping and tries to allocate anonymous memory
1220 * up to OOM with memory.group.oom set. Then it checks that all
1221 * processes were killed except those set with OOM_SCORE_ADJ_MIN
1222 */
static int test_memcg_oom_group_score_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	int safe_pid;

	memcg = cg_name(root, "memcg_test_0");

	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.oom.group", "1"))
		goto cleanup;

	/* This process is exempt from the group kill via OOM_SCORE_ADJ_MIN */
	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
		goto cleanup;

	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/*
	 * NOTE(review): the test expects exactly 3 oom_kill events here;
	 * confirm against cg_run()/cg_run_nowait() fork behavior which
	 * victims are being counted.
	 */
	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
		goto cleanup;

	/* The protected process must still be alive; reap it ourselves */
	if (kill(safe_pid, SIGKILL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}
1269
/* Expand to a { function, "function" } table entry. */
#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);	/* test entry point, gets cgroup v2 root */
	const char *name;		/* printed in the ksft result lines */
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T
1291
int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, proc_status, ret = EXIT_SUCCESS;

	/* All tests require the unified (v2) cgroup hierarchy */
	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	/* Enable the memory controller for the root's children if needed */
	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	/* Record mount options that change some tests' expectations */
	proc_status = proc_mount_contains("memory_recursiveprot");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_recursiveprot = proc_status;

	proc_status = proc_mount_contains("memory_localevents");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_localevents = proc_status;

	/* Run every test; overall exit status fails if any test fails */
	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}