/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE

#include <linux/limits.h>
#include <linux/oom.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>	/* for kill() */
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
#include <sys/mman.h>

#include "../kselftest.h"
#include "cgroup_util.h"

static bool has_localevents;
static bool has_recursiveprot;

/*
 * This test creates two nested cgroups with and without enabling
 * the memory controller.
 */
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup_free;

	if (cg_create(parent))
		goto cleanup_free;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup_parent;

	if (cg_create(child))
		goto cleanup_parent;

	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup_child;

	/* Create two nested cgroups without enabling memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup_free2;

	if (cg_create(parent2))
		goto cleanup_free2;

	if (cg_create(child2))
		goto cleanup_parent2;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup_all;

	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup_all;

	ret = KSFT_PASS;

cleanup_all:
	cg_destroy(child2);
cleanup_parent2:
	cg_destroy(parent2);
cleanup_free2:
	free(parent2);
	free(child2);
cleanup_child:
	cg_destroy(child);
cleanup_parent:
	cg_destroy(parent);
cleanup_free:
	free(parent);
	free(child);

	return ret;
}

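/*
 * Allocate 50M of anonymous memory and check that both memory.current
 * and the "anon" counter in memory.stat roughly match the allocation.
 */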
static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	char *buf, *ptr;
	long anon, current;
	int ret = -1;

	buf = malloc(size);
	if (!buf)
		return -1;

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	if (!values_close(size, current, 3))
		goto cleanup;

	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
	if (anon < 0)
		goto cleanup;

	if (!values_close(anon, current, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

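/*
 * Create 50M of pagecache in a temporary file and check that
 * memory.current and the "file" counter in memory.stat reflect it.
 */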
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, file;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	file = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file < 0)
		goto cleanup;

	if (!values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test creates a memory cgroup, allocates
 * some anonymous memory and some pagecache,
 * and checks memory.current and some memory.stat values.
 */
static int test_memcg_current(const char *root)
{
	int ret = KSFT_FAIL;
	long current;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

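/*
 * Allocate 50M of pagecache and stay alive until the parent (the test
 * process) exits, so the memory stays charged to the cgroup.
 */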
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int ppid = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

	while (getppid() == ppid)
		sleep(1);

	return 0;
}

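/*
 * Allocate the given amount of anonymous memory and stay alive until
 * the parent (the test process) exits.
 */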
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	int ppid = getppid();
	size_t size = (unsigned long)arg;
	char *buf, *ptr;

	buf = malloc(size);
	if (!buf)
		return -1;

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	while (getppid() == ppid)
		sleep(1);

	free(buf);
	return 0;
}

/*
 * Wait until processes are killed asynchronously by the OOM killer.
 * If we exceed a timeout, fail.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int limit;

	for (limit = 10; limit > 0; limit--) {
		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
			return 0;

		usleep(100000);
	}
	return -1;
}

static bool reclaim_until(const char *memcg, long goal);

/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 0,    memory.max = 200M
 * A/B     memory.min = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 0,    memory.current = 50M
 * A/B/F   memory.min = 500M, memory.current = 0
 *
 * (or memory.low if we test soft protection)
 *
 * Usages are pagecache and the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and generates significant
 * memory pressure in A.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 29M
 * A/B/D  memory.current ~= 21M
 * A/B/E  memory.current ~= 0
 * A/B/F  memory.current  = 0
 * (for the origin of the numbers, see the model in memcg_protection.m.)
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available, and checks that:
 * a) memory.min protects pagecache even in this case,
 * b) memory.low allows reclaiming page cache with low events.
 *
 * Then we try to reclaim from A/B/C using memory.reclaim until its
 * usage reaches 10M.
 * This makes sure that:
 * (a) We ignore the protection of the reclaim target memcg.
 * (b) The previously calculated emin value (~29M) should be dismissed.
 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[1], attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(29), 10))
		goto cleanup;

	if (!values_close(c[1], MB(21), 10))
		goto cleanup;

	if (c[3] != 0)
		goto cleanup;

	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents allocating anon memory\n");
		goto cleanup;
	}

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	if (!reclaim_until(children[0], MB(10)))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		if (i > no_low_events_index && low)
			goto cleanup;
	}

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}

static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}

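/*
 * Allocate 50M of pagecache in a cgroup whose limit (memory.high or
 * memory.max) is 30M, and check that usage settles around 30M.
 */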
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, high, max;
	int fd;

	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (!values_close(current, MB(30), 5))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
		goto cleanup;

	/* memory.high caps usage at 30M, so the 50M pagecache check must fail */
	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

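/*
 * mmap() and mlock() the given amount of anonymous memory in a single
 * shot, then unmap it again.
 */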
static int alloc_anon_mlock(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	void *buf;

	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
		   0, 0);
	if (buf == MAP_FAILED)
		return -1;

	mlock(buf, size);
	munmap(buf, size);
	return 0;
}

/*
 * This test checks that memory.high is able to throttle a big single-shot
 * allocation, i.e. a large allocation within one kernel entry.
 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * Reclaim from @memcg until usage reaches @goal by writing to
 * memory.reclaim.
 *
 * This function will return false if the usage is already below the
 * goal.
 *
 * This function assumes that writing to memory.reclaim is the only
 * source of change in memory.current (no concurrent allocations or
 * reclaim).
 *
 * This function makes sure memory.reclaim is sane. It will return
 * false if memory.reclaim's error codes do not make sense, even if
 * the usage goal was satisfied.
 */
static bool reclaim_until(const char *memcg, long goal)
{
	char buf[64];
	int retries, err;
	long current, to_reclaim;
	bool reclaimed = false;

	for (retries = 5; retries > 0; retries--) {
		current = cg_read_long(memcg, "memory.current");

		if (current < goal || values_close(current, goal, 3))
			break;
		/* Did memory.reclaim return 0 incorrectly? */
		else if (reclaimed)
			return false;

		to_reclaim = current - goal;
		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err)
			reclaimed = true;
		else if (err != -EAGAIN)
			return false;
	}
	return reclaimed;
}

/*
 * This test checks that memory.reclaim reclaims the given
 * amount of memory (from both anon and file, if possible).
 */
static int test_memcg_reclaim(const char *root)
{
	int ret = KSFT_FAIL, fd = -1, retries;
	char *memcg;
	long current, expected_usage;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);

	/*
	 * If swap is enabled, try to reclaim from both anon and file, else try
	 * to reclaim from file only.
	 */
	if (is_swap_enabled()) {
		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
		expected_usage = MB(100);
	} else
		expected_usage = MB(50);

	/*
	 * Wait until current usage reaches the expected usage (or we run out of
	 * retries).
	 */
	retries = 5;
	while (!values_close(cg_read_long(memcg, "memory.current"),
			     expected_usage, 10)) {
		if (retries--) {
			sleep(1);
			continue;
		} else {
			fprintf(stderr,
				"failed to allocate %ld for memcg reclaim test\n",
				expected_usage);
			goto cleanup;
		}
	}

	/*
	 * Reclaim until current reaches 30M, this makes sure we hit both anon
	 * and file if swap is enabled.
	 */
	if (!reclaim_until(memcg, MB(30)))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	cg_destroy(memcg);
	free(memcg);
	if (fd >= 0)
		close(fd);

	return ret;
}

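/*
 * Allocate 50M of anonymous memory in a cgroup with memory.max set to
 * @arg and check that the excess was pushed out to swap: memory.current
 * stays near the limit while memory.current + memory.swap.current
 * covers the whole allocation.
 */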
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
	long mem_max = (long)arg;
	size_t size = MB(50);
	char *buf, *ptr;
	long mem_current, swap_current;
	int ret = -1;

	buf = malloc(size);
	if (!buf)
		return -1;

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	mem_current = cg_read_long(cgroup, "memory.current");
	if (!mem_current || !values_close(mem_current, mem_max, 3))
		goto cleanup;

	swap_current = cg_read_long(cgroup, "memory.swap.current");
	if (!swap_current ||
	    !values_close(mem_current + swap_current, size, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out.
 */
static int test_memcg_swap_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max;

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM. Then it checks for oom and oom_kill events in
 * memory.events.
 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

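/*
 * Arguments for the forked TCP server: the port to listen on and a
 * control pipe used to report the bind() status back to the test.
 */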
struct tcp_server_args {
	unsigned short port;
	int ctl[2];
};

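/*
 * Bind to the requested port, report the bind() result over the control
 * pipe, and then stream data to the first accepted client until the
 * connection is reset.
 */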
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}

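/*
 * Connect to the server on localhost, keep reading from the socket, and
 * check that memory.current and the "sock" counter of memory.stat
 * converge to similar values.
 */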
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;

	/* %hu, not %hd: ports above SHRT_MAX must not print as negative */
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		if (values_close(current, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}

/*
 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between
 * 1000 and 61000. Once it gets a client connection, it starts writing
 * to its socket.
 * The TCP client interleaves reads from the socket with checks that
 * memory.current and memory.stat.sock are similar.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		waitpid(pid, NULL, 0);
	}

	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the leaf were killed. It also checks that oom_kill
 * events were propagated to the parent level.
 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long parent_oom_events;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	parent_oom_events = cg_read_key_long(
			parent, "memory.events", "oom_kill ");
	/*
	 * If memory_localevents is not enabled (the default), the parent should
	 * count OOM events in its children groups. Otherwise, it should not
	 * have observed any events.
	 */
	if (has_localevents && parent_oom_events != 0)
		goto cleanup;
	else if (!has_localevents && parent_oom_events <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the parent and leaf were killed.
 */
static int test_memcg_oom_group_parent_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "memory.max", "80M"))
		goto cleanup;

	if (cg_write(parent, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(parent, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;
	if (cg_test_proc_killed(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes were killed except those set with OOM_SCORE_ADJ_MIN.
 */
static int test_memcg_oom_group_score_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	int safe_pid;

	memcg = cg_name(root, "memcg_test_0");

	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.oom.group", "1"))
		goto cleanup;

	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
		goto cleanup;

	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
		goto cleanup;

	if (kill(safe_pid, SIGKILL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}

#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T

int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, proc_status, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that the memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	proc_status = proc_mount_contains("memory_recursiveprot");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_recursiveprot = proc_status;

	proc_status = proc_mount_contains("memory_localevents");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_localevents = proc_status;

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}