1 /* SPDX-License-Identifier: GPL-2.0 */
2 #define _GNU_SOURCE
3
4 #include <linux/limits.h>
5 #include <linux/oom.h>
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <sys/stat.h>
11 #include <sys/types.h>
12 #include <unistd.h>
13 #include <sys/socket.h>
14 #include <sys/wait.h>
15 #include <arpa/inet.h>
16 #include <netinet/in.h>
17 #include <netdb.h>
18 #include <errno.h>
19
20 #include "../kselftest.h"
21 #include "cgroup_util.h"
22
23 /*
24 * This test creates two nested cgroups with and without enabling
25 * the memory controller.
26 */
test_memcg_subtree_control(const char * root)27 static int test_memcg_subtree_control(const char *root)
28 {
29 char *parent, *child, *parent2, *child2;
30 int ret = KSFT_FAIL;
31 char buf[PAGE_SIZE];
32
33 /* Create two nested cgroups with the memory controller enabled */
34 parent = cg_name(root, "memcg_test_0");
35 child = cg_name(root, "memcg_test_0/memcg_test_1");
36 if (!parent || !child)
37 goto cleanup;
38
39 if (cg_create(parent))
40 goto cleanup;
41
42 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
43 goto cleanup;
44
45 if (cg_create(child))
46 goto cleanup;
47
48 if (cg_read_strstr(child, "cgroup.controllers", "memory"))
49 goto cleanup;
50
51 /* Create two nested cgroups without enabling memory controller */
52 parent2 = cg_name(root, "memcg_test_1");
53 child2 = cg_name(root, "memcg_test_1/memcg_test_1");
54 if (!parent2 || !child2)
55 goto cleanup;
56
57 if (cg_create(parent2))
58 goto cleanup;
59
60 if (cg_create(child2))
61 goto cleanup;
62
63 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
64 goto cleanup;
65
66 if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
67 goto cleanup;
68
69 ret = KSFT_PASS;
70
71 cleanup:
72 cg_destroy(child);
73 cg_destroy(parent);
74 free(parent);
75 free(child);
76
77 cg_destroy(child2);
78 cg_destroy(parent2);
79 free(parent2);
80 free(child2);
81
82 return ret;
83 }
84
alloc_anon_50M_check(const char * cgroup,void * arg)85 static int alloc_anon_50M_check(const char *cgroup, void *arg)
86 {
87 size_t size = MB(50);
88 char *buf, *ptr;
89 long anon, current;
90 int ret = -1;
91
92 buf = malloc(size);
93 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
94 *ptr = 0;
95
96 current = cg_read_long(cgroup, "memory.current");
97 if (current < size)
98 goto cleanup;
99
100 if (!values_close(size, current, 3))
101 goto cleanup;
102
103 anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
104 if (anon < 0)
105 goto cleanup;
106
107 if (!values_close(anon, current, 3))
108 goto cleanup;
109
110 ret = 0;
111 cleanup:
112 free(buf);
113 return ret;
114 }
115
/*
 * Populate 50M of pagecache through a temporary file and verify that
 * memory.current is at least that large and that the "file" entry of
 * memory.stat is within 10% of memory.current.
 *
 * @cgroup: cgroup whose counters are inspected
 * @arg: unused
 *
 * Returns 0 when the checks pass, -1 otherwise.
 */
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	const size_t bytes = MB(50);
	long usage, cached;
	int rc = -1;
	int tmp_fd = get_temp_fd();

	if (tmp_fd < 0)
		return -1;

	if (!alloc_pagecache(tmp_fd, bytes)) {
		usage = cg_read_long(cgroup, "memory.current");
		if (!(usage < bytes)) {
			cached = cg_read_key_long(cgroup, "memory.stat",
						  "file ");
			if (cached >= 0 && values_close(cached, usage, 10))
				rc = 0;
		}
	}

	close(tmp_fd);
	return rc;
}
147
148 /*
149 * This test create a memory cgroup, allocates
150 * some anonymous memory and some pagecache
151 * and check memory.current and some memory.stat values.
152 */
test_memcg_current(const char * root)153 static int test_memcg_current(const char *root)
154 {
155 int ret = KSFT_FAIL;
156 long current;
157 char *memcg;
158
159 memcg = cg_name(root, "memcg_test");
160 if (!memcg)
161 goto cleanup;
162
163 if (cg_create(memcg))
164 goto cleanup;
165
166 current = cg_read_long(memcg, "memory.current");
167 if (current != 0)
168 goto cleanup;
169
170 if (cg_run(memcg, alloc_anon_50M_check, NULL))
171 goto cleanup;
172
173 if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
174 goto cleanup;
175
176 ret = KSFT_PASS;
177
178 cleanup:
179 cg_destroy(memcg);
180 free(memcg);
181
182 return ret;
183 }
184
/*
 * Populate 50M of pagecache through the file descriptor that the caller
 * passed as an integer disguised in @arg. @cgroup is unused.
 *
 * Returns whatever alloc_pagecache() returns.
 */
static int alloc_pagecache_50M(const char *cgroup, void *arg)
{
	const int target_fd = (long)arg;

	return alloc_pagecache(target_fd, MB(50));
}
191
/*
 * Populate 50M of pagecache through the fd carried in @arg, then stay
 * alive, polling once per second, until getppid() no longer reports the
 * parent seen at start. @cgroup is unused.
 *
 * Returns -1 if the pagecache allocation fails, 0 once the parent changed.
 */
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	const int target_fd = (long)arg;
	const int launcher = getppid();

	if (alloc_pagecache(target_fd, MB(50)) != 0)
		return -1;

	for (;;) {
		if (getppid() != launcher)
			break;
		sleep(1);
	}

	return 0;
}
205
/*
 * Run alloc_anon() with the given arguments, then keep the process alive,
 * polling once per second, until getppid() no longer reports the parent
 * seen at start.
 *
 * Returns -1 if alloc_anon() fails, 0 once the parent changed.
 */
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	const int launcher = getppid();

	if (alloc_anon(cgroup, arg) != 0)
		return -1;

	for (;;) {
		if (getppid() != launcher)
			return 0;
		sleep(1);
	}
}
218
/*
 * Wait until the processes of @cgroup have been killed asynchronously
 * by the OOM killer, i.e. until cgroup.procs reads back empty.
 * Polls up to 10 times with a 100ms pause after each unsuccessful poll.
 *
 * Returns 0 as soon as cgroup.procs is empty, -1 on timeout.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int polls_left = 10;

	while (polls_left-- > 0) {
		if (!cg_read_strcmp(cgroup, "cgroup.procs", ""))
			return 0;

		usleep(100000);
	}

	return -1;
}
235
236 /*
237 * First, this test creates the following hierarchy:
238 * A memory.min = 50M, memory.max = 200M
239 * A/B memory.min = 50M, memory.current = 50M
240 * A/B/C memory.min = 75M, memory.current = 50M
241 * A/B/D memory.min = 25M, memory.current = 50M
242 * A/B/E memory.min = 500M, memory.current = 0
243 * A/B/F memory.min = 0, memory.current = 50M
244 *
245 * Usages are pagecache, but the test keeps a running
246 * process in every leaf cgroup.
247 * Then it creates A/G and creates a significant
248 * memory pressure in it.
249 *
250 * A/B memory.current ~= 50M
251 * A/B/C memory.current ~= 33M
252 * A/B/D memory.current ~= 17M
253 * A/B/E memory.current ~= 0
254 *
255 * After that it tries to allocate more than there is
256 * unprotected memory in A available, and checks
257 * checks that memory.min protects pagecache even
258 * in this case.
259 */
test_memcg_min(const char * root)260 static int test_memcg_min(const char *root)
261 {
262 int ret = KSFT_FAIL;
263 char *parent[3] = {NULL};
264 char *children[4] = {NULL};
265 long c[4];
266 int i, attempts;
267 int fd;
268
269 fd = get_temp_fd();
270 if (fd < 0)
271 goto cleanup;
272
273 parent[0] = cg_name(root, "memcg_test_0");
274 if (!parent[0])
275 goto cleanup;
276
277 parent[1] = cg_name(parent[0], "memcg_test_1");
278 if (!parent[1])
279 goto cleanup;
280
281 parent[2] = cg_name(parent[0], "memcg_test_2");
282 if (!parent[2])
283 goto cleanup;
284
285 if (cg_create(parent[0]))
286 goto cleanup;
287
288 if (cg_read_long(parent[0], "memory.min")) {
289 ret = KSFT_SKIP;
290 goto cleanup;
291 }
292
293 if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
294 goto cleanup;
295
296 if (cg_write(parent[0], "memory.max", "200M"))
297 goto cleanup;
298
299 if (cg_write(parent[0], "memory.swap.max", "0"))
300 goto cleanup;
301
302 if (cg_create(parent[1]))
303 goto cleanup;
304
305 if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
306 goto cleanup;
307
308 if (cg_create(parent[2]))
309 goto cleanup;
310
311 for (i = 0; i < ARRAY_SIZE(children); i++) {
312 children[i] = cg_name_indexed(parent[1], "child_memcg", i);
313 if (!children[i])
314 goto cleanup;
315
316 if (cg_create(children[i]))
317 goto cleanup;
318
319 if (i == 2)
320 continue;
321
322 cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
323 (void *)(long)fd);
324 }
325
326 if (cg_write(parent[0], "memory.min", "50M"))
327 goto cleanup;
328 if (cg_write(parent[1], "memory.min", "50M"))
329 goto cleanup;
330 if (cg_write(children[0], "memory.min", "75M"))
331 goto cleanup;
332 if (cg_write(children[1], "memory.min", "25M"))
333 goto cleanup;
334 if (cg_write(children[2], "memory.min", "500M"))
335 goto cleanup;
336 if (cg_write(children[3], "memory.min", "0"))
337 goto cleanup;
338
339 attempts = 0;
340 while (!values_close(cg_read_long(parent[1], "memory.current"),
341 MB(150), 3)) {
342 if (attempts++ > 5)
343 break;
344 sleep(1);
345 }
346
347 if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
348 goto cleanup;
349
350 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
351 goto cleanup;
352
353 for (i = 0; i < ARRAY_SIZE(children); i++)
354 c[i] = cg_read_long(children[i], "memory.current");
355
356 if (!values_close(c[0], MB(33), 10))
357 goto cleanup;
358
359 if (!values_close(c[1], MB(17), 10))
360 goto cleanup;
361
362 if (!values_close(c[2], 0, 1))
363 goto cleanup;
364
365 if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
366 goto cleanup;
367
368 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
369 goto cleanup;
370
371 ret = KSFT_PASS;
372
373 cleanup:
374 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
375 if (!children[i])
376 continue;
377
378 cg_destroy(children[i]);
379 free(children[i]);
380 }
381
382 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
383 if (!parent[i])
384 continue;
385
386 cg_destroy(parent[i]);
387 free(parent[i]);
388 }
389 close(fd);
390 return ret;
391 }
392
393 /*
394 * First, this test creates the following hierarchy:
395 * A memory.low = 50M, memory.max = 200M
396 * A/B memory.low = 50M, memory.current = 50M
397 * A/B/C memory.low = 75M, memory.current = 50M
398 * A/B/D memory.low = 25M, memory.current = 50M
399 * A/B/E memory.low = 500M, memory.current = 0
400 * A/B/F memory.low = 0, memory.current = 50M
401 *
402 * Usages are pagecache.
403 * Then it creates A/G an creates a significant
404 * memory pressure in it.
405 *
406 * Then it checks actual memory usages and expects that:
407 * A/B memory.current ~= 50M
408 * A/B/ memory.current ~= 33M
409 * A/B/D memory.current ~= 17M
410 * A/B/E memory.current ~= 0
411 *
412 * After that it tries to allocate more than there is
413 * unprotected memory in A available,
414 * and checks low and oom events in memory.events.
415 */
test_memcg_low(const char * root)416 static int test_memcg_low(const char *root)
417 {
418 int ret = KSFT_FAIL;
419 char *parent[3] = {NULL};
420 char *children[4] = {NULL};
421 long low, oom;
422 long c[4];
423 int i;
424 int fd;
425
426 fd = get_temp_fd();
427 if (fd < 0)
428 goto cleanup;
429
430 parent[0] = cg_name(root, "memcg_test_0");
431 if (!parent[0])
432 goto cleanup;
433
434 parent[1] = cg_name(parent[0], "memcg_test_1");
435 if (!parent[1])
436 goto cleanup;
437
438 parent[2] = cg_name(parent[0], "memcg_test_2");
439 if (!parent[2])
440 goto cleanup;
441
442 if (cg_create(parent[0]))
443 goto cleanup;
444
445 if (cg_read_long(parent[0], "memory.low"))
446 goto cleanup;
447
448 if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
449 goto cleanup;
450
451 if (cg_write(parent[0], "memory.max", "200M"))
452 goto cleanup;
453
454 if (cg_write(parent[0], "memory.swap.max", "0"))
455 goto cleanup;
456
457 if (cg_create(parent[1]))
458 goto cleanup;
459
460 if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
461 goto cleanup;
462
463 if (cg_create(parent[2]))
464 goto cleanup;
465
466 for (i = 0; i < ARRAY_SIZE(children); i++) {
467 children[i] = cg_name_indexed(parent[1], "child_memcg", i);
468 if (!children[i])
469 goto cleanup;
470
471 if (cg_create(children[i]))
472 goto cleanup;
473
474 if (i == 2)
475 continue;
476
477 if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
478 goto cleanup;
479 }
480
481 if (cg_write(parent[0], "memory.low", "50M"))
482 goto cleanup;
483 if (cg_write(parent[1], "memory.low", "50M"))
484 goto cleanup;
485 if (cg_write(children[0], "memory.low", "75M"))
486 goto cleanup;
487 if (cg_write(children[1], "memory.low", "25M"))
488 goto cleanup;
489 if (cg_write(children[2], "memory.low", "500M"))
490 goto cleanup;
491 if (cg_write(children[3], "memory.low", "0"))
492 goto cleanup;
493
494 if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
495 goto cleanup;
496
497 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
498 goto cleanup;
499
500 for (i = 0; i < ARRAY_SIZE(children); i++)
501 c[i] = cg_read_long(children[i], "memory.current");
502
503 if (!values_close(c[0], MB(33), 10))
504 goto cleanup;
505
506 if (!values_close(c[1], MB(17), 10))
507 goto cleanup;
508
509 if (!values_close(c[2], 0, 1))
510 goto cleanup;
511
512 if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
513 fprintf(stderr,
514 "memory.low prevents from allocating anon memory\n");
515 goto cleanup;
516 }
517
518 for (i = 0; i < ARRAY_SIZE(children); i++) {
519 oom = cg_read_key_long(children[i], "memory.events", "oom ");
520 low = cg_read_key_long(children[i], "memory.events", "low ");
521
522 if (oom)
523 goto cleanup;
524 if (i < 2 && low <= 0)
525 goto cleanup;
526 if (i >= 2 && low)
527 goto cleanup;
528 }
529
530 ret = KSFT_PASS;
531
532 cleanup:
533 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
534 if (!children[i])
535 continue;
536
537 cg_destroy(children[i]);
538 free(children[i]);
539 }
540
541 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
542 if (!parent[i])
543 continue;
544
545 cg_destroy(parent[i]);
546 free(parent[i]);
547 }
548 close(fd);
549 return ret;
550 }
551
/*
 * Try to populate 50M of pagecache through a temporary file and check
 * that the cgroup's memory.current ends up above 29M and at most 30M.
 *
 * @cgroup: cgroup whose memory.current is read
 * @arg: unused
 *
 * Returns 0 when memory.current is inside that window, -1 otherwise.
 */
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	const size_t bytes = MB(50);
	long usage;
	int rc = -1;
	int tmp_fd = get_temp_fd();

	if (tmp_fd < 0)
		return -1;

	if (!alloc_pagecache(tmp_fd, bytes)) {
		usage = cg_read_long(cgroup, "memory.current");
		if (usage > MB(29) && usage <= MB(30))
			rc = 0;
	}

	close(tmp_fd);
	return rc;
}
577
578 /*
579 * This test checks that memory.high limits the amount of
580 * memory which can be consumed by either anonymous memory
581 * or pagecache.
582 */
test_memcg_high(const char * root)583 static int test_memcg_high(const char *root)
584 {
585 int ret = KSFT_FAIL;
586 char *memcg;
587 long high;
588
589 memcg = cg_name(root, "memcg_test");
590 if (!memcg)
591 goto cleanup;
592
593 if (cg_create(memcg))
594 goto cleanup;
595
596 if (cg_read_strcmp(memcg, "memory.high", "max\n"))
597 goto cleanup;
598
599 if (cg_write(memcg, "memory.swap.max", "0"))
600 goto cleanup;
601
602 if (cg_write(memcg, "memory.high", "30M"))
603 goto cleanup;
604
605 if (cg_run(memcg, alloc_anon, (void *)MB(100)))
606 goto cleanup;
607
608 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
609 goto cleanup;
610
611 if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
612 goto cleanup;
613
614 high = cg_read_key_long(memcg, "memory.events", "high ");
615 if (high <= 0)
616 goto cleanup;
617
618 ret = KSFT_PASS;
619
620 cleanup:
621 cg_destroy(memcg);
622 free(memcg);
623
624 return ret;
625 }
626
627 /*
628 * This test checks that memory.max limits the amount of
629 * memory which can be consumed by either anonymous memory
630 * or pagecache.
631 */
test_memcg_max(const char * root)632 static int test_memcg_max(const char *root)
633 {
634 int ret = KSFT_FAIL;
635 char *memcg;
636 long current, max;
637
638 memcg = cg_name(root, "memcg_test");
639 if (!memcg)
640 goto cleanup;
641
642 if (cg_create(memcg))
643 goto cleanup;
644
645 if (cg_read_strcmp(memcg, "memory.max", "max\n"))
646 goto cleanup;
647
648 if (cg_write(memcg, "memory.swap.max", "0"))
649 goto cleanup;
650
651 if (cg_write(memcg, "memory.max", "30M"))
652 goto cleanup;
653
654 /* Should be killed by OOM killer */
655 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
656 goto cleanup;
657
658 if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
659 goto cleanup;
660
661 current = cg_read_long(memcg, "memory.current");
662 if (current > MB(30) || !current)
663 goto cleanup;
664
665 max = cg_read_key_long(memcg, "memory.events", "max ");
666 if (max <= 0)
667 goto cleanup;
668
669 ret = KSFT_PASS;
670
671 cleanup:
672 cg_destroy(memcg);
673 free(memcg);
674
675 return ret;
676 }
677
alloc_anon_50M_check_swap(const char * cgroup,void * arg)678 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
679 {
680 long mem_max = (long)arg;
681 size_t size = MB(50);
682 char *buf, *ptr;
683 long mem_current, swap_current;
684 int ret = -1;
685
686 buf = malloc(size);
687 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
688 *ptr = 0;
689
690 mem_current = cg_read_long(cgroup, "memory.current");
691 if (!mem_current || !values_close(mem_current, mem_max, 3))
692 goto cleanup;
693
694 swap_current = cg_read_long(cgroup, "memory.swap.current");
695 if (!swap_current ||
696 !values_close(mem_current + swap_current, size, 3))
697 goto cleanup;
698
699 ret = 0;
700 cleanup:
701 free(buf);
702 return ret;
703 }
704
705 /*
706 * This test checks that memory.swap.max limits the amount of
707 * anonymous memory which can be swapped out.
708 */
test_memcg_swap_max(const char * root)709 static int test_memcg_swap_max(const char *root)
710 {
711 int ret = KSFT_FAIL;
712 char *memcg;
713 long max;
714
715 if (!is_swap_enabled())
716 return KSFT_SKIP;
717
718 memcg = cg_name(root, "memcg_test");
719 if (!memcg)
720 goto cleanup;
721
722 if (cg_create(memcg))
723 goto cleanup;
724
725 if (cg_read_long(memcg, "memory.swap.current")) {
726 ret = KSFT_SKIP;
727 goto cleanup;
728 }
729
730 if (cg_read_strcmp(memcg, "memory.max", "max\n"))
731 goto cleanup;
732
733 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
734 goto cleanup;
735
736 if (cg_write(memcg, "memory.swap.max", "30M"))
737 goto cleanup;
738
739 if (cg_write(memcg, "memory.max", "30M"))
740 goto cleanup;
741
742 /* Should be killed by OOM killer */
743 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
744 goto cleanup;
745
746 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
747 goto cleanup;
748
749 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
750 goto cleanup;
751
752 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
753 goto cleanup;
754
755 max = cg_read_key_long(memcg, "memory.events", "max ");
756 if (max <= 0)
757 goto cleanup;
758
759 ret = KSFT_PASS;
760
761 cleanup:
762 cg_destroy(memcg);
763 free(memcg);
764
765 return ret;
766 }
767
768 /*
769 * This test disables swapping and tries to allocate anonymous memory
770 * up to OOM. Then it checks for oom and oom_kill events in
771 * memory.events.
772 */
test_memcg_oom_events(const char * root)773 static int test_memcg_oom_events(const char *root)
774 {
775 int ret = KSFT_FAIL;
776 char *memcg;
777
778 memcg = cg_name(root, "memcg_test");
779 if (!memcg)
780 goto cleanup;
781
782 if (cg_create(memcg))
783 goto cleanup;
784
785 if (cg_write(memcg, "memory.max", "30M"))
786 goto cleanup;
787
788 if (cg_write(memcg, "memory.swap.max", "0"))
789 goto cleanup;
790
791 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
792 goto cleanup;
793
794 if (cg_read_strcmp(memcg, "cgroup.procs", ""))
795 goto cleanup;
796
797 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
798 goto cleanup;
799
800 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
801 goto cleanup;
802
803 ret = KSFT_PASS;
804
805 cleanup:
806 cg_destroy(memcg);
807 free(memcg);
808
809 return ret;
810 }
811
/* Arguments handed to tcp_server() */
struct tcp_server_args {
	unsigned short port;	/* port to bind, host byte order */
	int ctl[2];		/* pipe: [0] is closed by the server,
				 * [1] carries the bind/listen status */
};

/*
 * TCP server half of the socket accounting test.
 *
 * Binds an IPv6 stream socket to the requested port on the wildcard
 * address and reports through the control pipe: either the errno of a
 * failed bind(), or 0 once the socket is listening. It then accepts one
 * connection and writes 1M chunks to it until the write fails.
 *
 * @cgroup: unused
 * @arg: pointer to a struct tcp_server_args
 *
 * Returns 0 if the stream ended with ECONNRESET, -1 on any other
 * failure.
 */
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		int err = errno;

		/* Tell the peer why; nothing more to do if that fails too */
		if (write(ctl_fd, &err, sizeof(err)) != sizeof(err))
			ret = -1;
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	/* Report "ready" (0) through the control pipe */
	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	/*
	 * From here on only an ECONNRESET from the data stream counts as
	 * success, so a failing accept() must not leave ret at 0.
	 */
	ret = -1;

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	for (;;) {
		/*
		 * static: zero-filled payload, keeps the 1M buffer off
		 * the stack and avoids sending indeterminate bytes.
		 */
		static uint8_t buf[0x100000];
		ssize_t sent = write(client_sk, buf, sizeof(buf));

		if (sent <= 0) {
			/* errno is only meaningful when write() failed */
			if (sent < 0 && errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}
873
/*
 * TCP client half of the socket accounting test.
 *
 * Connects to localhost:@port and reads 1M chunks from the socket, up
 * to 16 times. After every read it compares the cgroup's memory.current
 * with the "sock" entry of memory.stat and passes as soon as the two are
 * within 10% of each other.
 *
 * @cgroup: cgroup whose memory.current / memory.stat are read
 * @port: TCP port the server listens on, host byte order
 *
 * Returns KSFT_PASS when the values converge, the non-zero
 * getaddrinfo() error code if name resolution fails, and KSFT_FAIL on
 * any other failure.
 */
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;

	/*
	 * The port is unsigned: with "%hd" anything above 32767 would be
	 * printed as a negative number and truncated by the 6-byte buffer.
	 */
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	/*
	 * getaddrinfo() left 0 in ret; reset it so that a socket() or
	 * connect() failure can never be mistaken for a pass.
	 */
	ret = KSFT_FAIL;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	if (connect(sk, ai->ai_addr, ai->ai_addrlen) < 0)
		goto close_sk;

	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		/* Socket memory can never exceed the total charge */
		if (current < sock)
			goto close_sk;

		if (values_close(current, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}
924
925 /*
926 * This test checks socket memory accounting.
927 * The test forks a TCP server listens on a random port between 1000
928 * and 61000. Once it gets a client connection, it starts writing to
929 * its socket.
930 * The TCP client interleaves reads from the socket with check whether
931 * memory.current and memory.stat.sock are similar.
932 */
test_memcg_sock(const char * root)933 static int test_memcg_sock(const char *root)
934 {
935 int bind_retries = 5, ret = KSFT_FAIL, pid, err;
936 unsigned short port;
937 char *memcg;
938
939 memcg = cg_name(root, "memcg_test");
940 if (!memcg)
941 goto cleanup;
942
943 if (cg_create(memcg))
944 goto cleanup;
945
946 while (bind_retries--) {
947 struct tcp_server_args args;
948
949 if (pipe(args.ctl))
950 goto cleanup;
951
952 port = args.port = 1000 + rand() % 60000;
953
954 pid = cg_run_nowait(memcg, tcp_server, &args);
955 if (pid < 0)
956 goto cleanup;
957
958 close(args.ctl[1]);
959 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
960 goto cleanup;
961 close(args.ctl[0]);
962
963 if (!err)
964 break;
965 if (err != EADDRINUSE)
966 goto cleanup;
967
968 waitpid(pid, NULL, 0);
969 }
970
971 if (err == EADDRINUSE) {
972 ret = KSFT_SKIP;
973 goto cleanup;
974 }
975
976 if (tcp_client(memcg, port) != KSFT_PASS)
977 goto cleanup;
978
979 waitpid(pid, &err, 0);
980 if (WEXITSTATUS(err))
981 goto cleanup;
982
983 if (cg_read_long(memcg, "memory.current") < 0)
984 goto cleanup;
985
986 if (cg_read_key_long(memcg, "memory.stat", "sock "))
987 goto cleanup;
988
989 ret = KSFT_PASS;
990
991 cleanup:
992 cg_destroy(memcg);
993 free(memcg);
994
995 return ret;
996 }
997
998 /*
999 * This test disables swapping and tries to allocate anonymous memory
1000 * up to OOM with memory.group.oom set. Then it checks that all
1001 * processes in the leaf (but not the parent) were killed.
1002 */
test_memcg_oom_group_leaf_events(const char * root)1003 static int test_memcg_oom_group_leaf_events(const char *root)
1004 {
1005 int ret = KSFT_FAIL;
1006 char *parent, *child;
1007
1008 parent = cg_name(root, "memcg_test_0");
1009 child = cg_name(root, "memcg_test_0/memcg_test_1");
1010
1011 if (!parent || !child)
1012 goto cleanup;
1013
1014 if (cg_create(parent))
1015 goto cleanup;
1016
1017 if (cg_create(child))
1018 goto cleanup;
1019
1020 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
1021 goto cleanup;
1022
1023 if (cg_write(child, "memory.max", "50M"))
1024 goto cleanup;
1025
1026 if (cg_write(child, "memory.swap.max", "0"))
1027 goto cleanup;
1028
1029 if (cg_write(child, "memory.oom.group", "1"))
1030 goto cleanup;
1031
1032 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1033 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1034 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1035 if (!cg_run(child, alloc_anon, (void *)MB(100)))
1036 goto cleanup;
1037
1038 if (cg_test_proc_killed(child))
1039 goto cleanup;
1040
1041 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
1042 goto cleanup;
1043
1044 if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
1045 goto cleanup;
1046
1047 ret = KSFT_PASS;
1048
1049 cleanup:
1050 if (child)
1051 cg_destroy(child);
1052 if (parent)
1053 cg_destroy(parent);
1054 free(child);
1055 free(parent);
1056
1057 return ret;
1058 }
1059
1060 /*
1061 * This test disables swapping and tries to allocate anonymous memory
1062 * up to OOM with memory.group.oom set. Then it checks that all
1063 * processes in the parent and leaf were killed.
1064 */
test_memcg_oom_group_parent_events(const char * root)1065 static int test_memcg_oom_group_parent_events(const char *root)
1066 {
1067 int ret = KSFT_FAIL;
1068 char *parent, *child;
1069
1070 parent = cg_name(root, "memcg_test_0");
1071 child = cg_name(root, "memcg_test_0/memcg_test_1");
1072
1073 if (!parent || !child)
1074 goto cleanup;
1075
1076 if (cg_create(parent))
1077 goto cleanup;
1078
1079 if (cg_create(child))
1080 goto cleanup;
1081
1082 if (cg_write(parent, "memory.max", "80M"))
1083 goto cleanup;
1084
1085 if (cg_write(parent, "memory.swap.max", "0"))
1086 goto cleanup;
1087
1088 if (cg_write(parent, "memory.oom.group", "1"))
1089 goto cleanup;
1090
1091 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1092 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1093 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1094
1095 if (!cg_run(child, alloc_anon, (void *)MB(100)))
1096 goto cleanup;
1097
1098 if (cg_test_proc_killed(child))
1099 goto cleanup;
1100 if (cg_test_proc_killed(parent))
1101 goto cleanup;
1102
1103 ret = KSFT_PASS;
1104
1105 cleanup:
1106 if (child)
1107 cg_destroy(child);
1108 if (parent)
1109 cg_destroy(parent);
1110 free(child);
1111 free(parent);
1112
1113 return ret;
1114 }
1115
1116 /*
1117 * This test disables swapping and tries to allocate anonymous memory
1118 * up to OOM with memory.group.oom set. Then it checks that all
1119 * processes were killed except those set with OOM_SCORE_ADJ_MIN
1120 */
test_memcg_oom_group_score_events(const char * root)1121 static int test_memcg_oom_group_score_events(const char *root)
1122 {
1123 int ret = KSFT_FAIL;
1124 char *memcg;
1125 int safe_pid;
1126
1127 memcg = cg_name(root, "memcg_test_0");
1128
1129 if (!memcg)
1130 goto cleanup;
1131
1132 if (cg_create(memcg))
1133 goto cleanup;
1134
1135 if (cg_write(memcg, "memory.max", "50M"))
1136 goto cleanup;
1137
1138 if (cg_write(memcg, "memory.swap.max", "0"))
1139 goto cleanup;
1140
1141 if (cg_write(memcg, "memory.oom.group", "1"))
1142 goto cleanup;
1143
1144 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1145 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
1146 goto cleanup;
1147
1148 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1149 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
1150 goto cleanup;
1151
1152 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
1153 goto cleanup;
1154
1155 if (kill(safe_pid, SIGKILL))
1156 goto cleanup;
1157
1158 ret = KSFT_PASS;
1159
1160 cleanup:
1161 if (memcg)
1162 cg_destroy(memcg);
1163 free(memcg);
1164
1165 return ret;
1166 }
1167
1168
1169 #define T(x) { x, #x }
1170 struct memcg_test {
1171 int (*fn)(const char *root);
1172 const char *name;
1173 } tests[] = {
1174 T(test_memcg_subtree_control),
1175 T(test_memcg_current),
1176 T(test_memcg_min),
1177 T(test_memcg_low),
1178 T(test_memcg_high),
1179 T(test_memcg_max),
1180 T(test_memcg_oom_events),
1181 T(test_memcg_swap_max),
1182 T(test_memcg_sock),
1183 T(test_memcg_oom_group_leaf_events),
1184 T(test_memcg_oom_group_parent_events),
1185 T(test_memcg_oom_group_score_events),
1186 };
1187 #undef T
1188
main(int argc,char ** argv)1189 int main(int argc, char **argv)
1190 {
1191 char root[PATH_MAX];
1192 int i, ret = EXIT_SUCCESS;
1193
1194 if (cg_find_unified_root(root, sizeof(root)))
1195 ksft_exit_skip("cgroup v2 isn't mounted\n");
1196
1197 /*
1198 * Check that memory controller is available:
1199 * memory is listed in cgroup.controllers
1200 */
1201 if (cg_read_strstr(root, "cgroup.controllers", "memory"))
1202 ksft_exit_skip("memory controller isn't available\n");
1203
1204 for (i = 0; i < ARRAY_SIZE(tests); i++) {
1205 switch (tests[i].fn(root)) {
1206 case KSFT_PASS:
1207 ksft_test_result_pass("%s\n", tests[i].name);
1208 break;
1209 case KSFT_SKIP:
1210 ksft_test_result_skip("%s\n", tests[i].name);
1211 break;
1212 default:
1213 ret = EXIT_FAILURE;
1214 ksft_test_result_fail("%s\n", tests[i].name);
1215 break;
1216 }
1217 }
1218
1219 return ret;
1220 }
1221