1 /*
2  * Copyright (C) 2012-2017  Red Hat, Inc.
3  *
4  * This program is free software;  you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY;  without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12  * the GNU General Public License for more details.
13  *
14  * Description:
15  *
16  * The program is designed to test max_map_count tunable file
17  *
 * The kernel documentation says that:
19  * /proc/sys/vm/max_map_count contains the maximum number of memory map
20  * areas a process may have. Memory map areas are used as a side-effect
21  * of calling malloc, directly by mmap and mprotect, and also when
22  * loading shared libraries.
23  *
 * Each process has its own maps file, /proc/[pid]/maps, in which each line
 * describes one map entry, so the number of maps can be calculated by
 * counting the lines of that file in order to check that the tunable works.
27  *
 * The program invokes mmap() repeatedly until it returns MAP_FAILED, then
 * reads the process's maps file /proc/[pid]/maps, saves the number of lines
 * in the map_count variable, and compares it with /proc/sys/vm/max_map_count;
 * map_count should be greater than max_map_count by 1.
32  *
33  * Note: On some architectures there is a special vma VSYSCALL, which
34  * is allocated without incrementing mm->map_count variable. On these
35  * architectures each /proc/<pid>/maps has at the end:
36  * ...
37  * ...
38  * ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0   [vsyscall]
39  *
40  * so we ignore this line during /proc/[pid]/maps reading.
41  */
42 
43 #define _GNU_SOURCE
44 #include <sys/wait.h>
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <stdbool.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <sys/utsname.h>
51 #include "mem.h"
52 
/* First max_map_count value programmed by max_map_count_test(). */
#define MAP_COUNT_DEFAULT	1024
/* Upper bound on the max_map_count values max_map_count_test() will try. */
#define MAX_MAP_COUNT		65536L

/*
 * Tunable values read in setup() and written back by cleanup().
 * -1 means "not read", in which case cleanup() leaves the tunable alone.
 */
static long old_overcommit = -1;
static long old_max_map_count = -1;

/* Filled by uname() in setup(); filter_map() reads un.machine on ARM builds. */
static struct utsname un;
59 
setup(void)60 static void setup(void)
61 {
62 	if (access(PATH_SYSVM "max_map_count", F_OK) != 0)
63 		tst_brk(TBROK | TERRNO,
64 			 "Can't support to test max_map_count");
65 
66 	old_max_map_count = get_sys_tune("max_map_count");
67 	old_overcommit = get_sys_tune("overcommit_memory");
68 	set_sys_tune("overcommit_memory", 2, 1);
69 
70 	if (uname(&un) != 0)
71 		tst_brk(TBROK | TERRNO, "uname error");
72 }
73 
cleanup(void)74 static void cleanup(void)
75 {
76 	if (old_overcommit != -1)
77 		set_sys_tune("overcommit_memory", old_overcommit, 0);
78 	if (old_max_map_count != -1)
79 		set_sys_tune("max_map_count", old_max_map_count, 0);
80 }
81 
/*
 * Filter for /proc/[pid]/maps lines: tells whether a line describes a map
 * entry which isn't accounted for in the vm_area_struct's map_count and
 * must therefore be left out when counting.
 *
 * line: one line read from /proc/[pid]/maps.
 *
 * Returns true when the line has to be excluded from the count, false when
 * it has to be counted.
 */
static bool filter_map(const char *line)
{
	/*
	 * The name is only ever compared with short bracketed labels, so a
	 * small buffer with a matching field width is enough: a longer name is
	 * truncated and simply fails the comparison instead of overrunning
	 * the buffer.
	 */
	char name[32];

#if defined(__arm__)
	/*
	 * Older arm kernels didn't label their vdso maps, so that entry has
	 * no name field and is recognised by its address range alone. This
	 * has to be tested before a name is required below, otherwise an
	 * unlabelled line can never be filtered. Skipped when running on
	 * aarch64.
	 */
	if (strcmp(un.machine, "aarch64") != 0 &&
	    strcmp(un.machine, "aarch64_be") != 0 &&
	    strncmp(line, "ffff0000-ffff1000", 17) == 0)
		return true;
#endif

	/* Every remaining filter matches on the name; no name, no filter. */
	if (sscanf(line, "%*p-%*p %*4s %*p %*2d:%*2d %*d %31s", name) != 1)
		return false;

	/*
	 * NOTE(review): __x86__ is not a macro GCC or Clang predefine for
	 * 32-bit x86 (that one is __i386__) - confirm whether 32-bit builds
	 * were meant to take this branch.
	 */
#if defined(__x86_64__) || defined(__x86__)
	/* On x86, there's an old compat vsyscall page */
	if (!strcmp(name, "[vsyscall]"))
		return true;
#elif defined(__ia64__)
	/* On ia64, the vdso is not a proper mapping */
	if (!strcmp(name, "[vdso]"))
		return true;
#endif

	return false;
}
115 
count_maps(pid_t pid)116 static long count_maps(pid_t pid)
117 {
118 	FILE *fp;
119 	size_t len;
120 	char *line = NULL;
121 	char buf[BUFSIZ];
122 	long map_count = 0;
123 
124 	snprintf(buf, BUFSIZ, "/proc/%d/maps", pid);
125 	fp = fopen(buf, "r");
126 	if (fp == NULL)
127 		tst_brk(TBROK | TERRNO, "fopen %s", buf);
128 	while (getline(&line, &len, fp) != -1) {
129 		/* exclude vdso and vsyscall */
130 		if (filter_map(line))
131 			continue;
132 		map_count++;
133 	}
134 	fclose(fp);
135 
136 	return map_count;
137 }
138 
max_map_count_test(void)139 static void max_map_count_test(void)
140 {
141 	int status;
142 	pid_t pid;
143 	long max_maps;
144 	long map_count;
145 	long max_iters;
146 	long memfree;
147 
148 	/*
149 	 * XXX Due to a possible kernel bug, oom-killer can be easily
150 	 * triggered when doing small piece mmaps in huge amount even if
151 	 * enough free memory available. Also it has been observed that
152 	 * oom-killer often kill wrong victims in this situation, we
153 	 * decided to do following steps to make sure no oom happen:
154 	 * 1) use a safe maximum max_map_count value as upper-bound,
155 	 *    we set it 65536 in this case, i.e., we don't test too big
156 	 *    value;
157 	 * 2) make sure total mapping isn't larger tha
158 	 *        CommitLimit - Committed_AS
159 	 *    and set overcommit_memory to 2, this could help mapping
160 	 *    returns ENOMEM instead of triggering oom-killer when
161 	 *    memory is tight. (When there are enough free memory,
162 	 *    step 1) will be used first.
163 	 * Hope OOM-killer can be more stable oneday.
164 	 */
165 	memfree = SAFE_READ_MEMINFO("CommitLimit:") - SAFE_READ_MEMINFO("Committed_AS:");
166 	/* 64 used as a bias to make sure no overflow happen */
167 	max_iters = memfree / sysconf(_SC_PAGESIZE) * 1024 - 64;
168 	if (max_iters > MAX_MAP_COUNT)
169 		max_iters = MAX_MAP_COUNT;
170 
171 	max_maps = MAP_COUNT_DEFAULT;
172 	while (max_maps <= max_iters) {
173 		set_sys_tune("max_map_count", max_maps, 1);
174 
175 		switch (pid = SAFE_FORK()) {
176 		case 0:
177 			while (mmap(NULL, 1, PROT_READ,
178 				    MAP_SHARED | MAP_ANONYMOUS, -1, 0)
179 			       != MAP_FAILED) ;
180 			if (raise(SIGSTOP) != 0)
181 				tst_brk(TBROK | TERRNO, "raise");
182 			exit(0);
183 		default:
184 			break;
185 		}
186 		/* wait child done mmap and stop */
187 		SAFE_WAITPID(pid, &status, WUNTRACED);
188 		if (!WIFSTOPPED(status))
189 			tst_brk(TBROK, "child did not stopped");
190 
191 		map_count = count_maps(pid);
192 		/* Note max_maps will be exceeded by one for
193 		 * the sysctl setting of max_map_count. This
194 		 * is the mm failure point at the time of
195 		 * writing this COMMENT!
196 		*/
197 		if (map_count == (max_maps + 1))
198 			tst_res(TPASS, "%ld map entries in total "
199 				 "as expected.", max_maps);
200 		else
201 			tst_res(TFAIL, "%ld map entries in total, but "
202 				 "expected %ld entries", map_count, max_maps);
203 
204 		/* make child continue to exit */
205 		SAFE_KILL(pid, SIGCONT);
206 		SAFE_WAITPID(pid, &status, 0);
207 
208 		max_maps = max_maps << 1;
209 	}
210 }
211 
212 static struct tst_test test = {
213 	.needs_root = 1,
214 	.forks_child = 1,
215 	.setup = setup,
216 	.cleanup = cleanup,
217 	.test_all = max_map_count_test,
218 };
219