1 #include "config.h"
2 #include <sys/types.h>
3 #include <sys/mman.h>
4 #include <sys/mount.h>
5 #include <sys/stat.h>
6 #include <sys/wait.h>
7 #include <errno.h>
8 #include <fcntl.h>
9 #if HAVE_NUMA_H
10 #include <numa.h>
11 #endif
12 #if HAVE_NUMAIF_H
13 #include <numaif.h>
14 #endif
15 #include <pthread.h>
16 #include <stdarg.h>
17 #include <stdio.h>
18 #include <string.h>
19 #include <unistd.h>
20 
21 #include "test.h"
22 #include "safe_macros.h"
23 #include "mem.h"
24 #include "numa_helper.h"
25 
26 /* OOM */
27 
alloc_mem(long int length,int testcase)28 static int alloc_mem(long int length, int testcase)
29 {
30 	char *s;
31 	long i, pagesz = getpagesize();
32 	int loop = 10;
33 
34 	tst_resm(TINFO, "thread (%lx), allocating %ld bytes.",
35 		(unsigned long) pthread_self(), length);
36 
37 	s = mmap(NULL, length, PROT_READ | PROT_WRITE,
38 		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
39 	if (s == MAP_FAILED)
40 		return errno;
41 
42 	if (testcase == MLOCK) {
43 		while (mlock(s, length) == -1 && loop > 0) {
44 			if (EAGAIN != errno)
45 				return errno;
46 			usleep(300000);
47 			loop--;
48 		}
49 	}
50 
51 #ifdef HAVE_MADV_MERGEABLE
52 	if (testcase == KSM && madvise(s, length, MADV_MERGEABLE) == -1)
53 		return errno;
54 #endif
55 	for (i = 0; i < length; i += pagesz)
56 		s[i] = '\a';
57 
58 	return 0;
59 }
60 
child_alloc_thread(void * args)61 static void *child_alloc_thread(void *args)
62 {
63 	int ret = 0;
64 
65 	/* keep allocating until there's an error */
66 	while (!ret)
67 		ret = alloc_mem(LENGTH, (long)args);
68 	exit(ret);
69 }
70 
child_alloc(int testcase,int lite,int threads)71 static void child_alloc(int testcase, int lite, int threads)
72 {
73 	int i;
74 	pthread_t *th;
75 
76 	if (lite) {
77 		int ret = alloc_mem(TESTMEM + MB, testcase);
78 		exit(ret);
79 	}
80 
81 	th = malloc(sizeof(pthread_t) * threads);
82 	if (!th) {
83 		tst_resm(TINFO | TERRNO, "malloc");
84 		goto out;
85 	}
86 
87 	for (i = 0; i < threads; i++) {
88 		TEST(pthread_create(&th[i], NULL, child_alloc_thread,
89 			(void *)((long)testcase)));
90 		if (TEST_RETURN) {
91 			tst_resm(TINFO | TRERRNO, "pthread_create");
92 			/*
93 			 * Keep going if thread other than first fails to
94 			 * spawn due to lack of resources.
95 			 */
96 			if (i == 0 || TEST_RETURN != EAGAIN)
97 				goto out;
98 		}
99 	}
100 
101 	/* wait for one of threads to exit whole process */
102 	while (1)
103 		sleep(1);
104 out:
105 	exit(1);
106 }
107 
108 /*
109  * oom - allocates memory according to specified testcase and checks
110  *       desired outcome (e.g. child killed, operation failed with ENOMEM)
111  * @testcase: selects how child allocates memory
112  *            valid choices are: NORMAL, MLOCK and KSM
113  * @lite: if non-zero, child makes only single TESTMEM+MB allocation
114  *        if zero, child keeps allocating memory until it gets killed
115  *        or some operation fails
116  * @retcode: expected return code of child process
117  *           if matches child ret code, this function reports PASS,
118  *           otherwise it reports FAIL
119  * @allow_sigkill: if zero and child is killed, this function reports FAIL
120  *                 if non-zero, then if child is killed by SIGKILL
121  *                 it is considered as PASS
122  */
oom(int testcase,int lite,int retcode,int allow_sigkill)123 void oom(int testcase, int lite, int retcode, int allow_sigkill)
124 {
125 	pid_t pid;
126 	int status, threads;
127 
128 	switch (pid = fork()) {
129 	case -1:
130 		if (errno == retcode) {
131 			tst_resm(TPASS | TERRNO, "fork");
132 			return;
133 		}
134 		tst_brkm(TBROK | TERRNO, cleanup, "fork");
135 	case 0:
136 		threads = MAX(1, tst_ncpus() - 1);
137 		child_alloc(testcase, lite, threads);
138 	default:
139 		break;
140 	}
141 
142 	tst_resm(TINFO, "expected victim is %d.", pid);
143 	if (waitpid(-1, &status, 0) == -1)
144 		tst_brkm(TBROK | TERRNO, cleanup, "waitpid");
145 
146 	if (WIFSIGNALED(status)) {
147 		if (allow_sigkill && WTERMSIG(status) == SIGKILL) {
148 			tst_resm(TPASS, "victim signalled: (%d) %s",
149 				SIGKILL,
150 				tst_strsig(SIGKILL));
151 		} else {
152 			tst_resm(TFAIL, "victim signalled: (%d) %s",
153 				WTERMSIG(status),
154 				tst_strsig(WTERMSIG(status)));
155 		}
156 	} else if (WIFEXITED(status)) {
157 		if (WEXITSTATUS(status) == retcode) {
158 			tst_resm(TPASS, "victim retcode: (%d) %s",
159 				retcode, strerror(retcode));
160 		} else {
161 			tst_resm(TFAIL, "victim unexpectedly ended with "
162 				"retcode: %d, expected: %d",
163 				WEXITSTATUS(status), retcode);
164 		}
165 	} else {
166 		tst_resm(TFAIL, "victim unexpectedly ended");
167 	}
168 }
169 
/*
 * set_global_mempolicy - apply a NUMA memory policy to the calling process
 * @mempolicy: 0 leaves the policy untouched; MPOL_BIND binds to the second
 *             allowed node; MPOL_INTERLEAVE / MPOL_PREFERRED use the second
 *             and third allowed nodes; any other value is a test break.
 *
 * Does nothing when built without NUMA support. When the system has too
 * few nodes for the requested policy only a TINFO note is printed.
 */
static void set_global_mempolicy(int mempolicy)
{
#if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \
	&& HAVE_MPOL_CONSTANTS
	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
	int num_nodes, *nodes;
	int ret;

	if (mempolicy) {
		ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
		if (ret != 0)
			tst_brkm(TBROK|TERRNO, cleanup,
				 "get_allowed_nodes_arr");
		if (num_nodes < 2) {
			tst_resm(TINFO, "mempolicy need NUMA system support");
			free(nodes);
			return;
		}
		switch(mempolicy) {
		case MPOL_BIND:
			/* bind the second node */
			set_node(nmask, nodes[1]);
			break;
		case MPOL_INTERLEAVE:
		case MPOL_PREFERRED:
			if (num_nodes == 2) {
				tst_resm(TINFO, "The mempolicy need "
					 "more than 2 numa nodes");
				free(nodes);
				return;
			} else {
				/* Using the 2nd,3rd node */
				set_node(nmask, nodes[1]);
				set_node(nmask, nodes[2]);
			}
			break;
		default:
			tst_brkm(TBROK|TERRNO, cleanup, "Bad mempolicy mode");
		}
		if (set_mempolicy(mempolicy, nmask, MAXNODES) == -1)
			tst_brkm(TBROK|TERRNO, cleanup, "set_mempolicy");

		/*
		 * The node list is only needed to build nmask; release it
		 * here as the early-return paths above already do.
		 */
		free(nodes);
	}
#endif
}
214 
testoom(int mempolicy,int lite,int retcode,int allow_sigkill)215 void testoom(int mempolicy, int lite, int retcode, int allow_sigkill)
216 {
217 	int ksm_run_orig;
218 
219 	set_global_mempolicy(mempolicy);
220 
221 	tst_resm(TINFO, "start normal OOM testing.");
222 	oom(NORMAL, lite, retcode, allow_sigkill);
223 
224 	tst_resm(TINFO, "start OOM testing for mlocked pages.");
225 	oom(MLOCK, lite, retcode, allow_sigkill);
226 
227 	/*
228 	 * Skip oom(KSM) if lite == 1, since limit_in_bytes may vary from
229 	 * run to run, which isn't reliable for oom03 cgroup test.
230 	 */
231 	if (access(PATH_KSM, F_OK) == -1 || lite == 1) {
232 		tst_resm(TINFO, "KSM is not configed or lite == 1, "
233 			 "skip OOM test for KSM pags");
234 	} else {
235 		tst_resm(TINFO, "start OOM testing for KSM pages.");
236 		SAFE_FILE_SCANF(cleanup, PATH_KSM "run", "%d", &ksm_run_orig);
237 		SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
238 		oom(KSM, lite, retcode, allow_sigkill);
239 		SAFE_FILE_PRINTF(cleanup,PATH_KSM "run", "%d", ksm_run_orig);
240 	}
241 }
242 
243 /* KSM */
244 
/*
 * Value of the KSM "max_page_sharing" knob as read by
 * save_max_page_sharing(); restore_max_page_sharing() writes it back.
 */
static int max_page_sharing;
246 
save_max_page_sharing(void)247 void save_max_page_sharing(void)
248 {
249 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
250 	        SAFE_FILE_SCANF(NULL, PATH_KSM "max_page_sharing",
251 	                        "%d", &max_page_sharing);
252 }
253 
restore_max_page_sharing(void)254 void restore_max_page_sharing(void)
255 {
256 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
257 	        FILE_PRINTF(PATH_KSM "max_page_sharing",
258 	                         "%d", max_page_sharing);
259 }
260 
check(char * path,long int value)261 static void check(char *path, long int value)
262 {
263 	char fullpath[BUFSIZ];
264 	long actual_val;
265 
266 	snprintf(fullpath, BUFSIZ, PATH_KSM "%s", path);
267 	SAFE_FILE_SCANF(cleanup, fullpath, "%ld", &actual_val);
268 
269 	tst_resm(TINFO, "%s is %ld.", path, actual_val);
270 	if (actual_val != value)
271 		tst_resm(TFAIL, "%s is not %ld.", path, value);
272 }
273 
wait_ksmd_done(void)274 static void wait_ksmd_done(void)
275 {
276 	long pages_shared, pages_sharing, pages_volatile, pages_unshared;
277 	long old_pages_shared = 0, old_pages_sharing = 0;
278 	long old_pages_volatile = 0, old_pages_unshared = 0;
279 	int changing = 1, count = 0;
280 
281 	while (changing) {
282 		sleep(10);
283 		count++;
284 
285 		SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_shared",
286 				"%ld", &pages_shared);
287 
288 		SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_sharing",
289 				"%ld", &pages_sharing);
290 
291 		SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_volatile",
292 				"%ld", &pages_volatile);
293 
294 		SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_unshared",
295 				"%ld", &pages_unshared);
296 
297 		if (pages_shared != old_pages_shared ||
298 		    pages_sharing != old_pages_sharing ||
299 		    pages_volatile != old_pages_volatile ||
300 		    pages_unshared != old_pages_unshared) {
301 			old_pages_shared = pages_shared;
302 			old_pages_sharing = pages_sharing;
303 			old_pages_volatile = pages_volatile;
304 			old_pages_unshared = pages_unshared;
305 		} else {
306 			changing = 0;
307 		}
308 	}
309 
310 	tst_resm(TINFO, "ksm daemon takes %ds to scan all mergeable pages",
311 		 count * 10);
312 }
313 
group_check(int run,int pages_shared,int pages_sharing,int pages_volatile,int pages_unshared,int sleep_millisecs,int pages_to_scan)314 static void group_check(int run, int pages_shared, int pages_sharing,
315 			 int pages_volatile, int pages_unshared,
316 			 int sleep_millisecs, int pages_to_scan)
317 {
318 	/* wait for ksm daemon to scan all mergeable pages. */
319 	wait_ksmd_done();
320 
321 	tst_resm(TINFO, "check!");
322 	check("run", run);
323 	check("pages_shared", pages_shared);
324 	check("pages_sharing", pages_sharing);
325 	check("pages_volatile", pages_volatile);
326 	check("pages_unshared", pages_unshared);
327 	check("sleep_millisecs", sleep_millisecs);
328 	check("pages_to_scan", pages_to_scan);
329 }
330 
verify(char ** memory,char value,int proc,int start,int end,int start2,int end2)331 static void verify(char **memory, char value, int proc,
332 		    int start, int end, int start2, int end2)
333 {
334 	int i, j;
335 	void *s = NULL;
336 
337 	s = malloc((end - start) * (end2 - start2));
338 	if (s == NULL)
339 		tst_brkm(TBROK | TERRNO, tst_exit, "malloc");
340 
341 	tst_resm(TINFO, "child %d verifies memory content.", proc);
342 	memset(s, value, (end - start) * (end2 - start2));
343 	if (memcmp(memory[start], s, (end - start) * (end2 - start2))
344 	    != 0)
345 		for (j = start; j < end; j++)
346 			for (i = start2; i < end2; i++)
347 				if (memory[j][i] != value)
348 					tst_resm(TFAIL, "child %d has %c at "
349 						 "%d,%d,%d.",
350 						 proc, memory[j][i], proc,
351 						 j, i);
352 	free(s);
353 }
354 
write_memcg(void)355 void write_memcg(void)
356 {
357 	SAFE_FILE_PRINTF(NULL, MEMCG_LIMIT, "%ld", TESTMEM);
358 
359 	SAFE_FILE_PRINTF(NULL, MEMCG_PATH_NEW "/tasks", "%d", getpid());
360 }
361 
/*
 * One step of a KSM child's fill sequence; create_ksm_child() walks an
 * array of four of these.
 */
struct ksm_merge_data {
	/* byte value the child fills its memory with */
	char data;
	/*
	 * compared against size * MB: when smaller, the child overwrites
	 * the last byte of its last unit with 'e'
	 */
	unsigned int mergeable_size;
};
366 
ksm_child_memset(int child_num,int size,int total_unit,struct ksm_merge_data ksm_merge_data,char ** memory)367 static void ksm_child_memset(int child_num, int size, int total_unit,
368 		 struct ksm_merge_data ksm_merge_data, char **memory)
369 {
370 	int i = 0, j;
371 	int unit = size / total_unit;
372 
373 	tst_resm(TINFO, "child %d continues...", child_num);
374 
375 	if (ksm_merge_data.mergeable_size == size * MB) {
376 		tst_resm(TINFO, "child %d allocates %d MB filled with '%c'",
377 			child_num, size, ksm_merge_data.data);
378 
379 	} else {
380 		tst_resm(TINFO, "child %d allocates %d MB filled with '%c'"
381 				" except one page with 'e'",
382 				child_num, size, ksm_merge_data.data);
383 	}
384 
385 	for (j = 0; j < total_unit; j++) {
386 		for (i = 0; (unsigned int)i < unit * MB; i++)
387 			memory[j][i] = ksm_merge_data.data;
388 	}
389 
390 	/* if it contains unshared page, then set 'e' char
391 	 * at the end of the last page
392 	 */
393 	if (ksm_merge_data.mergeable_size < size * MB)
394 		memory[j-1][i-1] = 'e';
395 }
396 
create_ksm_child(int child_num,int size,int unit,struct ksm_merge_data * ksm_merge_data)397 static void create_ksm_child(int child_num, int size, int unit,
398 		       struct ksm_merge_data *ksm_merge_data)
399 {
400 	int j, total_unit;
401 	char **memory;
402 
403 	/* The total units in all */
404 	total_unit = size / unit;
405 
406 	/* Apply for the space for memory */
407 	memory = malloc(total_unit * sizeof(char *));
408 	for (j = 0; j < total_unit; j++) {
409 		memory[j] = mmap(NULL, unit * MB, PROT_READ|PROT_WRITE,
410 			MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
411 		if (memory[j] == MAP_FAILED)
412 			tst_brkm(TBROK|TERRNO, tst_exit, "mmap");
413 #ifdef HAVE_MADV_MERGEABLE
414 		if (madvise(memory[j], unit * MB, MADV_MERGEABLE) == -1)
415 			tst_brkm(TBROK|TERRNO, tst_exit, "madvise");
416 #endif
417 	}
418 
419 	tst_resm(TINFO, "child %d stops.", child_num);
420 	if (raise(SIGSTOP) == -1)
421 		tst_brkm(TBROK|TERRNO, tst_exit, "kill");
422 	fflush(stdout);
423 
424 	for (j = 0; j < 4; j++) {
425 
426 		ksm_child_memset(child_num, size, total_unit,
427 				  ksm_merge_data[j], memory);
428 
429 		fflush(stdout);
430 
431 		tst_resm(TINFO, "child %d stops.", child_num);
432 		if (raise(SIGSTOP) == -1)
433 			tst_brkm(TBROK|TERRNO, tst_exit, "kill");
434 
435 		if (ksm_merge_data[j].mergeable_size < size * MB) {
436 			verify(memory, 'e', child_num, total_unit - 1,
437 				total_unit, unit * MB - 1, unit * MB);
438 			verify(memory, ksm_merge_data[j].data, child_num,
439 				0, total_unit, 0, unit * MB - 1);
440 		} else {
441 			verify(memory, ksm_merge_data[j].data, child_num,
442 				0, total_unit, 0, unit * MB);
443 		}
444 	}
445 
446 	tst_resm(TINFO, "child %d finished.", child_num);
447 }
448 
stop_ksm_children(int * child,int num)449 static void stop_ksm_children(int *child, int num)
450 {
451 	int k, status;
452 
453 	tst_resm(TINFO, "wait for all children to stop.");
454 	for (k = 0; k < num; k++) {
455 		if (waitpid(child[k], &status, WUNTRACED) == -1)
456 			tst_brkm(TBROK|TERRNO, cleanup, "waitpid");
457 		if (!WIFSTOPPED(status))
458 			tst_brkm(TBROK, cleanup, "child %d was not stopped", k);
459 	}
460 }
461 
resume_ksm_children(int * child,int num)462 static void resume_ksm_children(int *child, int num)
463 {
464 	int k;
465 
466 	tst_resm(TINFO, "resume all children.");
467 	for (k = 0; k < num; k++) {
468 		if (kill(child[k], SIGCONT) == -1)
469 			tst_brkm(TBROK|TERRNO, cleanup, "kill child[%d]", k);
470 	}
471 	fflush(stdout);
472 }
473 
create_same_memory(int size,int num,int unit)474 void create_same_memory(int size, int num, int unit)
475 {
476 	int i, j, status, *child;
477 	unsigned long ps, pages;
478 	struct ksm_merge_data **ksm_data;
479 
480 	struct ksm_merge_data ksm_data0[] = {
481 	       {'c', size*MB}, {'c', size*MB}, {'d', size*MB}, {'d', size*MB},
482 	};
483 	struct ksm_merge_data ksm_data1[] = {
484 	       {'a', size*MB}, {'b', size*MB}, {'d', size*MB}, {'d', size*MB-1},
485 	};
486 	struct ksm_merge_data ksm_data2[] = {
487 	       {'a', size*MB}, {'a', size*MB}, {'d', size*MB}, {'d', size*MB},
488 	};
489 
490 	ps = sysconf(_SC_PAGE_SIZE);
491 	pages = MB / ps;
492 
493 	ksm_data = malloc((num - 3) * sizeof(struct ksm_merge_data *));
494 	/* Since from third child, the data is same with the first child's */
495 	for (i = 0; i < num - 3; i++) {
496 		ksm_data[i] = malloc(4 * sizeof(struct ksm_merge_data));
497 		for (j = 0; j < 4; j++) {
498 			ksm_data[i][j].data = ksm_data0[j].data;
499 			ksm_data[i][j].mergeable_size =
500 				ksm_data0[j].mergeable_size;
501 		}
502 	}
503 
504 	child = malloc(num * sizeof(int));
505 	if (child == NULL)
506 		tst_brkm(TBROK | TERRNO, cleanup, "malloc");
507 
508 	for (i = 0; i < num; i++) {
509 		fflush(stdout);
510 		switch (child[i] = fork()) {
511 		case -1:
512 			tst_brkm(TBROK|TERRNO, cleanup, "fork");
513 		case 0:
514 			if (i == 0) {
515 				create_ksm_child(i, size, unit, ksm_data0);
516 				exit(0);
517 			} else if (i == 1) {
518 				create_ksm_child(i, size, unit, ksm_data1);
519 				exit(0);
520 			} else if (i == 2) {
521 				create_ksm_child(i, size, unit, ksm_data2);
522 				exit(0);
523 			} else {
524 				create_ksm_child(i, size, unit, ksm_data[i-3]);
525 				exit(0);
526 			}
527 		}
528 	}
529 
530 	stop_ksm_children(child, num);
531 
532 	tst_resm(TINFO, "KSM merging...");
533 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
534 		SAFE_FILE_PRINTF(cleanup, PATH_KSM "max_page_sharing",
535 				"%ld", size * pages * num);
536 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
537 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "pages_to_scan", "%ld",
538 			 size * pages * num);
539 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "sleep_millisecs", "0");
540 
541 	resume_ksm_children(child, num);
542 	group_check(1, 2, size * num * pages - 2, 0, 0, 0, size * pages * num);
543 
544 	stop_ksm_children(child, num);
545 	resume_ksm_children(child, num);
546 	group_check(1, 3, size * num * pages - 3, 0, 0, 0, size * pages * num);
547 
548 	stop_ksm_children(child, num);
549 	resume_ksm_children(child, num);
550 	group_check(1, 1, size * num * pages - 1, 0, 0, 0, size * pages * num);
551 
552 	stop_ksm_children(child, num);
553 	resume_ksm_children(child, num);
554 	group_check(1, 1, size * num * pages - 2, 0, 1, 0, size * pages * num);
555 
556 	stop_ksm_children(child, num);
557 
558 	tst_resm(TINFO, "KSM unmerging...");
559 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
560 
561 	resume_ksm_children(child, num);
562 	group_check(2, 0, 0, 0, 0, 0, size * pages * num);
563 
564 	tst_resm(TINFO, "stop KSM.");
565 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "0");
566 	group_check(0, 0, 0, 0, 0, 0, size * pages * num);
567 
568 	while (waitpid(-1, &status, WUNTRACED | WCONTINUED) > 0)
569 		if (WEXITSTATUS(status) != 0)
570 			tst_resm(TFAIL, "child exit status is %d",
571 				 WEXITSTATUS(status));
572 }
573 
test_ksm_merge_across_nodes(unsigned long nr_pages)574 void test_ksm_merge_across_nodes(unsigned long nr_pages)
575 {
576 	char **memory;
577 	int i, ret;
578 	int num_nodes, *nodes;
579 	unsigned long length;
580 	unsigned long pagesize;
581 
582 #if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \
583 	&& HAVE_MPOL_CONSTANTS
584 	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
585 #endif
586 
587 	ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
588 	if (ret != 0)
589 		tst_brkm(TBROK|TERRNO, cleanup, "get_allowed_nodes_arr");
590 	if (num_nodes < 2) {
591 		tst_resm(TINFO, "need NUMA system support");
592 		free(nodes);
593 		return;
594 	}
595 
596 	pagesize = sysconf(_SC_PAGE_SIZE);
597 	length = nr_pages * pagesize;
598 
599 	memory = malloc(num_nodes * sizeof(char *));
600 	for (i = 0; i < num_nodes; i++) {
601 		memory[i] = mmap(NULL, length, PROT_READ|PROT_WRITE,
602 			    MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
603 		if (memory[i] == MAP_FAILED)
604 			tst_brkm(TBROK|TERRNO, tst_exit, "mmap");
605 #ifdef HAVE_MADV_MERGEABLE
606 		if (madvise(memory[i], length, MADV_MERGEABLE) == -1)
607 			tst_brkm(TBROK|TERRNO, tst_exit, "madvise");
608 #endif
609 
610 #if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \
611 	&& HAVE_MPOL_CONSTANTS
612 		clean_node(nmask);
613 		set_node(nmask, nodes[i]);
614 		/*
615 		 * Use mbind() to make sure each node contains
616 		 * length size memory.
617 		 */
618 		ret = mbind(memory[i], length, MPOL_BIND, nmask, MAXNODES, 0);
619 		if (ret == -1)
620 			tst_brkm(TBROK|TERRNO, tst_exit, "mbind");
621 #endif
622 
623 		memset(memory[i], 10, length);
624 	}
625 
626 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "sleep_millisecs", "0");
627 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "pages_to_scan", "%ld",
628 			 nr_pages * num_nodes);
629 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
630 		SAFE_FILE_PRINTF(cleanup, PATH_KSM "max_page_sharing",
631 			"%ld", nr_pages * num_nodes);
632 	/*
633 	 * merge_across_nodes setting can be changed only when there
634 	 * are no ksm shared pages in system, so set run 2 to unmerge
635 	 * pages first, then to 1 after changing merge_across_nodes,
636 	 * to remerge according to the new setting.
637 	 */
638 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
639 	wait_ksmd_done();
640 	tst_resm(TINFO, "Start to test KSM with merge_across_nodes=1");
641 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "merge_across_nodes", "1");
642 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
643 	group_check(1, 1, nr_pages * num_nodes - 1, 0, 0, 0,
644 		    nr_pages * num_nodes);
645 
646 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
647 	wait_ksmd_done();
648 	tst_resm(TINFO, "Start to test KSM with merge_across_nodes=0");
649 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "merge_across_nodes", "0");
650 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
651 	group_check(1, num_nodes, nr_pages * num_nodes - num_nodes,
652 		    0, 0, 0, nr_pages * num_nodes);
653 
654 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
655 	wait_ksmd_done();
656 }
657 
check_ksm_options(int * size,int * num,int * unit)658 void check_ksm_options(int *size, int *num, int *unit)
659 {
660 	if (opt_size) {
661 		*size = atoi(opt_sizestr);
662 		if (*size < 1)
663 			tst_brkm(TBROK, cleanup, "size cannot be less than 1.");
664 	}
665 	if (opt_unit) {
666 		*unit = atoi(opt_unitstr);
667 		if (*unit > *size)
668 			tst_brkm(TBROK, cleanup,
669 				 "unit cannot be greater than size.");
670 		if (*size % *unit != 0)
671 			tst_brkm(TBROK, cleanup,
672 				 "the remainder of division of size by unit is "
673 				 "not zero.");
674 	}
675 	if (opt_num) {
676 		*num = atoi(opt_numstr);
677 		if (*num < 3)
678 			tst_brkm(TBROK, cleanup,
679 				 "process number cannot be less 3.");
680 	}
681 }
682 
/* Print the help lines for the KSM specific options to stdout. */
void ksm_usage(void)
{
	static const char *const help[] = {
		"  -n      Number of processes\n",
		"  -s      Memory allocation size in MB\n",
		"  -u      Memory allocation unit in MB\n",
	};
	size_t k;

	for (k = 0; k < sizeof(help) / sizeof(help[0]); k++)
		fputs(help[k], stdout);
}
689 
690 /* THP */
691 
692 /* cpuset/memcg */
693 
gather_node_cpus(char * cpus,long nd)694 static void gather_node_cpus(char *cpus, long nd)
695 {
696 	int ncpus = 0;
697 	int i;
698 	long online;
699 	char buf[BUFSIZ];
700 	char path[BUFSIZ], path1[BUFSIZ];
701 
702 	while (path_exist(PATH_SYS_SYSTEM "/cpu/cpu%d", ncpus))
703 		ncpus++;
704 
705 	for (i = 0; i < ncpus; i++) {
706 		snprintf(path, BUFSIZ,
707 			 PATH_SYS_SYSTEM "/node/node%ld/cpu%d", nd, i);
708 		if (path_exist(path)) {
709 			snprintf(path1, BUFSIZ, "%s/online", path);
710 			/*
711 			 * if there is no online knob, then the cpu cannot
712 			 * be taken offline
713 			 */
714 			if (path_exist(path1)) {
715 				SAFE_FILE_SCANF(cleanup, path1, "%ld", &online);
716 				if (online == 0)
717 					continue;
718 			}
719 			sprintf(buf, "%d,", i);
720 			strcat(cpus, buf);
721 		}
722 	}
723 	/* Remove the trailing comma. */
724 	cpus[strlen(cpus) - 1] = '\0';
725 }
726 
read_cpuset_files(char * prefix,char * filename,char * retbuf)727 void read_cpuset_files(char *prefix, char *filename, char *retbuf)
728 {
729 	int fd;
730 	char path[BUFSIZ];
731 
732 	/*
733 	 * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX'
734 	 * please see Documentation/cgroups/cpusets.txt from kernel src
735 	 * for details
736 	 */
737 	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
738 	fd = open(path, O_RDONLY);
739 	if (fd == -1) {
740 		if (errno == ENOENT) {
741 			snprintf(path, BUFSIZ, "%s/cpuset.%s",
742 				 prefix, filename);
743 			fd = open(path, O_RDONLY);
744 			if (fd == -1)
745 				tst_brkm(TBROK | TERRNO, cleanup,
746 					 "open %s", path);
747 		} else
748 			tst_brkm(TBROK | TERRNO, cleanup, "open %s", path);
749 	}
750 	if (read(fd, retbuf, BUFSIZ) < 0)
751 		tst_brkm(TBROK | TERRNO, cleanup, "read %s", path);
752 	close(fd);
753 }
754 
write_cpuset_files(char * prefix,char * filename,char * buf)755 void write_cpuset_files(char *prefix, char *filename, char *buf)
756 {
757 	int fd;
758 	char path[BUFSIZ];
759 
760 	/*
761 	 * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX'
762 	 * please see Documentation/cgroups/cpusets.txt from kernel src
763 	 * for details
764 	 */
765 	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
766 	fd = open(path, O_WRONLY);
767 	if (fd == -1) {
768 		if (errno == ENOENT) {
769 			snprintf(path, BUFSIZ, "%s/cpuset.%s",
770 				 prefix, filename);
771 			fd = open(path, O_WRONLY);
772 			if (fd == -1)
773 				tst_brkm(TBROK | TERRNO, cleanup,
774 					 "open %s", path);
775 		} else
776 			tst_brkm(TBROK | TERRNO, cleanup, "open %s", path);
777 	}
778 	if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf))
779 		tst_brkm(TBROK | TERRNO, cleanup, "write %s", path);
780 	close(fd);
781 }
782 
write_cpusets(long nd)783 void write_cpusets(long nd)
784 {
785 	char buf[BUFSIZ];
786 	char cpus[BUFSIZ] = "";
787 
788 	snprintf(buf, BUFSIZ, "%ld", nd);
789 	write_cpuset_files(CPATH_NEW, "mems", buf);
790 
791 	gather_node_cpus(cpus, nd);
792 	/*
793 	 * If the 'nd' node doesn't contain any CPUs,
794 	 * the first ID of CPU '0' will be used as
795 	 * the value of cpuset.cpus.
796 	 */
797 	if (strlen(cpus) != 0) {
798 		write_cpuset_files(CPATH_NEW, "cpus", cpus);
799 	} else {
800 		tst_resm(TINFO, "No CPUs in the node%ld; "
801 				"using only CPU0", nd);
802 		write_cpuset_files(CPATH_NEW, "cpus", "0");
803 	}
804 
805 	SAFE_FILE_PRINTF(NULL, CPATH_NEW "/tasks", "%d", getpid());
806 }
807 
umount_mem(char * path,char * path_new)808 void umount_mem(char *path, char *path_new)
809 {
810 	FILE *fp;
811 	int fd;
812 	char s_new[BUFSIZ], s[BUFSIZ], value[BUFSIZ];
813 
814 	/* Move all processes in task to its parent node. */
815 	sprintf(s, "%s/tasks", path);
816 	fd = open(s, O_WRONLY);
817 	if (fd == -1)
818 		tst_resm(TWARN | TERRNO, "open %s", s);
819 
820 	snprintf(s_new, BUFSIZ, "%s/tasks", path_new);
821 	fp = fopen(s_new, "r");
822 	if (fp == NULL)
823 		tst_resm(TWARN | TERRNO, "fopen %s", s_new);
824 	if ((fd != -1) && (fp != NULL)) {
825 		while (fgets(value, BUFSIZ, fp) != NULL)
826 			if (write(fd, value, strlen(value) - 1)
827 			    != (ssize_t)strlen(value) - 1)
828 				tst_resm(TWARN | TERRNO, "write %s", s);
829 	}
830 	if (fd != -1)
831 		close(fd);
832 	if (fp != NULL)
833 		fclose(fp);
834 	if (rmdir(path_new) == -1)
835 		tst_resm(TWARN | TERRNO, "rmdir %s", path_new);
836 	if (umount(path) == -1)
837 		tst_resm(TWARN | TERRNO, "umount %s", path);
838 	if (rmdir(path) == -1)
839 		tst_resm(TWARN | TERRNO, "rmdir %s", path);
840 }
841 
mount_mem(char * name,char * fs,char * options,char * path,char * path_new)842 void mount_mem(char *name, char *fs, char *options, char *path, char *path_new)
843 {
844 	if (mkdir(path, 0777) == -1)
845 		tst_brkm(TBROK | TERRNO, cleanup, "mkdir %s", path);
846 	if (mount(name, path, fs, 0, options) == -1) {
847 		if (errno == ENODEV) {
848 			if (rmdir(path) == -1)
849 				tst_resm(TWARN | TERRNO, "rmdir %s failed",
850 					 path);
851 			tst_brkm(TCONF, NULL,
852 				 "file system %s is not configured in kernel",
853 				 fs);
854 		}
855 		tst_brkm(TBROK | TERRNO, cleanup, "mount %s", path);
856 	}
857 	if (mkdir(path_new, 0777) == -1)
858 		tst_brkm(TBROK | TERRNO, cleanup, "mkdir %s", path_new);
859 }
860 
861 /* shared */
862 
863 /* Warning: *DO NOT* use this function in child */
get_a_numa_node(void (* cleanup_fn)(void))864 unsigned int get_a_numa_node(void (*cleanup_fn) (void))
865 {
866 	unsigned int nd1, nd2;
867 	int ret;
868 
869 	ret = get_allowed_nodes(0, 2, &nd1, &nd2);
870 	switch (ret) {
871 	case 0:
872 		break;
873 	case -3:
874 		tst_brkm(TCONF, cleanup_fn, "requires a NUMA system.");
875 	default:
876 		tst_brkm(TBROK | TERRNO, cleanup_fn, "1st get_allowed_nodes");
877 	}
878 
879 	ret = get_allowed_nodes(NH_MEMS | NH_CPUS, 1, &nd1);
880 	switch (ret) {
881 	case 0:
882 		tst_resm(TINFO, "get node%u.", nd1);
883 		return nd1;
884 	case -3:
885 		tst_brkm(TCONF, cleanup_fn, "requires a NUMA system that has "
886 			 "at least one node with both memory and CPU "
887 			 "available.");
888 	default:
889 		break;
890 	}
891 	tst_brkm(TBROK | TERRNO, cleanup_fn, "2nd get_allowed_nodes");
892 }
893 
/*
 * path_exist - test whether a path built printf-style exists
 * @path: printf format string for the path, followed by its arguments
 *
 * Returns 1 when access(F_OK) succeeds on the formatted path, else 0.
 * The formatted path is limited to PATH_MAX - 1 characters.
 */
int path_exist(const char *path, ...)
{
	char resolved[PATH_MAX];
	va_list args;

	va_start(args, path);
	vsnprintf(resolved, PATH_MAX, path, args);
	va_end(args);

	if (access(resolved, F_OK) != 0)
		return 0;
	return 1;
}
905 
read_meminfo(char * item)906 long read_meminfo(char *item)
907 {
908 	FILE *fp;
909 	char line[BUFSIZ], buf[BUFSIZ];
910 	long val;
911 
912 	fp = fopen(PATH_MEMINFO, "r");
913 	if (fp == NULL)
914 		tst_brkm(TBROK | TERRNO, cleanup, "fopen %s", PATH_MEMINFO);
915 
916 	while (fgets(line, BUFSIZ, fp) != NULL) {
917 		if (sscanf(line, "%64s %ld", buf, &val) == 2)
918 			if (strcmp(buf, item) == 0) {
919 				fclose(fp);
920 				return val;
921 			}
922 		continue;
923 	}
924 	fclose(fp);
925 
926 	tst_brkm(TBROK, cleanup, "cannot find \"%s\" in %s",
927 		 item, PATH_MEMINFO);
928 }
929 
set_sys_tune(char * sys_file,long tune,int check)930 void set_sys_tune(char *sys_file, long tune, int check)
931 {
932 	long val;
933 	char path[BUFSIZ];
934 
935 	tst_resm(TINFO, "set %s to %ld", sys_file, tune);
936 
937 	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
938 	SAFE_FILE_PRINTF(NULL, path, "%ld", tune);
939 
940 	if (check) {
941 		val = get_sys_tune(sys_file);
942 		if (val != tune)
943 			tst_brkm(TBROK, cleanup, "%s = %ld, but expect %ld",
944 				 sys_file, val, tune);
945 	}
946 }
947 
get_sys_tune(char * sys_file)948 long get_sys_tune(char *sys_file)
949 {
950 	char path[BUFSIZ];
951 	long tune;
952 
953 	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
954 	SAFE_FILE_SCANF(NULL, path, "%ld", &tune);
955 
956 	return tune;
957 }
958 
update_shm_size(size_t * shm_size)959 void update_shm_size(size_t * shm_size)
960 {
961 	size_t shmmax;
962 
963 	SAFE_FILE_SCANF(cleanup, PATH_SHMMAX, "%ld", &shmmax);
964 	if (*shm_size > shmmax) {
965 		tst_resm(TINFO, "Set shm_size to shmmax: %ld", shmmax);
966 		*shm_size = shmmax;
967 	}
968 }
969 
range_is_mapped(void (* cleanup_fn)(void),unsigned long low,unsigned long high)970 int range_is_mapped(void (*cleanup_fn) (void), unsigned long low, unsigned long high)
971 {
972 	FILE *fp;
973 
974 	fp = fopen("/proc/self/maps", "r");
975 	if (fp == NULL)
976 		tst_brkm(TBROK | TERRNO, cleanup_fn, "Failed to open /proc/self/maps.");
977 
978 	while (!feof(fp)) {
979 		unsigned long start, end;
980 		int ret;
981 
982 		ret = fscanf(fp, "%lx-%lx %*[^\n]\n", &start, &end);
983 		if (ret != 2) {
984 			fclose(fp);
985 			tst_brkm(TBROK | TERRNO, cleanup_fn, "Couldn't parse /proc/self/maps line.");
986 		}
987 
988 		if ((start >= low) && (start < high)) {
989 			fclose(fp);
990 			return 1;
991 		}
992 		if ((end >= low) && (end < high)) {
993 			fclose(fp);
994 			return 1;
995 		}
996 	}
997 
998 	fclose(fp);
999 	return 0;
1000 }
1001