1 /*
2  * Copyright (C) 2012 Linux Test Project, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it
13  * is free of the rightful claim of any third person regarding
14  * infringement or the like.  Any license provided herein, whether
15  * implied or otherwise, applies only to this software file.  Patent
16  * licenses, if any, provided herein do not apply to combinations of
17  * this program with other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22  * 02110-1301, USA.
23  */
24 
25 /*
26  * use migrate_pages() and check that address is on correct node
27  * 1. process A can migrate its non-shared mem with CAP_SYS_NICE
28  * 2. process A can migrate its non-shared mem without CAP_SYS_NICE
29  * 3. process A can migrate shared mem only with CAP_SYS_NICE
30  * 4. process A can migrate non-shared mem in process B with same effective uid
31  * 5. process A can migrate non-shared mem in process B with CAP_SYS_NICE
32  */
33 #include <sys/types.h>
34 #include <sys/syscall.h>
35 #include <sys/wait.h>
36 #include <sys/mman.h>
37 #include <errno.h>
38 #if HAVE_NUMA_H
39 #include <numa.h>
40 #endif
41 #if HAVE_NUMAIF_H
42 #include <numaif.h>
43 #endif
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <unistd.h>
47 #include <pwd.h>
48 
49 #include "config.h"
50 #include "test.h"
51 #include "safe_macros.h"
52 #include "lapi/syscalls.h"
53 #include "numa_helper.h"
54 #include "migrate_pages_common.h"
55 
/*
 * Estimated minimum amount of free memory a node must have so that this
 * process can be migrated onto it; migrate_pages will fail if the target
 * node does not have enough free space.
 *
 * On x86_64 this test used ~2048 pages (total VM, not just RSS). Taking
 * ia64 as the architecture with the largest (non-huge) page size (16k),
 * the limit is set to 2048*16k == 32M.
 */
#define NODE_MIN_FREEMEM (32 * 1024 * 1024)
65 
/*
 * Test identification globals: number of test cases and test case name.
 * NOTE(review): presumably consumed by the LTP library declared in
 * test.h -- confirm.
 */
int TST_TOTAL = 1;
char *TCID = "migrate_pages02";
68 
69 #if defined(HAVE_NUMA_V2) && defined(__NR_migrate_pages)
70 
71 static const char nobody_uid[] = "nobody";
72 static struct passwd *ltpuser;
73 static int *nodes, nodeA, nodeB;
74 static int num_nodes;
75 
76 static void setup(void);
77 static void cleanup(void);
78 
79 option_t options[] = {
80 	{NULL, NULL, NULL}
81 };
82 
83 static void print_mem_stats(pid_t pid, int node)
84 {
85 	char s[64];
86 	long long node_size, freep;
87 
88 	if (pid == 0)
89 		pid = getpid();
90 
91 	tst_resm(TINFO, "mem_stats pid: %d, node: %d", pid, node);
92 
93 	/* dump pid's VM info */
94 	sprintf(s, "cat /proc/%d/status", pid);
95 	system(s);
96 	sprintf(s, "cat /proc/%d/numa_maps", pid);
97 	system(s);
98 
99 	/* dump node free mem */
100 	node_size = numa_node_size64(node, &freep);
101 	tst_resm(TINFO, "Node id: %d, size: %lld, free: %lld",
102 		 node, node_size, freep);
103 }
104 
105 static int migrate_to_node(pid_t pid, int node)
106 {
107 	unsigned long nodemask_size, max_node;
108 	unsigned long *old_nodes, *new_nodes;
109 	int i;
110 
111 	tst_resm(TINFO, "pid(%d) migrate pid %d to node -> %d",
112 		 getpid(), pid, node);
113 	max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
114 	nodemask_size = max_node / 8;
115 	old_nodes = SAFE_MALLOC(NULL, nodemask_size);
116 	new_nodes = SAFE_MALLOC(NULL, nodemask_size);
117 
118 	memset(old_nodes, 0, nodemask_size);
119 	memset(new_nodes, 0, nodemask_size);
120 	for (i = 0; i < num_nodes; i++)
121 		set_bit(old_nodes, nodes[i], 1);
122 	set_bit(new_nodes, node, 1);
123 
124 	TEST(ltp_syscall(__NR_migrate_pages, pid, max_node, old_nodes,
125 		new_nodes));
126 	if (TEST_RETURN != 0) {
127 		if (TEST_RETURN < 0)
128 			tst_resm(TFAIL | TERRNO, "migrate_pages failed "
129 				 "ret: %ld, ", TEST_RETURN);
130 		else
131 			tst_resm(TINFO, "migrate_pages could not migrate all "
132 				 "pages, not migrated: %ld", TEST_RETURN);
133 		print_mem_stats(pid, node);
134 	}
135 	free(old_nodes);
136 	free(new_nodes);
137 	return TEST_RETURN;
138 }
139 
140 static int addr_on_node(void *addr)
141 {
142 	int node;
143 	int ret;
144 
145 	ret = ltp_syscall(__NR_get_mempolicy, &node, NULL, (unsigned long)0,
146 		      (unsigned long)addr, MPOL_F_NODE | MPOL_F_ADDR);
147 	if (ret == -1) {
148 		tst_resm(TBROK | TERRNO, "error getting memory policy "
149 			 "for page %p", addr);
150 	}
151 	return node;
152 }
153 
154 static int check_addr_on_node(void *addr, int exp_node)
155 {
156 	int node;
157 
158 	node = addr_on_node(addr);
159 	if (node == exp_node) {
160 		tst_resm(TPASS, "pid(%d) addr %p is on expected node: %d",
161 			 getpid(), addr, exp_node);
162 		return 0;
163 	} else {
164 		tst_resm(TFAIL, "pid(%d) addr %p not on expected node: %d "
165 			 ", expected %d", getpid(), addr, node, exp_node);
166 		print_mem_stats(0, exp_node);
167 		return 1;
168 	}
169 }
170 
171 static void test_migrate_current_process(int node1, int node2, int cap_sys_nice)
172 {
173 	char *testp, *testp2;
174 	int ret, status;
175 	pid_t child;
176 
177 	/* parent can migrate its non-shared memory */
178 	tst_resm(TINFO, "current_process, cap_sys_nice: %d", cap_sys_nice);
179 	testp = SAFE_MALLOC(NULL, getpagesize());
180 	testp[0] = 0;
181 	tst_resm(TINFO, "private anonymous: %p", testp);
182 	migrate_to_node(0, node2);
183 	check_addr_on_node(testp, node2);
184 	migrate_to_node(0, node1);
185 	check_addr_on_node(testp, node1);
186 	free(testp);
187 
188 	/* parent can migrate shared memory with CAP_SYS_NICE */
189 	testp2 = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
190 		      MAP_ANONYMOUS | MAP_SHARED, 0, 0);
191 	if (testp2 == MAP_FAILED)
192 		tst_brkm(TBROK | TERRNO, cleanup, "mmap failed");
193 	testp2[0] = 1;
194 	tst_resm(TINFO, "shared anonymous: %p", testp2);
195 	migrate_to_node(0, node2);
196 	check_addr_on_node(testp2, node2);
197 
198 	/* shared mem is on node2, try to migrate in child to node1 */
199 	fflush(stdout);
200 	child = fork();
201 	switch (child) {
202 	case -1:
203 		tst_brkm(TBROK | TERRNO, cleanup, "fork");
204 		break;
205 	case 0:
206 		tst_resm(TINFO, "child shared anonymous, cap_sys_nice: %d",
207 			 cap_sys_nice);
208 		testp = SAFE_MALLOC(NULL, getpagesize());
209 		testp[0] = 1;
210 		testp2[0] = 1;
211 		if (!cap_sys_nice)
212 			SAFE_SETEUID(NULL, ltpuser->pw_uid);
213 
214 		migrate_to_node(0, node1);
215 		/* child can migrate non-shared memory */
216 		ret = check_addr_on_node(testp, node1);
217 
218 		free(testp);
219 		munmap(testp2, getpagesize());
220 		exit(ret);
221 	default:
222 		SAFE_WAITPID(cleanup, child, &status, 0);
223 		if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
224 			tst_resm(TFAIL, "child returns %d", status);
225 		if (cap_sys_nice)
226 			/* child can migrate shared memory only
227 			 * with CAP_SYS_NICE */
228 			check_addr_on_node(testp2, node1);
229 		else
230 			check_addr_on_node(testp2, node2);
231 		munmap(testp2, getpagesize());
232 	}
233 }
234 
235 static void test_migrate_other_process(int node1, int node2, int cap_sys_nice)
236 {
237 	char *testp;
238 	int status, ret, tmp;
239 	pid_t child;
240 	int child_ready[2];
241 	int pages_migrated[2];
242 
243 	/* setup pipes to synchronize child/parent */
244 	if (pipe(child_ready) == -1)
245 		tst_resm(TBROK | TERRNO, "pipe #1 failed");
246 	if (pipe(pages_migrated) == -1)
247 		tst_resm(TBROK | TERRNO, "pipe #2 failed");
248 
249 	tst_resm(TINFO, "other_process, cap_sys_nice: %d", cap_sys_nice);
250 
251 	fflush(stdout);
252 	child = fork();
253 	switch (child) {
254 	case -1:
255 		tst_brkm(TBROK | TERRNO, cleanup, "fork");
256 		break;
257 	case 0:
258 		close(child_ready[0]);
259 		close(pages_migrated[1]);
260 
261 		testp = SAFE_MALLOC(NULL, getpagesize());
262 		testp[0] = 0;
263 
264 		/* make sure we are on node1 */
265 		migrate_to_node(0, node1);
266 		check_addr_on_node(testp, node1);
267 
268 		SAFE_SETEUID(NULL, ltpuser->pw_uid);
269 
270 		/* signal parent it's OK to migrate child and wait */
271 		if (write(child_ready[1], &tmp, 1) != 1)
272 			tst_brkm(TBROK | TERRNO, NULL, "write #1 failed");
273 		if (read(pages_migrated[0], &tmp, 1) != 1)
274 			tst_brkm(TBROK | TERRNO, NULL, "read #1 failed");
275 
276 		/* parent can migrate child process with same euid */
277 		/* parent can migrate child process with CAP_SYS_NICE */
278 		ret = check_addr_on_node(testp, node2);
279 
280 		free(testp);
281 		close(child_ready[1]);
282 		close(pages_migrated[0]);
283 		exit(ret);
284 	default:
285 		close(child_ready[1]);
286 		close(pages_migrated[0]);
287 
288 		if (!cap_sys_nice)
289 			SAFE_SETEUID(NULL, ltpuser->pw_uid);
290 
291 		/* wait until child is ready on node1, then migrate and
292 		 * signal to check current node */
293 		if (read(child_ready[0], &tmp, 1) != 1)
294 			tst_brkm(TBROK | TERRNO, NULL, "read #2 failed");
295 		migrate_to_node(child, node2);
296 		if (write(pages_migrated[1], &tmp, 1) != 1)
297 			tst_brkm(TBROK | TERRNO, NULL, "write #2 failed");
298 
299 		SAFE_WAITPID(cleanup, child, &status, 0);
300 		if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
301 			tst_resm(TFAIL, "child returns %d", status);
302 		close(child_ready[0]);
303 		close(pages_migrated[1]);
304 
305 		/* reset euid, so this testcase can be used in loop */
306 		if (!cap_sys_nice)
307 			SAFE_SETEUID(NULL, 0);
308 	}
309 }
310 
311 int main(int argc, char *argv[])
312 {
313 	int lc;
314 
315 	tst_parse_opts(argc, argv, options, NULL);
316 
317 	setup();
318 	for (lc = 0; TEST_LOOPING(lc); lc++) {
319 		tst_count = 0;
320 		test_migrate_current_process(nodeA, nodeB, 1);
321 		test_migrate_current_process(nodeA, nodeB, 0);
322 		test_migrate_other_process(nodeA, nodeB, 1);
323 		test_migrate_other_process(nodeA, nodeB, 0);
324 	}
325 	cleanup();
326 	tst_exit();
327 }
328 
329 static void setup(void)
330 {
331 	int ret, i, j;
332 	int pagesize = getpagesize();
333 	void *p;
334 
335 	tst_require_root();
336 	TEST(ltp_syscall(__NR_migrate_pages, 0, 0, NULL, NULL));
337 
338 	if (numa_available() == -1)
339 		tst_brkm(TCONF, NULL, "NUMA not available");
340 
341 	ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes);
342 	if (ret < 0)
343 		tst_brkm(TBROK | TERRNO, NULL, "get_allowed_nodes(): %d", ret);
344 
345 	if (num_nodes < 2)
346 		tst_brkm(TCONF, NULL, "at least 2 allowed NUMA nodes"
347 			 " are required");
348 	else if (tst_kvercmp(2, 6, 18) < 0)
349 		tst_brkm(TCONF, NULL, "2.6.18 or greater kernel required");
350 
351 	/*
352 	 * find 2 nodes, which can hold NODE_MIN_FREEMEM bytes
353 	 * The reason is that:
354 	 * 1. migrate_pages() is expected to succeed
355 	 * 2. this test avoids hitting:
356 	 *    Bug 870326 - migrate_pages() reports success, but pages are
357 	 *                 not moved to desired node
358 	 *    https://bugzilla.redhat.com/show_bug.cgi?id=870326
359 	 */
360 	nodeA = nodeB = -1;
361 	for (i = 0; i < num_nodes; i++) {
362 		p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]);
363 		if (p == NULL)
364 			break;
365 		memset(p, 0xff, NODE_MIN_FREEMEM);
366 
367 		j = 0;
368 		while (j < NODE_MIN_FREEMEM) {
369 			if (addr_on_node(p + j) != nodes[i])
370 				break;
371 			j += pagesize;
372 		}
373 		numa_free(p, NODE_MIN_FREEMEM);
374 
375 		if (j >= NODE_MIN_FREEMEM) {
376 			if (nodeA == -1)
377 				nodeA = nodes[i];
378 			else if (nodeB == -1)
379 				nodeB = nodes[i];
380 			else
381 				break;
382 		}
383 	}
384 
385 	if (nodeA == -1 || nodeB == -1)
386 		tst_brkm(TCONF, NULL, "at least 2 NUMA nodes with "
387 			 "free mem > %d are needed", NODE_MIN_FREEMEM);
388 	tst_resm(TINFO, "Using nodes: %d %d", nodeA, nodeB);
389 
390 	ltpuser = getpwnam(nobody_uid);
391 	if (ltpuser == NULL)
392 		tst_brkm(TBROK | TERRNO, NULL, "getpwnam failed");
393 
394 	TEST_PAUSE;
395 }
396 
397 static void cleanup(void)
398 {
399 	free(nodes);
400 }
401 
402 #else
403 int main(void)
404 {
405 	tst_brkm(TCONF, NULL, "System doesn't support __NR_migrate_pages or "
406 		 "libnuma or libnuma development packages are not available");
407 }
408 #endif
409