1 /*
2 * Copyright (C) 2012 Linux Test Project, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it
13 * is free of the rightful claim of any third person regarding
14 * infringement or the like. Any license provided herein, whether
15 * implied or otherwise, applies only to this software file. Patent
16 * licenses, if any, provided herein do not apply to combinations of
17 * this program with other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 * 02110-1301, USA.
23 */
24
25 /*
26 * use migrate_pages() and check that address is on correct node
27 * 1. process A can migrate its non-shared mem with CAP_SYS_NICE
28 * 2. process A can migrate its non-shared mem without CAP_SYS_NICE
29 * 3. process A can migrate shared mem only with CAP_SYS_NICE
30 * 4. process A can migrate non-shared mem in process B with same effective uid
31 * 5. process A can migrate non-shared mem in process B with CAP_SYS_NICE
32 */
33 #include <sys/types.h>
34 #include <sys/syscall.h>
35 #include <sys/wait.h>
36 #include <sys/mman.h>
37 #include <sys/prctl.h>
38 #include <errno.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <unistd.h>
42 #include <pwd.h>
43
44 #include "tst_test.h"
45 #include "lapi/syscalls.h"
46 #include "numa_helper.h"
47 #include "migrate_pages_common.h"
48
/*
 * Estimated lower bound of free memory a node needs so that this
 * process can be migrated onto it; migrate_pages will fail when the
 * node does not have enough free space. A run of this test on x86_64
 * used ~2048 pages (total VM, not just RSS). Taking ia64 as the
 * architecture with the largest (non-huge) page size (16k), the limit
 * becomes 2048*16k == 32M.
 */
#define NODE_MIN_FREEMEM (32 * 1024 * 1024)
58
59 #ifdef HAVE_NUMA_V2
60
/* Name of the unprivileged account looked up with getpwnam() in setup(). */
static const char nobody_uid[] = "nobody";

/* passwd entry for nobody_uid; its pw_uid is used when dropping privileges. */
static struct passwd *ltpuser;

/* Node ids and their count, filled in by get_allowed_nodes_arr() in setup(). */
static int *nodes;
static int num_nodes;

/* The two nodes selected in setup() that pages are migrated between. */
static int nodeA;
static int nodeB;

/* NULL-terminated list handed to the test library via tst_test.save_restore. */
static const char * const save_restore[] = {
	"?/proc/sys/kernel/numa_balancing",
	NULL,
};
70
print_mem_stats(pid_t pid,int node)71 static void print_mem_stats(pid_t pid, int node)
72 {
73 char s[64];
74 long long node_size, freep;
75
76 if (pid == 0)
77 pid = getpid();
78
79 tst_res(TINFO, "mem_stats pid: %d, node: %d", pid, node);
80
81 /* dump pid's VM info */
82 sprintf(s, "cat /proc/%d/status", pid);
83 system(s);
84 sprintf(s, "cat /proc/%d/numa_maps", pid);
85 system(s);
86
87 /* dump node free mem */
88 node_size = numa_node_size64(node, &freep);
89 tst_res(TINFO, "Node id: %d, size: %lld, free: %lld",
90 node, node_size, freep);
91 }
92
migrate_to_node(pid_t pid,int node)93 static int migrate_to_node(pid_t pid, int node)
94 {
95 unsigned long nodemask_size, max_node;
96 unsigned long *old_nodes, *new_nodes;
97 int i;
98
99 tst_res(TINFO, "pid(%d) migrate pid %d to node -> %d",
100 getpid(), pid, node);
101 max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
102 nodemask_size = max_node / 8;
103 old_nodes = SAFE_MALLOC(nodemask_size);
104 new_nodes = SAFE_MALLOC(nodemask_size);
105
106 memset(old_nodes, 0, nodemask_size);
107 memset(new_nodes, 0, nodemask_size);
108 for (i = 0; i < num_nodes; i++)
109 set_bit(old_nodes, nodes[i], 1);
110 set_bit(new_nodes, node, 1);
111
112 TEST(tst_syscall(__NR_migrate_pages, pid, max_node, old_nodes,
113 new_nodes));
114 if (TST_RET != 0) {
115 if (TST_RET < 0) {
116 tst_res(TFAIL | TERRNO, "migrate_pages failed "
117 "ret: %ld, ", TST_RET);
118 print_mem_stats(pid, node);
119 } else {
120 tst_res(TINFO, "migrate_pages could not migrate all "
121 "pages, not migrated: %ld", TST_RET);
122 }
123 }
124 free(old_nodes);
125 free(new_nodes);
126 return TST_RET;
127 }
128
addr_on_node(void * addr)129 static int addr_on_node(void *addr)
130 {
131 int node;
132 int ret;
133
134 ret = tst_syscall(__NR_get_mempolicy, &node, NULL, (unsigned long)0,
135 (unsigned long)addr, MPOL_F_NODE | MPOL_F_ADDR);
136 if (ret == -1) {
137 tst_res(TBROK | TERRNO, "error getting memory policy "
138 "for page %p", addr);
139 }
140 return node;
141 }
142
check_addr_on_node(void * addr,int exp_node)143 static int check_addr_on_node(void *addr, int exp_node)
144 {
145 int node;
146
147 node = addr_on_node(addr);
148 if (node == exp_node) {
149 tst_res(TPASS, "pid(%d) addr %p is on expected node: %d",
150 getpid(), addr, exp_node);
151 return TPASS;
152 } else {
153 tst_res(TFAIL, "pid(%d) addr %p not on expected node: %d "
154 ", expected %d", getpid(), addr, node, exp_node);
155 print_mem_stats(0, exp_node);
156 return TFAIL;
157 }
158 }
159
test_migrate_current_process(int node1,int node2,int cap_sys_nice)160 static void test_migrate_current_process(int node1, int node2, int cap_sys_nice)
161 {
162 char *private, *shared;
163 int ret;
164 pid_t child;
165
166 /* parent can migrate its non-shared memory */
167 tst_res(TINFO, "current_process, cap_sys_nice: %d", cap_sys_nice);
168 private = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE,
169 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
170 private[0] = 0;
171 tst_res(TINFO, "private anonymous: %p", private);
172
173 migrate_to_node(0, node2);
174 check_addr_on_node(private, node2);
175 migrate_to_node(0, node1);
176 check_addr_on_node(private, node1);
177 SAFE_MUNMAP(private, getpagesize());
178
179 /* parent can migrate shared memory with CAP_SYS_NICE */
180 shared = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE,
181 MAP_ANONYMOUS | MAP_SHARED, 0, 0);
182 shared[0] = 1;
183 tst_res(TINFO, "shared anonymous: %p", shared);
184 migrate_to_node(0, node2);
185 check_addr_on_node(shared, node2);
186
187 /* shared mem is on node2, try to migrate in child to node1 */
188 fflush(stdout);
189 child = SAFE_FORK();
190 if (child == 0) {
191 tst_res(TINFO, "child shared anonymous, cap_sys_nice: %d",
192 cap_sys_nice);
193 private = SAFE_MMAP(NULL, getpagesize(),
194 PROT_READ | PROT_WRITE,
195 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
196 private[0] = 1;
197 shared[0] = 1;
198 if (!cap_sys_nice)
199 SAFE_SETEUID(ltpuser->pw_uid);
200
201 migrate_to_node(0, node1);
202 /* child can migrate non-shared memory */
203 ret = check_addr_on_node(private, node1);
204
205 exit(ret);
206 }
207
208 SAFE_WAITPID(child, NULL, 0);
209 if (cap_sys_nice)
210 /* child can migrate shared memory only
211 * with CAP_SYS_NICE */
212 check_addr_on_node(shared, node1);
213 else
214 check_addr_on_node(shared, node2);
215 SAFE_MUNMAP(shared, getpagesize());
216 }
217
test_migrate_other_process(int node1,int node2,int cap_sys_nice)218 static void test_migrate_other_process(int node1, int node2, int cap_sys_nice)
219 {
220 char *private;
221 int ret;
222 pid_t child1, child2;
223
224 tst_res(TINFO, "other_process, cap_sys_nice: %d", cap_sys_nice);
225
226 fflush(stdout);
227 child1 = SAFE_FORK();
228 if (child1 == 0) {
229 private = SAFE_MMAP(NULL, getpagesize(),
230 PROT_READ | PROT_WRITE,
231 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
232 private[0] = 0;
233
234 /* make sure we are on node1 */
235 migrate_to_node(0, node1);
236 check_addr_on_node(private, node1);
237
238 SAFE_SETUID(ltpuser->pw_uid);
239
240 /* commit_creds() will clear dumpable, restore it */
241 if (prctl(PR_SET_DUMPABLE, 1))
242 tst_brk(TBROK | TERRNO, "prctl");
243
244 /* signal child2 it's OK to migrate child1 and wait */
245 TST_CHECKPOINT_WAKE(0);
246 TST_CHECKPOINT_WAIT(1);
247
248 /* child2 can migrate child1 process if it's privileged */
249 /* child2 can migrate child1 process if it has same uid */
250 ret = check_addr_on_node(private, node2);
251
252 exit(ret);
253 }
254
255 fflush(stdout);
256 child2 = SAFE_FORK();
257 if (child2 == 0) {
258 if (!cap_sys_nice)
259 SAFE_SETUID(ltpuser->pw_uid);
260
261 /* wait until child1 is ready on node1, then migrate and
262 * signal to check current node */
263 TST_CHECKPOINT_WAIT(0);
264 migrate_to_node(child1, node2);
265 TST_CHECKPOINT_WAKE(1);
266
267 exit(TPASS);
268 }
269
270 SAFE_WAITPID(child1, NULL, 0);
271 SAFE_WAITPID(child2, NULL, 0);
272 }
273
run(void)274 static void run(void)
275 {
276 test_migrate_current_process(nodeA, nodeB, 1);
277 test_migrate_current_process(nodeA, nodeB, 0);
278 test_migrate_other_process(nodeA, nodeB, 1);
279 test_migrate_other_process(nodeA, nodeB, 0);
280 }
281
setup(void)282 static void setup(void)
283 {
284 int ret, i, j;
285 int pagesize = getpagesize();
286 void *p;
287
288 tst_syscall(__NR_migrate_pages, 0, 0, NULL, NULL);
289
290 if (numa_available() == -1)
291 tst_brk(TCONF, "NUMA not available");
292
293 ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes);
294 if (ret < 0)
295 tst_brk(TBROK | TERRNO, "get_allowed_nodes(): %d", ret);
296
297 if (num_nodes < 2)
298 tst_brk(TCONF, "at least 2 allowed NUMA nodes"
299 " are required");
300 else if (tst_kvercmp(2, 6, 18) < 0)
301 tst_brk(TCONF, "2.6.18 or greater kernel required");
302
303 FILE_PRINTF("/proc/sys/kernel/numa_balancing", "0");
304 /*
305 * find 2 nodes, which can hold NODE_MIN_FREEMEM bytes
306 * The reason is that:
307 * 1. migrate_pages() is expected to succeed
308 * 2. this test avoids hitting:
309 * Bug 870326 - migrate_pages() reports success, but pages are
310 * not moved to desired node
311 * https://bugzilla.redhat.com/show_bug.cgi?id=870326
312 */
313 nodeA = nodeB = -1;
314 for (i = 0; i < num_nodes; i++) {
315 p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]);
316 if (p == NULL)
317 break;
318 memset(p, 0xff, NODE_MIN_FREEMEM);
319
320 j = 0;
321 while (j < NODE_MIN_FREEMEM) {
322 if (addr_on_node(p + j) != nodes[i])
323 break;
324 j += pagesize;
325 }
326 numa_free(p, NODE_MIN_FREEMEM);
327
328 if (j >= NODE_MIN_FREEMEM) {
329 if (nodeA == -1)
330 nodeA = nodes[i];
331 else if (nodeB == -1)
332 nodeB = nodes[i];
333 else
334 break;
335 }
336 }
337
338 if (nodeA == -1 || nodeB == -1)
339 tst_brk(TCONF, "at least 2 NUMA nodes with "
340 "free mem > %d are needed", NODE_MIN_FREEMEM);
341 tst_res(TINFO, "Using nodes: %d %d", nodeA, nodeB);
342
343 ltpuser = getpwnam(nobody_uid);
344 if (ltpuser == NULL)
345 tst_brk(TBROK | TERRNO, "getpwnam failed");
346 }
347
348 static struct tst_test test = {
349 .needs_root = 1,
350 .needs_checkpoints = 1,
351 .forks_child = 1,
352 .test_all = run,
353 .setup = setup,
354 .save_restore = save_restore,
355 };
356 #else
357 TST_TEST_TCONF(NUMA_ERROR_MSG);
358 #endif
359