1 /* 2 * Copyright (C) 2012 Linux Test Project, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it would be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 11 * 12 * Further, this software is distributed without any warranty that it 13 * is free of the rightful claim of any third person regarding 14 * infringement or the like. Any license provided herein, whether 15 * implied or otherwise, applies only to this software file. Patent 16 * licenses, if any, provided herein do not apply to combinations of 17 * this program with other software, or any other product whatsoever. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 22 * 02110-1301, USA. 23 */ 24 25 /* 26 * use migrate_pages() and check that address is on correct node 27 * 1. process A can migrate its non-shared mem with CAP_SYS_NICE 28 * 2. process A can migrate its non-shared mem without CAP_SYS_NICE 29 * 3. process A can migrate shared mem only with CAP_SYS_NICE 30 * 4. process A can migrate non-shared mem in process B with same effective uid 31 * 5. process A can migrate non-shared mem in process B with CAP_SYS_NICE 32 */ 33 #include <sys/types.h> 34 #include <sys/syscall.h> 35 #include <sys/wait.h> 36 #include <sys/mman.h> 37 #include <errno.h> 38 #if HAVE_NUMA_H 39 #include <numa.h> 40 #endif 41 #if HAVE_NUMAIF_H 42 #include <numaif.h> 43 #endif 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <unistd.h> 47 #include <pwd.h> 48 49 #include "config.h" 50 #include "test.h" 51 #include "safe_macros.h" 52 #include "lapi/syscalls.h" 53 #include "numa_helper.h" 54 #include "migrate_pages_common.h" 55 56 /* 57 * This is an estimated minimum of free mem required to migrate this 58 * process to another node as migrate_pages will fail if there is not 59 * enough free space on node. While running this test on x86_64 60 * it used ~2048 pages (total VM, not just RSS). Considering ia64 as 61 * architecture with largest (non-huge) page size (16k), this limit 62 * is set to 2048*16k == 32M. 63 */ 64 #define NODE_MIN_FREEMEM (32*1024*1024) 65 66 char *TCID = "migrate_pages02"; 67 int TST_TOTAL = 1; 68 69 #if defined(HAVE_NUMA_V2) && defined(__NR_migrate_pages) 70 71 static const char nobody_uid[] = "nobody"; 72 static struct passwd *ltpuser; 73 static int *nodes, nodeA, nodeB; 74 static int num_nodes; 75 76 static void setup(void); 77 static void cleanup(void); 78 79 option_t options[] = { 80 {NULL, NULL, NULL} 81 }; 82 83 static void print_mem_stats(pid_t pid, int node) 84 { 85 char s[64]; 86 long long node_size, freep; 87 88 if (pid == 0) 89 pid = getpid(); 90 91 tst_resm(TINFO, "mem_stats pid: %d, node: %d", pid, node); 92 93 /* dump pid's VM info */ 94 sprintf(s, "cat /proc/%d/status", pid); 95 system(s); 96 sprintf(s, "cat /proc/%d/numa_maps", pid); 97 system(s); 98 99 /* dump node free mem */ 100 node_size = numa_node_size64(node, &freep); 101 tst_resm(TINFO, "Node id: %d, size: %lld, free: %lld", 102 node, node_size, freep); 103 } 104 105 static int migrate_to_node(pid_t pid, int node) 106 { 107 unsigned long nodemask_size, max_node; 108 unsigned long *old_nodes, *new_nodes; 109 int i; 110 111 tst_resm(TINFO, "pid(%d) migrate pid %d to node -> %d", 112 getpid(), pid, node); 113 max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8); 114 nodemask_size = max_node / 8; 115 old_nodes = SAFE_MALLOC(NULL, nodemask_size); 116 new_nodes = SAFE_MALLOC(NULL, nodemask_size); 117 118 memset(old_nodes, 0, nodemask_size); 119 memset(new_nodes, 0, nodemask_size); 120 for (i = 0; i < num_nodes; i++) 121 set_bit(old_nodes, nodes[i], 1); 122 set_bit(new_nodes, node, 1); 123 124 TEST(ltp_syscall(__NR_migrate_pages, pid, max_node, old_nodes, 125 new_nodes)); 126 if (TEST_RETURN != 0) { 127 if (TEST_RETURN < 0) 128 tst_resm(TFAIL | TERRNO, "migrate_pages failed " 129 "ret: %ld, ", TEST_RETURN); 130 else 131 tst_resm(TINFO, "migrate_pages could not migrate all " 132 "pages, not migrated: %ld", TEST_RETURN); 133 print_mem_stats(pid, node); 134 } 135 free(old_nodes); 136 free(new_nodes); 137 return TEST_RETURN; 138 } 139 140 static int addr_on_node(void *addr) 141 { 142 int node; 143 int ret; 144 145 ret = ltp_syscall(__NR_get_mempolicy, &node, NULL, (unsigned long)0, 146 (unsigned long)addr, MPOL_F_NODE | MPOL_F_ADDR); 147 if (ret == -1) { 148 tst_resm(TBROK | TERRNO, "error getting memory policy " 149 "for page %p", addr); 150 } 151 return node; 152 } 153 154 static int check_addr_on_node(void *addr, int exp_node) 155 { 156 int node; 157 158 node = addr_on_node(addr); 159 if (node == exp_node) { 160 tst_resm(TPASS, "pid(%d) addr %p is on expected node: %d", 161 getpid(), addr, exp_node); 162 return 0; 163 } else { 164 tst_resm(TFAIL, "pid(%d) addr %p not on expected node: %d " 165 ", expected %d", getpid(), addr, node, exp_node); 166 print_mem_stats(0, exp_node); 167 return 1; 168 } 169 } 170 171 static void test_migrate_current_process(int node1, int node2, int cap_sys_nice) 172 { 173 char *testp, *testp2; 174 int ret, status; 175 pid_t child; 176 177 /* parent can migrate its non-shared memory */ 178 tst_resm(TINFO, "current_process, cap_sys_nice: %d", cap_sys_nice); 179 testp = SAFE_MALLOC(NULL, getpagesize()); 180 testp[0] = 0; 181 tst_resm(TINFO, "private anonymous: %p", testp); 182 migrate_to_node(0, node2); 183 check_addr_on_node(testp, node2); 184 migrate_to_node(0, node1); 185 check_addr_on_node(testp, node1); 186 free(testp); 187 188 /* parent can migrate shared memory with CAP_SYS_NICE */ 189 testp2 = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, 190 MAP_ANONYMOUS | MAP_SHARED, 0, 0); 191 if (testp2 == MAP_FAILED) 192 tst_brkm(TBROK | TERRNO, cleanup, "mmap failed"); 193 testp2[0] = 1; 194 tst_resm(TINFO, "shared anonymous: %p", testp2); 195 migrate_to_node(0, node2); 196 check_addr_on_node(testp2, node2); 197 198 /* shared mem is on node2, try to migrate in child to node1 */ 199 fflush(stdout); 200 child = fork(); 201 switch (child) { 202 case -1: 203 tst_brkm(TBROK | TERRNO, cleanup, "fork"); 204 break; 205 case 0: 206 tst_resm(TINFO, "child shared anonymous, cap_sys_nice: %d", 207 cap_sys_nice); 208 testp = SAFE_MALLOC(NULL, getpagesize()); 209 testp[0] = 1; 210 testp2[0] = 1; 211 if (!cap_sys_nice) 212 SAFE_SETEUID(NULL, ltpuser->pw_uid); 213 214 migrate_to_node(0, node1); 215 /* child can migrate non-shared memory */ 216 ret = check_addr_on_node(testp, node1); 217 218 free(testp); 219 munmap(testp2, getpagesize()); 220 exit(ret); 221 default: 222 SAFE_WAITPID(cleanup, child, &status, 0); 223 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) 224 tst_resm(TFAIL, "child returns %d", status); 225 if (cap_sys_nice) 226 /* child can migrate shared memory only 227 * with CAP_SYS_NICE */ 228 check_addr_on_node(testp2, node1); 229 else 230 check_addr_on_node(testp2, node2); 231 munmap(testp2, getpagesize()); 232 } 233 } 234 235 static void test_migrate_other_process(int node1, int node2, int cap_sys_nice) 236 { 237 char *testp; 238 int status, ret, tmp; 239 pid_t child; 240 int child_ready[2]; 241 int pages_migrated[2]; 242 243 /* setup pipes to synchronize child/parent */ 244 if (pipe(child_ready) == -1) 245 tst_resm(TBROK | TERRNO, "pipe #1 failed"); 246 if (pipe(pages_migrated) == -1) 247 tst_resm(TBROK | TERRNO, "pipe #2 failed"); 248 249 tst_resm(TINFO, "other_process, cap_sys_nice: %d", cap_sys_nice); 250 251 fflush(stdout); 252 child = fork(); 253 switch (child) { 254 case -1: 255 tst_brkm(TBROK | TERRNO, cleanup, "fork"); 256 break; 257 case 0: 258 close(child_ready[0]); 259 close(pages_migrated[1]); 260 261 testp = SAFE_MALLOC(NULL, getpagesize()); 262 testp[0] = 0; 263 264 /* make sure we are on node1 */ 265 migrate_to_node(0, node1); 266 check_addr_on_node(testp, node1); 267 268 SAFE_SETEUID(NULL, ltpuser->pw_uid); 269 270 /* signal parent it's OK to migrate child and wait */ 271 if (write(child_ready[1], &tmp, 1) != 1) 272 tst_brkm(TBROK | TERRNO, NULL, "write #1 failed"); 273 if (read(pages_migrated[0], &tmp, 1) != 1) 274 tst_brkm(TBROK | TERRNO, NULL, "read #1 failed"); 275 276 /* parent can migrate child process with same euid */ 277 /* parent can migrate child process with CAP_SYS_NICE */ 278 ret = check_addr_on_node(testp, node2); 279 280 free(testp); 281 close(child_ready[1]); 282 close(pages_migrated[0]); 283 exit(ret); 284 default: 285 close(child_ready[1]); 286 close(pages_migrated[0]); 287 288 if (!cap_sys_nice) 289 SAFE_SETEUID(NULL, ltpuser->pw_uid); 290 291 /* wait until child is ready on node1, then migrate and 292 * signal to check current node */ 293 if (read(child_ready[0], &tmp, 1) != 1) 294 tst_brkm(TBROK | TERRNO, NULL, "read #2 failed"); 295 migrate_to_node(child, node2); 296 if (write(pages_migrated[1], &tmp, 1) != 1) 297 tst_brkm(TBROK | TERRNO, NULL, "write #2 failed"); 298 299 SAFE_WAITPID(cleanup, child, &status, 0); 300 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) 301 tst_resm(TFAIL, "child returns %d", status); 302 close(child_ready[0]); 303 close(pages_migrated[1]); 304 305 /* reset euid, so this testcase can be used in loop */ 306 if (!cap_sys_nice) 307 SAFE_SETEUID(NULL, 0); 308 } 309 } 310 311 int main(int argc, char *argv[]) 312 { 313 int lc; 314 315 tst_parse_opts(argc, argv, options, NULL); 316 317 setup(); 318 for (lc = 0; TEST_LOOPING(lc); lc++) { 319 tst_count = 0; 320 test_migrate_current_process(nodeA, nodeB, 1); 321 test_migrate_current_process(nodeA, nodeB, 0); 322 test_migrate_other_process(nodeA, nodeB, 1); 323 test_migrate_other_process(nodeA, nodeB, 0); 324 } 325 cleanup(); 326 tst_exit(); 327 } 328 329 static void setup(void) 330 { 331 int ret, i, j; 332 int pagesize = getpagesize(); 333 void *p; 334 335 tst_require_root(); 336 TEST(ltp_syscall(__NR_migrate_pages, 0, 0, NULL, NULL)); 337 338 if (numa_available() == -1) 339 tst_brkm(TCONF, NULL, "NUMA not available"); 340 341 ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes); 342 if (ret < 0) 343 tst_brkm(TBROK | TERRNO, NULL, "get_allowed_nodes(): %d", ret); 344 345 if (num_nodes < 2) 346 tst_brkm(TCONF, NULL, "at least 2 allowed NUMA nodes" 347 " are required"); 348 else if (tst_kvercmp(2, 6, 18) < 0) 349 tst_brkm(TCONF, NULL, "2.6.18 or greater kernel required"); 350 351 /* 352 * find 2 nodes, which can hold NODE_MIN_FREEMEM bytes 353 * The reason is that: 354 * 1. migrate_pages() is expected to succeed 355 * 2. this test avoids hitting: 356 * Bug 870326 - migrate_pages() reports success, but pages are 357 * not moved to desired node 358 * https://bugzilla.redhat.com/show_bug.cgi?id=870326 359 */ 360 nodeA = nodeB = -1; 361 for (i = 0; i < num_nodes; i++) { 362 p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]); 363 if (p == NULL) 364 break; 365 memset(p, 0xff, NODE_MIN_FREEMEM); 366 367 j = 0; 368 while (j < NODE_MIN_FREEMEM) { 369 if (addr_on_node(p + j) != nodes[i]) 370 break; 371 j += pagesize; 372 } 373 numa_free(p, NODE_MIN_FREEMEM); 374 375 if (j >= NODE_MIN_FREEMEM) { 376 if (nodeA == -1) 377 nodeA = nodes[i]; 378 else if (nodeB == -1) 379 nodeB = nodes[i]; 380 else 381 break; 382 } 383 } 384 385 if (nodeA == -1 || nodeB == -1) 386 tst_brkm(TCONF, NULL, "at least 2 NUMA nodes with " 387 "free mem > %d are needed", NODE_MIN_FREEMEM); 388 tst_resm(TINFO, "Using nodes: %d %d", nodeA, nodeB); 389 390 ltpuser = getpwnam(nobody_uid); 391 if (ltpuser == NULL) 392 tst_brkm(TBROK | TERRNO, NULL, "getpwnam failed"); 393 394 TEST_PAUSE; 395 } 396 397 static void cleanup(void) 398 { 399 free(nodes); 400 } 401 402 #else 403 int main(void) 404 { 405 tst_brkm(TCONF, NULL, "System doesn't support __NR_migrate_pages or " 406 "libnuma or libnuma development packages are not available"); 407 } 408 #endif 409