#define JEMALLOC_PAGES_C_
#include "jemalloc/internal/jemalloc_preamble.h"

#include "jemalloc/internal/pages.h"

#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/malloc_io.h"

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
#include <sys/sysctl.h>
#ifdef __FreeBSD__
#include <vm/vm_param.h>
#endif
#endif

/******************************************************************************/
/* Defines/includes needed for special android code. */

#if defined(__ANDROID__)
#include <sys/prctl.h>
#endif

/******************************************************************************/
/* Data. */

/* Actual operating system page size, detected during bootstrap, <= PAGE. */
static size_t os_page;

#ifndef _WIN32
#  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
#  define PAGES_PROT_DECOMMIT (PROT_NONE)
static int mmap_flags;
#endif
static bool os_overcommits;

const char *thp_mode_names[] = {
	"default",
	"always",
	"never",
	"not supported"
};
thp_mode_t opt_thp = THP_MODE_DEFAULT;
thp_mode_t init_system_thp_mode;

/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;

/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static void os_pages_unmap(void *addr, size_t size);

/******************************************************************************/

static void *
os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);
	assert(size != 0);

	if (os_overcommits) {
		*commit = true;
	}

	void *ret;
#ifdef _WIN32
	/*
	 * If VirtualAlloc can't allocate at the given address when one is
	 * given, it fails and returns NULL.
	 */
	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
	    PAGE_READWRITE);
#else
	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	{
		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;

		ret = mmap(addr, size, prot, mmap_flags, -1, 0);
	}
	assert(ret != NULL);

	if (ret == MAP_FAILED) {
		ret = NULL;
	} else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		os_pages_unmap(ret, size);
		ret = NULL;
	}
#endif
#if defined(__ANDROID__)
	if (ret != NULL) {
		/* Name this memory as being used by libc. */
		prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ret, size,
		    "libc_malloc");
	}
#endif
	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
	    ret == addr));
	return ret;
}
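/*
 * Trim an over-sized mapping down to the aligned [ret, ret + size) window.
 * On Windows, a reservation cannot be partially released, so the whole
 * region is unmapped and remapped at the target address (which can fail if
 * another thread claims the hole first). Elsewhere, the excess lead and
 * trail pages are simply munmap()ed in place.
 */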
static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	void *ret = (void *)((uintptr_t)addr + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
	os_pages_unmap(addr, alloc_size);
	void *new_addr = os_pages_map(ret, size, PAGE, commit);
	if (new_addr == ret) {
		return ret;
	}
	if (new_addr != NULL) {
		os_pages_unmap(new_addr, size);
	}
	return NULL;
#else
	size_t trailsize = alloc_size - leadsize - size;

	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (trailsize != 0) {
		os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
	}
	return ret;
#endif
}

static void
os_pages_unmap(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);

#ifdef _WIN32
	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#else
	if (munmap(addr, size) == -1)
#endif
	{
		char buf[BUFERROR_BUF];

		buferror(get_errno(), buf, sizeof(buf));
		malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
		    "VirtualFree"
#else
		    "munmap"
#endif
		    "(): %s\n", buf);
		if (opt_abort) {
			abort();
		}
	}
}

static void *
pages_map_slow(size_t size, size_t alignment, bool *commit) {
	size_t alloc_size = size + alignment - os_page;
	/* Beware size_t wrap-around. */
	if (alloc_size < size) {
		return NULL;
	}

	void *ret;
	do {
		void *pages = os_pages_map(NULL, alloc_size, alignment,
		    commit);
		if (pages == NULL) {
			return NULL;
		}
		size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
		    - (uintptr_t)pages;
		ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
	} while (ret == NULL);

	assert(ret != NULL);
	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}
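/*
 * Worked example for the slow path (illustrative numbers only, assuming
 * os_page == 4 KiB): a request for size == 16 KiB at alignment == 16 KiB
 * over-allocates alloc_size == 16 KiB + 16 KiB - 4 KiB == 28 KiB. If mmap()
 * happens to return 0x13000, then leadsize == ALIGNMENT_CEILING(0x13000,
 * 0x4000) - 0x13000 == 0x1000, so the 4 KiB lead and the remaining 8 KiB
 * trail are unmapped, and the aligned 16 KiB window at 0x14000 is returned.
 */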
void *
pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(alignment >= PAGE);
	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);

	/*
	 * Ideally, there would be a way to specify alignment to mmap() (like
	 * NetBSD has), but in the absence of such a feature, we have to work
	 * hard to efficiently create aligned mappings. The reliable, but
	 * slow method is to create a mapping that is over-sized, then trim
	 * the excess. However, that always results in one or two calls to
	 * os_pages_unmap(), and it can leave holes in the process's virtual
	 * memory map if memory grows downward.
	 *
	 * Optimistically try mapping precisely the right amount before
	 * falling back to the slow method, with the expectation that the
	 * optimistic approach works most of the time.
	 */

	void *ret = os_pages_map(addr, size, os_page, commit);
	if (ret == NULL || ret == addr) {
		return ret;
	}
	assert(addr == NULL);
	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
		os_pages_unmap(ret, size);
		return pages_map_slow(size, alignment, commit);
	}

	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}

void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	os_pages_unmap(addr, size);
}

static bool
pages_commit_impl(void *addr, size_t size, bool commit) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (os_overcommits) {
		return true;
	}

#ifdef _WIN32
	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
#else
	{
		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
		    -1, 0);
		if (result == MAP_FAILED) {
			return true;
		}
		if (result != addr) {
			/*
			 * We succeeded in mapping memory, but not in the right
			 * place.
			 */
			os_pages_unmap(result, size);
			return true;
		}
		return false;
	}
#endif
}

bool
pages_commit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, true);
}

bool
pages_decommit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, false);
}
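/*
 * Note: like the other primitives in this file, pages_commit() and
 * pages_decommit() return false on success and true on failure; when the
 * system overcommits, the pages are permanently committed and both report
 * failure. An illustrative caller (hypothetical, not part of this file):
 *
 *	if (pages_decommit(addr, size)) {
 *		... decommit unavailable; keep treating the range as
 *		    committed ...
 *	}
 */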
bool
pages_purge_lazy(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_lazy) {
		return true;
	}
	if (!pages_can_purge_lazy_runtime) {
		/*
		 * Built with lazy purge enabled, but detected it was not
		 * supported on the current system.
		 */
		return true;
	}

#ifdef _WIN32
	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
	return false;
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
	return (madvise(addr, size,
# ifdef MADV_FREE
	    MADV_FREE
# else
	    JEMALLOC_MADV_FREE
# endif
	    ) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#else
	not_reached();
#endif
}

bool
pages_purge_forced(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_forced) {
		return true;
	}

#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_MAPS_COALESCE)
	/* Try to overlay a new demand-zeroed mapping. */
	return pages_commit(addr, size);
#else
	not_reached();
#endif
}

static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else
	return true;
#endif
}

bool
pages_huge(void *addr, size_t size) {
	return pages_huge_impl(addr, size, true);
}

static bool
pages_huge_unaligned(void *addr, size_t size) {
	return pages_huge_impl(addr, size, false);
}

static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
	return false;
#endif
}

bool
pages_nohuge(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, true);
}

static bool
pages_nohuge_unaligned(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, false);
}

bool
pages_dontdump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DONTDUMP) != 0;
#else
	return false;
#endif
}

bool
pages_dodump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DODUMP) != 0;
#else
	return false;
#endif
}
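/*
 * Detect the operating system's page size at run time. jemalloc is built
 * against a compile-time maximum (PAGE); pages_boot() verifies that the
 * detected size does not exceed it.
 */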
static size_t
os_page_detect(void) {
#ifdef _WIN32
	SYSTEM_INFO si;
	GetSystemInfo(&si);
	return si.dwPageSize;
#elif defined(__FreeBSD__)
	return getpagesize();
#else
	long result = sysconf(_SC_PAGESIZE);
	if (result == -1) {
		return LG_PAGE;
	}
	return (size_t)result;
#endif
}

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz;

	sz = sizeof(vm_overcommit);
#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
	int mib[2];

	mib[0] = CTL_VM;
	mib[1] = VM_OVERCOMMIT;
	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#else
	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#endif

	return ((vm_overcommit & 0x3) == 0);
}
#endif

#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system call
 * wrappers.
 */
static bool
os_overcommits_proc(void) {
	int fd;
	char buf[1];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
	    O_CLOEXEC);
#else
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#else
#if defined(O_CLOEXEC)
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#endif

	if (fd == -1) {
		return false; /* Error. */
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (buf[0] == '0' || buf[0] == '1');
}
#endif
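/*
 * Bring an existing mapping's transparent huge page state in line with
 * opt_thp when the requested mode differs from the system-wide mode that
 * was observed at boot (init_system_thp_mode).
 */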
void
pages_set_thp_state(void *ptr, size_t size) {
	if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
		return;
	}
	assert(opt_thp != thp_mode_not_supported &&
	    init_system_thp_mode != thp_mode_not_supported);

	if (opt_thp == thp_mode_always
	    && init_system_thp_mode != thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default);
		pages_huge_unaligned(ptr, size);
	} else if (opt_thp == thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default ||
		    init_system_thp_mode == thp_mode_always);
		pages_nohuge_unaligned(ptr, size);
	}
}

static void
init_thp_state(void) {
	if (!have_madvise_huge) {
		if (metadata_thp_enabled() && opt_abort) {
			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
			abort();
		}
		goto label_error;
	}

	static const char sys_state_madvise[] = "always [madvise] never\n";
	static const char sys_state_always[] = "[always] madvise never\n";
	static const char sys_state_never[] = "always madvise [never]\n";
	char buf[sizeof(sys_state_madvise)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	int fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
	if (fd == -1) {
		goto label_error;
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	/* A failed read would otherwise wrap around to a huge strncmp bound. */
	if (nread < 0) {
		goto label_error;
	}

	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_default;
	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_always;
	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_never;
	} else {
		goto label_error;
	}
	return;
label_error:
	opt_thp = init_system_thp_mode = thp_mode_not_supported;
}

bool
pages_boot(void) {
	os_page = os_page_detect();
	if (os_page > PAGE) {
		malloc_write("<jemalloc>: Unsupported system page size\n");
		if (opt_abort) {
			abort();
		}
		return true;
	}

#ifndef _WIN32
	mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif

#if defined(__ANDROID__)
	/* Android always supports overcommits. */
	os_overcommits = true;
#else /* __ANDROID__ */

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
	os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
	os_overcommits = os_overcommits_proc();
# ifdef MAP_NORESERVE
	if (os_overcommits) {
		mmap_flags |= MAP_NORESERVE;
	}
# endif
#else
	os_overcommits = false;
#endif

#endif /* __ANDROID__ */

	init_thp_state();

	/* Detect lazy purge runtime support. */
	if (pages_can_purge_lazy) {
		bool committed = false;
		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE,
		    &committed);
		if (madv_free_page == NULL) {
			return true;
		}
		assert(pages_can_purge_lazy_runtime);
		if (pages_purge_lazy(madv_free_page, PAGE)) {
			pages_can_purge_lazy_runtime = false;
		}
		os_pages_unmap(madv_free_page, PAGE);
	}

	return false;
}
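/*
 * Illustrative bootstrap sequence (hypothetical caller, not part of this
 * file): jemalloc calls pages_boot() once during initialization, after
 * which the mapping primitives above are usable:
 *
 *	if (pages_boot()) {
 *		... bootstrap failure ...
 *	}
 *	bool commit = true;
 *	void *p = pages_map(NULL, PAGE, PAGE, &commit);
 *	if (p != NULL) {
 *		pages_unmap(p, PAGE);
 *	}
 */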