#define JEMALLOC_PAGES_C_
#include "jemalloc/internal/jemalloc_preamble.h"

#include "jemalloc/internal/pages.h"

#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/malloc_io.h"

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
#include <sys/sysctl.h>
#ifdef __FreeBSD__
#include <vm/vm_param.h>
#endif
#endif

/******************************************************************************/
/* Defines/includes needed for special Android code. */

#if defined(__ANDROID__)
#include <sys/prctl.h>
#endif

/******************************************************************************/
/* Data. */

/* Actual operating system page size, detected during bootstrap, <= PAGE. */
static size_t os_page;

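/*
 * On POSIX systems, "decommitted" pages are emulated by overlaying the range
 * with a PROT_NONE mapping; committing restores PROT_READ | PROT_WRITE.
 */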
#ifndef _WIN32
#  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
#  define PAGES_PROT_DECOMMIT (PROT_NONE)
static int mmap_flags;
#endif
static bool os_overcommits;

const char *thp_mode_names[] = {
	"default",
	"always",
	"never",
	"not supported"
};
thp_mode_t opt_thp = THP_MODE_DEFAULT;
thp_mode_t init_system_thp_mode;

/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;

/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static void os_pages_unmap(void *addr, size_t size);

/******************************************************************************/

static void *
os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);
	assert(size != 0);

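	/*
	 * When the OS overcommits, demand-zeroed memory is usable as soon as
	 * it is mapped, so report every new mapping as committed.
	 */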
	if (os_overcommits) {
		*commit = true;
	}

	void *ret;
#ifdef _WIN32
	/*
	 * If VirtualAlloc can't allocate at the given address when one is
	 * given, it fails and returns NULL.
	 */
	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
	    PAGE_READWRITE);
#else
	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	{
		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;

		ret = mmap(addr, size, prot, mmap_flags, -1, 0);
	}
	assert(ret != NULL);

	if (ret == MAP_FAILED) {
		ret = NULL;
	} else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		os_pages_unmap(ret, size);
		ret = NULL;
	}
#endif
#if defined(__ANDROID__)
	if (ret != NULL) {
		/* Name this memory as being used by libc. */
		prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ret, size,
		    "libc_malloc");
	}
#endif
	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
	    ret == addr));
	return ret;
}


static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	void *ret = (void *)((uintptr_t)addr + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
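	/*
	 * VirtualFree() cannot release a subrange of a reservation, so unmap
	 * the whole allocation and try to re-map just the desired subrange.
	 * Another thread can win the race for that address, in which case the
	 * re-map fails and NULL is returned.
	 */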
	os_pages_unmap(addr, alloc_size);
	void *new_addr = os_pages_map(ret, size, PAGE, commit);
	if (new_addr == ret) {
		return ret;
	}
	if (new_addr != NULL) {
		os_pages_unmap(new_addr, size);
	}
	return NULL;
#else
	size_t trailsize = alloc_size - leadsize - size;

	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (trailsize != 0) {
		os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
	}
	return ret;
#endif
}


static void
os_pages_unmap(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);

#ifdef _WIN32
	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#else
	if (munmap(addr, size) == -1)
#endif
	{
		char buf[BUFERROR_BUF];

		buferror(get_errno(), buf, sizeof(buf));
		malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
		    "VirtualFree"
#else
		    "munmap"
#endif
		    "(): %s\n", buf);
		if (opt_abort) {
			abort();
		}
	}
}

static void *
pages_map_slow(size_t size, size_t alignment, bool *commit) {
	size_t alloc_size = size + alignment - os_page;
	/* Beware size_t wrap-around. */
	if (alloc_size < size) {
		return NULL;
	}
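	/*
	 * Example: with 4 KiB pages, a 16 KiB request at 2 MiB alignment maps
	 * 16 KiB + 2 MiB - 4 KiB, which is guaranteed to contain a 2 MiB-
	 * aligned 16 KiB subrange for os_pages_trim() to carve out.
	 */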

	void *ret;
	do {
		void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
		if (pages == NULL) {
			return NULL;
		}
		size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
		    - (uintptr_t)pages;
		ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
	} while (ret == NULL);

	assert(ret != NULL);
	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}

void *
pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(alignment >= PAGE);
	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);

	/*
	 * Ideally, there would be a way to specify alignment to mmap() (like
	 * NetBSD has), but in the absence of such a feature, we have to work
	 * hard to efficiently create aligned mappings.  The reliable, but
	 * slow method is to create a mapping that is over-sized, then trim the
	 * excess.  However, that always results in one or two calls to
	 * os_pages_unmap(), and it can leave holes in the process's virtual
	 * memory map if memory grows downward.
	 *
	 * Optimistically try mapping precisely the right amount before falling
	 * back to the slow method, with the expectation that the optimistic
	 * approach works most of the time.
	 */

	void *ret = os_pages_map(addr, size, os_page, commit);
	if (ret == NULL || ret == addr) {
		return ret;
	}
	assert(addr == NULL);
	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
		os_pages_unmap(ret, size);
		return pages_map_slow(size, alignment, commit);
	}

	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}
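/*
 * Illustrative use of pages_map() (not taken from this file): reserve an
 * aligned region without committing it up front.  On return, *commit is true
 * if the memory is already usable (e.g. because the system overcommits).
 *
 *	bool commit = false;
 *	void *p = pages_map(NULL, HUGEPAGE, HUGEPAGE, &commit);
 *	if (p != NULL && !commit) {
 *		commit = !pages_commit(p, HUGEPAGE);
 *	}
 */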

void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	os_pages_unmap(addr, size);
}

static bool
pages_commit_impl(void *addr, size_t size, bool commit) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (os_overcommits) {
		return true;
	}

#ifdef _WIN32
	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
#else
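	/*
	 * Overlaying a fresh anonymous mapping with MAP_FIXED atomically
	 * switches both the protection and the backing pages: committing
	 * yields demand-zeroed memory, while decommitting (PROT_NONE)
	 * discards the old contents.
	 */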
	{
		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
		    -1, 0);
		if (result == MAP_FAILED) {
			return true;
		}
		if (result != addr) {
			/*
			 * We succeeded in mapping memory, but not in the right
			 * place.
			 */
			os_pages_unmap(result, size);
			return true;
		}
		return false;
	}
#endif
}

bool
pages_commit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, true);
}

bool
pages_decommit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, false);
}

bool
pages_purge_lazy(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_lazy) {
		return true;
	}
	if (!pages_can_purge_lazy_runtime) {
		/*
		 * Built with lazy purge enabled, but detected it was not
		 * supported on the current system.
		 */
		return true;
	}

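	/*
	 * Lazy purge (MADV_FREE / MEM_RESET) only tells the kernel it may
	 * reclaim the pages at its leisure; the mapping itself stays valid.
	 * Forced purge additionally guarantees zero-filled pages on next
	 * access.
	 */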
#ifdef _WIN32
	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
	return false;
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
	return (madvise(addr, size,
#  ifdef MADV_FREE
	    MADV_FREE
#  else
	    JEMALLOC_MADV_FREE
#  endif
	    ) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#else
	not_reached();
#endif
}

bool
pages_purge_forced(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_forced) {
		return true;
	}

#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_MAPS_COALESCE)
	/* Try to overlay a new demand-zeroed mapping. */
	return pages_commit(addr, size);
#else
	not_reached();
#endif
}

static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else
	return true;
#endif
}

bool
pages_huge(void *addr, size_t size) {
	return pages_huge_impl(addr, size, true);
}

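/*
 * The unaligned variants skip the HUGEPAGE alignment asserts so that THP
 * state can be adjusted on arbitrary page-aligned ranges; see
 * pages_set_thp_state().
 */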
static bool
pages_huge_unaligned(void *addr, size_t size) {
	return pages_huge_impl(addr, size, false);
}

static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
	return false;
#endif
}

bool
pages_nohuge(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, true);
}

static bool
pages_nohuge_unaligned(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, false);
}


bool
pages_dontdump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DONTDUMP) != 0;
#else
	return false;
#endif
}

bool
pages_dodump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DODUMP) != 0;
#else
	return false;
#endif
}

static size_t
os_page_detect(void) {
#ifdef _WIN32
	SYSTEM_INFO si;
	GetSystemInfo(&si);
	return si.dwPageSize;
#elif defined(__FreeBSD__)
	return getpagesize();
#else
	long result = sysconf(_SC_PAGESIZE);
	if (result == -1) {
		/* sysconf() failed; fall back to the compile-time page size. */
		return PAGE;
	}
	return (size_t)result;
#endif
}

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz;

	sz = sizeof(vm_overcommit);
#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
	int mib[2];

	mib[0] = CTL_VM;
	mib[1] = VM_OVERCOMMIT;
	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#else
	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#endif

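	/*
	 * vm.overcommit is a bitmask; overcommit is assumed to be in effect
	 * only when the two low-order bits, which enable strict swap
	 * accounting, are both clear.
	 */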
	return ((vm_overcommit & 0x3) == 0);
}
#endif

#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system call
 * wrappers.
 */
static bool
os_overcommits_proc(void) {
	int fd;
	char buf[1];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
#  if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
	    O_CLOEXEC);
#  else
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#  endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
#  if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#  else
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#  endif
#else
#  if defined(O_CLOEXEC)
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#  else
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#  endif
#endif

	if (fd == -1) {
		return false; /* Error. */
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (buf[0] == '0' || buf[0] == '1');
}
#endif

void
pages_set_thp_state(void *ptr, size_t size) {
	if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
		return;
	}
	assert(opt_thp != thp_mode_not_supported &&
	    init_system_thp_mode != thp_mode_not_supported);

	if (opt_thp == thp_mode_always
	    && init_system_thp_mode != thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default);
		pages_huge_unaligned(ptr, size);
	} else if (opt_thp == thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default ||
		    init_system_thp_mode == thp_mode_always);
		pages_nohuge_unaligned(ptr, size);
	}
}

static void
init_thp_state(void) {
	if (!have_madvise_huge) {
		if (metadata_thp_enabled() && opt_abort) {
			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
			abort();
		}
		goto label_error;
	}

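	/*
	 * The sysfs file reports the active THP mode by bracketing it, e.g.
	 * "always [madvise] never"; buf is sized for the longest variant.
	 */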
	static const char sys_state_madvise[] = "always [madvise] never\n";
	static const char sys_state_always[] = "[always] madvise never\n";
	static const char sys_state_never[] = "always madvise [never]\n";
	char buf[sizeof(sys_state_madvise)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	int fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
	if (fd == -1) {
		goto label_error;
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif


	if (nread < 0) {
		/* Read failed; buf contents are undefined. */
		goto label_error;
	}
	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_default;
	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_always;
	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_never;
	} else {
		goto label_error;
	}
	return;
label_error:
	opt_thp = init_system_thp_mode = thp_mode_not_supported;
}

bool
pages_boot(void) {
	os_page = os_page_detect();
	if (os_page > PAGE) {
		malloc_write("<jemalloc>: Unsupported system page size\n");
		if (opt_abort) {
			abort();
		}
		return true;
	}

#ifndef _WIN32
	mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif

#if defined(__ANDROID__)
	/* Android always supports overcommit. */
	os_overcommits = true;
#else /* __ANDROID__ */

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
	os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
	os_overcommits = os_overcommits_proc();
#  ifdef MAP_NORESERVE
	if (os_overcommits) {
		mmap_flags |= MAP_NORESERVE;
	}
#  endif
#else
	os_overcommits = false;
#endif

#endif /* __ANDROID__ */

	init_thp_state();

	/* Detect lazy purge runtime support. */
	if (pages_can_purge_lazy) {
		bool committed = false;
		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE,
		    &committed);
		if (madv_free_page == NULL) {
			return true;
		}
		assert(pages_can_purge_lazy_runtime);
		if (pages_purge_lazy(madv_free_page, PAGE)) {
			pages_can_purge_lazy_runtime = false;
		}
		os_pages_unmap(madv_free_page, PAGE);
	}

	return false;
}