#define JEMALLOC_PAGES_C_
#include "jemalloc/internal/jemalloc_preamble.h"

#include "jemalloc/internal/pages.h"

#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/malloc_io.h"

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
#include <sys/sysctl.h>
#ifdef __FreeBSD__
#include <vm/vm_param.h>
#endif
#endif

/******************************************************************************/
/* Defines/includes needed for special Android code. */

#if defined(__ANDROID__)
#include <sys/prctl.h>
#endif

/******************************************************************************/
/* Data. */

/* Actual operating system page size, detected during bootstrap, <= PAGE. */
static size_t	os_page;

#ifndef _WIN32
#  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
#  define PAGES_PROT_DECOMMIT (PROT_NONE)
static int	mmap_flags;
#endif
static bool	os_overcommits;

const char *thp_mode_names[] = {
	"default",
	"always",
	"never",
	"not supported"
};
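/*
 * Note: the order of the names above must stay in sync with the numeric
 * values of thp_mode_t (thp_mode_default, thp_mode_always, thp_mode_never,
 * thp_mode_not_supported), since the strings are indexed by mode.
 */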
thp_mode_t opt_thp = THP_MODE_DEFAULT;
/* System THP mode as detected at boot, via init_thp_state(). */
thp_mode_t init_system_thp_mode;

/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;

/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static void os_pages_unmap(void *addr, size_t size);

/******************************************************************************/

static void *
os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);
	assert(size != 0);

	if (os_overcommits) {
		*commit = true;
	}

	void *ret;
#ifdef _WIN32
	/*
	 * If VirtualAlloc can't allocate at the given address when one is
	 * given, it fails and returns NULL.
	 */
	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
	    PAGE_READWRITE);
#else
	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	{
		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;

		ret = mmap(addr, size, prot, mmap_flags, -1, 0);
	}
	assert(ret != NULL);

	if (ret == MAP_FAILED) {
		ret = NULL;
	} else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		os_pages_unmap(ret, size);
		ret = NULL;
	}
#endif
#if defined(__ANDROID__)
	if (ret != NULL) {
		/*
		 * Name this memory as being used by libc; the region then
		 * shows up as "[anon:libc_malloc]" in /proc/<pid>/maps.
		 */
		prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ret, size,
		    "libc_malloc");
	}
#endif
	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
	    ret == addr));
	return ret;
}
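/*
 * Commit-pointer protocol, for reference: callers pass in whether they want
 * the pages committed via *commit; when the OS overcommits, the distinction
 * is meaningless and os_pages_map() forces *commit to true. A hypothetical
 * caller (sketch only):
 *
 *	bool commit = false;
 *	void *p = os_pages_map(NULL, PAGE, PAGE, &commit);
 *	// p is NULL on failure; commit now reflects the actual commit state.
 */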

static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	void *ret = (void *)((uintptr_t)addr + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
	/*
	 * VirtualFree(MEM_RELEASE) cannot release a subrange of a
	 * reservation, so unmap the whole allocation and remap just the
	 * aligned portion.
	 */
	os_pages_unmap(addr, alloc_size);
	void *new_addr = os_pages_map(ret, size, PAGE, commit);
	if (new_addr == ret) {
		return ret;
	}
	if (new_addr != NULL) {
		os_pages_unmap(new_addr, size);
	}
	return NULL;
#else
	size_t trailsize = alloc_size - leadsize - size;

	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (trailsize != 0) {
		os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
	}
	return ret;
#endif
}

static void
os_pages_unmap(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);

#ifdef _WIN32
	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#else
	if (munmap(addr, size) == -1)
#endif
	{
		char buf[BUFERROR_BUF];

		buferror(get_errno(), buf, sizeof(buf));
		malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
		    "VirtualFree"
#else
		    "munmap"
#endif
		    "(): %s\n", buf);
		if (opt_abort) {
			abort();
		}
	}
}

static void *
pages_map_slow(size_t size, size_t alignment, bool *commit) {
	size_t alloc_size = size + alignment - os_page;
	/* Beware size_t wrap-around. */
	if (alloc_size < size) {
		return NULL;
	}

	void *ret;
	do {
		void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
		if (pages == NULL) {
			return NULL;
		}
		size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
		    - (uintptr_t)pages;
		ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
	} while (ret == NULL);

	assert(ret != NULL);
	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}
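/*
 * Worked example of the over-allocation above: with os_page = 4KiB and a
 * requested alignment of 2MiB, alloc_size = size + 2MiB - 4KiB. Any
 * page-aligned mapping of that length must contain a 2MiB-aligned subrange
 * of the requested size, so trimming the lead/trail excess always succeeds.
 */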

void *
pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(alignment >= PAGE);
	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);

	/*
	 * Ideally, there would be a way to specify alignment to mmap() (like
	 * NetBSD has), but in the absence of such a feature, we have to work
	 * hard to efficiently create aligned mappings.  The reliable, but
	 * slow method is to create a mapping that is over-sized, then trim the
	 * excess.  However, that always results in one or two calls to
	 * os_pages_unmap(), and it can leave holes in the process's virtual
	 * memory map if memory grows downward.
	 *
	 * Optimistically try mapping precisely the right amount before falling
	 * back to the slow method, with the expectation that the optimistic
	 * approach works most of the time.
	 */

	void *ret = os_pages_map(addr, size, os_page, commit);
	if (ret == NULL || ret == addr) {
		return ret;
	}
	assert(addr == NULL);
	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
		os_pages_unmap(ret, size);
		return pages_map_slow(size, alignment, commit);
	}

	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}
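/*
 * Usage sketch (hypothetical caller, not part of this file): map two pages
 * at any page-aligned address, then release them.
 *
 *	bool commit = true;
 *	void *p = pages_map(NULL, 2 * PAGE, PAGE, &commit);
 *	if (p != NULL) {
 *		...
 *		pages_unmap(p, 2 * PAGE);
 *	}
 */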

void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	os_pages_unmap(addr, size);
}

static bool
pages_commit_impl(void *addr, size_t size, bool commit) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (os_overcommits) {
		return true;
	}

#ifdef _WIN32
	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
#else
	{
		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
		    -1, 0);
		if (result == MAP_FAILED) {
			return true;
		}
		if (result != addr) {
			/*
			 * We succeeded in mapping memory, but not in the right
			 * place.
			 */
			os_pages_unmap(result, size);
			return true;
		}
		return false;
	}
#endif
}

bool
pages_commit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, true);
}

bool
pages_decommit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, false);
}
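/*
 * Note the return convention shared by pages_commit()/pages_decommit() (and
 * the purge functions below): false means success, true means failure or
 * no-op. A hypothetical caller (sketch only):
 *
 *	if (pages_decommit(p, PAGE)) {
 *		// Decommit failed; the pages remain committed and usable.
 *	}
 */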

bool
pages_purge_lazy(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_lazy) {
		return true;
	}
	if (!pages_can_purge_lazy_runtime) {
		/*
		 * Built with lazy purge enabled, but detected it was not
		 * supported on the current system.
		 */
		return true;
	}

#ifdef _WIN32
	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
	return false;
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
	return (madvise(addr, size,
#  ifdef MADV_FREE
	    MADV_FREE
#  else
	    JEMALLOC_MADV_FREE
#  endif
	    ) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#else
	not_reached();
#endif
}
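/*
 * Lazy purge tells the OS it may reclaim the pages at its convenience
 * (MADV_FREE on Linux/BSD, MEM_RESET on Windows). The mapping stays valid,
 * but page contents are indeterminate afterwards: reads may return the old
 * data or zeroes, depending on whether the OS actually reclaimed the pages.
 */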

bool
pages_purge_forced(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_forced) {
		return true;
	}

#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_MAPS_COALESCE)
	/* Try to overlay a new demand-zeroed mapping. */
	return pages_commit(addr, size);
#else
	not_reached();
#endif
}
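/*
 * Forced purge guarantees the pages read as zero afterwards: on Linux,
 * MADV_DONTNEED on a private anonymous mapping drops the backing pages
 * immediately, and the next access faults in fresh zero-filled pages (hence
 * the _ZEROS configure check above).
 */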

static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else
	return true;
#endif
}

bool
pages_huge(void *addr, size_t size) {
	return pages_huge_impl(addr, size, true);
}

static bool
pages_huge_unaligned(void *addr, size_t size) {
	return pages_huge_impl(addr, size, false);
}

static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
	return false;
#endif
}

bool
pages_nohuge(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, true);
}

static bool
pages_nohuge_unaligned(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, false);
}

bool
pages_dontdump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DONTDUMP) != 0;
#else
	return false;
#endif
}

bool
pages_dodump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DODUMP) != 0;
#else
	return false;
#endif
}
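/*
 * pages_dontdump()/pages_dodump() toggle whether a range is included in core
 * dumps (MADV_DONTDUMP/MADV_DODUMP on Linux). A hypothetical pairing (sketch
 * only):
 *
 *	pages_dontdump(p, sz);	// Exclude a large, recreatable region.
 *	...
 *	pages_dodump(p, sz);	// Re-include it before handing it out.
 */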

static size_t
os_page_detect(void) {
#ifdef _WIN32
	SYSTEM_INFO si;
	GetSystemInfo(&si);
	return si.dwPageSize;
#elif defined(__FreeBSD__)
	/*
	 * On FreeBSD, getpagesize() returns a value cached from the ELF
	 * auxiliary vector, avoiding a syscall.
	 */
	return getpagesize();
#else
	long result = sysconf(_SC_PAGESIZE);
	if (result == -1) {
		return LG_PAGE;
	}
	return (size_t)result;
#endif
}

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz;

	sz = sizeof(vm_overcommit);
#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
	int mib[2];

	mib[0] = CTL_VM;
	mib[1] = VM_OVERCOMMIT;
	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#else
	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#endif

	return ((vm_overcommit & 0x3) == 0);
}
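/*
 * The 0x3 mask above covers FreeBSD's two swap-reserve enforcement bits
 * (SWAP_RESERVE_FORCE_ON and SWAP_RESERVE_RT_FORCE_ON in <vm/vm_param.h>);
 * if neither is set, the kernel does not enforce swap accounting, i.e. it
 * overcommits.
 */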
#endif

#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system call
 * wrappers.
 */
static bool
os_overcommits_proc(void) {
	int fd;
	char buf[1];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	#if defined(O_CLOEXEC)
		fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
			O_CLOEXEC);
	#else
		fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
		if (fd != -1) {
			fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
		}
	#endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
	#if defined(O_CLOEXEC)
		fd = (int)syscall(SYS_openat,
			AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
	#else
		fd = (int)syscall(SYS_openat,
			AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
		if (fd != -1) {
			fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
		}
	#endif
#else
	#if defined(O_CLOEXEC)
		fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
	#else
		fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
		if (fd != -1) {
			fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
		}
	#endif
#endif

	if (fd == -1) {
		return false; /* Error. */
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (buf[0] == '0' || buf[0] == '1');
}
#endif

void
pages_set_thp_state(void *ptr, size_t size) {
	if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
		return;
	}
	assert(opt_thp != thp_mode_not_supported &&
	    init_system_thp_mode != thp_mode_not_supported);

	if (opt_thp == thp_mode_always
	    && init_system_thp_mode != thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default);
		pages_huge_unaligned(ptr, size);
	} else if (opt_thp == thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default ||
		    init_system_thp_mode == thp_mode_always);
		pages_nohuge_unaligned(ptr, size);
	}
}
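/*
 * The cases above, spelled out: madvise() is only needed when opt_thp
 * disagrees with the system-wide mode. For example, with the system in
 * "madvise" (thp_mode_default) and opt.thp:always, the range gets
 * MADV_HUGEPAGE; with the system in "always" and opt.thp:never, it gets
 * MADV_NOHUGEPAGE. When the two modes already agree, nothing is done.
 */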

static void
init_thp_state(void) {
	if (!have_madvise_huge) {
		if (metadata_thp_enabled() && opt_abort) {
			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
			abort();
		}
		goto label_error;
	}

	static const char sys_state_madvise[] = "always [madvise] never\n";
	static const char sys_state_always[] = "[always] madvise never\n";
	static const char sys_state_never[] = "always madvise [never]\n";
	char buf[sizeof(sys_state_madvise)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	int fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
	if (fd == -1) {
		goto label_error;
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	/* Guard against a failed read; (size_t)-1 would defeat strncmp(). */
	if (nread < 0) {
		goto label_error;
	}

	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_default;
	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_always;
	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_never;
	} else {
		goto label_error;
	}
	return;
label_error:
	opt_thp = init_system_thp_mode = thp_mode_not_supported;
}
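/*
 * Reading the sysfs state, for reference: the kernel reports the THP mode by
 * bracketing the active choice, e.g. "always [madvise] never". Since the
 * three candidate strings above are all the same length, comparing the first
 * nread bytes against each one is enough to classify the mode.
 */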

bool
pages_boot(void) {
	os_page = os_page_detect();
	if (os_page > PAGE) {
		malloc_write("<jemalloc>: Unsupported system page size\n");
		if (opt_abort) {
			abort();
		}
		return true;
	}

#ifndef _WIN32
	mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif

#if defined(__ANDROID__)
	/* Android always supports overcommits. */
	os_overcommits = true;
#else  /* __ANDROID__ */

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
	os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
	os_overcommits = os_overcommits_proc();
#  ifdef MAP_NORESERVE
	if (os_overcommits) {
		mmap_flags |= MAP_NORESERVE;
	}
#  endif
#else
	os_overcommits = false;
#endif

#endif  /* __ANDROID__ */

	init_thp_state();

	/*
	 * Detect lazy purge runtime support by purging a freshly mapped page:
	 * if the build supports MADV_FREE but the running kernel rejects it,
	 * fall back at runtime.
	 */
	if (pages_can_purge_lazy) {
		bool committed = false;
		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed);
		if (madv_free_page == NULL) {
			return true;
		}
		assert(pages_can_purge_lazy_runtime);
		if (pages_purge_lazy(madv_free_page, PAGE)) {
			pages_can_purge_lazy_runtime = false;
		}
		os_pages_unmap(madv_free_page, PAGE);
	}

	return false;
}