1 #define JEMALLOC_TSD_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3 #include "jemalloc/internal/jemalloc_internal_includes.h"
4 
5 #include "jemalloc/internal/assert.h"
6 #include "jemalloc/internal/mutex.h"
7 #include "jemalloc/internal/rtree.h"
8 
/******************************************************************************/
/* Data. */
11 
12 static unsigned ncleanups;
13 static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];
14 
15 #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
16 __thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER;
17 __thread bool JEMALLOC_TLS_MODEL tsd_initialized = false;
18 bool tsd_booted = false;
19 #elif (defined(JEMALLOC_TLS))
20 __thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER;
21 pthread_key_t tsd_tsd;
22 bool tsd_booted = false;
23 #elif (defined(_WIN32))
24 DWORD tsd_tsd;
25 tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
26 bool tsd_booted = false;
27 #else
28 
29 /*
30  * This contains a mutex, but it's pretty convenient to allow the mutex code to
31  * have a dependency on tsd.  So we define the struct here, and only refer to it
32  * by pointer in the header.
33  */
34 struct tsd_init_head_s {
35 	ql_head(tsd_init_block_t) blocks;
36 	malloc_mutex_t lock;
37 };
38 
39 pthread_key_t tsd_tsd;
40 tsd_init_head_t	tsd_init_head = {
41 	ql_head_initializer(blocks),
42 	MALLOC_MUTEX_INITIALIZER
43 };
44 tsd_wrapper_t tsd_boot_wrapper = {
45 	false,
46 	TSD_INITIALIZER
47 };
48 bool tsd_booted = false;
49 #endif
50 
51 
/******************************************************************************/
53 
54 void
tsd_slow_update(tsd_t * tsd)55 tsd_slow_update(tsd_t *tsd) {
56 	if (tsd_nominal(tsd)) {
57 		if (malloc_slow || !tsd_tcache_enabled_get(tsd) ||
58 		    tsd_reentrancy_level_get(tsd) > 0) {
59 			tsd->state = tsd_state_nominal_slow;
60 		} else {
61 			tsd->state = tsd_state_nominal;
62 		}
63 	}
64 }
65 
66 static bool
tsd_data_init(tsd_t * tsd)67 tsd_data_init(tsd_t *tsd) {
68 	/*
69 	 * We initialize the rtree context first (before the tcache), since the
70 	 * tcache initialization depends on it.
71 	 */
72 	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
73 
74 	/*
75 	 * A nondeterministic seed based on the address of tsd reduces
76 	 * the likelihood of lockstep non-uniform cache index
77 	 * utilization among identical concurrent processes, but at the
78 	 * cost of test repeatability.  For debug builds, instead use a
79 	 * deterministic seed.
80 	 */
81 	*tsd_offset_statep_get(tsd) = config_debug ? 0 :
82 	    (uint64_t)(uintptr_t)tsd;
83 
84 	return tsd_tcache_enabled_data_init(tsd);
85 }
86 
87 static void
assert_tsd_data_cleanup_done(tsd_t * tsd)88 assert_tsd_data_cleanup_done(tsd_t *tsd) {
89 	assert(!tsd_nominal(tsd));
90 	assert(*tsd_arenap_get_unsafe(tsd) == NULL);
91 	assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
92 	assert(*tsd_arenas_tdata_bypassp_get_unsafe(tsd) == true);
93 	assert(*tsd_arenas_tdatap_get_unsafe(tsd) == NULL);
94 	assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
95 	assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
96 }
97 
98 static bool
tsd_data_init_nocleanup(tsd_t * tsd)99 tsd_data_init_nocleanup(tsd_t *tsd) {
100 	assert(tsd->state == tsd_state_reincarnated ||
101 	    tsd->state == tsd_state_minimal_initialized);
102 	/*
103 	 * During reincarnation, there is no guarantee that the cleanup function
104 	 * will be called (deallocation may happen after all tsd destructors).
105 	 * We set up tsd in a way that no cleanup is needed.
106 	 */
107 	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
108 	*tsd_arenas_tdata_bypassp_get(tsd) = true;
109 	*tsd_tcache_enabledp_get_unsafe(tsd) = false;
110 	*tsd_reentrancy_levelp_get(tsd) = 1;
111 	assert_tsd_data_cleanup_done(tsd);
112 
113 	return false;
114 }
115 
116 tsd_t *
tsd_fetch_slow(tsd_t * tsd,bool minimal)117 tsd_fetch_slow(tsd_t *tsd, bool minimal) {
118 	assert(!tsd_fast(tsd));
119 
120 	if (tsd->state == tsd_state_nominal_slow) {
121 		/* On slow path but no work needed. */
122 		assert(malloc_slow || !tsd_tcache_enabled_get(tsd) ||
123 		    tsd_reentrancy_level_get(tsd) > 0 ||
124 		    *tsd_arenas_tdata_bypassp_get(tsd));
125 	} else if (tsd->state == tsd_state_uninitialized) {
126 		if (!minimal) {
127 			tsd->state = tsd_state_nominal;
128 			tsd_slow_update(tsd);
129 			/* Trigger cleanup handler registration. */
130 			tsd_set(tsd);
131 			tsd_data_init(tsd);
132 		} else {
133 			tsd->state = tsd_state_minimal_initialized;
134 			tsd_set(tsd);
135 			tsd_data_init_nocleanup(tsd);
136 		}
137 	} else if (tsd->state == tsd_state_minimal_initialized) {
138 		if (!minimal) {
139 			/* Switch to fully initialized. */
140 			tsd->state = tsd_state_nominal;
141 			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
142 			(*tsd_reentrancy_levelp_get(tsd))--;
143 			tsd_slow_update(tsd);
144 			tsd_data_init(tsd);
145 		} else {
146 			assert_tsd_data_cleanup_done(tsd);
147 		}
148 	} else if (tsd->state == tsd_state_purgatory) {
149 		tsd->state = tsd_state_reincarnated;
150 		tsd_set(tsd);
151 		tsd_data_init_nocleanup(tsd);
152 	} else {
153 		assert(tsd->state == tsd_state_reincarnated);
154 	}
155 
156 	return tsd;
157 }
158 
/*
 * Allocate memory through a0malloc(), with the requested size passed through
 * CACHELINE_CEILING() first.  Returns whatever a0malloc() returns.
 */
void *
malloc_tsd_malloc(size_t size) {
	return a0malloc(CACHELINE_CEILING(size));
}
163 
/*
 * Release a wrapper by handing it to a0dalloc().
 * NOTE(review): presumably wrapper came from malloc_tsd_malloc() -- confirm
 * against callers.
 */
void
malloc_tsd_dalloc(void *wrapper) {
	a0dalloc(wrapper);
}
168 
#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
#ifndef _WIN32
JEMALLOC_EXPORT
#endif
/*
 * Run every registered cleanup callback.
 *
 * A callback that returns true is considered still pending and is invoked
 * again on the next pass; passes repeat until every callback has returned
 * false.  Compiled only when JEMALLOC_MALLOC_THREAD_CLEANUP or _WIN32 is
 * defined, and exported (JEMALLOC_EXPORT) only when _WIN32 is not defined.
 */
void
_malloc_thread_cleanup(void) {
	bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
	unsigned i;

	for (i = 0; i < ncleanups; i++) {
		pending[i] = true;
	}

	do {
		again = false;
		for (i = 0; i < ncleanups; i++) {
			if (pending[i]) {
				pending[i] = cleanups[i]();
				if (pending[i]) {
					again = true;
				}
			}
		}
	} while (again);
}
#endif
195 
196 void
malloc_tsd_cleanup_register(bool (* f)(void))197 malloc_tsd_cleanup_register(bool (*f)(void)) {
198 	assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
199 	cleanups[ncleanups] = f;
200 	ncleanups++;
201 }
202 
203 static void
tsd_do_data_cleanup(tsd_t * tsd)204 tsd_do_data_cleanup(tsd_t *tsd) {
205 	prof_tdata_cleanup(tsd);
206 	iarena_cleanup(tsd);
207 	arena_cleanup(tsd);
208 	arenas_tdata_cleanup(tsd);
209 	tcache_cleanup(tsd);
210 	witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
211 }
212 
213 void
tsd_cleanup(void * arg)214 tsd_cleanup(void *arg) {
215 	tsd_t *tsd = (tsd_t *)arg;
216 
217 	switch (tsd->state) {
218 	case tsd_state_uninitialized:
219 		/* Do nothing. */
220 		break;
221 	case tsd_state_minimal_initialized:
222 		/* This implies the thread only did free() in its life time. */
223 		/* Fall through. */
224 	case tsd_state_reincarnated:
225 		/*
226 		 * Reincarnated means another destructor deallocated memory
227 		 * after the destructor was called.  Cleanup isn't required but
228 		 * is still called for testing and completeness.
229 		 */
230 		assert_tsd_data_cleanup_done(tsd);
231 		/* Fall through. */
232 	case tsd_state_nominal:
233 	case tsd_state_nominal_slow:
234 		tsd_do_data_cleanup(tsd);
235 		tsd->state = tsd_state_purgatory;
236 		tsd_set(tsd);
237 		break;
238 	case tsd_state_purgatory:
239 		/*
240 		 * The previous time this destructor was called, we set the
241 		 * state to tsd_state_purgatory so that other destructors
242 		 * wouldn't cause re-creation of the tsd.  This time, do
243 		 * nothing, and do not request another callback.
244 		 */
245 		break;
246 	default:
247 		not_reached();
248 	}
249 #ifdef JEMALLOC_JET
250 	test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
251 	int *data = tsd_test_datap_get_unsafe(tsd);
252 	if (test_callback != NULL) {
253 		test_callback(data);
254 	}
255 #endif
256 }
257 
258 tsd_t *
malloc_tsd_boot0(void)259 malloc_tsd_boot0(void) {
260 	tsd_t *tsd;
261 
262 	ncleanups = 0;
263 	if (tsd_boot0()) {
264 		return NULL;
265 	}
266 	tsd = tsd_fetch();
267 	*tsd_arenas_tdata_bypassp_get(tsd) = true;
268 	return tsd;
269 }
270 
271 void
malloc_tsd_boot1(void)272 malloc_tsd_boot1(void) {
273 	tsd_boot1();
274 	tsd_t *tsd = tsd_fetch();
275 	/* malloc_slow has been set properly.  Update tsd_slow. */
276 	tsd_slow_update(tsd);
277 	*tsd_arenas_tdata_bypassp_get(tsd) = false;
278 }
279 
#ifdef _WIN32
/*
 * Callback taking the DLL-notification style arguments (module handle, reason
 * code, reserved pointer).
 *
 * On DLL_THREAD_DETACH it runs _malloc_thread_cleanup().  When
 * JEMALLOC_LAZY_LOCK is defined, DLL_THREAD_ATTACH additionally sets
 * isthreaded.  All other reasons are ignored.  Always returns true.
 */
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
	switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
	case DLL_THREAD_ATTACH:
		isthreaded = true;
		break;
#endif
	case DLL_THREAD_DETACH:
		_malloc_thread_cleanup();
		break;
	default:
		break;
	}
	return true;
}

/*
 * We need to be able to say "read" here (in the "pragma section"), but have
 * hooked "read". We won't read for the rest of the file, so we can get away
 * with unhooking.
 */
#ifdef read
#  undef read
#endif

/*
 * Place a pointer to _tls_callback in the ".CRT$XLY" section.  Under MSVC the
 * linker is told to keep the TLS directory symbol and the callback pointer;
 * the symbol spellings differ between _M_IX86 and other targets.
 * NOTE(review): presumably the CRT walks this section to invoke TLS
 * callbacks -- confirm against the toolchain documentation.
 */
#ifdef _MSC_VER
#  ifdef _M_IX86
#    pragma comment(linker, "/INCLUDE:__tls_used")
#    pragma comment(linker, "/INCLUDE:_tls_callback")
#  else
#    pragma comment(linker, "/INCLUDE:_tls_used")
#    pragma comment(linker, "/INCLUDE:tls_callback")
#  endif
#  pragma section(".CRT$XLY",long,read)
#endif
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
BOOL	(WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
#endif
321 
322 #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
323     !defined(_WIN32))
324 void *
tsd_init_check_recursion(tsd_init_head_t * head,tsd_init_block_t * block)325 tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
326 	pthread_t self = pthread_self();
327 	tsd_init_block_t *iter;
328 
329 	/* Check whether this thread has already inserted into the list. */
330 	malloc_mutex_lock(TSDN_NULL, &head->lock);
331 	ql_foreach(iter, &head->blocks, link) {
332 		if (iter->thread == self) {
333 			malloc_mutex_unlock(TSDN_NULL, &head->lock);
334 			return iter->data;
335 		}
336 	}
337 	/* Insert block into list. */
338 	ql_elm_new(block, link);
339 	block->thread = self;
340 	ql_tail_insert(&head->blocks, block, link);
341 	malloc_mutex_unlock(TSDN_NULL, &head->lock);
342 	return NULL;
343 }
344 
345 void
tsd_init_finish(tsd_init_head_t * head,tsd_init_block_t * block)346 tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
347 	malloc_mutex_lock(TSDN_NULL, &head->lock);
348 	ql_remove(&head->blocks, block, link);
349 	malloc_mutex_unlock(TSDN_NULL, &head->lock);
350 }
351 #endif
352