/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#ifdef JEMALLOC_PROF
#  define PROF_PREFIX_DEFAULT		"jeprof"
#else
#  define PROF_PREFIX_DEFAULT		""
#endif
#define	LG_PROF_SAMPLE_DEFAULT		19
#define	LG_PROF_INTERVAL_DEFAULT	-1
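/*
 * For example, the default lg_prof_sample of 19 corresponds to an average of
 * 2^19 bytes (512 KiB) of allocation activity between samples, and the default
 * lg_prof_interval of -1 leaves interval-triggered dumping disabled.
 */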

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all gctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * Number of mutexes shared among all tdata's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NTDATA_LOCKS		256
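/*
 * A minimal sketch of how such a shared lock pool can be consumed; the names
 * below are illustrative only, and the real selection logic is not part of
 * this header:
 *
 *	static malloc_mutex_t	gctx_locks[PROF_NCTX_LOCKS];
 *	static unsigned		cum_gctxs;	// hypothetical counter
 *
 *	// Assign locks round-robin as gctx's are created.
 *	gctx->lock = &gctx_locks[cum_gctxs++ % PROF_NCTX_LOCKS];
 */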

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
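/*
 * Example of distinguishing the shutdown-state sentinels from a live tdata
 * pointer (the same test appears in prof_sample_accum_update() below):
 *
 *	prof_tdata_t *tdata = tsd_prof_tdata_get(tsd);
 *	if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {
 *		// NULL, REINCARNATED, or PURGATORY: no usable tdata.
 *	}
 */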

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/* Profiling counters. */
	uint64_t	curobjs;
	uint64_t	curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};

typedef enum {
	prof_tctx_state_initializing,
	prof_tctx_state_nominal,
	prof_tctx_state_dumping,
	prof_tctx_state_purgatory /* Dumper must finish destroying. */
} prof_tctx_state_t;

struct prof_tctx_s {
	/* Thread data for thread that performed the allocation. */
	prof_tdata_t		*tdata;

	/*
	 * Copy of tdata->thr_uid, necessary because tdata may be defunct
	 * during teardown.
	 */
	uint64_t		thr_uid;

	/* Profiling counters, protected by tdata->lock. */
	prof_cnt_t		cnts;

	/* Associated global context. */
	prof_gctx_t		*gctx;

	/*
	 * UID that distinguishes multiple tctx's created by the same thread,
	 * but coexisting in gctx->tctxs.  There are two ways that such
	 * coexistence can occur:
	 * - A dumper thread can cause a tctx to be retained in the purgatory
	 *   state.
	 * - Although a single "producer" thread must create all tctx's which
	 *   share the same thr_uid, multiple "consumers" can each concurrently
	 *   execute portions of prof_tctx_destroy().  prof_tctx_destroy() only
	 *   gets called once each time cnts.cur{objs,bytes} drop to 0, but
	 *   this threshold can be hit again before the first consumer finishes
	 *   executing prof_tctx_destroy().
	 */
	uint64_t		tctx_uid;

	/* Linkage into gctx's tctxs. */
	rb_node(prof_tctx_t)	tctx_link;

	/*
	 * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
	 * sample vs destroy race.
	 */
	bool			prepared;

	/* Current dump-related state, protected by gctx->lock. */
	prof_tctx_state_t	state;

	/*
	 * Copy of cnts snapshotted during early dump phase, protected by
	 * dump_mtx.
	 */
	prof_cnt_t		dump_cnts;
};
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
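/*
 * A minimal sketch (an assumption; the real comparator is defined outside
 * this header) of how gctx->tctxs can remain totally ordered even when
 * several tctx's share a thr_uid: order primarily by thr_uid, then by
 * tctx_uid.
 *
 *	static int
 *	prof_tctx_comp_sketch(const prof_tctx_t *a, const prof_tctx_t *b)
 *	{
 *		if (a->thr_uid != b->thr_uid)
 *			return ((a->thr_uid < b->thr_uid) ? -1 : 1);
 *		if (a->tctx_uid != b->tctx_uid)
 *			return ((a->tctx_uid < b->tctx_uid) ? -1 : 1);
 *		return (0);
 *	}
 */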

struct prof_gctx_s {
	/* Protects nlimbo, cnt_summed, and tctxs. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this gctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing this gctx.
	 *   - Initializing per thread counters associated with this gctx.
	 *   - Preparing to destroy this gctx.
	 *   - Dumping a heap profile that includes this gctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * gctx.
	 */
	unsigned		nlimbo;

	/*
	 * Tree of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	prof_tctx_tree_t	tctxs;

	/* Linkage for tree of contexts to be dumped. */
	rb_node(prof_gctx_t)	dump_link;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Associated backtrace. */
	prof_bt_t		bt;

	/* Backtrace vector, variable size, referred to by bt. */
	void			*vec[1];
};
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
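/*
 * Because vec[1] is the final member, a gctx and the backtrace vector that
 * its bt field refers to can share a single allocation.  A sketch of the
 * sizing arithmetic (illustrative only; the actual allocation is performed
 * outside this header):
 *
 *	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
 */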

struct prof_tdata_s {
	malloc_mutex_t		*lock;

	/* Monotonically increasing unique thread identifier. */
	uint64_t		thr_uid;

	/*
	 * Monotonically increasing discriminator among tdata structures
	 * associated with the same thr_uid.
	 */
	uint64_t		thr_discrim;

	/* Included in heap profile dumps if non-NULL. */
	char			*thread_name;

	bool			attached;
	bool			expired;

	rb_node(prof_tdata_t)	tdata_link;

	/*
	 * Counter used to initialize prof_tctx_t's tctx_uid.  No locking is
	 * necessary when incrementing this field, because only one thread ever
	 * does so.
	 */
	uint64_t		tctx_uid_next;

	/*
	 * Hash of (prof_bt_t *)-->(prof_tctx_t *).  Each thread tracks
	 * backtraces for which it has non-zero allocation/deallocation
	 * counters associated with thread-specific prof_tctx_t objects.  Other
	 * threads may write to prof_tctx_t contents when freeing associated
	 * objects.
	 */
	ckh_t			bt2tctx;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		bytes_until_sample;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;

	/*
	 * Set to true during an early dump phase for tdata's which are
	 * currently being dumped.  New threads' tdata's have this initialized
	 * to false so that they aren't accidentally included in later dump
	 * phases.
	 */
	bool			dumping;

	/*
	 * True if profiling is active for this tdata's thread
	 * (thread.prof.active mallctl).
	 */
	bool			active;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			*vec[PROF_BT_MAX];
};
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
extern bool	opt_prof_active;
extern bool	opt_prof_thread_active_init;
extern size_t	opt_lg_prof_sample;	/* Mean bytes between samples. */
extern ssize_t	opt_lg_prof_interval;	/* lg(prof_interval). */
extern bool	opt_prof_gdump;		/* High-water memory dumping. */
extern bool	opt_prof_final;		/* Final profile dumping. */
extern bool	opt_prof_leak;		/* Dump leak summary at exit. */
extern bool	opt_prof_accum;		/* Report cumulative bytes. */
extern char	opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool	prof_active;

/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
extern bool	prof_gdump_val;

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
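/*
 * For example, with lg_prof_interval:30 (prof_interval = 1 GiB) and four
 * arenas, dumps occur on average every ~1 GiB of allocation, but in the worst
 * case roughly 4 GiB can be allocated between consecutive dumps.
 */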

/*
 * Initialized as opt_lg_prof_sample, and potentially modified during profiling
 * resets.
 */
extern size_t	lg_prof_sample;

void	prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
void	prof_malloc_sample_object(const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt);
prof_tctx_t	*prof_lookup(tsd_t *tsd, prof_bt_t *bt);
#ifdef JEMALLOC_JET
size_t	prof_tdata_count(void);
size_t	prof_bt_count(void);
const prof_cnt_t *prof_cnt_all(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *);
extern prof_dump_header_t *prof_dump_header;
#endif
void	prof_idump(void);
bool	prof_mdump(const char *filename);
void	prof_gdump(void);
prof_tdata_t	*prof_tdata_init(tsd_t *tsd);
prof_tdata_t	*prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
void	prof_reset(tsd_t *tsd, size_t lg_sample);
void	prof_tdata_cleanup(tsd_t *tsd);
const char	*prof_thread_name_get(void);
bool	prof_active_get(void);
bool	prof_active_set(bool active);
int	prof_thread_name_set(tsd_t *tsd, const char *thread_name);
bool	prof_thread_active_get(void);
bool	prof_thread_active_set(bool active);
bool	prof_thread_active_init_get(void);
bool	prof_thread_active_init_set(bool active_init);
bool	prof_gdump_get(void);
bool	prof_gdump_set(bool active);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(void);
void	prof_prefork(void);
void	prof_postfork_parent(void);
void	prof_postfork_child(void);
void	prof_sample_threshold_update(prof_tdata_t *tdata);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
bool	prof_active_get_unlocked(void);
bool	prof_gdump_get_unlocked(void);
prof_tdata_t	*prof_tdata_get(tsd_t *tsd, bool create);
bool	prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
    prof_tdata_t **tdata_out);
prof_tctx_t	*prof_alloc_prep(tsd_t *tsd, size_t usize, bool update);
prof_tctx_t	*prof_tctx_get(const void *ptr);
void	prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
void	prof_malloc_sample_object(const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
void	prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
    prof_tctx_t *tctx, bool updated, size_t old_usize, prof_tctx_t *old_tctx);
void	prof_free(tsd_t *tsd, const void *ptr, size_t usize);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
JEMALLOC_ALWAYS_INLINE bool
prof_active_get_unlocked(void)
{

	/*
	 * Even if opt_prof is true, sampling can be temporarily disabled by
	 * setting prof_active to false.  No locking is used when reading
	 * prof_active in the fast path, so there are no guarantees regarding
	 * how long it will take for all threads to notice state changes.
	 */
	return (prof_active);
}

JEMALLOC_ALWAYS_INLINE bool
prof_gdump_get_unlocked(void)
{

	/*
	 * No locking is used when reading prof_gdump_val in the fast path, so
	 * there are no guarantees regarding how long it will take for all
	 * threads to notice state changes.
	 */
	return (prof_gdump_val);
}

JEMALLOC_ALWAYS_INLINE prof_tdata_t *
prof_tdata_get(tsd_t *tsd, bool create)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = tsd_prof_tdata_get(tsd);
	if (create) {
		if (unlikely(tdata == NULL)) {
			if (tsd_nominal(tsd)) {
				tdata = prof_tdata_init(tsd);
				tsd_prof_tdata_set(tsd, tdata);
			}
		} else if (unlikely(tdata->expired)) {
			tdata = prof_tdata_reinit(tsd, tdata);
			tsd_prof_tdata_set(tsd, tdata);
		}
		assert(tdata == NULL || tdata->attached);
	}

	return (tdata);
}

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_tctx_get(const void *ptr)
{

	cassert(config_prof);
	assert(ptr != NULL);

	return (arena_prof_tctx_get(ptr));
}

JEMALLOC_ALWAYS_INLINE void
prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);

	arena_prof_tctx_set(ptr, tctx);
}

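/*
 * Returns true if the allocation should not be sampled: no usable tdata is
 * available, the sample threshold has not yet been reached, or the thread's
 * sampling is inactive.  Returns false only when the threshold has been
 * crossed and the thread is actively sampling, i.e. a backtrace should be
 * captured.
 */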
JEMALLOC_ALWAYS_INLINE bool
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    prof_tdata_t **tdata_out)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = prof_tdata_get(tsd, true);
	if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		tdata = NULL;

	if (tdata_out != NULL)
		*tdata_out = tdata;

	if (tdata == NULL)
		return (true);

	if (tdata->bytes_until_sample >= usize) {
		if (update)
			tdata->bytes_until_sample -= usize;
		return (true);
	} else {
		/* Compute new sample threshold. */
		if (update)
			prof_sample_threshold_update(tdata);
		return (!tdata->active);
	}
}

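/*
 * A tctx value of (prof_tctx_t *)(uintptr_t)1U serves below as a "not
 * sampled" marker; only values greater than 1U are real prof_tctx_t pointers
 * obtained via prof_lookup().  prof_malloc() and prof_realloc() rely on this
 * encoding to choose between prof_malloc_sample_object() and
 * prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U).
 */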
JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_alloc_prep(tsd_t *tsd, size_t usize, bool update)
{
	prof_tctx_t *ret;
	prof_tdata_t *tdata;
	prof_bt_t bt;

	assert(usize == s2u(usize));

	if (!prof_active_get_unlocked() || likely(prof_sample_accum_update(tsd,
	    usize, update, &tdata)))
		ret = (prof_tctx_t *)(uintptr_t)1U;
	else {
		bt_init(&bt, tdata->vec);
		prof_backtrace(&bt);
		ret = prof_lookup(tsd, &bt);
	}

	return (ret);
}

JEMALLOC_ALWAYS_INLINE void
prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(usize == isalloc(ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_malloc_sample_object(ptr, usize, tctx);
	else
		prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
}

JEMALLOC_ALWAYS_INLINE void
prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
    bool updated, size_t old_usize, prof_tctx_t *old_tctx)
{

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);

	if (!updated && ptr != NULL) {
		assert(usize == isalloc(ptr, true));
		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
			/*
			 * Don't sample.  The usize passed to PROF_ALLOC_PREP()
			 * was larger than what actually got allocated, so a
			 * backtrace was captured for this allocation, even
			 * though its actual usize was insufficient to cross the
			 * sample threshold.
			 */
			tctx = (prof_tctx_t *)(uintptr_t)1U;
		}
	}

	if (unlikely((uintptr_t)old_tctx > (uintptr_t)1U))
		prof_free_sampled_object(tsd, old_usize, old_tctx);
	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_malloc_sample_object(ptr, usize, tctx);
	else
		prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
}

JEMALLOC_ALWAYS_INLINE void
prof_free(tsd_t *tsd, const void *ptr, size_t usize)
{
	prof_tctx_t *tctx = prof_tctx_get(ptr);

	cassert(config_prof);
	assert(usize == isalloc(ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_free_sampled_object(tsd, usize, tctx);
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/