1 #include "Python.h"
2
3 #include <stdbool.h>
4
5
6 /* Defined in tracemalloc.c */
7 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
8
9
10 /* Python's malloc wrappers (see pymem.h) */
11
12 #undef uint
13 #define uint unsigned int /* assuming >= 16 bits */
14
15 /* Forward declaration */
16 static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
17 static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
18 static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
19 static void _PyMem_DebugRawFree(void *ctx, void *p);
20
21 static void* _PyMem_DebugMalloc(void *ctx, size_t size);
22 static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
23 static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
24 static void _PyMem_DebugFree(void *ctx, void *p);
25
26 static void _PyObject_DebugDumpAddress(const void *p);
27 static void _PyMem_DebugCheckAddress(char api_id, const void *p);
28
29 #if defined(__has_feature) /* Clang */
30 #if __has_feature(address_sanitizer) /* is ASAN enabled? */
31 #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
32 __attribute__((no_address_safety_analysis))
33 #else
34 #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
35 #endif
36 #else
37 #if defined(__SANITIZE_ADDRESS__) /* GCC 4.8.x, is ASAN enabled? */
38 #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
39 __attribute__((no_address_safety_analysis))
40 #else
41 #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
42 #endif
43 #endif
44
45 #ifdef WITH_PYMALLOC
46
47 #ifdef MS_WINDOWS
48 # include <windows.h>
49 #elif defined(HAVE_MMAP)
50 # include <sys/mman.h>
51 # ifdef MAP_ANONYMOUS
52 # define ARENAS_USE_MMAP
53 # endif
54 #endif
55
56 /* Forward declaration */
57 static void* _PyObject_Malloc(void *ctx, size_t size);
58 static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
59 static void _PyObject_Free(void *ctx, void *p);
60 static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
61 #endif
62
63
64 static void *
65 _PyMem_RawMalloc(void *ctx, size_t size)
66 {
67 /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
68 for malloc(0), which would be treated as an error. Some platforms would
69 return a pointer with no memory behind it, which would break pymalloc.
70 To solve these problems, allocate an extra byte. */
71 if (size == 0)
72 size = 1;
73 return malloc(size);
74 }
75
76 static void *
77 _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
78 {
79 /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
80 for calloc(0, 0), which would be treated as an error. Some platforms
81 would return a pointer with no memory behind it, which would break
82 pymalloc. To solve these problems, allocate an extra byte. */
83 if (nelem == 0 || elsize == 0) {
84 nelem = 1;
85 elsize = 1;
86 }
87 return calloc(nelem, elsize);
88 }
89
90 static void *
91 _PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
92 {
93 if (size == 0)
94 size = 1;
95 return realloc(ptr, size);
96 }
97
98 static void
99 _PyMem_RawFree(void *ctx, void *ptr)
100 {
101 free(ptr);
102 }
103
104
105 #ifdef MS_WINDOWS
106 static void *
107 _PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
108 {
109 return VirtualAlloc(NULL, size,
110 MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
111 }
112
113 static void
114 _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
115 {
116 VirtualFree(ptr, 0, MEM_RELEASE);
117 }
118
119 #elif defined(ARENAS_USE_MMAP)
120 static void *
121 _PyObject_ArenaMmap(void *ctx, size_t size)
122 {
123 void *ptr;
124 ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
125 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
126 if (ptr == MAP_FAILED)
127 return NULL;
128 assert(ptr != NULL);
129 return ptr;
130 }
131
132 static void
133 _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
134 {
135 munmap(ptr, size);
136 }
137
138 #else
139 static void *
140 _PyObject_ArenaMalloc(void *ctx, size_t size)
141 {
142 return malloc(size);
143 }
144
145 static void
146 _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
147 {
148 free(ptr);
149 }
150 #endif
151
152
153 #define PYRAW_FUNCS _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree
154 #ifdef WITH_PYMALLOC
155 # define PYOBJ_FUNCS _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free
156 #else
157 # define PYOBJ_FUNCS PYRAW_FUNCS
158 #endif
159 #define PYMEM_FUNCS PYOBJ_FUNCS
160
161 typedef struct {
162 /* We tag each block with an API ID in order to detect API violations */
163 char api_id;
164 PyMemAllocatorEx alloc;
165 } debug_alloc_api_t;
166 static struct {
167 debug_alloc_api_t raw;
168 debug_alloc_api_t mem;
169 debug_alloc_api_t obj;
170 } _PyMem_Debug = {
171 {'r', {NULL, PYRAW_FUNCS}},
172 {'m', {NULL, PYMEM_FUNCS}},
173 {'o', {NULL, PYOBJ_FUNCS}}
174 };
175
176 #define PYRAWDBG_FUNCS \
177 _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree
178 #define PYDBG_FUNCS \
179 _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree
180
181 static PyMemAllocatorEx _PyMem_Raw = {
182 #ifdef Py_DEBUG
183 &_PyMem_Debug.raw, PYRAWDBG_FUNCS
184 #else
185 NULL, PYRAW_FUNCS
186 #endif
187 };
188
189 static PyMemAllocatorEx _PyMem = {
190 #ifdef Py_DEBUG
191 &_PyMem_Debug.mem, PYDBG_FUNCS
192 #else
193 NULL, PYMEM_FUNCS
194 #endif
195 };
196
197 static PyMemAllocatorEx _PyObject = {
198 #ifdef Py_DEBUG
199 &_PyMem_Debug.obj, PYDBG_FUNCS
200 #else
201 NULL, PYOBJ_FUNCS
202 #endif
203 };
204
205 int
206 _PyMem_SetupAllocators(const char *opt)
207 {
208 if (opt == NULL || *opt == '\0') {
209 /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
210 options): use default allocators */
211 #ifdef Py_DEBUG
212 # ifdef WITH_PYMALLOC
213 opt = "pymalloc_debug";
214 # else
215 opt = "malloc_debug";
216 # endif
217 #else
218 /* !Py_DEBUG */
219 # ifdef WITH_PYMALLOC
220 opt = "pymalloc";
221 # else
222 opt = "malloc";
223 # endif
224 #endif
225 }
226
227 if (strcmp(opt, "debug") == 0) {
228 PyMem_SetupDebugHooks();
229 }
230 else if (strcmp(opt, "malloc") == 0 || strcmp(opt, "malloc_debug") == 0)
231 {
232 PyMemAllocatorEx alloc = {NULL, PYRAW_FUNCS};
233
234 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
235 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
236 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
237
238 if (strcmp(opt, "malloc_debug") == 0)
239 PyMem_SetupDebugHooks();
240 }
241 #ifdef WITH_PYMALLOC
242 else if (strcmp(opt, "pymalloc") == 0
243 || strcmp(opt, "pymalloc_debug") == 0)
244 {
245 PyMemAllocatorEx raw_alloc = {NULL, PYRAW_FUNCS};
246 PyMemAllocatorEx mem_alloc = {NULL, PYMEM_FUNCS};
247 PyMemAllocatorEx obj_alloc = {NULL, PYOBJ_FUNCS};
248
249 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &raw_alloc);
250 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &mem_alloc);
251 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &obj_alloc);
252
253 if (strcmp(opt, "pymalloc_debug") == 0)
254 PyMem_SetupDebugHooks();
255 }
256 #endif
257 else {
258 /* unknown allocator */
259 return -1;
260 }
261 return 0;
262 }
263
264 #undef PYRAW_FUNCS
265 #undef PYMEM_FUNCS
266 #undef PYOBJ_FUNCS
267 #undef PYRAWDBG_FUNCS
268 #undef PYDBG_FUNCS
269
270 static PyObjectArenaAllocator _PyObject_Arena = {NULL,
271 #ifdef MS_WINDOWS
272 _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
273 #elif defined(ARENAS_USE_MMAP)
274 _PyObject_ArenaMmap, _PyObject_ArenaMunmap
275 #else
276 _PyObject_ArenaMalloc, _PyObject_ArenaFree
277 #endif
278 };
279
280 #ifdef WITH_PYMALLOC
281 static int
282 _PyMem_DebugEnabled(void)
283 {
284 return (_PyObject.malloc == _PyMem_DebugMalloc);
285 }
286
287 int
288 _PyMem_PymallocEnabled(void)
289 {
290 if (_PyMem_DebugEnabled()) {
291 return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
292 }
293 else {
294 return (_PyObject.malloc == _PyObject_Malloc);
295 }
296 }
297 #endif
298
299 void
300 PyMem_SetupDebugHooks(void)
301 {
302 PyMemAllocatorEx alloc;
303
304 alloc.malloc = _PyMem_DebugRawMalloc;
305 alloc.calloc = _PyMem_DebugRawCalloc;
306 alloc.realloc = _PyMem_DebugRawRealloc;
307 alloc.free = _PyMem_DebugRawFree;
308
309 if (_PyMem_Raw.malloc != _PyMem_DebugRawMalloc) {
310 alloc.ctx = &_PyMem_Debug.raw;
311 PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
312 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
313 }
314
315 alloc.malloc = _PyMem_DebugMalloc;
316 alloc.calloc = _PyMem_DebugCalloc;
317 alloc.realloc = _PyMem_DebugRealloc;
318 alloc.free = _PyMem_DebugFree;
319
320 if (_PyMem.malloc != _PyMem_DebugMalloc) {
321 alloc.ctx = &_PyMem_Debug.mem;
322 PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
323 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
324 }
325
326 if (_PyObject.malloc != _PyMem_DebugMalloc) {
327 alloc.ctx = &_PyMem_Debug.obj;
328 PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
329 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
330 }
331 }
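
/* Illustrative sketch (not part of the build): how an embedder might wrap
 * the current "mem" domain allocator with its own counting hooks via
 * PyMem_GetAllocator()/PyMem_SetAllocator(), then re-install the debug
 * wrappers on top. The my_* names and the counter are hypothetical. */
#if 0
static size_t my_nalloc;             /* hypothetical statistics counter */
static PyMemAllocatorEx my_base;     /* the allocator being wrapped */

static void *
my_malloc(void *ctx, size_t size)
{
    my_nalloc++;
    return my_base.malloc(my_base.ctx, size);
}

static void
my_install_hooks(void)
{
    PyMemAllocatorEx alloc;

    PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &my_base);
    alloc = my_base;
    alloc.malloc = my_malloc;
    PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);

    /* Optionally put the debug wrappers back on top of the new hooks. */
    PyMem_SetupDebugHooks();
}
#endif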
332
333 void
334 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
335 {
336 switch(domain)
337 {
338 case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
339 case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
340 case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
341 default:
342 /* unknown domain: set all attributes to NULL */
343 allocator->ctx = NULL;
344 allocator->malloc = NULL;
345 allocator->calloc = NULL;
346 allocator->realloc = NULL;
347 allocator->free = NULL;
348 }
349 }
350
351 void
352 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
353 {
354 switch(domain)
355 {
356 case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
357 case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
358 case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
359 /* ignore unknown domain */
360 }
361 }
362
363 void
364 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
365 {
366 *allocator = _PyObject_Arena;
367 }
368
369 void
370 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
371 {
372 _PyObject_Arena = *allocator;
373 }
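
/* Illustrative sketch (not part of the build): arena allocation can be
 * rerouted the same way, e.g. to count the arenas pymalloc maps. The
 * my_* names are hypothetical. */
#if 0
static size_t my_narenas;                    /* hypothetical counter */
static PyObjectArenaAllocator my_base_arena; /* previous arena hooks */

static void *
my_arena_alloc(void *ctx, size_t size)
{
    my_narenas++;
    return my_base_arena.alloc(my_base_arena.ctx, size);
}

static void
my_arena_free(void *ctx, void *ptr, size_t size)
{
    my_narenas--;
    my_base_arena.free(my_base_arena.ctx, ptr, size);
}

static void
my_install_arena_hooks(void)
{
    PyObjectArenaAllocator alloc = {NULL, my_arena_alloc, my_arena_free};

    PyObject_GetArenaAllocator(&my_base_arena);
    PyObject_SetArenaAllocator(&alloc);
}
#endif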
374
375 void *
376 PyMem_RawMalloc(size_t size)
377 {
378 /*
379 * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
380 * Most python internals blindly use a signed Py_ssize_t to track
381 * things without checking for overflows or negatives.
382 * As size_t is unsigned, checking for size < 0 is not required.
383 */
384 if (size > (size_t)PY_SSIZE_T_MAX)
385 return NULL;
386 return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
387 }
388
389 void *
390 PyMem_RawCalloc(size_t nelem, size_t elsize)
391 {
392 /* see PyMem_RawMalloc() */
393 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
394 return NULL;
395 return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
396 }
397
398 void*
399 PyMem_RawRealloc(void *ptr, size_t new_size)
400 {
401 /* see PyMem_RawMalloc() */
402 if (new_size > (size_t)PY_SSIZE_T_MAX)
403 return NULL;
404 return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
405 }
406
407 void PyMem_RawFree(void *ptr)
408 {
409 _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
410 }
411
412 void *
413 PyMem_Malloc(size_t size)
414 {
415 /* see PyMem_RawMalloc() */
416 if (size > (size_t)PY_SSIZE_T_MAX)
417 return NULL;
418 return _PyMem.malloc(_PyMem.ctx, size);
419 }
420
421 void *
422 PyMem_Calloc(size_t nelem, size_t elsize)
423 {
424 /* see PyMem_RawMalloc() */
425 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
426 return NULL;
427 return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
428 }
429
430 void *
431 PyMem_Realloc(void *ptr, size_t new_size)
432 {
433 /* see PyMem_RawMalloc() */
434 if (new_size > (size_t)PY_SSIZE_T_MAX)
435 return NULL;
436 return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
437 }
438
439 void
440 PyMem_Free(void *ptr)
441 {
442 _PyMem.free(_PyMem.ctx, ptr);
443 }
444
445 char *
446 _PyMem_RawStrdup(const char *str)
447 {
448 size_t size;
449 char *copy;
450
451 size = strlen(str) + 1;
452 copy = PyMem_RawMalloc(size);
453 if (copy == NULL)
454 return NULL;
455 memcpy(copy, str, size);
456 return copy;
457 }
458
459 char *
460 _PyMem_Strdup(const char *str)
461 {
462 size_t size;
463 char *copy;
464
465 size = strlen(str) + 1;
466 copy = PyMem_Malloc(size);
467 if (copy == NULL)
468 return NULL;
469 memcpy(copy, str, size);
470 return copy;
471 }
472
473 void *
474 PyObject_Malloc(size_t size)
475 {
476 /* see PyMem_RawMalloc() */
477 if (size > (size_t)PY_SSIZE_T_MAX)
478 return NULL;
479 return _PyObject.malloc(_PyObject.ctx, size);
480 }
481
482 void *
483 PyObject_Calloc(size_t nelem, size_t elsize)
484 {
485 /* see PyMem_RawMalloc() */
486 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
487 return NULL;
488 return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
489 }
490
491 void *
492 PyObject_Realloc(void *ptr, size_t new_size)
493 {
494 /* see PyMem_RawMalloc() */
495 if (new_size > (size_t)PY_SSIZE_T_MAX)
496 return NULL;
497 return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
498 }
499
500 void
501 PyObject_Free(void *ptr)
502 {
503 _PyObject.free(_PyObject.ctx, ptr);
504 }
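
/* Illustrative usage sketch (not part of the build): the wrappers above form
 * three parallel families (PyMem_Raw*, PyMem_*, PyObject_*); a block must be
 * released through the same family that allocated it -- the debug hooks use
 * the api_id tags to catch mismatches. The my_* names are hypothetical. */
#if 0
static int *
my_make_table(size_t n)
{
    /* the overflow check against PY_SSIZE_T_MAX happens inside PyMem_Calloc() */
    int *table = PyMem_Calloc(n, sizeof(int));
    if (table == NULL)
        return NULL;
    table[0] = 42;
    return table;
}

static void
my_drop_table(int *table)
{
    PyMem_Free(table);   /* same family as PyMem_Calloc() */
}
#endif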
505
506
507 #ifdef WITH_PYMALLOC
508
509 #ifdef WITH_VALGRIND
510 #include <valgrind/valgrind.h>
511
512 /* If we're using GCC, use __builtin_expect() to reduce overhead of
513 the valgrind checks */
514 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
515 # define UNLIKELY(value) __builtin_expect((value), 0)
516 #else
517 # define UNLIKELY(value) (value)
518 #endif
519
520 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
521 static int running_on_valgrind = -1;
522 #endif
523
524 /* An object allocator for Python.
525
526 Here is an introduction to the layers of the Python memory architecture,
527 showing where the object allocator is actually used (layer +2). It is
528 called for every object allocation and deallocation (PyObject_New/Del),
529 unless the object-specific allocators implement a proprietary allocation
530 scheme (ex.: ints use a simple free list). This is also the place where
531 the cyclic garbage collector operates selectively on container objects.
532
533
534 Object-specific allocators
535 _____ ______ ______ ________
536 [ int ] [ dict ] [ list ] ... [ string ] Python core |
537 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
538 _______________________________ | |
539 [ Python's object allocator ] | |
540 +2 | ####### Object memory ####### | <------ Internal buffers ------> |
541 ______________________________________________________________ |
542 [ Python's raw memory allocator (PyMem_ API) ] |
543 +1 | <----- Python memory (under PyMem manager's control) ------> | |
544 __________________________________________________________________
545 [ Underlying general-purpose allocator (ex: C library malloc) ]
546 0 | <------ Virtual memory allocated for the python process -------> |
547
548 =========================================================================
549 _______________________________________________________________________
550 [ OS-specific Virtual Memory Manager (VMM) ]
551 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
552 __________________________________ __________________________________
553 [ ] [ ]
554 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
555
556 */
557 /*==========================================================================*/
558
559 /* A fast, special-purpose memory allocator for small blocks, to be used
560 on top of a general-purpose malloc -- heavily based on previous art. */
561
562 /* Vladimir Marangozov -- August 2000 */
563
564 /*
565 * "Memory management is where the rubber meets the road -- if we do the wrong
566 * thing at any level, the results will not be good. And if we don't make the
567 * levels work well together, we are in serious trouble." (1)
568 *
569 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
570 * "Dynamic Storage Allocation: A Survey and Critical Review",
571 * in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
572 */
573
574 /* #undef WITH_MEMORY_LIMITS */ /* disable mem limit checks */
575
576 /*==========================================================================*/
577
578 /*
579 * Allocation strategy abstract:
580 *
581 * For small requests, the allocator sub-allocates <Big> blocks of memory.
582 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
583 * system's allocator.
584 *
585 * Small requests are grouped in size classes spaced 8 bytes apart, due
586 * to the required valid alignment of the returned address. Requests of
587 * a particular size are serviced from memory pools of 4K (one VMM page).
588 * Pools are fragmented on demand and contain free lists of blocks of one
589 * particular size class. In other words, there is a fixed-size allocator
590 * for each size class. Free pools are shared by the different allocators
591 * thus minimizing the space reserved for a particular size class.
592 *
593 * This allocation strategy is a variant of what is known as "simple
594 * segregated storage based on array of free lists". The main drawback of
595 * simple segregated storage is that we might end up with lot of reserved
596 * memory for the different free lists, which degenerate in time. To avoid
597 * this, we partition each free list in pools and we share dynamically the
598 * reserved space between all free lists. This technique is quite efficient
599 * for memory intensive programs which allocate mainly small-sized blocks.
600 *
601 * For small requests we have the following table:
602 *
603 * Request in bytes Size of allocated block Size class idx
604 * ----------------------------------------------------------------
605 * 1-8 8 0
606 * 9-16 16 1
607 * 17-24 24 2
608 * 25-32 32 3
609 * 33-40 40 4
610 * 41-48 48 5
611 * 49-56 56 6
612 * 57-64 64 7
613 * 65-72 72 8
614 * ... ... ...
615 * 497-504 504 62
616 * 505-512 512 63
617 *
618 * 0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
619 * allocator.
620 */
621
622 /*==========================================================================*/
623
624 /*
625 * -- Main tunable settings section --
626 */
627
628 /*
629 * Alignment of addresses returned to the user. 8-bytes alignment works
630 * on most current architectures (with 32-bit or 64-bit address busses).
631 * The alignment value is also used for grouping small requests in size
632 * classes spaced ALIGNMENT bytes apart.
633 *
634 * You shouldn't change this unless you know what you are doing.
635 */
636 #define ALIGNMENT 8 /* must be 2^N */
637 #define ALIGNMENT_SHIFT 3
638
639 /* Return the number of bytes in size class I, as a uint. */
640 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
641
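/* Worked example (sketch): the mapping in the table below is what the
 * expression "(nbytes - 1) >> ALIGNMENT_SHIFT" in _PyObject_Alloc() computes,
 * and INDEX2SIZE() maps a class index back to its block size. */
#if 0
/* With ALIGNMENT == 8 and ALIGNMENT_SHIFT == 3:
 *   nbytes == 20  ->  class (20 - 1) >> 3 == 2  ->  24-byte blocks
 *   nbytes == 64  ->  class (64 - 1) >> 3 == 7  ->  64-byte blocks
 *   nbytes == 65  ->  class (65 - 1) >> 3 == 8  ->  72-byte blocks
 */
size_t nbytes = 20;
uint size_class = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;   /* == 2  */
uint block_size = INDEX2SIZE(size_class);                  /* == 24 */
#endif
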
642 /*
643 * Max size threshold below which malloc requests are considered to be
644 * small enough in order to use preallocated memory pools. You can tune
645 * this value according to your application behaviour and memory needs.
646 *
647 * Note: a size threshold of 512 guarantees that newly created dictionaries
648 * will be allocated from preallocated memory pools on 64-bit.
649 *
650 * The following invariants must hold:
651 * 1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
652 * 2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
653 *
654 * Although not required, for better performance and space efficiency,
655 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
656 */
657 #define SMALL_REQUEST_THRESHOLD 512
658 #define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
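
/* A minimal sketch (not compiled) of how the two invariants stated above
 * could be checked at build time with C11 _Static_assert; in this file they
 * are only documented, not enforced. */
#if 0
_Static_assert(ALIGNMENT <= SMALL_REQUEST_THRESHOLD &&
               SMALL_REQUEST_THRESHOLD <= 512,
               "SMALL_REQUEST_THRESHOLD out of range");
_Static_assert(SMALL_REQUEST_THRESHOLD % ALIGNMENT == 0,
               "SMALL_REQUEST_THRESHOLD must be a multiple of ALIGNMENT");
#endif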
659
660 /*
661 * The system's VMM page size can be obtained on most unices with a
662 * getpagesize() call or deduced from various header files. To make
663 * things simpler, we assume that it is 4K, which is OK for most systems.
664 * It is probably better if this is the native page size, but it doesn't
665 * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page
666 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
667 * violation fault. 4K is apparently OK for all the platforms that python
668 * currently targets.
669 */
670 #define SYSTEM_PAGE_SIZE (4 * 1024)
671 #define SYSTEM_PAGE_SIZE_MASK (SYSTEM_PAGE_SIZE - 1)
672
673 /*
674 * Maximum amount of memory managed by the allocator for small requests.
675 */
676 #ifdef WITH_MEMORY_LIMITS
677 #ifndef SMALL_MEMORY_LIMIT
678 #define SMALL_MEMORY_LIMIT (64 * 1024 * 1024) /* 64 MB -- more? */
679 #endif
680 #endif
681
682 /*
683 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
684 * on a page boundary. This is a reserved virtual address space for the
685 current process (obtained through a malloc()/mmap() call). In no way does
686 this mean that the memory arenas will be used entirely. A malloc(<Big>) is
687 * usually an address range reservation for <Big> bytes, unless all pages within
688 * this space are referenced subsequently. So malloc'ing big blocks and not
689 * using them does not mean "wasting memory". It's an addressable range
690 * wastage...
691 *
692 * Arenas are allocated with mmap() on systems supporting anonymous memory
693 * mappings to reduce heap fragmentation.
694 */
695 #define ARENA_SIZE (256 << 10) /* 256KB */
696
697 #ifdef WITH_MEMORY_LIMITS
698 #define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE)
699 #endif
700
701 /*
702 * Size of the pools used for small blocks. Should be a power of 2,
703 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
704 */
705 #define POOL_SIZE SYSTEM_PAGE_SIZE /* must be 2^N */
706 #define POOL_SIZE_MASK SYSTEM_PAGE_SIZE_MASK
707
708 /*
709 * -- End of tunable settings section --
710 */
711
712 /*==========================================================================*/
713
714 /*
715 * Locking
716 *
717 * To reduce lock contention, it would probably be better to refine the
718 * crude function locking with per size class locking. I'm not positive
719 * however, whether it's worth switching to such locking policy because
720 * of the performance penalty it might introduce.
721 *
722 * The following macros describe the simplest (should also be the fastest)
723 * lock object on a particular platform and the init/fini/lock/unlock
724 * operations on it. The locks defined here are not expected to be recursive
725 * because it is assumed that they will always be called in the order:
726 * INIT, [LOCK, UNLOCK]*, FINI.
727 */
728
729 /*
730 * Python's threads are serialized, so object malloc locking is disabled.
731 */
732 #define SIMPLELOCK_DECL(lock) /* simple lock declaration */
733 #define SIMPLELOCK_INIT(lock) /* allocate (if needed) and initialize */
734 #define SIMPLELOCK_FINI(lock) /* free/destroy an existing lock */
735 #define SIMPLELOCK_LOCK(lock) /* acquire released lock */
736 #define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */
737
738 /* When you say memory, my mind reasons in terms of (pointers to) blocks */
739 typedef uint8_t block;
740
741 /* Pool for small blocks. */
742 struct pool_header {
743 union { block *_padding;
744 uint count; } ref; /* number of allocated blocks */
745 block *freeblock; /* pool's free list head */
746 struct pool_header *nextpool; /* next pool of this size class */
747 struct pool_header *prevpool; /* previous pool "" */
748 uint arenaindex; /* index into arenas of base adr */
749 uint szidx; /* block size class index */
750 uint nextoffset; /* bytes to virgin block */
751 uint maxnextoffset; /* largest valid nextoffset */
752 };
753
754 typedef struct pool_header *poolp;
755
756 /* Record keeping for arenas. */
757 struct arena_object {
758 /* The address of the arena, as returned by malloc. Note that 0
759 * will never be returned by a successful malloc, and is used
760 * here to mark an arena_object that doesn't correspond to an
761 * allocated arena.
762 */
763 uintptr_t address;
764
765 /* Pool-aligned pointer to the next pool to be carved off. */
766 block* pool_address;
767
768 /* The number of available pools in the arena: free pools + never-
769 * allocated pools.
770 */
771 uint nfreepools;
772
773 /* The total number of pools in the arena, whether or not available. */
774 uint ntotalpools;
775
776 /* Singly-linked list of available pools. */
777 struct pool_header* freepools;
778
779 /* Whenever this arena_object is not associated with an allocated
780 * arena, the nextarena member is used to link all unassociated
781 * arena_objects in the singly-linked `unused_arena_objects` list.
782 * The prevarena member is unused in this case.
783 *
784 * When this arena_object is associated with an allocated arena
785 * with at least one available pool, both members are used in the
786 * doubly-linked `usable_arenas` list, which is maintained in
787 * increasing order of `nfreepools` values.
788 *
789 * Else this arena_object is associated with an allocated arena
790 * all of whose pools are in use. `nextarena` and `prevarena`
791 * are both meaningless in this case.
792 */
793 struct arena_object* nextarena;
794 struct arena_object* prevarena;
795 };
796
797 #define POOL_OVERHEAD _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
798
799 #define DUMMY_SIZE_IDX 0xffff /* size class of newly cached pools */
800
801 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
802 #define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
803
804 /* Return total number of blocks in pool of size index I, as a uint. */
805 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
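
/* Worked example (assuming a typical 64-bit build where sizeof(struct
 * pool_header) rounds up to 48 bytes): POOL_OVERHEAD == 48, so a 4K pool of
 * 8-byte blocks holds NUMBLOCKS(0) == (4096 - 48) / 8 == 506 blocks, while a
 * pool of 512-byte blocks (size class 63) holds (4096 - 48) / 512 == 7. */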
806
807 /*==========================================================================*/
808
809 /*
810 * This malloc lock
811 */
812 SIMPLELOCK_DECL(_malloc_lock)
813 #define LOCK() SIMPLELOCK_LOCK(_malloc_lock)
814 #define UNLOCK() SIMPLELOCK_UNLOCK(_malloc_lock)
815 #define LOCK_INIT() SIMPLELOCK_INIT(_malloc_lock)
816 #define LOCK_FINI() SIMPLELOCK_FINI(_malloc_lock)
817
818 /*
819 * Pool table -- headed, circular, doubly-linked lists of partially used pools.
820
821 This is involved. For an index i, usedpools[i+i] is the header for a list of
822 all partially used pools holding small blocks with "size class idx" i. So
823 usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
824 16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
825
826 Pools are carved off an arena's highwater mark (an arena_object's pool_address
827 member) as needed. Once carved off, a pool is in one of three states forever
828 after:
829
830 used == partially used, neither empty nor full
831 At least one block in the pool is currently allocated, and at least one
832 block in the pool is not currently allocated (note this implies a pool
833 has room for at least two blocks).
834 This is a pool's initial state, as a pool is created only when malloc
835 needs space.
836 The pool holds blocks of a fixed size, and is in the circular list headed
837 at usedpools[i] (see above). It's linked to the other used pools of the
838 same size class via the pool_header's nextpool and prevpool members.
839 If all but one block is currently allocated, a malloc can cause a
840 transition to the full state. If all but one block is not currently
841 allocated, a free can cause a transition to the empty state.
842
843 full == all the pool's blocks are currently allocated
844 On transition to full, a pool is unlinked from its usedpools[] list.
845 It's not linked to from anything then anymore, and its nextpool and
846 prevpool members are meaningless until it transitions back to used.
847 A free of a block in a full pool puts the pool back in the used state.
848 Then it's linked in at the front of the appropriate usedpools[] list, so
849 that the next allocation for its size class will reuse the freed block.
850
851 empty == all the pool's blocks are currently available for allocation
852 On transition to empty, a pool is unlinked from its usedpools[] list,
853 and linked to the front of its arena_object's singly-linked freepools list,
854 via its nextpool member. The prevpool member has no meaning in this case.
855 Empty pools have no inherent size class: the next time a malloc finds
856 an empty list in usedpools[], it takes the first pool off of freepools.
857 If the size class needed happens to be the same as the size class the pool
858 last had, some pool initialization can be skipped.
859
860
861 Block Management
862
863 Blocks within pools are again carved out as needed. pool->freeblock points to
864 the start of a singly-linked list of free blocks within the pool. When a
865 block is freed, it's inserted at the front of its pool's freeblock list. Note
866 that the available blocks in a pool are *not* linked all together when a pool
867 is initialized. Instead only "the first two" (lowest addresses) blocks are
868 set up, returning the first such block, and setting pool->freeblock to a
869 one-block list holding the second such block. This is consistent with that
870 pymalloc strives at all levels (arena, pool, and block) never to touch a piece
871 of memory until it's actually needed.
872
873 So long as a pool is in the used state, we're certain there *is* a block
874 available for allocating, and pool->freeblock is not NULL. If pool->freeblock
875 points to the end of the free list before we've carved the entire pool into
876 blocks, that means we simply haven't yet gotten to one of the higher-address
877 blocks. The offset from the pool_header to the start of "the next" virgin
878 block is stored in the pool_header nextoffset member, and the largest value
879 of nextoffset that makes sense is stored in the maxnextoffset member when a
880 pool is initialized. All the blocks in a pool have been passed out at least
881 once when and only when nextoffset > maxnextoffset.
882
883
884 Major obscurity: While the usedpools vector is declared to have poolp
885 entries, it doesn't really. It really contains two pointers per (conceptual)
886 poolp entry, the nextpool and prevpool members of a pool_header. The
887 excruciating initialization code below fools C so that
888
889 usedpool[i+i]
890
891 "acts like" a genuine poolp, but only so long as you only reference its
892 nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish is
893 compensating for that a pool_header's nextpool and prevpool members
894 immediately follow a pool_header's first two members:
895
896 union { block *_padding;
897 uint count; } ref;
898 block *freeblock;
899
900 each of which consume sizeof(block *) bytes. So what usedpools[i+i] really
901 contains is a fudged-up pointer p such that *if* C believes it's a poolp
902 pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
903 circular list is empty).
904
905 It's unclear why the usedpools setup is so convoluted. It could be to
906 minimize the amount of cache required to hold this heavily-referenced table
907 (which only *needs* the two interpool pointer members of a pool_header). OTOH,
908 referencing code has to remember to "double the index" and doing so isn't
909 free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
910 on that C doesn't insert any padding anywhere in a pool_header at or before
911 the prevpool member.
912 **************************************************************************** */
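
/* Worked example of the lazy carving described above (again assuming
 * POOL_OVERHEAD == 48 on a 64-bit build): when a 4K pool is initialized for
 * 32-byte blocks, the first block is returned at offset 48, freeblock points
 * to the lone virgin block at offset 80, nextoffset becomes 48 + 2*32 == 112
 * and maxnextoffset is 4096 - 32 == 4064. Blocks at offsets 112, 144, ... are
 * carved off only as the free list runs dry, matching the pool initialization
 * code in _PyObject_Alloc(). */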
913
914 #define PTA(x) ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
915 #define PT(x) PTA(x), PTA(x)
916
917 static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
918 PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
919 #if NB_SMALL_SIZE_CLASSES > 8
920 , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
921 #if NB_SMALL_SIZE_CLASSES > 16
922 , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
923 #if NB_SMALL_SIZE_CLASSES > 24
924 , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
925 #if NB_SMALL_SIZE_CLASSES > 32
926 , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
927 #if NB_SMALL_SIZE_CLASSES > 40
928 , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
929 #if NB_SMALL_SIZE_CLASSES > 48
930 , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
931 #if NB_SMALL_SIZE_CLASSES > 56
932 , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
933 #if NB_SMALL_SIZE_CLASSES > 64
934 #error "NB_SMALL_SIZE_CLASSES should be less than 64"
935 #endif /* NB_SMALL_SIZE_CLASSES > 64 */
936 #endif /* NB_SMALL_SIZE_CLASSES > 56 */
937 #endif /* NB_SMALL_SIZE_CLASSES > 48 */
938 #endif /* NB_SMALL_SIZE_CLASSES > 40 */
939 #endif /* NB_SMALL_SIZE_CLASSES > 32 */
940 #endif /* NB_SMALL_SIZE_CLASSES > 24 */
941 #endif /* NB_SMALL_SIZE_CLASSES > 16 */
942 #endif /* NB_SMALL_SIZE_CLASSES > 8 */
943 };
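
/* Illustrative sketch (not compiled, assumes the padding-free 64-bit layout
 * relied upon above): what the PTA() offset trick buys us. */
#if 0
poolp hdr = usedpools[0];       /* really PTA(0), a fudged-up pointer */
assert(hdr->nextpool == hdr);   /* overlays usedpools[0], which holds PTA(0) */
assert(hdr->prevpool == hdr);   /* overlays usedpools[1], which holds PTA(0) */
/* Hence "pool != pool->nextpool" in _PyObject_Alloc() is false while a size
 * class has no partially used pool, and true once one is frontlinked. */
#endif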
944
945 /*==========================================================================
946 Arena management.
947
948 `arenas` is a vector of arena_objects. It contains maxarenas entries, some of
949 which may not be currently used (== they're arena_objects that aren't
950 currently associated with an allocated arena). Note that arenas proper are
951 separately malloc'ed.
952
953 Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5,
954 we do try to free() arenas, and use some mild heuristic strategies to increase
955 the likelihood that arenas eventually can be freed.
956
957 unused_arena_objects
958
959 This is a singly-linked list of the arena_objects that are currently not
960 being used (no arena is associated with them). Objects are taken off the
961 head of the list in new_arena(), and are pushed on the head of the list in
962 PyObject_Free() when the arena is empty. Key invariant: an arena_object
963 is on this list if and only if its .address member is 0.
964
965 usable_arenas
966
967 This is a doubly-linked list of the arena_objects associated with arenas
968 that have pools available. These pools are either waiting to be reused,
969 or have not been used before. The list is sorted to have the most-
970 allocated arenas first (ascending order based on the nfreepools member).
971 This means that the next allocation will come from a heavily used arena,
972 which gives the nearly empty arenas a chance to be returned to the system.
973 In my unscientific tests this dramatically improved the number of arenas
974 that could be freed.
975
976 Note that an arena_object associated with an arena all of whose pools are
977 currently in use isn't on either list.
978 */
979
980 /* Array of objects used to track chunks of memory (arenas). */
981 static struct arena_object* arenas = NULL;
982 /* Number of slots currently allocated in the `arenas` vector. */
983 static uint maxarenas = 0;
984
985 /* The head of the singly-linked, NULL-terminated list of available
986 * arena_objects.
987 */
988 static struct arena_object* unused_arena_objects = NULL;
989
990 /* The head of the doubly-linked, NULL-terminated at each end, list of
991 * arena_objects associated with arenas that have pools available.
992 */
993 static struct arena_object* usable_arenas = NULL;
994
995 /* How many arena_objects do we initially allocate?
996 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
997 * `arenas` vector.
998 */
999 #define INITIAL_ARENA_OBJECTS 16
1000
1001 /* Number of arenas allocated that haven't been free()'d. */
1002 static size_t narenas_currently_allocated = 0;
1003
1004 /* Total number of times malloc() called to allocate an arena. */
1005 static size_t ntimes_arena_allocated = 0;
1006 /* High water mark (max value ever seen) for narenas_currently_allocated. */
1007 static size_t narenas_highwater = 0;
1008
1009 static Py_ssize_t _Py_AllocatedBlocks = 0;
1010
1011 Py_ssize_t
1012 _Py_GetAllocatedBlocks(void)
1013 {
1014 return _Py_AllocatedBlocks;
1015 }
1016
1017
1018 /* Allocate a new arena. If we run out of memory, return NULL. Else
1019 * allocate a new arena, and return the address of an arena_object
1020 * describing the new arena. It's expected that the caller will set
1021 * `usable_arenas` to the return value.
1022 */
1023 static struct arena_object*
1024 new_arena(void)
1025 {
1026 struct arena_object* arenaobj;
1027 uint excess; /* number of bytes above pool alignment */
1028 void *address;
1029 static int debug_stats = -1;
1030
1031 if (debug_stats == -1) {
1032 char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1033 debug_stats = (opt != NULL && *opt != '\0');
1034 }
1035 if (debug_stats)
1036 _PyObject_DebugMallocStats(stderr);
1037
1038 if (unused_arena_objects == NULL) {
1039 uint i;
1040 uint numarenas;
1041 size_t nbytes;
1042
1043 /* Double the number of arena objects on each allocation.
1044 * Note that it's possible for `numarenas` to overflow.
1045 */
1046 numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1047 if (numarenas <= maxarenas)
1048 return NULL; /* overflow */
1049 #if SIZEOF_SIZE_T <= SIZEOF_INT
1050 if (numarenas > SIZE_MAX / sizeof(*arenas))
1051 return NULL; /* overflow */
1052 #endif
1053 nbytes = numarenas * sizeof(*arenas);
1054 arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
1055 if (arenaobj == NULL)
1056 return NULL;
1057 arenas = arenaobj;
1058
1059 /* We might need to fix pointers that were copied. However,
1060 * new_arena only gets called when all the pages in the
1061 * previous arenas are full. Thus, there are *no* pointers
1062 * into the old array. Thus, we don't have to worry about
1063 * invalid pointers. Just to be sure, some asserts:
1064 */
1065 assert(usable_arenas == NULL);
1066 assert(unused_arena_objects == NULL);
1067
1068 /* Put the new arenas on the unused_arena_objects list. */
1069 for (i = maxarenas; i < numarenas; ++i) {
1070 arenas[i].address = 0; /* mark as unassociated */
1071 arenas[i].nextarena = i < numarenas - 1 ?
1072 &arenas[i+1] : NULL;
1073 }
1074
1075 /* Update globals. */
1076 unused_arena_objects = &arenas[maxarenas];
1077 maxarenas = numarenas;
1078 }
1079
1080 /* Take the next available arena object off the head of the list. */
1081 assert(unused_arena_objects != NULL);
1082 arenaobj = unused_arena_objects;
1083 unused_arena_objects = arenaobj->nextarena;
1084 assert(arenaobj->address == 0);
1085 address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1086 if (address == NULL) {
1087 /* The allocation failed: return NULL after putting the
1088 * arenaobj back.
1089 */
1090 arenaobj->nextarena = unused_arena_objects;
1091 unused_arena_objects = arenaobj;
1092 return NULL;
1093 }
1094 arenaobj->address = (uintptr_t)address;
1095
1096 ++narenas_currently_allocated;
1097 ++ntimes_arena_allocated;
1098 if (narenas_currently_allocated > narenas_highwater)
1099 narenas_highwater = narenas_currently_allocated;
1100 arenaobj->freepools = NULL;
1101 /* pool_address <- first pool-aligned address in the arena
1102 nfreepools <- number of whole pools that fit after alignment */
1103 arenaobj->pool_address = (block*)arenaobj->address;
1104 arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
1105 assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
1106 excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1107 if (excess != 0) {
1108 --arenaobj->nfreepools;
1109 arenaobj->pool_address += POOL_SIZE - excess;
1110 }
1111 arenaobj->ntotalpools = arenaobj->nfreepools;
1112
1113 return arenaobj;
1114 }
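
/* Worked example: with ARENA_SIZE == 256K and POOL_SIZE == 4K, a freshly
 * mapped arena starts out with 64 free pools; if its base address is not
 * pool-aligned, the partial pool at the front is sacrificed, leaving 63. */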
1115
1116 /*
1117 address_in_range(P, POOL)
1118
1119 Return true if and only if P is an address that was allocated by pymalloc.
1120 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1121 (the caller is asked to compute this because the macro expands POOL more than
1122 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1123 variable and pass the latter to the macro; because address_in_range is
1124 called on every alloc/realloc/free, micro-efficiency is important here).
1125
1126 Tricky: Let B be the arena base address associated with the pool, B =
1127 arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if
1128
1129 B <= P < B + ARENA_SIZE
1130
1131 Subtracting B throughout, this is true iff
1132
1133 0 <= P-B < ARENA_SIZE
1134
1135 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1136
1137 Obscure: A PyMem "free memory" function can call the pymalloc free or realloc
1138 before the first arena has been allocated. `arenas` is still NULL in that
1139 case. We're relying on that maxarenas is also 0 in that case, so that
1140 (POOL)->arenaindex < maxarenas must be false, saving us from trying to index
1141 into a NULL arenas.
1142
1143 Details: given P and POOL, the arena_object corresponding to P is AO =
1144 arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild
1145 stores, etc), POOL is the correct address of P's pool, AO.address is the
1146 correct base address of the pool's arena, and P must be within ARENA_SIZE of
1147 AO.address. In addition, AO.address is not 0 (no arena can start at address 0
1148 (NULL)). Therefore address_in_range correctly reports that obmalloc
1149 controls P.
1150
1151 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1152 call to the system malloc() or realloc()). (POOL)->arenaindex may be anything
1153 in this case -- it may even be uninitialized trash. If the trash arenaindex
1154 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1155 control P.
1156
1157 Else arenaindex is < maxarena, and AO is read up. If AO corresponds to an
1158 allocated arena, obmalloc controls all the memory in slice AO.address :
1159 AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc,
1160 so P doesn't lie in that slice, so the macro correctly reports that P is not
1161 controlled by obmalloc.
1162
1163 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1164 arena_object (one not currently associated with an allocated arena),
1165 AO.address is 0, and the second test in the macro reduces to:
1166
1167 P < ARENA_SIZE
1168
1169 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1170 that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part
1171 of the test still passes, and the third clause (AO.address != 0) is necessary
1172 to get the correct result: AO.address is 0 in this case, so the macro
1173 correctly reports that P is not controlled by obmalloc (despite that P lies in
1174 slice AO.address : AO.address + ARENA_SIZE).
1175
1176 Note: The third (AO.address != 0) clause was added in Python 2.5. Before
1177 2.5, arenas were never free()'ed, and an arenaindex < maxarena always
1178 corresponded to a currently-allocated arena, so the "P is not controlled by
1179 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1180 was impossible.
1181
1182 Note that the logic is excruciating, and reading up possibly uninitialized
1183 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1184 creates problems for some memory debuggers. The overwhelming advantage is
1185 that this test determines whether an arbitrary address is controlled by
1186 obmalloc in a small constant time, independent of the number of arenas
1187 obmalloc controls. Since this test is needed at every entry point, it's
1188 extremely desirable that it be this fast.
1189 */
1190
1191 static bool ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
1192 address_in_range(void *p, poolp pool)
1193 {
1194 // Since address_in_range may be reading from memory which was not allocated
1195 // by Python, it is important that pool->arenaindex is read only once, as
1196 // another thread may be concurrently modifying the value without holding
1197 // the GIL. The following dance forces the compiler to read pool->arenaindex
1198 // only once.
1199 uint arenaindex = *((volatile uint *)&pool->arenaindex);
1200 return arenaindex < maxarenas &&
1201 (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1202 arenas[arenaindex].address != 0;
1203 }
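
/* Worked note on the single-comparison test above: writing B for
 * arenas[arenaindex].address, the two-sided test "B <= P < B + ARENA_SIZE"
 * collapses to "(uintptr_t)P - B < ARENA_SIZE" because, when P < B, the
 * unsigned subtraction wraps around to a value far larger than ARENA_SIZE
 * and the comparison fails anyway. */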
1204
1205 /*==========================================================================*/
1206
1207 /* malloc. Note that nbytes==0 tries to return a non-NULL pointer, distinct
1208 * from all other currently live pointers. This may not be possible.
1209 */
1210
1211 /*
1212 * The basic blocks are ordered by decreasing execution frequency,
1213 * which minimizes the number of jumps in the most common cases,
1214 * improves branching prediction and instruction scheduling (small
1215 * block allocations typically result in a couple of instructions).
1216 * Unless the optimizer reorders everything, being too smart...
1217 */
1218
1219 static void *
1220 _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
1221 {
1222 size_t nbytes;
1223 block *bp;
1224 poolp pool;
1225 poolp next;
1226 uint size;
1227
1228 _Py_AllocatedBlocks++;
1229
1230 assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize);
1231 nbytes = nelem * elsize;
1232
1233 #ifdef WITH_VALGRIND
1234 if (UNLIKELY(running_on_valgrind == -1))
1235 running_on_valgrind = RUNNING_ON_VALGRIND;
1236 if (UNLIKELY(running_on_valgrind))
1237 goto redirect;
1238 #endif
1239
1240 if (nelem == 0 || elsize == 0)
1241 goto redirect;
1242
1243 if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
1244 LOCK();
1245 /*
1246 * Most frequent paths first
1247 */
1248 size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1249 pool = usedpools[size + size];
1250 if (pool != pool->nextpool) {
1251 /*
1252 * There is a used pool for this size class.
1253 * Pick up the head block of its free list.
1254 */
1255 ++pool->ref.count;
1256 bp = pool->freeblock;
1257 assert(bp != NULL);
1258 if ((pool->freeblock = *(block **)bp) != NULL) {
1259 UNLOCK();
1260 if (use_calloc)
1261 memset(bp, 0, nbytes);
1262 return (void *)bp;
1263 }
1264 /*
1265 * Reached the end of the free list, try to extend it.
1266 */
1267 if (pool->nextoffset <= pool->maxnextoffset) {
1268 /* There is room for another block. */
1269 pool->freeblock = (block*)pool +
1270 pool->nextoffset;
1271 pool->nextoffset += INDEX2SIZE(size);
1272 *(block **)(pool->freeblock) = NULL;
1273 UNLOCK();
1274 if (use_calloc)
1275 memset(bp, 0, nbytes);
1276 return (void *)bp;
1277 }
1278 /* Pool is full, unlink from used pools. */
1279 next = pool->nextpool;
1280 pool = pool->prevpool;
1281 next->prevpool = pool;
1282 pool->nextpool = next;
1283 UNLOCK();
1284 if (use_calloc)
1285 memset(bp, 0, nbytes);
1286 return (void *)bp;
1287 }
1288
1289 /* There isn't a pool of the right size class immediately
1290 * available: use a free pool.
1291 */
1292 if (usable_arenas == NULL) {
1293 /* No arena has a free pool: allocate a new arena. */
1294 #ifdef WITH_MEMORY_LIMITS
1295 if (narenas_currently_allocated >= MAX_ARENAS) {
1296 UNLOCK();
1297 goto redirect;
1298 }
1299 #endif
1300 usable_arenas = new_arena();
1301 if (usable_arenas == NULL) {
1302 UNLOCK();
1303 goto redirect;
1304 }
1305 usable_arenas->nextarena =
1306 usable_arenas->prevarena = NULL;
1307 }
1308 assert(usable_arenas->address != 0);
1309
1310 /* Try to get a cached free pool. */
1311 pool = usable_arenas->freepools;
1312 if (pool != NULL) {
1313 /* Unlink from cached pools. */
1314 usable_arenas->freepools = pool->nextpool;
1315
1316 /* This arena already had the smallest nfreepools
1317 * value, so decreasing nfreepools doesn't change
1318 * that, and we don't need to rearrange the
1319 * usable_arenas list. However, if the arena has
1320 * become wholly allocated, we need to remove its
1321 * arena_object from usable_arenas.
1322 */
1323 --usable_arenas->nfreepools;
1324 if (usable_arenas->nfreepools == 0) {
1325 /* Wholly allocated: remove. */
1326 assert(usable_arenas->freepools == NULL);
1327 assert(usable_arenas->nextarena == NULL ||
1328 usable_arenas->nextarena->prevarena ==
1329 usable_arenas);
1330
1331 usable_arenas = usable_arenas->nextarena;
1332 if (usable_arenas != NULL) {
1333 usable_arenas->prevarena = NULL;
1334 assert(usable_arenas->address != 0);
1335 }
1336 }
1337 else {
1338 /* nfreepools > 0: it must be that freepools
1339 * isn't NULL, or that we haven't yet carved
1340 * off all the arena's pools for the first
1341 * time.
1342 */
1343 assert(usable_arenas->freepools != NULL ||
1344 usable_arenas->pool_address <=
1345 (block*)usable_arenas->address +
1346 ARENA_SIZE - POOL_SIZE);
1347 }
1348 init_pool:
1349 /* Frontlink to used pools. */
1350 next = usedpools[size + size]; /* == prev */
1351 pool->nextpool = next;
1352 pool->prevpool = next;
1353 next->nextpool = pool;
1354 next->prevpool = pool;
1355 pool->ref.count = 1;
1356 if (pool->szidx == size) {
1357 /* Luckily, this pool last contained blocks
1358 * of the same size class, so its header
1359 * and free list are already initialized.
1360 */
1361 bp = pool->freeblock;
1362 assert(bp != NULL);
1363 pool->freeblock = *(block **)bp;
1364 UNLOCK();
1365 if (use_calloc)
1366 memset(bp, 0, nbytes);
1367 return (void *)bp;
1368 }
1369 /*
1370 * Initialize the pool header, set up the free list to
1371 * contain just the second block, and return the first
1372 * block.
1373 */
1374 pool->szidx = size;
1375 size = INDEX2SIZE(size);
1376 bp = (block *)pool + POOL_OVERHEAD;
1377 pool->nextoffset = POOL_OVERHEAD + (size << 1);
1378 pool->maxnextoffset = POOL_SIZE - size;
1379 pool->freeblock = bp + size;
1380 *(block **)(pool->freeblock) = NULL;
1381 UNLOCK();
1382 if (use_calloc)
1383 memset(bp, 0, nbytes);
1384 return (void *)bp;
1385 }
1386
1387 /* Carve off a new pool. */
1388 assert(usable_arenas->nfreepools > 0);
1389 assert(usable_arenas->freepools == NULL);
1390 pool = (poolp)usable_arenas->pool_address;
1391 assert((block*)pool <= (block*)usable_arenas->address +
1392 ARENA_SIZE - POOL_SIZE);
1393 pool->arenaindex = (uint)(usable_arenas - arenas);
1394 assert(&arenas[pool->arenaindex] == usable_arenas);
1395 pool->szidx = DUMMY_SIZE_IDX;
1396 usable_arenas->pool_address += POOL_SIZE;
1397 --usable_arenas->nfreepools;
1398
1399 if (usable_arenas->nfreepools == 0) {
1400 assert(usable_arenas->nextarena == NULL ||
1401 usable_arenas->nextarena->prevarena ==
1402 usable_arenas);
1403 /* Unlink the arena: it is completely allocated. */
1404 usable_arenas = usable_arenas->nextarena;
1405 if (usable_arenas != NULL) {
1406 usable_arenas->prevarena = NULL;
1407 assert(usable_arenas->address != 0);
1408 }
1409 }
1410
1411 goto init_pool;
1412 }
1413
1414 /* The small block allocator ends here. */
1415
1416 redirect:
1417 /* Redirect the original request to the underlying (libc) allocator.
1418 * We jump here on bigger requests, on error in the code above (as a
1419 * last chance to serve the request) or when the max memory limit
1420 * has been reached.
1421 */
1422 {
1423 void *result;
1424 if (use_calloc)
1425 result = PyMem_RawCalloc(nelem, elsize);
1426 else
1427 result = PyMem_RawMalloc(nbytes);
1428 if (!result)
1429 _Py_AllocatedBlocks--;
1430 return result;
1431 }
1432 }
1433
1434 static void *
1435 _PyObject_Malloc(void *ctx, size_t nbytes)
1436 {
1437 return _PyObject_Alloc(0, ctx, 1, nbytes);
1438 }
1439
1440 static void *
1441 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
1442 {
1443 return _PyObject_Alloc(1, ctx, nelem, elsize);
1444 }
1445
1446 /* free */
1447
1448 static void
1449 _PyObject_Free(void *ctx, void *p)
1450 {
1451 poolp pool;
1452 block *lastfree;
1453 poolp next, prev;
1454 uint size;
1455
1456 if (p == NULL) /* free(NULL) has no effect */
1457 return;
1458
1459 _Py_AllocatedBlocks--;
1460
1461 #ifdef WITH_VALGRIND
1462 if (UNLIKELY(running_on_valgrind > 0))
1463 goto redirect;
1464 #endif
1465
1466 pool = POOL_ADDR(p);
1467 if (address_in_range(p, pool)) {
1468 /* We allocated this address. */
1469 LOCK();
1470 /* Link p to the start of the pool's freeblock list. Since
1471 * the pool had at least the p block outstanding, the pool
1472 * wasn't empty (so it's already in a usedpools[] list, or
1473 * was full and is in no list -- it's not in the freeblocks
1474 * list in any case).
1475 */
1476 assert(pool->ref.count > 0); /* else it was empty */
1477 *(block **)p = lastfree = pool->freeblock;
1478 pool->freeblock = (block *)p;
1479 if (lastfree) {
1480 struct arena_object* ao;
1481 uint nf; /* ao->nfreepools */
1482
1483 /* freeblock wasn't NULL, so the pool wasn't full,
1484 * and the pool is in a usedpools[] list.
1485 */
1486 if (--pool->ref.count != 0) {
1487 /* pool isn't empty: leave it in usedpools */
1488 UNLOCK();
1489 return;
1490 }
1491 /* Pool is now empty: unlink from usedpools, and
1492 * link to the front of freepools. This ensures that
1493 * previously freed pools will be allocated later
1494 * (being not referenced, they are perhaps paged out).
1495 */
1496 next = pool->nextpool;
1497 prev = pool->prevpool;
1498 next->prevpool = prev;
1499 prev->nextpool = next;
1500
1501 /* Link the pool to freepools. This is a singly-linked
1502 * list, and pool->prevpool isn't used there.
1503 */
1504 ao = &arenas[pool->arenaindex];
1505 pool->nextpool = ao->freepools;
1506 ao->freepools = pool;
1507 nf = ++ao->nfreepools;
1508
1509 /* All the rest is arena management. We just freed
1510 * a pool, and there are 4 cases for arena mgmt:
1511 * 1. If all the pools are free, return the arena to
1512 * the system free().
1513 * 2. If this is the only free pool in the arena,
1514 * add the arena back to the `usable_arenas` list.
1515 * 3. If the "next" arena has a smaller count of free
1516 * pools, we have to "slide this arena right" to
1517 * restore that usable_arenas is sorted in order of
1518 * nfreepools.
1519 * 4. Else there's nothing more to do.
1520 */
1521 if (nf == ao->ntotalpools) {
1522 /* Case 1. First unlink ao from usable_arenas.
1523 */
1524 assert(ao->prevarena == NULL ||
1525 ao->prevarena->address != 0);
1526 assert(ao->nextarena == NULL ||
1527 ao->nextarena->address != 0);
1528
1529 /* Fix the pointer in the prevarena, or the
1530 * usable_arenas pointer.
1531 */
1532 if (ao->prevarena == NULL) {
1533 usable_arenas = ao->nextarena;
1534 assert(usable_arenas == NULL ||
1535 usable_arenas->address != 0);
1536 }
1537 else {
1538 assert(ao->prevarena->nextarena == ao);
1539 ao->prevarena->nextarena =
1540 ao->nextarena;
1541 }
1542 /* Fix the pointer in the nextarena. */
1543 if (ao->nextarena != NULL) {
1544 assert(ao->nextarena->prevarena == ao);
1545 ao->nextarena->prevarena =
1546 ao->prevarena;
1547 }
1548 /* Record that this arena_object slot is
1549 * available to be reused.
1550 */
1551 ao->nextarena = unused_arena_objects;
1552 unused_arena_objects = ao;
1553
1554 /* Free the entire arena. */
1555 _PyObject_Arena.free(_PyObject_Arena.ctx,
1556 (void *)ao->address, ARENA_SIZE);
1557 ao->address = 0; /* mark unassociated */
1558 --narenas_currently_allocated;
1559
1560 UNLOCK();
1561 return;
1562 }
1563 if (nf == 1) {
1564 /* Case 2. Put ao at the head of
1565 * usable_arenas. Note that because
1566 * ao->nfreepools was 0 before, ao isn't
1567 * currently on the usable_arenas list.
1568 */
1569 ao->nextarena = usable_arenas;
1570 ao->prevarena = NULL;
1571 if (usable_arenas)
1572 usable_arenas->prevarena = ao;
1573 usable_arenas = ao;
1574 assert(usable_arenas->address != 0);
1575
1576 UNLOCK();
1577 return;
1578 }
1579 /* If this arena is now out of order, we need to keep
1580 * the list sorted. The list is kept sorted so that
1581 * the "most full" arenas are used first, which allows
1582 * the nearly empty arenas to be completely freed. In
1583 * a few informal tests, this approach appeared to
1584 * allow considerably more memory to be freed.
1585 */
1586 if (ao->nextarena == NULL ||
1587 nf <= ao->nextarena->nfreepools) {
1588 /* Case 4. Nothing to do. */
1589 UNLOCK();
1590 return;
1591 }
1592 /* Case 3: We have to move the arena towards the end
1593 * of the list, because it has more free pools than
1594 * the arena to its right.
1595 * First unlink ao from usable_arenas.
1596 */
1597 if (ao->prevarena != NULL) {
1598 /* ao isn't at the head of the list */
1599 assert(ao->prevarena->nextarena == ao);
1600 ao->prevarena->nextarena = ao->nextarena;
1601 }
1602 else {
1603 /* ao is at the head of the list */
1604 assert(usable_arenas == ao);
1605 usable_arenas = ao->nextarena;
1606 }
1607 ao->nextarena->prevarena = ao->prevarena;
1608
1609 /* Locate the new insertion point by iterating over
1610 * the list, using our nextarena pointer.
1611 */
1612 while (ao->nextarena != NULL &&
1613 nf > ao->nextarena->nfreepools) {
1614 ao->prevarena = ao->nextarena;
1615 ao->nextarena = ao->nextarena->nextarena;
1616 }
1617
1618 /* Insert ao at this point. */
1619 assert(ao->nextarena == NULL ||
1620 ao->prevarena == ao->nextarena->prevarena);
1621 assert(ao->prevarena->nextarena == ao->nextarena);
1622
1623 ao->prevarena->nextarena = ao;
1624 if (ao->nextarena != NULL)
1625 ao->nextarena->prevarena = ao;
1626
1627 /* Verify that the swaps worked. */
1628 assert(ao->nextarena == NULL ||
1629 nf <= ao->nextarena->nfreepools);
1630 assert(ao->prevarena == NULL ||
1631 nf > ao->prevarena->nfreepools);
1632 assert(ao->nextarena == NULL ||
1633 ao->nextarena->prevarena == ao);
1634 assert((usable_arenas == ao &&
1635 ao->prevarena == NULL) ||
1636 ao->prevarena->nextarena == ao);
1637
1638 UNLOCK();
1639 return;
1640 }
1641 /* Pool was full, so doesn't currently live in any list:
1642 * link it to the front of the appropriate usedpools[] list.
1643 * This mimics LRU pool usage for new allocations and
1644 * targets optimal filling when several pools contain
1645 * blocks of the same size class.
1646 */
1647 --pool->ref.count;
1648 assert(pool->ref.count > 0); /* else the pool is empty */
1649 size = pool->szidx;
1650 next = usedpools[size + size];
1651 prev = next->prevpool;
1652 /* insert pool before next: prev <-> pool <-> next */
1653 pool->nextpool = next;
1654 pool->prevpool = prev;
1655 next->prevpool = pool;
1656 prev->nextpool = pool;
1657 UNLOCK();
1658 return;
1659 }
1660
1661 #ifdef WITH_VALGRIND
1662 redirect:
1663 #endif
1664 /* We didn't allocate this address. */
1665 PyMem_RawFree(p);
1666 }
1667
1668 /* realloc. If p is NULL, this acts like malloc(nbytes). Else if nbytes==0,
1669 * then as the Python docs promise, we do not treat this like free(p), and
1670 * return a non-NULL result.
1671 */
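/* A caller-visible sketch of that contract (illustrative only; PyObject_Realloc
 * is the public entry point that normally reaches this function when pymalloc
 * is the active object allocator):
 *
 *     void *p = PyObject_Malloc(100);
 *     p = PyObject_Realloc(p, 0);    // NOT a free: non-NULL on success
 *     PyObject_Free(p);              // the block still has to be freed
 */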
1672
1673 static void *
1674 _PyObject_Realloc(void *ctx, void *p, size_t nbytes)
1675 {
1676 void *bp;
1677 poolp pool;
1678 size_t size;
1679
1680 if (p == NULL)
1681 return _PyObject_Alloc(0, ctx, 1, nbytes);
1682
1683 #ifdef WITH_VALGRIND
1684 /* Treat running_on_valgrind == -1 the same as 0 */
1685 if (UNLIKELY(running_on_valgrind > 0))
1686 goto redirect;
1687 #endif
1688
1689 pool = POOL_ADDR(p);
1690 if (address_in_range(p, pool)) {
1691 /* We're in charge of this block */
1692 size = INDEX2SIZE(pool->szidx);
1693 if (nbytes <= size) {
1694 /* The block is staying the same or shrinking. If
1695 * it's shrinking, there's a tradeoff: it costs
1696 * cycles to copy the block to a smaller size class,
1697 * but it wastes memory not to copy it. The
1698 * compromise here is to copy on shrink only if at
1699 * least 25% of size can be shaved off.
1700 */
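            /* Worked example of the 25% rule (illustrative, not from the
             * original comment): for a block in the 64-byte size class, a
             * shrink to 56 bytes keeps the block in place because
             * 4*56 = 224 > 3*64 = 192, while a shrink to 40 bytes copies the
             * data into a smaller class because 4*40 = 160 <= 192.
             */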
1701 if (4 * nbytes > 3 * size) {
1702 /* It's the same,
1703 * or shrinking and new/old > 3/4.
1704 */
1705 return p;
1706 }
1707 size = nbytes;
1708 }
1709 bp = _PyObject_Alloc(0, ctx, 1, nbytes);
1710 if (bp != NULL) {
1711 memcpy(bp, p, size);
1712 _PyObject_Free(ctx, p);
1713 }
1714 return bp;
1715 }
1716 #ifdef WITH_VALGRIND
1717 redirect:
1718 #endif
1719 /* We're not managing this block. If nbytes <=
1720 * SMALL_REQUEST_THRESHOLD, it's tempting to try to take over this
1721 * block. However, if we do, we need to copy the valid data from
1722 * the C-managed block to one of our blocks, and there's no portable
1723 * way to know how much of the memory space starting at p is valid.
1724 * As bug 1185883 pointed out the hard way, it's possible that the
1725 * C-managed block is "at the end" of allocated VM space, so that
1726 * a memory fault can occur if we try to copy nbytes bytes starting
1727 * at p. Instead we punt: let C continue to manage this block.
1728 */
1729 if (nbytes)
1730 return PyMem_RawRealloc(p, nbytes);
1731 /* C doesn't define the result of realloc(p, 0) (it may or may not
1732 * return NULL then), but Python's docs promise that nbytes==0 never
1733 * returns NULL. We don't pass 0 to realloc(), to avoid that endcase
1734 * to begin with. Even then, we can't be sure that realloc() won't
1735 * return NULL.
1736 */
1737 bp = PyMem_RawRealloc(p, 1);
1738 return bp ? bp : p;
1739 }
1740
1741 #else /* ! WITH_PYMALLOC */
1742
1743 /*==========================================================================*/
1744 /* pymalloc not enabled: redirect the entry points to malloc. These are
1745 * only used by extensions that were compiled with pymalloc enabled. */
1746
1747 Py_ssize_t
1748 _Py_GetAllocatedBlocks(void)
1749 {
1750 return 0;
1751 }
1752
1753 #endif /* WITH_PYMALLOC */
1754
1755
1756 /*==========================================================================*/
1757 /* A cross-platform debugging allocator. It doesn't manage memory directly;
1758 * it wraps a real allocator, adding extra debugging info to the memory blocks.
1759 */
1760
1761 /* Special bytes broadcast into debug memory blocks at appropriate times.
1762 * Strings of these are unlikely to be valid addresses, floats, ints or
1763 * 7-bit ASCII.
1764 */
1765 #undef CLEANBYTE
1766 #undef DEADBYTE
1767 #undef FORBIDDENBYTE
1768 #define CLEANBYTE 0xCB /* clean (newly allocated) memory */
1769 #define DEADBYTE 0xDB /* dead (newly freed) memory */
1770 #define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */
1771
1772 static size_t serialno = 0; /* incremented on each debug {m,re}alloc */
1773
1774 /* serialno is always incremented by calling this routine; the point is
1775 * to supply a single place to set a breakpoint.
1776 */
1777 static void
1778 bumpserialno(void)
1779 {
1780 ++serialno;
1781 }
1782
1783 #define SST SIZEOF_SIZE_T
1784
1785 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
1786 static size_t
1787 read_size_t(const void *p)
1788 {
1789 const uint8_t *q = (const uint8_t *)p;
1790 size_t result = *q++;
1791 int i;
1792
1793 for (i = SST; --i > 0; ++q)
1794 result = (result << 8) | *q;
1795 return result;
1796 }
1797
1798 /* Write n as a big-endian size_t, MSB at address p, LSB at
1799 * p + sizeof(size_t) - 1.
1800 */
1801 static void
1802 write_size_t(void *p, size_t n)
1803 {
1804 uint8_t *q = (uint8_t *)p + SST - 1;
1805 int i;
1806
1807 for (i = SST; --i >= 0; --q) {
1808 *q = (uint8_t)(n & 0xff);
1809 n >>= 8;
1810 }
1811 }
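/* A minimal round-trip sketch for the two helpers above (illustrative only,
 * compiled out): write_size_t() stores the value big-endian, so read_size_t()
 * recovers it on any host, with the least significant byte at the highest
 * address.
 */
#if 0   /* example only -- not part of the build */
static void
example_size_t_roundtrip(void)
{
    uint8_t buf[SST];
    write_size_t(buf, (size_t)0x01020304);
    assert(read_size_t(buf) == (size_t)0x01020304);
    assert(buf[SST - 1] == 0x04);   /* LSB stored last (big-endian) */
}
#endif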
1812
1813 /* Let S = sizeof(size_t). The debug malloc asks for 4*S extra bytes and
1814 fills them with useful stuff, here calling the underlying malloc's result p:
1815
1816 p[0: S]
1817 Number of bytes originally asked for. This is a size_t, big-endian (easier
1818 to read in a memory dump).
1819 p[S]
1820 API ID. See PEP 445. This is a character, but seems undocumented.
1821 p[S+1: 2*S]
1822 Copies of FORBIDDENBYTE. Used to catch under-writes and reads.
1823 p[2*S: 2*S+n]
1824 The requested memory, filled with copies of CLEANBYTE.
1825 Used to catch reference to uninitialized memory.
1826 &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc
1827 handled the request itself.
1828 p[2*S+n: 2*S+n+S]
1829 Copies of FORBIDDENBYTE. Used to catch over-writes and reads.
1830 p[2*S+n+S: 2*S+n+2*S]
1831 A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
1832 and _PyMem_DebugRealloc.
1833 This is a big-endian size_t.
1834 If "bad memory" is detected later, the serial number gives an
1835 excellent way to set a breakpoint on the next run, to capture the
1836 instant at which this block was passed out.
1837 */
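/* How those fields are reached from the pointer the caller sees (a sketch
 * mirroring what the check/dump routines below do; compiled out, and the
 * example name is not part of CPython):
 */
#if 0   /* example only -- not part of the build */
static size_t
example_debug_fields(const void *user_p)
{
    const uint8_t *q = (const uint8_t *)user_p;        /* q == &p[2*S] above */
    size_t nbytes = read_size_t(q - 2*SST);             /* original request size */
    char api_id = (char)q[-SST];                        /* PEP 445 domain id */
    size_t serial = read_size_t(q + nbytes + SST);      /* allocation serial no. */
    (void)api_id;
    return serial;
}
#endif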
1838
1839 static void *
1840 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
1841 {
1842 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
1843 uint8_t *p; /* base address of malloc'ed block */
1844 uint8_t *tail; /* p + 2*SST + nbytes == pointer to tail pad bytes */
1845 size_t total; /* nbytes + 4*SST */
1846
1847 bumpserialno();
1848 total = nbytes + 4*SST;
1849 if (nbytes > PY_SSIZE_T_MAX - 4*SST)
1850 /* overflow: can't represent total as a Py_ssize_t */
1851 return NULL;
1852
1853 if (use_calloc)
1854 p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
1855 else
1856 p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
1857 if (p == NULL)
1858 return NULL;
1859
1860 /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
1861 write_size_t(p, nbytes);
1862 p[SST] = (uint8_t)api->api_id;
1863 memset(p + SST + 1, FORBIDDENBYTE, SST-1);
1864
1865 if (nbytes > 0 && !use_calloc)
1866 memset(p + 2*SST, CLEANBYTE, nbytes);
1867
1868 /* at tail, write pad (SST bytes) and serialno (SST bytes) */
1869 tail = p + 2*SST + nbytes;
1870 memset(tail, FORBIDDENBYTE, SST);
1871 write_size_t(tail + SST, serialno);
1872
1873 return p + 2*SST;
1874 }
1875
1876 static void *
1877 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
1878 {
1879 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
1880 }
1881
1882 static void *
1883 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
1884 {
1885 size_t nbytes;
1886 assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize);
1887 nbytes = nelem * elsize;
1888 return _PyMem_DebugRawAlloc(1, ctx, nbytes);
1889 }
1890
1891 /* The debug free first checks the 2*SST header and trailer bytes on each end
1892 for sanity (in particular, that the FORBIDDENBYTEs and the api ID are still
1893 intact). It then fills the block, including the debug decorations, with
1894 DEADBYTE, and finally calls the underlying free.
1895 */
1896 static void
1897 _PyMem_DebugRawFree(void *ctx, void *p)
1898 {
1899 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
1900 uint8_t *q = (uint8_t *)p - 2*SST; /* address returned from malloc */
1901 size_t nbytes;
1902
1903 if (p == NULL)
1904 return;
1905 _PyMem_DebugCheckAddress(api->api_id, p);
1906 nbytes = read_size_t(q);
1907 nbytes += 4*SST;
1908 if (nbytes > 0)
1909 memset(q, DEADBYTE, nbytes);
1910 api->alloc.free(api->alloc.ctx, q);
1911 }
1912
1913 static void *
1914 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
1915 {
1916 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
1917 uint8_t *q = (uint8_t *)p, *oldq;
1918 uint8_t *tail;
1919 size_t total; /* nbytes + 4*SST */
1920 size_t original_nbytes;
1921 int i;
1922
1923 if (p == NULL)
1924 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
1925
1926 _PyMem_DebugCheckAddress(api->api_id, p);
1927 bumpserialno();
1928 original_nbytes = read_size_t(q - 2*SST);
1929 total = nbytes + 4*SST;
1930 if (nbytes > PY_SSIZE_T_MAX - 4*SST)
1931 /* overflow: can't represent total as a Py_ssize_t */
1932 return NULL;
1933
1934 /* Resize and add decorations. We may get a new pointer here, in which
1935 * case we didn't get the chance to mark the old memory with DEADBYTE,
1936 * but we live with that.
1937 */
1938 oldq = q;
1939 q = (uint8_t *)api->alloc.realloc(api->alloc.ctx, q - 2*SST, total);
1940 if (q == NULL)
1941 return NULL;
1942
1943 if (q == oldq && nbytes < original_nbytes) {
1944 /* shrinking: mark old extra memory dead */
1945 memset(q + nbytes, DEADBYTE, original_nbytes - nbytes);
1946 }
1947
1948 write_size_t(q, nbytes);
1949 assert(q[SST] == (uint8_t)api->api_id);
1950 for (i = 1; i < SST; ++i)
1951 assert(q[SST + i] == FORBIDDENBYTE);
1952 q += 2*SST;
1953
1954 tail = q + nbytes;
1955 memset(tail, FORBIDDENBYTE, SST);
1956 write_size_t(tail + SST, serialno);
1957
1958 if (nbytes > original_nbytes) {
1959 /* growing: mark new extra memory clean */
1960 memset(q + original_nbytes, CLEANBYTE,
1961 nbytes - original_nbytes);
1962 }
1963
1964 return q;
1965 }
1966
1967 static void
1968 _PyMem_DebugCheckGIL(void)
1969 {
1970 #ifdef WITH_THREAD
1971 if (!PyGILState_Check())
1972 Py_FatalError("Python memory allocator called "
1973 "without holding the GIL");
1974 #endif
1975 }
1976
1977 static void *
1978 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
1979 {
1980 _PyMem_DebugCheckGIL();
1981 return _PyMem_DebugRawMalloc(ctx, nbytes);
1982 }
1983
1984 static void *
1985 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
1986 {
1987 _PyMem_DebugCheckGIL();
1988 return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
1989 }
1990
1991 static void
1992 _PyMem_DebugFree(void *ctx, void *ptr)
1993 {
1994 _PyMem_DebugCheckGIL();
1995 _PyMem_DebugRawFree(ctx, ptr);
1996 }
1997
1998 static void *
1999 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2000 {
2001 _PyMem_DebugCheckGIL();
2002 return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2003 }
2004
2005 /* Check the forbidden bytes on both ends of the memory allocated for p.
2006 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2007 * and call Py_FatalError to kill the program.
2008 * The API id is also checked.
2009 */
2010 static void
2011 _PyMem_DebugCheckAddress(char api, const void *p)
2012 {
2013 const uint8_t *q = (const uint8_t *)p;
2014 char msgbuf[64];
2015 char *msg;
2016 size_t nbytes;
2017 const uint8_t *tail;
2018 int i;
2019 char id;
2020
2021 if (p == NULL) {
2022 msg = "didn't expect a NULL pointer";
2023 goto error;
2024 }
2025
2026 /* Check the API id */
2027 id = (char)q[-SST];
2028 if (id != api) {
2029 msg = msgbuf;
2030 snprintf(msg, sizeof(msgbuf), "bad ID: Allocated using API '%c', verified using API '%c'", id, api);
2031 msgbuf[sizeof(msgbuf)-1] = 0;
2032 goto error;
2033 }
2034
2035 /* Check the stuff at the start of p first: if there's underwrite
2036 * corruption, the number-of-bytes field may be nuts, and checking
2037 * the tail could lead to a segfault then.
2038 */
2039 for (i = SST-1; i >= 1; --i) {
2040 if (*(q-i) != FORBIDDENBYTE) {
2041 msg = "bad leading pad byte";
2042 goto error;
2043 }
2044 }
2045
2046 nbytes = read_size_t(q - 2*SST);
2047 tail = q + nbytes;
2048 for (i = 0; i < SST; ++i) {
2049 if (tail[i] != FORBIDDENBYTE) {
2050 msg = "bad trailing pad byte";
2051 goto error;
2052 }
2053 }
2054
2055 return;
2056
2057 error:
2058 _PyObject_DebugDumpAddress(p);
2059 Py_FatalError(msg);
2060 }
2061
2062 /* Display info to stderr about the memory block at p. */
2063 static void
2064 _PyObject_DebugDumpAddress(const void *p)
2065 {
2066 const uint8_t *q = (const uint8_t *)p;
2067 const uint8_t *tail;
2068 size_t nbytes, serial;
2069 int i;
2070 int ok;
2071 char id;
2072
2073 fprintf(stderr, "Debug memory block at address p=%p:", p);
2074 if (p == NULL) {
2075 fprintf(stderr, "\n");
2076 return;
2077 }
2078 id = (char)q[-SST];
2079 fprintf(stderr, " API '%c'\n", id);
2080
2081 nbytes = read_size_t(q - 2*SST);
2082 fprintf(stderr, " %" PY_FORMAT_SIZE_T "u bytes originally "
2083 "requested\n", nbytes);
2084
2085 /* In case this is nuts, check the leading pad bytes first. */
2086 fprintf(stderr, " The %d pad bytes at p-%d are ", SST-1, SST-1);
2087 ok = 1;
2088 for (i = 1; i <= SST-1; ++i) {
2089 if (*(q-i) != FORBIDDENBYTE) {
2090 ok = 0;
2091 break;
2092 }
2093 }
2094 if (ok)
2095 fputs("FORBIDDENBYTE, as expected.\n", stderr);
2096 else {
2097 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2098 FORBIDDENBYTE);
2099 for (i = SST-1; i >= 1; --i) {
2100 const uint8_t byte = *(q-i);
2101 fprintf(stderr, " at p-%d: 0x%02x", i, byte);
2102 if (byte != FORBIDDENBYTE)
2103 fputs(" *** OUCH", stderr);
2104 fputc('\n', stderr);
2105 }
2106
2107 fputs(" Because memory is corrupted at the start, the "
2108 "count of bytes requested\n"
2109 " may be bogus, and checking the trailing pad "
2110 "bytes may segfault.\n", stderr);
2111 }
2112
2113 tail = q + nbytes;
2114 fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, tail);
2115 ok = 1;
2116 for (i = 0; i < SST; ++i) {
2117 if (tail[i] != FORBIDDENBYTE) {
2118 ok = 0;
2119 break;
2120 }
2121 }
2122 if (ok)
2123 fputs("FORBIDDENBYTE, as expected.\n", stderr);
2124 else {
2125 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2126 FORBIDDENBYTE);
2127 for (i = 0; i < SST; ++i) {
2128 const uint8_t byte = tail[i];
2129 fprintf(stderr, " at tail+%d: 0x%02x",
2130 i, byte);
2131 if (byte != FORBIDDENBYTE)
2132 fputs(" *** OUCH", stderr);
2133 fputc('\n', stderr);
2134 }
2135 }
2136
2137 serial = read_size_t(tail + SST);
2138 fprintf(stderr, " The block was made by call #%" PY_FORMAT_SIZE_T
2139 "u to debug malloc/realloc.\n", serial);
2140
2141 if (nbytes > 0) {
2142 i = 0;
2143 fputs(" Data at p:", stderr);
2144 /* print up to 8 bytes at the start */
2145 while (q < tail && i < 8) {
2146 fprintf(stderr, " %02x", *q);
2147 ++i;
2148 ++q;
2149 }
2150 /* and up to 8 at the end */
2151 if (q < tail) {
2152 if (tail - q > 8) {
2153 fputs(" ...", stderr);
2154 q = tail - 8;
2155 }
2156 while (q < tail) {
2157 fprintf(stderr, " %02x", *q);
2158 ++q;
2159 }
2160 }
2161 fputc('\n', stderr);
2162 }
2163 fputc('\n', stderr);
2164
2165 fflush(stderr);
2166 _PyMem_DumpTraceback(fileno(stderr), p);
2167 }
2168
2169
2170 static size_t
2171 printone(FILE *out, const char* msg, size_t value)
2172 {
2173 int i, k;
2174 char buf[100];
2175 size_t origvalue = value;
2176
2177 fputs(msg, out);
2178 for (i = (int)strlen(msg); i < 35; ++i)
2179 fputc(' ', out);
2180 fputc('=', out);
2181
2182 /* Write the value with commas. */
2183 i = 22;
2184 buf[i--] = '\0';
2185 buf[i--] = '\n';
2186 k = 3;
2187 do {
2188 size_t nextvalue = value / 10;
2189 unsigned int digit = (unsigned int)(value - nextvalue * 10);
2190 value = nextvalue;
2191 buf[i--] = (char)(digit + '0');
2192 --k;
2193 if (k == 0 && value && i >= 0) {
2194 k = 3;
2195 buf[i--] = ',';
2196 }
2197 } while (value && i >= 0);
2198
2199 while (i >= 0)
2200 buf[i--] = ' ';
2201 fputs(buf, out);
2202
2203 return origvalue;
2204 }
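/* Illustrative output (approximate alignment, not from the original file):
 *
 *     printone(out, "# arenas allocated total", 1234567);
 *
 * writes the label padded out to column 35, an '=', and the value
 * right-aligned with thousands separators, roughly:
 *
 *     # arenas allocated total           =            1,234,567
 */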
2205
2206 void
2207 _PyDebugAllocatorStats(FILE *out,
2208 const char *block_name, int num_blocks, size_t sizeof_block)
2209 {
2210 char buf1[128];
2211 char buf2[128];
2212 PyOS_snprintf(buf1, sizeof(buf1),
2213 "%d %ss * %" PY_FORMAT_SIZE_T "d bytes each",
2214 num_blocks, block_name, sizeof_block);
2215 PyOS_snprintf(buf2, sizeof(buf2),
2216 "%48s ", buf1);
2217 (void)printone(out, buf2, num_blocks * sizeof_block);
2218 }
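/* Illustrative call with hypothetical values (type-specific free-list
 * statistics are the usual callers of this helper):
 *
 *     _PyDebugAllocatorStats(out, "free PyDictObject", 80, sizeof(PyDictObject));
 *
 * which builds a line like "80 free PyDictObjects * 96 bytes each",
 * right-justifies it, and hands it to printone() above together with the
 * product num_blocks * sizeof_block.
 */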
2219
2220
2221 #ifdef WITH_PYMALLOC
2222
2223 #ifdef Py_DEBUG
2224 /* Is target in the list? The list is traversed via the nextpool pointers.
2225 * The list may be NULL-terminated, or circular. Return 1 if target is in
2226 * list, else 0.
2227 */
2228 static int
2229 pool_is_in_list(const poolp target, poolp list)
2230 {
2231 poolp origlist = list;
2232 assert(target != NULL);
2233 if (list == NULL)
2234 return 0;
2235 do {
2236 if (target == list)
2237 return 1;
2238 list = list->nextpool;
2239 } while (list != NULL && list != origlist);
2240 return 0;
2241 }
2242 #endif
2243
2244 /* Print summary info to "out" about the state of pymalloc's structures.
2245 * In Py_DEBUG mode, also perform some expensive internal consistency
2246 * checks.
2247 */
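/* Usage note (about the usual wiring, not something defined in this file):
 * at the Python level this report is typically reachable via
 *
 *     >>> import sys; sys._debugmallocstats()
 *
 * which writes the same summary to stderr.
 */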
2248 void
2249 _PyObject_DebugMallocStats(FILE *out)
2250 {
2251 uint i;
2252 const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
2253 /* # of pools, allocated blocks, and free blocks per class index */
2254 size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2255 size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2256 size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2257 /* total # of allocated bytes in used and full pools */
2258 size_t allocated_bytes = 0;
2259 /* total # of available bytes in used pools */
2260 size_t available_bytes = 0;
2261 /* # of free pools + pools not yet carved out of current arena */
2262 uint numfreepools = 0;
2263 /* # of bytes for arena alignment padding */
2264 size_t arena_alignment = 0;
2265 /* # of bytes in used and full pools used for pool_headers */
2266 size_t pool_header_bytes = 0;
2267 /* # of bytes in used and full pools wasted due to quantization,
2268 * i.e. the necessarily leftover space at the ends of used and
2269 * full pools.
2270 */
2271 size_t quantization = 0;
2272 /* # of arenas actually allocated. */
2273 size_t narenas = 0;
2274 /* running total -- should equal narenas * ARENA_SIZE */
2275 size_t total;
2276 char buf[128];
2277
2278 fprintf(out, "Small block threshold = %d, in %u size classes.\n",
2279 SMALL_REQUEST_THRESHOLD, numclasses);
2280
2281 for (i = 0; i < numclasses; ++i)
2282 numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
2283
2284 /* Because full pools aren't linked to from anything, it's easiest
2285 * to march over all the arenas. If we're lucky, most of the memory
2286 * will be living in full pools -- would be a shame to miss them.
2287 */
2288 for (i = 0; i < maxarenas; ++i) {
2289 uint j;
2290 uintptr_t base = arenas[i].address;
2291
2292 /* Skip arenas which are not allocated. */
2293 if (arenas[i].address == (uintptr_t)NULL)
2294 continue;
2295 narenas += 1;
2296
2297 numfreepools += arenas[i].nfreepools;
2298
2299 /* round up to pool alignment */
2300 if (base & (uintptr_t)POOL_SIZE_MASK) {
2301 arena_alignment += POOL_SIZE;
2302 base &= ~(uintptr_t)POOL_SIZE_MASK;
2303 base += POOL_SIZE;
2304 }
2305
2306 /* visit every pool in the arena */
2307 assert(base <= (uintptr_t) arenas[i].pool_address);
2308 for (j = 0; base < (uintptr_t) arenas[i].pool_address;
2309 ++j, base += POOL_SIZE) {
2310 poolp p = (poolp)base;
2311 const uint sz = p->szidx;
2312 uint freeblocks;
2313
2314 if (p->ref.count == 0) {
2315 /* currently unused */
2316 #ifdef Py_DEBUG
2317 assert(pool_is_in_list(p, arenas[i].freepools));
2318 #endif
2319 continue;
2320 }
2321 ++numpools[sz];
2322 numblocks[sz] += p->ref.count;
2323 freeblocks = NUMBLOCKS(sz) - p->ref.count;
2324 numfreeblocks[sz] += freeblocks;
2325 #ifdef Py_DEBUG
2326 if (freeblocks > 0)
2327 assert(pool_is_in_list(p, usedpools[sz + sz]));
2328 #endif
2329 }
2330 }
2331 assert(narenas == narenas_currently_allocated);
2332
2333 fputc('\n', out);
2334 fputs("class size num pools blocks in use avail blocks\n"
2335 "----- ---- --------- ------------- ------------\n",
2336 out);
2337
2338 for (i = 0; i < numclasses; ++i) {
2339 size_t p = numpools[i];
2340 size_t b = numblocks[i];
2341 size_t f = numfreeblocks[i];
2342 uint size = INDEX2SIZE(i);
2343 if (p == 0) {
2344 assert(b == 0 && f == 0);
2345 continue;
2346 }
2347 fprintf(out, "%5u %6u "
2348 "%11" PY_FORMAT_SIZE_T "u "
2349 "%15" PY_FORMAT_SIZE_T "u "
2350 "%13" PY_FORMAT_SIZE_T "u\n",
2351 i, size, p, b, f);
2352 allocated_bytes += b * size;
2353 available_bytes += f * size;
2354 pool_header_bytes += p * POOL_OVERHEAD;
2355 quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
2356 }
2357 fputc('\n', out);
2358 if (_PyMem_DebugEnabled())
2359 (void)printone(out, "# times object malloc called", serialno);
2360 (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
2361 (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
2362 (void)printone(out, "# arenas highwater mark", narenas_highwater);
2363 (void)printone(out, "# arenas allocated current", narenas);
2364
2365 PyOS_snprintf(buf, sizeof(buf),
2366 "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
2367 narenas, ARENA_SIZE);
2368 (void)printone(out, buf, narenas * ARENA_SIZE);
2369
2370 fputc('\n', out);
2371
2372 total = printone(out, "# bytes in allocated blocks", allocated_bytes);
2373 total += printone(out, "# bytes in available blocks", available_bytes);
2374
2375 PyOS_snprintf(buf, sizeof(buf),
2376 "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
2377 total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
2378
2379 total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
2380 total += printone(out, "# bytes lost to quantization", quantization);
2381 total += printone(out, "# bytes lost to arena alignment", arena_alignment);
2382 (void)printone(out, "Total", total);
2383 }
2384
2385 #endif /* #ifdef WITH_PYMALLOC */
2386