1 #include "Python.h"
2 
3 #include <stdbool.h>
4 
5 
6 /* Defined in tracemalloc.c */
7 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
8 
9 
10 /* Python's malloc wrappers (see pymem.h) */
11 
12 #undef  uint
13 #define uint    unsigned int    /* assuming >= 16 bits */
14 
15 /* Forward declaration */
16 static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
17 static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
18 static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
19 static void _PyMem_DebugRawFree(void *ctx, void *ptr);
20 
21 static void* _PyMem_DebugMalloc(void *ctx, size_t size);
22 static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
23 static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
24 static void _PyMem_DebugFree(void *ctx, void *p);
25 
26 static void _PyObject_DebugDumpAddress(const void *p);
27 static void _PyMem_DebugCheckAddress(char api_id, const void *p);
28 
29 static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);
30 
31 #if defined(__has_feature)  /* Clang */
32 #  if __has_feature(address_sanitizer) /* is ASAN enabled? */
33 #    define _Py_NO_ADDRESS_SAFETY_ANALYSIS \
34         __attribute__((no_address_safety_analysis))
35 #  endif
36 #  if __has_feature(thread_sanitizer)  /* is TSAN enabled? */
37 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
38 #  endif
39 #  if __has_feature(memory_sanitizer)  /* is MSAN enabled? */
40 #    define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
41 #  endif
42 #elif defined(__GNUC__)
43 #  if defined(__SANITIZE_ADDRESS__)    /* GCC 4.8+, is ASAN enabled? */
44 #    define _Py_NO_ADDRESS_SAFETY_ANALYSIS \
45         __attribute__((no_address_safety_analysis))
46 #  endif
47    // TSAN is supported since GCC 4.8, but __SANITIZE_THREAD__ macro
48    // is provided only since GCC 7.
49 #  if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
50 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
51 #  endif
52 #endif
53 
54 #ifndef _Py_NO_ADDRESS_SAFETY_ANALYSIS
55 #  define _Py_NO_ADDRESS_SAFETY_ANALYSIS
56 #endif
57 #ifndef _Py_NO_SANITIZE_THREAD
58 #  define _Py_NO_SANITIZE_THREAD
59 #endif
60 #ifndef _Py_NO_SANITIZE_MEMORY
61 #  define _Py_NO_SANITIZE_MEMORY
62 #endif
63 
64 #ifdef WITH_PYMALLOC
65 
66 #ifdef MS_WINDOWS
67 #  include <windows.h>
68 #elif defined(HAVE_MMAP)
69 #  include <sys/mman.h>
70 #  ifdef MAP_ANONYMOUS
71 #    define ARENAS_USE_MMAP
72 #  endif
73 #endif
74 
75 /* Forward declaration */
76 static void* _PyObject_Malloc(void *ctx, size_t size);
77 static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
78 static void _PyObject_Free(void *ctx, void *p);
79 static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
80 #endif
81 
82 
83 static void *
_PyMem_RawMalloc(void * ctx,size_t size)84 _PyMem_RawMalloc(void *ctx, size_t size)
85 {
86     /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
87        for malloc(0), which would be treated as an error. Some platforms would
88        return a pointer with no memory behind it, which would break pymalloc.
89        To solve these problems, allocate an extra byte. */
90     if (size == 0)
91         size = 1;
92     return malloc(size);
93 }
94 
95 static void *
_PyMem_RawCalloc(void * ctx,size_t nelem,size_t elsize)96 _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
97 {
98     /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
99        for calloc(0, 0), which would be treated as an error. Some platforms
100        would return a pointer with no memory behind it, which would break
101        pymalloc.  To solve these problems, allocate an extra byte. */
102     if (nelem == 0 || elsize == 0) {
103         nelem = 1;
104         elsize = 1;
105     }
106     return calloc(nelem, elsize);
107 }
108 
109 static void *
_PyMem_RawRealloc(void * ctx,void * ptr,size_t size)110 _PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
111 {
112     if (size == 0)
113         size = 1;
114     return realloc(ptr, size);
115 }
116 
117 static void
_PyMem_RawFree(void * ctx,void * ptr)118 _PyMem_RawFree(void *ctx, void *ptr)
119 {
120     free(ptr);
121 }
122 
123 
124 #ifdef MS_WINDOWS
125 static void *
_PyObject_ArenaVirtualAlloc(void * ctx,size_t size)126 _PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
127 {
128     return VirtualAlloc(NULL, size,
129                         MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
130 }
131 
132 static void
_PyObject_ArenaVirtualFree(void * ctx,void * ptr,size_t size)133 _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
134 {
135     VirtualFree(ptr, 0, MEM_RELEASE);
136 }
137 
138 #elif defined(ARENAS_USE_MMAP)
139 static void *
_PyObject_ArenaMmap(void * ctx,size_t size)140 _PyObject_ArenaMmap(void *ctx, size_t size)
141 {
142     void *ptr;
143     ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
144                MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
145     if (ptr == MAP_FAILED)
146         return NULL;
147     assert(ptr != NULL);
148     return ptr;
149 }
150 
151 static void
_PyObject_ArenaMunmap(void * ctx,void * ptr,size_t size)152 _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
153 {
154     munmap(ptr, size);
155 }
156 
157 #else
158 static void *
_PyObject_ArenaMalloc(void * ctx,size_t size)159 _PyObject_ArenaMalloc(void *ctx, size_t size)
160 {
161     return malloc(size);
162 }
163 
164 static void
_PyObject_ArenaFree(void * ctx,void * ptr,size_t size)165 _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
166 {
167     free(ptr);
168 }
169 #endif
170 
171 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
172 #ifdef WITH_PYMALLOC
173 #  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
174 #endif
175 
176 #define PYRAW_ALLOC MALLOC_ALLOC
177 #ifdef WITH_PYMALLOC
178 #  define PYOBJ_ALLOC PYMALLOC_ALLOC
179 #else
180 #  define PYOBJ_ALLOC MALLOC_ALLOC
181 #endif
182 #define PYMEM_ALLOC PYOBJ_ALLOC
183 
184 typedef struct {
185     /* We tag each block with an API ID in order to tag API violations */
186     char api_id;
187     PyMemAllocatorEx alloc;
188 } debug_alloc_api_t;
189 static struct {
190     debug_alloc_api_t raw;
191     debug_alloc_api_t mem;
192     debug_alloc_api_t obj;
193 } _PyMem_Debug = {
194     {'r', PYRAW_ALLOC},
195     {'m', PYMEM_ALLOC},
196     {'o', PYOBJ_ALLOC}
197     };
198 
199 #define PYDBGRAW_ALLOC \
200     {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
201 #define PYDBGMEM_ALLOC \
202     {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
203 #define PYDBGOBJ_ALLOC \
204     {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
205 
206 #ifdef Py_DEBUG
207 static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
208 static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
209 static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
210 #else
211 static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
212 static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
213 static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
214 #endif
215 
216 
217 static int
pymem_set_default_allocator(PyMemAllocatorDomain domain,int debug,PyMemAllocatorEx * old_alloc)218 pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
219                             PyMemAllocatorEx *old_alloc)
220 {
221     if (old_alloc != NULL) {
222         PyMem_GetAllocator(domain, old_alloc);
223     }
224 
225 
226     PyMemAllocatorEx new_alloc;
227     switch(domain)
228     {
229     case PYMEM_DOMAIN_RAW:
230         new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
231         break;
232     case PYMEM_DOMAIN_MEM:
233         new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
234         break;
235     case PYMEM_DOMAIN_OBJ:
236         new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
237         break;
238     default:
239         /* unknown domain */
240         return -1;
241     }
242     PyMem_SetAllocator(domain, &new_alloc);
243     if (debug) {
244         _PyMem_SetupDebugHooksDomain(domain);
245     }
246     return 0;
247 }
248 
249 
250 int
_PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,PyMemAllocatorEx * old_alloc)251 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
252                            PyMemAllocatorEx *old_alloc)
253 {
254 #ifdef Py_DEBUG
255     const int debug = 1;
256 #else
257     const int debug = 0;
258 #endif
259     return pymem_set_default_allocator(domain, debug, old_alloc);
260 }
261 
262 
263 int
_PyMem_SetupAllocators(const char * opt)264 _PyMem_SetupAllocators(const char *opt)
265 {
266     if (opt == NULL || *opt == '\0') {
267         /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
268            options): use default memory allocators */
269         opt = "default";
270     }
271 
272     if (strcmp(opt, "default") == 0) {
273         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
274         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
275         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
276     }
277     else if (strcmp(opt, "debug") == 0) {
278         (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
279         (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
280         (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
281     }
282 #ifdef WITH_PYMALLOC
283     else if (strcmp(opt, "pymalloc") == 0 || strcmp(opt, "pymalloc_debug") == 0) {
284         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
285         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
286 
287         PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
288         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
289         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);
290 
291         if (strcmp(opt, "pymalloc_debug") == 0) {
292             PyMem_SetupDebugHooks();
293         }
294     }
295 #endif
296     else if (strcmp(opt, "malloc") == 0 || strcmp(opt, "malloc_debug") == 0) {
297         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
298         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
299         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
300         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);
301 
302         if (strcmp(opt, "malloc_debug") == 0) {
303             PyMem_SetupDebugHooks();
304         }
305     }
306     else {
307         /* unknown allocator */
308         return -1;
309     }
310     return 0;
311 }
312 
313 
314 static int
pymemallocator_eq(PyMemAllocatorEx * a,PyMemAllocatorEx * b)315 pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
316 {
317     return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
318 }
319 
320 
321 const char*
_PyMem_GetAllocatorsName(void)322 _PyMem_GetAllocatorsName(void)
323 {
324     PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
325 #ifdef WITH_PYMALLOC
326     PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
327 #endif
328 
329     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
330         pymemallocator_eq(&_PyMem, &malloc_alloc) &&
331         pymemallocator_eq(&_PyObject, &malloc_alloc))
332     {
333         return "malloc";
334     }
335 #ifdef WITH_PYMALLOC
336     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
337         pymemallocator_eq(&_PyMem, &pymalloc) &&
338         pymemallocator_eq(&_PyObject, &pymalloc))
339     {
340         return "pymalloc";
341     }
342 #endif
343 
344     PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
345     PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
346     PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
347 
348     if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
349         pymemallocator_eq(&_PyMem, &dbg_mem) &&
350         pymemallocator_eq(&_PyObject, &dbg_obj))
351     {
352         /* Debug hooks installed */
353         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
354             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
355             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
356         {
357             return "malloc_debug";
358         }
359 #ifdef WITH_PYMALLOC
360         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
361             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
362             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
363         {
364             return "pymalloc_debug";
365         }
366 #endif
367     }
368     return NULL;
369 }
370 
371 
372 #undef MALLOC_ALLOC
373 #undef PYMALLOC_ALLOC
374 #undef PYRAW_ALLOC
375 #undef PYMEM_ALLOC
376 #undef PYOBJ_ALLOC
377 #undef PYDBGRAW_ALLOC
378 #undef PYDBGMEM_ALLOC
379 #undef PYDBGOBJ_ALLOC
380 
381 
382 static PyObjectArenaAllocator _PyObject_Arena = {NULL,
383 #ifdef MS_WINDOWS
384     _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
385 #elif defined(ARENAS_USE_MMAP)
386     _PyObject_ArenaMmap, _PyObject_ArenaMunmap
387 #else
388     _PyObject_ArenaMalloc, _PyObject_ArenaFree
389 #endif
390     };
391 
392 #ifdef WITH_PYMALLOC
393 static int
_PyMem_DebugEnabled(void)394 _PyMem_DebugEnabled(void)
395 {
396     return (_PyObject.malloc == _PyMem_DebugMalloc);
397 }
398 
399 static int
_PyMem_PymallocEnabled(void)400 _PyMem_PymallocEnabled(void)
401 {
402     if (_PyMem_DebugEnabled()) {
403         return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
404     }
405     else {
406         return (_PyObject.malloc == _PyObject_Malloc);
407     }
408 }
409 #endif
410 
411 
412 static void
_PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)413 _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
414 {
415     PyMemAllocatorEx alloc;
416 
417     if (domain == PYMEM_DOMAIN_RAW) {
418         if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
419             return;
420         }
421 
422         PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
423         alloc.ctx = &_PyMem_Debug.raw;
424         alloc.malloc = _PyMem_DebugRawMalloc;
425         alloc.calloc = _PyMem_DebugRawCalloc;
426         alloc.realloc = _PyMem_DebugRawRealloc;
427         alloc.free = _PyMem_DebugRawFree;
428         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
429     }
430     else if (domain == PYMEM_DOMAIN_MEM) {
431         if (_PyMem.malloc == _PyMem_DebugMalloc) {
432             return;
433         }
434 
435         PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
436         alloc.ctx = &_PyMem_Debug.mem;
437         alloc.malloc = _PyMem_DebugMalloc;
438         alloc.calloc = _PyMem_DebugCalloc;
439         alloc.realloc = _PyMem_DebugRealloc;
440         alloc.free = _PyMem_DebugFree;
441         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
442     }
443     else if (domain == PYMEM_DOMAIN_OBJ)  {
444         if (_PyObject.malloc == _PyMem_DebugMalloc) {
445             return;
446         }
447 
448         PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
449         alloc.ctx = &_PyMem_Debug.obj;
450         alloc.malloc = _PyMem_DebugMalloc;
451         alloc.calloc = _PyMem_DebugCalloc;
452         alloc.realloc = _PyMem_DebugRealloc;
453         alloc.free = _PyMem_DebugFree;
454         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
455     }
456 }
457 
458 
459 void
PyMem_SetupDebugHooks(void)460 PyMem_SetupDebugHooks(void)
461 {
462     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
463     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
464     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
465 }
466 
467 void
PyMem_GetAllocator(PyMemAllocatorDomain domain,PyMemAllocatorEx * allocator)468 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
469 {
470     switch(domain)
471     {
472     case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
473     case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
474     case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
475     default:
476         /* unknown domain: set all attributes to NULL */
477         allocator->ctx = NULL;
478         allocator->malloc = NULL;
479         allocator->calloc = NULL;
480         allocator->realloc = NULL;
481         allocator->free = NULL;
482     }
483 }
484 
485 void
PyMem_SetAllocator(PyMemAllocatorDomain domain,PyMemAllocatorEx * allocator)486 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
487 {
488     switch(domain)
489     {
490     case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
491     case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
492     case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
493     /* ignore unknown domain */
494     }
495 }
496 
497 void
PyObject_GetArenaAllocator(PyObjectArenaAllocator * allocator)498 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
499 {
500     *allocator = _PyObject_Arena;
501 }
502 
503 void
PyObject_SetArenaAllocator(PyObjectArenaAllocator * allocator)504 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
505 {
506     _PyObject_Arena = *allocator;
507 }
508 
509 void *
PyMem_RawMalloc(size_t size)510 PyMem_RawMalloc(size_t size)
511 {
512     /*
513      * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
514      * Most python internals blindly use a signed Py_ssize_t to track
515      * things without checking for overflows or negatives.
516      * As size_t is unsigned, checking for size < 0 is not required.
517      */
518     if (size > (size_t)PY_SSIZE_T_MAX)
519         return NULL;
520     return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
521 }
522 
523 void *
PyMem_RawCalloc(size_t nelem,size_t elsize)524 PyMem_RawCalloc(size_t nelem, size_t elsize)
525 {
526     /* see PyMem_RawMalloc() */
527     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
528         return NULL;
529     return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
530 }
531 
532 void*
PyMem_RawRealloc(void * ptr,size_t new_size)533 PyMem_RawRealloc(void *ptr, size_t new_size)
534 {
535     /* see PyMem_RawMalloc() */
536     if (new_size > (size_t)PY_SSIZE_T_MAX)
537         return NULL;
538     return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
539 }
540 
PyMem_RawFree(void * ptr)541 void PyMem_RawFree(void *ptr)
542 {
543     _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
544 }
545 
546 
547 void *
PyMem_Malloc(size_t size)548 PyMem_Malloc(size_t size)
549 {
550     /* see PyMem_RawMalloc() */
551     if (size > (size_t)PY_SSIZE_T_MAX)
552         return NULL;
553     return _PyMem.malloc(_PyMem.ctx, size);
554 }
555 
556 void *
PyMem_Calloc(size_t nelem,size_t elsize)557 PyMem_Calloc(size_t nelem, size_t elsize)
558 {
559     /* see PyMem_RawMalloc() */
560     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
561         return NULL;
562     return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
563 }
564 
565 void *
PyMem_Realloc(void * ptr,size_t new_size)566 PyMem_Realloc(void *ptr, size_t new_size)
567 {
568     /* see PyMem_RawMalloc() */
569     if (new_size > (size_t)PY_SSIZE_T_MAX)
570         return NULL;
571     return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
572 }
573 
574 void
PyMem_Free(void * ptr)575 PyMem_Free(void *ptr)
576 {
577     _PyMem.free(_PyMem.ctx, ptr);
578 }
579 
580 
581 wchar_t*
_PyMem_RawWcsdup(const wchar_t * str)582 _PyMem_RawWcsdup(const wchar_t *str)
583 {
584     assert(str != NULL);
585 
586     size_t len = wcslen(str);
587     if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
588         return NULL;
589     }
590 
591     size_t size = (len + 1) * sizeof(wchar_t);
592     wchar_t *str2 = PyMem_RawMalloc(size);
593     if (str2 == NULL) {
594         return NULL;
595     }
596 
597     memcpy(str2, str, size);
598     return str2;
599 }
600 
601 char *
_PyMem_RawStrdup(const char * str)602 _PyMem_RawStrdup(const char *str)
603 {
604     assert(str != NULL);
605     size_t size = strlen(str) + 1;
606     char *copy = PyMem_RawMalloc(size);
607     if (copy == NULL) {
608         return NULL;
609     }
610     memcpy(copy, str, size);
611     return copy;
612 }
613 
614 char *
_PyMem_Strdup(const char * str)615 _PyMem_Strdup(const char *str)
616 {
617     assert(str != NULL);
618     size_t size = strlen(str) + 1;
619     char *copy = PyMem_Malloc(size);
620     if (copy == NULL) {
621         return NULL;
622     }
623     memcpy(copy, str, size);
624     return copy;
625 }
626 
627 void *
PyObject_Malloc(size_t size)628 PyObject_Malloc(size_t size)
629 {
630     /* see PyMem_RawMalloc() */
631     if (size > (size_t)PY_SSIZE_T_MAX)
632         return NULL;
633     return _PyObject.malloc(_PyObject.ctx, size);
634 }
635 
636 void *
PyObject_Calloc(size_t nelem,size_t elsize)637 PyObject_Calloc(size_t nelem, size_t elsize)
638 {
639     /* see PyMem_RawMalloc() */
640     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
641         return NULL;
642     return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
643 }
644 
645 void *
PyObject_Realloc(void * ptr,size_t new_size)646 PyObject_Realloc(void *ptr, size_t new_size)
647 {
648     /* see PyMem_RawMalloc() */
649     if (new_size > (size_t)PY_SSIZE_T_MAX)
650         return NULL;
651     return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
652 }
653 
654 void
PyObject_Free(void * ptr)655 PyObject_Free(void *ptr)
656 {
657     _PyObject.free(_PyObject.ctx, ptr);
658 }
659 
660 
661 #ifdef WITH_PYMALLOC
662 
663 #ifdef WITH_VALGRIND
664 #include <valgrind/valgrind.h>
665 
666 /* If we're using GCC, use __builtin_expect() to reduce overhead of
667    the valgrind checks */
668 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
669 #  define UNLIKELY(value) __builtin_expect((value), 0)
670 #else
671 #  define UNLIKELY(value) (value)
672 #endif
673 
674 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
675 static int running_on_valgrind = -1;
676 #endif
677 
678 
679 /* An object allocator for Python.
680 
681    Here is an introduction to the layers of the Python memory architecture,
682    showing where the object allocator is actually used (layer +2), It is
683    called for every object allocation and deallocation (PyObject_New/Del),
684    unless the object-specific allocators implement a proprietary allocation
685    scheme (ex.: ints use a simple free list). This is also the place where
686    the cyclic garbage collector operates selectively on container objects.
687 
688 
689     Object-specific allocators
690     _____   ______   ______       ________
691    [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
692 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
693     _______________________________       |                           |
694    [   Python's object allocator   ]      |                           |
695 +2 | ####### Object memory ####### | <------ Internal buffers ------> |
696     ______________________________________________________________    |
697    [          Python's raw memory allocator (PyMem_ API)          ]   |
698 +1 | <----- Python memory (under PyMem manager's control) ------> |   |
699     __________________________________________________________________
700    [    Underlying general-purpose allocator (ex: C library malloc)   ]
701  0 | <------ Virtual memory allocated for the python process -------> |
702 
703    =========================================================================
704     _______________________________________________________________________
705    [                OS-specific Virtual Memory Manager (VMM)               ]
706 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
707     __________________________________   __________________________________
708    [                                  ] [                                  ]
709 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
710 
711 */
712 /*==========================================================================*/
713 
714 /* A fast, special-purpose memory allocator for small blocks, to be used
715    on top of a general-purpose malloc -- heavily based on previous art. */
716 
717 /* Vladimir Marangozov -- August 2000 */
718 
719 /*
720  * "Memory management is where the rubber meets the road -- if we do the wrong
721  * thing at any level, the results will not be good. And if we don't make the
722  * levels work well together, we are in serious trouble." (1)
723  *
724  * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
725  *    "Dynamic Storage Allocation: A Survey and Critical Review",
726  *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
727  */
728 
729 /* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
730 
731 /*==========================================================================*/
732 
733 /*
734  * Allocation strategy abstract:
735  *
736  * For small requests, the allocator sub-allocates <Big> blocks of memory.
737  * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
738  * system's allocator.
739  *
740  * Small requests are grouped in size classes spaced 8 bytes apart, due
741  * to the required valid alignment of the returned address. Requests of
742  * a particular size are serviced from memory pools of 4K (one VMM page).
743  * Pools are fragmented on demand and contain free lists of blocks of one
744  * particular size class. In other words, there is a fixed-size allocator
745  * for each size class. Free pools are shared by the different allocators
746  * thus minimizing the space reserved for a particular size class.
747  *
748  * This allocation strategy is a variant of what is known as "simple
749  * segregated storage based on array of free lists". The main drawback of
750  * simple segregated storage is that we might end up with lot of reserved
751  * memory for the different free lists, which degenerate in time. To avoid
752  * this, we partition each free list in pools and we share dynamically the
753  * reserved space between all free lists. This technique is quite efficient
754  * for memory intensive programs which allocate mainly small-sized blocks.
755  *
756  * For small requests we have the following table:
757  *
758  * Request in bytes     Size of allocated block      Size class idx
759  * ----------------------------------------------------------------
760  *        1-8                     8                       0
761  *        9-16                   16                       1
762  *       17-24                   24                       2
763  *       25-32                   32                       3
764  *       33-40                   40                       4
765  *       41-48                   48                       5
766  *       49-56                   56                       6
767  *       57-64                   64                       7
768  *       65-72                   72                       8
769  *        ...                   ...                     ...
770  *      497-504                 504                      62
771  *      505-512                 512                      63
772  *
773  *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
774  *      allocator.
775  */
776 
777 /*==========================================================================*/
778 
779 /*
780  * -- Main tunable settings section --
781  */
782 
783 /*
784  * Alignment of addresses returned to the user. 8-bytes alignment works
785  * on most current architectures (with 32-bit or 64-bit address busses).
786  * The alignment value is also used for grouping small requests in size
787  * classes spaced ALIGNMENT bytes apart.
788  *
789  * You shouldn't change this unless you know what you are doing.
790  */
791 #define ALIGNMENT               8               /* must be 2^N */
792 #define ALIGNMENT_SHIFT         3
793 
794 /* Return the number of bytes in size class I, as a uint. */
795 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
796 
797 /*
798  * Max size threshold below which malloc requests are considered to be
799  * small enough in order to use preallocated memory pools. You can tune
800  * this value according to your application behaviour and memory needs.
801  *
802  * Note: a size threshold of 512 guarantees that newly created dictionaries
803  * will be allocated from preallocated memory pools on 64-bit.
804  *
805  * The following invariants must hold:
806  *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
807  *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
808  *
809  * Although not required, for better performance and space efficiency,
810  * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
811  */
812 #define SMALL_REQUEST_THRESHOLD 512
813 #define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
814 
815 /*
816  * The system's VMM page size can be obtained on most unices with a
817  * getpagesize() call or deduced from various header files. To make
818  * things simpler, we assume that it is 4K, which is OK for most systems.
819  * It is probably better if this is the native page size, but it doesn't
820  * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
821  * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
822  * violation fault.  4K is apparently OK for all the platforms that python
823  * currently targets.
824  */
825 #define SYSTEM_PAGE_SIZE        (4 * 1024)
826 #define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
827 
828 /*
829  * Maximum amount of memory managed by the allocator for small requests.
830  */
831 #ifdef WITH_MEMORY_LIMITS
832 #ifndef SMALL_MEMORY_LIMIT
833 #define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
834 #endif
835 #endif
836 
837 /*
838  * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
839  * on a page boundary. This is a reserved virtual address space for the
840  * current process (obtained through a malloc()/mmap() call). In no way this
841  * means that the memory arenas will be used entirely. A malloc(<Big>) is
842  * usually an address range reservation for <Big> bytes, unless all pages within
843  * this space are referenced subsequently. So malloc'ing big blocks and not
844  * using them does not mean "wasting memory". It's an addressable range
845  * wastage...
846  *
847  * Arenas are allocated with mmap() on systems supporting anonymous memory
848  * mappings to reduce heap fragmentation.
849  */
850 #define ARENA_SIZE              (256 << 10)     /* 256KB */
851 
852 #ifdef WITH_MEMORY_LIMITS
853 #define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
854 #endif
855 
856 /*
857  * Size of the pools used for small blocks. Should be a power of 2,
858  * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
859  */
860 #define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
861 #define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
862 
863 /*
864  * -- End of tunable settings section --
865  */
866 
867 /*==========================================================================*/
868 
869 /*
870  * Locking
871  *
872  * To reduce lock contention, it would probably be better to refine the
873  * crude function locking with per size class locking. I'm not positive
874  * however, whether it's worth switching to such locking policy because
875  * of the performance penalty it might introduce.
876  *
877  * The following macros describe the simplest (should also be the fastest)
878  * lock object on a particular platform and the init/fini/lock/unlock
879  * operations on it. The locks defined here are not expected to be recursive
880  * because it is assumed that they will always be called in the order:
881  * INIT, [LOCK, UNLOCK]*, FINI.
882  */
883 
884 /*
885  * Python's threads are serialized, so object malloc locking is disabled.
886  */
887 #define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
888 #define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
889 #define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
890 #define SIMPLELOCK_LOCK(lock)   /* acquire released lock */
891 #define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */
892 
893 /* When you say memory, my mind reasons in terms of (pointers to) blocks */
894 typedef uint8_t block;
895 
896 /* Pool for small blocks. */
897 struct pool_header {
898     union { block *_padding;
899             uint count; } ref;          /* number of allocated blocks    */
900     block *freeblock;                   /* pool's free list head         */
901     struct pool_header *nextpool;       /* next pool of this size class  */
902     struct pool_header *prevpool;       /* previous pool       ""        */
903     uint arenaindex;                    /* index into arenas of base adr */
904     uint szidx;                         /* block size class index        */
905     uint nextoffset;                    /* bytes to virgin block         */
906     uint maxnextoffset;                 /* largest valid nextoffset      */
907 };
908 
909 typedef struct pool_header *poolp;
910 
911 /* Record keeping for arenas. */
912 struct arena_object {
913     /* The address of the arena, as returned by malloc.  Note that 0
914      * will never be returned by a successful malloc, and is used
915      * here to mark an arena_object that doesn't correspond to an
916      * allocated arena.
917      */
918     uintptr_t address;
919 
920     /* Pool-aligned pointer to the next pool to be carved off. */
921     block* pool_address;
922 
923     /* The number of available pools in the arena:  free pools + never-
924      * allocated pools.
925      */
926     uint nfreepools;
927 
928     /* The total number of pools in the arena, whether or not available. */
929     uint ntotalpools;
930 
931     /* Singly-linked list of available pools. */
932     struct pool_header* freepools;
933 
934     /* Whenever this arena_object is not associated with an allocated
935      * arena, the nextarena member is used to link all unassociated
936      * arena_objects in the singly-linked `unused_arena_objects` list.
937      * The prevarena member is unused in this case.
938      *
939      * When this arena_object is associated with an allocated arena
940      * with at least one available pool, both members are used in the
941      * doubly-linked `usable_arenas` list, which is maintained in
942      * increasing order of `nfreepools` values.
943      *
944      * Else this arena_object is associated with an allocated arena
945      * all of whose pools are in use.  `nextarena` and `prevarena`
946      * are both meaningless in this case.
947      */
948     struct arena_object* nextarena;
949     struct arena_object* prevarena;
950 };
951 
952 #define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
953 
954 #define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
955 
956 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
957 #define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
958 
959 /* Return total number of blocks in pool of size index I, as a uint. */
960 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
961 
962 /*==========================================================================*/
963 
964 /*
965  * This malloc lock
966  */
967 SIMPLELOCK_DECL(_malloc_lock)
968 #define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
969 #define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
970 #define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
971 #define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)
972 
973 /*
974  * Pool table -- headed, circular, doubly-linked lists of partially used pools.
975 
976 This is involved.  For an index i, usedpools[i+i] is the header for a list of
977 all partially used pools holding small blocks with "size class idx" i. So
978 usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
979 16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
980 
981 Pools are carved off an arena's highwater mark (an arena_object's pool_address
982 member) as needed.  Once carved off, a pool is in one of three states forever
983 after:
984 
985 used == partially used, neither empty nor full
986     At least one block in the pool is currently allocated, and at least one
987     block in the pool is not currently allocated (note this implies a pool
988     has room for at least two blocks).
989     This is a pool's initial state, as a pool is created only when malloc
990     needs space.
991     The pool holds blocks of a fixed size, and is in the circular list headed
992     at usedpools[i] (see above).  It's linked to the other used pools of the
993     same size class via the pool_header's nextpool and prevpool members.
994     If all but one block is currently allocated, a malloc can cause a
995     transition to the full state.  If all but one block is not currently
996     allocated, a free can cause a transition to the empty state.
997 
998 full == all the pool's blocks are currently allocated
999     On transition to full, a pool is unlinked from its usedpools[] list.
1000     It's not linked to from anything then anymore, and its nextpool and
1001     prevpool members are meaningless until it transitions back to used.
1002     A free of a block in a full pool puts the pool back in the used state.
1003     Then it's linked in at the front of the appropriate usedpools[] list, so
1004     that the next allocation for its size class will reuse the freed block.
1005 
1006 empty == all the pool's blocks are currently available for allocation
1007     On transition to empty, a pool is unlinked from its usedpools[] list,
1008     and linked to the front of its arena_object's singly-linked freepools list,
1009     via its nextpool member.  The prevpool member has no meaning in this case.
1010     Empty pools have no inherent size class:  the next time a malloc finds
1011     an empty list in usedpools[], it takes the first pool off of freepools.
1012     If the size class needed happens to be the same as the size class the pool
1013     last had, some pool initialization can be skipped.
1014 
1015 
1016 Block Management
1017 
1018 Blocks within pools are again carved out as needed.  pool->freeblock points to
1019 the start of a singly-linked list of free blocks within the pool.  When a
1020 block is freed, it's inserted at the front of its pool's freeblock list.  Note
1021 that the available blocks in a pool are *not* linked all together when a pool
1022 is initialized.  Instead only "the first two" (lowest addresses) blocks are
1023 set up, returning the first such block, and setting pool->freeblock to a
1024 one-block list holding the second such block.  This is consistent with that
1025 pymalloc strives at all levels (arena, pool, and block) never to touch a piece
1026 of memory until it's actually needed.
1027 
1028 So long as a pool is in the used state, we're certain there *is* a block
1029 available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
1030 points to the end of the free list before we've carved the entire pool into
1031 blocks, that means we simply haven't yet gotten to one of the higher-address
1032 blocks.  The offset from the pool_header to the start of "the next" virgin
1033 block is stored in the pool_header nextoffset member, and the largest value
1034 of nextoffset that makes sense is stored in the maxnextoffset member when a
1035 pool is initialized.  All the blocks in a pool have been passed out at least
1036 once when and only when nextoffset > maxnextoffset.
1037 
1038 
1039 Major obscurity:  While the usedpools vector is declared to have poolp
1040 entries, it doesn't really.  It really contains two pointers per (conceptual)
1041 poolp entry, the nextpool and prevpool members of a pool_header.  The
1042 excruciating initialization code below fools C so that
1043 
1044     usedpool[i+i]
1045 
1046 "acts like" a genuine poolp, but only so long as you only reference its
1047 nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
1048 compensating for that a pool_header's nextpool and prevpool members
1049 immediately follow a pool_header's first two members:
1050 
1051     union { block *_padding;
1052             uint count; } ref;
1053     block *freeblock;
1054 
1055 each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
1056 contains is a fudged-up pointer p such that *if* C believes it's a poolp
1057 pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
1058 circular list is empty).
1059 
1060 It's unclear why the usedpools setup is so convoluted.  It could be to
1061 minimize the amount of cache required to hold this heavily-referenced table
1062 (which only *needs* the two interpool pointer members of a pool_header). OTOH,
1063 referencing code has to remember to "double the index" and doing so isn't
1064 free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
1065 on that C doesn't insert any padding anywhere in a pool_header at or before
1066 the prevpool member.
1067 **************************************************************************** */
1068 
1069 #define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
1070 #define PT(x)   PTA(x), PTA(x)
1071 
1072 static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
1073     PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
1074 #if NB_SMALL_SIZE_CLASSES > 8
1075     , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
1076 #if NB_SMALL_SIZE_CLASSES > 16
1077     , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
1078 #if NB_SMALL_SIZE_CLASSES > 24
1079     , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
1080 #if NB_SMALL_SIZE_CLASSES > 32
1081     , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
1082 #if NB_SMALL_SIZE_CLASSES > 40
1083     , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
1084 #if NB_SMALL_SIZE_CLASSES > 48
1085     , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
1086 #if NB_SMALL_SIZE_CLASSES > 56
1087     , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
1088 #if NB_SMALL_SIZE_CLASSES > 64
1089 #error "NB_SMALL_SIZE_CLASSES should be less than 64"
1090 #endif /* NB_SMALL_SIZE_CLASSES > 64 */
1091 #endif /* NB_SMALL_SIZE_CLASSES > 56 */
1092 #endif /* NB_SMALL_SIZE_CLASSES > 48 */
1093 #endif /* NB_SMALL_SIZE_CLASSES > 40 */
1094 #endif /* NB_SMALL_SIZE_CLASSES > 32 */
1095 #endif /* NB_SMALL_SIZE_CLASSES > 24 */
1096 #endif /* NB_SMALL_SIZE_CLASSES > 16 */
1097 #endif /* NB_SMALL_SIZE_CLASSES >  8 */
1098 };
1099 
1100 /*==========================================================================
1101 Arena management.
1102 
1103 `arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
1104 which may not be currently used (== they're arena_objects that aren't
1105 currently associated with an allocated arena).  Note that arenas proper are
1106 separately malloc'ed.
1107 
1108 Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
1109 we do try to free() arenas, and use some mild heuristic strategies to increase
1110 the likelihood that arenas eventually can be freed.
1111 
1112 unused_arena_objects
1113 
1114     This is a singly-linked list of the arena_objects that are currently not
1115     being used (no arena is associated with them).  Objects are taken off the
1116     head of the list in new_arena(), and are pushed on the head of the list in
1117     PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
1118     is on this list if and only if its .address member is 0.
1119 
1120 usable_arenas
1121 
1122     This is a doubly-linked list of the arena_objects associated with arenas
1123     that have pools available.  These pools are either waiting to be reused,
1124     or have not been used before.  The list is sorted to have the most-
1125     allocated arenas first (ascending order based on the nfreepools member).
1126     This means that the next allocation will come from a heavily used arena,
1127     which gives the nearly empty arenas a chance to be returned to the system.
1128     In my unscientific tests this dramatically improved the number of arenas
1129     that could be freed.
1130 
1131 Note that an arena_object associated with an arena all of whose pools are
1132 currently in use isn't on either list.
1133 */
1134 
1135 /* Array of objects used to track chunks of memory (arenas). */
1136 static struct arena_object* arenas = NULL;
1137 /* Number of slots currently allocated in the `arenas` vector. */
1138 static uint maxarenas = 0;
1139 
1140 /* The head of the singly-linked, NULL-terminated list of available
1141  * arena_objects.
1142  */
1143 static struct arena_object* unused_arena_objects = NULL;
1144 
1145 /* The head of the doubly-linked, NULL-terminated at each end, list of
1146  * arena_objects associated with arenas that have pools available.
1147  */
1148 static struct arena_object* usable_arenas = NULL;
1149 
1150 /* How many arena_objects do we initially allocate?
1151  * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
1152  * `arenas` vector.
1153  */
1154 #define INITIAL_ARENA_OBJECTS 16
1155 
1156 /* Number of arenas allocated that haven't been free()'d. */
1157 static size_t narenas_currently_allocated = 0;
1158 
1159 /* Total number of times malloc() called to allocate an arena. */
1160 static size_t ntimes_arena_allocated = 0;
1161 /* High water mark (max value ever seen) for narenas_currently_allocated. */
1162 static size_t narenas_highwater = 0;
1163 
1164 static Py_ssize_t _Py_AllocatedBlocks = 0;
1165 
1166 Py_ssize_t
_Py_GetAllocatedBlocks(void)1167 _Py_GetAllocatedBlocks(void)
1168 {
1169     return _Py_AllocatedBlocks;
1170 }
1171 
1172 
1173 /* Allocate a new arena.  If we run out of memory, return NULL.  Else
1174  * allocate a new arena, and return the address of an arena_object
1175  * describing the new arena.  It's expected that the caller will set
1176  * `usable_arenas` to the return value.
1177  */
1178 static struct arena_object*
new_arena(void)1179 new_arena(void)
1180 {
1181     struct arena_object* arenaobj;
1182     uint excess;        /* number of bytes above pool alignment */
1183     void *address;
1184     static int debug_stats = -1;
1185 
1186     if (debug_stats == -1) {
1187         const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1188         debug_stats = (opt != NULL && *opt != '\0');
1189     }
1190     if (debug_stats)
1191         _PyObject_DebugMallocStats(stderr);
1192 
1193     if (unused_arena_objects == NULL) {
1194         uint i;
1195         uint numarenas;
1196         size_t nbytes;
1197 
1198         /* Double the number of arena objects on each allocation.
1199          * Note that it's possible for `numarenas` to overflow.
1200          */
1201         numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1202         if (numarenas <= maxarenas)
1203             return NULL;                /* overflow */
1204 #if SIZEOF_SIZE_T <= SIZEOF_INT
1205         if (numarenas > SIZE_MAX / sizeof(*arenas))
1206             return NULL;                /* overflow */
1207 #endif
1208         nbytes = numarenas * sizeof(*arenas);
1209         arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
1210         if (arenaobj == NULL)
1211             return NULL;
1212         arenas = arenaobj;
1213 
1214         /* We might need to fix pointers that were copied.  However,
1215          * new_arena only gets called when all the pages in the
1216          * previous arenas are full.  Thus, there are *no* pointers
1217          * into the old array. Thus, we don't have to worry about
1218          * invalid pointers.  Just to be sure, some asserts:
1219          */
1220         assert(usable_arenas == NULL);
1221         assert(unused_arena_objects == NULL);
1222 
1223         /* Put the new arenas on the unused_arena_objects list. */
1224         for (i = maxarenas; i < numarenas; ++i) {
1225             arenas[i].address = 0;              /* mark as unassociated */
1226             arenas[i].nextarena = i < numarenas - 1 ?
1227                                    &arenas[i+1] : NULL;
1228         }
1229 
1230         /* Update globals. */
1231         unused_arena_objects = &arenas[maxarenas];
1232         maxarenas = numarenas;
1233     }
1234 
1235     /* Take the next available arena object off the head of the list. */
1236     assert(unused_arena_objects != NULL);
1237     arenaobj = unused_arena_objects;
1238     unused_arena_objects = arenaobj->nextarena;
1239     assert(arenaobj->address == 0);
1240     address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1241     if (address == NULL) {
1242         /* The allocation failed: return NULL after putting the
1243          * arenaobj back.
1244          */
1245         arenaobj->nextarena = unused_arena_objects;
1246         unused_arena_objects = arenaobj;
1247         return NULL;
1248     }
1249     arenaobj->address = (uintptr_t)address;
1250 
1251     ++narenas_currently_allocated;
1252     ++ntimes_arena_allocated;
1253     if (narenas_currently_allocated > narenas_highwater)
1254         narenas_highwater = narenas_currently_allocated;
1255     arenaobj->freepools = NULL;
1256     /* pool_address <- first pool-aligned address in the arena
1257        nfreepools <- number of whole pools that fit after alignment */
1258     arenaobj->pool_address = (block*)arenaobj->address;
1259     arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
1260     assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
1261     excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1262     if (excess != 0) {
1263         --arenaobj->nfreepools;
1264         arenaobj->pool_address += POOL_SIZE - excess;
1265     }
1266     arenaobj->ntotalpools = arenaobj->nfreepools;
1267 
1268     return arenaobj;
1269 }
1270 
1271 
1272 /*
1273 address_in_range(P, POOL)
1274 
1275 Return true if and only if P is an address that was allocated by pymalloc.
1276 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1277 (the caller is asked to compute this because the macro expands POOL more than
1278 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1279 variable and pass the latter to the macro; because address_in_range is
1280 called on every alloc/realloc/free, micro-efficiency is important here).
1281 
1282 Tricky:  Let B be the arena base address associated with the pool, B =
1283 arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
1284 
1285     B <= P < B + ARENA_SIZE
1286 
1287 Subtracting B throughout, this is true iff
1288 
1289     0 <= P-B < ARENA_SIZE
1290 
1291 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1292 
1293 Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
1294 before the first arena has been allocated.  `arenas` is still NULL in that
1295 case.  We're relying on that maxarenas is also 0 in that case, so that
1296 (POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
1297 into a NULL arenas.
1298 
1299 Details:  given P and POOL, the arena_object corresponding to P is AO =
1300 arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
1301 stores, etc), POOL is the correct address of P's pool, AO.address is the
1302 correct base address of the pool's arena, and P must be within ARENA_SIZE of
1303 AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
1304 (NULL)).  Therefore address_in_range correctly reports that obmalloc
1305 controls P.
1306 
1307 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1308 call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
1309 in this case -- it may even be uninitialized trash.  If the trash arenaindex
1310 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1311 control P.
1312 
1313 Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
1314 allocated arena, obmalloc controls all the memory in slice AO.address :
1315 AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
1316 so P doesn't lie in that slice, so the macro correctly reports that P is not
1317 controlled by obmalloc.
1318 
1319 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1320 arena_object (one not currently associated with an allocated arena),
1321 AO.address is 0, and the second test in the macro reduces to:
1322 
1323     P < ARENA_SIZE
1324 
1325 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1326 that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
1327 of the test still passes, and the third clause (AO.address != 0) is necessary
1328 to get the correct result:  AO.address is 0 in this case, so the macro
1329 correctly reports that P is not controlled by obmalloc (despite that P lies in
1330 slice AO.address : AO.address + ARENA_SIZE).
1331 
1332 Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
1333 2.5, arenas were never free()'ed, and an arenaindex < maxarena always
1334 corresponded to a currently-allocated arena, so the "P is not controlled by
1335 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1336 was impossible.
1337 
1338 Note that the logic is excruciating, and reading up possibly uninitialized
1339 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1340 creates problems for some memory debuggers.  The overwhelming advantage is
1341 that this test determines whether an arbitrary address is controlled by
1342 obmalloc in a small constant time, independent of the number of arenas
1343 obmalloc controls.  Since this test is needed at every entry point, it's
1344 extremely desirable that it be this fast.
1345 */
1346 
1347 static bool _Py_NO_ADDRESS_SAFETY_ANALYSIS
1348             _Py_NO_SANITIZE_THREAD
1349             _Py_NO_SANITIZE_MEMORY
address_in_range(void * p,poolp pool)1350 address_in_range(void *p, poolp pool)
1351 {
1352     // Since address_in_range may be reading from memory which was not allocated
1353     // by Python, it is important that pool->arenaindex is read only once, as
1354     // another thread may be concurrently modifying the value without holding
1355     // the GIL. The following dance forces the compiler to read pool->arenaindex
1356     // only once.
1357     uint arenaindex = *((volatile uint *)&pool->arenaindex);
1358     return arenaindex < maxarenas &&
1359         (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1360         arenas[arenaindex].address != 0;
1361 }
1362 
1363 
1364 /*==========================================================================*/
1365 
1366 /* pymalloc allocator
1367 
1368    The basic blocks are ordered by decreasing execution frequency,
1369    which minimizes the number of jumps in the most common cases,
1370    improves branching prediction and instruction scheduling (small
1371    block allocations typically result in a couple of instructions).
1372    Unless the optimizer reorders everything, being too smart...
1373 
1374    Return 1 if pymalloc allocated memory and wrote the pointer into *ptr_p.
1375 
1376    Return 0 if pymalloc failed to allocate the memory block: on bigger
1377    requests, on error in the code below (as a last chance to serve the request)
1378    or when the max memory limit has been reached. */
1379 static int
pymalloc_alloc(void * ctx,void ** ptr_p,size_t nbytes)1380 pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
1381 {
1382     block *bp;
1383     poolp pool;
1384     poolp next;
1385     uint size;
1386 
1387 #ifdef WITH_VALGRIND
1388     if (UNLIKELY(running_on_valgrind == -1)) {
1389         running_on_valgrind = RUNNING_ON_VALGRIND;
1390     }
1391     if (UNLIKELY(running_on_valgrind)) {
1392         return 0;
1393     }
1394 #endif
1395 
1396     if (nbytes == 0) {
1397         return 0;
1398     }
1399     if (nbytes > SMALL_REQUEST_THRESHOLD) {
1400         return 0;
1401     }
1402 
1403     LOCK();
1404     /*
1405      * Most frequent paths first
1406      */
1407     size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1408     pool = usedpools[size + size];
1409     if (pool != pool->nextpool) {
1410         /*
1411          * There is a used pool for this size class.
1412          * Pick up the head block of its free list.
1413          */
1414         ++pool->ref.count;
1415         bp = pool->freeblock;
1416         assert(bp != NULL);
1417         if ((pool->freeblock = *(block **)bp) != NULL) {
1418             goto success;
1419         }
1420 
1421         /*
1422          * Reached the end of the free list, try to extend it.
1423          */
1424         if (pool->nextoffset <= pool->maxnextoffset) {
1425             /* There is room for another block. */
1426             pool->freeblock = (block*)pool +
1427                               pool->nextoffset;
1428             pool->nextoffset += INDEX2SIZE(size);
1429             *(block **)(pool->freeblock) = NULL;
1430             goto success;
1431         }
1432 
1433         /* Pool is full, unlink from used pools. */
1434         next = pool->nextpool;
1435         pool = pool->prevpool;
1436         next->prevpool = pool;
1437         pool->nextpool = next;
1438         goto success;
1439     }
1440 
1441     /* There isn't a pool of the right size class immediately
1442      * available:  use a free pool.
1443      */
1444     if (usable_arenas == NULL) {
1445         /* No arena has a free pool:  allocate a new arena. */
1446 #ifdef WITH_MEMORY_LIMITS
1447         if (narenas_currently_allocated >= MAX_ARENAS) {
1448             goto failed;
1449         }
1450 #endif
1451         usable_arenas = new_arena();
1452         if (usable_arenas == NULL) {
1453             goto failed;
1454         }
1455         usable_arenas->nextarena =
1456             usable_arenas->prevarena = NULL;
1457     }
1458     assert(usable_arenas->address != 0);
1459 
1460     /* Try to get a cached free pool. */
1461     pool = usable_arenas->freepools;
1462     if (pool != NULL) {
1463         /* Unlink from cached pools. */
1464         usable_arenas->freepools = pool->nextpool;
1465 
1466         /* This arena already had the smallest nfreepools
1467          * value, so decreasing nfreepools doesn't change
1468          * that, and we don't need to rearrange the
1469          * usable_arenas list.  However, if the arena has
1470          * become wholly allocated, we need to remove its
1471          * arena_object from usable_arenas.
1472          */
1473         --usable_arenas->nfreepools;
1474         if (usable_arenas->nfreepools == 0) {
1475             /* Wholly allocated:  remove. */
1476             assert(usable_arenas->freepools == NULL);
1477             assert(usable_arenas->nextarena == NULL ||
1478                    usable_arenas->nextarena->prevarena ==
1479                    usable_arenas);
1480 
1481             usable_arenas = usable_arenas->nextarena;
1482             if (usable_arenas != NULL) {
1483                 usable_arenas->prevarena = NULL;
1484                 assert(usable_arenas->address != 0);
1485             }
1486         }
1487         else {
1488             /* nfreepools > 0:  it must be that freepools
1489              * isn't NULL, or that we haven't yet carved
1490              * off all the arena's pools for the first
1491              * time.
1492              */
1493             assert(usable_arenas->freepools != NULL ||
1494                    usable_arenas->pool_address <=
1495                    (block*)usable_arenas->address +
1496                        ARENA_SIZE - POOL_SIZE);
1497         }
1498 
1499     init_pool:
1500         /* Frontlink to used pools. */
1501         next = usedpools[size + size]; /* == prev */
1502         pool->nextpool = next;
1503         pool->prevpool = next;
1504         next->nextpool = pool;
1505         next->prevpool = pool;
1506         pool->ref.count = 1;
1507         if (pool->szidx == size) {
1508             /* Luckily, this pool last contained blocks
1509              * of the same size class, so its header
1510              * and free list are already initialized.
1511              */
1512             bp = pool->freeblock;
1513             assert(bp != NULL);
1514             pool->freeblock = *(block **)bp;
1515             goto success;
1516         }
1517         /*
1518          * Initialize the pool header, set up the free list to
1519          * contain just the second block, and return the first
1520          * block.
1521          */
1522         pool->szidx = size;
1523         size = INDEX2SIZE(size);
1524         bp = (block *)pool + POOL_OVERHEAD;
1525         pool->nextoffset = POOL_OVERHEAD + (size << 1);
1526         pool->maxnextoffset = POOL_SIZE - size;
1527         pool->freeblock = bp + size;
1528         *(block **)(pool->freeblock) = NULL;
1529         goto success;
1530     }
1531 
1532     /* Carve off a new pool. */
1533     assert(usable_arenas->nfreepools > 0);
1534     assert(usable_arenas->freepools == NULL);
1535     pool = (poolp)usable_arenas->pool_address;
1536     assert((block*)pool <= (block*)usable_arenas->address +
1537                              ARENA_SIZE - POOL_SIZE);
1538     pool->arenaindex = (uint)(usable_arenas - arenas);
1539     assert(&arenas[pool->arenaindex] == usable_arenas);
1540     pool->szidx = DUMMY_SIZE_IDX;
1541     usable_arenas->pool_address += POOL_SIZE;
1542     --usable_arenas->nfreepools;
1543 
1544     if (usable_arenas->nfreepools == 0) {
1545         assert(usable_arenas->nextarena == NULL ||
1546                usable_arenas->nextarena->prevarena ==
1547                usable_arenas);
1548         /* Unlink the arena:  it is completely allocated. */
1549         usable_arenas = usable_arenas->nextarena;
1550         if (usable_arenas != NULL) {
1551             usable_arenas->prevarena = NULL;
1552             assert(usable_arenas->address != 0);
1553         }
1554     }
1555 
1556     goto init_pool;
1557 
1558 success:
1559     UNLOCK();
1560     assert(bp != NULL);
1561     *ptr_p = (void *)bp;
1562     return 1;
1563 
1564 failed:
1565     UNLOCK();
1566     return 0;
1567 }
1568 
1569 
1570 static void *
_PyObject_Malloc(void * ctx,size_t nbytes)1571 _PyObject_Malloc(void *ctx, size_t nbytes)
1572 {
1573     void* ptr;
1574     if (pymalloc_alloc(ctx, &ptr, nbytes)) {
1575         _Py_AllocatedBlocks++;
1576         return ptr;
1577     }
1578 
1579     ptr = PyMem_RawMalloc(nbytes);
1580     if (ptr != NULL) {
1581         _Py_AllocatedBlocks++;
1582     }
1583     return ptr;
1584 }
1585 
1586 
1587 static void *
_PyObject_Calloc(void * ctx,size_t nelem,size_t elsize)1588 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
1589 {
1590     void* ptr;
1591 
1592     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
1593     size_t nbytes = nelem * elsize;
1594 
1595     if (pymalloc_alloc(ctx, &ptr, nbytes)) {
1596         memset(ptr, 0, nbytes);
1597         _Py_AllocatedBlocks++;
1598         return ptr;
1599     }
1600 
1601     ptr = PyMem_RawCalloc(nelem, elsize);
1602     if (ptr != NULL) {
1603         _Py_AllocatedBlocks++;
1604     }
1605     return ptr;
1606 }
1607 
1608 
1609 /* Free a memory block allocated by pymalloc_alloc().
1610    Return 1 if it was freed.
1611    Return 0 if the block was not allocated by pymalloc_alloc(). */
1612 static int
pymalloc_free(void * ctx,void * p)1613 pymalloc_free(void *ctx, void *p)
1614 {
1615     poolp pool;
1616     block *lastfree;
1617     poolp next, prev;
1618     uint size;
1619 
1620     assert(p != NULL);
1621 
1622 #ifdef WITH_VALGRIND
1623     if (UNLIKELY(running_on_valgrind > 0)) {
1624         return 0;
1625     }
1626 #endif
1627 
1628     pool = POOL_ADDR(p);
1629     if (!address_in_range(p, pool)) {
1630         return 0;
1631     }
1632     /* We allocated this address. */
1633 
1634     LOCK();
1635 
1636     /* Link p to the start of the pool's freeblock list.  Since
1637      * the pool had at least the p block outstanding, the pool
1638      * wasn't empty (so it's already in a usedpools[] list, or
1639      * was full and is in no list -- it's not in the freeblocks
1640      * list in any case).
1641      */
1642     assert(pool->ref.count > 0);            /* else it was empty */
1643     *(block **)p = lastfree = pool->freeblock;
1644     pool->freeblock = (block *)p;
1645     if (!lastfree) {
1646         /* Pool was full, so doesn't currently live in any list:
1647          * link it to the front of the appropriate usedpools[] list.
1648          * This mimics LRU pool usage for new allocations and
1649          * targets optimal filling when several pools contain
1650          * blocks of the same size class.
1651          */
1652         --pool->ref.count;
1653         assert(pool->ref.count > 0);            /* else the pool is empty */
1654         size = pool->szidx;
1655         next = usedpools[size + size];
1656         prev = next->prevpool;
1657 
1658         /* insert pool before next:   prev <-> pool <-> next */
1659         pool->nextpool = next;
1660         pool->prevpool = prev;
1661         next->prevpool = pool;
1662         prev->nextpool = pool;
1663         goto success;
1664     }
1665 
1666     struct arena_object* ao;
1667     uint nf;  /* ao->nfreepools */
1668 
1669     /* freeblock wasn't NULL, so the pool wasn't full,
1670      * and the pool is in a usedpools[] list.
1671      */
1672     if (--pool->ref.count != 0) {
1673         /* pool isn't empty:  leave it in usedpools */
1674         goto success;
1675     }
1676     /* Pool is now empty:  unlink from usedpools, and
1677      * link to the front of freepools.  This ensures that
1678      * previously freed pools will be allocated later
1679      * (being not referenced, they are perhaps paged out).
1680      */
1681     next = pool->nextpool;
1682     prev = pool->prevpool;
1683     next->prevpool = prev;
1684     prev->nextpool = next;
1685 
1686     /* Link the pool to freepools.  This is a singly-linked
1687      * list, and pool->prevpool isn't used there.
1688      */
1689     ao = &arenas[pool->arenaindex];
1690     pool->nextpool = ao->freepools;
1691     ao->freepools = pool;
1692     nf = ++ao->nfreepools;
1693 
1694     /* All the rest is arena management.  We just freed
1695      * a pool, and there are 4 cases for arena mgmt:
1696      * 1. If all the pools are free, return the arena to
1697      *    the system free().
1698      * 2. If this is the only free pool in the arena,
1699      *    add the arena back to the `usable_arenas` list.
1700      * 3. If the "next" arena has a smaller count of free
1701      *    pools, we have to "slide this arena right" to
1702      *    restore that usable_arenas is sorted in order of
1703      *    nfreepools.
1704      * 4. Else there's nothing more to do.
1705      */
1706     if (nf == ao->ntotalpools) {
1707         /* Case 1.  First unlink ao from usable_arenas.
1708          */
1709         assert(ao->prevarena == NULL ||
1710                ao->prevarena->address != 0);
1711         assert(ao ->nextarena == NULL ||
1712                ao->nextarena->address != 0);
1713 
1714         /* Fix the pointer in the prevarena, or the
1715          * usable_arenas pointer.
1716          */
1717         if (ao->prevarena == NULL) {
1718             usable_arenas = ao->nextarena;
1719             assert(usable_arenas == NULL ||
1720                    usable_arenas->address != 0);
1721         }
1722         else {
1723             assert(ao->prevarena->nextarena == ao);
1724             ao->prevarena->nextarena =
1725                 ao->nextarena;
1726         }
1727         /* Fix the pointer in the nextarena. */
1728         if (ao->nextarena != NULL) {
1729             assert(ao->nextarena->prevarena == ao);
1730             ao->nextarena->prevarena =
1731                 ao->prevarena;
1732         }
1733         /* Record that this arena_object slot is
1734          * available to be reused.
1735          */
1736         ao->nextarena = unused_arena_objects;
1737         unused_arena_objects = ao;
1738 
1739         /* Free the entire arena. */
1740         _PyObject_Arena.free(_PyObject_Arena.ctx,
1741                              (void *)ao->address, ARENA_SIZE);
1742         ao->address = 0;                        /* mark unassociated */
1743         --narenas_currently_allocated;
1744 
1745         goto success;
1746     }
1747 
1748     if (nf == 1) {
1749         /* Case 2.  Put ao at the head of
1750          * usable_arenas.  Note that because
1751          * ao->nfreepools was 0 before, ao isn't
1752          * currently on the usable_arenas list.
1753          */
1754         ao->nextarena = usable_arenas;
1755         ao->prevarena = NULL;
1756         if (usable_arenas)
1757             usable_arenas->prevarena = ao;
1758         usable_arenas = ao;
1759         assert(usable_arenas->address != 0);
1760 
1761         goto success;
1762     }
1763 
1764     /* If this arena is now out of order, we need to keep
1765      * the list sorted.  The list is kept sorted so that
1766      * the "most full" arenas are used first, which allows
1767      * the nearly empty arenas to be completely freed.  In
1768      * a few un-scientific tests, it seems like this
1769      * approach allowed a lot more memory to be freed.
1770      */
1771     if (ao->nextarena == NULL ||
1772                  nf <= ao->nextarena->nfreepools) {
1773         /* Case 4.  Nothing to do. */
1774         goto success;
1775     }
1776     /* Case 3:  We have to move the arena towards the end
1777      * of the list, because it has more free pools than
1778      * the arena to its right.
1779      * First unlink ao from usable_arenas.
1780      */
1781     if (ao->prevarena != NULL) {
1782         /* ao isn't at the head of the list */
1783         assert(ao->prevarena->nextarena == ao);
1784         ao->prevarena->nextarena = ao->nextarena;
1785     }
1786     else {
1787         /* ao is at the head of the list */
1788         assert(usable_arenas == ao);
1789         usable_arenas = ao->nextarena;
1790     }
1791     ao->nextarena->prevarena = ao->prevarena;
1792 
1793     /* Locate the new insertion point by iterating over
1794      * the list, using our nextarena pointer.
1795      */
1796     while (ao->nextarena != NULL && nf > ao->nextarena->nfreepools) {
1797         ao->prevarena = ao->nextarena;
1798         ao->nextarena = ao->nextarena->nextarena;
1799     }
1800 
1801     /* Insert ao at this point. */
1802     assert(ao->nextarena == NULL || ao->prevarena == ao->nextarena->prevarena);
1803     assert(ao->prevarena->nextarena == ao->nextarena);
1804 
1805     ao->prevarena->nextarena = ao;
1806     if (ao->nextarena != NULL) {
1807         ao->nextarena->prevarena = ao;
1808     }
1809 
1810     /* Verify that the swaps worked. */
1811     assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
1812     assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
1813     assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
1814     assert((usable_arenas == ao && ao->prevarena == NULL)
1815            || ao->prevarena->nextarena == ao);
1816 
1817     goto success;
1818 
1819 success:
1820     UNLOCK();
1821     return 1;
1822 }
1823 
1824 
1825 static void
_PyObject_Free(void * ctx,void * p)1826 _PyObject_Free(void *ctx, void *p)
1827 {
1828     /* PyObject_Free(NULL) has no effect */
1829     if (p == NULL) {
1830         return;
1831     }
1832 
1833     _Py_AllocatedBlocks--;
1834     if (!pymalloc_free(ctx, p)) {
1835         /* pymalloc didn't allocate this address */
1836         PyMem_RawFree(p);
1837     }
1838 }
1839 
1840 
1841 /* pymalloc realloc.
1842 
1843    If nbytes==0, then as the Python docs promise, we do not treat this like
1844    free(p), and return a non-NULL result.
1845 
1846    Return 1 if pymalloc reallocated memory and wrote the new pointer into
1847    newptr_p.
1848 
1849    Return 0 if pymalloc didn't allocated p. */
1850 static int
pymalloc_realloc(void * ctx,void ** newptr_p,void * p,size_t nbytes)1851 pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
1852 {
1853     void *bp;
1854     poolp pool;
1855     size_t size;
1856 
1857     assert(p != NULL);
1858 
1859 #ifdef WITH_VALGRIND
1860     /* Treat running_on_valgrind == -1 the same as 0 */
1861     if (UNLIKELY(running_on_valgrind > 0)) {
1862         return 0;
1863     }
1864 #endif
1865 
1866     pool = POOL_ADDR(p);
1867     if (!address_in_range(p, pool)) {
1868         /* pymalloc is not managing this block.
1869 
1870            If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
1871            over this block.  However, if we do, we need to copy the valid data
1872            from the C-managed block to one of our blocks, and there's no
1873            portable way to know how much of the memory space starting at p is
1874            valid.
1875 
1876            As bug 1185883 pointed out the hard way, it's possible that the
1877            C-managed block is "at the end" of allocated VM space, so that a
1878            memory fault can occur if we try to copy nbytes bytes starting at p.
1879            Instead we punt: let C continue to manage this block. */
1880         return 0;
1881     }
1882 
1883     /* pymalloc is in charge of this block */
1884     size = INDEX2SIZE(pool->szidx);
1885     if (nbytes <= size) {
1886         /* The block is staying the same or shrinking.
1887 
1888            If it's shrinking, there's a tradeoff: it costs cycles to copy the
1889            block to a smaller size class, but it wastes memory not to copy it.
1890 
1891            The compromise here is to copy on shrink only if at least 25% of
1892            size can be shaved off. */
1893         if (4 * nbytes > 3 * size) {
1894             /* It's the same, or shrinking and new/old > 3/4. */
1895             *newptr_p = p;
1896             return 1;
1897         }
1898         size = nbytes;
1899     }
1900 
1901     bp = _PyObject_Malloc(ctx, nbytes);
1902     if (bp != NULL) {
1903         memcpy(bp, p, size);
1904         _PyObject_Free(ctx, p);
1905     }
1906     *newptr_p = bp;
1907     return 1;
1908 }
1909 
1910 
1911 static void *
_PyObject_Realloc(void * ctx,void * ptr,size_t nbytes)1912 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
1913 {
1914     void *ptr2;
1915 
1916     if (ptr == NULL) {
1917         return _PyObject_Malloc(ctx, nbytes);
1918     }
1919 
1920     if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
1921         return ptr2;
1922     }
1923 
1924     return PyMem_RawRealloc(ptr, nbytes);
1925 }
1926 
1927 #else   /* ! WITH_PYMALLOC */
1928 
1929 /*==========================================================================*/
1930 /* pymalloc not enabled:  Redirect the entry points to malloc.  These will
1931  * only be used by extensions that are compiled with pymalloc enabled. */
1932 
1933 Py_ssize_t
_Py_GetAllocatedBlocks(void)1934 _Py_GetAllocatedBlocks(void)
1935 {
1936     return 0;
1937 }
1938 
1939 #endif /* WITH_PYMALLOC */
1940 
1941 
1942 /*==========================================================================*/
1943 /* A x-platform debugging allocator.  This doesn't manage memory directly,
1944  * it wraps a real allocator, adding extra debugging info to the memory blocks.
1945  */
1946 
1947 /* Special bytes broadcast into debug memory blocks at appropriate times.
1948  * Strings of these are unlikely to be valid addresses, floats, ints or
1949  * 7-bit ASCII.
1950  */
1951 #undef CLEANBYTE
1952 #undef DEADBYTE
1953 #undef FORBIDDENBYTE
1954 #define CLEANBYTE      0xCB    /* clean (newly allocated) memory */
1955 #define DEADBYTE       0xDB    /* dead (newly freed) memory */
1956 #define FORBIDDENBYTE  0xFB    /* untouchable bytes at each end of a block */
1957 
1958 static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
1959 
1960 /* serialno is always incremented via calling this routine.  The point is
1961  * to supply a single place to set a breakpoint.
1962  */
1963 static void
bumpserialno(void)1964 bumpserialno(void)
1965 {
1966     ++serialno;
1967 }
1968 
1969 #define SST SIZEOF_SIZE_T
1970 
1971 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
1972 static size_t
read_size_t(const void * p)1973 read_size_t(const void *p)
1974 {
1975     const uint8_t *q = (const uint8_t *)p;
1976     size_t result = *q++;
1977     int i;
1978 
1979     for (i = SST; --i > 0; ++q)
1980         result = (result << 8) | *q;
1981     return result;
1982 }
1983 
1984 /* Write n as a big-endian size_t, MSB at address p, LSB at
1985  * p + sizeof(size_t) - 1.
1986  */
1987 static void
write_size_t(void * p,size_t n)1988 write_size_t(void *p, size_t n)
1989 {
1990     uint8_t *q = (uint8_t *)p + SST - 1;
1991     int i;
1992 
1993     for (i = SST; --i >= 0; --q) {
1994         *q = (uint8_t)(n & 0xff);
1995         n >>= 8;
1996     }
1997 }
1998 
1999 /* Let S = sizeof(size_t).  The debug malloc asks for 4*S extra bytes and
2000    fills them with useful stuff, here calling the underlying malloc's result p:
2001 
2002 p[0: S]
2003     Number of bytes originally asked for.  This is a size_t, big-endian (easier
2004     to read in a memory dump).
2005 p[S]
2006     API ID.  See PEP 445.  This is a character, but seems undocumented.
2007 p[S+1: 2*S]
2008     Copies of FORBIDDENBYTE.  Used to catch under- writes and reads.
2009 p[2*S: 2*S+n]
2010     The requested memory, filled with copies of CLEANBYTE.
2011     Used to catch reference to uninitialized memory.
2012     &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2013     handled the request itself.
2014 p[2*S+n: 2*S+n+S]
2015     Copies of FORBIDDENBYTE.  Used to catch over- writes and reads.
2016 p[2*S+n+S: 2*S+n+2*S]
2017     A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2018     and _PyMem_DebugRealloc.
2019     This is a big-endian size_t.
2020     If "bad memory" is detected later, the serial number gives an
2021     excellent way to set a breakpoint on the next run, to capture the
2022     instant at which this block was passed out.
2023 */
2024 
2025 static void *
_PyMem_DebugRawAlloc(int use_calloc,void * ctx,size_t nbytes)2026 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2027 {
2028     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2029     uint8_t *p;           /* base address of malloc'ed pad block */
2030     uint8_t *data;        /* p + 2*SST == pointer to data bytes */
2031     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2032     size_t total;         /* 2 * SST + nbytes + 2 * SST */
2033 
2034     if (nbytes > (size_t)PY_SSIZE_T_MAX - 4 * SST) {
2035         /* integer overflow: can't represent total as a Py_ssize_t */
2036         return NULL;
2037     }
2038     total = nbytes + 4 * SST;
2039 
2040     /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2041      *          ^--- p    ^--- data   ^--- tail
2042        S: nbytes stored as size_t
2043        I: API identifier (1 byte)
2044        F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2045        C: Clean bytes used later to store actual data
2046        N: Serial number stored as size_t */
2047 
2048     if (use_calloc) {
2049         p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2050     }
2051     else {
2052         p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2053     }
2054     if (p == NULL) {
2055         return NULL;
2056     }
2057     data = p + 2*SST;
2058 
2059     bumpserialno();
2060 
2061     /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2062     write_size_t(p, nbytes);
2063     p[SST] = (uint8_t)api->api_id;
2064     memset(p + SST + 1, FORBIDDENBYTE, SST-1);
2065 
2066     if (nbytes > 0 && !use_calloc) {
2067         memset(data, CLEANBYTE, nbytes);
2068     }
2069 
2070     /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2071     tail = data + nbytes;
2072     memset(tail, FORBIDDENBYTE, SST);
2073     write_size_t(tail + SST, serialno);
2074 
2075     return data;
2076 }
2077 
2078 static void *
_PyMem_DebugRawMalloc(void * ctx,size_t nbytes)2079 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2080 {
2081     return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2082 }
2083 
2084 static void *
_PyMem_DebugRawCalloc(void * ctx,size_t nelem,size_t elsize)2085 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2086 {
2087     size_t nbytes;
2088     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2089     nbytes = nelem * elsize;
2090     return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2091 }
2092 
2093 
2094 /* Heuristic checking if the memory has been freed. Rely on the debug hooks on
2095    Python memory allocators which fills the memory with DEADBYTE (0xDB) when
2096    memory is deallocated. */
2097 int
_PyMem_IsFreed(void * ptr,size_t size)2098 _PyMem_IsFreed(void *ptr, size_t size)
2099 {
2100     unsigned char *bytes = ptr;
2101     for (size_t i=0; i < size; i++) {
2102         if (bytes[i] != DEADBYTE) {
2103             return 0;
2104         }
2105     }
2106     return 1;
2107 }
2108 
2109 
2110 /* The debug free first checks the 2*SST bytes on each end for sanity (in
2111    particular, that the FORBIDDENBYTEs with the api ID are still intact).
2112    Then fills the original bytes with DEADBYTE.
2113    Then calls the underlying free.
2114 */
2115 static void
_PyMem_DebugRawFree(void * ctx,void * p)2116 _PyMem_DebugRawFree(void *ctx, void *p)
2117 {
2118     /* PyMem_Free(NULL) has no effect */
2119     if (p == NULL) {
2120         return;
2121     }
2122 
2123     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2124     uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
2125     size_t nbytes;
2126 
2127     _PyMem_DebugCheckAddress(api->api_id, p);
2128     nbytes = read_size_t(q);
2129     nbytes += 4 * SST;
2130     memset(q, DEADBYTE, nbytes);
2131     api->alloc.free(api->alloc.ctx, q);
2132 }
2133 
2134 
2135 static void *
_PyMem_DebugRawRealloc(void * ctx,void * p,size_t nbytes)2136 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2137 {
2138     if (p == NULL) {
2139         return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2140     }
2141 
2142     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2143     uint8_t *head;        /* base address of malloc'ed pad block */
2144     uint8_t *data;        /* pointer to data bytes */
2145     uint8_t *r;
2146     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2147     size_t total;         /* 2 * SST + nbytes + 2 * SST */
2148     size_t original_nbytes;
2149     size_t block_serialno;
2150 #define ERASED_SIZE 64
2151     uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
2152 
2153     _PyMem_DebugCheckAddress(api->api_id, p);
2154 
2155     data = (uint8_t *)p;
2156     head = data - 2*SST;
2157     original_nbytes = read_size_t(head);
2158     if (nbytes > (size_t)PY_SSIZE_T_MAX - 4*SST) {
2159         /* integer overflow: can't represent total as a Py_ssize_t */
2160         return NULL;
2161     }
2162     total = nbytes + 4*SST;
2163 
2164     tail = data + original_nbytes;
2165     block_serialno = read_size_t(tail + SST);
2166     /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
2167        ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
2168      */
2169     if (original_nbytes <= sizeof(save)) {
2170         memcpy(save, data, original_nbytes);
2171         memset(data - 2*SST, DEADBYTE, original_nbytes + 4*SST);
2172     }
2173     else {
2174         memcpy(save, data, ERASED_SIZE);
2175         memset(head, DEADBYTE, ERASED_SIZE + 2*SST);
2176         memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2177         memset(tail - ERASED_SIZE, DEADBYTE, ERASED_SIZE + 2*SST);
2178     }
2179 
2180     /* Resize and add decorations. */
2181     r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2182     if (r == NULL) {
2183         nbytes = original_nbytes;
2184     }
2185     else {
2186         head = r;
2187         bumpserialno();
2188         block_serialno = serialno;
2189     }
2190 
2191     write_size_t(head, nbytes);
2192     head[SST] = (uint8_t)api->api_id;
2193     memset(head + SST + 1, FORBIDDENBYTE, SST-1);
2194     data = head + 2*SST;
2195 
2196     tail = data + nbytes;
2197     memset(tail, FORBIDDENBYTE, SST);
2198     write_size_t(tail + SST, block_serialno);
2199 
2200     /* Restore saved bytes. */
2201     if (original_nbytes <= sizeof(save)) {
2202         memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2203     }
2204     else {
2205         size_t i = original_nbytes - ERASED_SIZE;
2206         memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2207         if (nbytes > i) {
2208             memcpy(data + i, &save[ERASED_SIZE],
2209                    Py_MIN(nbytes - i, ERASED_SIZE));
2210         }
2211     }
2212 
2213     if (r == NULL) {
2214         return NULL;
2215     }
2216 
2217     if (nbytes > original_nbytes) {
2218         /* growing:  mark new extra memory clean */
2219         memset(data + original_nbytes, CLEANBYTE, nbytes - original_nbytes);
2220     }
2221 
2222     return data;
2223 }
2224 
2225 static void
_PyMem_DebugCheckGIL(void)2226 _PyMem_DebugCheckGIL(void)
2227 {
2228     if (!PyGILState_Check())
2229         Py_FatalError("Python memory allocator called "
2230                       "without holding the GIL");
2231 }
2232 
2233 static void *
_PyMem_DebugMalloc(void * ctx,size_t nbytes)2234 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
2235 {
2236     _PyMem_DebugCheckGIL();
2237     return _PyMem_DebugRawMalloc(ctx, nbytes);
2238 }
2239 
2240 static void *
_PyMem_DebugCalloc(void * ctx,size_t nelem,size_t elsize)2241 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2242 {
2243     _PyMem_DebugCheckGIL();
2244     return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2245 }
2246 
2247 
2248 static void
_PyMem_DebugFree(void * ctx,void * ptr)2249 _PyMem_DebugFree(void *ctx, void *ptr)
2250 {
2251     _PyMem_DebugCheckGIL();
2252     _PyMem_DebugRawFree(ctx, ptr);
2253 }
2254 
2255 
2256 static void *
_PyMem_DebugRealloc(void * ctx,void * ptr,size_t nbytes)2257 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2258 {
2259     _PyMem_DebugCheckGIL();
2260     return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2261 }
2262 
2263 /* Check the forbidden bytes on both ends of the memory allocated for p.
2264  * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2265  * and call Py_FatalError to kill the program.
2266  * The API id, is also checked.
2267  */
2268 static void
_PyMem_DebugCheckAddress(char api,const void * p)2269 _PyMem_DebugCheckAddress(char api, const void *p)
2270 {
2271     const uint8_t *q = (const uint8_t *)p;
2272     char msgbuf[64];
2273     const char *msg;
2274     size_t nbytes;
2275     const uint8_t *tail;
2276     int i;
2277     char id;
2278 
2279     if (p == NULL) {
2280         msg = "didn't expect a NULL pointer";
2281         goto error;
2282     }
2283 
2284     /* Check the API id */
2285     id = (char)q[-SST];
2286     if (id != api) {
2287         msg = msgbuf;
2288         snprintf(msgbuf, sizeof(msgbuf), "bad ID: Allocated using API '%c', verified using API '%c'", id, api);
2289         msgbuf[sizeof(msgbuf)-1] = 0;
2290         goto error;
2291     }
2292 
2293     /* Check the stuff at the start of p first:  if there's underwrite
2294      * corruption, the number-of-bytes field may be nuts, and checking
2295      * the tail could lead to a segfault then.
2296      */
2297     for (i = SST-1; i >= 1; --i) {
2298         if (*(q-i) != FORBIDDENBYTE) {
2299             msg = "bad leading pad byte";
2300             goto error;
2301         }
2302     }
2303 
2304     nbytes = read_size_t(q - 2*SST);
2305     tail = q + nbytes;
2306     for (i = 0; i < SST; ++i) {
2307         if (tail[i] != FORBIDDENBYTE) {
2308             msg = "bad trailing pad byte";
2309             goto error;
2310         }
2311     }
2312 
2313     return;
2314 
2315 error:
2316     _PyObject_DebugDumpAddress(p);
2317     Py_FatalError(msg);
2318 }
2319 
2320 /* Display info to stderr about the memory block at p. */
2321 static void
_PyObject_DebugDumpAddress(const void * p)2322 _PyObject_DebugDumpAddress(const void *p)
2323 {
2324     const uint8_t *q = (const uint8_t *)p;
2325     const uint8_t *tail;
2326     size_t nbytes, serial;
2327     int i;
2328     int ok;
2329     char id;
2330 
2331     fprintf(stderr, "Debug memory block at address p=%p:", p);
2332     if (p == NULL) {
2333         fprintf(stderr, "\n");
2334         return;
2335     }
2336     id = (char)q[-SST];
2337     fprintf(stderr, " API '%c'\n", id);
2338 
2339     nbytes = read_size_t(q - 2*SST);
2340     fprintf(stderr, "    %" PY_FORMAT_SIZE_T "u bytes originally "
2341                     "requested\n", nbytes);
2342 
2343     /* In case this is nuts, check the leading pad bytes first. */
2344     fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
2345     ok = 1;
2346     for (i = 1; i <= SST-1; ++i) {
2347         if (*(q-i) != FORBIDDENBYTE) {
2348             ok = 0;
2349             break;
2350         }
2351     }
2352     if (ok)
2353         fputs("FORBIDDENBYTE, as expected.\n", stderr);
2354     else {
2355         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2356             FORBIDDENBYTE);
2357         for (i = SST-1; i >= 1; --i) {
2358             const uint8_t byte = *(q-i);
2359             fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
2360             if (byte != FORBIDDENBYTE)
2361                 fputs(" *** OUCH", stderr);
2362             fputc('\n', stderr);
2363         }
2364 
2365         fputs("    Because memory is corrupted at the start, the "
2366               "count of bytes requested\n"
2367               "       may be bogus, and checking the trailing pad "
2368               "bytes may segfault.\n", stderr);
2369     }
2370 
2371     tail = q + nbytes;
2372     fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, tail);
2373     ok = 1;
2374     for (i = 0; i < SST; ++i) {
2375         if (tail[i] != FORBIDDENBYTE) {
2376             ok = 0;
2377             break;
2378         }
2379     }
2380     if (ok)
2381         fputs("FORBIDDENBYTE, as expected.\n", stderr);
2382     else {
2383         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2384                 FORBIDDENBYTE);
2385         for (i = 0; i < SST; ++i) {
2386             const uint8_t byte = tail[i];
2387             fprintf(stderr, "        at tail+%d: 0x%02x",
2388                     i, byte);
2389             if (byte != FORBIDDENBYTE)
2390                 fputs(" *** OUCH", stderr);
2391             fputc('\n', stderr);
2392         }
2393     }
2394 
2395     serial = read_size_t(tail + SST);
2396     fprintf(stderr, "    The block was made by call #%" PY_FORMAT_SIZE_T
2397                     "u to debug malloc/realloc.\n", serial);
2398 
2399     if (nbytes > 0) {
2400         i = 0;
2401         fputs("    Data at p:", stderr);
2402         /* print up to 8 bytes at the start */
2403         while (q < tail && i < 8) {
2404             fprintf(stderr, " %02x", *q);
2405             ++i;
2406             ++q;
2407         }
2408         /* and up to 8 at the end */
2409         if (q < tail) {
2410             if (tail - q > 8) {
2411                 fputs(" ...", stderr);
2412                 q = tail - 8;
2413             }
2414             while (q < tail) {
2415                 fprintf(stderr, " %02x", *q);
2416                 ++q;
2417             }
2418         }
2419         fputc('\n', stderr);
2420     }
2421     fputc('\n', stderr);
2422 
2423     fflush(stderr);
2424     _PyMem_DumpTraceback(fileno(stderr), p);
2425 }
2426 
2427 
2428 static size_t
printone(FILE * out,const char * msg,size_t value)2429 printone(FILE *out, const char* msg, size_t value)
2430 {
2431     int i, k;
2432     char buf[100];
2433     size_t origvalue = value;
2434 
2435     fputs(msg, out);
2436     for (i = (int)strlen(msg); i < 35; ++i)
2437         fputc(' ', out);
2438     fputc('=', out);
2439 
2440     /* Write the value with commas. */
2441     i = 22;
2442     buf[i--] = '\0';
2443     buf[i--] = '\n';
2444     k = 3;
2445     do {
2446         size_t nextvalue = value / 10;
2447         unsigned int digit = (unsigned int)(value - nextvalue * 10);
2448         value = nextvalue;
2449         buf[i--] = (char)(digit + '0');
2450         --k;
2451         if (k == 0 && value && i >= 0) {
2452             k = 3;
2453             buf[i--] = ',';
2454         }
2455     } while (value && i >= 0);
2456 
2457     while (i >= 0)
2458         buf[i--] = ' ';
2459     fputs(buf, out);
2460 
2461     return origvalue;
2462 }
2463 
2464 void
_PyDebugAllocatorStats(FILE * out,const char * block_name,int num_blocks,size_t sizeof_block)2465 _PyDebugAllocatorStats(FILE *out,
2466                        const char *block_name, int num_blocks, size_t sizeof_block)
2467 {
2468     char buf1[128];
2469     char buf2[128];
2470     PyOS_snprintf(buf1, sizeof(buf1),
2471                   "%d %ss * %" PY_FORMAT_SIZE_T "d bytes each",
2472                   num_blocks, block_name, sizeof_block);
2473     PyOS_snprintf(buf2, sizeof(buf2),
2474                   "%48s ", buf1);
2475     (void)printone(out, buf2, num_blocks * sizeof_block);
2476 }
2477 
2478 
2479 #ifdef WITH_PYMALLOC
2480 
2481 #ifdef Py_DEBUG
2482 /* Is target in the list?  The list is traversed via the nextpool pointers.
2483  * The list may be NULL-terminated, or circular.  Return 1 if target is in
2484  * list, else 0.
2485  */
2486 static int
pool_is_in_list(const poolp target,poolp list)2487 pool_is_in_list(const poolp target, poolp list)
2488 {
2489     poolp origlist = list;
2490     assert(target != NULL);
2491     if (list == NULL)
2492         return 0;
2493     do {
2494         if (target == list)
2495             return 1;
2496         list = list->nextpool;
2497     } while (list != NULL && list != origlist);
2498     return 0;
2499 }
2500 #endif
2501 
2502 /* Print summary info to "out" about the state of pymalloc's structures.
2503  * In Py_DEBUG mode, also perform some expensive internal consistency
2504  * checks.
2505  *
2506  * Return 0 if the memory debug hooks are not installed or no statistics was
2507  * written into out, return 1 otherwise.
2508  */
2509 int
_PyObject_DebugMallocStats(FILE * out)2510 _PyObject_DebugMallocStats(FILE *out)
2511 {
2512     if (!_PyMem_PymallocEnabled()) {
2513         return 0;
2514     }
2515 
2516     uint i;
2517     const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
2518     /* # of pools, allocated blocks, and free blocks per class index */
2519     size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2520     size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2521     size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2522     /* total # of allocated bytes in used and full pools */
2523     size_t allocated_bytes = 0;
2524     /* total # of available bytes in used pools */
2525     size_t available_bytes = 0;
2526     /* # of free pools + pools not yet carved out of current arena */
2527     uint numfreepools = 0;
2528     /* # of bytes for arena alignment padding */
2529     size_t arena_alignment = 0;
2530     /* # of bytes in used and full pools used for pool_headers */
2531     size_t pool_header_bytes = 0;
2532     /* # of bytes in used and full pools wasted due to quantization,
2533      * i.e. the necessarily leftover space at the ends of used and
2534      * full pools.
2535      */
2536     size_t quantization = 0;
2537     /* # of arenas actually allocated. */
2538     size_t narenas = 0;
2539     /* running total -- should equal narenas * ARENA_SIZE */
2540     size_t total;
2541     char buf[128];
2542 
2543     fprintf(out, "Small block threshold = %d, in %u size classes.\n",
2544             SMALL_REQUEST_THRESHOLD, numclasses);
2545 
2546     for (i = 0; i < numclasses; ++i)
2547         numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
2548 
2549     /* Because full pools aren't linked to from anything, it's easiest
2550      * to march over all the arenas.  If we're lucky, most of the memory
2551      * will be living in full pools -- would be a shame to miss them.
2552      */
2553     for (i = 0; i < maxarenas; ++i) {
2554         uint j;
2555         uintptr_t base = arenas[i].address;
2556 
2557         /* Skip arenas which are not allocated. */
2558         if (arenas[i].address == (uintptr_t)NULL)
2559             continue;
2560         narenas += 1;
2561 
2562         numfreepools += arenas[i].nfreepools;
2563 
2564         /* round up to pool alignment */
2565         if (base & (uintptr_t)POOL_SIZE_MASK) {
2566             arena_alignment += POOL_SIZE;
2567             base &= ~(uintptr_t)POOL_SIZE_MASK;
2568             base += POOL_SIZE;
2569         }
2570 
2571         /* visit every pool in the arena */
2572         assert(base <= (uintptr_t) arenas[i].pool_address);
2573         for (j = 0; base < (uintptr_t) arenas[i].pool_address;
2574              ++j, base += POOL_SIZE) {
2575             poolp p = (poolp)base;
2576             const uint sz = p->szidx;
2577             uint freeblocks;
2578 
2579             if (p->ref.count == 0) {
2580                 /* currently unused */
2581 #ifdef Py_DEBUG
2582                 assert(pool_is_in_list(p, arenas[i].freepools));
2583 #endif
2584                 continue;
2585             }
2586             ++numpools[sz];
2587             numblocks[sz] += p->ref.count;
2588             freeblocks = NUMBLOCKS(sz) - p->ref.count;
2589             numfreeblocks[sz] += freeblocks;
2590 #ifdef Py_DEBUG
2591             if (freeblocks > 0)
2592                 assert(pool_is_in_list(p, usedpools[sz + sz]));
2593 #endif
2594         }
2595     }
2596     assert(narenas == narenas_currently_allocated);
2597 
2598     fputc('\n', out);
2599     fputs("class   size   num pools   blocks in use  avail blocks\n"
2600           "-----   ----   ---------   -------------  ------------\n",
2601           out);
2602 
2603     for (i = 0; i < numclasses; ++i) {
2604         size_t p = numpools[i];
2605         size_t b = numblocks[i];
2606         size_t f = numfreeblocks[i];
2607         uint size = INDEX2SIZE(i);
2608         if (p == 0) {
2609             assert(b == 0 && f == 0);
2610             continue;
2611         }
2612         fprintf(out, "%5u %6u "
2613                         "%11" PY_FORMAT_SIZE_T "u "
2614                         "%15" PY_FORMAT_SIZE_T "u "
2615                         "%13" PY_FORMAT_SIZE_T "u\n",
2616                 i, size, p, b, f);
2617         allocated_bytes += b * size;
2618         available_bytes += f * size;
2619         pool_header_bytes += p * POOL_OVERHEAD;
2620         quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
2621     }
2622     fputc('\n', out);
2623     if (_PyMem_DebugEnabled())
2624         (void)printone(out, "# times object malloc called", serialno);
2625     (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
2626     (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
2627     (void)printone(out, "# arenas highwater mark", narenas_highwater);
2628     (void)printone(out, "# arenas allocated current", narenas);
2629 
2630     PyOS_snprintf(buf, sizeof(buf),
2631         "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
2632         narenas, ARENA_SIZE);
2633     (void)printone(out, buf, narenas * ARENA_SIZE);
2634 
2635     fputc('\n', out);
2636 
2637     total = printone(out, "# bytes in allocated blocks", allocated_bytes);
2638     total += printone(out, "# bytes in available blocks", available_bytes);
2639 
2640     PyOS_snprintf(buf, sizeof(buf),
2641         "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
2642     total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
2643 
2644     total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
2645     total += printone(out, "# bytes lost to quantization", quantization);
2646     total += printone(out, "# bytes lost to arena alignment", arena_alignment);
2647     (void)printone(out, "Total", total);
2648     return 1;
2649 }
2650 
2651 #endif /* #ifdef WITH_PYMALLOC */
2652