1 #include "Python.h"
2 #include "pycore_pymem.h"         // _PyTraceMalloc_Config
3 
4 #include <stdbool.h>
5 
6 
7 /* Defined in tracemalloc.c */
8 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
9 
10 
11 /* Python's malloc wrappers (see pymem.h) */
12 
13 #undef  uint
14 #define uint    unsigned int    /* assuming >= 16 bits */
15 
16 /* Forward declaration */
17 static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
18 static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
19 static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
20 static void _PyMem_DebugRawFree(void *ctx, void *ptr);
21 
22 static void* _PyMem_DebugMalloc(void *ctx, size_t size);
23 static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
24 static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
25 static void _PyMem_DebugFree(void *ctx, void *p);
26 
27 static void _PyObject_DebugDumpAddress(const void *p);
28 static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
29 
30 static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);
31 
32 #if defined(__has_feature)  /* Clang */
33 #  if __has_feature(address_sanitizer) /* is ASAN enabled? */
34 #    define _Py_NO_SANITIZE_ADDRESS \
35         __attribute__((no_sanitize("address")))
36 #  endif
37 #  if __has_feature(thread_sanitizer)  /* is TSAN enabled? */
38 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
39 #  endif
40 #  if __has_feature(memory_sanitizer)  /* is MSAN enabled? */
41 #    define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
42 #  endif
43 #elif defined(__GNUC__)
44 #  if defined(__SANITIZE_ADDRESS__)    /* GCC 4.8+, is ASAN enabled? */
45 #    define _Py_NO_SANITIZE_ADDRESS \
46         __attribute__((no_sanitize_address))
47 #  endif
48    // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
49    // is provided only since GCC 7.
50 #  if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
51 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
52 #  endif
53 #endif
54 
55 #ifndef _Py_NO_SANITIZE_ADDRESS
56 #  define _Py_NO_SANITIZE_ADDRESS
57 #endif
58 #ifndef _Py_NO_SANITIZE_THREAD
59 #  define _Py_NO_SANITIZE_THREAD
60 #endif
61 #ifndef _Py_NO_SANITIZE_MEMORY
62 #  define _Py_NO_SANITIZE_MEMORY
63 #endif
64 
65 #ifdef WITH_PYMALLOC
66 
67 #ifdef MS_WINDOWS
68 #  include <windows.h>
69 #elif defined(HAVE_MMAP)
70 #  include <sys/mman.h>
71 #  ifdef MAP_ANONYMOUS
72 #    define ARENAS_USE_MMAP
73 #  endif
74 #endif
75 
76 /* Forward declaration */
77 static void* _PyObject_Malloc(void *ctx, size_t size);
78 static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
79 static void _PyObject_Free(void *ctx, void *p);
80 static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
81 #endif
82 
83 
84 /* bpo-35053: Declare tracemalloc configuration here rather than
85    Modules/_tracemalloc.c because _tracemalloc can be compiled as a dynamic
86    library, whereas _Py_NewReference() requires it. */
87 struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT;
88 
89 
90 static void *
91 _PyMem_RawMalloc(void *ctx, size_t size)
92 {
93     /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
94        for malloc(0), which would be treated as an error. Some platforms would
95        return a pointer with no memory behind it, which would break pymalloc.
96        To solve these problems, allocate an extra byte. */
97     if (size == 0)
98         size = 1;
99     return malloc(size);
100 }
101 
102 static void *
103 _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
104 {
105     /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
106        for calloc(0, 0), which would be treated as an error. Some platforms
107        would return a pointer with no memory behind it, which would break
108        pymalloc.  To solve these problems, allocate an extra byte. */
109     if (nelem == 0 || elsize == 0) {
110         nelem = 1;
111         elsize = 1;
112     }
113     return calloc(nelem, elsize);
114 }
115 
116 static void *
117 _PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
118 {
119     if (size == 0)
120         size = 1;
121     return realloc(ptr, size);
122 }
123 
124 static void
125 _PyMem_RawFree(void *ctx, void *ptr)
126 {
127     free(ptr);
128 }
129 
130 
131 #ifdef MS_WINDOWS
132 static void *
133 _PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
134 {
135     return VirtualAlloc(NULL, size,
136                         MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
137 }
138 
139 static void
140 _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
141 {
142     VirtualFree(ptr, 0, MEM_RELEASE);
143 }
144 
145 #elif defined(ARENAS_USE_MMAP)
146 static void *
147 _PyObject_ArenaMmap(void *ctx, size_t size)
148 {
149     void *ptr;
150     ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
151                MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
152     if (ptr == MAP_FAILED)
153         return NULL;
154     assert(ptr != NULL);
155     return ptr;
156 }
157 
158 static void
159 _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
160 {
161     munmap(ptr, size);
162 }
163 
164 #else
165 static void *
166 _PyObject_ArenaMalloc(void *ctx, size_t size)
167 {
168     return malloc(size);
169 }
170 
171 static void
172 _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
173 {
174     free(ptr);
175 }
176 #endif
177 
178 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
179 #ifdef WITH_PYMALLOC
180 #  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
181 #endif
182 
183 #define PYRAW_ALLOC MALLOC_ALLOC
184 #ifdef WITH_PYMALLOC
185 #  define PYOBJ_ALLOC PYMALLOC_ALLOC
186 #else
187 #  define PYOBJ_ALLOC MALLOC_ALLOC
188 #endif
189 #define PYMEM_ALLOC PYOBJ_ALLOC
190 
191 typedef struct {
192     /* We tag each block with an API ID in order to detect API violations */
193     char api_id;
194     PyMemAllocatorEx alloc;
195 } debug_alloc_api_t;
196 static struct {
197     debug_alloc_api_t raw;
198     debug_alloc_api_t mem;
199     debug_alloc_api_t obj;
200 } _PyMem_Debug = {
201     {'r', PYRAW_ALLOC},
202     {'m', PYMEM_ALLOC},
203     {'o', PYOBJ_ALLOC}
204     };
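
/* Editor's note: a minimal sketch (not part of CPython) of the kind of
 * cross-domain misuse the api_id tags above make detectable.  A block must be
 * released through the same API family that allocated it; with the debug
 * hooks installed, the mismatched pair below is reported as a "bad ID".
 * The function and variable names are hypothetical. */
#if 0   /* example only, not compiled into the file */
static void
example_api_domains(void)
{
    void *ok = PyObject_Malloc(16);
    PyObject_Free(ok);             /* same domain ('o'): fine */

    void *bad = PyMem_Malloc(16);  /* tagged 'm' by the debug hooks */
    PyObject_Free(bad);            /* released via 'o': an API violation */
}
#endif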
205 
206 #define PYDBGRAW_ALLOC \
207     {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
208 #define PYDBGMEM_ALLOC \
209     {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
210 #define PYDBGOBJ_ALLOC \
211     {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
212 
213 #ifdef Py_DEBUG
214 static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
215 static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
216 static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
217 #else
218 static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
219 static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
220 static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
221 #endif
222 
223 
224 static int
225 pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
226                             PyMemAllocatorEx *old_alloc)
227 {
228     if (old_alloc != NULL) {
229         PyMem_GetAllocator(domain, old_alloc);
230     }
231 
232 
233     PyMemAllocatorEx new_alloc;
234     switch(domain)
235     {
236     case PYMEM_DOMAIN_RAW:
237         new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
238         break;
239     case PYMEM_DOMAIN_MEM:
240         new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
241         break;
242     case PYMEM_DOMAIN_OBJ:
243         new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
244         break;
245     default:
246         /* unknown domain */
247         return -1;
248     }
249     PyMem_SetAllocator(domain, &new_alloc);
250     if (debug) {
251         _PyMem_SetupDebugHooksDomain(domain);
252     }
253     return 0;
254 }
255 
256 
257 int
258 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
259                            PyMemAllocatorEx *old_alloc)
260 {
261 #ifdef Py_DEBUG
262     const int debug = 1;
263 #else
264     const int debug = 0;
265 #endif
266     return pymem_set_default_allocator(domain, debug, old_alloc);
267 }
268 
269 
270 int
271 _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
272 {
273     if (name == NULL || *name == '\0') {
274         /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
275            options): use default memory allocators */
276         *allocator = PYMEM_ALLOCATOR_DEFAULT;
277     }
278     else if (strcmp(name, "default") == 0) {
279         *allocator = PYMEM_ALLOCATOR_DEFAULT;
280     }
281     else if (strcmp(name, "debug") == 0) {
282         *allocator = PYMEM_ALLOCATOR_DEBUG;
283     }
284 #ifdef WITH_PYMALLOC
285     else if (strcmp(name, "pymalloc") == 0) {
286         *allocator = PYMEM_ALLOCATOR_PYMALLOC;
287     }
288     else if (strcmp(name, "pymalloc_debug") == 0) {
289         *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
290     }
291 #endif
292     else if (strcmp(name, "malloc") == 0) {
293         *allocator = PYMEM_ALLOCATOR_MALLOC;
294     }
295     else if (strcmp(name, "malloc_debug") == 0) {
296         *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
297     }
298     else {
299         /* unknown allocator */
300         return -1;
301     }
302     return 0;
303 }
304 
305 
306 int
307 _PyMem_SetupAllocators(PyMemAllocatorName allocator)
308 {
309     switch (allocator) {
310     case PYMEM_ALLOCATOR_NOT_SET:
311         /* do nothing */
312         break;
313 
314     case PYMEM_ALLOCATOR_DEFAULT:
315         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
316         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
317         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
318         break;
319 
320     case PYMEM_ALLOCATOR_DEBUG:
321         (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
322         (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
323         (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
324         break;
325 
326 #ifdef WITH_PYMALLOC
327     case PYMEM_ALLOCATOR_PYMALLOC:
328     case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
329     {
330         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
331         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
332 
333         PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
334         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
335         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);
336 
337         if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) {
338             PyMem_SetupDebugHooks();
339         }
340         break;
341     }
342 #endif
343 
344     case PYMEM_ALLOCATOR_MALLOC:
345     case PYMEM_ALLOCATOR_MALLOC_DEBUG:
346     {
347         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
348         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
349         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
350         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);
351 
352         if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) {
353             PyMem_SetupDebugHooks();
354         }
355         break;
356     }
357 
358     default:
359         /* unknown allocator */
360         return -1;
361     }
362     return 0;
363 }
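
/* Editor's note: a minimal sketch (not part of CPython) showing how the two
 * helpers above fit together, roughly mirroring what interpreter start-up
 * does with the PYTHONMALLOC environment variable.  The function name is
 * hypothetical. */
#if 0   /* example only, not compiled into the file */
static int
example_apply_pythonmalloc(void)
{
    PyMemAllocatorName name;
    /* _PyMem_GetAllocatorName() accepts NULL (env var not set). */
    if (_PyMem_GetAllocatorName(getenv("PYTHONMALLOC"), &name) < 0) {
        return -1;   /* unknown allocator name */
    }
    return _PyMem_SetupAllocators(name);
}
#endif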
364 
365 
366 static int
367 pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
368 {
369     return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
370 }
371 
372 
373 const char*
374 _PyMem_GetCurrentAllocatorName(void)
375 {
376     PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
377 #ifdef WITH_PYMALLOC
378     PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
379 #endif
380 
381     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
382         pymemallocator_eq(&_PyMem, &malloc_alloc) &&
383         pymemallocator_eq(&_PyObject, &malloc_alloc))
384     {
385         return "malloc";
386     }
387 #ifdef WITH_PYMALLOC
388     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
389         pymemallocator_eq(&_PyMem, &pymalloc) &&
390         pymemallocator_eq(&_PyObject, &pymalloc))
391     {
392         return "pymalloc";
393     }
394 #endif
395 
396     PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
397     PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
398     PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
399 
400     if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
401         pymemallocator_eq(&_PyMem, &dbg_mem) &&
402         pymemallocator_eq(&_PyObject, &dbg_obj))
403     {
404         /* Debug hooks installed */
405         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
406             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
407             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
408         {
409             return "malloc_debug";
410         }
411 #ifdef WITH_PYMALLOC
412         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
413             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
414             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
415         {
416             return "pymalloc_debug";
417         }
418 #endif
419     }
420     return NULL;
421 }
422 
423 
424 #undef MALLOC_ALLOC
425 #undef PYMALLOC_ALLOC
426 #undef PYRAW_ALLOC
427 #undef PYMEM_ALLOC
428 #undef PYOBJ_ALLOC
429 #undef PYDBGRAW_ALLOC
430 #undef PYDBGMEM_ALLOC
431 #undef PYDBGOBJ_ALLOC
432 
433 
434 static PyObjectArenaAllocator _PyObject_Arena = {NULL,
435 #ifdef MS_WINDOWS
436     _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
437 #elif defined(ARENAS_USE_MMAP)
438     _PyObject_ArenaMmap, _PyObject_ArenaMunmap
439 #else
440     _PyObject_ArenaMalloc, _PyObject_ArenaFree
441 #endif
442     };
443 
444 #ifdef WITH_PYMALLOC
445 static int
446 _PyMem_DebugEnabled(void)
447 {
448     return (_PyObject.malloc == _PyMem_DebugMalloc);
449 }
450 
451 static int
452 _PyMem_PymallocEnabled(void)
453 {
454     if (_PyMem_DebugEnabled()) {
455         return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
456     }
457     else {
458         return (_PyObject.malloc == _PyObject_Malloc);
459     }
460 }
461 #endif
462 
463 
464 static void
465 _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
466 {
467     PyMemAllocatorEx alloc;
468 
469     if (domain == PYMEM_DOMAIN_RAW) {
470         if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
471             return;
472         }
473 
474         PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
475         alloc.ctx = &_PyMem_Debug.raw;
476         alloc.malloc = _PyMem_DebugRawMalloc;
477         alloc.calloc = _PyMem_DebugRawCalloc;
478         alloc.realloc = _PyMem_DebugRawRealloc;
479         alloc.free = _PyMem_DebugRawFree;
480         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
481     }
482     else if (domain == PYMEM_DOMAIN_MEM) {
483         if (_PyMem.malloc == _PyMem_DebugMalloc) {
484             return;
485         }
486 
487         PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
488         alloc.ctx = &_PyMem_Debug.mem;
489         alloc.malloc = _PyMem_DebugMalloc;
490         alloc.calloc = _PyMem_DebugCalloc;
491         alloc.realloc = _PyMem_DebugRealloc;
492         alloc.free = _PyMem_DebugFree;
493         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
494     }
495     else if (domain == PYMEM_DOMAIN_OBJ)  {
496         if (_PyObject.malloc == _PyMem_DebugMalloc) {
497             return;
498         }
499 
500         PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
501         alloc.ctx = &_PyMem_Debug.obj;
502         alloc.malloc = _PyMem_DebugMalloc;
503         alloc.calloc = _PyMem_DebugCalloc;
504         alloc.realloc = _PyMem_DebugRealloc;
505         alloc.free = _PyMem_DebugFree;
506         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
507     }
508 }
509 
510 
511 void
512 PyMem_SetupDebugHooks(void)
513 {
514     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
515     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
516     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
517 }
518 
519 void
520 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
521 {
522     switch(domain)
523     {
524     case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
525     case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
526     case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
527     default:
528         /* unknown domain: set all attributes to NULL */
529         allocator->ctx = NULL;
530         allocator->malloc = NULL;
531         allocator->calloc = NULL;
532         allocator->realloc = NULL;
533         allocator->free = NULL;
534     }
535 }
536 
537 void
538 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
539 {
540     switch(domain)
541     {
542     case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
543     case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
544     case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
545     /* ignore unknown domain */
546     }
547 }
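
/* Editor's note: a minimal sketch (not part of CPython) of how an embedder
 * typically uses the two functions above to wrap a domain with a custom
 * allocator.  The counting hook and all `example_*` names are hypothetical;
 * only PyMem_GetAllocator()/PyMem_SetAllocator() and the PyMemAllocatorEx
 * layout come from this file. */
#if 0   /* example only, not compiled into the file */
static PyMemAllocatorEx example_old_raw;
static size_t example_raw_mallocs = 0;

static void *
example_counting_malloc(void *ctx, size_t size)
{
    example_raw_mallocs++;
    /* Delegate to the previously installed allocator. */
    return example_old_raw.malloc(example_old_raw.ctx, size);
}

static void
example_install_raw_hook(void)
{
    /* Typically done very early, before other threads allocate memory. */
    PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &example_old_raw);

    PyMemAllocatorEx hook = example_old_raw;   /* keep ctx/calloc/realloc/free */
    hook.malloc = example_counting_malloc;     /* override only malloc */
    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &hook);
}
#endif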
548 
549 void
550 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
551 {
552     *allocator = _PyObject_Arena;
553 }
554 
555 void
556 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
557 {
558     _PyObject_Arena = *allocator;
559 }
560 
561 void *
562 PyMem_RawMalloc(size_t size)
563 {
564     /*
565      * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
566      * Most python internals blindly use a signed Py_ssize_t to track
567      * things without checking for overflows or negatives.
568      * As size_t is unsigned, checking for size < 0 is not required.
569      */
570     if (size > (size_t)PY_SSIZE_T_MAX)
571         return NULL;
572     return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
573 }
574 
575 void *
576 PyMem_RawCalloc(size_t nelem, size_t elsize)
577 {
578     /* see PyMem_RawMalloc() */
579     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
580         return NULL;
581     return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
582 }
583 
584 void*
585 PyMem_RawRealloc(void *ptr, size_t new_size)
586 {
587     /* see PyMem_RawMalloc() */
588     if (new_size > (size_t)PY_SSIZE_T_MAX)
589         return NULL;
590     return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
591 }
592 
593 void PyMem_RawFree(void *ptr)
594 {
595     _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
596 }
597 
598 
599 void *
600 PyMem_Malloc(size_t size)
601 {
602     /* see PyMem_RawMalloc() */
603     if (size > (size_t)PY_SSIZE_T_MAX)
604         return NULL;
605     return _PyMem.malloc(_PyMem.ctx, size);
606 }
607 
608 void *
609 PyMem_Calloc(size_t nelem, size_t elsize)
610 {
611     /* see PyMem_RawMalloc() */
612     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
613         return NULL;
614     return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
615 }
616 
617 void *
618 PyMem_Realloc(void *ptr, size_t new_size)
619 {
620     /* see PyMem_RawMalloc() */
621     if (new_size > (size_t)PY_SSIZE_T_MAX)
622         return NULL;
623     return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
624 }
625 
626 void
627 PyMem_Free(void *ptr)
628 {
629     _PyMem.free(_PyMem.ctx, ptr);
630 }
631 
632 
633 wchar_t*
634 _PyMem_RawWcsdup(const wchar_t *str)
635 {
636     assert(str != NULL);
637 
638     size_t len = wcslen(str);
639     if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
640         return NULL;
641     }
642 
643     size_t size = (len + 1) * sizeof(wchar_t);
644     wchar_t *str2 = PyMem_RawMalloc(size);
645     if (str2 == NULL) {
646         return NULL;
647     }
648 
649     memcpy(str2, str, size);
650     return str2;
651 }
652 
653 char *
654 _PyMem_RawStrdup(const char *str)
655 {
656     assert(str != NULL);
657     size_t size = strlen(str) + 1;
658     char *copy = PyMem_RawMalloc(size);
659     if (copy == NULL) {
660         return NULL;
661     }
662     memcpy(copy, str, size);
663     return copy;
664 }
665 
666 char *
667 _PyMem_Strdup(const char *str)
668 {
669     assert(str != NULL);
670     size_t size = strlen(str) + 1;
671     char *copy = PyMem_Malloc(size);
672     if (copy == NULL) {
673         return NULL;
674     }
675     memcpy(copy, str, size);
676     return copy;
677 }
678 
679 void *
680 PyObject_Malloc(size_t size)
681 {
682     /* see PyMem_RawMalloc() */
683     if (size > (size_t)PY_SSIZE_T_MAX)
684         return NULL;
685     return _PyObject.malloc(_PyObject.ctx, size);
686 }
687 
688 void *
689 PyObject_Calloc(size_t nelem, size_t elsize)
690 {
691     /* see PyMem_RawMalloc() */
692     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
693         return NULL;
694     return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
695 }
696 
697 void *
698 PyObject_Realloc(void *ptr, size_t new_size)
699 {
700     /* see PyMem_RawMalloc() */
701     if (new_size > (size_t)PY_SSIZE_T_MAX)
702         return NULL;
703     return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
704 }
705 
706 void
707 PyObject_Free(void *ptr)
708 {
709     _PyObject.free(_PyObject.ctx, ptr);
710 }
711 
712 
713 /* If we're using GCC, use __builtin_expect() to reduce overhead of
714    the valgrind checks */
715 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
716 #  define UNLIKELY(value) __builtin_expect((value), 0)
717 #  define LIKELY(value) __builtin_expect((value), 1)
718 #else
719 #  define UNLIKELY(value) (value)
720 #  define LIKELY(value) (value)
721 #endif
722 
723 #ifdef WITH_PYMALLOC
724 
725 #ifdef WITH_VALGRIND
726 #include <valgrind/valgrind.h>
727 
728 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
729 static int running_on_valgrind = -1;
730 #endif
731 
732 
733 /* An object allocator for Python.
734 
735    Here is an introduction to the layers of the Python memory architecture,
736    showing where the object allocator is actually used (layer +2). It is
737    called for every object allocation and deallocation (PyObject_New/Del),
738    unless the object-specific allocators implement a proprietary allocation
739    scheme (ex.: ints use a simple free list). This is also the place where
740    the cyclic garbage collector operates selectively on container objects.
741 
742 
743     Object-specific allocators
744     _____   ______   ______       ________
745    [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
746 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
747     _______________________________       |                           |
748    [   Python's object allocator   ]      |                           |
749 +2 | ####### Object memory ####### | <------ Internal buffers ------> |
750     ______________________________________________________________    |
751    [          Python's raw memory allocator (PyMem_ API)          ]   |
752 +1 | <----- Python memory (under PyMem manager's control) ------> |   |
753     __________________________________________________________________
754    [    Underlying general-purpose allocator (ex: C library malloc)   ]
755  0 | <------ Virtual memory allocated for the python process -------> |
756 
757    =========================================================================
758     _______________________________________________________________________
759    [                OS-specific Virtual Memory Manager (VMM)               ]
760 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
761     __________________________________   __________________________________
762    [                                  ] [                                  ]
763 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
764 
765 */
766 /*==========================================================================*/
767 
768 /* A fast, special-purpose memory allocator for small blocks, to be used
769    on top of a general-purpose malloc -- heavily based on previous art. */
770 
771 /* Vladimir Marangozov -- August 2000 */
772 
773 /*
774  * "Memory management is where the rubber meets the road -- if we do the wrong
775  * thing at any level, the results will not be good. And if we don't make the
776  * levels work well together, we are in serious trouble." (1)
777  *
778  * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
779  *    "Dynamic Storage Allocation: A Survey and Critical Review",
780  *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
781  */
782 
783 /* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
784 
785 /*==========================================================================*/
786 
787 /*
788  * Allocation strategy abstract:
789  *
790  * For small requests, the allocator sub-allocates <Big> blocks of memory.
791  * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
792  * system's allocator.
793  *
794  * Small requests are grouped in size classes spaced 8 bytes apart, due
795  * to the required valid alignment of the returned address. Requests of
796  * a particular size are serviced from memory pools of 4K (one VMM page).
797  * Pools are fragmented on demand and contain free lists of blocks of one
798  * particular size class. In other words, there is a fixed-size allocator
799  * for each size class. Free pools are shared by the different allocators
800  * thus minimizing the space reserved for a particular size class.
801  *
802  * This allocation strategy is a variant of what is known as "simple
803  * segregated storage based on array of free lists". The main drawback of
804  * simple segregated storage is that we might end up with lot of reserved
805  * memory for the different free lists, which degenerate in time. To avoid
806  * this, we partition each free list in pools and we share dynamically the
807  * reserved space between all free lists. This technique is quite efficient
808  * for memory intensive programs which allocate mainly small-sized blocks.
809  *
810  * For small requests we have the following table:
811  *
812  * Request in bytes     Size of allocated block      Size class idx
813  * ----------------------------------------------------------------
814  *        1-8                     8                       0
815  *        9-16                   16                       1
816  *       17-24                   24                       2
817  *       25-32                   32                       3
818  *       33-40                   40                       4
819  *       41-48                   48                       5
820  *       49-56                   56                       6
821  *       57-64                   64                       7
822  *       65-72                   72                       8
823  *        ...                   ...                     ...
824  *      497-504                 504                      62
825  *      505-512                 512                      63
826  *
827  *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
828  *      allocator.
829  */
830 
831 /*==========================================================================*/
832 
833 /*
834  * -- Main tunable settings section --
835  */
836 
837 /*
838  * Alignment of addresses returned to the user. 8-byte alignment works
839  * on most current architectures (with 32-bit or 64-bit address busses).
840  * The alignment value is also used for grouping small requests in size
841  * classes spaced ALIGNMENT bytes apart.
842  *
843  * You shouldn't change this unless you know what you are doing.
844  */
845 
846 #if SIZEOF_VOID_P > 4
847 #define ALIGNMENT              16               /* must be 2^N */
848 #define ALIGNMENT_SHIFT         4
849 #else
850 #define ALIGNMENT               8               /* must be 2^N */
851 #define ALIGNMENT_SHIFT         3
852 #endif
853 
854 /* Return the number of bytes in size class I, as a uint. */
855 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
856 
857 /*
858  * Max size threshold below which malloc requests are considered to be
859  * small enough in order to use preallocated memory pools. You can tune
860  * this value according to your application behaviour and memory needs.
861  *
862  * Note: a size threshold of 512 guarantees that newly created dictionaries
863  * will be allocated from preallocated memory pools on 64-bit.
864  *
865  * The following invariants must hold:
866  *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
867  *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
868  *
869  * Although not required, for better performance and space efficiency,
870  * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
871  */
872 #define SMALL_REQUEST_THRESHOLD 512
873 #define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
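
/* Editor's note: a minimal sketch (not part of the allocator) of how a small
 * request maps onto the size classes described in the strategy comment above
 * (the table there assumes 8-byte classes).  The helper name is hypothetical;
 * ALIGNMENT_SHIFT, INDEX2SIZE() and NB_SMALL_SIZE_CLASSES are the macros
 * defined above. */
#if 0   /* example only, not compiled into the file */
static uint
example_size_class(size_t nbytes)  /* assumes 1 <= nbytes <= SMALL_REQUEST_THRESHOLD */
{
    uint idx = (uint)((nbytes - 1) >> ALIGNMENT_SHIFT);  /* e.g. 1..ALIGNMENT -> 0 */
    assert(idx < NB_SMALL_SIZE_CLASSES);
    assert(INDEX2SIZE(idx) >= nbytes);   /* the block handed out is large enough */
    return idx;
}
#endif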
874 
875 /*
876  * The system's VMM page size can be obtained on most unices with a
877  * getpagesize() call or deduced from various header files. To make
878  * things simpler, we assume that it is 4K, which is OK for most systems.
879  * It is probably better if this is the native page size, but it doesn't
880  * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
881  * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
882  * violation fault.  4K is apparently OK for all the platforms that python
883  * currently targets.
884  */
885 #define SYSTEM_PAGE_SIZE        (4 * 1024)
886 #define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
887 
888 /*
889  * Maximum amount of memory managed by the allocator for small requests.
890  */
891 #ifdef WITH_MEMORY_LIMITS
892 #ifndef SMALL_MEMORY_LIMIT
893 #define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
894 #endif
895 #endif
896 
897 /*
898  * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
899  * on a page boundary. This is a reserved virtual address space for the
900  * current process (obtained through a malloc()/mmap() call). In no way does
901  * this mean that the memory arenas will be used entirely. A malloc(<Big>) is
902  * usually an address range reservation for <Big> bytes, unless all pages within
903  * this space are referenced subsequently. So malloc'ing big blocks and not
904  * using them does not mean "wasting memory". It's an addressable range
905  * wastage...
906  *
907  * Arenas are allocated with mmap() on systems supporting anonymous memory
908  * mappings to reduce heap fragmentation.
909  */
910 #define ARENA_SIZE              (256 << 10)     /* 256KB */
911 
912 #ifdef WITH_MEMORY_LIMITS
913 #define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
914 #endif
915 
916 /*
917  * Size of the pools used for small blocks. Should be a power of 2,
918  * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
919  */
920 #define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
921 #define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
922 
923 #define MAX_POOLS_IN_ARENA  (ARENA_SIZE / POOL_SIZE)
924 #if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
925 #   error "arena size not an exact multiple of pool size"
926 #endif
927 
928 /*
929  * -- End of tunable settings section --
930  */
931 
932 /*==========================================================================*/
933 
934 /* When you say memory, my mind reasons in terms of (pointers to) blocks */
935 typedef uint8_t block;
936 
937 /* Pool for small blocks. */
938 struct pool_header {
939     union { block *_padding;
940             uint count; } ref;          /* number of allocated blocks    */
941     block *freeblock;                   /* pool's free list head         */
942     struct pool_header *nextpool;       /* next pool of this size class  */
943     struct pool_header *prevpool;       /* previous pool       ""        */
944     uint arenaindex;                    /* index into arenas of base adr */
945     uint szidx;                         /* block size class index        */
946     uint nextoffset;                    /* bytes to virgin block         */
947     uint maxnextoffset;                 /* largest valid nextoffset      */
948 };
949 
950 typedef struct pool_header *poolp;
951 
952 /* Record keeping for arenas. */
953 struct arena_object {
954     /* The address of the arena, as returned by malloc.  Note that 0
955      * will never be returned by a successful malloc, and is used
956      * here to mark an arena_object that doesn't correspond to an
957      * allocated arena.
958      */
959     uintptr_t address;
960 
961     /* Pool-aligned pointer to the next pool to be carved off. */
962     block* pool_address;
963 
964     /* The number of available pools in the arena:  free pools + never-
965      * allocated pools.
966      */
967     uint nfreepools;
968 
969     /* The total number of pools in the arena, whether or not available. */
970     uint ntotalpools;
971 
972     /* Singly-linked list of available pools. */
973     struct pool_header* freepools;
974 
975     /* Whenever this arena_object is not associated with an allocated
976      * arena, the nextarena member is used to link all unassociated
977      * arena_objects in the singly-linked `unused_arena_objects` list.
978      * The prevarena member is unused in this case.
979      *
980      * When this arena_object is associated with an allocated arena
981      * with at least one available pool, both members are used in the
982      * doubly-linked `usable_arenas` list, which is maintained in
983      * increasing order of `nfreepools` values.
984      *
985      * Else this arena_object is associated with an allocated arena
986      * all of whose pools are in use.  `nextarena` and `prevarena`
987      * are both meaningless in this case.
988      */
989     struct arena_object* nextarena;
990     struct arena_object* prevarena;
991 };
992 
993 #define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
994 
995 #define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
996 
997 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
998 #define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
999 
1000 /* Return total number of blocks in pool of size index I, as a uint. */
1001 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
1002 
1003 /*==========================================================================*/
1004 
1005 /*
1006  * Pool table -- headed, circular, doubly-linked lists of partially used pools.
1007 
1008 This is involved.  For an index i, usedpools[i+i] is the header for a list of
1009 all partially used pools holding small blocks with "size class idx" i. So
1010 usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
1011 16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
1012 
1013 Pools are carved off an arena's highwater mark (an arena_object's pool_address
1014 member) as needed.  Once carved off, a pool is in one of three states forever
1015 after:
1016 
1017 used == partially used, neither empty nor full
1018     At least one block in the pool is currently allocated, and at least one
1019     block in the pool is not currently allocated (note this implies a pool
1020     has room for at least two blocks).
1021     This is a pool's initial state, as a pool is created only when malloc
1022     needs space.
1023     The pool holds blocks of a fixed size, and is in the circular list headed
1024     at usedpools[i] (see above).  It's linked to the other used pools of the
1025     same size class via the pool_header's nextpool and prevpool members.
1026     If all but one block is currently allocated, a malloc can cause a
1027     transition to the full state.  If all but one block is not currently
1028     allocated, a free can cause a transition to the empty state.
1029 
1030 full == all the pool's blocks are currently allocated
1031     On transition to full, a pool is unlinked from its usedpools[] list.
1032     It's no longer linked to from anything, and its nextpool and
1033     prevpool members are meaningless until it transitions back to used.
1034     A free of a block in a full pool puts the pool back in the used state.
1035     Then it's linked in at the front of the appropriate usedpools[] list, so
1036     that the next allocation for its size class will reuse the freed block.
1037 
1038 empty == all the pool's blocks are currently available for allocation
1039     On transition to empty, a pool is unlinked from its usedpools[] list,
1040     and linked to the front of its arena_object's singly-linked freepools list,
1041     via its nextpool member.  The prevpool member has no meaning in this case.
1042     Empty pools have no inherent size class:  the next time a malloc finds
1043     an empty list in usedpools[], it takes the first pool off of freepools.
1044     If the size class needed happens to be the same as the size class the pool
1045     last had, some pool initialization can be skipped.
1046 
1047 
1048 Block Management
1049 
1050 Blocks within pools are again carved out as needed.  pool->freeblock points to
1051 the start of a singly-linked list of free blocks within the pool.  When a
1052 block is freed, it's inserted at the front of its pool's freeblock list.  Note
1053 that the available blocks in a pool are *not* linked all together when a pool
1054 is initialized.  Instead only "the first two" (lowest addresses) blocks are
1055 set up, returning the first such block, and setting pool->freeblock to a
1056 one-block list holding the second such block.  This is consistent with that
1057 one-block list holding the second such block.  This is consistent with the fact
1058 that pymalloc strives at all levels (arena, pool, and block) never to touch a piece
1059 
1060 So long as a pool is in the used state, we're certain there *is* a block
1061 available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
1062 points to the end of the free list before we've carved the entire pool into
1063 blocks, that means we simply haven't yet gotten to one of the higher-address
1064 blocks.  The offset from the pool_header to the start of "the next" virgin
1065 block is stored in the pool_header nextoffset member, and the largest value
1066 of nextoffset that makes sense is stored in the maxnextoffset member when a
1067 pool is initialized.  All the blocks in a pool have been passed out at least
1068 once when and only when nextoffset > maxnextoffset.
1069 
1070 
1071 Major obscurity:  While the usedpools vector is declared to have poolp
1072 entries, it doesn't really.  It really contains two pointers per (conceptual)
1073 poolp entry, the nextpool and prevpool members of a pool_header.  The
1074 excruciating initialization code below fools C so that
1075 
1076     usedpools[i+i]
1077 
1078 "acts like" a genuine poolp, but only so long as you only reference its
1079 nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
1080 compensating for that a pool_header's nextpool and prevpool members
1081 immediately follow a pool_header's first two members:
1082 
1083     union { block *_padding;
1084             uint count; } ref;
1085     block *freeblock;
1086 
1087 each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
1088 contains is a fudged-up pointer p such that *if* C believes it's a poolp
1089 pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
1090 circular list is empty).
1091 
1092 It's unclear why the usedpools setup is so convoluted.  It could be to
1093 minimize the amount of cache required to hold this heavily-referenced table
1094 (which only *needs* the two interpool pointer members of a pool_header). OTOH,
1095 referencing code has to remember to "double the index" and doing so isn't
1096 free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
1097 on the fact that C doesn't insert any padding anywhere in a pool_header at or before
1098 the prevpool member.
1099 **************************************************************************** */
1100 
1101 #define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
1102 #define PT(x)   PTA(x), PTA(x)
1103 
1104 static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
1105     PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
1106 #if NB_SMALL_SIZE_CLASSES > 8
1107     , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
1108 #if NB_SMALL_SIZE_CLASSES > 16
1109     , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
1110 #if NB_SMALL_SIZE_CLASSES > 24
1111     , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
1112 #if NB_SMALL_SIZE_CLASSES > 32
1113     , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
1114 #if NB_SMALL_SIZE_CLASSES > 40
1115     , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
1116 #if NB_SMALL_SIZE_CLASSES > 48
1117     , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
1118 #if NB_SMALL_SIZE_CLASSES > 56
1119     , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
1120 #if NB_SMALL_SIZE_CLASSES > 64
1121 #error "NB_SMALL_SIZE_CLASSES should be less than 64"
1122 #endif /* NB_SMALL_SIZE_CLASSES > 64 */
1123 #endif /* NB_SMALL_SIZE_CLASSES > 56 */
1124 #endif /* NB_SMALL_SIZE_CLASSES > 48 */
1125 #endif /* NB_SMALL_SIZE_CLASSES > 40 */
1126 #endif /* NB_SMALL_SIZE_CLASSES > 32 */
1127 #endif /* NB_SMALL_SIZE_CLASSES > 24 */
1128 #endif /* NB_SMALL_SIZE_CLASSES > 16 */
1129 #endif /* NB_SMALL_SIZE_CLASSES >  8 */
1130 };
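
/* Editor's note: a minimal sketch (not part of the allocator) of how this
 * table is consulted.  For size class index `size`, the list header lives at
 * usedpools[size + size]; because of the PTA() trick above, an empty list is
 * one whose header's nextpool points back at the header itself.  The helper
 * name is hypothetical. */
#if 0   /* example only, not compiled into the file */
static int
example_has_used_pool(uint size)
{
    poolp head = usedpools[size + size];
    return head->nextpool != head;   /* non-empty: a partially used pool exists */
}
#endif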
1131 
1132 /*==========================================================================
1133 Arena management.
1134 
1135 `arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
1136 which may not be currently used (== they're arena_objects that aren't
1137 currently associated with an allocated arena).  Note that arenas proper are
1138 separately malloc'ed.
1139 
1140 Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
1141 we do try to free() arenas, and use some mild heuristic strategies to increase
1142 the likelihood that arenas eventually can be freed.
1143 
1144 unused_arena_objects
1145 
1146     This is a singly-linked list of the arena_objects that are currently not
1147     being used (no arena is associated with them).  Objects are taken off the
1148     head of the list in new_arena(), and are pushed on the head of the list in
1149     PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
1150     is on this list if and only if its .address member is 0.
1151 
1152 usable_arenas
1153 
1154     This is a doubly-linked list of the arena_objects associated with arenas
1155     that have pools available.  These pools are either waiting to be reused,
1156     or have not been used before.  The list is sorted to have the most-
1157     allocated arenas first (ascending order based on the nfreepools member).
1158     This means that the next allocation will come from a heavily used arena,
1159     which gives the nearly empty arenas a chance to be returned to the system.
1160     In my unscientific tests this dramatically improved the number of arenas
1161     that could be freed.
1162 
1163 Note that an arena_object associated with an arena all of whose pools are
1164 currently in use isn't on either list.
1165 
1166 Changed in Python 3.8:  keeping usable_arenas sorted by number of free pools
1167 used to be done by one-at-a-time linear search when an arena's number of
1168 free pools changed.  That could, overall, consume time quadratic in the
1169 number of arenas.  That didn't really matter when there were only a few
1170 hundred arenas (typical!), but could be a timing disaster when there were
1171 hundreds of thousands.  See bpo-37029.
1172 
1173 Now we have a vector of "search fingers" to eliminate the need to search:
1174 nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas
1175 with nfp free pools.  This is NULL if and only if there is no arena with
1176 nfp free pools in usable_arenas.
1177 */
1178 
1179 /* Array of objects used to track chunks of memory (arenas). */
1180 static struct arena_object* arenas = NULL;
1181 /* Number of slots currently allocated in the `arenas` vector. */
1182 static uint maxarenas = 0;
1183 
1184 /* The head of the singly-linked, NULL-terminated list of available
1185  * arena_objects.
1186  */
1187 static struct arena_object* unused_arena_objects = NULL;
1188 
1189 /* The head of the doubly-linked, NULL-terminated at each end, list of
1190  * arena_objects associated with arenas that have pools available.
1191  */
1192 static struct arena_object* usable_arenas = NULL;
1193 
1194 /* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */
1195 static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL };
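
/* Editor's note: a minimal sketch (not part of the allocator) of the
 * invariant the "search fingers" above maintain: usable_arenas is kept sorted
 * by nfreepools, so arenas with equal nfreepools are contiguous, and
 * nfp2lasta[n] must point at the last arena of the run with n free pools.
 * The checker name is hypothetical. */
#if 0   /* example only, not compiled into the file */
static int
example_check_nfp2lasta(void)
{
    for (struct arena_object *a = usable_arenas; a != NULL; a = a->nextarena) {
        if (a->nextarena == NULL || a->nextarena->nfreepools != a->nfreepools) {
            /* `a` is the last arena in the list with this nfreepools value. */
            if (nfp2lasta[a->nfreepools] != a) {
                return 0;   /* invariant violated */
            }
        }
    }
    return 1;
}
#endif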
1196 
1197 /* How many arena_objects do we initially allocate?
1198  * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
1199  * `arenas` vector.
1200  */
1201 #define INITIAL_ARENA_OBJECTS 16
1202 
1203 /* Number of arenas allocated that haven't been free()'d. */
1204 static size_t narenas_currently_allocated = 0;
1205 
1206 /* Total number of times malloc() called to allocate an arena. */
1207 static size_t ntimes_arena_allocated = 0;
1208 /* High water mark (max value ever seen) for narenas_currently_allocated. */
1209 static size_t narenas_highwater = 0;
1210 
1211 static Py_ssize_t raw_allocated_blocks;
1212 
1213 Py_ssize_t
1214 _Py_GetAllocatedBlocks(void)
1215 {
1216     Py_ssize_t n = raw_allocated_blocks;
1217     /* add up allocated blocks for used pools */
1218     for (uint i = 0; i < maxarenas; ++i) {
1219         /* Skip arenas which are not allocated. */
1220         if (arenas[i].address == 0) {
1221             continue;
1222         }
1223 
1224         uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenas[i].address, POOL_SIZE);
1225 
1226         /* visit every pool in the arena */
1227         assert(base <= (uintptr_t) arenas[i].pool_address);
1228         for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) {
1229             poolp p = (poolp)base;
1230             n += p->ref.count;
1231         }
1232     }
1233     return n;
1234 }
1235 
1236 
1237 /* Allocate a new arena.  If we run out of memory, return NULL.  Else
1238  * allocate a new arena, and return the address of an arena_object
1239  * describing the new arena.  It's expected that the caller will set
1240  * `usable_arenas` to the return value.
1241  */
1242 static struct arena_object*
1243 new_arena(void)
1244 {
1245     struct arena_object* arenaobj;
1246     uint excess;        /* number of bytes above pool alignment */
1247     void *address;
1248     static int debug_stats = -1;
1249 
1250     if (debug_stats == -1) {
1251         const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1252         debug_stats = (opt != NULL && *opt != '\0');
1253     }
1254     if (debug_stats)
1255         _PyObject_DebugMallocStats(stderr);
1256 
1257     if (unused_arena_objects == NULL) {
1258         uint i;
1259         uint numarenas;
1260         size_t nbytes;
1261 
1262         /* Double the number of arena objects on each allocation.
1263          * Note that it's possible for `numarenas` to overflow.
1264          */
1265         numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1266         if (numarenas <= maxarenas)
1267             return NULL;                /* overflow */
1268 #if SIZEOF_SIZE_T <= SIZEOF_INT
1269         if (numarenas > SIZE_MAX / sizeof(*arenas))
1270             return NULL;                /* overflow */
1271 #endif
1272         nbytes = numarenas * sizeof(*arenas);
1273         arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
1274         if (arenaobj == NULL)
1275             return NULL;
1276         arenas = arenaobj;
1277 
1278         /* We might need to fix pointers that were copied.  However,
1279          * new_arena only gets called when all the pages in the
1280          * previous arenas are full.  Thus, there are *no* pointers
1281          * into the old array. Thus, we don't have to worry about
1282          * invalid pointers.  Just to be sure, some asserts:
1283          */
1284         assert(usable_arenas == NULL);
1285         assert(unused_arena_objects == NULL);
1286 
1287         /* Put the new arenas on the unused_arena_objects list. */
1288         for (i = maxarenas; i < numarenas; ++i) {
1289             arenas[i].address = 0;              /* mark as unassociated */
1290             arenas[i].nextarena = i < numarenas - 1 ?
1291                                    &arenas[i+1] : NULL;
1292         }
1293 
1294         /* Update globals. */
1295         unused_arena_objects = &arenas[maxarenas];
1296         maxarenas = numarenas;
1297     }
1298 
1299     /* Take the next available arena object off the head of the list. */
1300     assert(unused_arena_objects != NULL);
1301     arenaobj = unused_arena_objects;
1302     unused_arena_objects = arenaobj->nextarena;
1303     assert(arenaobj->address == 0);
1304     address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1305     if (address == NULL) {
1306         /* The allocation failed: return NULL after putting the
1307          * arenaobj back.
1308          */
1309         arenaobj->nextarena = unused_arena_objects;
1310         unused_arena_objects = arenaobj;
1311         return NULL;
1312     }
1313     arenaobj->address = (uintptr_t)address;
1314 
1315     ++narenas_currently_allocated;
1316     ++ntimes_arena_allocated;
1317     if (narenas_currently_allocated > narenas_highwater)
1318         narenas_highwater = narenas_currently_allocated;
1319     arenaobj->freepools = NULL;
1320     /* pool_address <- first pool-aligned address in the arena
1321        nfreepools <- number of whole pools that fit after alignment */
1322     arenaobj->pool_address = (block*)arenaobj->address;
1323     arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
1324     excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1325     if (excess != 0) {
1326         --arenaobj->nfreepools;
1327         arenaobj->pool_address += POOL_SIZE - excess;
1328     }
1329     arenaobj->ntotalpools = arenaobj->nfreepools;
1330 
1331     return arenaobj;
1332 }
1333 
1334 
1335 /*
1336 address_in_range(P, POOL)
1337 
1338 Return true if and only if P is an address that was allocated by pymalloc.
1339 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1340 (the caller is asked to compute this because the macro expands POOL more than
1341 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1342 variable and pass the latter to the macro; because address_in_range is
1343 called on every alloc/realloc/free, micro-efficiency is important here).
1344 
1345 Tricky:  Let B be the arena base address associated with the pool, B =
1346 arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
1347 
1348     B <= P < B + ARENA_SIZE
1349 
1350 Subtracting B throughout, this is true iff
1351 
1352     0 <= P-B < ARENA_SIZE
1353 
1354 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1355 
1356 Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
1357 before the first arena has been allocated.  `arenas` is still NULL in that
1358 case.  We're relying on the fact that maxarenas is also 0 in that case, so that
1359 (POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
1360 into a NULL arenas.
1361 
1362 Details:  given P and POOL, the arena_object corresponding to P is AO =
1363 arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
1364 stores, etc), POOL is the correct address of P's pool, AO.address is the
1365 correct base address of the pool's arena, and P must be within ARENA_SIZE of
1366 AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
1367 (NULL)).  Therefore address_in_range correctly reports that obmalloc
1368 controls P.
1369 
1370 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1371 call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
1372 in this case -- it may even be uninitialized trash.  If the trash arenaindex
1373 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1374 control P.
1375 
1376 Else arenaindex is < maxarenas, and AO is read up.  If AO corresponds to an
1377 allocated arena, obmalloc controls all the memory in slice AO.address :
1378 AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
1379 so P doesn't lie in that slice, so the macro correctly reports that P is not
1380 controlled by obmalloc.
1381 
1382 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1383 arena_object (one not currently associated with an allocated arena),
1384 AO.address is 0, and the second test in the macro reduces to:
1385 
1386     P < ARENA_SIZE
1387 
1388 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1389 that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
1390 of the test still passes, and the third clause (AO.address != 0) is necessary
1391 to get the correct result:  AO.address is 0 in this case, so the macro
1392 correctly reports that P is not controlled by obmalloc (despite that P lies in
1393 slice AO.address : AO.address + ARENA_SIZE).
1394 
1395 Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
1396 2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
1397 corresponded to a currently-allocated arena, so the "P is not controlled by
1398 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1399 was impossible.
1400 
1401 Note that the logic is excruciating, and reading up possibly uninitialized
1402 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1403 creates problems for some memory debuggers.  The overwhelming advantage is
1404 that this test determines whether an arbitrary address is controlled by
1405 obmalloc in a small constant time, independent of the number of arenas
1406 obmalloc controls.  Since this test is needed at every entry point, it's
1407 extremely desirable that it be this fast.
1408 */
1409 
1410 static bool _Py_NO_SANITIZE_ADDRESS
1411             _Py_NO_SANITIZE_THREAD
1412             _Py_NO_SANITIZE_MEMORY
1413 address_in_range(void *p, poolp pool)
1414 {
1415     // Since address_in_range may be reading from memory which was not allocated
1416     // by Python, it is important that pool->arenaindex is read only once, as
1417     // another thread may be concurrently modifying the value without holding
1418     // the GIL. The following dance forces the compiler to read pool->arenaindex
1419     // only once.
1420     uint arenaindex = *((volatile uint *)&pool->arenaindex);
1421     return arenaindex < maxarenas &&
1422         (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1423         arenas[arenaindex].address != 0;
1424 }
1425 
1426 
1427 /*==========================================================================*/
1428 
1429 // Called when freelist is exhausted.  Extend the freelist if there is
1430 // space for a block.  Otherwise, remove this pool from usedpools.
1431 static void
1432 pymalloc_pool_extend(poolp pool, uint size)
1433 {
1434     if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
1435         /* There is room for another block. */
1436         pool->freeblock = (block*)pool + pool->nextoffset;
1437         pool->nextoffset += INDEX2SIZE(size);
1438         *(block **)(pool->freeblock) = NULL;
1439         return;
1440     }
1441 
1442     /* Pool is full, unlink from used pools. */
1443     poolp next;
1444     next = pool->nextpool;
1445     pool = pool->prevpool;
1446     next->prevpool = pool;
1447     pool->nextpool = next;
1448 }
1449 
1450 /* Called when pymalloc_alloc cannot allocate a block from a used pool.
1451  * This function takes a new pool and allocates a block from it.
1452  */
1453 static void*
1454 allocate_from_new_pool(uint size)
1455 {
1456     /* There isn't a pool of the right size class immediately
1457      * available:  use a free pool.
1458      */
1459     if (UNLIKELY(usable_arenas == NULL)) {
1460         /* No arena has a free pool:  allocate a new arena. */
1461 #ifdef WITH_MEMORY_LIMITS
1462         if (narenas_currently_allocated >= MAX_ARENAS) {
1463             return NULL;
1464         }
1465 #endif
1466         usable_arenas = new_arena();
1467         if (usable_arenas == NULL) {
1468             return NULL;
1469         }
1470         usable_arenas->nextarena = usable_arenas->prevarena = NULL;
1471         assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1472         nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1473     }
1474     assert(usable_arenas->address != 0);
1475 
1476     /* This arena already had the smallest nfreepools value, so decreasing
1477      * nfreepools doesn't change that, and we don't need to rearrange the
1478      * usable_arenas list.  However, if the arena becomes wholly allocated,
1479      * we need to remove its arena_object from usable_arenas.
1480      */
1481     assert(usable_arenas->nfreepools > 0);
1482     if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1483         /* It's the last arena with this count, so there won't be any left with it. */
1484         nfp2lasta[usable_arenas->nfreepools] = NULL;
1485     }
1486     /* If any free pools will remain, it will be the new smallest. */
1487     if (usable_arenas->nfreepools > 1) {
1488         assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1489         nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1490     }
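    /* Illustrative (hypothetical counts): if this arena has nfreepools == 3 and
     * is also the rightmost arena with that count, nfp2lasta[3] was just
     * cleared; since 2 pools will remain free once a pool is taken below,
     * nfp2lasta[2] now points at this arena. */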
1491 
1492     /* Try to get a cached free pool. */
1493     poolp pool = usable_arenas->freepools;
1494     if (LIKELY(pool != NULL)) {
1495         /* Unlink from cached pools. */
1496         usable_arenas->freepools = pool->nextpool;
1497         usable_arenas->nfreepools--;
1498         if (UNLIKELY(usable_arenas->nfreepools == 0)) {
1499             /* Wholly allocated:  remove. */
1500             assert(usable_arenas->freepools == NULL);
1501             assert(usable_arenas->nextarena == NULL ||
1502                    usable_arenas->nextarena->prevarena ==
1503                    usable_arenas);
1504             usable_arenas = usable_arenas->nextarena;
1505             if (usable_arenas != NULL) {
1506                 usable_arenas->prevarena = NULL;
1507                 assert(usable_arenas->address != 0);
1508             }
1509         }
1510         else {
1511             /* nfreepools > 0:  it must be that freepools
1512              * isn't NULL, or that we haven't yet carved
1513              * off all the arena's pools for the first
1514              * time.
1515              */
1516             assert(usable_arenas->freepools != NULL ||
1517                    usable_arenas->pool_address <=
1518                    (block*)usable_arenas->address +
1519                        ARENA_SIZE - POOL_SIZE);
1520         }
1521     }
1522     else {
1523         /* Carve off a new pool. */
1524         assert(usable_arenas->nfreepools > 0);
1525         assert(usable_arenas->freepools == NULL);
1526         pool = (poolp)usable_arenas->pool_address;
1527         assert((block*)pool <= (block*)usable_arenas->address +
1528                                  ARENA_SIZE - POOL_SIZE);
1529         pool->arenaindex = (uint)(usable_arenas - arenas);
1530         assert(&arenas[pool->arenaindex] == usable_arenas);
1531         pool->szidx = DUMMY_SIZE_IDX;
1532         usable_arenas->pool_address += POOL_SIZE;
1533         --usable_arenas->nfreepools;
1534 
1535         if (usable_arenas->nfreepools == 0) {
1536             assert(usable_arenas->nextarena == NULL ||
1537                    usable_arenas->nextarena->prevarena ==
1538                    usable_arenas);
1539             /* Unlink the arena:  it is completely allocated. */
1540             usable_arenas = usable_arenas->nextarena;
1541             if (usable_arenas != NULL) {
1542                 usable_arenas->prevarena = NULL;
1543                 assert(usable_arenas->address != 0);
1544             }
1545         }
1546     }
1547 
1548     /* Frontlink to used pools. */
1549     block *bp;
1550     poolp next = usedpools[size + size]; /* == prev */
1551     pool->nextpool = next;
1552     pool->prevpool = next;
1553     next->nextpool = pool;
1554     next->prevpool = pool;
1555     pool->ref.count = 1;
1556     if (pool->szidx == size) {
1557         /* Luckily, this pool last contained blocks
1558          * of the same size class, so its header
1559          * and free list are already initialized.
1560          */
1561         bp = pool->freeblock;
1562         assert(bp != NULL);
1563         pool->freeblock = *(block **)bp;
1564         return bp;
1565     }
1566     /*
1567      * Initialize the pool header, set up the free list to
1568      * contain just the second block, and return the first
1569      * block.
1570      */
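    /* Illustrative (build-dependent constants, assuming POOL_SIZE == 4096 and
     * POOL_OVERHEAD == 48): for the 32-byte size class, the first block bp sits
     * at offset 48 into the pool, freeblock points at offset 80, nextoffset
     * becomes 112, and maxnextoffset is 4096 - 32 == 4064. */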
1571     pool->szidx = size;
1572     size = INDEX2SIZE(size);
1573     bp = (block *)pool + POOL_OVERHEAD;
1574     pool->nextoffset = POOL_OVERHEAD + (size << 1);
1575     pool->maxnextoffset = POOL_SIZE - size;
1576     pool->freeblock = bp + size;
1577     *(block **)(pool->freeblock) = NULL;
1578     return bp;
1579 }
1580 
1581 /* pymalloc allocator
1582 
1583    Return a pointer to newly allocated memory if pymalloc allocated memory.
1584 
1585    Return NULL if pymalloc did not allocate the block: the request was empty or
1586    bigger than SMALL_REQUEST_THRESHOLD, an error occurred in the code below (the
1587    caller then falls back to the raw allocator), or the max memory limit was reached.
1588 */
1589 static inline void*
1590 pymalloc_alloc(void *ctx, size_t nbytes)
1591 {
1592 #ifdef WITH_VALGRIND
1593     if (UNLIKELY(running_on_valgrind == -1)) {
1594         running_on_valgrind = RUNNING_ON_VALGRIND;
1595     }
1596     if (UNLIKELY(running_on_valgrind)) {
1597         return NULL;
1598     }
1599 #endif
1600 
1601     if (UNLIKELY(nbytes == 0)) {
1602         return NULL;
1603     }
1604     if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
1605         return NULL;
1606     }
1607 
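    /* Illustrative (assuming a 64-bit build with ALIGNMENT == 16 and
     * ALIGNMENT_SHIFT == 4): a 25-byte request maps to size class index
     * (25 - 1) >> 4 == 1, i.e. the 32-byte class, and usedpools[1 + 1] heads
     * the circular list of used pools for that class. */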
1608     uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1609     poolp pool = usedpools[size + size];
1610     block *bp;
1611 
1612     if (LIKELY(pool != pool->nextpool)) {
1613         /*
1614          * There is a used pool for this size class.
1615          * Pick up the head block of its free list.
1616          */
1617         ++pool->ref.count;
1618         bp = pool->freeblock;
1619         assert(bp != NULL);
1620 
1621         if (UNLIKELY((pool->freeblock = *(block **)bp) == NULL)) {
1622             // Reached the end of the free list, try to extend it.
1623             pymalloc_pool_extend(pool, size);
1624         }
1625     }
1626     else {
1627         /* There isn't a pool of the right size class immediately
1628          * available:  use a free pool.
1629          */
1630         bp = allocate_from_new_pool(size);
1631     }
1632 
1633     return (void *)bp;
1634 }
1635 
1636 
1637 static void *
1638 _PyObject_Malloc(void *ctx, size_t nbytes)
1639 {
1640     void* ptr = pymalloc_alloc(ctx, nbytes);
1641     if (LIKELY(ptr != NULL)) {
1642         return ptr;
1643     }
1644 
1645     ptr = PyMem_RawMalloc(nbytes);
1646     if (ptr != NULL) {
1647         raw_allocated_blocks++;
1648     }
1649     return ptr;
1650 }
1651 
1652 
1653 static void *
1654 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
1655 {
1656     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
1657     size_t nbytes = nelem * elsize;
1658 
1659     void* ptr = pymalloc_alloc(ctx, nbytes);
1660     if (LIKELY(ptr != NULL)) {
1661         memset(ptr, 0, nbytes);
1662         return ptr;
1663     }
1664 
1665     ptr = PyMem_RawCalloc(nelem, elsize);
1666     if (ptr != NULL) {
1667         raw_allocated_blocks++;
1668     }
1669     return ptr;
1670 }
1671 
1672 
1673 static void
1674 insert_to_usedpool(poolp pool)
1675 {
1676     assert(pool->ref.count > 0);            /* else the pool is empty */
1677 
1678     uint size = pool->szidx;
1679     poolp next = usedpools[size + size];
1680     poolp prev = next->prevpool;
1681 
1682     /* insert pool before next:   prev <-> pool <-> next */
1683     pool->nextpool = next;
1684     pool->prevpool = prev;
1685     next->prevpool = pool;
1686     prev->nextpool = pool;
1687 }
1688 
1689 static void
1690 insert_to_freepool(poolp pool)
1691 {
1692     poolp next = pool->nextpool;
1693     poolp prev = pool->prevpool;
1694     next->prevpool = prev;
1695     prev->nextpool = next;
1696 
1697     /* Link the pool to freepools.  This is a singly-linked
1698      * list, and pool->prevpool isn't used there.
1699      */
1700     struct arena_object *ao = &arenas[pool->arenaindex];
1701     pool->nextpool = ao->freepools;
1702     ao->freepools = pool;
1703     uint nf = ao->nfreepools;
1704     /* If this is the rightmost arena with this number of free pools,
1705      * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
1706      * are no arenas in usable_arenas with that value.
1707      */
1708     struct arena_object* lastnf = nfp2lasta[nf];
1709     assert((nf == 0 && lastnf == NULL) ||
1710            (nf > 0 &&
1711             lastnf != NULL &&
1712             lastnf->nfreepools == nf &&
1713             (lastnf->nextarena == NULL ||
1714              nf < lastnf->nextarena->nfreepools)));
1715     if (lastnf == ao) {  /* it is the rightmost */
1716         struct arena_object* p = ao->prevarena;
1717         nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
1718     }
1719     ao->nfreepools = ++nf;
1720 
1721     /* All the rest is arena management.  We just freed
1722      * a pool, and there are 4 cases for arena mgmt:
1723      * 1. If all the pools are free, return the arena to
1724      *    the system free().  Except if this is the last
1725      *    arena in the list, keep it to avoid thrashing:
1726      *    keeping one wholly free arena in the list avoids
1727      *    pathological cases where a simple loop would
1728      *    otherwise provoke needing to allocate and free an
1729      *    arena on every iteration.  See bpo-37257.
1730      * 2. If this is the only free pool in the arena,
1731      *    add the arena back to the `usable_arenas` list.
1732      * 3. If the "next" arena has a smaller count of free
1733      *    pools, we have to "slide this arena right" to
1734      *    restore that usable_arenas is sorted in order of
1735      *    nfreepools.
1736      * 4. Else there's nothing more to do.
1737      */
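    /* Illustrative walk-through of case 3 (hypothetical counts): if this arena
     * goes from 2 to 3 free pools while the arena after it in usable_arenas
     * still has only 2, it is unlinked and re-inserted just after lastnf, the
     * rightmost arena that still has 2 free pools, restoring the ascending
     * nfreepools order. */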
1738     if (nf == ao->ntotalpools && ao->nextarena != NULL) {
1739         /* Case 1.  First unlink ao from usable_arenas.
1740          */
1741         assert(ao->prevarena == NULL ||
1742                ao->prevarena->address != 0);
1743         assert(ao->nextarena == NULL ||
1744                ao->nextarena->address != 0);
1745 
1746         /* Fix the pointer in the prevarena, or the
1747          * usable_arenas pointer.
1748          */
1749         if (ao->prevarena == NULL) {
1750             usable_arenas = ao->nextarena;
1751             assert(usable_arenas == NULL ||
1752                    usable_arenas->address != 0);
1753         }
1754         else {
1755             assert(ao->prevarena->nextarena == ao);
1756             ao->prevarena->nextarena =
1757                 ao->nextarena;
1758         }
1759         /* Fix the pointer in the nextarena. */
1760         if (ao->nextarena != NULL) {
1761             assert(ao->nextarena->prevarena == ao);
1762             ao->nextarena->prevarena =
1763                 ao->prevarena;
1764         }
1765         /* Record that this arena_object slot is
1766          * available to be reused.
1767          */
1768         ao->nextarena = unused_arena_objects;
1769         unused_arena_objects = ao;
1770 
1771         /* Free the entire arena. */
1772         _PyObject_Arena.free(_PyObject_Arena.ctx,
1773                              (void *)ao->address, ARENA_SIZE);
1774         ao->address = 0;                        /* mark unassociated */
1775         --narenas_currently_allocated;
1776 
1777         return;
1778     }
1779 
1780     if (nf == 1) {
1781         /* Case 2.  Put ao at the head of
1782          * usable_arenas.  Note that because
1783          * ao->nfreepools was 0 before, ao isn't
1784          * currently on the usable_arenas list.
1785          */
1786         ao->nextarena = usable_arenas;
1787         ao->prevarena = NULL;
1788         if (usable_arenas)
1789             usable_arenas->prevarena = ao;
1790         usable_arenas = ao;
1791         assert(usable_arenas->address != 0);
1792         if (nfp2lasta[1] == NULL) {
1793             nfp2lasta[1] = ao;
1794         }
1795 
1796         return;
1797     }
1798 
1799     /* If this arena is now out of order, we need to keep
1800      * the list sorted.  The list is kept sorted so that
1801      * the "most full" arenas are used first, which allows
1802      * the nearly empty arenas to be completely freed.  In
1803      * a few un-scientific tests, it seems like this
1804      * approach allowed a lot more memory to be freed.
1805      */
1806     /* If this is the only arena with nf, record that. */
1807     if (nfp2lasta[nf] == NULL) {
1808         nfp2lasta[nf] = ao;
1809     } /* else the rightmost with nf doesn't change */
1810     /* If this was the rightmost of the old size, it remains in place. */
1811     if (ao == lastnf) {
1812         /* Case 4.  Nothing to do. */
1813         return;
1814     }
1815     /* If ao were the only arena in the list, the last block would have
1816      * gotten us out.
1817      */
1818     assert(ao->nextarena != NULL);
1819 
1820     /* Case 3:  We have to move the arena towards the end of the list,
1821      * because it has more free pools than the arena to its right.  It needs
1822      * to move to follow lastnf.
1823      * First unlink ao from usable_arenas.
1824      */
1825     if (ao->prevarena != NULL) {
1826         /* ao isn't at the head of the list */
1827         assert(ao->prevarena->nextarena == ao);
1828         ao->prevarena->nextarena = ao->nextarena;
1829     }
1830     else {
1831         /* ao is at the head of the list */
1832         assert(usable_arenas == ao);
1833         usable_arenas = ao->nextarena;
1834     }
1835     ao->nextarena->prevarena = ao->prevarena;
1836     /* And insert after lastnf. */
1837     ao->prevarena = lastnf;
1838     ao->nextarena = lastnf->nextarena;
1839     if (ao->nextarena != NULL) {
1840         ao->nextarena->prevarena = ao;
1841     }
1842     lastnf->nextarena = ao;
1843     /* Verify that the swaps worked. */
1844     assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
1845     assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
1846     assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
1847     assert((usable_arenas == ao && ao->prevarena == NULL)
1848            || ao->prevarena->nextarena == ao);
1849 }
1850 
1851 /* Free a memory block allocated by pymalloc_alloc().
1852    Return 1 if it was freed.
1853    Return 0 if the block was not allocated by pymalloc_alloc(). */
1854 static inline int
1855 pymalloc_free(void *ctx, void *p)
1856 {
1857     assert(p != NULL);
1858 
1859 #ifdef WITH_VALGRIND
1860     if (UNLIKELY(running_on_valgrind > 0)) {
1861         return 0;
1862     }
1863 #endif
1864 
1865     poolp pool = POOL_ADDR(p);
1866     if (UNLIKELY(!address_in_range(p, pool))) {
1867         return 0;
1868     }
1869     /* We allocated this address. */
1870 
1871     /* Link p to the start of the pool's freeblock list.  Since
1872      * the pool had at least the p block outstanding, the pool
1873      * wasn't empty (so it's already in a usedpools[] list, or
1874      * was full and is in no list -- it's not in the freeblocks
1875      * list in any case).
1876      */
1877     assert(pool->ref.count > 0);            /* else it was empty */
1878     block *lastfree = pool->freeblock;
1879     *(block **)p = lastfree;
1880     pool->freeblock = (block *)p;
1881     pool->ref.count--;
1882 
1883     if (UNLIKELY(lastfree == NULL)) {
1884         /* Pool was full, so doesn't currently live in any list:
1885          * link it to the front of the appropriate usedpools[] list.
1886          * This mimics LRU pool usage for new allocations and
1887          * targets optimal filling when several pools contain
1888          * blocks of the same size class.
1889          */
1890         insert_to_usedpool(pool);
1891         return 1;
1892     }
1893 
1894     /* freeblock wasn't NULL, so the pool wasn't full,
1895      * and the pool is in a usedpools[] list.
1896      */
1897     if (LIKELY(pool->ref.count != 0)) {
1898         /* pool isn't empty:  leave it in usedpools */
1899         return 1;
1900     }
1901 
1902     /* Pool is now empty:  unlink from usedpools, and
1903      * link to the front of freepools.  This ensures that
1904      * previously freed pools will be allocated later
1905      * (being not referenced, they are perhaps paged out).
1906      */
1907     insert_to_freepool(pool);
1908     return 1;
1909 }
1910 
1911 
1912 static void
1913 _PyObject_Free(void *ctx, void *p)
1914 {
1915     /* PyObject_Free(NULL) has no effect */
1916     if (p == NULL) {
1917         return;
1918     }
1919 
1920     if (UNLIKELY(!pymalloc_free(ctx, p))) {
1921         /* pymalloc didn't allocate this address */
1922         PyMem_RawFree(p);
1923         raw_allocated_blocks--;
1924     }
1925 }
1926 
1927 
1928 /* pymalloc realloc.
1929 
1930    If nbytes==0, then as the Python docs promise, we do not treat this like
1931    free(p), and return a non-NULL result.
1932 
1933    Return 1 if pymalloc reallocated memory and wrote the new pointer into
1934    newptr_p.
1935 
1936    Return 0 if pymalloc didn't allocate p. */
1937 static int
1938 pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
1939 {
1940     void *bp;
1941     poolp pool;
1942     size_t size;
1943 
1944     assert(p != NULL);
1945 
1946 #ifdef WITH_VALGRIND
1947     /* Treat running_on_valgrind == -1 the same as 0 */
1948     if (UNLIKELY(running_on_valgrind > 0)) {
1949         return 0;
1950     }
1951 #endif
1952 
1953     pool = POOL_ADDR(p);
1954     if (!address_in_range(p, pool)) {
1955         /* pymalloc is not managing this block.
1956 
1957            If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
1958            over this block.  However, if we do, we need to copy the valid data
1959            from the C-managed block to one of our blocks, and there's no
1960            portable way to know how much of the memory space starting at p is
1961            valid.
1962 
1963            As bug 1185883 pointed out the hard way, it's possible that the
1964            C-managed block is "at the end" of allocated VM space, so that a
1965            memory fault can occur if we try to copy nbytes bytes starting at p.
1966            Instead we punt: let C continue to manage this block. */
1967         return 0;
1968     }
1969 
1970     /* pymalloc is in charge of this block */
1971     size = INDEX2SIZE(pool->szidx);
1972     if (nbytes <= size) {
1973         /* The block is staying the same or shrinking.
1974 
1975            If it's shrinking, there's a tradeoff: it costs cycles to copy the
1976            block to a smaller size class, but it wastes memory not to copy it.
1977 
1978            The compromise here is to copy on shrink only if at least 25% of
1979            size can be shaved off. */
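        /* Illustrative: for a 512-byte block, a request of 385..512 bytes keeps
         * the block in place (4*nbytes > 3*512); 384 bytes or fewer triggers a
         * copy into a smaller size class. */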
1980         if (4 * nbytes > 3 * size) {
1981             /* It's the same, or shrinking and new/old > 3/4. */
1982             *newptr_p = p;
1983             return 1;
1984         }
1985         size = nbytes;
1986     }
1987 
1988     bp = _PyObject_Malloc(ctx, nbytes);
1989     if (bp != NULL) {
1990         memcpy(bp, p, size);
1991         _PyObject_Free(ctx, p);
1992     }
1993     *newptr_p = bp;
1994     return 1;
1995 }
1996 
1997 
1998 static void *
1999 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2000 {
2001     void *ptr2;
2002 
2003     if (ptr == NULL) {
2004         return _PyObject_Malloc(ctx, nbytes);
2005     }
2006 
2007     if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
2008         return ptr2;
2009     }
2010 
2011     return PyMem_RawRealloc(ptr, nbytes);
2012 }
2013 
2014 #else   /* ! WITH_PYMALLOC */
2015 
2016 /*==========================================================================*/
2017 /* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2018  * only be used by extensions that are compiled with pymalloc enabled. */
2019 
2020 Py_ssize_t
2021 _Py_GetAllocatedBlocks(void)
2022 {
2023     return 0;
2024 }
2025 
2026 #endif /* WITH_PYMALLOC */
2027 
2028 
2029 /*==========================================================================*/
2030 /* A cross-platform debugging allocator.  This doesn't manage memory directly;
2031  * it wraps a real allocator, adding extra debugging info to the memory blocks.
2032  */
2033 
2034 /* Uncomment this define to add the "serialno" field */
2035 /* #define PYMEM_DEBUG_SERIALNO */
2036 
2037 #ifdef PYMEM_DEBUG_SERIALNO
2038 static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2039 
2040 /* serialno is always incremented via calling this routine.  The point is
2041  * to supply a single place to set a breakpoint.
2042  */
2043 static void
2044 bumpserialno(void)
2045 {
2046     ++serialno;
2047 }
2048 #endif
2049 
2050 #define SST SIZEOF_SIZE_T
2051 
2052 #ifdef PYMEM_DEBUG_SERIALNO
2053 #  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2054 #else
2055 #  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2056 #endif
2057 
2058 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2059 static size_t
2060 read_size_t(const void *p)
2061 {
2062     const uint8_t *q = (const uint8_t *)p;
2063     size_t result = *q++;
2064     int i;
2065 
2066     for (i = SST; --i > 0; ++q)
2067         result = (result << 8) | *q;
2068     return result;
2069 }
2070 
2071 /* Write n as a big-endian size_t, MSB at address p, LSB at
2072  * p + sizeof(size_t) - 1.
2073  */
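/* Illustrative, with SST == 8: write_size_t(p, 0x0102) stores the bytes
   00 00 00 00 00 00 01 02 at p, and read_size_t(p) then recovers 0x0102. */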
2074 static void
2075 write_size_t(void *p, size_t n)
2076 {
2077     uint8_t *q = (uint8_t *)p + SST - 1;
2078     int i;
2079 
2080     for (i = SST; --i >= 0; --q) {
2081         *q = (uint8_t)(n & 0xff);
2082         n >>= 8;
2083     }
2084 }
2085 
2086 /* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2087    fills them with useful stuff, here calling the underlying malloc's result p:
2088 
2089 p[0: S]
2090     Number of bytes originally asked for.  This is a size_t, big-endian (easier
2091     to read in a memory dump).
2092 p[S]
2093     API ID.  See PEP 445.  This is a character, but seems undocumented.
2094 p[S+1: 2*S]
2095     Copies of PYMEM_FORBIDDENBYTE.  Used to catch under-writes and under-reads.
2096 p[2*S: 2*S+n]
2097     The requested memory, filled with copies of PYMEM_CLEANBYTE.
2098     Used to catch reference to uninitialized memory.
2099     &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2100     handled the request itself.
2101 p[2*S+n: 2*S+n+S]
2102     Copies of PYMEM_FORBIDDENBYTE.  Used to catch over-writes and over-reads.
2103 p[2*S+n+S: 2*S+n+2*S]
2104     A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2105     and _PyMem_DebugRealloc.
2106     This is a big-endian size_t.
2107     If "bad memory" is detected later, the serial number gives an
2108     excellent way to set a breakpoint on the next run, to capture the
2109     instant at which this block was passed out.
2110 
2111 If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2112 for 3 * S extra bytes, and omits the last serialno field.
2113 */
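
/* Illustrative layout, assuming S == 8 (a 64-bit build) and PYMEM_DEBUG_SERIALNO
   defined, for a 10-byte request with API id 'o':

       p[0:8]    00 00 00 00 00 00 00 0a    requested size, big-endian
       p[8]      'o'                        API id
       p[9:16]   fd fd fd fd fd fd fd       PYMEM_FORBIDDENBYTE pad
       p[16:26]  cd cd ... cd               the 10 data bytes (PYMEM_CLEANBYTE)
       p[26:34]  fd fd ... fd               trailing FORBIDDENBYTE pad
       p[34:42]  serial number, big-endian

   &p[16] is what the caller receives.  (0xfd and 0xcd are the usual values of
   PYMEM_FORBIDDENBYTE and PYMEM_CLEANBYTE.) */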
2114 
2115 static void *
2116 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2117 {
2118     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2119     uint8_t *p;           /* base address of malloc'ed pad block */
2120     uint8_t *data;        /* p + 2*SST == pointer to data bytes */
2121     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2122     size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2123 
2124     if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2125         /* integer overflow: can't represent total as a Py_ssize_t */
2126         return NULL;
2127     }
2128     total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2129 
2130     /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2131                 ^--- p    ^--- data   ^--- tail
2132        S: nbytes stored as size_t
2133        I: API identifier (1 byte)
2134        F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2135        C: Clean bytes used later to store actual data
2136        N: Serial number stored as size_t
2137 
2138        If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2139        is omitted. */
2140 
2141     if (use_calloc) {
2142         p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2143     }
2144     else {
2145         p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2146     }
2147     if (p == NULL) {
2148         return NULL;
2149     }
2150     data = p + 2*SST;
2151 
2152 #ifdef PYMEM_DEBUG_SERIALNO
2153     bumpserialno();
2154 #endif
2155 
2156     /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2157     write_size_t(p, nbytes);
2158     p[SST] = (uint8_t)api->api_id;
2159     memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2160 
2161     if (nbytes > 0 && !use_calloc) {
2162         memset(data, PYMEM_CLEANBYTE, nbytes);
2163     }
2164 
2165     /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2166     tail = data + nbytes;
2167     memset(tail, PYMEM_FORBIDDENBYTE, SST);
2168 #ifdef PYMEM_DEBUG_SERIALNO
2169     write_size_t(tail + SST, serialno);
2170 #endif
2171 
2172     return data;
2173 }
2174 
2175 static void *
2176 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2177 {
2178     return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2179 }
2180 
2181 static void *
2182 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2183 {
2184     size_t nbytes;
2185     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2186     nbytes = nelem * elsize;
2187     return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2188 }
2189 
2190 
2191 /* The debug free first checks the 2*SST bytes on each end for sanity (in
2192    particular, that the FORBIDDENBYTEs with the api ID are still intact).
2193    Then fills the original bytes with PYMEM_DEADBYTE.
2194    Then calls the underlying free.
2195 */
2196 static void
2197 _PyMem_DebugRawFree(void *ctx, void *p)
2198 {
2199     /* PyMem_Free(NULL) has no effect */
2200     if (p == NULL) {
2201         return;
2202     }
2203 
2204     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2205     uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
2206     size_t nbytes;
2207 
2208     _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2209     nbytes = read_size_t(q);
2210     nbytes += PYMEM_DEBUG_EXTRA_BYTES;
2211     memset(q, PYMEM_DEADBYTE, nbytes);
2212     api->alloc.free(api->alloc.ctx, q);
2213 }
2214 
2215 
2216 static void *
2217 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2218 {
2219     if (p == NULL) {
2220         return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2221     }
2222 
2223     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2224     uint8_t *head;        /* base address of malloc'ed pad block */
2225     uint8_t *data;        /* pointer to data bytes */
2226     uint8_t *r;
2227     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2228     size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2229     size_t original_nbytes;
2230 #define ERASED_SIZE 64
2231     uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
2232 
2233     _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2234 
2235     data = (uint8_t *)p;
2236     head = data - 2*SST;
2237     original_nbytes = read_size_t(head);
2238     if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2239         /* integer overflow: can't represent total as a Py_ssize_t */
2240         return NULL;
2241     }
2242     total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2243 
2244     tail = data + original_nbytes;
2245 #ifdef PYMEM_DEBUG_SERIALNO
2246     size_t block_serialno = read_size_t(tail + SST);
2247 #endif
2248     /* Mark the header, the trailer, ERASED_SIZE bytes at the beginning and
2249        ERASED_SIZE bytes at the end as dead and save a copy of the erased bytes.
2250      */
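    /* Illustrative: with ERASED_SIZE == 64, a 200-byte block has its first and
     * last 64 data bytes copied into `save`; those bytes, plus the header and
     * trailer, are overwritten with PYMEM_DEADBYTE before calling realloc and
     * restored from `save` afterwards. */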
2251     if (original_nbytes <= sizeof(save)) {
2252         memcpy(save, data, original_nbytes);
2253         memset(data - 2 * SST, PYMEM_DEADBYTE,
2254                original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2255     }
2256     else {
2257         memcpy(save, data, ERASED_SIZE);
2258         memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
2259         memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2260         memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
2261                ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
2262     }
2263 
2264     /* Resize and add decorations. */
2265     r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2266     if (r == NULL) {
2267         /* if realloc() failed: rewrite header and footer which have
2268            just been erased */
2269         nbytes = original_nbytes;
2270     }
2271     else {
2272         head = r;
2273 #ifdef PYMEM_DEBUG_SERIALNO
2274         bumpserialno();
2275         block_serialno = serialno;
2276 #endif
2277     }
2278     data = head + 2*SST;
2279 
2280     write_size_t(head, nbytes);
2281     head[SST] = (uint8_t)api->api_id;
2282     memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2283 
2284     tail = data + nbytes;
2285     memset(tail, PYMEM_FORBIDDENBYTE, SST);
2286 #ifdef PYMEM_DEBUG_SERIALNO
2287     write_size_t(tail + SST, block_serialno);
2288 #endif
2289 
2290     /* Restore saved bytes. */
2291     if (original_nbytes <= sizeof(save)) {
2292         memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2293     }
2294     else {
2295         size_t i = original_nbytes - ERASED_SIZE;
2296         memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2297         if (nbytes > i) {
2298             memcpy(data + i, &save[ERASED_SIZE],
2299                    Py_MIN(nbytes - i, ERASED_SIZE));
2300         }
2301     }
2302 
2303     if (r == NULL) {
2304         return NULL;
2305     }
2306 
2307     if (nbytes > original_nbytes) {
2308         /* growing: mark new extra memory clean */
2309         memset(data + original_nbytes, PYMEM_CLEANBYTE,
2310                nbytes - original_nbytes);
2311     }
2312 
2313     return data;
2314 }
2315 
2316 static inline void
2317 _PyMem_DebugCheckGIL(const char *func)
2318 {
2319     if (!PyGILState_Check()) {
2320         _Py_FatalErrorFunc(func,
2321                            "Python memory allocator called "
2322                            "without holding the GIL");
2323     }
2324 }
2325 
2326 static void *
2327 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
2328 {
2329     _PyMem_DebugCheckGIL(__func__);
2330     return _PyMem_DebugRawMalloc(ctx, nbytes);
2331 }
2332 
2333 static void *
2334 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2335 {
2336     _PyMem_DebugCheckGIL(__func__);
2337     return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2338 }
2339 
2340 
2341 static void
2342 _PyMem_DebugFree(void *ctx, void *ptr)
2343 {
2344     _PyMem_DebugCheckGIL(__func__);
2345     _PyMem_DebugRawFree(ctx, ptr);
2346 }
2347 
2348 
2349 static void *
2350 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2351 {
2352     _PyMem_DebugCheckGIL(__func__);
2353     return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2354 }
2355 
2356 /* Check the forbidden bytes on both ends of the memory allocated for p.
2357  * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2358  * and call Py_FatalError to kill the program.
2359  * The API id is also checked.
2360  */
2361 static void
2362 _PyMem_DebugCheckAddress(const char *func, char api, const void *p)
2363 {
2364     assert(p != NULL);
2365 
2366     const uint8_t *q = (const uint8_t *)p;
2367     size_t nbytes;
2368     const uint8_t *tail;
2369     int i;
2370     char id;
2371 
2372     /* Check the API id */
2373     id = (char)q[-SST];
2374     if (id != api) {
2375         _PyObject_DebugDumpAddress(p);
2376         _Py_FatalErrorFormat(func,
2377                              "bad ID: Allocated using API '%c', "
2378                              "verified using API '%c'",
2379                              id, api);
2380     }
2381 
2382     /* Check the stuff at the start of p first:  if there's underwrite
2383      * corruption, the number-of-bytes field may be nuts, and checking
2384      * the tail could lead to a segfault then.
2385      */
2386     for (i = SST-1; i >= 1; --i) {
2387         if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2388             _PyObject_DebugDumpAddress(p);
2389             _Py_FatalErrorFunc(func, "bad leading pad byte");
2390         }
2391     }
2392 
2393     nbytes = read_size_t(q - 2*SST);
2394     tail = q + nbytes;
2395     for (i = 0; i < SST; ++i) {
2396         if (tail[i] != PYMEM_FORBIDDENBYTE) {
2397             _PyObject_DebugDumpAddress(p);
2398             _Py_FatalErrorFunc(func, "bad trailing pad byte");
2399         }
2400     }
2401 }
2402 
2403 /* Display info to stderr about the memory block at p. */
2404 static void
2405 _PyObject_DebugDumpAddress(const void *p)
2406 {
2407     const uint8_t *q = (const uint8_t *)p;
2408     const uint8_t *tail;
2409     size_t nbytes;
2410     int i;
2411     int ok;
2412     char id;
2413 
2414     fprintf(stderr, "Debug memory block at address p=%p:", p);
2415     if (p == NULL) {
2416         fprintf(stderr, "\n");
2417         return;
2418     }
2419     id = (char)q[-SST];
2420     fprintf(stderr, " API '%c'\n", id);
2421 
2422     nbytes = read_size_t(q - 2*SST);
2423     fprintf(stderr, "    %" PY_FORMAT_SIZE_T "u bytes originally "
2424                     "requested\n", nbytes);
2425 
2426     /* In case this is nuts, check the leading pad bytes first. */
2427     fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
2428     ok = 1;
2429     for (i = 1; i <= SST-1; ++i) {
2430         if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2431             ok = 0;
2432             break;
2433         }
2434     }
2435     if (ok)
2436         fputs("FORBIDDENBYTE, as expected.\n", stderr);
2437     else {
2438         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2439             PYMEM_FORBIDDENBYTE);
2440         for (i = SST-1; i >= 1; --i) {
2441             const uint8_t byte = *(q-i);
2442             fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
2443             if (byte != PYMEM_FORBIDDENBYTE)
2444                 fputs(" *** OUCH", stderr);
2445             fputc('\n', stderr);
2446         }
2447 
2448         fputs("    Because memory is corrupted at the start, the "
2449               "count of bytes requested\n"
2450               "       may be bogus, and checking the trailing pad "
2451               "bytes may segfault.\n", stderr);
2452     }
2453 
2454     tail = q + nbytes;
2455     fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
2456     ok = 1;
2457     for (i = 0; i < SST; ++i) {
2458         if (tail[i] != PYMEM_FORBIDDENBYTE) {
2459             ok = 0;
2460             break;
2461         }
2462     }
2463     if (ok)
2464         fputs("FORBIDDENBYTE, as expected.\n", stderr);
2465     else {
2466         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2467                 PYMEM_FORBIDDENBYTE);
2468         for (i = 0; i < SST; ++i) {
2469             const uint8_t byte = tail[i];
2470             fprintf(stderr, "        at tail+%d: 0x%02x",
2471                     i, byte);
2472             if (byte != PYMEM_FORBIDDENBYTE)
2473                 fputs(" *** OUCH", stderr);
2474             fputc('\n', stderr);
2475         }
2476     }
2477 
2478 #ifdef PYMEM_DEBUG_SERIALNO
2479     size_t serial = read_size_t(tail + SST);
2480     fprintf(stderr, "    The block was made by call #%" PY_FORMAT_SIZE_T
2481                     "u to debug malloc/realloc.\n", serial);
2482 #endif
2483 
2484     if (nbytes > 0) {
2485         i = 0;
2486         fputs("    Data at p:", stderr);
2487         /* print up to 8 bytes at the start */
2488         while (q < tail && i < 8) {
2489             fprintf(stderr, " %02x", *q);
2490             ++i;
2491             ++q;
2492         }
2493         /* and up to 8 at the end */
2494         if (q < tail) {
2495             if (tail - q > 8) {
2496                 fputs(" ...", stderr);
2497                 q = tail - 8;
2498             }
2499             while (q < tail) {
2500                 fprintf(stderr, " %02x", *q);
2501                 ++q;
2502             }
2503         }
2504         fputc('\n', stderr);
2505     }
2506     fputc('\n', stderr);
2507 
2508     fflush(stderr);
2509     _PyMem_DumpTraceback(fileno(stderr), p);
2510 }
2511 
2512 
2513 static size_t
2514 printone(FILE *out, const char* msg, size_t value)
2515 {
2516     int i, k;
2517     char buf[100];
2518     size_t origvalue = value;
2519 
2520     fputs(msg, out);
2521     for (i = (int)strlen(msg); i < 35; ++i)
2522         fputc(' ', out);
2523     fputc('=', out);
2524 
2525     /* Write the value with commas. */
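    /* Illustrative: a value of 1234567 is written as "1,234,567", right-aligned
     * in the 21-character field that precedes the trailing newline. */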
2526     i = 22;
2527     buf[i--] = '\0';
2528     buf[i--] = '\n';
2529     k = 3;
2530     do {
2531         size_t nextvalue = value / 10;
2532         unsigned int digit = (unsigned int)(value - nextvalue * 10);
2533         value = nextvalue;
2534         buf[i--] = (char)(digit + '0');
2535         --k;
2536         if (k == 0 && value && i >= 0) {
2537             k = 3;
2538             buf[i--] = ',';
2539         }
2540     } while (value && i >= 0);
2541 
2542     while (i >= 0)
2543         buf[i--] = ' ';
2544     fputs(buf, out);
2545 
2546     return origvalue;
2547 }
2548 
2549 void
2550 _PyDebugAllocatorStats(FILE *out,
2551                        const char *block_name, int num_blocks, size_t sizeof_block)
2552 {
2553     char buf1[128];
2554     char buf2[128];
2555     PyOS_snprintf(buf1, sizeof(buf1),
2556                   "%d %ss * %" PY_FORMAT_SIZE_T "d bytes each",
2557                   num_blocks, block_name, sizeof_block);
2558     PyOS_snprintf(buf2, sizeof(buf2),
2559                   "%48s ", buf1);
2560     (void)printone(out, buf2, num_blocks * sizeof_block);
2561 }
2562 
2563 
2564 #ifdef WITH_PYMALLOC
2565 
2566 #ifdef Py_DEBUG
2567 /* Is target in the list?  The list is traversed via the nextpool pointers.
2568  * The list may be NULL-terminated, or circular.  Return 1 if target is in
2569  * list, else 0.
2570  */
2571 static int
2572 pool_is_in_list(const poolp target, poolp list)
2573 {
2574     poolp origlist = list;
2575     assert(target != NULL);
2576     if (list == NULL)
2577         return 0;
2578     do {
2579         if (target == list)
2580             return 1;
2581         list = list->nextpool;
2582     } while (list != NULL && list != origlist);
2583     return 0;
2584 }
2585 #endif
2586 
2587 /* Print summary info to "out" about the state of pymalloc's structures.
2588  * In Py_DEBUG mode, also perform some expensive internal consistency
2589  * checks.
2590  *
2591  * Return 0 if the memory debug hooks are not installed or no statistics were
2592  * written into out; return 1 otherwise.
2593  */
2594 int
2595 _PyObject_DebugMallocStats(FILE *out)
2596 {
2597     if (!_PyMem_PymallocEnabled()) {
2598         return 0;
2599     }
2600 
2601     uint i;
2602     const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
2603     /* # of pools, allocated blocks, and free blocks per class index */
2604     size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2605     size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2606     size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2607     /* total # of allocated bytes in used and full pools */
2608     size_t allocated_bytes = 0;
2609     /* total # of available bytes in used pools */
2610     size_t available_bytes = 0;
2611     /* # of free pools + pools not yet carved out of current arena */
2612     uint numfreepools = 0;
2613     /* # of bytes for arena alignment padding */
2614     size_t arena_alignment = 0;
2615     /* # of bytes in used and full pools used for pool_headers */
2616     size_t pool_header_bytes = 0;
2617     /* # of bytes in used and full pools wasted due to quantization,
2618      * i.e. the necessarily leftover space at the ends of used and
2619      * full pools.
2620      */
2621     size_t quantization = 0;
2622     /* # of arenas actually allocated. */
2623     size_t narenas = 0;
2624     /* running total -- should equal narenas * ARENA_SIZE */
2625     size_t total;
2626     char buf[128];
2627 
2628     fprintf(out, "Small block threshold = %d, in %u size classes.\n",
2629             SMALL_REQUEST_THRESHOLD, numclasses);
2630 
2631     for (i = 0; i < numclasses; ++i)
2632         numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
2633 
2634     /* Because full pools aren't linked to from anything, it's easiest
2635      * to march over all the arenas.  If we're lucky, most of the memory
2636      * will be living in full pools -- would be a shame to miss them.
2637      */
2638     for (i = 0; i < maxarenas; ++i) {
2639         uint j;
2640         uintptr_t base = arenas[i].address;
2641 
2642         /* Skip arenas which are not allocated. */
2643         if (arenas[i].address == (uintptr_t)NULL)
2644             continue;
2645         narenas += 1;
2646 
2647         numfreepools += arenas[i].nfreepools;
2648 
2649         /* round up to pool alignment */
2650         if (base & (uintptr_t)POOL_SIZE_MASK) {
2651             arena_alignment += POOL_SIZE;
2652             base &= ~(uintptr_t)POOL_SIZE_MASK;
2653             base += POOL_SIZE;
2654         }
2655 
2656         /* visit every pool in the arena */
2657         assert(base <= (uintptr_t) arenas[i].pool_address);
2658         for (j = 0; base < (uintptr_t) arenas[i].pool_address;
2659              ++j, base += POOL_SIZE) {
2660             poolp p = (poolp)base;
2661             const uint sz = p->szidx;
2662             uint freeblocks;
2663 
2664             if (p->ref.count == 0) {
2665                 /* currently unused */
2666 #ifdef Py_DEBUG
2667                 assert(pool_is_in_list(p, arenas[i].freepools));
2668 #endif
2669                 continue;
2670             }
2671             ++numpools[sz];
2672             numblocks[sz] += p->ref.count;
2673             freeblocks = NUMBLOCKS(sz) - p->ref.count;
2674             numfreeblocks[sz] += freeblocks;
2675 #ifdef Py_DEBUG
2676             if (freeblocks > 0)
2677                 assert(pool_is_in_list(p, usedpools[sz + sz]));
2678 #endif
2679         }
2680     }
2681     assert(narenas == narenas_currently_allocated);
2682 
2683     fputc('\n', out);
2684     fputs("class   size   num pools   blocks in use  avail blocks\n"
2685           "-----   ----   ---------   -------------  ------------\n",
2686           out);
2687 
2688     for (i = 0; i < numclasses; ++i) {
2689         size_t p = numpools[i];
2690         size_t b = numblocks[i];
2691         size_t f = numfreeblocks[i];
2692         uint size = INDEX2SIZE(i);
2693         if (p == 0) {
2694             assert(b == 0 && f == 0);
2695             continue;
2696         }
2697         fprintf(out, "%5u %6u "
2698                         "%11" PY_FORMAT_SIZE_T "u "
2699                         "%15" PY_FORMAT_SIZE_T "u "
2700                         "%13" PY_FORMAT_SIZE_T "u\n",
2701                 i, size, p, b, f);
2702         allocated_bytes += b * size;
2703         available_bytes += f * size;
2704         pool_header_bytes += p * POOL_OVERHEAD;
2705         quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
2706     }
2707     fputc('\n', out);
2708 #ifdef PYMEM_DEBUG_SERIALNO
2709     if (_PyMem_DebugEnabled()) {
2710         (void)printone(out, "# times object malloc called", serialno);
2711     }
2712 #endif
2713     (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
2714     (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
2715     (void)printone(out, "# arenas highwater mark", narenas_highwater);
2716     (void)printone(out, "# arenas allocated current", narenas);
2717 
2718     PyOS_snprintf(buf, sizeof(buf),
2719         "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
2720         narenas, ARENA_SIZE);
2721     (void)printone(out, buf, narenas * ARENA_SIZE);
2722 
2723     fputc('\n', out);
2724 
2725     total = printone(out, "# bytes in allocated blocks", allocated_bytes);
2726     total += printone(out, "# bytes in available blocks", available_bytes);
2727 
2728     PyOS_snprintf(buf, sizeof(buf),
2729         "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
2730     total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
2731 
2732     total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
2733     total += printone(out, "# bytes lost to quantization", quantization);
2734     total += printone(out, "# bytes lost to arena alignment", arena_alignment);
2735     (void)printone(out, "Total", total);
2736     return 1;
2737 }
2738 
2739 #endif /* #ifdef WITH_PYMALLOC */
2740