1 /*
2 
3   Reference Cycle Garbage Collection
4   ==================================
5 
6   Neil Schemenauer <nas@arctrix.com>
7 
8   Based on a post on the python-dev list.  Ideas from Guido van Rossum,
9   Eric Tiedemann, and various others.
10 
11   http://www.arctrix.com/nas/python/gc/
12 
13   The following mailing list threads provide a historical perspective on
14   the design of this module.  Note that a fair amount of refinement has
15   occurred since those discussions.
16 
17   http://mail.python.org/pipermail/python-dev/2000-March/002385.html
18   http://mail.python.org/pipermail/python-dev/2000-March/002434.html
19   http://mail.python.org/pipermail/python-dev/2000-March/002497.html
20 
  For a high-level view of the collection process, read the collect()
  function.
23 
24 */
25 
26 #include "Python.h"
27 #include "pycore_context.h"
28 #include "pycore_initconfig.h"
29 #include "pycore_interp.h"      // PyInterpreterState.gc
30 #include "pycore_object.h"
31 #include "pycore_pyerrors.h"
32 #include "pycore_pystate.h"     // _PyThreadState_GET()
33 #include "pydtrace.h"
34 #include "pytime.h"             // _PyTime_GetMonotonicClock()
35 
36 typedef struct _gc_runtime_state GCState;
37 
38 /*[clinic input]
39 module gc
40 [clinic start generated code]*/
41 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/
42 
43 
44 #ifdef Py_DEBUG
45 #  define GC_DEBUG
46 #endif
47 
48 #define GC_NEXT _PyGCHead_NEXT
49 #define GC_PREV _PyGCHead_PREV
50 
// update_refs() sets this bit for all objects in the current generation.
// subtract_refs() and move_unreachable() use it to tell whether a visited
// object belongs to the generation being collected.
//
// move_unreachable() removes this flag from reachable objects.
// After that, only unreachable objects carry the flag.
//
// No object in the interpreter has this flag set once GC ends.
59 #define PREV_MASK_COLLECTING   _PyGC_PREV_MASK_COLLECTING
60 
// The lowest bit of _gc_next is used for the UNREACHABLE flag.
//
// This flag indicates that the object is in the unreachable list built by
// move_unreachable().
//
// Although the flag is used only by move_unreachable(), that function does
// not clear it, to avoid an unnecessary extra iteration over the list.
// move_legacy_finalizers() removes the flag instead.
// Between those two calls, the unreachable list is not a normal list and
// most gc_list_* functions cannot be used on it.
70 #define NEXT_MASK_UNREACHABLE  (1)
71 
72 /* Get an object's GC head */
73 #define AS_GC(o) ((PyGC_Head *)(o)-1)
74 
75 /* Get the object given the GC head */
76 #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
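
/* Illustrative sketch (not part of the module): AS_GC() and FROM_GC() are
 * inverses, relying on the PyGC_Head being allocated immediately before the
 * object it tracks.  For any tracked container object `op`:
 *
 *     PyGC_Head *gc = AS_GC(op);    // step back to the GC header
 *     assert(FROM_GC(gc) == op);    // FROM_GC() undoes AS_GC()
 */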
77 
78 static inline int
gc_is_collecting(PyGC_Head *g)
80 {
81     return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
82 }
83 
84 static inline void
gc_clear_collecting(PyGC_Head *g)
86 {
87     g->_gc_prev &= ~PREV_MASK_COLLECTING;
88 }
89 
90 static inline Py_ssize_t
gc_get_refs(PyGC_Head *g)
92 {
93     return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
94 }
95 
96 static inline void
gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
98 {
99     g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
100         | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
101 }
102 
103 static inline void
gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
105 {
106     g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
107         | PREV_MASK_COLLECTING
108         | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
109 }
110 
111 static inline void
gc_decref(PyGC_Head *g)
113 {
114     _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
115                               gc_get_refs(g) > 0,
116                               "refcount is too small");
117     g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
118 }
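
/* A minimal sketch (illustrative only) of how the helpers above cooperate,
 * assuming `g` is the GC header of an object in the generation being
 * collected:
 *
 *     gc_reset_refs(g, Py_REFCNT(FROM_GC(g)));  // copy refcount, mark COLLECTING
 *     assert(gc_is_collecting(g));
 *     gc_decref(g);                             // one internal reference found
 *     assert(gc_get_refs(g) == Py_REFCNT(FROM_GC(g)) - 1);
 */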
119 
/* Bit flags for debugging output, set via gc.set_debug() */
121 #define DEBUG_STATS             (1<<0) /* print collection statistics */
122 #define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
123 #define DEBUG_UNCOLLECTABLE     (1<<2) /* print uncollectable objects */
124 #define DEBUG_SAVEALL           (1<<5) /* save all garbage in gc.garbage */
125 #define DEBUG_LEAK              DEBUG_COLLECTABLE | \
126                 DEBUG_UNCOLLECTABLE | \
127                 DEBUG_SAVEALL
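
/* For example (illustrative only), gc.set_debug(gc.DEBUG_LEAK) at the Python
 * level amounts to storing these bits in the per-interpreter GC state:
 *
 *     gcstate->debug = DEBUG_COLLECTABLE | DEBUG_UNCOLLECTABLE | DEBUG_SAVEALL;
 */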
128 
129 #define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head)
130 
131 void
_PyGC_InitState(GCState *gcstate)
133 {
134     gcstate->enabled = 1; /* automatic collection enabled? */
135 
136 #define _GEN_HEAD(n) GEN_HEAD(gcstate, n)
137     struct gc_generation generations[NUM_GENERATIONS] = {
138         /* PyGC_Head,                                    threshold,    count */
139         {{(uintptr_t)_GEN_HEAD(0), (uintptr_t)_GEN_HEAD(0)},   700,        0},
140         {{(uintptr_t)_GEN_HEAD(1), (uintptr_t)_GEN_HEAD(1)},   10,         0},
141         {{(uintptr_t)_GEN_HEAD(2), (uintptr_t)_GEN_HEAD(2)},   10,         0},
142     };
143     for (int i = 0; i < NUM_GENERATIONS; i++) {
144         gcstate->generations[i] = generations[i];
145     };
146     gcstate->generation0 = GEN_HEAD(gcstate, 0);
147     struct gc_generation permanent_generation = {
148           {(uintptr_t)&gcstate->permanent_generation.head,
149            (uintptr_t)&gcstate->permanent_generation.head}, 0, 0
150     };
151     gcstate->permanent_generation = permanent_generation;
152 }
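
/* For reference (illustrative): with the defaults above, gc.get_threshold()
 * returns (700, 10, 10) at the Python level, and gc.set_threshold() simply
 * overwrites gcstate->generations[i].threshold. */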
153 
154 
155 PyStatus
_PyGC_Init(PyThreadState *tstate)
157 {
158     GCState *gcstate = &tstate->interp->gc;
159     if (gcstate->garbage == NULL) {
160         gcstate->garbage = PyList_New(0);
161         if (gcstate->garbage == NULL) {
162             return _PyStatus_NO_MEMORY();
163         }
164     }
165     return _PyStatus_OK();
166 }
167 
168 
169 /*
170 _gc_prev values
171 ---------------
172 
Between collections, _gc_prev is used for the doubly-linked list.
174 
175 Lowest two bits of _gc_prev are used for flags.
176 PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends
177 or _PyObject_GC_UNTRACK() is called.
178 
During a collection, _gc_prev is temporarily used for gc_refs, and the gc list
is singly linked until _gc_prev is restored.
181 
182 gc_refs
183     At the start of a collection, update_refs() copies the true refcount
184     to gc_refs, for each object in the generation being collected.
185     subtract_refs() then adjusts gc_refs so that it equals the number of
186     times an object is referenced directly from outside the generation
187     being collected.
188 
189 PREV_MASK_COLLECTING
190     Objects in generation being collected are marked PREV_MASK_COLLECTING in
191     update_refs().
192 
193 
194 _gc_next values
195 ---------------
196 
197 _gc_next takes these values:
198 
199 0
200     The object is not tracked
201 
202 != 0
203     Pointer to the next object in the GC list.
    Additionally, the lowest bit is used temporarily for the
    NEXT_MASK_UNREACHABLE flag described below.
206 
NEXT_MASK_UNREACHABLE
    move_unreachable() moves objects that are not reachable (whether directly
    or indirectly) from outside the generation into an "unreachable" set and
    sets this flag on them.

    Objects that are found to be reachable have gc_refs set to 1.
    If this flag turns out to be set on an object that is reachable after all,
    the object must currently be in the "unreachable" set; the flag is then
    unset and the object is moved back to the "reachable" set.

    move_legacy_finalizers() removes this flag from the remaining objects in
    the "unreachable" set.
218 */
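
/* A sketch of the resulting _gc_prev layout during a collection, following
 * the masks defined in the internal headers (illustrative only):
 *
 *     bit 0                       : _PyGC_PREV_MASK_FINALIZED (tp_finalize was called)
 *     bit 1                       : PREV_MASK_COLLECTING (in the generation being collected)
 *     bits _PyGC_PREV_SHIFT and up: the gc_refs copy of the reference count
 */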
219 
220 /*** list functions ***/
221 
222 static inline void
gc_list_init(PyGC_Head *list)
224 {
    // The list header must not have flags.
    // We can assign the pointer by a simple cast.
227     list->_gc_prev = (uintptr_t)list;
228     list->_gc_next = (uintptr_t)list;
229 }
230 
231 static inline int
gc_list_is_empty(PyGC_Head *list)
233 {
234     return (list->_gc_next == (uintptr_t)list);
235 }
236 
237 /* Append `node` to `list`. */
238 static inline void
gc_list_append(PyGC_Head *node, PyGC_Head *list)
240 {
241     PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
242 
243     // last <-> node
244     _PyGCHead_SET_PREV(node, last);
245     _PyGCHead_SET_NEXT(last, node);
246 
247     // node <-> list
248     _PyGCHead_SET_NEXT(node, list);
249     list->_gc_prev = (uintptr_t)node;
250 }
251 
252 /* Remove `node` from the gc list it's currently in. */
253 static inline void
gc_list_remove(PyGC_Head *node)
255 {
256     PyGC_Head *prev = GC_PREV(node);
257     PyGC_Head *next = GC_NEXT(node);
258 
259     _PyGCHead_SET_NEXT(prev, next);
260     _PyGCHead_SET_PREV(next, prev);
261 
262     node->_gc_next = 0; /* object is not currently tracked */
263 }
264 
265 /* Move `node` from the gc list it's currently in (which is not explicitly
266  * named here) to the end of `list`.  This is semantically the same as
267  * gc_list_remove(node) followed by gc_list_append(node, list).
268  */
269 static void
gc_list_move(PyGC_Head *node, PyGC_Head *list)
271 {
272     /* Unlink from current list. */
273     PyGC_Head *from_prev = GC_PREV(node);
274     PyGC_Head *from_next = GC_NEXT(node);
275     _PyGCHead_SET_NEXT(from_prev, from_next);
276     _PyGCHead_SET_PREV(from_next, from_prev);
277 
278     /* Relink at end of new list. */
    // The list header must not have flags, so we can skip the macros.
280     PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
281     _PyGCHead_SET_PREV(node, to_prev);
282     _PyGCHead_SET_NEXT(to_prev, node);
283     list->_gc_prev = (uintptr_t)node;
284     _PyGCHead_SET_NEXT(node, list);
285 }
286 
287 /* append list `from` onto list `to`; `from` becomes an empty list */
288 static void
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
290 {
291     assert(from != to);
292     if (!gc_list_is_empty(from)) {
293         PyGC_Head *to_tail = GC_PREV(to);
294         PyGC_Head *from_head = GC_NEXT(from);
295         PyGC_Head *from_tail = GC_PREV(from);
296         assert(from_head != from);
297         assert(from_tail != from);
298 
299         _PyGCHead_SET_NEXT(to_tail, from_head);
300         _PyGCHead_SET_PREV(from_head, to_tail);
301 
302         _PyGCHead_SET_NEXT(from_tail, to);
303         _PyGCHead_SET_PREV(to, from_tail);
304     }
305     gc_list_init(from);
306 }
307 
308 static Py_ssize_t
gc_list_size(PyGC_Head *list)
310 {
311     PyGC_Head *gc;
312     Py_ssize_t n = 0;
313     for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
314         n++;
315     }
316     return n;
317 }
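
/* Usage sketch (illustrative only): the collector builds temporary lists with
 * the helpers above, roughly like this:
 *
 *     PyGC_Head tmp;
 *     gc_list_init(&tmp);               // empty list: header points to itself
 *     assert(gc_list_is_empty(&tmp));
 *     gc_list_move(gc, &tmp);           // unlink `gc` and append it to tmp
 *     assert(gc_list_size(&tmp) == 1);
 *     gc_list_merge(&tmp, old);         // splice tmp onto old; tmp becomes empty
 */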
318 
319 /* Walk the list and mark all objects as non-collecting */
320 static inline void
gc_list_clear_collecting(PyGC_Head *collectable)
322 {
323     PyGC_Head *gc;
324     for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
325         gc_clear_collecting(gc);
326     }
327 }
328 
329 /* Append objects in a GC list to a Python list.
330  * Return 0 if all OK, < 0 if error (out of memory for list)
331  */
332 static int
append_objects(PyObject *py_list, PyGC_Head *gc_list)
334 {
335     PyGC_Head *gc;
336     for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
337         PyObject *op = FROM_GC(gc);
338         if (op != py_list) {
339             if (PyList_Append(py_list, op)) {
340                 return -1; /* exception */
341             }
342         }
343     }
344     return 0;
345 }
346 
347 // Constants for validate_list's flags argument.
348 enum flagstates {collecting_clear_unreachable_clear,
349                  collecting_clear_unreachable_set,
350                  collecting_set_unreachable_clear,
351                  collecting_set_unreachable_set};
352 
353 #ifdef GC_DEBUG
// validate_list checks list consistency, and also serves as documentation of
// when the flags are expected to be set / unset.
// `head` must be a doubly-linked gc list, although it's fine (expected!) if
// the prev and next pointers are "polluted" with flags.
// What's checked:
// - The `head` pointers are not polluted.
// - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
//   set or clear, as specified by the 'flags' argument.
362 // - The prev and next pointers are mutually consistent.
363 static void
validate_list(PyGC_Head *head, enum flagstates flags)
365 {
366     assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0);
367     assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
368     uintptr_t prev_value = 0, next_value = 0;
369     switch (flags) {
370         case collecting_clear_unreachable_clear:
371             break;
372         case collecting_set_unreachable_clear:
373             prev_value = PREV_MASK_COLLECTING;
374             break;
375         case collecting_clear_unreachable_set:
376             next_value = NEXT_MASK_UNREACHABLE;
377             break;
378         case collecting_set_unreachable_set:
379             prev_value = PREV_MASK_COLLECTING;
380             next_value = NEXT_MASK_UNREACHABLE;
381             break;
382         default:
383             assert(! "bad internal flags argument");
384     }
385     PyGC_Head *prev = head;
386     PyGC_Head *gc = GC_NEXT(head);
387     while (gc != head) {
388         PyGC_Head *trueprev = GC_PREV(gc);
389         PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next  & ~NEXT_MASK_UNREACHABLE);
390         assert(truenext != NULL);
391         assert(trueprev == prev);
392         assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
393         assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
394         prev = gc;
395         gc = truenext;
396     }
397     assert(prev == GC_PREV(head));
398 }
399 #else
400 #define validate_list(x, y) do{}while(0)
401 #endif
402 
403 /*** end of list stuff ***/
404 
405 
406 /* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and
407  * PREV_MASK_COLLECTING bit is set for all objects in containers.
408  */
409 static void
update_refs(PyGC_Head *containers)
411 {
412     PyGC_Head *gc = GC_NEXT(containers);
413     for (; gc != containers; gc = GC_NEXT(gc)) {
414         gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc)));
415         /* Python's cyclic gc should never see an incoming refcount
416          * of 0:  if something decref'ed to 0, it should have been
417          * deallocated immediately at that time.
418          * Possible cause (if the assert triggers):  a tp_dealloc
419          * routine left a gc-aware object tracked during its teardown
420          * phase, and did something-- or allowed something to happen --
421          * that called back into Python.  gc can trigger then, and may
422          * see the still-tracked dying object.  Before this assert
423          * was added, such mistakes went on to allow gc to try to
424          * delete the object again.  In a debug build, that caused
425          * a mysterious segfault, when _Py_ForgetReference tried
426          * to remove the object from the doubly-linked list of all
427          * objects a second time.  In a release build, an actual
428          * double deallocation occurred, which leads to corruption
429          * of the allocator's internal bookkeeping pointers.  That's
430          * so serious that maybe this should be a release-build
431          * check instead of an assert?
432          */
433         _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0);
434     }
435 }
436 
437 /* A traversal callback for subtract_refs. */
438 static int
visit_decref(PyObject *op, void *parent)
440 {
441     _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op));
442 
443     if (_PyObject_IS_GC(op)) {
444         PyGC_Head *gc = AS_GC(op);
445         /* We're only interested in gc_refs for objects in the
446          * generation being collected, which can be recognized
447          * because only they have positive gc_refs.
448          */
449         if (gc_is_collecting(gc)) {
450             gc_decref(gc);
451         }
452     }
453     return 0;
454 }
455 
456 /* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
457  * for all objects in containers, and is GC_REACHABLE for all tracked gc
458  * objects not in containers.  The ones with gc_refs > 0 are directly
459  * reachable from outside containers, and so can't be collected.
460  */
461 static void
subtract_refs(PyGC_Head *containers)
463 {
464     traverseproc traverse;
465     PyGC_Head *gc = GC_NEXT(containers);
466     for (; gc != containers; gc = GC_NEXT(gc)) {
467         PyObject *op = FROM_GC(gc);
468         traverse = Py_TYPE(op)->tp_traverse;
469         (void) traverse(FROM_GC(gc),
470                        (visitproc)visit_decref,
471                        op);
472     }
473 }
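
/* A worked example (illustrative): for a two-element cycle a <-> b where `a`
 * is also referenced from a local variable, update_refs() leaves
 * gc_get_refs(a) == 2 and gc_get_refs(b) == 1.  subtract_refs() then removes
 * the two internal references, leaving gc_get_refs(a) == 1 (the external
 * reference) and gc_get_refs(b) == 0, so only `b` starts out as a candidate
 * for the unreachable set until move_unreachable() proves otherwise.
 */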
474 
475 /* A traversal callback for move_unreachable. */
476 static int
visit_reachable(PyObject *op, PyGC_Head *reachable)
478 {
479     if (!_PyObject_IS_GC(op)) {
480         return 0;
481     }
482 
483     PyGC_Head *gc = AS_GC(op);
484     const Py_ssize_t gc_refs = gc_get_refs(gc);
485 
    // Ignore objects in other generations.
487     // This also skips objects "to the left" of the current position in
488     // move_unreachable's scan of the 'young' list - they've already been
489     // traversed, and no longer have the PREV_MASK_COLLECTING flag.
490     if (! gc_is_collecting(gc)) {
491         return 0;
492     }
493     // It would be a logic error elsewhere if the collecting flag were set on
494     // an untracked object.
495     assert(gc->_gc_next != 0);
496 
497     if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
498         /* This had gc_refs = 0 when move_unreachable got
499          * to it, but turns out it's reachable after all.
500          * Move it back to move_unreachable's 'young' list,
501          * and move_unreachable will eventually get to it
502          * again.
503          */
504         // Manually unlink gc from unreachable list because the list functions
505         // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
506         PyGC_Head *prev = GC_PREV(gc);
507         PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
508         _PyObject_ASSERT(FROM_GC(prev),
509                          prev->_gc_next & NEXT_MASK_UNREACHABLE);
510         _PyObject_ASSERT(FROM_GC(next),
511                          next->_gc_next & NEXT_MASK_UNREACHABLE);
512         prev->_gc_next = gc->_gc_next;  // copy NEXT_MASK_UNREACHABLE
513         _PyGCHead_SET_PREV(next, prev);
514 
515         gc_list_append(gc, reachable);
516         gc_set_refs(gc, 1);
517     }
518     else if (gc_refs == 0) {
519         /* This is in move_unreachable's 'young' list, but
520          * the traversal hasn't yet gotten to it.  All
521          * we need to do is tell move_unreachable that it's
522          * reachable.
523          */
524         gc_set_refs(gc, 1);
525     }
526     /* Else there's nothing to do.
527      * If gc_refs > 0, it must be in move_unreachable's 'young'
528      * list, and move_unreachable will eventually get to it.
529      */
530     else {
531         _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
532     }
533     return 0;
534 }
535 
/* Move the unreachable objects from young to unreachable.  After this,
 * no object in young has the PREV_MASK_COLLECTING flag set, and all objects
 * in unreachable do.
 * All objects remaining in young afterwards are directly or indirectly
 * reachable from outside the original young, and all objects in unreachable
 * are not.
 *
 * This function restores the _gc_prev pointers; young and unreachable are
 * doubly-linked lists once it returns.
 * However, _gc_next in the unreachable list still carries the
 * NEXT_MASK_UNREACHABLE flag, so the gc_list_* functions cannot be used on
 * unreachable until that flag is removed.
 */
548 static void
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
550 {
    // Previous element in the young list, used to restore _gc_prev.
552     PyGC_Head *prev = young;
553     PyGC_Head *gc = GC_NEXT(young);
554 
555     /* Invariants:  all objects "to the left" of us in young are reachable
556      * (directly or indirectly) from outside the young list as it was at entry.
557      *
558      * All other objects from the original young "to the left" of us are in
559      * unreachable now, and have NEXT_MASK_UNREACHABLE.  All objects to the
560      * left of us in 'young' now have been scanned, and no objects here
561      * or to the right have been scanned yet.
562      */
563 
564     while (gc != young) {
565         if (gc_get_refs(gc)) {
566             /* gc is definitely reachable from outside the
567              * original 'young'.  Mark it as such, and traverse
568              * its pointers to find any other objects that may
569              * be directly reachable from it.  Note that the
570              * call to tp_traverse may append objects to young,
571              * so we have to wait until it returns to determine
572              * the next object to visit.
573              */
574             PyObject *op = FROM_GC(gc);
575             traverseproc traverse = Py_TYPE(op)->tp_traverse;
576             _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
577                                       "refcount is too small");
578             // NOTE: visit_reachable may change gc->_gc_next when
579             // young->_gc_prev == gc.  Don't do gc = GC_NEXT(gc) before!
580             (void) traverse(op,
581                     (visitproc)visit_reachable,
582                     (void *)young);
583             // relink gc_prev to prev element.
584             _PyGCHead_SET_PREV(gc, prev);
            // gc is no longer in the COLLECTING state after this point.
586             gc_clear_collecting(gc);
587             prev = gc;
588         }
589         else {
590             /* This *may* be unreachable.  To make progress,
591              * assume it is.  gc isn't directly reachable from
592              * any object we've already traversed, but may be
593              * reachable from an object we haven't gotten to yet.
594              * visit_reachable will eventually move gc back into
595              * young if that's so, and we'll see it again.
596              */
597             // Move gc to unreachable.
            // No need to set gc->next->prev = prev because the list is
            // singly linked here.
599             prev->_gc_next = gc->_gc_next;
600 
601             // We can't use gc_list_append() here because we use
602             // NEXT_MASK_UNREACHABLE here.
603             PyGC_Head *last = GC_PREV(unreachable);
            // NOTE: Since all objects in the unreachable set have the
            // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
606             // But this may pollute the unreachable list head's 'next' pointer
607             // too. That's semantically senseless but expedient here - the
608             // damage is repaired when this function ends.
609             last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
610             _PyGCHead_SET_PREV(gc, last);
611             gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
612             unreachable->_gc_prev = (uintptr_t)gc;
613         }
614         gc = (PyGC_Head*)prev->_gc_next;
615     }
    // young->_gc_prev must be the last element remaining in the list.
617     young->_gc_prev = (uintptr_t)prev;
618     // don't let the pollution of the list head's next pointer leak
619     unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;
620 }
621 
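/* Try to untrack all currently tracked tuples.  _PyTuple_MaybeUntrack()
 * untracks a tuple only when none of its elements can ever need GC tracking,
 * since such a tuple cannot be part of a reference cycle. */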
622 static void
untrack_tuples(PyGC_Head *head)
624 {
625     PyGC_Head *next, *gc = GC_NEXT(head);
626     while (gc != head) {
627         PyObject *op = FROM_GC(gc);
628         next = GC_NEXT(gc);
629         if (PyTuple_CheckExact(op)) {
630             _PyTuple_MaybeUntrack(op);
631         }
632         gc = next;
633     }
634 }
635 
636 /* Try to untrack all currently tracked dictionaries */
637 static void
untrack_dicts(PyGC_Head *head)
639 {
640     PyGC_Head *next, *gc = GC_NEXT(head);
641     while (gc != head) {
642         PyObject *op = FROM_GC(gc);
643         next = GC_NEXT(gc);
644         if (PyDict_CheckExact(op)) {
645             _PyDict_MaybeUntrack(op);
646         }
647         gc = next;
648     }
649 }
650 
651 /* Return true if object has a pre-PEP 442 finalization method. */
652 static int
has_legacy_finalizer(PyObject *op)
654 {
655     return Py_TYPE(op)->tp_del != NULL;
656 }
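
/* Note: tp_del is the legacy (pre-PEP 442) finalizer slot.  A __del__ method
 * defined in Python maps to tp_finalize and is handled by finalize_garbage()
 * below, not by this path. */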
657 
658 /* Move the objects in unreachable with tp_del slots into `finalizers`.
659  *
 * This function also removes the NEXT_MASK_UNREACHABLE flag
 * from _gc_next in unreachable.
662  */
663 static void
move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
665 {
666     PyGC_Head *gc, *next;
667     assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
668 
669     /* March over unreachable.  Move objects with finalizers into
670      * `finalizers`.
671      */
672     for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
673         PyObject *op = FROM_GC(gc);
674 
675         _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
676         gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
677         next = (PyGC_Head*)gc->_gc_next;
678 
679         if (has_legacy_finalizer(op)) {
680             gc_clear_collecting(gc);
681             gc_list_move(gc, finalizers);
682         }
683     }
684 }
685 
686 static inline void
clear_unreachable_mask(PyGC_Head *unreachable)
688 {
689     /* Check that the list head does not have the unreachable bit set */
690     assert(((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0);
691 
692     PyGC_Head *gc, *next;
693     assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
694     for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
695         _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
696         gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
697         next = (PyGC_Head*)gc->_gc_next;
698     }
699     validate_list(unreachable, collecting_set_unreachable_clear);
700 }
701 
702 /* A traversal callback for move_legacy_finalizer_reachable. */
703 static int
visit_move(PyObject *op, PyGC_Head *tolist)
705 {
706     if (_PyObject_IS_GC(op)) {
707         PyGC_Head *gc = AS_GC(op);
708         if (gc_is_collecting(gc)) {
709             gc_list_move(gc, tolist);
710             gc_clear_collecting(gc);
711         }
712     }
713     return 0;
714 }
715 
716 /* Move objects that are reachable from finalizers, from the unreachable set
717  * into finalizers set.
718  */
719 static void
move_legacy_finalizer_reachable(PyGC_Head *finalizers)
721 {
722     traverseproc traverse;
723     PyGC_Head *gc = GC_NEXT(finalizers);
724     for (; gc != finalizers; gc = GC_NEXT(gc)) {
725         /* Note that the finalizers list may grow during this. */
726         traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
727         (void) traverse(FROM_GC(gc),
728                         (visitproc)visit_move,
729                         (void *)finalizers);
730     }
731 }
732 
733 /* Clear all weakrefs to unreachable objects, and if such a weakref has a
734  * callback, invoke it if necessary.  Note that it's possible for such
735  * weakrefs to be outside the unreachable set -- indeed, those are precisely
736  * the weakrefs whose callbacks must be invoked.  See gc_weakref.txt for
737  * overview & some details.  Some weakrefs with callbacks may be reclaimed
738  * directly by this routine; the number reclaimed is the return value.  Other
739  * weakrefs with callbacks may be moved into the `old` generation.  Objects
740  * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
741  * unreachable are left at GC_TENTATIVELY_UNREACHABLE.  When this returns,
742  * no object in `unreachable` is weakly referenced anymore.
743  */
744 static int
handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
746 {
747     PyGC_Head *gc;
748     PyObject *op;               /* generally FROM_GC(gc) */
749     PyWeakReference *wr;        /* generally a cast of op */
750     PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
751     PyGC_Head *next;
752     int num_freed = 0;
753 
754     gc_list_init(&wrcb_to_call);
755 
756     /* Clear all weakrefs to the objects in unreachable.  If such a weakref
757      * also has a callback, move it into `wrcb_to_call` if the callback
758      * needs to be invoked.  Note that we cannot invoke any callbacks until
759      * all weakrefs to unreachable objects are cleared, lest the callback
760      * resurrect an unreachable object via a still-active weakref.  We
761      * make another pass over wrcb_to_call, invoking callbacks, after this
762      * pass completes.
763      */
764     for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
765         PyWeakReference **wrlist;
766 
767         op = FROM_GC(gc);
768         next = GC_NEXT(gc);
769 
770         if (PyWeakref_Check(op)) {
771             /* A weakref inside the unreachable set must be cleared.  If we
772              * allow its callback to execute inside delete_garbage(), it
773              * could expose objects that have tp_clear already called on
774              * them.  Or, it could resurrect unreachable objects.  One way
775              * this can happen is if some container objects do not implement
776              * tp_traverse.  Then, wr_object can be outside the unreachable
777              * set but can be deallocated as a result of breaking the
778              * reference cycle.  If we don't clear the weakref, the callback
779              * will run and potentially cause a crash.  See bpo-38006 for
780              * one example.
781              */
782             _PyWeakref_ClearRef((PyWeakReference *)op);
783         }
784 
785         if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
786             continue;
787 
788         /* It supports weakrefs.  Does it have any? */
789         wrlist = (PyWeakReference **)
790                                 _PyObject_GET_WEAKREFS_LISTPTR(op);
791 
792         /* `op` may have some weakrefs.  March over the list, clear
793          * all the weakrefs, and move the weakrefs with callbacks
794          * that must be called into wrcb_to_call.
795          */
796         for (wr = *wrlist; wr != NULL; wr = *wrlist) {
797             PyGC_Head *wrasgc;                  /* AS_GC(wr) */
798 
799             /* _PyWeakref_ClearRef clears the weakref but leaves
800              * the callback pointer intact.  Obscure:  it also
801              * changes *wrlist.
802              */
803             _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
804             _PyWeakref_ClearRef(wr);
805             _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
806             if (wr->wr_callback == NULL) {
807                 /* no callback */
808                 continue;
809             }
810 
811             /* Headache time.  `op` is going away, and is weakly referenced by
812              * `wr`, which has a callback.  Should the callback be invoked?  If wr
813              * is also trash, no:
814              *
815              * 1. There's no need to call it.  The object and the weakref are
816              *    both going away, so it's legitimate to pretend the weakref is
817              *    going away first.  The user has to ensure a weakref outlives its
818              *    referent if they want a guarantee that the wr callback will get
819              *    invoked.
820              *
821              * 2. It may be catastrophic to call it.  If the callback is also in
822              *    cyclic trash (CT), then although the CT is unreachable from
823              *    outside the current generation, CT may be reachable from the
824              *    callback.  Then the callback could resurrect insane objects.
825              *
826              * Since the callback is never needed and may be unsafe in this case,
827              * wr is simply left in the unreachable set.  Note that because we
828              * already called _PyWeakref_ClearRef(wr), its callback will never
829              * trigger.
830              *
831              * OTOH, if wr isn't part of CT, we should invoke the callback:  the
832              * weakref outlived the trash.  Note that since wr isn't CT in this
833              * case, its callback can't be CT either -- wr acted as an external
834              * root to this generation, and therefore its callback did too.  So
835              * nothing in CT is reachable from the callback either, so it's hard
836              * to imagine how calling it later could create a problem for us.  wr
837              * is moved to wrcb_to_call in this case.
838              */
839             if (gc_is_collecting(AS_GC(wr))) {
840                 /* it should already have been cleared above */
841                 assert(wr->wr_object == Py_None);
842                 continue;
843             }
844 
845             /* Create a new reference so that wr can't go away
846              * before we can process it again.
847              */
848             Py_INCREF(wr);
849 
850             /* Move wr to wrcb_to_call, for the next pass. */
851             wrasgc = AS_GC(wr);
852             assert(wrasgc != next); /* wrasgc is reachable, but
853                                        next isn't, so they can't
854                                        be the same */
855             gc_list_move(wrasgc, &wrcb_to_call);
856         }
857     }
858 
859     /* Invoke the callbacks we decided to honor.  It's safe to invoke them
860      * because they can't reference unreachable objects.
861      */
862     while (! gc_list_is_empty(&wrcb_to_call)) {
863         PyObject *temp;
864         PyObject *callback;
865 
866         gc = (PyGC_Head*)wrcb_to_call._gc_next;
867         op = FROM_GC(gc);
868         _PyObject_ASSERT(op, PyWeakref_Check(op));
869         wr = (PyWeakReference *)op;
870         callback = wr->wr_callback;
871         _PyObject_ASSERT(op, callback != NULL);
872 
873         /* copy-paste of weakrefobject.c's handle_callback() */
874         temp = PyObject_CallOneArg(callback, (PyObject *)wr);
875         if (temp == NULL)
876             PyErr_WriteUnraisable(callback);
877         else
878             Py_DECREF(temp);
879 
880         /* Give up the reference we created in the first pass.  When
881          * op's refcount hits 0 (which it may or may not do right now),
882          * op's tp_dealloc will decref op->wr_callback too.  Note
883          * that the refcount probably will hit 0 now, and because this
884          * weakref was reachable to begin with, gc didn't already
885          * add it to its count of freed objects.  Example:  a reachable
886          * weak value dict maps some key to this reachable weakref.
887          * The callback removes this key->weakref mapping from the
888          * dict, leaving no other references to the weakref (excepting
889          * ours).
890          */
891         Py_DECREF(op);
892         if (wrcb_to_call._gc_next == (uintptr_t)gc) {
893             /* object is still alive -- move it */
894             gc_list_move(gc, old);
895         }
896         else {
897             ++num_freed;
898         }
899     }
900 
901     return num_freed;
902 }
903 
904 static void
debug_cycle(const char *msg, PyObject *op)
906 {
907     PySys_FormatStderr("gc: %s <%s %p>\n",
908                        msg, Py_TYPE(op)->tp_name, op);
909 }
910 
911 /* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
912  * only from such cycles).
913  * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
914  * garbage list (a Python list), else only the objects in finalizers with
915  * __del__ methods are appended to garbage.  All objects in finalizers are
916  * merged into the old list regardless.
917  */
918 static void
handle_legacy_finalizers(PyThreadState *tstate,
                         GCState *gcstate,
                         PyGC_Head *finalizers, PyGC_Head *old)
922 {
923     assert(!_PyErr_Occurred(tstate));
924     assert(gcstate->garbage != NULL);
925 
926     PyGC_Head *gc = GC_NEXT(finalizers);
927     for (; gc != finalizers; gc = GC_NEXT(gc)) {
928         PyObject *op = FROM_GC(gc);
929 
930         if ((gcstate->debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
931             if (PyList_Append(gcstate->garbage, op) < 0) {
932                 _PyErr_Clear(tstate);
933                 break;
934             }
935         }
936     }
937 
938     gc_list_merge(finalizers, old);
939 }
940 
941 /* Run first-time finalizers (if any) on all the objects in collectable.
942  * Note that this may remove some (or even all) of the objects from the
943  * list, due to refcounts falling to 0.
944  */
945 static void
finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
947 {
948     destructor finalize;
949     PyGC_Head seen;
950 
951     /* While we're going through the loop, `finalize(op)` may cause op, or
952      * other objects, to be reclaimed via refcounts falling to zero.  So
953      * there's little we can rely on about the structure of the input
954      * `collectable` list across iterations.  For safety, we always take the
955      * first object in that list and move it to a temporary `seen` list.
956      * If objects vanish from the `collectable` and `seen` lists we don't
957      * care.
958      */
959     gc_list_init(&seen);
960 
961     while (!gc_list_is_empty(collectable)) {
962         PyGC_Head *gc = GC_NEXT(collectable);
963         PyObject *op = FROM_GC(gc);
964         gc_list_move(gc, &seen);
965         if (!_PyGCHead_FINALIZED(gc) &&
966                 (finalize = Py_TYPE(op)->tp_finalize) != NULL) {
967             _PyGCHead_SET_FINALIZED(gc);
968             Py_INCREF(op);
969             finalize(op);
970             assert(!_PyErr_Occurred(tstate));
971             Py_DECREF(op);
972         }
973     }
974     gc_list_merge(&seen, collectable);
975 }
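
/* A worked example (illustrative): if an object's tp_finalize (__del__ in
 * Python) stores a reference to `self` in some reachable container, the
 * object is resurrected.  handle_resurrected_objects() below detects this by
 * re-running the unreachability computation and moves such objects to the
 * old generation instead of letting delete_garbage() clear them. */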
976 
977 /* Break reference cycles by clearing the containers involved.  This is
978  * tricky business as the lists can be changing and we don't know which
979  * objects may be freed.  It is possible I screwed something up here.
980  */
981 static void
delete_garbage(PyThreadState *tstate, GCState *gcstate,
               PyGC_Head *collectable, PyGC_Head *old)
984 {
985     assert(!_PyErr_Occurred(tstate));
986 
987     while (!gc_list_is_empty(collectable)) {
988         PyGC_Head *gc = GC_NEXT(collectable);
989         PyObject *op = FROM_GC(gc);
990 
991         _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
992                                   "refcount is too small");
993 
994         if (gcstate->debug & DEBUG_SAVEALL) {
995             assert(gcstate->garbage != NULL);
996             if (PyList_Append(gcstate->garbage, op) < 0) {
997                 _PyErr_Clear(tstate);
998             }
999         }
1000         else {
1001             inquiry clear;
1002             if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
1003                 Py_INCREF(op);
1004                 (void) clear(op);
1005                 if (_PyErr_Occurred(tstate)) {
1006                     _PyErr_WriteUnraisableMsg("in tp_clear of",
1007                                               (PyObject*)Py_TYPE(op));
1008                 }
1009                 Py_DECREF(op);
1010             }
1011         }
1012         if (GC_NEXT(collectable) == gc) {
1013             /* object is still alive, move it, it may die later */
1014             gc_clear_collecting(gc);
1015             gc_list_move(gc, old);
1016         }
1017     }
1018 }
1019 
1020 /* Clear all free lists
1021  * All free lists are cleared during the collection of the highest generation.
1022  * Allocated items in the free list may keep a pymalloc arena occupied.
1023  * Clearing the free lists may give back memory to the OS earlier.
1024  */
1025 static void
clear_freelists(void)
1027 {
1028     _PyFrame_ClearFreeList();
1029     _PyTuple_ClearFreeList();
1030     _PyFloat_ClearFreeList();
1031     _PyList_ClearFreeList();
1032     _PyDict_ClearFreeList();
1033     _PyAsyncGen_ClearFreeLists();
1034     _PyContext_ClearFreeList();
1035 }
1036 
// Show stats for objects in each generation.
1038 static void
show_stats_each_generations(GCState *gcstate)
1040 {
1041     char buf[100];
1042     size_t pos = 0;
1043 
1044     for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
1045         pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
1046                              " %"PY_FORMAT_SIZE_T"d",
1047                              gc_list_size(GEN_HEAD(gcstate, i)));
1048     }
1049 
1050     PySys_FormatStderr(
1051         "gc: objects in each generation:%s\n"
1052         "gc: objects in permanent generation: %zd\n",
1053         buf, gc_list_size(&gcstate->permanent_generation.head));
1054 }
1055 
/* Deduce which objects among "base" are unreachable from outside the list
   and move them to 'unreachable'. The process consists of the following steps:

1. Copy all reference counts to a different field (gc_prev is used to hold
   this copy to save memory).
2. Traverse all objects in "base" and visit all referred objects using
   "tp_traverse" and, for every visited object, subtract 1 from the reference
   count (the one that we copied in the previous step). After this step, all
   objects that can be reached directly from outside must have a strictly
   positive reference count, while all unreachable objects must have a count
   of exactly 0.
3. Identify all unreachable objects (the ones with a 0 reference count) and move
   them to the "unreachable" list. This step also needs to move back to "base" all
   objects that were initially marked as unreachable but are referred to
   transitively by the reachable objects (the ones with a strictly positive
   reference count).
1070 
1071 Contracts:
1072 
1073     * The "base" has to be a valid list with no mask set.
1074 
1075     * The "unreachable" list must be uninitialized (this function calls
1076       gc_list_init over 'unreachable').
1077 
IMPORTANT: This function leaves the objects in 'unreachable' with the
NEXT_MASK_UNREACHABLE flag set and does not clear it, to skip an unnecessary
extra iteration. Until the flag is cleared (for example, by using the
'clear_unreachable_mask' function or by a call to 'move_legacy_finalizers'),
the 'unreachable' list is not a normal list and most gc_list_* functions
cannot be used on it. */
1083 static inline void
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
1085     validate_list(base, collecting_clear_unreachable_clear);
1086     /* Using ob_refcnt and gc_refs, calculate which objects in the
1087      * container set are reachable from outside the set (i.e., have a
1088      * refcount greater than 0 when all the references within the
1089      * set are taken into account).
1090      */
1091     update_refs(base);  // gc_prev is used for gc_refs
1092     subtract_refs(base);
1093 
1094     /* Leave everything reachable from outside base in base, and move
1095      * everything else (in base) to unreachable.
1096      *
1097      * NOTE:  This used to move the reachable objects into a reachable
1098      * set instead.  But most things usually turn out to be reachable,
1099      * so it's more efficient to move the unreachable things.  It "sounds slick"
1100      * to move the unreachable objects, until you think about it - the reason it
1101      * pays isn't actually obvious.
1102      *
1103      * Suppose we create objects A, B, C in that order.  They appear in the young
1104      * generation in the same order.  If B points to A, and C to B, and C is
1105      * reachable from outside, then the adjusted refcounts will be 0, 0, and 1
1106      * respectively.
1107      *
1108      * When move_unreachable finds A, A is moved to the unreachable list.  The
1109      * same for B when it's first encountered.  Then C is traversed, B is moved
1110      * _back_ to the reachable list.  B is eventually traversed, and then A is
1111      * moved back to the reachable list.
1112      *
1113      * So instead of not moving at all, the reachable objects B and A are moved
1114      * twice each.  Why is this a win?  A straightforward algorithm to move the
1115      * reachable objects instead would move A, B, and C once each.
1116      *
1117      * The key is that this dance leaves the objects in order C, B, A - it's
1118      * reversed from the original order.  On all _subsequent_ scans, none of
1119      * them will move.  Since most objects aren't in cycles, this can save an
1120      * unbounded number of moves across an unbounded number of later collections.
1121      * It can cost more only the first time the chain is scanned.
1122      *
1123      * Drawback:  move_unreachable is also used to find out what's still trash
1124      * after finalizers may resurrect objects.  In _that_ case most unreachable
1125      * objects will remain unreachable, so it would be more efficient to move
1126      * the reachable objects instead.  But this is a one-time cost, probably not
1127      * worth complicating the code to speed just a little.
1128      */
1129     gc_list_init(unreachable);
1130     move_unreachable(base, unreachable);  // gc_prev is pointer again
1131     validate_list(base, collecting_clear_unreachable_clear);
1132     validate_list(unreachable, collecting_set_unreachable_set);
1133 }
1134 
1135 /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
1136    them to 'old_generation' and placing the rest on 'still_unreachable'.
1137 
1138    Contracts:
1139        * After this function 'unreachable' must not be used anymore and 'still_unreachable'
1140          will contain the objects that did not resurrect.
1141 
1142        * The "still_unreachable" list must be uninitialized (this function calls
1143          gc_list_init over 'still_unreachable').
1144 
IMPORTANT: After a call to this function, the 'still_unreachable' set will have the
PREV_MASK_COLLECTING flag set, but the objects in this set are going to be removed, so
we can skip the expense of clearing the flag and avoid an extra iteration. */
1148 static inline void
handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
                           PyGC_Head *old_generation)
1151 {
1152     // Remove the PREV_MASK_COLLECTING from unreachable
1153     // to prepare it for a new call to 'deduce_unreachable'
1154     gc_list_clear_collecting(unreachable);
1155 
1156     // After the call to deduce_unreachable, the 'still_unreachable' set will
    // have the PREV_MASK_COLLECTING flag set, but the objects are going to be
1158     // removed so we can skip the expense of clearing the flag.
1159     PyGC_Head* resurrected = unreachable;
1160     deduce_unreachable(resurrected, still_unreachable);
1161     clear_unreachable_mask(still_unreachable);
1162 
1163     // Move the resurrected objects to the old generation for future collection.
1164     gc_list_merge(resurrected, old_generation);
1165 }
1166 
1167 /* This is the main function.  Read this to understand how the
1168  * collection process works. */
1169 static Py_ssize_t
collect(PyThreadState *tstate, int generation,
        Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, int nofail)
1172 {
1173     int i;
1174     Py_ssize_t m = 0; /* # objects collected */
1175     Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
1176     PyGC_Head *young; /* the generation we are examining */
1177     PyGC_Head *old; /* next older generation */
1178     PyGC_Head unreachable; /* non-problematic unreachable trash */
1179     PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
1180     PyGC_Head *gc;
1181     _PyTime_t t1 = 0;   /* initialize to prevent a compiler warning */
1182     GCState *gcstate = &tstate->interp->gc;
1183 
1184     if (gcstate->debug & DEBUG_STATS) {
1185         PySys_WriteStderr("gc: collecting generation %d...\n", generation);
1186         show_stats_each_generations(gcstate);
1187         t1 = _PyTime_GetMonotonicClock();
1188     }
1189 
1190     if (PyDTrace_GC_START_ENABLED())
1191         PyDTrace_GC_START(generation);
1192 
1193     /* update collection and allocation counters */
1194     if (generation+1 < NUM_GENERATIONS)
1195         gcstate->generations[generation+1].count += 1;
1196     for (i = 0; i <= generation; i++)
1197         gcstate->generations[i].count = 0;
1198 
1199     /* merge younger generations with one we are currently collecting */
1200     for (i = 0; i < generation; i++) {
1201         gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
1202     }
1203 
1204     /* handy references */
1205     young = GEN_HEAD(gcstate, generation);
1206     if (generation < NUM_GENERATIONS-1)
1207         old = GEN_HEAD(gcstate, generation+1);
1208     else
1209         old = young;
1210     validate_list(old, collecting_clear_unreachable_clear);
1211 
1212     deduce_unreachable(young, &unreachable);
1213 
1214     untrack_tuples(young);
1215     /* Move reachable objects to next generation. */
1216     if (young != old) {
1217         if (generation == NUM_GENERATIONS - 2) {
1218             gcstate->long_lived_pending += gc_list_size(young);
1219         }
1220         gc_list_merge(young, old);
1221     }
1222     else {
1223         /* We only un-track dicts in full collections, to avoid quadratic
1224            dict build-up. See issue #14775. */
1225         untrack_dicts(young);
1226         gcstate->long_lived_pending = 0;
1227         gcstate->long_lived_total = gc_list_size(young);
1228     }
1229 
1230     /* All objects in unreachable are trash, but objects reachable from
1231      * legacy finalizers (e.g. tp_del) can't safely be deleted.
1232      */
1233     gc_list_init(&finalizers);
1234     // NEXT_MASK_UNREACHABLE is cleared here.
    // After move_legacy_finalizers(), unreachable is a normal list.
1236     move_legacy_finalizers(&unreachable, &finalizers);
1237     /* finalizers contains the unreachable objects with a legacy finalizer;
1238      * unreachable objects reachable *from* those are also uncollectable,
1239      * and we move those into the finalizers list too.
1240      */
1241     move_legacy_finalizer_reachable(&finalizers);
1242 
1243     validate_list(&finalizers, collecting_clear_unreachable_clear);
1244     validate_list(&unreachable, collecting_set_unreachable_clear);
1245 
1246     /* Print debugging information. */
1247     if (gcstate->debug & DEBUG_COLLECTABLE) {
1248         for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
1249             debug_cycle("collectable", FROM_GC(gc));
1250         }
1251     }
1252 
1253     /* Clear weakrefs and invoke callbacks as necessary. */
1254     m += handle_weakrefs(&unreachable, old);
1255 
1256     validate_list(old, collecting_clear_unreachable_clear);
1257     validate_list(&unreachable, collecting_set_unreachable_clear);
1258 
1259     /* Call tp_finalize on objects which have one. */
1260     finalize_garbage(tstate, &unreachable);
1261 
1262     /* Handle any objects that may have resurrected after the call
1263      * to 'finalize_garbage' and continue the collection with the
1264      * objects that are still unreachable */
1265     PyGC_Head final_unreachable;
1266     handle_resurrected_objects(&unreachable, &final_unreachable, old);
1267 
1268     /* Call tp_clear on objects in the final_unreachable set.  This will cause
1269     * the reference cycles to be broken.  It may also cause some objects
1270     * in finalizers to be freed.
1271     */
1272     m += gc_list_size(&final_unreachable);
1273     delete_garbage(tstate, gcstate, &final_unreachable, old);
1274 
1275     /* Collect statistics on uncollectable objects found and print
1276      * debugging information. */
1277     for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
1278         n++;
1279         if (gcstate->debug & DEBUG_UNCOLLECTABLE)
1280             debug_cycle("uncollectable", FROM_GC(gc));
1281     }
1282     if (gcstate->debug & DEBUG_STATS) {
1283         double d = _PyTime_AsSecondsDouble(_PyTime_GetMonotonicClock() - t1);
1284         PySys_WriteStderr(
1285             "gc: done, %" PY_FORMAT_SIZE_T "d unreachable, "
1286             "%" PY_FORMAT_SIZE_T "d uncollectable, %.4fs elapsed\n",
1287             n+m, n, d);
1288     }
1289 
1290     /* Append instances in the uncollectable set to a Python
1291      * reachable list of garbage.  The programmer has to deal with
1292      * this if they insist on creating this type of structure.
1293      */
1294     handle_legacy_finalizers(tstate, gcstate, &finalizers, old);
1295     validate_list(old, collecting_clear_unreachable_clear);
1296 
1297     /* Clear free list only during the collection of the highest
1298      * generation */
1299     if (generation == NUM_GENERATIONS-1) {
1300         clear_freelists();
1301     }
1302 
1303     if (_PyErr_Occurred(tstate)) {
1304         if (nofail) {
1305             _PyErr_Clear(tstate);
1306         }
1307         else {
1308             _PyErr_WriteUnraisableMsg("in garbage collection", NULL);
1309         }
1310     }
1311 
1312     /* Update stats */
1313     if (n_collected) {
1314         *n_collected = m;
1315     }
1316     if (n_uncollectable) {
1317         *n_uncollectable = n;
1318     }
1319 
1320     struct gc_generation_stats *stats = &gcstate->generation_stats[generation];
1321     stats->collections++;
1322     stats->collected += m;
1323     stats->uncollectable += n;
1324 
1325     if (PyDTrace_GC_DONE_ENABLED()) {
1326         PyDTrace_GC_DONE(n + m);
1327     }
1328 
1329     assert(!_PyErr_Occurred(tstate));
1330     return n + m;
1331 }
1332 
1333 /* Invoke progress callbacks to notify clients that garbage collection
1334  * is starting or stopping
1335  */
1336 static void
invoke_gc_callback(PyThreadState *tstate, const char *phase,
                   int generation, Py_ssize_t collected,
                   Py_ssize_t uncollectable)
1340 {
1341     assert(!_PyErr_Occurred(tstate));
1342 
1343     /* we may get called very early */
1344     GCState *gcstate = &tstate->interp->gc;
1345     if (gcstate->callbacks == NULL) {
1346         return;
1347     }
1348 
1349     /* The local variable cannot be rebound, check it for sanity */
1350     assert(PyList_CheckExact(gcstate->callbacks));
1351     PyObject *info = NULL;
1352     if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
1353         info = Py_BuildValue("{sisnsn}",
1354             "generation", generation,
1355             "collected", collected,
1356             "uncollectable", uncollectable);
1357         if (info == NULL) {
1358             PyErr_WriteUnraisable(NULL);
1359             return;
1360         }
1361     }
1362     for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
1363         PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
1364         Py_INCREF(cb); /* make sure cb doesn't go away */
1365         r = PyObject_CallFunction(cb, "sO", phase, info);
1366         if (r == NULL) {
1367             PyErr_WriteUnraisable(cb);
1368         }
1369         else {
1370             Py_DECREF(r);
1371         }
1372         Py_DECREF(cb);
1373     }
1374     Py_XDECREF(info);
1375     assert(!_PyErr_Occurred(tstate));
1376 }
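
/* Illustrative sketch (not part of this module): how Python code would hook
 * into the callbacks invoked above.  The callback name "on_gc" is made up;
 * the "phase" string and the info dict keys mirror what invoke_gc_callback()
 * builds.
 *
 *     import gc
 *
 *     def on_gc(phase, info):
 *         # phase is "start" or "stop"; info carries "generation",
 *         # "collected" and "uncollectable" (the counts are meaningful
 *         # for the "stop" phase)
 *         print(phase, info)
 *
 *     gc.callbacks.append(on_gc)
 */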
1377 
1378 /* Perform garbage collection of a generation and invoke
1379  * progress callbacks.
1380  */
1381 static Py_ssize_t
1382 collect_with_callback(PyThreadState *tstate, int generation)
1383 {
1384     assert(!_PyErr_Occurred(tstate));
1385     Py_ssize_t result, collected, uncollectable;
1386     invoke_gc_callback(tstate, "start", generation, 0, 0);
1387     result = collect(tstate, generation, &collected, &uncollectable, 0);
1388     invoke_gc_callback(tstate, "stop", generation, collected, uncollectable);
1389     assert(!_PyErr_Occurred(tstate));
1390     return result;
1391 }
1392 
1393 static Py_ssize_t
1394 collect_generations(PyThreadState *tstate)
1395 {
1396     GCState *gcstate = &tstate->interp->gc;
1397     /* Find the oldest generation (highest numbered) where the count
1398      * exceeds the threshold.  Objects in that generation and
1399      * generations younger than it will be collected. */
1400     Py_ssize_t n = 0;
1401     for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
1402         if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
1403             /* Avoid quadratic performance degradation in the number
1404                of tracked objects (see also issue #4074):
1405 
1406                To limit the cost of garbage collection, there are two strategies:
1407                  - make each collection faster, e.g. by scanning fewer objects
1408                  - do fewer collections
1409                This heuristic is about the latter strategy.
1410 
1411                In addition to the various configurable thresholds, we only trigger a
1412                full collection if the ratio
1413 
1414                 long_lived_pending / long_lived_total
1415 
1416                is above a given value (hardwired to 25%).
1417 
1418                The reason is that, while "non-full" collections (i.e., collections of
1419                the young and middle generations) will always examine roughly the same
1420                number of objects -- determined by the aforementioned thresholds --,
1421                the cost of a full collection is proportional to the total number of
1422                long-lived objects, which is virtually unbounded.
1423 
1424                Indeed, it has been remarked that doing a full collection every
1425                <constant number> of object creations entails a dramatic performance
1426                degradation in workloads that consist of creating and storing lots of
1427                long-lived objects (e.g. building a large list of GC-tracked objects would
1428                show quadratic performance, instead of linear as expected: see issue #4074).
1429 
1430                Using the above ratio, instead, yields amortized linear performance in
1431                the total number of objects (the effect of which can be summarized
1432                thusly: "each full garbage collection is more and more costly as the
1433                number of objects grows, but we do fewer and fewer of them").
1434 
1435                This heuristic was suggested by Martin von Löwis on python-dev in
1436                June 2008. His original analysis and proposal can be found at:
1437                http://mail.python.org/pipermail/python-dev/2008-June/080579.html
1438             */
1439             if (i == NUM_GENERATIONS - 1
1440                 && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
1441                 continue;
1442             n = collect_with_callback(tstate, i);
1443             break;
1444         }
1445     }
1446     return n;
1447 }
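
/* A worked example of the full-collection heuristic above (illustrative
 * sketch, made-up numbers): with long_lived_total == 100000, a collection of
 * the oldest generation is deferred until at least 25000 objects are pending
 * promotion since the last full collection.
 *
 *     def full_collection_allowed(long_lived_pending, long_lived_total):
 *         # mirrors the C condition: generation 2 is skipped while
 *         # long_lived_pending < long_lived_total / 4
 *         return long_lived_pending >= long_lived_total // 4
 *
 *     full_collection_allowed(10_000, 100_000)   # False -> deferred
 *     full_collection_allowed(30_000, 100_000)   # True  -> collect
 */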
1448 
1449 #include "clinic/gcmodule.c.h"
1450 
1451 /*[clinic input]
1452 gc.enable
1453 
1454 Enable automatic garbage collection.
1455 [clinic start generated code]*/
1456 
1457 static PyObject *
1458 gc_enable_impl(PyObject *module)
1459 /*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/
1460 {
1461     PyThreadState *tstate = _PyThreadState_GET();
1462     GCState *gcstate = &tstate->interp->gc;
1463     gcstate->enabled = 1;
1464     Py_RETURN_NONE;
1465 }
1466 
1467 /*[clinic input]
1468 gc.disable
1469 
1470 Disable automatic garbage collection.
1471 [clinic start generated code]*/
1472 
1473 static PyObject *
1474 gc_disable_impl(PyObject *module)
1475 /*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/
1476 {
1477     PyThreadState *tstate = _PyThreadState_GET();
1478     GCState *gcstate = &tstate->interp->gc;
1479     gcstate->enabled = 0;
1480     Py_RETURN_NONE;
1481 }
1482 
1483 /*[clinic input]
1484 gc.isenabled -> bool
1485 
1486 Returns true if automatic garbage collection is enabled.
1487 [clinic start generated code]*/
1488 
1489 static int
1490 gc_isenabled_impl(PyObject *module)
1491 /*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/
1492 {
1493     PyThreadState *tstate = _PyThreadState_GET();
1494     GCState *gcstate = &tstate->interp->gc;
1495     return gcstate->enabled;
1496 }
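
/* Illustrative usage sketch for the three functions above; the surrounding
 * application code (build_large_object_graph) is hypothetical.
 *
 *     import gc
 *
 *     was_enabled = gc.isenabled()
 *     gc.disable()                    # pause automatic collections
 *     try:
 *         build_large_object_graph()  # hypothetical allocation-heavy phase
 *     finally:
 *         if was_enabled:
 *             gc.enable()             # restore automatic collection
 */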
1497 
1498 /*[clinic input]
1499 gc.collect -> Py_ssize_t
1500 
1501     generation: int(c_default="NUM_GENERATIONS - 1") = 2
1502 
1503 Run the garbage collector.
1504 
1505 With no arguments, run a full collection.  The optional argument
1506 may be an integer specifying which generation to collect.  A ValueError
1507 is raised if the generation number is invalid.
1508 
1509 The number of unreachable objects is returned.
1510 [clinic start generated code]*/
1511 
1512 static Py_ssize_t
1513 gc_collect_impl(PyObject *module, int generation)
1514 /*[clinic end generated code: output=b697e633043233c7 input=40720128b682d879]*/
1515 {
1516     PyThreadState *tstate = _PyThreadState_GET();
1517 
1518     if (generation < 0 || generation >= NUM_GENERATIONS) {
1519         _PyErr_SetString(tstate, PyExc_ValueError, "invalid generation");
1520         return -1;
1521     }
1522 
1523     GCState *gcstate = &tstate->interp->gc;
1524     Py_ssize_t n;
1525     if (gcstate->collecting) {
1526         /* already collecting, don't do anything */
1527         n = 0;
1528     }
1529     else {
1530         gcstate->collecting = 1;
1531         n = collect_with_callback(tstate, generation);
1532         gcstate->collecting = 0;
1533     }
1534     return n;
1535 }
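
/* Illustrative usage sketch for gc.collect(); the number of unreachable
 * objects it reports depends on the program.
 *
 *     import gc
 *
 *     gc.collect()        # full collection (all generations)
 *     gc.collect(0)       # collect only the youngest generation
 *     try:
 *         gc.collect(3)   # out of range -> ValueError("invalid generation")
 *     except ValueError:
 *         pass
 */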
1536 
1537 /*[clinic input]
1538 gc.set_debug
1539 
1540     flags: int
1541         An integer that can have the following bits turned on:
1542           DEBUG_STATS - Print statistics during collection.
1543           DEBUG_COLLECTABLE - Print collectable objects found.
1544           DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects
1545             found.
1546           DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.
1547           DEBUG_LEAK - Debug leaking programs (everything but STATS).
1548     /
1549 
1550 Set the garbage collection debugging flags.
1551 
1552 Debugging information is written to sys.stderr.
1553 [clinic start generated code]*/
1554 
1555 static PyObject *
1556 gc_set_debug_impl(PyObject *module, int flags)
1557 /*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/
1558 {
1559     PyThreadState *tstate = _PyThreadState_GET();
1560     GCState *gcstate = &tstate->interp->gc;
1561     gcstate->debug = flags;
1562     Py_RETURN_NONE;
1563 }
1564 
1565 /*[clinic input]
1566 gc.get_debug -> int
1567 
1568 Get the garbage collection debugging flags.
1569 [clinic start generated code]*/
1570 
1571 static int
1572 gc_get_debug_impl(PyObject *module)
1573 /*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/
1574 {
1575     PyThreadState *tstate = _PyThreadState_GET();
1576     GCState *gcstate = &tstate->interp->gc;
1577     return gcstate->debug;
1578 }
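
/* Illustrative sketch of the debug flags in use.  With DEBUG_SAVEALL set,
 * unreachable objects are appended to gc.garbage instead of being freed,
 * which helps when hunting leaks.
 *
 *     import gc
 *
 *     gc.set_debug(gc.DEBUG_SAVEALL | gc.DEBUG_STATS)
 *     gc.collect()
 *     print(gc.get_debug() & gc.DEBUG_SAVEALL)   # non-zero while the flag is set
 *     print(len(gc.garbage))                     # collected objects kept here
 *     gc.set_debug(0)                            # back to the default
 */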
1579 
1580 PyDoc_STRVAR(gc_set_thresh__doc__,
1581 "set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
1582 "\n"
1583 "Sets the collection thresholds.  Setting threshold0 to zero disables\n"
1584 "collection.\n");
1585 
1586 static PyObject *
1587 gc_set_threshold(PyObject *self, PyObject *args)
1588 {
1589     PyThreadState *tstate = _PyThreadState_GET();
1590     GCState *gcstate = &tstate->interp->gc;
1591     if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
1592                           &gcstate->generations[0].threshold,
1593                           &gcstate->generations[1].threshold,
1594                           &gcstate->generations[2].threshold))
1595         return NULL;
1596     for (int i = 3; i < NUM_GENERATIONS; i++) {
1597         /* generations higher than 2 get the same threshold */
1598         gcstate->generations[i].threshold = gcstate->generations[2].threshold;
1599     }
1600     Py_RETURN_NONE;
1601 }
1602 
1603 /*[clinic input]
1604 gc.get_threshold
1605 
1606 Return the current collection thresholds.
1607 [clinic start generated code]*/
1608 
1609 static PyObject *
1610 gc_get_threshold_impl(PyObject *module)
1611 /*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/
1612 {
1613     PyThreadState *tstate = _PyThreadState_GET();
1614     GCState *gcstate = &tstate->interp->gc;
1615     return Py_BuildValue("(iii)",
1616                          gcstate->generations[0].threshold,
1617                          gcstate->generations[1].threshold,
1618                          gcstate->generations[2].threshold);
1619 }
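
/* Illustrative sketch for the threshold functions (the numbers are
 * arbitrary).
 *
 *     import gc
 *
 *     gc.set_threshold(700, 10, 10)
 *     assert gc.get_threshold() == (700, 10, 10)
 *     gc.set_threshold(0)       # threshold0 == 0 disables collection
 */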
1620 
1621 /*[clinic input]
1622 gc.get_count
1623 
1624 Return a three-tuple of the current collection counts.
1625 [clinic start generated code]*/
1626 
1627 static PyObject *
1628 gc_get_count_impl(PyObject *module)
1629 /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
1630 {
1631     PyThreadState *tstate = _PyThreadState_GET();
1632     GCState *gcstate = &tstate->interp->gc;
1633     return Py_BuildValue("(iii)",
1634                          gcstate->generations[0].count,
1635                          gcstate->generations[1].count,
1636                          gcstate->generations[2].count);
1637 }
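
/* Illustrative sketch: gc.get_count() exposes the per-generation counters
 * that are compared against the thresholds above; the exact values depend on
 * what the program has allocated so far.
 *
 *     import gc
 *
 *     count0, count1, count2 = gc.get_count()
 *     threshold0, _, _ = gc.get_threshold()
 *     # once count0 exceeds threshold0, the next GC-tracked allocation
 *     # may trigger a collection of the young generation
 */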
1638 
1639 static int
1640 referrersvisit(PyObject* obj, PyObject *objs)
1641 {
1642     Py_ssize_t i;
1643     for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
1644         if (PyTuple_GET_ITEM(objs, i) == obj)
1645             return 1;
1646     return 0;
1647 }
1648 
1649 static int
1650 gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1651 {
1652     PyGC_Head *gc;
1653     PyObject *obj;
1654     traverseproc traverse;
1655     for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1656         obj = FROM_GC(gc);
1657         traverse = Py_TYPE(obj)->tp_traverse;
1658         if (obj == objs || obj == resultlist)
1659             continue;
1660         if (traverse(obj, (visitproc)referrersvisit, objs)) {
1661             if (PyList_Append(resultlist, obj) < 0)
1662                 return 0; /* error */
1663         }
1664     }
1665     return 1; /* no error */
1666 }
1667 
1668 PyDoc_STRVAR(gc_get_referrers__doc__,
1669 "get_referrers(*objs) -> list\n\
1670 Return the list of objects that directly refer to any of objs.");
1671 
1672 static PyObject *
1673 gc_get_referrers(PyObject *self, PyObject *args)
1674 {
1675     PyThreadState *tstate = _PyThreadState_GET();
1676     int i;
1677     PyObject *result = PyList_New(0);
1678     if (!result) {
1679         return NULL;
1680     }
1681 
1682     GCState *gcstate = &tstate->interp->gc;
1683     for (i = 0; i < NUM_GENERATIONS; i++) {
1684         if (!(gc_referrers_for(args, GEN_HEAD(gcstate, i), result))) {
1685             Py_DECREF(result);
1686             return NULL;
1687         }
1688     }
1689     return result;
1690 }
1691 
1692 /* Append obj to list; return true if error (out of memory), false if OK. */
1693 static int
1694 referentsvisit(PyObject *obj, PyObject *list)
1695 {
1696     return PyList_Append(list, obj) < 0;
1697 }
1698 
1699 PyDoc_STRVAR(gc_get_referents__doc__,
1700 "get_referents(*objs) -> list\n\
1701 Return the list of objects that are directly referred to by objs.");
1702 
1703 static PyObject *
1704 gc_get_referents(PyObject *self, PyObject *args)
1705 {
1706     Py_ssize_t i;
1707     PyObject *result = PyList_New(0);
1708 
1709     if (result == NULL)
1710         return NULL;
1711 
1712     for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
1713         traverseproc traverse;
1714         PyObject *obj = PyTuple_GET_ITEM(args, i);
1715 
1716         if (!_PyObject_IS_GC(obj))
1717             continue;
1718         traverse = Py_TYPE(obj)->tp_traverse;
1719         if (! traverse)
1720             continue;
1721         if (traverse(obj, (visitproc)referentsvisit, result)) {
1722             Py_DECREF(result);
1723             return NULL;
1724         }
1725     }
1726     return result;
1727 }
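
/* Illustrative sketch of the two introspection helpers above on a small
 * reference cycle (the variable names are made up).
 *
 *     import gc
 *
 *     a = []
 *     b = [a]
 *     a.append(b)
 *
 *     assert b in gc.get_referrers(a)   # b holds a reference to a
 *     assert b in gc.get_referents(a)   # and a refers back to b
 */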
1728 
1729 /*[clinic input]
1730 gc.get_objects
1731     generation: Py_ssize_t(accept={int, NoneType}, c_default="-1") = None
1732         Generation to extract the objects from.
1733 
1734 Return a list of objects tracked by the collector (excluding the list returned).
1735 
1736 If generation is not None, return only the objects tracked by the collector
1737 that are in that generation.
1738 [clinic start generated code]*/
1739 
1740 static PyObject *
1741 gc_get_objects_impl(PyObject *module, Py_ssize_t generation)
1742 /*[clinic end generated code: output=48b35fea4ba6cb0e input=ef7da9df9806754c]*/
1743 {
1744     PyThreadState *tstate = _PyThreadState_GET();
1745     int i;
1746     PyObject* result;
1747     GCState *gcstate = &tstate->interp->gc;
1748 
1749     result = PyList_New(0);
1750     if (result == NULL) {
1751         return NULL;
1752     }
1753 
1754     /* If generation is passed, we extract only that generation */
1755     if (generation != -1) {
1756         if (generation >= NUM_GENERATIONS) {
1757             _PyErr_Format(tstate, PyExc_ValueError,
1758                           "generation parameter must be less than the number of "
1759                           "available generations (%i)",
1760                            NUM_GENERATIONS);
1761             goto error;
1762         }
1763 
1764         if (generation < 0) {
1765             _PyErr_SetString(tstate, PyExc_ValueError,
1766                              "generation parameter cannot be negative");
1767             goto error;
1768         }
1769 
1770         if (append_objects(result, GEN_HEAD(gcstate, generation))) {
1771             goto error;
1772         }
1773 
1774         return result;
1775     }
1776 
1777     /* If generation is not passed or None, get all objects from all generations */
1778     for (i = 0; i < NUM_GENERATIONS; i++) {
1779         if (append_objects(result, GEN_HEAD(gcstate, i))) {
1780             goto error;
1781         }
1782     }
1783     return result;
1784 
1785 error:
1786     Py_DECREF(result);
1787     return NULL;
1788 }
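
/* Illustrative usage sketch for gc.get_objects(); the results depend on what
 * the interpreter currently tracks.
 *
 *     import gc
 *
 *     everything = gc.get_objects()            # all generations
 *     youngest = gc.get_objects(generation=0)  # only generation 0
 *     assert all(gc.is_tracked(o) for o in youngest)
 */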
1789 
1790 /*[clinic input]
1791 gc.get_stats
1792 
1793 Return a list of dictionaries containing per-generation statistics.
1794 [clinic start generated code]*/
1795 
1796 static PyObject *
1797 gc_get_stats_impl(PyObject *module)
1798 /*[clinic end generated code: output=a8ab1d8a5d26f3ab input=1ef4ed9d17b1a470]*/
1799 {
1800     int i;
1801     struct gc_generation_stats stats[NUM_GENERATIONS], *st;
1802     PyThreadState *tstate = _PyThreadState_GET();
1803 
1804     /* To get consistent values despite allocations while constructing
1805        the result list, we use a snapshot of the running stats. */
1806     GCState *gcstate = &tstate->interp->gc;
1807     for (i = 0; i < NUM_GENERATIONS; i++) {
1808         stats[i] = gcstate->generation_stats[i];
1809     }
1810 
1811     PyObject *result = PyList_New(0);
1812     if (result == NULL)
1813         return NULL;
1814 
1815     for (i = 0; i < NUM_GENERATIONS; i++) {
1816         PyObject *dict;
1817         st = &stats[i];
1818         dict = Py_BuildValue("{snsnsn}",
1819                              "collections", st->collections,
1820                              "collected", st->collected,
1821                              "uncollectable", st->uncollectable
1822                             );
1823         if (dict == NULL)
1824             goto error;
1825         if (PyList_Append(result, dict)) {
1826             Py_DECREF(dict);
1827             goto error;
1828         }
1829         Py_DECREF(dict);
1830     }
1831     return result;
1832 
1833 error:
1834     Py_XDECREF(result);
1835     return NULL;
1836 }
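
/* Illustrative sketch for gc.get_stats(); the returned list holds one dict
 * per generation with the three counters snapshotted above.
 *
 *     import gc
 *
 *     gc.collect()
 *     oldest = gc.get_stats()[-1]
 *     print(oldest["collections"], oldest["collected"], oldest["uncollectable"])
 */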
1837 
1838 
1839 /*[clinic input]
1840 gc.is_tracked
1841 
1842     obj: object
1843     /
1844 
1845 Returns true if the object is tracked by the garbage collector.
1846 
1847 Simple atomic objects will return false.
1848 [clinic start generated code]*/
1849 
1850 static PyObject *
1851 gc_is_tracked(PyObject *module, PyObject *obj)
1852 /*[clinic end generated code: output=14f0103423b28e31 input=d83057f170ea2723]*/
1853 {
1854     PyObject *result;
1855 
1856     if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj))
1857         result = Py_True;
1858     else
1859         result = Py_False;
1860     Py_INCREF(result);
1861     return result;
1862 }
1863 
1864 /*[clinic input]
1865 gc.is_finalized
1866 
1867     obj: object
1868     /
1869 
1870 Returns true if the object has been already finalized by the GC.
1871 [clinic start generated code]*/
1872 
1873 static PyObject *
1874 gc_is_finalized(PyObject *module, PyObject *obj)
1875 /*[clinic end generated code: output=e1516ac119a918ed input=201d0c58f69ae390]*/
1876 {
1877     if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
1878          Py_RETURN_TRUE;
1879     }
1880     Py_RETURN_FALSE;
1881 }
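
/* Illustrative sketch for the two predicates above.  Atomic objects are never
 * tracked, and is_finalized() becomes true once an object's __del__ has
 * already been run (for instance on a resurrected object).  The class and
 * variable names are made up.
 *
 *     import gc
 *
 *     gc.is_tracked(42)          # False: ints are not GC-tracked
 *     gc.is_tracked([])          # True: containers are tracked
 *
 *     class Lazarus:
 *         def __del__(self):
 *             global survivor
 *             survivor = self    # resurrect during finalization
 *
 *     x = Lazarus()
 *     gc.is_finalized(x)         # False: __del__ has not run yet
 *     del x                      # finalizer runs and resurrects the object
 *     gc.is_finalized(survivor)  # True
 */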
1882 
1883 /*[clinic input]
1884 gc.freeze
1885 
1886 Freeze all current tracked objects and ignore them for future collections.
1887 
1888 This can be used before a POSIX fork() call to make the gc copy-on-write friendly.
1889 Note: collection before a POSIX fork() call may free pages for future allocation,
1890 which can cause copy-on-write.
1891 [clinic start generated code]*/
1892 
1893 static PyObject *
1894 gc_freeze_impl(PyObject *module)
1895 /*[clinic end generated code: output=502159d9cdc4c139 input=b602b16ac5febbe5]*/
1896 {
1897     PyThreadState *tstate = _PyThreadState_GET();
1898     GCState *gcstate = &tstate->interp->gc;
1899     for (int i = 0; i < NUM_GENERATIONS; ++i) {
1900         gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head);
1901         gcstate->generations[i].count = 0;
1902     }
1903     Py_RETURN_NONE;
1904 }
1905 
1906 /*[clinic input]
1907 gc.unfreeze
1908 
1909 Unfreeze all objects in the permanent generation.
1910 
1911 Put all objects in the permanent generation back into oldest generation.
1912 [clinic start generated code]*/
1913 
1914 static PyObject *
1915 gc_unfreeze_impl(PyObject *module)
1916 /*[clinic end generated code: output=1c15f2043b25e169 input=2dd52b170f4cef6c]*/
1917 {
1918     PyThreadState *tstate = _PyThreadState_GET();
1919     GCState *gcstate = &tstate->interp->gc;
1920     gc_list_merge(&gcstate->permanent_generation.head,
1921                   GEN_HEAD(gcstate, NUM_GENERATIONS-1));
1922     Py_RETURN_NONE;
1923 }
1924 
1925 /*[clinic input]
1926 gc.get_freeze_count -> Py_ssize_t
1927 
1928 Return the number of objects in the permanent generation.
1929 [clinic start generated code]*/
1930 
1931 static Py_ssize_t
1932 gc_get_freeze_count_impl(PyObject *module)
1933 /*[clinic end generated code: output=61cbd9f43aa032e1 input=45ffbc65cfe2a6ed]*/
1934 {
1935     PyThreadState *tstate = _PyThreadState_GET();
1936     GCState *gcstate = &tstate->interp->gc;
1937     return gc_list_size(&gcstate->permanent_generation.head);
1938 }
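
/* Illustrative sketch of the freeze API in a pre-fork setup, following the
 * note above about avoiding pre-fork collections.  load_application_state()
 * and serve_requests() are hypothetical.
 *
 *     import gc, os
 *
 *     gc.disable()                # avoid collections in the parent for now
 *     load_application_state()    # hypothetical: build long-lived objects
 *     gc.freeze()                 # park survivors in the permanent generation
 *     print(gc.get_freeze_count())
 *
 *     if os.fork() == 0:
 *         gc.enable()             # child: re-enable automatic collection
 *         serve_requests()        # hypothetical worker loop; frozen pages
 *                                 # are more likely to stay shared (CoW)
 */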
1939 
1940 
1941 PyDoc_STRVAR(gc__doc__,
1942 "This module provides access to the garbage collector for reference cycles.\n"
1943 "\n"
1944 "enable() -- Enable automatic garbage collection.\n"
1945 "disable() -- Disable automatic garbage collection.\n"
1946 "isenabled() -- Returns true if automatic collection is enabled.\n"
1947 "collect() -- Do a full collection right now.\n"
1948 "get_count() -- Return the current collection counts.\n"
1949 "get_stats() -- Return list of dictionaries containing per-generation stats.\n"
1950 "set_debug() -- Set debugging flags.\n"
1951 "get_debug() -- Get debugging flags.\n"
1952 "set_threshold() -- Set the collection thresholds.\n"
1953 "get_threshold() -- Return the current the collection thresholds.\n"
1954 "get_objects() -- Return a list of all objects tracked by the collector.\n"
1955 "is_tracked() -- Returns true if a given object is tracked.\n"
1956 "is_finalized() -- Returns true if a given object has been already finalized.\n"
1957 "get_referrers() -- Return the list of objects that refer to an object.\n"
1958 "get_referents() -- Return the list of objects that an object refers to.\n"
1959 "freeze() -- Freeze all tracked objects and ignore them for future collections.\n"
1960 "unfreeze() -- Unfreeze all objects in the permanent generation.\n"
1961 "get_freeze_count() -- Return the number of objects in the permanent generation.\n");
1962 
1963 static PyMethodDef GcMethods[] = {
1964     GC_ENABLE_METHODDEF
1965     GC_DISABLE_METHODDEF
1966     GC_ISENABLED_METHODDEF
1967     GC_SET_DEBUG_METHODDEF
1968     GC_GET_DEBUG_METHODDEF
1969     GC_GET_COUNT_METHODDEF
1970     {"set_threshold",  gc_set_threshold, METH_VARARGS, gc_set_thresh__doc__},
1971     GC_GET_THRESHOLD_METHODDEF
1972     GC_COLLECT_METHODDEF
1973     GC_GET_OBJECTS_METHODDEF
1974     GC_GET_STATS_METHODDEF
1975     GC_IS_TRACKED_METHODDEF
1976     GC_IS_FINALIZED_METHODDEF
1977     {"get_referrers",  gc_get_referrers, METH_VARARGS,
1978         gc_get_referrers__doc__},
1979     {"get_referents",  gc_get_referents, METH_VARARGS,
1980         gc_get_referents__doc__},
1981     GC_FREEZE_METHODDEF
1982     GC_UNFREEZE_METHODDEF
1983     GC_GET_FREEZE_COUNT_METHODDEF
1984     {NULL,      NULL}           /* Sentinel */
1985 };
1986 
1987 static struct PyModuleDef gcmodule = {
1988     PyModuleDef_HEAD_INIT,
1989     "gc",              /* m_name */
1990     gc__doc__,         /* m_doc */
1991     -1,                /* m_size */
1992     GcMethods,         /* m_methods */
1993     NULL,              /* m_reload */
1994     NULL,              /* m_traverse */
1995     NULL,              /* m_clear */
1996     NULL               /* m_free */
1997 };
1998 
1999 PyMODINIT_FUNC
2000 PyInit_gc(void)
2001 {
2002     PyThreadState *tstate = _PyThreadState_GET();
2003     GCState *gcstate = &tstate->interp->gc;
2004 
2005     PyObject *m = PyModule_Create(&gcmodule);
2006 
2007     if (m == NULL) {
2008         return NULL;
2009     }
2010 
2011     if (gcstate->garbage == NULL) {
2012         gcstate->garbage = PyList_New(0);
2013         if (gcstate->garbage == NULL) {
2014             return NULL;
2015         }
2016     }
2017     Py_INCREF(gcstate->garbage);
2018     if (PyModule_AddObject(m, "garbage", gcstate->garbage) < 0) {
2019         return NULL;
2020     }
2021 
2022     if (gcstate->callbacks == NULL) {
2023         gcstate->callbacks = PyList_New(0);
2024         if (gcstate->callbacks == NULL) {
2025             return NULL;
2026         }
2027     }
2028     Py_INCREF(gcstate->callbacks);
2029     if (PyModule_AddObject(m, "callbacks", gcstate->callbacks) < 0) {
2030         return NULL;
2031     }
2032 
2033 #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) { return NULL; }
2034     ADD_INT(DEBUG_STATS);
2035     ADD_INT(DEBUG_COLLECTABLE);
2036     ADD_INT(DEBUG_UNCOLLECTABLE);
2037     ADD_INT(DEBUG_SAVEALL);
2038     ADD_INT(DEBUG_LEAK);
2039 #undef ADD_INT
2040     return m;
2041 }
2042 
2043 /* API to invoke gc.collect() from C */
2044 Py_ssize_t
2045 PyGC_Collect(void)
2046 {
2047     PyThreadState *tstate = _PyThreadState_GET();
2048     GCState *gcstate = &tstate->interp->gc;
2049 
2050     if (!gcstate->enabled) {
2051         return 0;
2052     }
2053 
2054     Py_ssize_t n;
2055     if (gcstate->collecting) {
2056         /* already collecting, don't do anything */
2057         n = 0;
2058     }
2059     else {
2060         PyObject *exc, *value, *tb;
2061         gcstate->collecting = 1;
2062         _PyErr_Fetch(tstate, &exc, &value, &tb);
2063         n = collect_with_callback(tstate, NUM_GENERATIONS - 1);
2064         _PyErr_Restore(tstate, exc, value, tb);
2065         gcstate->collecting = 0;
2066     }
2067 
2068     return n;
2069 }
2070 
2071 Py_ssize_t
2072 _PyGC_CollectIfEnabled(void)
2073 {
2074     return PyGC_Collect();
2075 }
2076 
2077 Py_ssize_t
2078 _PyGC_CollectNoFail(void)
2079 {
2080     PyThreadState *tstate = _PyThreadState_GET();
2081     assert(!_PyErr_Occurred(tstate));
2082 
2083     GCState *gcstate = &tstate->interp->gc;
2084     Py_ssize_t n;
2085 
2086     /* Ideally, this function is only called on interpreter shutdown,
2087        and therefore not recursively.  Unfortunately, when there are daemon
2088        threads, a daemon thread can start a cyclic garbage collection
2089        during interpreter shutdown (and then never finish it).
2090        See http://bugs.python.org/issue8713#msg195178 for an example.
2091        */
2092     if (gcstate->collecting) {
2093         n = 0;
2094     }
2095     else {
2096         gcstate->collecting = 1;
2097         n = collect(tstate, NUM_GENERATIONS - 1, NULL, NULL, 1);
2098         gcstate->collecting = 0;
2099     }
2100     return n;
2101 }
2102 
2103 void
2104 _PyGC_DumpShutdownStats(PyThreadState *tstate)
2105 {
2106     GCState *gcstate = &tstate->interp->gc;
2107     if (!(gcstate->debug & DEBUG_SAVEALL)
2108         && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
2109         const char *message;
2110         if (gcstate->debug & DEBUG_UNCOLLECTABLE)
2111             message = "gc: %zd uncollectable objects at " \
2112                 "shutdown";
2113         else
2114             message = "gc: %zd uncollectable objects at " \
2115                 "shutdown; use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
2116         /* PyErr_WarnFormat does too many things and we are at shutdown;
2117            the warnings module's dependencies (e.g. linecache) may be gone
2118            already. */
2119         if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2120                                      "gc", NULL, message,
2121                                      PyList_GET_SIZE(gcstate->garbage)))
2122             PyErr_WriteUnraisable(NULL);
2123         if (gcstate->debug & DEBUG_UNCOLLECTABLE) {
2124             PyObject *repr = NULL, *bytes = NULL;
2125             repr = PyObject_Repr(gcstate->garbage);
2126             if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
2127                 PyErr_WriteUnraisable(gcstate->garbage);
2128             else {
2129                 PySys_WriteStderr(
2130                     "      %s\n",
2131                     PyBytes_AS_STRING(bytes)
2132                     );
2133             }
2134             Py_XDECREF(repr);
2135             Py_XDECREF(bytes);
2136         }
2137     }
2138 }
2139 
2140 void
2141 _PyGC_Fini(PyThreadState *tstate)
2142 {
2143     GCState *gcstate = &tstate->interp->gc;
2144     Py_CLEAR(gcstate->garbage);
2145     Py_CLEAR(gcstate->callbacks);
2146 }
2147 
2148 /* for debugging */
2149 void
2150 _PyGC_Dump(PyGC_Head *g)
2151 {
2152     _PyObject_Dump(FROM_GC(g));
2153 }
2154 
2155 
2156 #ifdef Py_DEBUG
2157 static int
2158 visit_validate(PyObject *op, void *parent_raw)
2159 {
2160     PyObject *parent = _PyObject_CAST(parent_raw);
2161     if (_PyObject_IsFreed(op)) {
2162         _PyObject_ASSERT_FAILED_MSG(parent,
2163                                     "PyObject_GC_Track() object is not valid");
2164     }
2165     return 0;
2166 }
2167 #endif
2168 
2169 
2170 /* extension modules might be compiled with GC support, so these
2171    functions must always be available */
2172 
2173 void
2174 PyObject_GC_Track(void *op_raw)
2175 {
2176     PyObject *op = _PyObject_CAST(op_raw);
2177     if (_PyObject_GC_IS_TRACKED(op)) {
2178         _PyObject_ASSERT_FAILED_MSG(op,
2179                                     "object already tracked "
2180                                     "by the garbage collector");
2181     }
2182     _PyObject_GC_TRACK(op);
2183 
2184 #ifdef Py_DEBUG
2185     /* Check that the object is valid: validate objects traversed
2186        by tp_traverse() */
2187     traverseproc traverse = Py_TYPE(op)->tp_traverse;
2188     (void)traverse(op, visit_validate, op);
2189 #endif
2190 }
2191 
2192 void
2193 PyObject_GC_UnTrack(void *op_raw)
2194 {
2195     PyObject *op = _PyObject_CAST(op_raw);
2196     /* Obscure:  the Py_TRASHCAN mechanism requires that we be able to
2197      * call PyObject_GC_UnTrack twice on an object.
2198      */
2199     if (_PyObject_GC_IS_TRACKED(op)) {
2200         _PyObject_GC_UNTRACK(op);
2201     }
2202 }
2203 
2204 int
2205 PyObject_IS_GC(PyObject *obj)
2206 {
2207     return _PyObject_IS_GC(obj);
2208 }
2209 
2210 static PyObject *
2211 _PyObject_GC_Alloc(int use_calloc, size_t basicsize)
2212 {
2213     PyThreadState *tstate = _PyThreadState_GET();
2214     GCState *gcstate = &tstate->interp->gc;
2215     if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) {
2216         return _PyErr_NoMemory(tstate);
2217     }
2218     size_t size = sizeof(PyGC_Head) + basicsize;
2219 
2220     PyGC_Head *g;
2221     if (use_calloc) {
2222         g = (PyGC_Head *)PyObject_Calloc(1, size);
2223     }
2224     else {
2225         g = (PyGC_Head *)PyObject_Malloc(size);
2226     }
2227     if (g == NULL) {
2228         return _PyErr_NoMemory(tstate);
2229     }
2230     assert(((uintptr_t)g & 3) == 0);  // g must be aligned on a 4-byte boundary
2231 
2232     g->_gc_next = 0;
2233     g->_gc_prev = 0;
2234     gcstate->generations[0].count++; /* number of allocated GC objects */
2235     if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
2236         gcstate->enabled &&
2237         gcstate->generations[0].threshold &&
2238         !gcstate->collecting &&
2239         !_PyErr_Occurred(tstate))
2240     {
2241         gcstate->collecting = 1;
2242         collect_generations(tstate);
2243         gcstate->collecting = 0;
2244     }
2245     PyObject *op = FROM_GC(g);
2246     return op;
2247 }
2248 
2249 PyObject *
2250 _PyObject_GC_Malloc(size_t basicsize)
2251 {
2252     return _PyObject_GC_Alloc(0, basicsize);
2253 }
2254 
2255 PyObject *
2256 _PyObject_GC_Calloc(size_t basicsize)
2257 {
2258     return _PyObject_GC_Alloc(1, basicsize);
2259 }
2260 
2261 PyObject *
2262 _PyObject_GC_New(PyTypeObject *tp)
2263 {
2264     PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp));
2265     if (op != NULL)
2266         op = PyObject_INIT(op, tp);
2267     return op;
2268 }
2269 
2270 PyVarObject *
2271 _PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
2272 {
2273     size_t size;
2274     PyVarObject *op;
2275 
2276     if (nitems < 0) {
2277         PyErr_BadInternalCall();
2278         return NULL;
2279     }
2280     size = _PyObject_VAR_SIZE(tp, nitems);
2281     op = (PyVarObject *) _PyObject_GC_Malloc(size);
2282     if (op != NULL)
2283         op = PyObject_INIT_VAR(op, tp, nitems);
2284     return op;
2285 }
2286 
2287 PyVarObject *
2288 _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
2289 {
2290     const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
2291     _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
2292     if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) {
2293         return (PyVarObject *)PyErr_NoMemory();
2294     }
2295 
2296     PyGC_Head *g = AS_GC(op);
2297     g = (PyGC_Head *)PyObject_REALLOC(g,  sizeof(PyGC_Head) + basicsize);
2298     if (g == NULL)
2299         return (PyVarObject *)PyErr_NoMemory();
2300     op = (PyVarObject *) FROM_GC(g);
2301     Py_SET_SIZE(op, nitems);
2302     return op;
2303 }
2304 
2305 void
2306 PyObject_GC_Del(void *op)
2307 {
2308     PyGC_Head *g = AS_GC(op);
2309     if (_PyObject_GC_IS_TRACKED(op)) {
2310         gc_list_remove(g);
2311     }
2312     PyThreadState *tstate = _PyThreadState_GET();
2313     GCState *gcstate = &tstate->interp->gc;
2314     if (gcstate->generations[0].count > 0) {
2315         gcstate->generations[0].count--;
2316     }
2317     PyObject_FREE(g);
2318 }
2319 
2320 int
2321 PyObject_GC_IsTracked(PyObject* obj)
2322 {
2323     if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
2324         return 1;
2325     }
2326     return 0;
2327 }
2328 
2329 int
2330 PyObject_GC_IsFinalized(PyObject *obj)
2331 {
2332     if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
2333          return 1;
2334     }
2335     return 0;
2336 }
2337