1 
2 /* Core extension modules are built-in on some platforms (e.g. Windows). */
3 #ifdef Py_BUILD_CORE
4 #define Py_BUILD_CORE_BUILTIN
5 #undef Py_BUILD_CORE
6 #endif
7 
8 #include "Python.h"
9 #include "structmember.h"
10 
11 PyDoc_STRVAR(pickle_module_doc,
12 "Optimized C implementation for the Python pickle module.");
13 
14 /*[clinic input]
15 module _pickle
16 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
17 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
18 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
19 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
20 [clinic start generated code]*/
21 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
22 
/* Pickle protocol versions known to this module.  HIGHEST_PROTOCOL must be
   bumped whenever new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 4,
    DEFAULT_PROTOCOL = 3,
};
28 
/* Pickle opcodes, one character each.  This table must be kept in sync with
   pickle.py; pickletools.py carries the extensive documentation. */
enum opcode {
    /* Opcodes listed ahead of the protocol 2 additions. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Added by protocol 2. */
    PROTO    = '\x80',
    NEWOBJ   = '\x81',
    EXT1     = '\x82',
    EXT2     = '\x83',
    EXT4     = '\x84',
    TUPLE1   = '\x85',
    TUPLE2   = '\x86',
    TUPLE3   = '\x87',
    NEWTRUE  = '\x88',
    NEWFALSE = '\x89',
    LONG1    = '\x8a',
    LONG4    = '\x8b',

    /* Added by protocol 3 (Python 3.x). */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Added by protocol 4. */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',
};
104 
/* Tuning constants used by the pickler and the unpickler. */
enum {
    /* How many elements batch_list/dict() pumps out before doing
       APPENDS/SETITEMS.  Keep in synch with pickle.Pickler._BATCHSIZE.
       Nothing will break if the two values diverge, but it is unclear
       that a mismatch would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16,

    /* Framing parameters.
       NOTE(review): presumably the minimum and target frame payload sizes
       in bytes, and the byte length of a FRAME header -- confirm against
       the framing code. */
    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9,
};
126 
127 /*************************************************************************/
128 
129 /* State of the pickle module, per PEP 3121. */
130 typedef struct {
131     /* Exception classes for pickle. */
132     PyObject *PickleError;
133     PyObject *PicklingError;
134     PyObject *UnpicklingError;
135 
136     /* copyreg.dispatch_table, {type_object: pickling_function} */
137     PyObject *dispatch_table;
138 
139     /* For the extension opcodes EXT1, EXT2 and EXT4. */
140 
141     /* copyreg._extension_registry, {(module_name, function_name): code} */
142     PyObject *extension_registry;
143     /* copyreg._extension_cache, {code: object} */
144     PyObject *extension_cache;
145     /* copyreg._inverted_registry, {code: (module_name, function_name)} */
146     PyObject *inverted_registry;
147 
148     /* Import mappings for compatibility with Python 2.x */
149 
150     /* _compat_pickle.NAME_MAPPING,
151        {(oldmodule, oldname): (newmodule, newname)} */
152     PyObject *name_mapping_2to3;
153     /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
154     PyObject *import_mapping_2to3;
155     /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
156     PyObject *name_mapping_3to2;
157     PyObject *import_mapping_3to2;
158 
159     /* codecs.encode, used for saving bytes in older protocols */
160     PyObject *codecs_encode;
161     /* builtins.getattr, used for saving nested names with protocol < 4 */
162     PyObject *getattr;
163     /* functools.partial, used for implementing __newobj_ex__ with protocols
164        2 and 3 */
165     PyObject *partial;
166 } PickleState;
167 
168 /* Forward declaration of the _pickle module definition. */
169 static struct PyModuleDef _picklemodule;
170 
171 /* Given a module object, get its per-module state. */
172 static PickleState *
_Pickle_GetState(PyObject * module)173 _Pickle_GetState(PyObject *module)
174 {
175     return (PickleState *)PyModule_GetState(module);
176 }
177 
178 /* Find the module instance imported in the currently running sub-interpreter
179    and get its state. */
180 static PickleState *
_Pickle_GetGlobalState(void)181 _Pickle_GetGlobalState(void)
182 {
183     return _Pickle_GetState(PyState_FindModule(&_picklemodule));
184 }
185 
186 /* Clear the given pickle module state. */
187 static void
_Pickle_ClearState(PickleState * st)188 _Pickle_ClearState(PickleState *st)
189 {
190     Py_CLEAR(st->PickleError);
191     Py_CLEAR(st->PicklingError);
192     Py_CLEAR(st->UnpicklingError);
193     Py_CLEAR(st->dispatch_table);
194     Py_CLEAR(st->extension_registry);
195     Py_CLEAR(st->extension_cache);
196     Py_CLEAR(st->inverted_registry);
197     Py_CLEAR(st->name_mapping_2to3);
198     Py_CLEAR(st->import_mapping_2to3);
199     Py_CLEAR(st->name_mapping_3to2);
200     Py_CLEAR(st->import_mapping_3to2);
201     Py_CLEAR(st->codecs_encode);
202     Py_CLEAR(st->getattr);
203     Py_CLEAR(st->partial);
204 }
205 
206 /* Initialize the given pickle module state. */
207 static int
_Pickle_InitState(PickleState * st)208 _Pickle_InitState(PickleState *st)
209 {
210     PyObject *copyreg = NULL;
211     PyObject *compat_pickle = NULL;
212     PyObject *codecs = NULL;
213     PyObject *functools = NULL;
214     _Py_IDENTIFIER(getattr);
215 
216     st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
217     if (st->getattr == NULL)
218         goto error;
219 
220     copyreg = PyImport_ImportModule("copyreg");
221     if (!copyreg)
222         goto error;
223     st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
224     if (!st->dispatch_table)
225         goto error;
226     if (!PyDict_CheckExact(st->dispatch_table)) {
227         PyErr_Format(PyExc_RuntimeError,
228                      "copyreg.dispatch_table should be a dict, not %.200s",
229                      Py_TYPE(st->dispatch_table)->tp_name);
230         goto error;
231     }
232     st->extension_registry = \
233         PyObject_GetAttrString(copyreg, "_extension_registry");
234     if (!st->extension_registry)
235         goto error;
236     if (!PyDict_CheckExact(st->extension_registry)) {
237         PyErr_Format(PyExc_RuntimeError,
238                      "copyreg._extension_registry should be a dict, "
239                      "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
240         goto error;
241     }
242     st->inverted_registry = \
243         PyObject_GetAttrString(copyreg, "_inverted_registry");
244     if (!st->inverted_registry)
245         goto error;
246     if (!PyDict_CheckExact(st->inverted_registry)) {
247         PyErr_Format(PyExc_RuntimeError,
248                      "copyreg._inverted_registry should be a dict, "
249                      "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
250         goto error;
251     }
252     st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
253     if (!st->extension_cache)
254         goto error;
255     if (!PyDict_CheckExact(st->extension_cache)) {
256         PyErr_Format(PyExc_RuntimeError,
257                      "copyreg._extension_cache should be a dict, "
258                      "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
259         goto error;
260     }
261     Py_CLEAR(copyreg);
262 
263     /* Load the 2.x -> 3.x stdlib module mapping tables */
264     compat_pickle = PyImport_ImportModule("_compat_pickle");
265     if (!compat_pickle)
266         goto error;
267     st->name_mapping_2to3 = \
268         PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
269     if (!st->name_mapping_2to3)
270         goto error;
271     if (!PyDict_CheckExact(st->name_mapping_2to3)) {
272         PyErr_Format(PyExc_RuntimeError,
273                      "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
274                      Py_TYPE(st->name_mapping_2to3)->tp_name);
275         goto error;
276     }
277     st->import_mapping_2to3 = \
278         PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
279     if (!st->import_mapping_2to3)
280         goto error;
281     if (!PyDict_CheckExact(st->import_mapping_2to3)) {
282         PyErr_Format(PyExc_RuntimeError,
283                      "_compat_pickle.IMPORT_MAPPING should be a dict, "
284                      "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
285         goto error;
286     }
287     /* ... and the 3.x -> 2.x mapping tables */
288     st->name_mapping_3to2 = \
289         PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
290     if (!st->name_mapping_3to2)
291         goto error;
292     if (!PyDict_CheckExact(st->name_mapping_3to2)) {
293         PyErr_Format(PyExc_RuntimeError,
294                      "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
295                      "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
296         goto error;
297     }
298     st->import_mapping_3to2 = \
299         PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
300     if (!st->import_mapping_3to2)
301         goto error;
302     if (!PyDict_CheckExact(st->import_mapping_3to2)) {
303         PyErr_Format(PyExc_RuntimeError,
304                      "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
305                      "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
306         goto error;
307     }
308     Py_CLEAR(compat_pickle);
309 
310     codecs = PyImport_ImportModule("codecs");
311     if (codecs == NULL)
312         goto error;
313     st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
314     if (st->codecs_encode == NULL) {
315         goto error;
316     }
317     if (!PyCallable_Check(st->codecs_encode)) {
318         PyErr_Format(PyExc_RuntimeError,
319                      "codecs.encode should be a callable, not %.200s",
320                      Py_TYPE(st->codecs_encode)->tp_name);
321         goto error;
322     }
323     Py_CLEAR(codecs);
324 
325     functools = PyImport_ImportModule("functools");
326     if (!functools)
327         goto error;
328     st->partial = PyObject_GetAttrString(functools, "partial");
329     if (!st->partial)
330         goto error;
331     Py_CLEAR(functools);
332 
333     return 0;
334 
335   error:
336     Py_CLEAR(copyreg);
337     Py_CLEAR(compat_pickle);
338     Py_CLEAR(codecs);
339     Py_CLEAR(functools);
340     _Pickle_ClearState(st);
341     return -1;
342 }
343 
344 /* Helper for calling a function with a single argument quickly.
345 
346    This function steals the reference of the given argument. */
347 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)348 _Pickle_FastCall(PyObject *func, PyObject *obj)
349 {
350     PyObject *result;
351 
352     result = PyObject_CallFunctionObjArgs(func, obj, NULL);
353     Py_DECREF(obj);
354     return result;
355 }
356 
357 /*************************************************************************/
358 
359 /* Retrieve and deconstruct a method for avoiding a reference cycle
360    (pickler -> bound method of pickler -> pickler) */
361 static int
init_method_ref(PyObject * self,_Py_Identifier * name,PyObject ** method_func,PyObject ** method_self)362 init_method_ref(PyObject *self, _Py_Identifier *name,
363                 PyObject **method_func, PyObject **method_self)
364 {
365     PyObject *func, *func2;
366     int ret;
367 
368     /* *method_func and *method_self should be consistent.  All refcount decrements
369        should be occurred after setting *method_self and *method_func. */
370     ret = _PyObject_LookupAttrId(self, name, &func);
371     if (func == NULL) {
372         *method_self = NULL;
373         Py_CLEAR(*method_func);
374         return ret;
375     }
376 
377     if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
378         /* Deconstruct a bound Python method */
379         func2 = PyMethod_GET_FUNCTION(func);
380         Py_INCREF(func2);
381         *method_self = self; /* borrowed */
382         Py_XSETREF(*method_func, func2);
383         Py_DECREF(func);
384         return 0;
385     }
386     else {
387         *method_self = NULL;
388         Py_XSETREF(*method_func, func);
389         return 0;
390     }
391 }
392 
393 /* Bind a method if it was deconstructed */
394 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)395 reconstruct_method(PyObject *func, PyObject *self)
396 {
397     if (self) {
398         return PyMethod_New(func, self);
399     }
400     else {
401         Py_INCREF(func);
402         return func;
403     }
404 }
405 
406 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)407 call_method(PyObject *func, PyObject *self, PyObject *obj)
408 {
409     if (self) {
410         return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
411     }
412     else {
413         return PyObject_CallFunctionObjArgs(func, obj, NULL);
414     }
415 }
416 
417 /*************************************************************************/
418 
419 /* Internal data type used as the unpickling stack. */
420 typedef struct {
421     PyObject_VAR_HEAD
422     PyObject **data;
423     int mark_set;          /* is MARK set? */
424     Py_ssize_t fence;      /* position of top MARK or 0 */
425     Py_ssize_t allocated;  /* number of slots in data allocated */
426 } Pdata;
427 
428 static void
Pdata_dealloc(Pdata * self)429 Pdata_dealloc(Pdata *self)
430 {
431     Py_ssize_t i = Py_SIZE(self);
432     while (--i >= 0) {
433         Py_DECREF(self->data[i]);
434     }
435     PyMem_FREE(self->data);
436     PyObject_Del(self);
437 }
438 
439 static PyTypeObject Pdata_Type = {
440     PyVarObject_HEAD_INIT(NULL, 0)
441     "_pickle.Pdata",              /*tp_name*/
442     sizeof(Pdata),                /*tp_basicsize*/
443     sizeof(PyObject *),           /*tp_itemsize*/
444     (destructor)Pdata_dealloc,    /*tp_dealloc*/
445 };
446 
447 static PyObject *
Pdata_New(void)448 Pdata_New(void)
449 {
450     Pdata *self;
451 
452     if (!(self = PyObject_New(Pdata, &Pdata_Type)))
453         return NULL;
454     Py_SIZE(self) = 0;
455     self->mark_set = 0;
456     self->fence = 0;
457     self->allocated = 8;
458     self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
459     if (self->data)
460         return (PyObject *)self;
461     Py_DECREF(self);
462     return PyErr_NoMemory();
463 }
464 
465 
466 /* Retain only the initial clearto items.  If clearto >= the current
467  * number of items, this is a (non-erroneous) NOP.
468  */
469 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)470 Pdata_clear(Pdata *self, Py_ssize_t clearto)
471 {
472     Py_ssize_t i = Py_SIZE(self);
473 
474     assert(clearto >= self->fence);
475     if (clearto >= i)
476         return 0;
477 
478     while (--i >= clearto) {
479         Py_CLEAR(self->data[i]);
480     }
481     Py_SIZE(self) = clearto;
482     return 0;
483 }
484 
485 static int
Pdata_grow(Pdata * self)486 Pdata_grow(Pdata *self)
487 {
488     PyObject **data = self->data;
489     size_t allocated = (size_t)self->allocated;
490     size_t new_allocated;
491 
492     new_allocated = (allocated >> 3) + 6;
493     /* check for integer overflow */
494     if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
495         goto nomemory;
496     new_allocated += allocated;
497     PyMem_RESIZE(data, PyObject *, new_allocated);
498     if (data == NULL)
499         goto nomemory;
500 
501     self->data = data;
502     self->allocated = (Py_ssize_t)new_allocated;
503     return 0;
504 
505   nomemory:
506     PyErr_NoMemory();
507     return -1;
508 }
509 
510 static int
Pdata_stack_underflow(Pdata * self)511 Pdata_stack_underflow(Pdata *self)
512 {
513     PickleState *st = _Pickle_GetGlobalState();
514     PyErr_SetString(st->UnpicklingError,
515                     self->mark_set ?
516                     "unexpected MARK found" :
517                     "unpickling stack underflow");
518     return -1;
519 }
520 
521 /* D is a Pdata*.  Pop the topmost element and store it into V, which
522  * must be an lvalue holding PyObject*.  On stack underflow, UnpicklingError
523  * is raised and V is set to NULL.
524  */
525 static PyObject *
Pdata_pop(Pdata * self)526 Pdata_pop(Pdata *self)
527 {
528     if (Py_SIZE(self) <= self->fence) {
529         Pdata_stack_underflow(self);
530         return NULL;
531     }
532     return self->data[--Py_SIZE(self)];
533 }
534 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
535 
536 static int
Pdata_push(Pdata * self,PyObject * obj)537 Pdata_push(Pdata *self, PyObject *obj)
538 {
539     if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
540         return -1;
541     }
542     self->data[Py_SIZE(self)++] = obj;
543     return 0;
544 }
545 
546 /* Push an object on stack, transferring its ownership to the stack. */
547 #define PDATA_PUSH(D, O, ER) do {                               \
548         if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
549 
550 /* Push an object on stack, adding a new reference to the object. */
551 #define PDATA_APPEND(D, O, ER) do {                             \
552         Py_INCREF((O));                                         \
553         if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
554 
555 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)556 Pdata_poptuple(Pdata *self, Py_ssize_t start)
557 {
558     PyObject *tuple;
559     Py_ssize_t len, i, j;
560 
561     if (start < self->fence) {
562         Pdata_stack_underflow(self);
563         return NULL;
564     }
565     len = Py_SIZE(self) - start;
566     tuple = PyTuple_New(len);
567     if (tuple == NULL)
568         return NULL;
569     for (i = start, j = 0; j < len; i++, j++)
570         PyTuple_SET_ITEM(tuple, j, self->data[i]);
571 
572     Py_SIZE(self) = start;
573     return tuple;
574 }
575 
576 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)577 Pdata_poplist(Pdata *self, Py_ssize_t start)
578 {
579     PyObject *list;
580     Py_ssize_t len, i, j;
581 
582     len = Py_SIZE(self) - start;
583     list = PyList_New(len);
584     if (list == NULL)
585         return NULL;
586     for (i = start, j = 0; j < len; i++, j++)
587         PyList_SET_ITEM(list, j, self->data[i]);
588 
589     Py_SIZE(self) = start;
590     return list;
591 }
592 
593 typedef struct {
594     PyObject *me_key;
595     Py_ssize_t me_value;
596 } PyMemoEntry;
597 
598 typedef struct {
599     size_t mt_mask;
600     size_t mt_used;
601     size_t mt_allocated;
602     PyMemoEntry *mt_table;
603 } PyMemoTable;
604 
605 typedef struct PicklerObject {
606     PyObject_HEAD
607     PyMemoTable *memo;          /* Memo table, keep track of the seen
608                                    objects to support self-referential objects
609                                    pickling. */
610     PyObject *pers_func;        /* persistent_id() method, can be NULL */
611     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
612                                    is an unbound method, NULL otherwise */
613     PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
614 
615     PyObject *write;            /* write() method of the output stream. */
616     PyObject *output_buffer;    /* Write into a local bytearray buffer before
617                                    flushing to the stream. */
618     Py_ssize_t output_len;      /* Length of output_buffer. */
619     Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
620     int proto;                  /* Pickle protocol number, >= 0 */
621     int bin;                    /* Boolean, true if proto > 0 */
622     int framing;                /* True when framing is enabled, proto >= 4 */
623     Py_ssize_t frame_start;     /* Position in output_buffer where the
624                                    current frame begins. -1 if there
625                                    is no frame currently open. */
626 
627     Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
628     int fast;                   /* Enable fast mode if set to a true value.
629                                    The fast mode disable the usage of memo,
630                                    therefore speeding the pickling process by
631                                    not generating superfluous PUT opcodes. It
632                                    should not be used if with self-referential
633                                    objects. */
634     int fast_nesting;
635     int fix_imports;            /* Indicate whether Pickler should fix
636                                    the name of globals for Python 2.x. */
637     PyObject *fast_memo;
638 } PicklerObject;
639 
640 typedef struct UnpicklerObject {
641     PyObject_HEAD
642     Pdata *stack;               /* Pickle data stack, store unpickled objects. */
643 
644     /* The unpickler memo is just an array of PyObject *s. Using a dict
645        is unnecessary, since the keys are contiguous ints. */
646     PyObject **memo;
647     size_t memo_size;       /* Capacity of the memo array */
648     size_t memo_len;        /* Number of objects in the memo */
649 
650     PyObject *pers_func;        /* persistent_load() method, can be NULL. */
651     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
652                                    is an unbound method, NULL otherwise */
653 
654     Py_buffer buffer;
655     char *input_buffer;
656     char *input_line;
657     Py_ssize_t input_len;
658     Py_ssize_t next_read_idx;
659     Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
660 
661     PyObject *read;             /* read() method of the input stream. */
662     PyObject *readline;         /* readline() method of the input stream. */
663     PyObject *peek;             /* peek() method of the input stream, or NULL */
664 
665     char *encoding;             /* Name of the encoding to be used for
666                                    decoding strings pickled using Python
667                                    2.x. The default value is "ASCII" */
668     char *errors;               /* Name of errors handling scheme to used when
669                                    decoding strings. The default value is
670                                    "strict". */
671     Py_ssize_t *marks;          /* Mark stack, used for unpickling container
672                                    objects. */
673     Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
674     Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
675     int proto;                  /* Protocol of the pickle loaded. */
676     int fix_imports;            /* Indicate whether Unpickler should fix
677                                    the name of globals pickled by Python 2.x. */
678 } UnpicklerObject;
679 
680 typedef struct {
681     PyObject_HEAD
682     PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
683 }  PicklerMemoProxyObject;
684 
685 typedef struct {
686     PyObject_HEAD
687     UnpicklerObject *unpickler;
688 } UnpicklerMemoProxyObject;
689 
690 /* Forward declarations */
691 static int save(PicklerObject *, PyObject *, int);
692 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
693 static PyTypeObject Pickler_Type;
694 static PyTypeObject Unpickler_Type;
695 
696 #include "clinic/_pickle.c.h"
697 
698 /*************************************************************************
699  A custom hashtable mapping void* to Python ints. This is used by the pickler
700  for memoization. Using a custom hashtable rather than PyDict allows us to skip
701  a bunch of unnecessary object creation. This makes a huge performance
702  difference. */
703 
704 #define MT_MINSIZE 8
705 #define PERTURB_SHIFT 5
706 
707 
708 static PyMemoTable *
PyMemoTable_New(void)709 PyMemoTable_New(void)
710 {
711     PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
712     if (memo == NULL) {
713         PyErr_NoMemory();
714         return NULL;
715     }
716 
717     memo->mt_used = 0;
718     memo->mt_allocated = MT_MINSIZE;
719     memo->mt_mask = MT_MINSIZE - 1;
720     memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
721     if (memo->mt_table == NULL) {
722         PyMem_FREE(memo);
723         PyErr_NoMemory();
724         return NULL;
725     }
726     memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
727 
728     return memo;
729 }
730 
731 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)732 PyMemoTable_Copy(PyMemoTable *self)
733 {
734     PyMemoTable *new = PyMemoTable_New();
735     if (new == NULL)
736         return NULL;
737 
738     new->mt_used = self->mt_used;
739     new->mt_allocated = self->mt_allocated;
740     new->mt_mask = self->mt_mask;
741     /* The table we get from _New() is probably smaller than we wanted.
742        Free it and allocate one that's the right size. */
743     PyMem_FREE(new->mt_table);
744     new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
745     if (new->mt_table == NULL) {
746         PyMem_FREE(new);
747         PyErr_NoMemory();
748         return NULL;
749     }
750     for (size_t i = 0; i < self->mt_allocated; i++) {
751         Py_XINCREF(self->mt_table[i].me_key);
752     }
753     memcpy(new->mt_table, self->mt_table,
754            sizeof(PyMemoEntry) * self->mt_allocated);
755 
756     return new;
757 }
758 
759 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)760 PyMemoTable_Size(PyMemoTable *self)
761 {
762     return self->mt_used;
763 }
764 
765 static int
PyMemoTable_Clear(PyMemoTable * self)766 PyMemoTable_Clear(PyMemoTable *self)
767 {
768     Py_ssize_t i = self->mt_allocated;
769 
770     while (--i >= 0) {
771         Py_XDECREF(self->mt_table[i].me_key);
772     }
773     self->mt_used = 0;
774     memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
775     return 0;
776 }
777 
778 static void
PyMemoTable_Del(PyMemoTable * self)779 PyMemoTable_Del(PyMemoTable *self)
780 {
781     if (self == NULL)
782         return;
783     PyMemoTable_Clear(self);
784 
785     PyMem_FREE(self->mt_table);
786     PyMem_FREE(self);
787 }
788 
789 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
790    can be considerably simpler than dictobject.c's lookdict(). */
791 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)792 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
793 {
794     size_t i;
795     size_t perturb;
796     size_t mask = self->mt_mask;
797     PyMemoEntry *table = self->mt_table;
798     PyMemoEntry *entry;
799     Py_hash_t hash = (Py_hash_t)key >> 3;
800 
801     i = hash & mask;
802     entry = &table[i];
803     if (entry->me_key == NULL || entry->me_key == key)
804         return entry;
805 
806     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
807         i = (i << 2) + i + perturb + 1;
808         entry = &table[i & mask];
809         if (entry->me_key == NULL || entry->me_key == key)
810             return entry;
811     }
812     Py_UNREACHABLE();
813 }
814 
815 /* Returns -1 on failure, 0 on success. */
816 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)817 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
818 {
819     PyMemoEntry *oldtable = NULL;
820     PyMemoEntry *oldentry, *newentry;
821     size_t new_size = MT_MINSIZE;
822     size_t to_process;
823 
824     assert(min_size > 0);
825 
826     if (min_size > PY_SSIZE_T_MAX) {
827         PyErr_NoMemory();
828         return -1;
829     }
830 
831     /* Find the smallest valid table size >= min_size. */
832     while (new_size < min_size) {
833         new_size <<= 1;
834     }
835     /* new_size needs to be a power of two. */
836     assert((new_size & (new_size - 1)) == 0);
837 
838     /* Allocate new table. */
839     oldtable = self->mt_table;
840     self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
841     if (self->mt_table == NULL) {
842         self->mt_table = oldtable;
843         PyErr_NoMemory();
844         return -1;
845     }
846     self->mt_allocated = new_size;
847     self->mt_mask = new_size - 1;
848     memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
849 
850     /* Copy entries from the old table. */
851     to_process = self->mt_used;
852     for (oldentry = oldtable; to_process > 0; oldentry++) {
853         if (oldentry->me_key != NULL) {
854             to_process--;
855             /* newentry is a pointer to a chunk of the new
856                mt_table, so we're setting the key:value pair
857                in-place. */
858             newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
859             newentry->me_key = oldentry->me_key;
860             newentry->me_value = oldentry->me_value;
861         }
862     }
863 
864     /* Deallocate the old table. */
865     PyMem_FREE(oldtable);
866     return 0;
867 }
868 
869 /* Returns NULL on failure, a pointer to the value otherwise. */
870 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)871 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
872 {
873     PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
874     if (entry->me_key == NULL)
875         return NULL;
876     return &entry->me_value;
877 }
878 
879 /* Returns -1 on failure, 0 on success. */
880 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)881 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
882 {
883     PyMemoEntry *entry;
884 
885     assert(key != NULL);
886 
887     entry = _PyMemoTable_Lookup(self, key);
888     if (entry->me_key != NULL) {
889         entry->me_value = value;
890         return 0;
891     }
892     Py_INCREF(key);
893     entry->me_key = key;
894     entry->me_value = value;
895     self->mt_used++;
896 
897     /* If we added a key, we can safely resize. Otherwise just return!
898      * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
899      *
900      * Quadrupling the size improves average table sparseness
901      * (reducing collisions) at the cost of some memory. It also halves
902      * the number of expensive resize operations in a growing memo table.
903      *
904      * Very large memo tables (over 50K items) use doubling instead.
905      * This may help applications with severe memory constraints.
906      */
907     if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
908         return 0;
909     }
910     // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
911     size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
912     return _PyMemoTable_ResizeTable(self, desired_size);
913 }
914 
915 #undef MT_MINSIZE
916 #undef PERTURB_SHIFT
917 
918 /*************************************************************************/
919 
920 
921 static int
_Pickler_ClearBuffer(PicklerObject * self)922 _Pickler_ClearBuffer(PicklerObject *self)
923 {
924     Py_XSETREF(self->output_buffer,
925               PyBytes_FromStringAndSize(NULL, self->max_output_len));
926     if (self->output_buffer == NULL)
927         return -1;
928     self->output_len = 0;
929     self->frame_start = -1;
930     return 0;
931 }
932 
/* Store `value` at `out` as an 8-byte little-endian integer.  When size_t is
   narrower than 8 bytes, the high-order bytes are written as zero. */
static void
_write_size64(char *out, size_t value)
{
    size_t pos;

    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    /* Peel off one byte per iteration, least significant first.  Once every
       byte of `value` has been shifted out it is zero, which supplies the
       zero padding for any remaining positions. */
    for (pos = 0; pos < 8; pos++) {
        out[pos] = (unsigned char)(value & 0xff);
        value >>= 8;
    }
}
947 
948 static int
_Pickler_CommitFrame(PicklerObject * self)949 _Pickler_CommitFrame(PicklerObject *self)
950 {
951     size_t frame_len;
952     char *qdata;
953 
954     if (!self->framing || self->frame_start == -1)
955         return 0;
956     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
957     qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
958     if (frame_len >= FRAME_SIZE_MIN) {
959         qdata[0] = FRAME;
960         _write_size64(qdata + 1, frame_len);
961     }
962     else {
963         memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
964         self->output_len -= FRAME_HEADER_SIZE;
965     }
966     self->frame_start = -1;
967     return 0;
968 }
969 
970 static PyObject *
_Pickler_GetString(PicklerObject * self)971 _Pickler_GetString(PicklerObject *self)
972 {
973     PyObject *output_buffer = self->output_buffer;
974 
975     assert(self->output_buffer != NULL);
976 
977     if (_Pickler_CommitFrame(self))
978         return NULL;
979 
980     self->output_buffer = NULL;
981     /* Resize down to exact size */
982     if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
983         return NULL;
984     return output_buffer;
985 }
986 
987 static int
_Pickler_FlushToFile(PicklerObject * self)988 _Pickler_FlushToFile(PicklerObject *self)
989 {
990     PyObject *output, *result;
991 
992     assert(self->write != NULL);
993 
994     /* This will commit the frame first */
995     output = _Pickler_GetString(self);
996     if (output == NULL)
997         return -1;
998 
999     result = _Pickle_FastCall(self->write, output);
1000     Py_XDECREF(result);
1001     return (result == NULL) ? -1 : 0;
1002 }
1003 
1004 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1005 _Pickler_OpcodeBoundary(PicklerObject *self)
1006 {
1007     Py_ssize_t frame_len;
1008 
1009     if (!self->framing || self->frame_start == -1) {
1010         return 0;
1011     }
1012     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1013     if (frame_len >= FRAME_SIZE_TARGET) {
1014         if(_Pickler_CommitFrame(self)) {
1015             return -1;
1016         }
1017         /* Flush the content of the committed frame to the underlying
1018          * file and reuse the pickler buffer for the next frame so as
1019          * to limit memory usage when dumping large complex objects to
1020          * a file.
1021          *
1022          * self->write is NULL when called via dumps.
1023          */
1024         if (self->write != NULL) {
1025             if (_Pickler_FlushToFile(self) < 0) {
1026                 return -1;
1027             }
1028             if (_Pickler_ClearBuffer(self) < 0) {
1029                 return -1;
1030             }
1031         }
1032     }
1033     return 0;
1034 }
1035 
1036 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1037 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1038 {
1039     Py_ssize_t i, n, required;
1040     char *buffer;
1041     int need_new_frame;
1042 
1043     assert(s != NULL);
1044     need_new_frame = (self->framing && self->frame_start == -1);
1045 
1046     if (need_new_frame)
1047         n = data_len + FRAME_HEADER_SIZE;
1048     else
1049         n = data_len;
1050 
1051     required = self->output_len + n;
1052     if (required > self->max_output_len) {
1053         /* Make place in buffer for the pickle chunk */
1054         if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1055             PyErr_NoMemory();
1056             return -1;
1057         }
1058         self->max_output_len = (self->output_len + n) / 2 * 3;
1059         if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1060             return -1;
1061     }
1062     buffer = PyBytes_AS_STRING(self->output_buffer);
1063     if (need_new_frame) {
1064         /* Setup new frame */
1065         Py_ssize_t frame_start = self->output_len;
1066         self->frame_start = frame_start;
1067         for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1068             /* Write an invalid value, for debugging */
1069             buffer[frame_start + i] = 0xFE;
1070         }
1071         self->output_len += FRAME_HEADER_SIZE;
1072     }
1073     if (data_len < 8) {
1074         /* This is faster than memcpy when the string is short. */
1075         for (i = 0; i < data_len; i++) {
1076             buffer[self->output_len + i] = s[i];
1077         }
1078     }
1079     else {
1080         memcpy(buffer + self->output_len, s, data_len);
1081     }
1082     self->output_len += data_len;
1083     return data_len;
1084 }
1085 
1086 static PicklerObject *
_Pickler_New(void)1087 _Pickler_New(void)
1088 {
1089     PicklerObject *self;
1090 
1091     self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1092     if (self == NULL)
1093         return NULL;
1094 
1095     self->pers_func = NULL;
1096     self->dispatch_table = NULL;
1097     self->write = NULL;
1098     self->proto = 0;
1099     self->bin = 0;
1100     self->framing = 0;
1101     self->frame_start = -1;
1102     self->fast = 0;
1103     self->fast_nesting = 0;
1104     self->fix_imports = 0;
1105     self->fast_memo = NULL;
1106     self->max_output_len = WRITE_BUF_SIZE;
1107     self->output_len = 0;
1108 
1109     self->memo = PyMemoTable_New();
1110     self->output_buffer = PyBytes_FromStringAndSize(NULL,
1111                                                     self->max_output_len);
1112 
1113     if (self->memo == NULL || self->output_buffer == NULL) {
1114         Py_DECREF(self);
1115         return NULL;
1116     }
1117     return self;
1118 }
1119 
1120 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1121 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1122 {
1123     long proto;
1124 
1125     if (protocol == NULL || protocol == Py_None) {
1126         proto = DEFAULT_PROTOCOL;
1127     }
1128     else {
1129         proto = PyLong_AsLong(protocol);
1130         if (proto < 0) {
1131             if (proto == -1 && PyErr_Occurred())
1132                 return -1;
1133             proto = HIGHEST_PROTOCOL;
1134         }
1135         else if (proto > HIGHEST_PROTOCOL) {
1136             PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1137                          HIGHEST_PROTOCOL);
1138             return -1;
1139         }
1140     }
1141     self->proto = (int)proto;
1142     self->bin = proto > 0;
1143     self->fix_imports = fix_imports && proto < 3;
1144     return 0;
1145 }
1146 
1147 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1148    be called once on a freshly created Pickler. */
1149 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1150 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1151 {
1152     _Py_IDENTIFIER(write);
1153     assert(file != NULL);
1154     if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1155         return -1;
1156     }
1157     if (self->write == NULL) {
1158         PyErr_SetString(PyExc_TypeError,
1159                         "file must have a 'write' attribute");
1160         return -1;
1161     }
1162 
1163     return 0;
1164 }
1165 
1166 /* Returns the size of the input on success, -1 on failure. This takes its
1167    own reference to `input`. */
1168 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1169 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1170 {
1171     if (self->buffer.buf != NULL)
1172         PyBuffer_Release(&self->buffer);
1173     if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1174         return -1;
1175     self->input_buffer = self->buffer.buf;
1176     self->input_len = self->buffer.len;
1177     self->next_read_idx = 0;
1178     self->prefetched_idx = self->input_len;
1179     return self->input_len;
1180 }
1181 
1182 static int
bad_readline(void)1183 bad_readline(void)
1184 {
1185     PickleState *st = _Pickle_GetGlobalState();
1186     PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1187     return -1;
1188 }
1189 
1190 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1191 _Unpickler_SkipConsumed(UnpicklerObject *self)
1192 {
1193     Py_ssize_t consumed;
1194     PyObject *r;
1195 
1196     consumed = self->next_read_idx - self->prefetched_idx;
1197     if (consumed <= 0)
1198         return 0;
1199 
1200     assert(self->peek);  /* otherwise we did something wrong */
1201     /* This makes a useless copy... */
1202     r = PyObject_CallFunction(self->read, "n", consumed);
1203     if (r == NULL)
1204         return -1;
1205     Py_DECREF(r);
1206 
1207     self->prefetched_idx = self->next_read_idx;
1208     return 0;
1209 }
1210 
1211 static const Py_ssize_t READ_WHOLE_LINE = -1;
1212 
1213 /* If reading from a file, we need to only pull the bytes we need, since there
1214    may be multiple pickle objects arranged contiguously in the same input
1215    buffer.
1216 
1217    If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1218    bytes from the input stream/buffer.
1219 
1220    Update the unpickler's input buffer with the newly-read data. Returns -1 on
1221    failure; on success, returns the number of bytes read from the file.
1222 
1223    On success, self->input_len will be 0; this is intentional so that when
1224    unpickling from a file, the "we've run out of data" code paths will trigger,
1225    causing the Unpickler to go back to the file for more data. Use the returned
1226    size to tell you how much data you can process. */
1227 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1228 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1229 {
1230     PyObject *data;
1231     Py_ssize_t read_size;
1232 
1233     assert(self->read != NULL);
1234 
1235     if (_Unpickler_SkipConsumed(self) < 0)
1236         return -1;
1237 
1238     if (n == READ_WHOLE_LINE) {
1239         data = _PyObject_CallNoArg(self->readline);
1240     }
1241     else {
1242         PyObject *len;
1243         /* Prefetch some data without advancing the file pointer, if possible */
1244         if (self->peek && n < PREFETCH) {
1245             len = PyLong_FromSsize_t(PREFETCH);
1246             if (len == NULL)
1247                 return -1;
1248             data = _Pickle_FastCall(self->peek, len);
1249             if (data == NULL) {
1250                 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1251                     return -1;
1252                 /* peek() is probably not supported by the given file object */
1253                 PyErr_Clear();
1254                 Py_CLEAR(self->peek);
1255             }
1256             else {
1257                 read_size = _Unpickler_SetStringInput(self, data);
1258                 Py_DECREF(data);
1259                 self->prefetched_idx = 0;
1260                 if (n <= read_size)
1261                     return n;
1262             }
1263         }
1264         len = PyLong_FromSsize_t(n);
1265         if (len == NULL)
1266             return -1;
1267         data = _Pickle_FastCall(self->read, len);
1268     }
1269     if (data == NULL)
1270         return -1;
1271 
1272     read_size = _Unpickler_SetStringInput(self, data);
1273     Py_DECREF(data);
1274     return read_size;
1275 }
1276 
1277 /* Don't call it directly: use _Unpickler_Read() */
1278 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1279 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1280 {
1281     Py_ssize_t num_read;
1282 
1283     *s = NULL;
1284     if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1285         PickleState *st = _Pickle_GetGlobalState();
1286         PyErr_SetString(st->UnpicklingError,
1287                         "read would overflow (invalid bytecode)");
1288         return -1;
1289     }
1290 
1291     /* This case is handled by the _Unpickler_Read() macro for efficiency */
1292     assert(self->next_read_idx + n > self->input_len);
1293 
1294     if (!self->read)
1295         return bad_readline();
1296 
1297     num_read = _Unpickler_ReadFromFile(self, n);
1298     if (num_read < 0)
1299         return -1;
1300     if (num_read < n)
1301         return bad_readline();
1302     *s = self->input_buffer;
1303     self->next_read_idx = n;
1304     return n;
1305 }
1306 
/* Read `n` bytes from the unpickler's data source, storing a pointer to them
   in `*s`.

   This should be used for all data reads, rather than accessing the
   unpickler's input buffer directly, because it also deals with reading
   from input streams.

   Fast path: when at least `n` unread bytes are already in the input
   buffer, `*s` is pointed at them and next_read_idx is advanced by `n`.
   Otherwise the request is forwarded to _Unpickler_ReadImpl().

   `self` and `n` are evaluated more than once, so the arguments must not
   have side effects.

   Returns -1 (with an exception set) on failure. On success, returns the
   number of chars read. */
#define _Unpickler_Read(self, s, n)                               \
    (((self)->input_len - (self)->next_read_idx >= (n))           \
     ? (*(s) = &(self)->input_buffer[(self)->next_read_idx],      \
        (self)->next_read_idx += (n),                             \
        (n))                                                      \
     : _Unpickler_ReadImpl(self, (s), (n)))
1326 
1327 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1328 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1329                     char **result)
1330 {
1331     char *input_line = PyMem_Realloc(self->input_line, len + 1);
1332     if (input_line == NULL) {
1333         PyErr_NoMemory();
1334         return -1;
1335     }
1336 
1337     memcpy(input_line, line, len);
1338     input_line[len] = '\0';
1339     self->input_line = input_line;
1340     *result = self->input_line;
1341     return len;
1342 }
1343 
1344 /* Read a line from the input stream/buffer. If we run off the end of the input
1345    before hitting \n, raise an error.
1346 
1347    Returns the number of chars read, or -1 on failure. */
1348 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1349 _Unpickler_Readline(UnpicklerObject *self, char **result)
1350 {
1351     Py_ssize_t i, num_read;
1352 
1353     for (i = self->next_read_idx; i < self->input_len; i++) {
1354         if (self->input_buffer[i] == '\n') {
1355             char *line_start = self->input_buffer + self->next_read_idx;
1356             num_read = i - self->next_read_idx + 1;
1357             self->next_read_idx = i + 1;
1358             return _Unpickler_CopyLine(self, line_start, num_read, result);
1359         }
1360     }
1361     if (!self->read)
1362         return bad_readline();
1363 
1364     num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1365     if (num_read < 0)
1366         return -1;
1367     if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1368         return bad_readline();
1369     self->next_read_idx = num_read;
1370     return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1371 }
1372 
1373 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1374    will be modified in place. */
1375 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1376 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1377 {
1378     size_t i;
1379 
1380     assert(new_size > self->memo_size);
1381 
1382     PyObject **memo_new = self->memo;
1383     PyMem_RESIZE(memo_new, PyObject *, new_size);
1384     if (memo_new == NULL) {
1385         PyErr_NoMemory();
1386         return -1;
1387     }
1388     self->memo = memo_new;
1389     for (i = self->memo_size; i < new_size; i++)
1390         self->memo[i] = NULL;
1391     self->memo_size = new_size;
1392     return 0;
1393 }
1394 
1395 /* Returns NULL if idx is out of bounds. */
1396 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1397 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1398 {
1399     if (idx >= self->memo_size)
1400         return NULL;
1401 
1402     return self->memo[idx];
1403 }
1404 
1405 /* Returns -1 (with an exception set) on failure, 0 on success.
1406    This takes its own reference to `value`. */
1407 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1408 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1409 {
1410     PyObject *old_item;
1411 
1412     if (idx >= self->memo_size) {
1413         if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1414             return -1;
1415         assert(idx < self->memo_size);
1416     }
1417     Py_INCREF(value);
1418     old_item = self->memo[idx];
1419     self->memo[idx] = value;
1420     if (old_item != NULL) {
1421         Py_DECREF(old_item);
1422     }
1423     else {
1424         self->memo_len++;
1425     }
1426     return 0;
1427 }
1428 
1429 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1430 _Unpickler_NewMemo(Py_ssize_t new_size)
1431 {
1432     PyObject **memo = PyMem_NEW(PyObject *, new_size);
1433     if (memo == NULL) {
1434         PyErr_NoMemory();
1435         return NULL;
1436     }
1437     memset(memo, 0, new_size * sizeof(PyObject *));
1438     return memo;
1439 }
1440 
1441 /* Free the unpickler's memo, taking care to decref any items left in it. */
1442 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1443 _Unpickler_MemoCleanup(UnpicklerObject *self)
1444 {
1445     Py_ssize_t i;
1446     PyObject **memo = self->memo;
1447 
1448     if (self->memo == NULL)
1449         return;
1450     self->memo = NULL;
1451     i = self->memo_size;
1452     while (--i >= 0) {
1453         Py_XDECREF(memo[i]);
1454     }
1455     PyMem_FREE(memo);
1456 }
1457 
1458 static UnpicklerObject *
_Unpickler_New(void)1459 _Unpickler_New(void)
1460 {
1461     UnpicklerObject *self;
1462 
1463     self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1464     if (self == NULL)
1465         return NULL;
1466 
1467     self->pers_func = NULL;
1468     self->input_buffer = NULL;
1469     self->input_line = NULL;
1470     self->input_len = 0;
1471     self->next_read_idx = 0;
1472     self->prefetched_idx = 0;
1473     self->read = NULL;
1474     self->readline = NULL;
1475     self->peek = NULL;
1476     self->encoding = NULL;
1477     self->errors = NULL;
1478     self->marks = NULL;
1479     self->num_marks = 0;
1480     self->marks_size = 0;
1481     self->proto = 0;
1482     self->fix_imports = 0;
1483     memset(&self->buffer, 0, sizeof(Py_buffer));
1484     self->memo_size = 32;
1485     self->memo_len = 0;
1486     self->memo = _Unpickler_NewMemo(self->memo_size);
1487     self->stack = (Pdata *)Pdata_New();
1488 
1489     if (self->memo == NULL || self->stack == NULL) {
1490         Py_DECREF(self);
1491         return NULL;
1492     }
1493 
1494     return self;
1495 }
1496 
1497 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1498    be called once on a freshly created Pickler. */
1499 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1500 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1501 {
1502     _Py_IDENTIFIER(peek);
1503     _Py_IDENTIFIER(read);
1504     _Py_IDENTIFIER(readline);
1505 
1506     if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1507         return -1;
1508     }
1509     (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1510     (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
1511     if (self->readline == NULL || self->read == NULL) {
1512         if (!PyErr_Occurred()) {
1513             PyErr_SetString(PyExc_TypeError,
1514                             "file must have 'read' and 'readline' attributes");
1515         }
1516         Py_CLEAR(self->read);
1517         Py_CLEAR(self->readline);
1518         Py_CLEAR(self->peek);
1519         return -1;
1520     }
1521     return 0;
1522 }
1523 
1524 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1525    be called once on a freshly created Pickler. */
1526 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1527 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1528                             const char *encoding,
1529                             const char *errors)
1530 {
1531     if (encoding == NULL)
1532         encoding = "ASCII";
1533     if (errors == NULL)
1534         errors = "strict";
1535 
1536     self->encoding = _PyMem_Strdup(encoding);
1537     self->errors = _PyMem_Strdup(errors);
1538     if (self->encoding == NULL || self->errors == NULL) {
1539         PyErr_NoMemory();
1540         return -1;
1541     }
1542     return 0;
1543 }
1544 
1545 /* Generate a GET opcode for an object stored in the memo. */
1546 static int
memo_get(PicklerObject * self,PyObject * key)1547 memo_get(PicklerObject *self, PyObject *key)
1548 {
1549     Py_ssize_t *value;
1550     char pdata[30];
1551     Py_ssize_t len;
1552 
1553     value = PyMemoTable_Get(self->memo, key);
1554     if (value == NULL)  {
1555         PyErr_SetObject(PyExc_KeyError, key);
1556         return -1;
1557     }
1558 
1559     if (!self->bin) {
1560         pdata[0] = GET;
1561         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1562                       "%" PY_FORMAT_SIZE_T "d\n", *value);
1563         len = strlen(pdata);
1564     }
1565     else {
1566         if (*value < 256) {
1567             pdata[0] = BINGET;
1568             pdata[1] = (unsigned char)(*value & 0xff);
1569             len = 2;
1570         }
1571         else if ((size_t)*value <= 0xffffffffUL) {
1572             pdata[0] = LONG_BINGET;
1573             pdata[1] = (unsigned char)(*value & 0xff);
1574             pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1575             pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1576             pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1577             len = 5;
1578         }
1579         else { /* unlikely */
1580             PickleState *st = _Pickle_GetGlobalState();
1581             PyErr_SetString(st->PicklingError,
1582                             "memo id too large for LONG_BINGET");
1583             return -1;
1584         }
1585     }
1586 
1587     if (_Pickler_Write(self, pdata, len) < 0)
1588         return -1;
1589 
1590     return 0;
1591 }
1592 
1593 /* Store an object in the memo, assign it a new unique ID based on the number
1594    of objects currently stored in the memo and generate a PUT opcode. */
1595 static int
memo_put(PicklerObject * self,PyObject * obj)1596 memo_put(PicklerObject *self, PyObject *obj)
1597 {
1598     char pdata[30];
1599     Py_ssize_t len;
1600     Py_ssize_t idx;
1601 
1602     const char memoize_op = MEMOIZE;
1603 
1604     if (self->fast)
1605         return 0;
1606 
1607     idx = PyMemoTable_Size(self->memo);
1608     if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1609         return -1;
1610 
1611     if (self->proto >= 4) {
1612         if (_Pickler_Write(self, &memoize_op, 1) < 0)
1613             return -1;
1614         return 0;
1615     }
1616     else if (!self->bin) {
1617         pdata[0] = PUT;
1618         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1619                       "%" PY_FORMAT_SIZE_T "d\n", idx);
1620         len = strlen(pdata);
1621     }
1622     else {
1623         if (idx < 256) {
1624             pdata[0] = BINPUT;
1625             pdata[1] = (unsigned char)idx;
1626             len = 2;
1627         }
1628         else if ((size_t)idx <= 0xffffffffUL) {
1629             pdata[0] = LONG_BINPUT;
1630             pdata[1] = (unsigned char)(idx & 0xff);
1631             pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1632             pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1633             pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1634             len = 5;
1635         }
1636         else { /* unlikely */
1637             PickleState *st = _Pickle_GetGlobalState();
1638             PyErr_SetString(st->PicklingError,
1639                             "memo id too large for LONG_BINPUT");
1640             return -1;
1641         }
1642     }
1643     if (_Pickler_Write(self, pdata, len) < 0)
1644         return -1;
1645 
1646     return 0;
1647 }
1648 
1649 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1650 get_dotted_path(PyObject *obj, PyObject *name)
1651 {
1652     _Py_static_string(PyId_dot, ".");
1653     PyObject *dotted_path;
1654     Py_ssize_t i, n;
1655 
1656     dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1657     if (dotted_path == NULL)
1658         return NULL;
1659     n = PyList_GET_SIZE(dotted_path);
1660     assert(n >= 1);
1661     for (i = 0; i < n; i++) {
1662         PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1663         if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1664             if (obj == NULL)
1665                 PyErr_Format(PyExc_AttributeError,
1666                              "Can't pickle local object %R", name);
1667             else
1668                 PyErr_Format(PyExc_AttributeError,
1669                              "Can't pickle local attribute %R on %R", name, obj);
1670             Py_DECREF(dotted_path);
1671             return NULL;
1672         }
1673     }
1674     return dotted_path;
1675 }
1676 
1677 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1678 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1679 {
1680     Py_ssize_t i, n;
1681     PyObject *parent = NULL;
1682 
1683     assert(PyList_CheckExact(names));
1684     Py_INCREF(obj);
1685     n = PyList_GET_SIZE(names);
1686     for (i = 0; i < n; i++) {
1687         PyObject *name = PyList_GET_ITEM(names, i);
1688         Py_XDECREF(parent);
1689         parent = obj;
1690         (void)_PyObject_LookupAttr(parent, name, &obj);
1691         if (obj == NULL) {
1692             Py_DECREF(parent);
1693             return NULL;
1694         }
1695     }
1696     if (pparent != NULL)
1697         *pparent = parent;
1698     else
1699         Py_XDECREF(parent);
1700     return obj;
1701 }
1702 
1703 
1704 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1705 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1706 {
1707     PyObject *dotted_path, *attr;
1708 
1709     if (allow_qualname) {
1710         dotted_path = get_dotted_path(obj, name);
1711         if (dotted_path == NULL)
1712             return NULL;
1713         attr = get_deep_attribute(obj, dotted_path, NULL);
1714         Py_DECREF(dotted_path);
1715     }
1716     else {
1717         (void)_PyObject_LookupAttr(obj, name, &attr);
1718     }
1719     if (attr == NULL && !PyErr_Occurred()) {
1720         PyErr_Format(PyExc_AttributeError,
1721                      "Can't get attribute %R on %R", name, obj);
1722     }
1723     return attr;
1724 }
1725 
1726 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1727 _checkmodule(PyObject *module_name, PyObject *module,
1728              PyObject *global, PyObject *dotted_path)
1729 {
1730     if (module == Py_None) {
1731         return -1;
1732     }
1733     if (PyUnicode_Check(module_name) &&
1734             _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1735         return -1;
1736     }
1737 
1738     PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1739     if (candidate == NULL) {
1740         return -1;
1741     }
1742     if (candidate != global) {
1743         Py_DECREF(candidate);
1744         return -1;
1745     }
1746     Py_DECREF(candidate);
1747     return 0;
1748 }
1749 
1750 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1751 whichmodule(PyObject *global, PyObject *dotted_path)
1752 {
1753     PyObject *module_name;
1754     PyObject *module = NULL;
1755     Py_ssize_t i;
1756     PyObject *modules;
1757     _Py_IDENTIFIER(__module__);
1758     _Py_IDENTIFIER(modules);
1759     _Py_IDENTIFIER(__main__);
1760 
1761     if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1762         return NULL;
1763     }
1764     if (module_name) {
1765         /* In some rare cases (e.g., bound methods of extension types),
1766            __module__ can be None. If it is so, then search sys.modules for
1767            the module of global. */
1768         if (module_name != Py_None)
1769             return module_name;
1770         Py_CLEAR(module_name);
1771     }
1772     assert(module_name == NULL);
1773 
1774     /* Fallback on walking sys.modules */
1775     modules = _PySys_GetObjectId(&PyId_modules);
1776     if (modules == NULL) {
1777         PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1778         return NULL;
1779     }
1780     if (PyDict_CheckExact(modules)) {
1781         i = 0;
1782         while (PyDict_Next(modules, &i, &module_name, &module)) {
1783             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1784                 Py_INCREF(module_name);
1785                 return module_name;
1786             }
1787             if (PyErr_Occurred()) {
1788                 return NULL;
1789             }
1790         }
1791     }
1792     else {
1793         PyObject *iterator = PyObject_GetIter(modules);
1794         if (iterator == NULL) {
1795             return NULL;
1796         }
1797         while ((module_name = PyIter_Next(iterator))) {
1798             module = PyObject_GetItem(modules, module_name);
1799             if (module == NULL) {
1800                 Py_DECREF(module_name);
1801                 Py_DECREF(iterator);
1802                 return NULL;
1803             }
1804             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1805                 Py_DECREF(module);
1806                 Py_DECREF(iterator);
1807                 return module_name;
1808             }
1809             Py_DECREF(module);
1810             Py_DECREF(module_name);
1811             if (PyErr_Occurred()) {
1812                 Py_DECREF(iterator);
1813                 return NULL;
1814             }
1815         }
1816         Py_DECREF(iterator);
1817     }
1818 
1819     /* If no module is found, use __main__. */
1820     module_name = _PyUnicode_FromId(&PyId___main__);
1821     Py_XINCREF(module_name);
1822     return module_name;
1823 }
1824 
1825 /* fast_save_enter() and fast_save_leave() are guards against recursive
1826    objects when Pickler is used with the "fast mode" (i.e., with object
1827    memoization disabled). If the nesting of a list or dict object exceed
1828    FAST_NESTING_LIMIT, these guards will start keeping an internal
1829    reference to the seen list or dict objects and check whether these objects
1830    are recursive. These are not strictly necessary, since save() has a
1831    hard-coded recursion limit, but they give a nicer error message than the
1832    typical RuntimeError. */
1833 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1834 fast_save_enter(PicklerObject *self, PyObject *obj)
1835 {
1836     /* if fast_nesting < 0, we're doing an error exit. */
1837     if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1838         PyObject *key = NULL;
1839         if (self->fast_memo == NULL) {
1840             self->fast_memo = PyDict_New();
1841             if (self->fast_memo == NULL) {
1842                 self->fast_nesting = -1;
1843                 return 0;
1844             }
1845         }
1846         key = PyLong_FromVoidPtr(obj);
1847         if (key == NULL) {
1848             self->fast_nesting = -1;
1849             return 0;
1850         }
1851         if (PyDict_GetItemWithError(self->fast_memo, key)) {
1852             Py_DECREF(key);
1853             PyErr_Format(PyExc_ValueError,
1854                          "fast mode: can't pickle cyclic objects "
1855                          "including object type %.200s at %p",
1856                          obj->ob_type->tp_name, obj);
1857             self->fast_nesting = -1;
1858             return 0;
1859         }
1860         if (PyErr_Occurred()) {
1861             Py_DECREF(key);
1862             self->fast_nesting = -1;
1863             return 0;
1864         }
1865         if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1866             Py_DECREF(key);
1867             self->fast_nesting = -1;
1868             return 0;
1869         }
1870         Py_DECREF(key);
1871     }
1872     return 1;
1873 }
1874 
1875 static int
fast_save_leave(PicklerObject * self,PyObject * obj)1876 fast_save_leave(PicklerObject *self, PyObject *obj)
1877 {
1878     if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1879         PyObject *key = PyLong_FromVoidPtr(obj);
1880         if (key == NULL)
1881             return 0;
1882         if (PyDict_DelItem(self->fast_memo, key) < 0) {
1883             Py_DECREF(key);
1884             return 0;
1885         }
1886         Py_DECREF(key);
1887     }
1888     return 1;
1889 }
1890 
1891 static int
save_none(PicklerObject * self,PyObject * obj)1892 save_none(PicklerObject *self, PyObject *obj)
1893 {
1894     const char none_op = NONE;
1895     if (_Pickler_Write(self, &none_op, 1) < 0)
1896         return -1;
1897 
1898     return 0;
1899 }
1900 
1901 static int
save_bool(PicklerObject * self,PyObject * obj)1902 save_bool(PicklerObject *self, PyObject *obj)
1903 {
1904     if (self->proto >= 2) {
1905         const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
1906         if (_Pickler_Write(self, &bool_op, 1) < 0)
1907             return -1;
1908     }
1909     else {
1910         /* These aren't opcodes -- they're ways to pickle bools before protocol 2
1911          * so that unpicklers written before bools were introduced unpickle them
1912          * as ints, but unpicklers after can recognize that bools were intended.
1913          * Note that protocol 2 added direct ways to pickle bools.
1914          */
1915         const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
1916         if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
1917             return -1;
1918     }
1919     return 0;
1920 }
1921 
1922 static int
save_long(PicklerObject * self,PyObject * obj)1923 save_long(PicklerObject *self, PyObject *obj)
1924 {
1925     PyObject *repr = NULL;
1926     Py_ssize_t size;
1927     long val;
1928     int overflow;
1929     int status = 0;
1930 
1931     val= PyLong_AsLongAndOverflow(obj, &overflow);
1932     if (!overflow && (sizeof(long) <= 4 ||
1933             (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
1934     {
1935         /* result fits in a signed 4-byte integer.
1936 
1937            Note: we can't use -0x80000000L in the above condition because some
1938            compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
1939            before applying the unary minus when sizeof(long) <= 4. The
1940            resulting value stays unsigned which is commonly not what we want,
1941            so MSVC happily warns us about it.  However, that result would have
1942            been fine because we guard for sizeof(long) <= 4 which turns the
1943            condition true in that particular case. */
1944         char pdata[32];
1945         Py_ssize_t len = 0;
1946 
1947         if (self->bin) {
1948             pdata[1] = (unsigned char)(val & 0xff);
1949             pdata[2] = (unsigned char)((val >> 8) & 0xff);
1950             pdata[3] = (unsigned char)((val >> 16) & 0xff);
1951             pdata[4] = (unsigned char)((val >> 24) & 0xff);
1952 
1953             if ((pdata[4] != 0) || (pdata[3] != 0)) {
1954                 pdata[0] = BININT;
1955                 len = 5;
1956             }
1957             else if (pdata[2] != 0) {
1958                 pdata[0] = BININT2;
1959                 len = 3;
1960             }
1961             else {
1962                 pdata[0] = BININT1;
1963                 len = 2;
1964             }
1965         }
1966         else {
1967             sprintf(pdata, "%c%ld\n", INT,  val);
1968             len = strlen(pdata);
1969         }
1970         if (_Pickler_Write(self, pdata, len) < 0)
1971             return -1;
1972 
1973         return 0;
1974     }
1975     assert(!PyErr_Occurred());
1976 
1977     if (self->proto >= 2) {
1978         /* Linear-time pickling. */
1979         size_t nbits;
1980         size_t nbytes;
1981         unsigned char *pdata;
1982         char header[5];
1983         int i;
1984         int sign = _PyLong_Sign(obj);
1985 
1986         if (sign == 0) {
1987             header[0] = LONG1;
1988             header[1] = 0;      /* It's 0 -- an empty bytestring. */
1989             if (_Pickler_Write(self, header, 2) < 0)
1990                 goto error;
1991             return 0;
1992         }
1993         nbits = _PyLong_NumBits(obj);
1994         if (nbits == (size_t)-1 && PyErr_Occurred())
1995             goto error;
1996         /* How many bytes do we need?  There are nbits >> 3 full
1997          * bytes of data, and nbits & 7 leftover bits.  If there
1998          * are any leftover bits, then we clearly need another
1999          * byte.  Wnat's not so obvious is that we *probably*
2000          * need another byte even if there aren't any leftovers:
2001          * the most-significant bit of the most-significant byte
2002          * acts like a sign bit, and it's usually got a sense
2003          * opposite of the one we need.  The exception is ints
2004          * of the form -(2**(8*j-1)) for j > 0.  Such an int is
2005          * its own 256's-complement, so has the right sign bit
2006          * even without the extra byte.  That's a pain to check
2007          * for in advance, though, so we always grab an extra
2008          * byte at the start, and cut it back later if possible.
2009          */
2010         nbytes = (nbits >> 3) + 1;
2011         if (nbytes > 0x7fffffffL) {
2012             PyErr_SetString(PyExc_OverflowError,
2013                             "int too large to pickle");
2014             goto error;
2015         }
2016         repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2017         if (repr == NULL)
2018             goto error;
2019         pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2020         i = _PyLong_AsByteArray((PyLongObject *)obj,
2021                                 pdata, nbytes,
2022                                 1 /* little endian */ , 1 /* signed */ );
2023         if (i < 0)
2024             goto error;
2025         /* If the int is negative, this may be a byte more than
2026          * needed.  This is so iff the MSB is all redundant sign
2027          * bits.
2028          */
2029         if (sign < 0 &&
2030             nbytes > 1 &&
2031             pdata[nbytes - 1] == 0xff &&
2032             (pdata[nbytes - 2] & 0x80) != 0) {
2033             nbytes--;
2034         }
2035 
2036         if (nbytes < 256) {
2037             header[0] = LONG1;
2038             header[1] = (unsigned char)nbytes;
2039             size = 2;
2040         }
2041         else {
2042             header[0] = LONG4;
2043             size = (Py_ssize_t) nbytes;
2044             for (i = 1; i < 5; i++) {
2045                 header[i] = (unsigned char)(size & 0xff);
2046                 size >>= 8;
2047             }
2048             size = 5;
2049         }
2050         if (_Pickler_Write(self, header, size) < 0 ||
2051             _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2052             goto error;
2053     }
2054     else {
2055         const char long_op = LONG;
2056         const char *string;
2057 
2058         /* proto < 2: write the repr and newline.  This is quadratic-time (in
2059            the number of digits), in both directions.  We add a trailing 'L'
2060            to the repr, for compatibility with Python 2.x. */
2061 
2062         repr = PyObject_Repr(obj);
2063         if (repr == NULL)
2064             goto error;
2065 
2066         string = PyUnicode_AsUTF8AndSize(repr, &size);
2067         if (string == NULL)
2068             goto error;
2069 
2070         if (_Pickler_Write(self, &long_op, 1) < 0 ||
2071             _Pickler_Write(self, string, size) < 0 ||
2072             _Pickler_Write(self, "L\n", 2) < 0)
2073             goto error;
2074     }
2075 
2076     if (0) {
2077   error:
2078       status = -1;
2079     }
2080     Py_XDECREF(repr);
2081 
2082     return status;
2083 }
2084 
2085 static int
save_float(PicklerObject * self,PyObject * obj)2086 save_float(PicklerObject *self, PyObject *obj)
2087 {
2088     double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2089 
2090     if (self->bin) {
2091         char pdata[9];
2092         pdata[0] = BINFLOAT;
2093         if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2094             return -1;
2095         if (_Pickler_Write(self, pdata, 9) < 0)
2096             return -1;
2097    }
2098     else {
2099         int result = -1;
2100         char *buf = NULL;
2101         char op = FLOAT;
2102 
2103         if (_Pickler_Write(self, &op, 1) < 0)
2104             goto done;
2105 
2106         buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2107         if (!buf) {
2108             PyErr_NoMemory();
2109             goto done;
2110         }
2111 
2112         if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2113             goto done;
2114 
2115         if (_Pickler_Write(self, "\n", 1) < 0)
2116             goto done;
2117 
2118         result = 0;
2119 done:
2120         PyMem_Free(buf);
2121         return result;
2122     }
2123 
2124     return 0;
2125 }
2126 
2127 /* Perform direct write of the header and payload of the binary object.
2128 
2129    The large contiguous data is written directly into the underlying file
2130    object, bypassing the output_buffer of the Pickler.  We intentionally
2131    do not insert a protocol 4 frame opcode to make it possible to optimize
2132    file.read calls in the loader.
2133  */
2134 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2135 _Pickler_write_bytes(PicklerObject *self,
2136                      const char *header, Py_ssize_t header_size,
2137                      const char *data, Py_ssize_t data_size,
2138                      PyObject *payload)
2139 {
2140     int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2141     int framing = self->framing;
2142 
2143     if (bypass_buffer) {
2144         assert(self->output_buffer != NULL);
2145         /* Commit the previous frame. */
2146         if (_Pickler_CommitFrame(self)) {
2147             return -1;
2148         }
2149         /* Disable framing temporarily */
2150         self->framing = 0;
2151     }
2152 
2153     if (_Pickler_Write(self, header, header_size) < 0) {
2154         return -1;
2155     }
2156 
2157     if (bypass_buffer && self->write != NULL) {
2158         /* Bypass the in-memory buffer to directly stream large data
2159            into the underlying file object. */
2160         PyObject *result, *mem = NULL;
2161         /* Dump the output buffer to the file. */
2162         if (_Pickler_FlushToFile(self) < 0) {
2163             return -1;
2164         }
2165 
2166         /* Stream write the payload into the file without going through the
2167            output buffer. */
2168         if (payload == NULL) {
2169             /* TODO: It would be better to use a memoryview with a linked
2170                original string if this is possible. */
2171             payload = mem = PyBytes_FromStringAndSize(data, data_size);
2172             if (payload == NULL) {
2173                 return -1;
2174             }
2175         }
2176         result = PyObject_CallFunctionObjArgs(self->write, payload, NULL);
2177         Py_XDECREF(mem);
2178         if (result == NULL) {
2179             return -1;
2180         }
2181         Py_DECREF(result);
2182 
2183         /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2184         if (_Pickler_ClearBuffer(self) < 0) {
2185             return -1;
2186         }
2187     }
2188     else {
2189         if (_Pickler_Write(self, data, data_size) < 0) {
2190             return -1;
2191         }
2192     }
2193 
2194     /* Re-enable framing for subsequent calls to _Pickler_Write. */
2195     self->framing = framing;
2196 
2197     return 0;
2198 }
2199 
2200 static int
save_bytes(PicklerObject * self,PyObject * obj)2201 save_bytes(PicklerObject *self, PyObject *obj)
2202 {
2203     if (self->proto < 3) {
2204         /* Older pickle protocols do not have an opcode for pickling bytes
2205            objects. Therefore, we need to fake the copy protocol (i.e.,
2206            the __reduce__ method) to permit bytes object unpickling.
2207 
2208            Here we use a hack to be compatible with Python 2. Since in Python
2209            2 'bytes' is just an alias for 'str' (which has different
2210            parameters than the actual bytes object), we use codecs.encode
2211            to create the appropriate 'str' object when unpickled using
2212            Python 2 *and* the appropriate 'bytes' object when unpickled
2213            using Python 3. Again this is a hack and we don't need to do this
2214            with newer protocols. */
2215         PyObject *reduce_value = NULL;
2216         int status;
2217 
2218         if (PyBytes_GET_SIZE(obj) == 0) {
2219             reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2220         }
2221         else {
2222             PickleState *st = _Pickle_GetGlobalState();
2223             PyObject *unicode_str =
2224                 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2225                                        PyBytes_GET_SIZE(obj),
2226                                        "strict");
2227             _Py_IDENTIFIER(latin1);
2228 
2229             if (unicode_str == NULL)
2230                 return -1;
2231             reduce_value = Py_BuildValue("(O(OO))",
2232                                          st->codecs_encode, unicode_str,
2233                                          _PyUnicode_FromId(&PyId_latin1));
2234             Py_DECREF(unicode_str);
2235         }
2236 
2237         if (reduce_value == NULL)
2238             return -1;
2239 
2240         /* save_reduce() will memoize the object automatically. */
2241         status = save_reduce(self, reduce_value, obj);
2242         Py_DECREF(reduce_value);
2243         return status;
2244     }
2245     else {
2246         Py_ssize_t size;
2247         char header[9];
2248         Py_ssize_t len;
2249 
2250         size = PyBytes_GET_SIZE(obj);
2251         if (size < 0)
2252             return -1;
2253 
2254         if (size <= 0xff) {
2255             header[0] = SHORT_BINBYTES;
2256             header[1] = (unsigned char)size;
2257             len = 2;
2258         }
2259         else if ((size_t)size <= 0xffffffffUL) {
2260             header[0] = BINBYTES;
2261             header[1] = (unsigned char)(size & 0xff);
2262             header[2] = (unsigned char)((size >> 8) & 0xff);
2263             header[3] = (unsigned char)((size >> 16) & 0xff);
2264             header[4] = (unsigned char)((size >> 24) & 0xff);
2265             len = 5;
2266         }
2267         else if (self->proto >= 4) {
2268             header[0] = BINBYTES8;
2269             _write_size64(header + 1, size);
2270             len = 9;
2271         }
2272         else {
2273             PyErr_SetString(PyExc_OverflowError,
2274                             "cannot serialize a bytes object larger than 4 GiB");
2275             return -1;          /* string too large */
2276         }
2277 
2278         if (_Pickler_write_bytes(self, header, len,
2279                                  PyBytes_AS_STRING(obj), size, obj) < 0)
2280         {
2281             return -1;
2282         }
2283 
2284         if (memo_put(self, obj) < 0)
2285             return -1;
2286 
2287         return 0;
2288     }
2289 }
2290 
2291 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2292    backslash and newline characters to \uXXXX escapes. */
2293 static PyObject *
raw_unicode_escape(PyObject * obj)2294 raw_unicode_escape(PyObject *obj)
2295 {
2296     char *p;
2297     Py_ssize_t i, size;
2298     void *data;
2299     unsigned int kind;
2300     _PyBytesWriter writer;
2301 
2302     if (PyUnicode_READY(obj))
2303         return NULL;
2304 
2305     _PyBytesWriter_Init(&writer);
2306 
2307     size = PyUnicode_GET_LENGTH(obj);
2308     data = PyUnicode_DATA(obj);
2309     kind = PyUnicode_KIND(obj);
2310 
2311     p = _PyBytesWriter_Alloc(&writer, size);
2312     if (p == NULL)
2313         goto error;
2314     writer.overallocate = 1;
2315 
2316     for (i=0; i < size; i++) {
2317         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2318         /* Map 32-bit characters to '\Uxxxxxxxx' */
2319         if (ch >= 0x10000) {
2320             /* -1: subtract 1 preallocated byte */
2321             p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2322             if (p == NULL)
2323                 goto error;
2324 
2325             *p++ = '\\';
2326             *p++ = 'U';
2327             *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2328             *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2329             *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2330             *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2331             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2332             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2333             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2334             *p++ = Py_hexdigits[ch & 15];
2335         }
2336         /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2337         else if (ch >= 256 || ch == '\\' || ch == '\n') {
2338             /* -1: subtract 1 preallocated byte */
2339             p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2340             if (p == NULL)
2341                 goto error;
2342 
2343             *p++ = '\\';
2344             *p++ = 'u';
2345             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2346             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2347             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2348             *p++ = Py_hexdigits[ch & 15];
2349         }
2350         /* Copy everything else as-is */
2351         else
2352             *p++ = (char) ch;
2353     }
2354 
2355     return _PyBytesWriter_Finish(&writer, p);
2356 
2357 error:
2358     _PyBytesWriter_Dealloc(&writer);
2359     return NULL;
2360 }
2361 
2362 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2363 write_unicode_binary(PicklerObject *self, PyObject *obj)
2364 {
2365     char header[9];
2366     Py_ssize_t len;
2367     PyObject *encoded = NULL;
2368     Py_ssize_t size;
2369     const char *data;
2370 
2371     if (PyUnicode_READY(obj))
2372         return -1;
2373 
2374     data = PyUnicode_AsUTF8AndSize(obj, &size);
2375     if (data == NULL) {
2376         /* Issue #8383: for strings with lone surrogates, fallback on the
2377            "surrogatepass" error handler. */
2378         PyErr_Clear();
2379         encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2380         if (encoded == NULL)
2381             return -1;
2382 
2383         data = PyBytes_AS_STRING(encoded);
2384         size = PyBytes_GET_SIZE(encoded);
2385     }
2386 
2387     assert(size >= 0);
2388     if (size <= 0xff && self->proto >= 4) {
2389         header[0] = SHORT_BINUNICODE;
2390         header[1] = (unsigned char)(size & 0xff);
2391         len = 2;
2392     }
2393     else if ((size_t)size <= 0xffffffffUL) {
2394         header[0] = BINUNICODE;
2395         header[1] = (unsigned char)(size & 0xff);
2396         header[2] = (unsigned char)((size >> 8) & 0xff);
2397         header[3] = (unsigned char)((size >> 16) & 0xff);
2398         header[4] = (unsigned char)((size >> 24) & 0xff);
2399         len = 5;
2400     }
2401     else if (self->proto >= 4) {
2402         header[0] = BINUNICODE8;
2403         _write_size64(header + 1, size);
2404         len = 9;
2405     }
2406     else {
2407         PyErr_SetString(PyExc_OverflowError,
2408                         "cannot serialize a string larger than 4GiB");
2409         Py_XDECREF(encoded);
2410         return -1;
2411     }
2412 
2413     if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2414         Py_XDECREF(encoded);
2415         return -1;
2416     }
2417     Py_XDECREF(encoded);
2418     return 0;
2419 }
2420 
2421 static int
save_unicode(PicklerObject * self,PyObject * obj)2422 save_unicode(PicklerObject *self, PyObject *obj)
2423 {
2424     if (self->bin) {
2425         if (write_unicode_binary(self, obj) < 0)
2426             return -1;
2427     }
2428     else {
2429         PyObject *encoded;
2430         Py_ssize_t size;
2431         const char unicode_op = UNICODE;
2432 
2433         encoded = raw_unicode_escape(obj);
2434         if (encoded == NULL)
2435             return -1;
2436 
2437         if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2438             Py_DECREF(encoded);
2439             return -1;
2440         }
2441 
2442         size = PyBytes_GET_SIZE(encoded);
2443         if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2444             Py_DECREF(encoded);
2445             return -1;
2446         }
2447         Py_DECREF(encoded);
2448 
2449         if (_Pickler_Write(self, "\n", 1) < 0)
2450             return -1;
2451     }
2452     if (memo_put(self, obj) < 0)
2453         return -1;
2454 
2455     return 0;
2456 }
2457 
2458 /* A helper for save_tuple.  Push the len elements in tuple t on the stack. */
2459 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2460 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2461 {
2462     Py_ssize_t i;
2463 
2464     assert(PyTuple_Size(t) == len);
2465 
2466     for (i = 0; i < len; i++) {
2467         PyObject *element = PyTuple_GET_ITEM(t, i);
2468 
2469         if (element == NULL)
2470             return -1;
2471         if (save(self, element, 0) < 0)
2472             return -1;
2473     }
2474 
2475     return 0;
2476 }
2477 
2478 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2479  * used across protocols to minimize the space needed to pickle them.
2480  * Tuples are also the only builtin immutable type that can be recursive
2481  * (a tuple can be reached from itself), and that requires some subtle
2482  * magic so that it works in all cases.  IOW, this is a long routine.
2483  */
2484 static int
save_tuple(PicklerObject * self,PyObject * obj)2485 save_tuple(PicklerObject *self, PyObject *obj)
2486 {
2487     Py_ssize_t len, i;
2488 
2489     const char mark_op = MARK;
2490     const char tuple_op = TUPLE;
2491     const char pop_op = POP;
2492     const char pop_mark_op = POP_MARK;
2493     const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2494 
2495     if ((len = PyTuple_Size(obj)) < 0)
2496         return -1;
2497 
2498     if (len == 0) {
2499         char pdata[2];
2500 
2501         if (self->proto) {
2502             pdata[0] = EMPTY_TUPLE;
2503             len = 1;
2504         }
2505         else {
2506             pdata[0] = MARK;
2507             pdata[1] = TUPLE;
2508             len = 2;
2509         }
2510         if (_Pickler_Write(self, pdata, len) < 0)
2511             return -1;
2512         return 0;
2513     }
2514 
2515     /* The tuple isn't in the memo now.  If it shows up there after
2516      * saving the tuple elements, the tuple must be recursive, in
2517      * which case we'll pop everything we put on the stack, and fetch
2518      * its value from the memo.
2519      */
2520     if (len <= 3 && self->proto >= 2) {
2521         /* Use TUPLE{1,2,3} opcodes. */
2522         if (store_tuple_elements(self, obj, len) < 0)
2523             return -1;
2524 
2525         if (PyMemoTable_Get(self->memo, obj)) {
2526             /* pop the len elements */
2527             for (i = 0; i < len; i++)
2528                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2529                     return -1;
2530             /* fetch from memo */
2531             if (memo_get(self, obj) < 0)
2532                 return -1;
2533 
2534             return 0;
2535         }
2536         else { /* Not recursive. */
2537             if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2538                 return -1;
2539         }
2540         goto memoize;
2541     }
2542 
2543     /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2544      * Generate MARK e1 e2 ... TUPLE
2545      */
2546     if (_Pickler_Write(self, &mark_op, 1) < 0)
2547         return -1;
2548 
2549     if (store_tuple_elements(self, obj, len) < 0)
2550         return -1;
2551 
2552     if (PyMemoTable_Get(self->memo, obj)) {
2553         /* pop the stack stuff we pushed */
2554         if (self->bin) {
2555             if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2556                 return -1;
2557         }
2558         else {
2559             /* Note that we pop one more than len, to remove
2560              * the MARK too.
2561              */
2562             for (i = 0; i <= len; i++)
2563                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2564                     return -1;
2565         }
2566         /* fetch from memo */
2567         if (memo_get(self, obj) < 0)
2568             return -1;
2569 
2570         return 0;
2571     }
2572     else { /* Not recursive. */
2573         if (_Pickler_Write(self, &tuple_op, 1) < 0)
2574             return -1;
2575     }
2576 
2577   memoize:
2578     if (memo_put(self, obj) < 0)
2579         return -1;
2580 
2581     return 0;
2582 }
2583 
2584 /* iter is an iterator giving items, and we batch up chunks of
2585  *     MARK item item ... item APPENDS
2586  * opcode sequences.  Calling code should have arranged to first create an
2587  * empty list, or list-like object, for the APPENDS to operate on.
2588  * Returns 0 on success, <0 on error.
2589  */
2590 static int
batch_list(PicklerObject * self,PyObject * iter)2591 batch_list(PicklerObject *self, PyObject *iter)
2592 {
2593     PyObject *obj = NULL;
2594     PyObject *firstitem = NULL;
2595     int i, n;
2596 
2597     const char mark_op = MARK;
2598     const char append_op = APPEND;
2599     const char appends_op = APPENDS;
2600 
2601     assert(iter != NULL);
2602 
2603     /* XXX: I think this function could be made faster by avoiding the
2604        iterator interface and fetching objects directly from list using
2605        PyList_GET_ITEM.
2606     */
2607 
2608     if (self->proto == 0) {
2609         /* APPENDS isn't available; do one at a time. */
2610         for (;;) {
2611             obj = PyIter_Next(iter);
2612             if (obj == NULL) {
2613                 if (PyErr_Occurred())
2614                     return -1;
2615                 break;
2616             }
2617             i = save(self, obj, 0);
2618             Py_DECREF(obj);
2619             if (i < 0)
2620                 return -1;
2621             if (_Pickler_Write(self, &append_op, 1) < 0)
2622                 return -1;
2623         }
2624         return 0;
2625     }
2626 
2627     /* proto > 0:  write in batches of BATCHSIZE. */
2628     do {
2629         /* Get first item */
2630         firstitem = PyIter_Next(iter);
2631         if (firstitem == NULL) {
2632             if (PyErr_Occurred())
2633                 goto error;
2634 
2635             /* nothing more to add */
2636             break;
2637         }
2638 
2639         /* Try to get a second item */
2640         obj = PyIter_Next(iter);
2641         if (obj == NULL) {
2642             if (PyErr_Occurred())
2643                 goto error;
2644 
2645             /* Only one item to write */
2646             if (save(self, firstitem, 0) < 0)
2647                 goto error;
2648             if (_Pickler_Write(self, &append_op, 1) < 0)
2649                 goto error;
2650             Py_CLEAR(firstitem);
2651             break;
2652         }
2653 
2654         /* More than one item to write */
2655 
2656         /* Pump out MARK, items, APPENDS. */
2657         if (_Pickler_Write(self, &mark_op, 1) < 0)
2658             goto error;
2659 
2660         if (save(self, firstitem, 0) < 0)
2661             goto error;
2662         Py_CLEAR(firstitem);
2663         n = 1;
2664 
2665         /* Fetch and save up to BATCHSIZE items */
2666         while (obj) {
2667             if (save(self, obj, 0) < 0)
2668                 goto error;
2669             Py_CLEAR(obj);
2670             n += 1;
2671 
2672             if (n == BATCHSIZE)
2673                 break;
2674 
2675             obj = PyIter_Next(iter);
2676             if (obj == NULL) {
2677                 if (PyErr_Occurred())
2678                     goto error;
2679                 break;
2680             }
2681         }
2682 
2683         if (_Pickler_Write(self, &appends_op, 1) < 0)
2684             goto error;
2685 
2686     } while (n == BATCHSIZE);
2687     return 0;
2688 
2689   error:
2690     Py_XDECREF(firstitem);
2691     Py_XDECREF(obj);
2692     return -1;
2693 }
2694 
2695 /* This is a variant of batch_list() above, specialized for lists (with no
2696  * support for list subclasses). Like batch_list(), we batch up chunks of
2697  *     MARK item item ... item APPENDS
2698  * opcode sequences.  Calling code should have arranged to first create an
2699  * empty list, or list-like object, for the APPENDS to operate on.
2700  * Returns 0 on success, -1 on error.
2701  *
2702  * This version is considerably faster than batch_list(), if less general.
2703  *
2704  * Note that this only works for protocols > 0.
2705  */
2706 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2707 batch_list_exact(PicklerObject *self, PyObject *obj)
2708 {
2709     PyObject *item = NULL;
2710     Py_ssize_t this_batch, total;
2711 
2712     const char append_op = APPEND;
2713     const char appends_op = APPENDS;
2714     const char mark_op = MARK;
2715 
2716     assert(obj != NULL);
2717     assert(self->proto > 0);
2718     assert(PyList_CheckExact(obj));
2719 
2720     if (PyList_GET_SIZE(obj) == 1) {
2721         item = PyList_GET_ITEM(obj, 0);
2722         if (save(self, item, 0) < 0)
2723             return -1;
2724         if (_Pickler_Write(self, &append_op, 1) < 0)
2725             return -1;
2726         return 0;
2727     }
2728 
2729     /* Write in batches of BATCHSIZE. */
2730     total = 0;
2731     do {
2732         this_batch = 0;
2733         if (_Pickler_Write(self, &mark_op, 1) < 0)
2734             return -1;
2735         while (total < PyList_GET_SIZE(obj)) {
2736             item = PyList_GET_ITEM(obj, total);
2737             if (save(self, item, 0) < 0)
2738                 return -1;
2739             total++;
2740             if (++this_batch == BATCHSIZE)
2741                 break;
2742         }
2743         if (_Pickler_Write(self, &appends_op, 1) < 0)
2744             return -1;
2745 
2746     } while (total < PyList_GET_SIZE(obj));
2747 
2748     return 0;
2749 }
2750 
2751 static int
save_list(PicklerObject * self,PyObject * obj)2752 save_list(PicklerObject *self, PyObject *obj)
2753 {
2754     char header[3];
2755     Py_ssize_t len;
2756     int status = 0;
2757 
2758     if (self->fast && !fast_save_enter(self, obj))
2759         goto error;
2760 
2761     /* Create an empty list. */
2762     if (self->bin) {
2763         header[0] = EMPTY_LIST;
2764         len = 1;
2765     }
2766     else {
2767         header[0] = MARK;
2768         header[1] = LIST;
2769         len = 2;
2770     }
2771 
2772     if (_Pickler_Write(self, header, len) < 0)
2773         goto error;
2774 
2775     /* Get list length, and bow out early if empty. */
2776     if ((len = PyList_Size(obj)) < 0)
2777         goto error;
2778 
2779     if (memo_put(self, obj) < 0)
2780         goto error;
2781 
2782     if (len != 0) {
2783         /* Materialize the list elements. */
2784         if (PyList_CheckExact(obj) && self->proto > 0) {
2785             if (Py_EnterRecursiveCall(" while pickling an object"))
2786                 goto error;
2787             status = batch_list_exact(self, obj);
2788             Py_LeaveRecursiveCall();
2789         } else {
2790             PyObject *iter = PyObject_GetIter(obj);
2791             if (iter == NULL)
2792                 goto error;
2793 
2794             if (Py_EnterRecursiveCall(" while pickling an object")) {
2795                 Py_DECREF(iter);
2796                 goto error;
2797             }
2798             status = batch_list(self, iter);
2799             Py_LeaveRecursiveCall();
2800             Py_DECREF(iter);
2801         }
2802     }
2803     if (0) {
2804   error:
2805         status = -1;
2806     }
2807 
2808     if (self->fast && !fast_save_leave(self, obj))
2809         status = -1;
2810 
2811     return status;
2812 }
2813 
2814 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2815  *     MARK key value ... key value SETITEMS
2816  * opcode sequences.  Calling code should have arranged to first create an
2817  * empty dict, or dict-like object, for the SETITEMS to operate on.
2818  * Returns 0 on success, <0 on error.
2819  *
2820  * This is very much like batch_list().  The difference between saving
2821  * elements directly, and picking apart two-tuples, is so long-winded at
2822  * the C level, though, that attempts to combine these routines were too
2823  * ugly to bear.
2824  */
2825 static int
batch_dict(PicklerObject * self,PyObject * iter)2826 batch_dict(PicklerObject *self, PyObject *iter)
2827 {
2828     PyObject *obj = NULL;
2829     PyObject *firstitem = NULL;
2830     int i, n;
2831 
2832     const char mark_op = MARK;
2833     const char setitem_op = SETITEM;
2834     const char setitems_op = SETITEMS;
2835 
2836     assert(iter != NULL);
2837 
2838     if (self->proto == 0) {
2839         /* SETITEMS isn't available; do one at a time. */
2840         for (;;) {
2841             obj = PyIter_Next(iter);
2842             if (obj == NULL) {
2843                 if (PyErr_Occurred())
2844                     return -1;
2845                 break;
2846             }
2847             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2848                 PyErr_SetString(PyExc_TypeError, "dict items "
2849                                 "iterator must return 2-tuples");
2850                 return -1;
2851             }
2852             i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2853             if (i >= 0)
2854                 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2855             Py_DECREF(obj);
2856             if (i < 0)
2857                 return -1;
2858             if (_Pickler_Write(self, &setitem_op, 1) < 0)
2859                 return -1;
2860         }
2861         return 0;
2862     }
2863 
2864     /* proto > 0:  write in batches of BATCHSIZE. */
2865     do {
2866         /* Get first item */
2867         firstitem = PyIter_Next(iter);
2868         if (firstitem == NULL) {
2869             if (PyErr_Occurred())
2870                 goto error;
2871 
2872             /* nothing more to add */
2873             break;
2874         }
2875         if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2876             PyErr_SetString(PyExc_TypeError, "dict items "
2877                                 "iterator must return 2-tuples");
2878             goto error;
2879         }
2880 
2881         /* Try to get a second item */
2882         obj = PyIter_Next(iter);
2883         if (obj == NULL) {
2884             if (PyErr_Occurred())
2885                 goto error;
2886 
2887             /* Only one item to write */
2888             if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2889                 goto error;
2890             if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2891                 goto error;
2892             if (_Pickler_Write(self, &setitem_op, 1) < 0)
2893                 goto error;
2894             Py_CLEAR(firstitem);
2895             break;
2896         }
2897 
2898         /* More than one item to write */
2899 
2900         /* Pump out MARK, items, SETITEMS. */
2901         if (_Pickler_Write(self, &mark_op, 1) < 0)
2902             goto error;
2903 
2904         if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2905             goto error;
2906         if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2907             goto error;
2908         Py_CLEAR(firstitem);
2909         n = 1;
2910 
2911         /* Fetch and save up to BATCHSIZE items */
2912         while (obj) {
2913             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2914                 PyErr_SetString(PyExc_TypeError, "dict items "
2915                     "iterator must return 2-tuples");
2916                 goto error;
2917             }
2918             if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2919                 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2920                 goto error;
2921             Py_CLEAR(obj);
2922             n += 1;
2923 
2924             if (n == BATCHSIZE)
2925                 break;
2926 
2927             obj = PyIter_Next(iter);
2928             if (obj == NULL) {
2929                 if (PyErr_Occurred())
2930                     goto error;
2931                 break;
2932             }
2933         }
2934 
2935         if (_Pickler_Write(self, &setitems_op, 1) < 0)
2936             goto error;
2937 
2938     } while (n == BATCHSIZE);
2939     return 0;
2940 
2941   error:
2942     Py_XDECREF(firstitem);
2943     Py_XDECREF(obj);
2944     return -1;
2945 }
2946 
2947 /* This is a variant of batch_dict() above that specializes for dicts, with no
2948  * support for dict subclasses. Like batch_dict(), we batch up chunks of
2949  *     MARK key value ... key value SETITEMS
2950  * opcode sequences.  Calling code should have arranged to first create an
2951  * empty dict, or dict-like object, for the SETITEMS to operate on.
2952  * Returns 0 on success, -1 on error.
2953  *
2954  * Note that this currently doesn't work for protocol 0.
2955  */
2956 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)2957 batch_dict_exact(PicklerObject *self, PyObject *obj)
2958 {
2959     PyObject *key = NULL, *value = NULL;
2960     int i;
2961     Py_ssize_t dict_size, ppos = 0;
2962 
2963     const char mark_op = MARK;
2964     const char setitem_op = SETITEM;
2965     const char setitems_op = SETITEMS;
2966 
2967     assert(obj != NULL && PyDict_CheckExact(obj));
2968     assert(self->proto > 0);
2969 
2970     dict_size = PyDict_GET_SIZE(obj);
2971 
2972     /* Special-case len(d) == 1 to save space. */
2973     if (dict_size == 1) {
2974         PyDict_Next(obj, &ppos, &key, &value);
2975         if (save(self, key, 0) < 0)
2976             return -1;
2977         if (save(self, value, 0) < 0)
2978             return -1;
2979         if (_Pickler_Write(self, &setitem_op, 1) < 0)
2980             return -1;
2981         return 0;
2982     }
2983 
2984     /* Write in batches of BATCHSIZE. */
2985     do {
2986         i = 0;
2987         if (_Pickler_Write(self, &mark_op, 1) < 0)
2988             return -1;
2989         while (PyDict_Next(obj, &ppos, &key, &value)) {
2990             if (save(self, key, 0) < 0)
2991                 return -1;
2992             if (save(self, value, 0) < 0)
2993                 return -1;
2994             if (++i == BATCHSIZE)
2995                 break;
2996         }
2997         if (_Pickler_Write(self, &setitems_op, 1) < 0)
2998             return -1;
2999         if (PyDict_GET_SIZE(obj) != dict_size) {
3000             PyErr_Format(
3001                 PyExc_RuntimeError,
3002                 "dictionary changed size during iteration");
3003             return -1;
3004         }
3005 
3006     } while (i == BATCHSIZE);
3007     return 0;
3008 }
3009 
3010 static int
save_dict(PicklerObject * self,PyObject * obj)3011 save_dict(PicklerObject *self, PyObject *obj)
3012 {
3013     PyObject *items, *iter;
3014     char header[3];
3015     Py_ssize_t len;
3016     int status = 0;
3017     assert(PyDict_Check(obj));
3018 
3019     if (self->fast && !fast_save_enter(self, obj))
3020         goto error;
3021 
3022     /* Create an empty dict. */
3023     if (self->bin) {
3024         header[0] = EMPTY_DICT;
3025         len = 1;
3026     }
3027     else {
3028         header[0] = MARK;
3029         header[1] = DICT;
3030         len = 2;
3031     }
3032 
3033     if (_Pickler_Write(self, header, len) < 0)
3034         goto error;
3035 
3036     if (memo_put(self, obj) < 0)
3037         goto error;
3038 
3039     if (PyDict_GET_SIZE(obj)) {
3040         /* Save the dict items. */
3041         if (PyDict_CheckExact(obj) && self->proto > 0) {
3042             /* We can take certain shortcuts if we know this is a dict and
3043                not a dict subclass. */
3044             if (Py_EnterRecursiveCall(" while pickling an object"))
3045                 goto error;
3046             status = batch_dict_exact(self, obj);
3047             Py_LeaveRecursiveCall();
3048         } else {
3049             _Py_IDENTIFIER(items);
3050 
3051             items = _PyObject_CallMethodId(obj, &PyId_items, NULL);
3052             if (items == NULL)
3053                 goto error;
3054             iter = PyObject_GetIter(items);
3055             Py_DECREF(items);
3056             if (iter == NULL)
3057                 goto error;
3058             if (Py_EnterRecursiveCall(" while pickling an object")) {
3059                 Py_DECREF(iter);
3060                 goto error;
3061             }
3062             status = batch_dict(self, iter);
3063             Py_LeaveRecursiveCall();
3064             Py_DECREF(iter);
3065         }
3066     }
3067 
3068     if (0) {
3069   error:
3070         status = -1;
3071     }
3072 
3073     if (self->fast && !fast_save_leave(self, obj))
3074         status = -1;
3075 
3076     return status;
3077 }
3078 
3079 static int
save_set(PicklerObject * self,PyObject * obj)3080 save_set(PicklerObject *self, PyObject *obj)
3081 {
3082     PyObject *item;
3083     int i;
3084     Py_ssize_t set_size, ppos = 0;
3085     Py_hash_t hash;
3086 
3087     const char empty_set_op = EMPTY_SET;
3088     const char mark_op = MARK;
3089     const char additems_op = ADDITEMS;
3090 
3091     if (self->proto < 4) {
3092         PyObject *items;
3093         PyObject *reduce_value;
3094         int status;
3095 
3096         items = PySequence_List(obj);
3097         if (items == NULL) {
3098             return -1;
3099         }
3100         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3101         Py_DECREF(items);
3102         if (reduce_value == NULL) {
3103             return -1;
3104         }
3105         /* save_reduce() will memoize the object automatically. */
3106         status = save_reduce(self, reduce_value, obj);
3107         Py_DECREF(reduce_value);
3108         return status;
3109     }
3110 
3111     if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3112         return -1;
3113 
3114     if (memo_put(self, obj) < 0)
3115         return -1;
3116 
3117     set_size = PySet_GET_SIZE(obj);
3118     if (set_size == 0)
3119         return 0;  /* nothing to do */
3120 
3121     /* Write in batches of BATCHSIZE. */
3122     do {
3123         i = 0;
3124         if (_Pickler_Write(self, &mark_op, 1) < 0)
3125             return -1;
3126         while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3127             if (save(self, item, 0) < 0)
3128                 return -1;
3129             if (++i == BATCHSIZE)
3130                 break;
3131         }
3132         if (_Pickler_Write(self, &additems_op, 1) < 0)
3133             return -1;
3134         if (PySet_GET_SIZE(obj) != set_size) {
3135             PyErr_Format(
3136                 PyExc_RuntimeError,
3137                 "set changed size during iteration");
3138             return -1;
3139         }
3140     } while (i == BATCHSIZE);
3141 
3142     return 0;
3143 }
3144 
3145 static int
save_frozenset(PicklerObject * self,PyObject * obj)3146 save_frozenset(PicklerObject *self, PyObject *obj)
3147 {
3148     PyObject *iter;
3149 
3150     const char mark_op = MARK;
3151     const char frozenset_op = FROZENSET;
3152 
3153     if (self->fast && !fast_save_enter(self, obj))
3154         return -1;
3155 
3156     if (self->proto < 4) {
3157         PyObject *items;
3158         PyObject *reduce_value;
3159         int status;
3160 
3161         items = PySequence_List(obj);
3162         if (items == NULL) {
3163             return -1;
3164         }
3165         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3166                                      items);
3167         Py_DECREF(items);
3168         if (reduce_value == NULL) {
3169             return -1;
3170         }
3171         /* save_reduce() will memoize the object automatically. */
3172         status = save_reduce(self, reduce_value, obj);
3173         Py_DECREF(reduce_value);
3174         return status;
3175     }
3176 
3177     if (_Pickler_Write(self, &mark_op, 1) < 0)
3178         return -1;
3179 
3180     iter = PyObject_GetIter(obj);
3181     if (iter == NULL) {
3182         return -1;
3183     }
3184     for (;;) {
3185         PyObject *item;
3186 
3187         item = PyIter_Next(iter);
3188         if (item == NULL) {
3189             if (PyErr_Occurred()) {
3190                 Py_DECREF(iter);
3191                 return -1;
3192             }
3193             break;
3194         }
3195         if (save(self, item, 0) < 0) {
3196             Py_DECREF(item);
3197             Py_DECREF(iter);
3198             return -1;
3199         }
3200         Py_DECREF(item);
3201     }
3202     Py_DECREF(iter);
3203 
3204     /* If the object is already in the memo, this means it is
3205        recursive. In this case, throw away everything we put on the
3206        stack, and fetch the object back from the memo. */
3207     if (PyMemoTable_Get(self->memo, obj)) {
3208         const char pop_mark_op = POP_MARK;
3209 
3210         if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3211             return -1;
3212         if (memo_get(self, obj) < 0)
3213             return -1;
3214         return 0;
3215     }
3216 
3217     if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3218         return -1;
3219     if (memo_put(self, obj) < 0)
3220         return -1;
3221 
3222     return 0;
3223 }
3224 
3225 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3226 fix_imports(PyObject **module_name, PyObject **global_name)
3227 {
3228     PyObject *key;
3229     PyObject *item;
3230     PickleState *st = _Pickle_GetGlobalState();
3231 
3232     key = PyTuple_Pack(2, *module_name, *global_name);
3233     if (key == NULL)
3234         return -1;
3235     item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3236     Py_DECREF(key);
3237     if (item) {
3238         PyObject *fixed_module_name;
3239         PyObject *fixed_global_name;
3240 
3241         if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3242             PyErr_Format(PyExc_RuntimeError,
3243                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3244                          "should be 2-tuples, not %.200s",
3245                          Py_TYPE(item)->tp_name);
3246             return -1;
3247         }
3248         fixed_module_name = PyTuple_GET_ITEM(item, 0);
3249         fixed_global_name = PyTuple_GET_ITEM(item, 1);
3250         if (!PyUnicode_Check(fixed_module_name) ||
3251             !PyUnicode_Check(fixed_global_name)) {
3252             PyErr_Format(PyExc_RuntimeError,
3253                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3254                          "should be pairs of str, not (%.200s, %.200s)",
3255                          Py_TYPE(fixed_module_name)->tp_name,
3256                          Py_TYPE(fixed_global_name)->tp_name);
3257             return -1;
3258         }
3259 
3260         Py_CLEAR(*module_name);
3261         Py_CLEAR(*global_name);
3262         Py_INCREF(fixed_module_name);
3263         Py_INCREF(fixed_global_name);
3264         *module_name = fixed_module_name;
3265         *global_name = fixed_global_name;
3266         return 0;
3267     }
3268     else if (PyErr_Occurred()) {
3269         return -1;
3270     }
3271 
3272     item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3273     if (item) {
3274         if (!PyUnicode_Check(item)) {
3275             PyErr_Format(PyExc_RuntimeError,
3276                          "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3277                          "should be strings, not %.200s",
3278                          Py_TYPE(item)->tp_name);
3279             return -1;
3280         }
3281         Py_INCREF(item);
3282         Py_XSETREF(*module_name, item);
3283     }
3284     else if (PyErr_Occurred()) {
3285         return -1;
3286     }
3287 
3288     return 0;
3289 }
3290 
/* Pickle obj by reference, as a (module name, global name) pair.
 *
 * If name is non-NULL it is used as the global name; otherwise the name is
 * taken from obj.__qualname__, falling back to obj.__name__.  The module
 * named by whichmodule() is imported and the dotted path is resolved on it;
 * pickling fails with PicklingError unless that lookup yields obj itself.
 *
 * What gets written depends on the protocol:
 *   - proto >= 2 and the (module, name) pair is in the extension registry:
 *     EXT1, EXT2 or EXT4 with the registered code (obj is not memoized on
 *     this path);
 *   - otherwise, proto >= 4: module name, global name, STACK_GLOBAL;
 *   - otherwise, nested object (parent != module): a reduce of st->getattr
 *     applied to (parent, lastname);
 *   - otherwise: GLOBAL followed by the newline-terminated module name and
 *     global name.
 * On the last three paths obj is memoized afterwards.
 *
 * Returns 0 on success, -1 on error.
 */
static int
save_global(PicklerObject *self, PyObject *obj, PyObject *name)
{
    PyObject *global_name = NULL;
    PyObject *module_name = NULL;
    PyObject *module = NULL;
    PyObject *parent = NULL;
    PyObject *dotted_path = NULL;
    PyObject *lastname = NULL;
    PyObject *cls;
    PickleState *st = _Pickle_GetGlobalState();
    int status = 0;
    _Py_IDENTIFIER(__name__);
    _Py_IDENTIFIER(__qualname__);

    const char global_op = GLOBAL;

    if (name) {
        Py_INCREF(name);
        global_name = name;
    }
    else {
        if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
            goto error;
        if (global_name == NULL) {
            global_name = _PyObject_GetAttrId(obj, &PyId___name__);
            if (global_name == NULL)
                goto error;
        }
    }

    /* Note: module is still NULL at this point. */
    dotted_path = get_dotted_path(module, global_name);
    if (dotted_path == NULL)
        goto error;
    module_name = whichmodule(obj, dotted_path);
    if (module_name == NULL)
        goto error;

    /* XXX: Change to use the import C API directly with level=0 to disallow
       relative imports.

       XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
       builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
       custom import functions (IMHO, this would be a nice security
       feature). The import C API would need to be extended to support the
       extra parameters of __import__ to fix that. */
    module = PyImport_Import(module_name);
    if (module == NULL) {
        PyErr_Format(st->PicklingError,
                     "Can't pickle %R: import of module %R failed",
                     obj, module_name);
        goto error;
    }
    /* Keep our own reference to the last path component; dotted_path is
       released right after the lookup. */
    lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
    Py_INCREF(lastname);
    cls = get_deep_attribute(module, dotted_path, &parent);
    Py_CLEAR(dotted_path);
    if (cls == NULL) {
        PyErr_Format(st->PicklingError,
                     "Can't pickle %R: attribute lookup %S on %S failed",
                     obj, global_name, module_name);
        goto error;
    }
    /* The name must lead back to the very object being pickled. */
    if (cls != obj) {
        Py_DECREF(cls);
        PyErr_Format(st->PicklingError,
                     "Can't pickle %R: it's not the same object as %S.%S",
                     obj, module_name, global_name);
        goto error;
    }
    Py_DECREF(cls);

    if (self->proto >= 2) {
        /* See whether this is in the extension registry, and if
         * so generate an EXT opcode.
         */
        PyObject *extension_key;
        PyObject *code_obj;      /* extension code as Python object */
        long code;               /* extension code as C value */
        char pdata[5];
        Py_ssize_t n;

        extension_key = PyTuple_Pack(2, module_name, global_name);
        if (extension_key == NULL) {
            goto error;
        }
        code_obj = PyDict_GetItemWithError(st->extension_registry,
                                           extension_key);
        Py_DECREF(extension_key);
        /* The object is not registered in the extension registry.
           This is the most likely code path. */
        if (code_obj == NULL) {
            if (PyErr_Occurred()) {
                goto error;
            }
            goto gen_global;
        }

        /* XXX: pickle.py checks neither the type nor the range of the
           value returned by the extension_registry. It should, for
           consistency. */

        /* Verify code_obj has the right type and value. */
        if (!PyLong_Check(code_obj)) {
            PyErr_Format(st->PicklingError,
                         "Can't pickle %R: extension code %R isn't an integer",
                         obj, code_obj);
            goto error;
        }
        code = PyLong_AS_LONG(code_obj);
        if (code <= 0 || code > 0x7fffffffL) {
            /* Keep an exception already raised by the conversion. */
            if (!PyErr_Occurred())
                PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
                             "code %ld is out of range", obj, code);
            goto error;
        }

        /* Generate an EXT opcode; the code is written least significant
           byte first, using the smallest opcode that can hold it. */
        if (code <= 0xff) {
            pdata[0] = EXT1;
            pdata[1] = (unsigned char)code;
            n = 2;
        }
        else if (code <= 0xffff) {
            pdata[0] = EXT2;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            n = 3;
        }
        else {
            pdata[0] = EXT4;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            pdata[3] = (unsigned char)((code >> 16) & 0xff);
            pdata[4] = (unsigned char)((code >> 24) & 0xff);
            n = 5;
        }

        if (_Pickler_Write(self, pdata, n) < 0)
            goto error;
    }
    else {
        /* Also entered by the "goto gen_global" above, when proto >= 2 but
           the object has no extension code. */
  gen_global:
        /* For an object found directly on the module, use the last path
           component as the global name. */
        if (parent == module) {
            Py_INCREF(lastname);
            Py_DECREF(global_name);
            global_name = lastname;
        }
        if (self->proto >= 4) {
            const char stack_global_op = STACK_GLOBAL;

            if (save(self, module_name, 0) < 0)
                goto error;
            if (save(self, global_name, 0) < 0)
                goto error;

            if (_Pickler_Write(self, &stack_global_op, 1) < 0)
                goto error;
        }
        else if (parent != module) {
            /* Nested object with proto < 4: pickle it as
               getattr(parent, lastname).
               NOTE: this st shadows the function-level st, which is
               obtained the same way. */
            PickleState *st = _Pickle_GetGlobalState();
            PyObject *reduce_value = Py_BuildValue("(O(OO))",
                                        st->getattr, parent, lastname);
            if (reduce_value == NULL)
                goto error;
            status = save_reduce(self, reduce_value, NULL);
            Py_DECREF(reduce_value);
            if (status < 0)
                goto error;
        }
        else {
            /* Generate a normal GLOBAL opcode: we get here with a pickle
               protocol < 4, for an object found directly on its module
               that was not written as an EXT opcode. */
            PyObject *encoded;
            PyObject *(*unicode_encoder)(PyObject *);

            if (_Pickler_Write(self, &global_op, 1) < 0)
                goto error;

            /* For protocol < 3 and if the user didn't request against doing
               so, we convert module names to the old 2.x module names. */
            if (self->proto < 3 && self->fix_imports) {
                if (fix_imports(&module_name, &global_name) < 0) {
                    goto error;
                }
            }

            /* Since Python 3.0 now supports non-ASCII identifiers, we encode
               both the module name and the global name using UTF-8. We do so
               only when we are using pickle protocol 3 (protocols >= 4 were
               handled by the STACK_GLOBAL branch above). Lower protocols
               use ASCII, to ensure compatibility with older Unpickler
               running on Python 2.x. */
            if (self->proto == 3) {
                unicode_encoder = PyUnicode_AsUTF8String;
            }
            else {
                unicode_encoder = PyUnicode_AsASCIIString;
            }
            /* Save the name of the module. */
            encoded = unicode_encoder(module_name);
            if (encoded == NULL) {
                if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
                    PyErr_Format(st->PicklingError,
                                 "can't pickle module identifier '%S' using "
                                 "pickle protocol %i",
                                 module_name, self->proto);
                goto error;
            }
            if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
                               PyBytes_GET_SIZE(encoded)) < 0) {
                Py_DECREF(encoded);
                goto error;
            }
            Py_DECREF(encoded);
            if(_Pickler_Write(self, "\n", 1) < 0)
                goto error;

            /* Save the name of the global. */
            encoded = unicode_encoder(global_name);
            if (encoded == NULL) {
                if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
                    PyErr_Format(st->PicklingError,
                                 "can't pickle global identifier '%S' using "
                                 "pickle protocol %i",
                                 global_name, self->proto);
                goto error;
            }
            if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
                               PyBytes_GET_SIZE(encoded)) < 0) {
                Py_DECREF(encoded);
                goto error;
            }
            Py_DECREF(encoded);
            if (_Pickler_Write(self, "\n", 1) < 0)
                goto error;
        }
        /* Memoize the object. */
        if (memo_put(self, obj) < 0)
            goto error;
    }

    /* The error label sits inside a never-taken branch so that the success
       path falls through to the shared cleanup without touching status. */
    if (0) {
  error:
        status = -1;
    }
    Py_XDECREF(module_name);
    Py_XDECREF(global_name);
    Py_XDECREF(module);
    Py_XDECREF(parent);
    Py_XDECREF(dotted_path);
    Py_XDECREF(lastname);

    return status;
}
3545 
3546 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3547 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3548 {
3549     PyObject *reduce_value;
3550     int status;
3551 
3552     reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3553     if (reduce_value == NULL) {
3554         return -1;
3555     }
3556     status = save_reduce(self, reduce_value, obj);
3557     Py_DECREF(reduce_value);
3558     return status;
3559 }
3560 
3561 static int
save_type(PicklerObject * self,PyObject * obj)3562 save_type(PicklerObject *self, PyObject *obj)
3563 {
3564     if (obj == (PyObject *)&_PyNone_Type) {
3565         return save_singleton_type(self, obj, Py_None);
3566     }
3567     else if (obj == (PyObject *)&PyEllipsis_Type) {
3568         return save_singleton_type(self, obj, Py_Ellipsis);
3569     }
3570     else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3571         return save_singleton_type(self, obj, Py_NotImplemented);
3572     }
3573     return save_global(self, obj, NULL);
3574 }
3575 
3576 static int
save_pers(PicklerObject * self,PyObject * obj)3577 save_pers(PicklerObject *self, PyObject *obj)
3578 {
3579     PyObject *pid = NULL;
3580     int status = 0;
3581 
3582     const char persid_op = PERSID;
3583     const char binpersid_op = BINPERSID;
3584 
3585     pid = call_method(self->pers_func, self->pers_func_self, obj);
3586     if (pid == NULL)
3587         return -1;
3588 
3589     if (pid != Py_None) {
3590         if (self->bin) {
3591             if (save(self, pid, 1) < 0 ||
3592                 _Pickler_Write(self, &binpersid_op, 1) < 0)
3593                 goto error;
3594         }
3595         else {
3596             PyObject *pid_str;
3597 
3598             pid_str = PyObject_Str(pid);
3599             if (pid_str == NULL)
3600                 goto error;
3601 
3602             /* XXX: Should it check whether the pid contains embedded
3603                newlines? */
3604             if (!PyUnicode_IS_ASCII(pid_str)) {
3605                 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3606                                 "persistent IDs in protocol 0 must be "
3607                                 "ASCII strings");
3608                 Py_DECREF(pid_str);
3609                 goto error;
3610             }
3611 
3612             if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3613                 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3614                                PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3615                 _Pickler_Write(self, "\n", 1) < 0) {
3616                 Py_DECREF(pid_str);
3617                 goto error;
3618             }
3619             Py_DECREF(pid_str);
3620         }
3621         status = 1;
3622     }
3623 
3624     if (0) {
3625   error:
3626         status = -1;
3627     }
3628     Py_XDECREF(pid);
3629 
3630     return status;
3631 }
3632 
3633 static PyObject *
get_class(PyObject * obj)3634 get_class(PyObject *obj)
3635 {
3636     PyObject *cls;
3637     _Py_IDENTIFIER(__class__);
3638 
3639     if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3640         cls = (PyObject *) Py_TYPE(obj);
3641         Py_INCREF(cls);
3642     }
3643     return cls;
3644 }
3645 
/* Pickle obj from the reduction tuple `args` returned by the appropriate
 * __reduce__ method (or dispatch-table function) for obj.
 *
 * args must hold 2 through 5 items:
 *     (callable, argtup[, state[, listitems[, dictitems]]])
 * where callable must be callable, argtup must be a tuple, and listitems
 * and dictitems, when not None, must be iterators.
 *
 * For protocol >= 2, a callable named "__newobj_ex__" or "__newobj__" is
 * not pickled as a call; it selects the NEWOBJ_EX / NEWOBJ code paths below.
 *
 * obj may be NULL, in which case nothing is memoized.
 *
 * Returns 0 on success and -1 on failure.  The validation failures in this
 * function raise PicklingError.
 */
static int
save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
{
    PyObject *callable;
    PyObject *argtup;
    PyObject *state = NULL;
    PyObject *listitems = Py_None;
    PyObject *dictitems = Py_None;
    PickleState *st = _Pickle_GetGlobalState();
    Py_ssize_t size;
    int use_newobj = 0, use_newobj_ex = 0;

    const char reduce_op = REDUCE;
    const char build_op = BUILD;
    const char newobj_op = NEWOBJ;
    const char newobj_ex_op = NEWOBJ_EX;

    /* Validate the shape of the reduction tuple before touching the
       output stream. */
    size = PyTuple_Size(args);
    if (size < 2 || size > 5) {
        PyErr_SetString(st->PicklingError, "tuple returned by "
                        "__reduce__ must contain 2 through 5 elements");
        return -1;
    }

    /* All unpacked items are borrowed references owned by args. */
    if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
                           &callable, &argtup, &state, &listitems, &dictitems))
        return -1;

    if (!PyCallable_Check(callable)) {
        PyErr_SetString(st->PicklingError, "first item of the tuple "
                        "returned by __reduce__ must be callable");
        return -1;
    }
    if (!PyTuple_Check(argtup)) {
        PyErr_SetString(st->PicklingError, "second item of the tuple "
                        "returned by __reduce__ must be a tuple");
        return -1;
    }

    /* From here on, NULL means "absent" for the three optional items. */
    if (state == Py_None)
        state = NULL;

    if (listitems == Py_None)
        listitems = NULL;
    else if (!PyIter_Check(listitems)) {
        PyErr_Format(st->PicklingError, "fourth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(listitems)->tp_name);
        return -1;
    }

    if (dictitems == Py_None)
        dictitems = NULL;
    else if (!PyIter_Check(dictitems)) {
        PyErr_Format(st->PicklingError, "fifth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(dictitems)->tp_name);
        return -1;
    }

    /* Decide, from the callable's __name__, whether one of the NEWOBJ
       code paths applies.  Only considered for protocol 2 and above. */
    if (self->proto >= 2) {
        PyObject *name;
        _Py_IDENTIFIER(__name__);

        if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
            return -1;
        }
        if (name != NULL && PyUnicode_Check(name)) {
            _Py_IDENTIFIER(__newobj_ex__);
            use_newobj_ex = _PyUnicode_EqualToASCIIId(
                    name, &PyId___newobj_ex__);
            if (!use_newobj_ex) {
                _Py_IDENTIFIER(__newobj__);
                use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
            }
        }
        Py_XDECREF(name);
    }

    if (use_newobj_ex) {
        /* argtup must be exactly (cls, args, kwargs).
           NOTE: this local `args` shadows the function parameter for the
           rest of this branch. */
        PyObject *cls;
        PyObject *args;
        PyObject *kwargs;

        if (PyTuple_GET_SIZE(argtup) != 3) {
            PyErr_Format(st->PicklingError,
                         "length of the NEWOBJ_EX argument tuple must be "
                         "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_Format(st->PicklingError,
                         "first item from NEWOBJ_EX argument tuple must "
                         "be a class, not %.200s", Py_TYPE(cls)->tp_name);
            return -1;
        }
        args = PyTuple_GET_ITEM(argtup, 1);
        if (!PyTuple_Check(args)) {
            PyErr_Format(st->PicklingError,
                         "second item from NEWOBJ_EX argument tuple must "
                         "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
            return -1;
        }
        kwargs = PyTuple_GET_ITEM(argtup, 2);
        if (!PyDict_Check(kwargs)) {
            PyErr_Format(st->PicklingError,
                         "third item from NEWOBJ_EX argument tuple must "
                         "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
            return -1;
        }

        if (self->proto >= 4) {
            /* Protocol 4+: emit cls, args, kwargs followed by NEWOBJ_EX. */
            if (save(self, cls, 0) < 0 ||
                save(self, args, 0) < 0 ||
                save(self, kwargs, 0) < 0 ||
                _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
                return -1;
            }
        }
        else {
            /* Protocols 2 and 3 take this path instead of NEWOBJ_EX:
               build st->partial(cls.__new__, cls, *args, **kwargs) and
               pickle it as a REDUCE call with an empty argument tuple. */
            PyObject *newargs;
            PyObject *cls_new;
            Py_ssize_t i;
            _Py_IDENTIFIER(__new__);

            newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
            if (newargs == NULL)
                return -1;

            cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
            if (cls_new == NULL) {
                Py_DECREF(newargs);
                return -1;
            }
            /* PyTuple_SET_ITEM steals the reference it is given. */
            PyTuple_SET_ITEM(newargs, 0, cls_new);
            Py_INCREF(cls);
            PyTuple_SET_ITEM(newargs, 1, cls);
            for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
                PyObject *item = PyTuple_GET_ITEM(args, i);
                Py_INCREF(item);
                PyTuple_SET_ITEM(newargs, i + 2, item);
            }

            /* callable now holds a new (owned) reference, unlike the
               borrowed one unpacked from the reduction tuple. */
            callable = PyObject_Call(st->partial, newargs, kwargs);
            Py_DECREF(newargs);
            if (callable == NULL)
                return -1;

            newargs = PyTuple_New(0);
            if (newargs == NULL) {
                Py_DECREF(callable);
                return -1;
            }

            if (save(self, callable, 0) < 0 ||
                save(self, newargs, 0) < 0 ||
                _Pickler_Write(self, &reduce_op, 1) < 0) {
                Py_DECREF(newargs);
                Py_DECREF(callable);
                return -1;
            }
            Py_DECREF(newargs);
            Py_DECREF(callable);
        }
    }
    else if (use_newobj) {
        PyObject *cls;
        PyObject *newargtup;
        PyObject *obj_class;
        int p;

        /* Sanity checks. */
        if (PyTuple_GET_SIZE(argtup) < 1) {
            PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_SetString(st->PicklingError, "args[0] from "
                            "__newobj__ args is not a type");
            return -1;
        }

        /* When obj is known, its class must be exactly args[0]. */
        if (obj != NULL) {
            obj_class = get_class(obj);
            if (obj_class == NULL) {
                return -1;
            }
            p = obj_class != cls;
            Py_DECREF(obj_class);
            if (p) {
                PyErr_SetString(st->PicklingError, "args[0] from "
                                "__newobj__ args has the wrong class");
                return -1;
            }
        }
        /* XXX: These calls to save() are prone to infinite recursion. Imagine
           what happens if the value returned by the __reduce__() method of
           some extension type contains another object of the same type. Ouch!

           Here is a quick example, that I ran into, to illustrate what I
           mean:

             >>> import pickle, copyreg
             >>> copyreg.dispatch_table.pop(complex)
             >>> pickle.dumps(1+2j)
             Traceback (most recent call last):
               ...
             RecursionError: maximum recursion depth exceeded

           Removing the complex class from copyreg.dispatch_table made the
           __reduce_ex__() method emit another complex object:

             >>> (1+1j).__reduce_ex__(2)
             (<function __newobj__ at 0xb7b71c3c>,
               (<class 'complex'>, (1+1j)), None, None, None)

           Thus when save() was called on newargtup (the 2nd item) recursion
           ensued. Of course, the bug was in the complex class which had a
           broken __getnewargs__() that emitted another complex object. But
           the point here is that it is quite easy to end up with a broken
           reduce function. */

        /* Save the class and its __new__ arguments. */
        if (save(self, cls, 0) < 0)
            return -1;

        /* Everything after args[0] is the argument tuple for cls.__new__. */
        newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
        if (newargtup == NULL)
            return -1;

        p = save(self, newargtup, 0);
        Py_DECREF(newargtup);
        if (p < 0)
            return -1;

        /* Add NEWOBJ opcode. */
        if (_Pickler_Write(self, &newobj_op, 1) < 0)
            return -1;
    }
    else { /* Not using NEWOBJ. */
        if (save(self, callable, 0) < 0 ||
            save(self, argtup, 0) < 0 ||
            _Pickler_Write(self, &reduce_op, 1) < 0)
            return -1;
    }

    /* obj can be NULL when save_reduce() is used directly. A NULL obj means
       the caller does not want to memoize the object. Not particularly
       useful, but that is to mimic the behavior of save_reduce() in pickle.py
       when obj is None. */
    if (obj != NULL) {
        /* If the object is already in the memo, this means it is
           recursive. In this case, throw away everything we put on the
           stack, and fetch the object back from the memo. */
        if (PyMemoTable_Get(self->memo, obj)) {
            const char pop_op = POP;

            if (_Pickler_Write(self, &pop_op, 1) < 0)
                return -1;
            if (memo_get(self, obj) < 0)
                return -1;

            return 0;
        }
        else if (memo_put(self, obj) < 0)
            return -1;
    }

    /* The optional trailing items are emitted in this fixed order:
       list items, dict items, then state followed by BUILD. */
    if (listitems && batch_list(self, listitems) < 0)
        return -1;

    if (dictitems && batch_dict(self, dictitems) < 0)
        return -1;

    if (state) {
        if (save(self, state, 0) < 0 ||
            _Pickler_Write(self, &build_op, 1) < 0)
            return -1;
    }

    return 0;
}
3935 
3936 static int
save(PicklerObject * self,PyObject * obj,int pers_save)3937 save(PicklerObject *self, PyObject *obj, int pers_save)
3938 {
3939     PyTypeObject *type;
3940     PyObject *reduce_func = NULL;
3941     PyObject *reduce_value = NULL;
3942     int status = 0;
3943 
3944     if (_Pickler_OpcodeBoundary(self) < 0)
3945         return -1;
3946 
3947     if (Py_EnterRecursiveCall(" while pickling an object"))
3948         return -1;
3949 
3950     /* The extra pers_save argument is necessary to avoid calling save_pers()
3951        on its returned object. */
3952     if (!pers_save && self->pers_func) {
3953         /* save_pers() returns:
3954             -1   to signal an error;
3955              0   if it did nothing successfully;
3956              1   if a persistent id was saved.
3957          */
3958         if ((status = save_pers(self, obj)) != 0)
3959             goto done;
3960     }
3961 
3962     type = Py_TYPE(obj);
3963 
3964     /* The old cPickle had an optimization that used switch-case statement
3965        dispatching on the first letter of the type name.  This has was removed
3966        since benchmarks shown that this optimization was actually slowing
3967        things down. */
3968 
3969     /* Atom types; these aren't memoized, so don't check the memo. */
3970 
3971     if (obj == Py_None) {
3972         status = save_none(self, obj);
3973         goto done;
3974     }
3975     else if (obj == Py_False || obj == Py_True) {
3976         status = save_bool(self, obj);
3977         goto done;
3978     }
3979     else if (type == &PyLong_Type) {
3980         status = save_long(self, obj);
3981         goto done;
3982     }
3983     else if (type == &PyFloat_Type) {
3984         status = save_float(self, obj);
3985         goto done;
3986     }
3987 
3988     /* Check the memo to see if it has the object. If so, generate
3989        a GET (or BINGET) opcode, instead of pickling the object
3990        once again. */
3991     if (PyMemoTable_Get(self->memo, obj)) {
3992         if (memo_get(self, obj) < 0)
3993             goto error;
3994         goto done;
3995     }
3996 
3997     if (type == &PyBytes_Type) {
3998         status = save_bytes(self, obj);
3999         goto done;
4000     }
4001     else if (type == &PyUnicode_Type) {
4002         status = save_unicode(self, obj);
4003         goto done;
4004     }
4005     else if (type == &PyDict_Type) {
4006         status = save_dict(self, obj);
4007         goto done;
4008     }
4009     else if (type == &PySet_Type) {
4010         status = save_set(self, obj);
4011         goto done;
4012     }
4013     else if (type == &PyFrozenSet_Type) {
4014         status = save_frozenset(self, obj);
4015         goto done;
4016     }
4017     else if (type == &PyList_Type) {
4018         status = save_list(self, obj);
4019         goto done;
4020     }
4021     else if (type == &PyTuple_Type) {
4022         status = save_tuple(self, obj);
4023         goto done;
4024     }
4025     else if (type == &PyType_Type) {
4026         status = save_type(self, obj);
4027         goto done;
4028     }
4029     else if (type == &PyFunction_Type) {
4030         status = save_global(self, obj, NULL);
4031         goto done;
4032     }
4033 
4034     /* XXX: This part needs some unit tests. */
4035 
4036     /* Get a reduction callable, and call it.  This may come from
4037      * self.dispatch_table, copyreg.dispatch_table, the object's
4038      * __reduce_ex__ method, or the object's __reduce__ method.
4039      */
4040     if (self->dispatch_table == NULL) {
4041         PickleState *st = _Pickle_GetGlobalState();
4042         reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4043                                               (PyObject *)type);
4044         if (reduce_func == NULL) {
4045             if (PyErr_Occurred()) {
4046                 goto error;
4047             }
4048         } else {
4049             /* PyDict_GetItemWithError() returns a borrowed reference.
4050                Increase the reference count to be consistent with
4051                PyObject_GetItem and _PyObject_GetAttrId used below. */
4052             Py_INCREF(reduce_func);
4053         }
4054     } else {
4055         reduce_func = PyObject_GetItem(self->dispatch_table,
4056                                        (PyObject *)type);
4057         if (reduce_func == NULL) {
4058             if (PyErr_ExceptionMatches(PyExc_KeyError))
4059                 PyErr_Clear();
4060             else
4061                 goto error;
4062         }
4063     }
4064     if (reduce_func != NULL) {
4065         Py_INCREF(obj);
4066         reduce_value = _Pickle_FastCall(reduce_func, obj);
4067     }
4068     else if (PyType_IsSubtype(type, &PyType_Type)) {
4069         status = save_global(self, obj, NULL);
4070         goto done;
4071     }
4072     else {
4073         _Py_IDENTIFIER(__reduce__);
4074         _Py_IDENTIFIER(__reduce_ex__);
4075 
4076 
4077         /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4078            automatically defined as __reduce__. While this is convenient, this
4079            make it impossible to know which method was actually called. Of
4080            course, this is not a big deal. But still, it would be nice to let
4081            the user know which method was called when something go
4082            wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4083            don't actually have to check for a __reduce__ method. */
4084 
4085         /* Check for a __reduce_ex__ method. */
4086         if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
4087             goto error;
4088         }
4089         if (reduce_func != NULL) {
4090             PyObject *proto;
4091             proto = PyLong_FromLong(self->proto);
4092             if (proto != NULL) {
4093                 reduce_value = _Pickle_FastCall(reduce_func, proto);
4094             }
4095         }
4096         else {
4097             PickleState *st = _Pickle_GetGlobalState();
4098 
4099             /* Check for a __reduce__ method. */
4100             reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce__);
4101             if (reduce_func != NULL) {
4102                 reduce_value = _PyObject_CallNoArg(reduce_func);
4103             }
4104             else {
4105                 PyErr_Format(st->PicklingError,
4106                              "can't pickle '%.200s' object: %R",
4107                              type->tp_name, obj);
4108                 goto error;
4109             }
4110         }
4111     }
4112 
4113     if (reduce_value == NULL)
4114         goto error;
4115 
4116     if (PyUnicode_Check(reduce_value)) {
4117         status = save_global(self, obj, reduce_value);
4118         goto done;
4119     }
4120 
4121     if (!PyTuple_Check(reduce_value)) {
4122         PickleState *st = _Pickle_GetGlobalState();
4123         PyErr_SetString(st->PicklingError,
4124                         "__reduce__ must return a string or tuple");
4125         goto error;
4126     }
4127 
4128     status = save_reduce(self, reduce_value, obj);
4129 
4130     if (0) {
4131   error:
4132         status = -1;
4133     }
4134   done:
4135 
4136     Py_LeaveRecursiveCall();
4137     Py_XDECREF(reduce_func);
4138     Py_XDECREF(reduce_value);
4139 
4140     return status;
4141 }
4142 
4143 static int
dump(PicklerObject * self,PyObject * obj)4144 dump(PicklerObject *self, PyObject *obj)
4145 {
4146     const char stop_op = STOP;
4147 
4148     if (self->proto >= 2) {
4149         char header[2];
4150 
4151         header[0] = PROTO;
4152         assert(self->proto >= 0 && self->proto < 256);
4153         header[1] = (unsigned char)self->proto;
4154         if (_Pickler_Write(self, header, 2) < 0)
4155             return -1;
4156         if (self->proto >= 4)
4157             self->framing = 1;
4158     }
4159 
4160     if (save(self, obj, 0) < 0 ||
4161         _Pickler_Write(self, &stop_op, 1) < 0 ||
4162         _Pickler_CommitFrame(self) < 0)
4163         return -1;
4164     self->framing = 0;
4165     return 0;
4166 }
4167 
4168 /*[clinic input]
4169 
4170 _pickle.Pickler.clear_memo
4171 
4172 Clears the pickler's "memo".
4173 
4174 The memo is the data structure that remembers which objects the
4175 pickler has already seen, so that shared or recursive objects are
4176 pickled by reference and not by value.  This method is useful when
4177 re-using picklers.
4178 [clinic start generated code]*/
4179 
4180 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4181 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4182 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4183 {
4184     if (self->memo)
4185         PyMemoTable_Clear(self->memo);
4186 
4187     Py_RETURN_NONE;
4188 }
4189 
4190 /*[clinic input]
4191 
4192 _pickle.Pickler.dump
4193 
4194   obj: object
4195   /
4196 
4197 Write a pickled representation of the given object to the open file.
4198 [clinic start generated code]*/
4199 
4200 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4201 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4202 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4203 {
4204     /* Check whether the Pickler was initialized correctly (issue3664).
4205        Developers often forget to call __init__() in their subclasses, which
4206        would trigger a segfault without this check. */
4207     if (self->write == NULL) {
4208         PickleState *st = _Pickle_GetGlobalState();
4209         PyErr_Format(st->PicklingError,
4210                      "Pickler.__init__() was not called by %s.__init__()",
4211                      Py_TYPE(self)->tp_name);
4212         return NULL;
4213     }
4214 
4215     if (_Pickler_ClearBuffer(self) < 0)
4216         return NULL;
4217 
4218     if (dump(self, obj) < 0)
4219         return NULL;
4220 
4221     if (_Pickler_FlushToFile(self) < 0)
4222         return NULL;
4223 
4224     Py_RETURN_NONE;
4225 }
4226 
4227 /*[clinic input]
4228 
4229 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4230 
4231 Returns size in memory, in bytes.
4232 [clinic start generated code]*/
4233 
4234 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4235 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4236 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4237 {
4238     Py_ssize_t res, s;
4239 
4240     res = _PyObject_SIZE(Py_TYPE(self));
4241     if (self->memo != NULL) {
4242         res += sizeof(PyMemoTable);
4243         res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4244     }
4245     if (self->output_buffer != NULL) {
4246         s = _PySys_GetSizeOf(self->output_buffer);
4247         if (s == -1)
4248             return -1;
4249         res += s;
4250     }
4251     return res;
4252 }
4253 
/* Method table for the Pickler type: dump(), clear_memo() and __sizeof__().
   The entries are METHODDEF macros defined outside this section. */
static struct PyMethodDef Pickler_methods[] = {
    _PICKLE_PICKLER_DUMP_METHODDEF
    _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
    _PICKLE_PICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
4260 
/* Deallocator for Pickler objects: drops every owned reference, deletes the
   memo table and frees the object itself. */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Stop GC tracking before the fields are torn down. */
    PyObject_GC_UnTrack(self);

    Py_XDECREF(self->output_buffer);
    Py_XDECREF(self->write);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->dispatch_table);
    Py_XDECREF(self->fast_memo);

    /* The memo table is not a Python object; it has its own destructor. */
    PyMemoTable_Del(self->memo);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
4276 
/* GC traversal for Pickler objects: visits write, pers_func, dispatch_table
   and fast_memo.  Returns 0 once all of them have been visited. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->dispatch_table);
    Py_VISIT(self->fast_memo);
    return 0;
}
4286 
4287 static int
Pickler_clear(PicklerObject * self)4288 Pickler_clear(PicklerObject *self)
4289 {
4290     Py_CLEAR(self->output_buffer);
4291     Py_CLEAR(self->write);
4292     Py_CLEAR(self->pers_func);
4293     Py_CLEAR(self->dispatch_table);
4294     Py_CLEAR(self->fast_memo);
4295 
4296     if (self->memo != NULL) {
4297         PyMemoTable *memo = self->memo;
4298         self->memo = NULL;
4299         PyMemoTable_Del(memo);
4300     }
4301     return 0;
4302 }
4303 
4304 
4305 /*[clinic input]
4306 
4307 _pickle.Pickler.__init__
4308 
4309   file: object
4310   protocol: object = NULL
4311   fix_imports: bool = True
4312 
4313 This takes a binary file for writing a pickle data stream.
4314 
4315 The optional *protocol* argument tells the pickler to use the given
4316 protocol; supported protocols are 0, 1, 2, 3 and 4.  The default
4317 protocol is 3; a backward-incompatible protocol designed for Python 3.
4318 
4319 Specifying a negative protocol version selects the highest protocol
4320 version supported.  The higher the protocol used, the more recent the
4321 version of Python needed to read the pickle produced.
4322 
4323 The *file* argument must have a write() method that accepts a single
4324 bytes argument. It can thus be a file object opened for binary
4325 writing, an io.BytesIO instance, or any other custom object that meets
4326 this interface.
4327 
4328 If *fix_imports* is True and protocol is less than 3, pickle will try
4329 to map the new Python 3 names to the old module names used in Python
4330 2, so that the pickle data stream is readable with Python 2.
4331 [clinic start generated code]*/
4332 
4333 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports)4334 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4335                               PyObject *protocol, int fix_imports)
4336 /*[clinic end generated code: output=b5f31078dab17fb0 input=4faabdbc763c2389]*/
4337 {
4338     _Py_IDENTIFIER(persistent_id);
4339     _Py_IDENTIFIER(dispatch_table);
4340 
4341     /* In case of multiple __init__() calls, clear previous content. */
4342     if (self->write != NULL)
4343         (void)Pickler_clear(self);
4344 
4345     if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4346         return -1;
4347 
4348     if (_Pickler_SetOutputStream(self, file) < 0)
4349         return -1;
4350 
4351     /* memo and output_buffer may have already been created in _Pickler_New */
4352     if (self->memo == NULL) {
4353         self->memo = PyMemoTable_New();
4354         if (self->memo == NULL)
4355             return -1;
4356     }
4357     self->output_len = 0;
4358     if (self->output_buffer == NULL) {
4359         self->max_output_len = WRITE_BUF_SIZE;
4360         self->output_buffer = PyBytes_FromStringAndSize(NULL,
4361                                                         self->max_output_len);
4362         if (self->output_buffer == NULL)
4363             return -1;
4364     }
4365 
4366     self->fast = 0;
4367     self->fast_nesting = 0;
4368     self->fast_memo = NULL;
4369 
4370     if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4371                         &self->pers_func, &self->pers_func_self) < 0)
4372     {
4373         return -1;
4374     }
4375 
4376     if (_PyObject_LookupAttrId((PyObject *)self,
4377                                     &PyId_dispatch_table, &self->dispatch_table) < 0) {
4378         return -1;
4379     }
4380 
4381     return 0;
4382 }
4383 
4384 
4385 /* Define a proxy object for the Pickler's internal memo object. This is to
4386  * avoid breaking code like:
4387  *  pickler.memo.clear()
4388  * and
4389  *  pickler.memo = saved_memo
4390  * Is this a good idea? Not really, but we don't want to break code that uses
4391  * it. Note that we don't implement the entire mapping API here. This is
4392  * intentional, as these should be treated as black-box implementation details.
4393  */
4394 
4395 /*[clinic input]
4396 _pickle.PicklerMemoProxy.clear
4397 
4398 Remove all items from memo.
4399 [clinic start generated code]*/
4400 
4401 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4402 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4403 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4404 {
4405     if (self->pickler->memo)
4406         PyMemoTable_Clear(self->pickler->memo);
4407     Py_RETURN_NONE;
4408 }
4409 
4410 /*[clinic input]
4411 _pickle.PicklerMemoProxy.copy
4412 
4413 Copy the memo to a new object.
4414 [clinic start generated code]*/
4415 
4416 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4417 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4418 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4419 {
4420     PyMemoTable *memo;
4421     PyObject *new_memo = PyDict_New();
4422     if (new_memo == NULL)
4423         return NULL;
4424 
4425     memo = self->pickler->memo;
4426     for (size_t i = 0; i < memo->mt_allocated; ++i) {
4427         PyMemoEntry entry = memo->mt_table[i];
4428         if (entry.me_key != NULL) {
4429             int status;
4430             PyObject *key, *value;
4431 
4432             key = PyLong_FromVoidPtr(entry.me_key);
4433             value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4434 
4435             if (key == NULL || value == NULL) {
4436                 Py_XDECREF(key);
4437                 Py_XDECREF(value);
4438                 goto error;
4439             }
4440             status = PyDict_SetItem(new_memo, key, value);
4441             Py_DECREF(key);
4442             Py_DECREF(value);
4443             if (status < 0)
4444                 goto error;
4445         }
4446     }
4447     return new_memo;
4448 
4449   error:
4450     Py_XDECREF(new_memo);
4451     return NULL;
4452 }
4453 
4454 /*[clinic input]
4455 _pickle.PicklerMemoProxy.__reduce__
4456 
4457 Implement pickle support.
4458 [clinic start generated code]*/
4459 
4460 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4461 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4462 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4463 {
4464     PyObject *reduce_value, *dict_args;
4465     PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4466     if (contents == NULL)
4467         return NULL;
4468 
4469     reduce_value = PyTuple_New(2);
4470     if (reduce_value == NULL) {
4471         Py_DECREF(contents);
4472         return NULL;
4473     }
4474     dict_args = PyTuple_New(1);
4475     if (dict_args == NULL) {
4476         Py_DECREF(contents);
4477         Py_DECREF(reduce_value);
4478         return NULL;
4479     }
4480     PyTuple_SET_ITEM(dict_args, 0, contents);
4481     Py_INCREF((PyObject *)&PyDict_Type);
4482     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4483     PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4484     return reduce_value;
4485 }
4486 
/* Method table for PicklerMemoProxy: clear(), copy() and __reduce__().
   The entries are METHODDEF macros defined outside this section. */
static PyMethodDef picklerproxy_methods[] = {
    _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL} /* sentinel */
};
4493 
/* Deallocator for PicklerMemoProxy objects: drops the reference to the
   proxied pickler and frees the proxy. */
static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
{
    /* Stop GC tracking before the reference is released. */
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->pickler);
    PyObject_GC_Del((PyObject *)self);
}
4501 
/* GC traversal for PicklerMemoProxy objects: the proxied pickler is the
   only reference visited. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
4509 
/* tp_clear for PicklerMemoProxy: release the reference to the wrapped
   pickler and reset the field to NULL.  Always returns 0. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
4516 
/* Type object for _pickle.PicklerMemoProxy.  Instances are GC-tracked,
   unhashable, and expose the methods in picklerproxy_methods.  Slots after
   tp_methods are left to the implicit zero-initialisation. */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /*tp_name*/
    sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved (formerly tp_compare) */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};
4547 
4548 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4549 PicklerMemoProxy_New(PicklerObject *pickler)
4550 {
4551     PicklerMemoProxyObject *self;
4552 
4553     self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4554     if (self == NULL)
4555         return NULL;
4556     Py_INCREF(pickler);
4557     self->pickler = pickler;
4558     PyObject_GC_Track(self);
4559     return (PyObject *)self;
4560 }
4561 
4562 /*****************************************************************************/
4563 
4564 static PyObject *
Pickler_get_memo(PicklerObject * self,void * Py_UNUSED (ignored))4565 Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
4566 {
4567     return PicklerMemoProxy_New(self);
4568 }
4569 
4570 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4571 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4572 {
4573     PyMemoTable *new_memo = NULL;
4574 
4575     if (obj == NULL) {
4576         PyErr_SetString(PyExc_TypeError,
4577                         "attribute deletion is not supported");
4578         return -1;
4579     }
4580 
4581     if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4582         PicklerObject *pickler =
4583             ((PicklerMemoProxyObject *)obj)->pickler;
4584 
4585         new_memo = PyMemoTable_Copy(pickler->memo);
4586         if (new_memo == NULL)
4587             return -1;
4588     }
4589     else if (PyDict_Check(obj)) {
4590         Py_ssize_t i = 0;
4591         PyObject *key, *value;
4592 
4593         new_memo = PyMemoTable_New();
4594         if (new_memo == NULL)
4595             return -1;
4596 
4597         while (PyDict_Next(obj, &i, &key, &value)) {
4598             Py_ssize_t memo_id;
4599             PyObject *memo_obj;
4600 
4601             if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4602                 PyErr_SetString(PyExc_TypeError,
4603                                 "'memo' values must be 2-item tuples");
4604                 goto error;
4605             }
4606             memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
4607             if (memo_id == -1 && PyErr_Occurred())
4608                 goto error;
4609             memo_obj = PyTuple_GET_ITEM(value, 1);
4610             if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
4611                 goto error;
4612         }
4613     }
4614     else {
4615         PyErr_Format(PyExc_TypeError,
4616                      "'memo' attribute must be a PicklerMemoProxy object "
4617                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
4618         return -1;
4619     }
4620 
4621     PyMemoTable_Del(self->memo);
4622     self->memo = new_memo;
4623 
4624     return 0;
4625 
4626   error:
4627     if (new_memo)
4628         PyMemoTable_Del(new_memo);
4629     return -1;
4630 }
4631 
4632 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))4633 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
4634 {
4635     if (self->pers_func == NULL) {
4636         PyErr_SetString(PyExc_AttributeError, "persistent_id");
4637         return NULL;
4638     }
4639     return reconstruct_method(self->pers_func, self->pers_func_self);
4640 }
4641 
4642 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))4643 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
4644 {
4645     if (value == NULL) {
4646         PyErr_SetString(PyExc_TypeError,
4647                         "attribute deletion is not supported");
4648         return -1;
4649     }
4650     if (!PyCallable_Check(value)) {
4651         PyErr_SetString(PyExc_TypeError,
4652                         "persistent_id must be a callable taking one argument");
4653         return -1;
4654     }
4655 
4656     self->pers_func_self = NULL;
4657     Py_INCREF(value);
4658     Py_XSETREF(self->pers_func, value);
4659 
4660     return 0;
4661 }
4662 
/* Attributes of Pickler that map directly onto PicklerObject fields:
   `bin` and `fast` are C ints, `dispatch_table` is an object slot
   (T_OBJECT_EX).  Used as tp_members of Pickler_Type. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
    {NULL}
};
4669 
/* Computed attributes of Pickler: `memo` and `persistent_id`, each with a
   getter/setter pair.  Used as tp_getset of Pickler_Type. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}
};
4677 
/* Type object for _pickle.Pickler: a GC-enabled, subclassable type whose
   instances are created with PyType_GenericNew and initialised by
   _pickle_Pickler___init__. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    _pickle_Pickler___init____doc__,    /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Pickler___init__,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
4720 
4721 /* Temporary helper for calling self.find_class().
4722 
4723    XXX: It would be nice to able to avoid Python function call overhead, by
4724    using directly the C version of find_class(), when find_class() is not
4725    overridden by a subclass. Although, this could become rather hackish. A
4726    simpler optimization would be to call the C function when self is not a
4727    subclass instance. */
4728 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)4729 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
4730 {
4731     _Py_IDENTIFIER(find_class);
4732 
4733     return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
4734                                          module_name, global_name, NULL);
4735 }
4736 
4737 static Py_ssize_t
marker(UnpicklerObject * self)4738 marker(UnpicklerObject *self)
4739 {
4740     Py_ssize_t mark;
4741 
4742     if (self->num_marks < 1) {
4743         PickleState *st = _Pickle_GetGlobalState();
4744         PyErr_SetString(st->UnpicklingError, "could not find MARK");
4745         return -1;
4746     }
4747 
4748     mark = self->marks[--self->num_marks];
4749     self->stack->mark_set = self->num_marks != 0;
4750     self->stack->fence = self->num_marks ?
4751             self->marks[self->num_marks - 1] : 0;
4752     return mark;
4753 }
4754 
/* Append Py_None to the unpickler's stack.  PDATA_APPEND is a macro defined
   elsewhere; the trailing -1 is presumably the value it makes this function
   return on failure -- confirm.  Returns 0 on success. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
4761 
4762 static int
load_int(UnpicklerObject * self)4763 load_int(UnpicklerObject *self)
4764 {
4765     PyObject *value;
4766     char *endptr, *s;
4767     Py_ssize_t len;
4768     long x;
4769 
4770     if ((len = _Unpickler_Readline(self, &s)) < 0)
4771         return -1;
4772     if (len < 2)
4773         return bad_readline();
4774 
4775     errno = 0;
4776     /* XXX: Should the base argument of strtol() be explicitly set to 10?
4777        XXX(avassalotti): Should this uses PyOS_strtol()? */
4778     x = strtol(s, &endptr, 0);
4779 
4780     if (errno || (*endptr != '\n' && *endptr != '\0')) {
4781         /* Hm, maybe we've got something long.  Let's try reading
4782          * it as a Python int object. */
4783         errno = 0;
4784         /* XXX: Same thing about the base here. */
4785         value = PyLong_FromString(s, NULL, 0);
4786         if (value == NULL) {
4787             PyErr_SetString(PyExc_ValueError,
4788                             "could not convert string to int");
4789             return -1;
4790         }
4791     }
4792     else {
4793         if (len == 3 && (x == 0 || x == 1)) {
4794             if ((value = PyBool_FromLong(x)) == NULL)
4795                 return -1;
4796         }
4797         else {
4798             if ((value = PyLong_FromLong(x)) == NULL)
4799                 return -1;
4800         }
4801     }
4802 
4803     PDATA_PUSH(self->stack, value, -1);
4804     return 0;
4805 }
4806 
/* Append `boolean` (which must be Py_True or Py_False, checked by assert)
   to the unpickler's stack.  PDATA_APPEND is a macro defined elsewhere; the
   trailing -1 is presumably the value it makes this function return on
   failure -- confirm.  Returns 0 on success. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
4814 
4815 /* s contains x bytes of an unsigned little-endian integer.  Return its value
4816  * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
4817  */
4818 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)4819 calc_binsize(char *bytes, int nbytes)
4820 {
4821     unsigned char *s = (unsigned char *)bytes;
4822     int i;
4823     size_t x = 0;
4824 
4825     if (nbytes > (int)sizeof(size_t)) {
4826         /* Check for integer overflow.  BINBYTES8 and BINUNICODE8 opcodes
4827          * have 64-bit size that can't be represented on 32-bit platform.
4828          */
4829         for (i = (int)sizeof(size_t); i < nbytes; i++) {
4830             if (s[i])
4831                 return -1;
4832         }
4833         nbytes = (int)sizeof(size_t);
4834     }
4835     for (i = 0; i < nbytes; i++) {
4836         x |= (size_t) s[i] << (8 * i);
4837     }
4838 
4839     if (x > PY_SSIZE_T_MAX)
4840         return -1;
4841     else
4842         return (Py_ssize_t) x;
4843 }
4844 
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure:  when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is a
 * historical source of cross-platform bugs.
 *
 * The value is accumulated in an unsigned long so that a set high bit in
 * the fourth byte never left-shifts into the sign bit of a signed long
 * (undefined behaviour where long is 32 bits wide).
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    for (i = 0; i < nbytes; i++) {
        ux |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  When bit 31 is set the value is ux - 2**32; compute that
     * without overflowing, whatever the width of long.
     */
    if (nbytes == 4 && (ux & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - ux) - 1;
    }

    return (long)ux;
}
4871 
4872 static int
load_binintx(UnpicklerObject * self,char * s,int size)4873 load_binintx(UnpicklerObject *self, char *s, int size)
4874 {
4875     PyObject *value;
4876     long x;
4877 
4878     x = calc_binint(s, size);
4879 
4880     if ((value = PyLong_FromLong(x)) == NULL)
4881         return -1;
4882 
4883     PDATA_PUSH(self->stack, value, -1);
4884     return 0;
4885 }
4886 
4887 static int
load_binint(UnpicklerObject * self)4888 load_binint(UnpicklerObject *self)
4889 {
4890     char *s;
4891 
4892     if (_Unpickler_Read(self, &s, 4) < 0)
4893         return -1;
4894 
4895     return load_binintx(self, s, 4);
4896 }
4897 
4898 static int
load_binint1(UnpicklerObject * self)4899 load_binint1(UnpicklerObject *self)
4900 {
4901     char *s;
4902 
4903     if (_Unpickler_Read(self, &s, 1) < 0)
4904         return -1;
4905 
4906     return load_binintx(self, s, 1);
4907 }
4908 
4909 static int
load_binint2(UnpicklerObject * self)4910 load_binint2(UnpicklerObject *self)
4911 {
4912     char *s;
4913 
4914     if (_Unpickler_Read(self, &s, 2) < 0)
4915         return -1;
4916 
4917     return load_binintx(self, s, 2);
4918 }
4919 
4920 static int
load_long(UnpicklerObject * self)4921 load_long(UnpicklerObject *self)
4922 {
4923     PyObject *value;
4924     char *s = NULL;
4925     Py_ssize_t len;
4926 
4927     if ((len = _Unpickler_Readline(self, &s)) < 0)
4928         return -1;
4929     if (len < 2)
4930         return bad_readline();
4931 
4932     /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4933        the 'L' before calling PyLong_FromString.  In order to maintain
4934        compatibility with Python 3.0.0, we don't actually *require*
4935        the 'L' to be present. */
4936     if (s[len-2] == 'L')
4937         s[len-2] = '\0';
4938     /* XXX: Should the base argument explicitly set to 10? */
4939     value = PyLong_FromString(s, NULL, 0);
4940     if (value == NULL)
4941         return -1;
4942 
4943     PDATA_PUSH(self->stack, value, -1);
4944     return 0;
4945 }
4946 
4947 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
4948  * data following.
4949  */
4950 static int
load_counted_long(UnpicklerObject * self,int size)4951 load_counted_long(UnpicklerObject *self, int size)
4952 {
4953     PyObject *value;
4954     char *nbytes;
4955     char *pdata;
4956 
4957     assert(size == 1 || size == 4);
4958     if (_Unpickler_Read(self, &nbytes, size) < 0)
4959         return -1;
4960 
4961     size = calc_binint(nbytes, size);
4962     if (size < 0) {
4963         PickleState *st = _Pickle_GetGlobalState();
4964         /* Corrupt or hostile pickle -- we never write one like this */
4965         PyErr_SetString(st->UnpicklingError,
4966                         "LONG pickle has negative byte count");
4967         return -1;
4968     }
4969 
4970     if (size == 0)
4971         value = PyLong_FromLong(0L);
4972     else {
4973         /* Read the raw little-endian bytes and convert. */
4974         if (_Unpickler_Read(self, &pdata, size) < 0)
4975             return -1;
4976         value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4977                                       1 /* little endian */ , 1 /* signed */ );
4978     }
4979     if (value == NULL)
4980         return -1;
4981     PDATA_PUSH(self->stack, value, -1);
4982     return 0;
4983 }
4984 
4985 static int
load_float(UnpicklerObject * self)4986 load_float(UnpicklerObject *self)
4987 {
4988     PyObject *value;
4989     char *endptr, *s;
4990     Py_ssize_t len;
4991     double d;
4992 
4993     if ((len = _Unpickler_Readline(self, &s)) < 0)
4994         return -1;
4995     if (len < 2)
4996         return bad_readline();
4997 
4998     errno = 0;
4999     d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5000     if (d == -1.0 && PyErr_Occurred())
5001         return -1;
5002     if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5003         PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5004         return -1;
5005     }
5006     value = PyFloat_FromDouble(d);
5007     if (value == NULL)
5008         return -1;
5009 
5010     PDATA_PUSH(self->stack, value, -1);
5011     return 0;
5012 }
5013 
5014 static int
load_binfloat(UnpicklerObject * self)5015 load_binfloat(UnpicklerObject *self)
5016 {
5017     PyObject *value;
5018     double x;
5019     char *s;
5020 
5021     if (_Unpickler_Read(self, &s, 8) < 0)
5022         return -1;
5023 
5024     x = _PyFloat_Unpack8((unsigned char *)s, 0);
5025     if (x == -1.0 && PyErr_Occurred())
5026         return -1;
5027 
5028     if ((value = PyFloat_FromDouble(x)) == NULL)
5029         return -1;
5030 
5031     PDATA_PUSH(self->stack, value, -1);
5032     return 0;
5033 }
5034 
5035 static int
load_string(UnpicklerObject * self)5036 load_string(UnpicklerObject *self)
5037 {
5038     PyObject *bytes;
5039     PyObject *obj;
5040     Py_ssize_t len;
5041     char *s, *p;
5042 
5043     if ((len = _Unpickler_Readline(self, &s)) < 0)
5044         return -1;
5045     /* Strip the newline */
5046     len--;
5047     /* Strip outermost quotes */
5048     if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5049         p = s + 1;
5050         len -= 2;
5051     }
5052     else {
5053         PickleState *st = _Pickle_GetGlobalState();
5054         PyErr_SetString(st->UnpicklingError,
5055                         "the STRING opcode argument must be quoted");
5056         return -1;
5057     }
5058     assert(len >= 0);
5059 
5060     /* Use the PyBytes API to decode the string, since that is what is used
5061        to encode, and then coerce the result to Unicode. */
5062     bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5063     if (bytes == NULL)
5064         return -1;
5065 
5066     /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5067        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5068     if (strcmp(self->encoding, "bytes") == 0) {
5069         obj = bytes;
5070     }
5071     else {
5072         obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5073         Py_DECREF(bytes);
5074         if (obj == NULL) {
5075             return -1;
5076         }
5077     }
5078 
5079     PDATA_PUSH(self->stack, obj, -1);
5080     return 0;
5081 }
5082 
5083 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5084 load_counted_binstring(UnpicklerObject *self, int nbytes)
5085 {
5086     PyObject *obj;
5087     Py_ssize_t size;
5088     char *s;
5089 
5090     if (_Unpickler_Read(self, &s, nbytes) < 0)
5091         return -1;
5092 
5093     size = calc_binsize(s, nbytes);
5094     if (size < 0) {
5095         PickleState *st = _Pickle_GetGlobalState();
5096         PyErr_Format(st->UnpicklingError,
5097                      "BINSTRING exceeds system's maximum size of %zd bytes",
5098                      PY_SSIZE_T_MAX);
5099         return -1;
5100     }
5101 
5102     if (_Unpickler_Read(self, &s, size) < 0)
5103         return -1;
5104 
5105     /* Convert Python 2.x strings to bytes if the *encoding* given to the
5106        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5107     if (strcmp(self->encoding, "bytes") == 0) {
5108         obj = PyBytes_FromStringAndSize(s, size);
5109     }
5110     else {
5111         obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5112     }
5113     if (obj == NULL) {
5114         return -1;
5115     }
5116 
5117     PDATA_PUSH(self->stack, obj, -1);
5118     return 0;
5119 }
5120 
5121 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5122 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5123 {
5124     PyObject *bytes;
5125     Py_ssize_t size;
5126     char *s;
5127 
5128     if (_Unpickler_Read(self, &s, nbytes) < 0)
5129         return -1;
5130 
5131     size = calc_binsize(s, nbytes);
5132     if (size < 0) {
5133         PyErr_Format(PyExc_OverflowError,
5134                      "BINBYTES exceeds system's maximum size of %zd bytes",
5135                      PY_SSIZE_T_MAX);
5136         return -1;
5137     }
5138 
5139     if (_Unpickler_Read(self, &s, size) < 0)
5140         return -1;
5141 
5142     bytes = PyBytes_FromStringAndSize(s, size);
5143     if (bytes == NULL)
5144         return -1;
5145 
5146     PDATA_PUSH(self->stack, bytes, -1);
5147     return 0;
5148 }
5149 
5150 static int
load_unicode(UnpicklerObject * self)5151 load_unicode(UnpicklerObject *self)
5152 {
5153     PyObject *str;
5154     Py_ssize_t len;
5155     char *s = NULL;
5156 
5157     if ((len = _Unpickler_Readline(self, &s)) < 0)
5158         return -1;
5159     if (len < 1)
5160         return bad_readline();
5161 
5162     str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5163     if (str == NULL)
5164         return -1;
5165 
5166     PDATA_PUSH(self->stack, str, -1);
5167     return 0;
5168 }
5169 
5170 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5171 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5172 {
5173     PyObject *str;
5174     Py_ssize_t size;
5175     char *s;
5176 
5177     if (_Unpickler_Read(self, &s, nbytes) < 0)
5178         return -1;
5179 
5180     size = calc_binsize(s, nbytes);
5181     if (size < 0) {
5182         PyErr_Format(PyExc_OverflowError,
5183                      "BINUNICODE exceeds system's maximum size of %zd bytes",
5184                      PY_SSIZE_T_MAX);
5185         return -1;
5186     }
5187 
5188     if (_Unpickler_Read(self, &s, size) < 0)
5189         return -1;
5190 
5191     str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5192     if (str == NULL)
5193         return -1;
5194 
5195     PDATA_PUSH(self->stack, str, -1);
5196     return 0;
5197 }
5198 
5199 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5200 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5201 {
5202     PyObject *tuple;
5203 
5204     if (Py_SIZE(self->stack) < len)
5205         return Pdata_stack_underflow(self->stack);
5206 
5207     tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5208     if (tuple == NULL)
5209         return -1;
5210     PDATA_PUSH(self->stack, tuple, -1);
5211     return 0;
5212 }
5213 
5214 static int
load_tuple(UnpicklerObject * self)5215 load_tuple(UnpicklerObject *self)
5216 {
5217     Py_ssize_t i;
5218 
5219     if ((i = marker(self)) < 0)
5220         return -1;
5221 
5222     return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5223 }
5224 
5225 static int
load_empty_list(UnpicklerObject * self)5226 load_empty_list(UnpicklerObject *self)
5227 {
5228     PyObject *list;
5229 
5230     if ((list = PyList_New(0)) == NULL)
5231         return -1;
5232     PDATA_PUSH(self->stack, list, -1);
5233     return 0;
5234 }
5235 
5236 static int
load_empty_dict(UnpicklerObject * self)5237 load_empty_dict(UnpicklerObject *self)
5238 {
5239     PyObject *dict;
5240 
5241     if ((dict = PyDict_New()) == NULL)
5242         return -1;
5243     PDATA_PUSH(self->stack, dict, -1);
5244     return 0;
5245 }
5246 
5247 static int
load_empty_set(UnpicklerObject * self)5248 load_empty_set(UnpicklerObject *self)
5249 {
5250     PyObject *set;
5251 
5252     if ((set = PySet_New(NULL)) == NULL)
5253         return -1;
5254     PDATA_PUSH(self->stack, set, -1);
5255     return 0;
5256 }
5257 
5258 static int
load_list(UnpicklerObject * self)5259 load_list(UnpicklerObject *self)
5260 {
5261     PyObject *list;
5262     Py_ssize_t i;
5263 
5264     if ((i = marker(self)) < 0)
5265         return -1;
5266 
5267     list = Pdata_poplist(self->stack, i);
5268     if (list == NULL)
5269         return -1;
5270     PDATA_PUSH(self->stack, list, -1);
5271     return 0;
5272 }
5273 
5274 static int
load_dict(UnpicklerObject * self)5275 load_dict(UnpicklerObject *self)
5276 {
5277     PyObject *dict, *key, *value;
5278     Py_ssize_t i, j, k;
5279 
5280     if ((i = marker(self)) < 0)
5281         return -1;
5282     j = Py_SIZE(self->stack);
5283 
5284     if ((dict = PyDict_New()) == NULL)
5285         return -1;
5286 
5287     if ((j - i) % 2 != 0) {
5288         PickleState *st = _Pickle_GetGlobalState();
5289         PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5290         Py_DECREF(dict);
5291         return -1;
5292     }
5293 
5294     for (k = i + 1; k < j; k += 2) {
5295         key = self->stack->data[k - 1];
5296         value = self->stack->data[k];
5297         if (PyDict_SetItem(dict, key, value) < 0) {
5298             Py_DECREF(dict);
5299             return -1;
5300         }
5301     }
5302     Pdata_clear(self->stack, i);
5303     PDATA_PUSH(self->stack, dict, -1);
5304     return 0;
5305 }
5306 
5307 static int
load_frozenset(UnpicklerObject * self)5308 load_frozenset(UnpicklerObject *self)
5309 {
5310     PyObject *items;
5311     PyObject *frozenset;
5312     Py_ssize_t i;
5313 
5314     if ((i = marker(self)) < 0)
5315         return -1;
5316 
5317     items = Pdata_poptuple(self->stack, i);
5318     if (items == NULL)
5319         return -1;
5320 
5321     frozenset = PyFrozenSet_New(items);
5322     Py_DECREF(items);
5323     if (frozenset == NULL)
5324         return -1;
5325 
5326     PDATA_PUSH(self->stack, frozenset, -1);
5327     return 0;
5328 }
5329 
5330 static PyObject *
instantiate(PyObject * cls,PyObject * args)5331 instantiate(PyObject *cls, PyObject *args)
5332 {
5333     /* Caller must assure args are a tuple.  Normally, args come from
5334        Pdata_poptuple which packs objects from the top of the stack
5335        into a newly created tuple. */
5336     assert(PyTuple_Check(args));
5337     if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5338         _Py_IDENTIFIER(__getinitargs__);
5339         _Py_IDENTIFIER(__new__);
5340         PyObject *func;
5341         if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5342             return NULL;
5343         }
5344         if (func == NULL) {
5345             return _PyObject_CallMethodIdObjArgs(cls, &PyId___new__, cls, NULL);
5346         }
5347         Py_DECREF(func);
5348     }
5349     return PyObject_CallObject(cls, args);
5350 }
5351 
5352 static int
load_obj(UnpicklerObject * self)5353 load_obj(UnpicklerObject *self)
5354 {
5355     PyObject *cls, *args, *obj = NULL;
5356     Py_ssize_t i;
5357 
5358     if ((i = marker(self)) < 0)
5359         return -1;
5360 
5361     if (Py_SIZE(self->stack) - i < 1)
5362         return Pdata_stack_underflow(self->stack);
5363 
5364     args = Pdata_poptuple(self->stack, i + 1);
5365     if (args == NULL)
5366         return -1;
5367 
5368     PDATA_POP(self->stack, cls);
5369     if (cls) {
5370         obj = instantiate(cls, args);
5371         Py_DECREF(cls);
5372     }
5373     Py_DECREF(args);
5374     if (obj == NULL)
5375         return -1;
5376 
5377     PDATA_PUSH(self->stack, obj, -1);
5378     return 0;
5379 }
5380 
5381 static int
load_inst(UnpicklerObject * self)5382 load_inst(UnpicklerObject *self)
5383 {
5384     PyObject *cls = NULL;
5385     PyObject *args = NULL;
5386     PyObject *obj = NULL;
5387     PyObject *module_name;
5388     PyObject *class_name;
5389     Py_ssize_t len;
5390     Py_ssize_t i;
5391     char *s;
5392 
5393     if ((i = marker(self)) < 0)
5394         return -1;
5395     if ((len = _Unpickler_Readline(self, &s)) < 0)
5396         return -1;
5397     if (len < 2)
5398         return bad_readline();
5399 
5400     /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5401        identifiers are permitted in Python 3.0, since the INST opcode is only
5402        supported by older protocols on Python 2.x. */
5403     module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5404     if (module_name == NULL)
5405         return -1;
5406 
5407     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5408         if (len < 2) {
5409             Py_DECREF(module_name);
5410             return bad_readline();
5411         }
5412         class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5413         if (class_name != NULL) {
5414             cls = find_class(self, module_name, class_name);
5415             Py_DECREF(class_name);
5416         }
5417     }
5418     Py_DECREF(module_name);
5419 
5420     if (cls == NULL)
5421         return -1;
5422 
5423     if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5424         obj = instantiate(cls, args);
5425         Py_DECREF(args);
5426     }
5427     Py_DECREF(cls);
5428 
5429     if (obj == NULL)
5430         return -1;
5431 
5432     PDATA_PUSH(self->stack, obj, -1);
5433     return 0;
5434 }
5435 
5436 static int
load_newobj(UnpicklerObject * self)5437 load_newobj(UnpicklerObject *self)
5438 {
5439     PyObject *args = NULL;
5440     PyObject *clsraw = NULL;
5441     PyTypeObject *cls;          /* clsraw cast to its true type */
5442     PyObject *obj;
5443     PickleState *st = _Pickle_GetGlobalState();
5444 
5445     /* Stack is ... cls argtuple, and we want to call
5446      * cls.__new__(cls, *argtuple).
5447      */
5448     PDATA_POP(self->stack, args);
5449     if (args == NULL)
5450         goto error;
5451     if (!PyTuple_Check(args)) {
5452         PyErr_SetString(st->UnpicklingError,
5453                         "NEWOBJ expected an arg " "tuple.");
5454         goto error;
5455     }
5456 
5457     PDATA_POP(self->stack, clsraw);
5458     cls = (PyTypeObject *)clsraw;
5459     if (cls == NULL)
5460         goto error;
5461     if (!PyType_Check(cls)) {
5462         PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5463                         "isn't a type object");
5464         goto error;
5465     }
5466     if (cls->tp_new == NULL) {
5467         PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5468                         "has NULL tp_new");
5469         goto error;
5470     }
5471 
5472     /* Call __new__. */
5473     obj = cls->tp_new(cls, args, NULL);
5474     if (obj == NULL)
5475         goto error;
5476 
5477     Py_DECREF(args);
5478     Py_DECREF(clsraw);
5479     PDATA_PUSH(self->stack, obj, -1);
5480     return 0;
5481 
5482   error:
5483     Py_XDECREF(args);
5484     Py_XDECREF(clsraw);
5485     return -1;
5486 }
5487 
5488 static int
load_newobj_ex(UnpicklerObject * self)5489 load_newobj_ex(UnpicklerObject *self)
5490 {
5491     PyObject *cls, *args, *kwargs;
5492     PyObject *obj;
5493     PickleState *st = _Pickle_GetGlobalState();
5494 
5495     PDATA_POP(self->stack, kwargs);
5496     if (kwargs == NULL) {
5497         return -1;
5498     }
5499     PDATA_POP(self->stack, args);
5500     if (args == NULL) {
5501         Py_DECREF(kwargs);
5502         return -1;
5503     }
5504     PDATA_POP(self->stack, cls);
5505     if (cls == NULL) {
5506         Py_DECREF(kwargs);
5507         Py_DECREF(args);
5508         return -1;
5509     }
5510 
5511     if (!PyType_Check(cls)) {
5512         Py_DECREF(kwargs);
5513         Py_DECREF(args);
5514         PyErr_Format(st->UnpicklingError,
5515                      "NEWOBJ_EX class argument must be a type, not %.200s",
5516                      Py_TYPE(cls)->tp_name);
5517         Py_DECREF(cls);
5518         return -1;
5519     }
5520 
5521     if (((PyTypeObject *)cls)->tp_new == NULL) {
5522         Py_DECREF(kwargs);
5523         Py_DECREF(args);
5524         Py_DECREF(cls);
5525         PyErr_SetString(st->UnpicklingError,
5526                         "NEWOBJ_EX class argument doesn't have __new__");
5527         return -1;
5528     }
5529     obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5530     Py_DECREF(kwargs);
5531     Py_DECREF(args);
5532     Py_DECREF(cls);
5533     if (obj == NULL) {
5534         return -1;
5535     }
5536     PDATA_PUSH(self->stack, obj, -1);
5537     return 0;
5538 }
5539 
5540 static int
load_global(UnpicklerObject * self)5541 load_global(UnpicklerObject *self)
5542 {
5543     PyObject *global = NULL;
5544     PyObject *module_name;
5545     PyObject *global_name;
5546     Py_ssize_t len;
5547     char *s;
5548 
5549     if ((len = _Unpickler_Readline(self, &s)) < 0)
5550         return -1;
5551     if (len < 2)
5552         return bad_readline();
5553     module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5554     if (!module_name)
5555         return -1;
5556 
5557     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5558         if (len < 2) {
5559             Py_DECREF(module_name);
5560             return bad_readline();
5561         }
5562         global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5563         if (global_name) {
5564             global = find_class(self, module_name, global_name);
5565             Py_DECREF(global_name);
5566         }
5567     }
5568     Py_DECREF(module_name);
5569 
5570     if (global == NULL)
5571         return -1;
5572     PDATA_PUSH(self->stack, global, -1);
5573     return 0;
5574 }
5575 
5576 static int
load_stack_global(UnpicklerObject * self)5577 load_stack_global(UnpicklerObject *self)
5578 {
5579     PyObject *global;
5580     PyObject *module_name;
5581     PyObject *global_name;
5582 
5583     PDATA_POP(self->stack, global_name);
5584     PDATA_POP(self->stack, module_name);
5585     if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
5586         global_name == NULL || !PyUnicode_CheckExact(global_name)) {
5587         PickleState *st = _Pickle_GetGlobalState();
5588         PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
5589         Py_XDECREF(global_name);
5590         Py_XDECREF(module_name);
5591         return -1;
5592     }
5593     global = find_class(self, module_name, global_name);
5594     Py_DECREF(global_name);
5595     Py_DECREF(module_name);
5596     if (global == NULL)
5597         return -1;
5598     PDATA_PUSH(self->stack, global, -1);
5599     return 0;
5600 }
5601 
5602 static int
load_persid(UnpicklerObject * self)5603 load_persid(UnpicklerObject *self)
5604 {
5605     PyObject *pid, *obj;
5606     Py_ssize_t len;
5607     char *s;
5608 
5609     if (self->pers_func) {
5610         if ((len = _Unpickler_Readline(self, &s)) < 0)
5611             return -1;
5612         if (len < 1)
5613             return bad_readline();
5614 
5615         pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
5616         if (pid == NULL) {
5617             if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
5618                 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
5619                                 "persistent IDs in protocol 0 must be "
5620                                 "ASCII strings");
5621             }
5622             return -1;
5623         }
5624 
5625         obj = call_method(self->pers_func, self->pers_func_self, pid);
5626         Py_DECREF(pid);
5627         if (obj == NULL)
5628             return -1;
5629 
5630         PDATA_PUSH(self->stack, obj, -1);
5631         return 0;
5632     }
5633     else {
5634         PickleState *st = _Pickle_GetGlobalState();
5635         PyErr_SetString(st->UnpicklingError,
5636                         "A load persistent id instruction was encountered,\n"
5637                         "but no persistent_load function was specified.");
5638         return -1;
5639     }
5640 }
5641 
5642 static int
load_binpersid(UnpicklerObject * self)5643 load_binpersid(UnpicklerObject *self)
5644 {
5645     PyObject *pid, *obj;
5646 
5647     if (self->pers_func) {
5648         PDATA_POP(self->stack, pid);
5649         if (pid == NULL)
5650             return -1;
5651 
5652         obj = call_method(self->pers_func, self->pers_func_self, pid);
5653         Py_DECREF(pid);
5654         if (obj == NULL)
5655             return -1;
5656 
5657         PDATA_PUSH(self->stack, obj, -1);
5658         return 0;
5659     }
5660     else {
5661         PickleState *st = _Pickle_GetGlobalState();
5662         PyErr_SetString(st->UnpicklingError,
5663                         "A load persistent id instruction was encountered,\n"
5664                         "but no persistent_load function was specified.");
5665         return -1;
5666     }
5667 }
5668 
5669 static int
load_pop(UnpicklerObject * self)5670 load_pop(UnpicklerObject *self)
5671 {
5672     Py_ssize_t len = Py_SIZE(self->stack);
5673 
5674     /* Note that we split the (pickle.py) stack into two stacks,
5675      * an object stack and a mark stack. We have to be clever and
5676      * pop the right one. We do this by looking at the top of the
5677      * mark stack first, and only signalling a stack underflow if
5678      * the object stack is empty and the mark stack doesn't match
5679      * our expectations.
5680      */
5681     if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
5682         self->num_marks--;
5683         self->stack->mark_set = self->num_marks != 0;
5684         self->stack->fence = self->num_marks ?
5685                 self->marks[self->num_marks - 1] : 0;
5686     } else if (len <= self->stack->fence)
5687         return Pdata_stack_underflow(self->stack);
5688     else {
5689         len--;
5690         Py_DECREF(self->stack->data[len]);
5691         Py_SIZE(self->stack) = len;
5692     }
5693     return 0;
5694 }
5695 
5696 static int
load_pop_mark(UnpicklerObject * self)5697 load_pop_mark(UnpicklerObject *self)
5698 {
5699     Py_ssize_t i;
5700 
5701     if ((i = marker(self)) < 0)
5702         return -1;
5703 
5704     Pdata_clear(self->stack, i);
5705 
5706     return 0;
5707 }
5708 
5709 static int
load_dup(UnpicklerObject * self)5710 load_dup(UnpicklerObject *self)
5711 {
5712     PyObject *last;
5713     Py_ssize_t len = Py_SIZE(self->stack);
5714 
5715     if (len <= self->stack->fence)
5716         return Pdata_stack_underflow(self->stack);
5717     last = self->stack->data[len - 1];
5718     PDATA_APPEND(self->stack, last, -1);
5719     return 0;
5720 }
5721 
5722 static int
load_get(UnpicklerObject * self)5723 load_get(UnpicklerObject *self)
5724 {
5725     PyObject *key, *value;
5726     Py_ssize_t idx;
5727     Py_ssize_t len;
5728     char *s;
5729 
5730     if ((len = _Unpickler_Readline(self, &s)) < 0)
5731         return -1;
5732     if (len < 2)
5733         return bad_readline();
5734 
5735     key = PyLong_FromString(s, NULL, 10);
5736     if (key == NULL)
5737         return -1;
5738     idx = PyLong_AsSsize_t(key);
5739     if (idx == -1 && PyErr_Occurred()) {
5740         Py_DECREF(key);
5741         return -1;
5742     }
5743 
5744     value = _Unpickler_MemoGet(self, idx);
5745     if (value == NULL) {
5746         if (!PyErr_Occurred())
5747             PyErr_SetObject(PyExc_KeyError, key);
5748         Py_DECREF(key);
5749         return -1;
5750     }
5751     Py_DECREF(key);
5752 
5753     PDATA_APPEND(self->stack, value, -1);
5754     return 0;
5755 }
5756 
5757 static int
load_binget(UnpicklerObject * self)5758 load_binget(UnpicklerObject *self)
5759 {
5760     PyObject *value;
5761     Py_ssize_t idx;
5762     char *s;
5763 
5764     if (_Unpickler_Read(self, &s, 1) < 0)
5765         return -1;
5766 
5767     idx = Py_CHARMASK(s[0]);
5768 
5769     value = _Unpickler_MemoGet(self, idx);
5770     if (value == NULL) {
5771         PyObject *key = PyLong_FromSsize_t(idx);
5772         if (key != NULL) {
5773             PyErr_SetObject(PyExc_KeyError, key);
5774             Py_DECREF(key);
5775         }
5776         return -1;
5777     }
5778 
5779     PDATA_APPEND(self->stack, value, -1);
5780     return 0;
5781 }
5782 
5783 static int
load_long_binget(UnpicklerObject * self)5784 load_long_binget(UnpicklerObject *self)
5785 {
5786     PyObject *value;
5787     Py_ssize_t idx;
5788     char *s;
5789 
5790     if (_Unpickler_Read(self, &s, 4) < 0)
5791         return -1;
5792 
5793     idx = calc_binsize(s, 4);
5794 
5795     value = _Unpickler_MemoGet(self, idx);
5796     if (value == NULL) {
5797         PyObject *key = PyLong_FromSsize_t(idx);
5798         if (key != NULL) {
5799             PyErr_SetObject(PyExc_KeyError, key);
5800             Py_DECREF(key);
5801         }
5802         return -1;
5803     }
5804 
5805     PDATA_APPEND(self->stack, value, -1);
5806     return 0;
5807 }
5808 
5809 /* Push an object from the extension registry (EXT[124]).  nbytes is
5810  * the number of bytes following the opcode, holding the index (code) value.
5811  */
5812 static int
load_extension(UnpicklerObject * self,int nbytes)5813 load_extension(UnpicklerObject *self, int nbytes)
5814 {
5815     char *codebytes;            /* the nbytes bytes after the opcode */
5816     long code;                  /* calc_binint returns long */
5817     PyObject *py_code;          /* code as a Python int */
5818     PyObject *obj;              /* the object to push */
5819     PyObject *pair;             /* (module_name, class_name) */
5820     PyObject *module_name, *class_name;
5821     PickleState *st = _Pickle_GetGlobalState();
5822 
5823     assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
5824     if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
5825         return -1;
5826     code = calc_binint(codebytes, nbytes);
5827     if (code <= 0) {            /* note that 0 is forbidden */
5828         /* Corrupt or hostile pickle. */
5829         PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
5830         return -1;
5831     }
5832 
5833     /* Look for the code in the cache. */
5834     py_code = PyLong_FromLong(code);
5835     if (py_code == NULL)
5836         return -1;
5837     obj = PyDict_GetItemWithError(st->extension_cache, py_code);
5838     if (obj != NULL) {
5839         /* Bingo. */
5840         Py_DECREF(py_code);
5841         PDATA_APPEND(self->stack, obj, -1);
5842         return 0;
5843     }
5844     if (PyErr_Occurred()) {
5845         Py_DECREF(py_code);
5846         return -1;
5847     }
5848 
5849     /* Look up the (module_name, class_name) pair. */
5850     pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
5851     if (pair == NULL) {
5852         Py_DECREF(py_code);
5853         if (!PyErr_Occurred()) {
5854             PyErr_Format(PyExc_ValueError, "unregistered extension "
5855                          "code %ld", code);
5856         }
5857         return -1;
5858     }
5859     /* Since the extension registry is manipulable via Python code,
5860      * confirm that pair is really a 2-tuple of strings.
5861      */
5862     if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
5863         !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
5864         !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
5865         Py_DECREF(py_code);
5866         PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
5867                      "isn't a 2-tuple of strings", code);
5868         return -1;
5869     }
5870     /* Load the object. */
5871     obj = find_class(self, module_name, class_name);
5872     if (obj == NULL) {
5873         Py_DECREF(py_code);
5874         return -1;
5875     }
5876     /* Cache code -> obj. */
5877     code = PyDict_SetItem(st->extension_cache, py_code, obj);
5878     Py_DECREF(py_code);
5879     if (code < 0) {
5880         Py_DECREF(obj);
5881         return -1;
5882     }
5883     PDATA_PUSH(self->stack, obj, -1);
5884     return 0;
5885 }
5886 
5887 static int
load_put(UnpicklerObject * self)5888 load_put(UnpicklerObject *self)
5889 {
5890     PyObject *key, *value;
5891     Py_ssize_t idx;
5892     Py_ssize_t len;
5893     char *s = NULL;
5894 
5895     if ((len = _Unpickler_Readline(self, &s)) < 0)
5896         return -1;
5897     if (len < 2)
5898         return bad_readline();
5899     if (Py_SIZE(self->stack) <= self->stack->fence)
5900         return Pdata_stack_underflow(self->stack);
5901     value = self->stack->data[Py_SIZE(self->stack) - 1];
5902 
5903     key = PyLong_FromString(s, NULL, 10);
5904     if (key == NULL)
5905         return -1;
5906     idx = PyLong_AsSsize_t(key);
5907     Py_DECREF(key);
5908     if (idx < 0) {
5909         if (!PyErr_Occurred())
5910             PyErr_SetString(PyExc_ValueError,
5911                             "negative PUT argument");
5912         return -1;
5913     }
5914 
5915     return _Unpickler_MemoPut(self, idx, value);
5916 }
5917 
5918 static int
load_binput(UnpicklerObject * self)5919 load_binput(UnpicklerObject *self)
5920 {
5921     PyObject *value;
5922     Py_ssize_t idx;
5923     char *s;
5924 
5925     if (_Unpickler_Read(self, &s, 1) < 0)
5926         return -1;
5927 
5928     if (Py_SIZE(self->stack) <= self->stack->fence)
5929         return Pdata_stack_underflow(self->stack);
5930     value = self->stack->data[Py_SIZE(self->stack) - 1];
5931 
5932     idx = Py_CHARMASK(s[0]);
5933 
5934     return _Unpickler_MemoPut(self, idx, value);
5935 }
5936 
5937 static int
load_long_binput(UnpicklerObject * self)5938 load_long_binput(UnpicklerObject *self)
5939 {
5940     PyObject *value;
5941     Py_ssize_t idx;
5942     char *s;
5943 
5944     if (_Unpickler_Read(self, &s, 4) < 0)
5945         return -1;
5946 
5947     if (Py_SIZE(self->stack) <= self->stack->fence)
5948         return Pdata_stack_underflow(self->stack);
5949     value = self->stack->data[Py_SIZE(self->stack) - 1];
5950 
5951     idx = calc_binsize(s, 4);
5952     if (idx < 0) {
5953         PyErr_SetString(PyExc_ValueError,
5954                         "negative LONG_BINPUT argument");
5955         return -1;
5956     }
5957 
5958     return _Unpickler_MemoPut(self, idx, value);
5959 }
5960 
5961 static int
load_memoize(UnpicklerObject * self)5962 load_memoize(UnpicklerObject *self)
5963 {
5964     PyObject *value;
5965 
5966     if (Py_SIZE(self->stack) <= self->stack->fence)
5967         return Pdata_stack_underflow(self->stack);
5968     value = self->stack->data[Py_SIZE(self->stack) - 1];
5969 
5970     return _Unpickler_MemoPut(self, self->memo_len, value);
5971 }
5972 
5973 static int
do_append(UnpicklerObject * self,Py_ssize_t x)5974 do_append(UnpicklerObject *self, Py_ssize_t x)
5975 {
5976     PyObject *value;
5977     PyObject *slice;
5978     PyObject *list;
5979     PyObject *result;
5980     Py_ssize_t len, i;
5981 
5982     len = Py_SIZE(self->stack);
5983     if (x > len || x <= self->stack->fence)
5984         return Pdata_stack_underflow(self->stack);
5985     if (len == x)  /* nothing to do */
5986         return 0;
5987 
5988     list = self->stack->data[x - 1];
5989 
5990     if (PyList_CheckExact(list)) {
5991         Py_ssize_t list_len;
5992         int ret;
5993 
5994         slice = Pdata_poplist(self->stack, x);
5995         if (!slice)
5996             return -1;
5997         list_len = PyList_GET_SIZE(list);
5998         ret = PyList_SetSlice(list, list_len, list_len, slice);
5999         Py_DECREF(slice);
6000         return ret;
6001     }
6002     else {
6003         PyObject *extend_func;
6004         _Py_IDENTIFIER(extend);
6005 
6006         extend_func = _PyObject_GetAttrId(list, &PyId_extend);
6007         if (extend_func != NULL) {
6008             slice = Pdata_poplist(self->stack, x);
6009             if (!slice) {
6010                 Py_DECREF(extend_func);
6011                 return -1;
6012             }
6013             result = _Pickle_FastCall(extend_func, slice);
6014             Py_DECREF(extend_func);
6015             if (result == NULL)
6016                 return -1;
6017             Py_DECREF(result);
6018         }
6019         else {
6020             PyObject *append_func;
6021             _Py_IDENTIFIER(append);
6022 
6023             /* Even if the PEP 307 requires extend() and append() methods,
6024                fall back on append() if the object has no extend() method
6025                for backward compatibility. */
6026             PyErr_Clear();
6027             append_func = _PyObject_GetAttrId(list, &PyId_append);
6028             if (append_func == NULL)
6029                 return -1;
6030             for (i = x; i < len; i++) {
6031                 value = self->stack->data[i];
6032                 result = _Pickle_FastCall(append_func, value);
6033                 if (result == NULL) {
6034                     Pdata_clear(self->stack, i + 1);
6035                     Py_SIZE(self->stack) = x;
6036                     Py_DECREF(append_func);
6037                     return -1;
6038                 }
6039                 Py_DECREF(result);
6040             }
6041             Py_SIZE(self->stack) = x;
6042             Py_DECREF(append_func);
6043         }
6044     }
6045 
6046     return 0;
6047 }
6048 
6049 static int
load_append(UnpicklerObject * self)6050 load_append(UnpicklerObject *self)
6051 {
6052     if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6053         return Pdata_stack_underflow(self->stack);
6054     return do_append(self, Py_SIZE(self->stack) - 1);
6055 }
6056 
6057 static int
load_appends(UnpicklerObject * self)6058 load_appends(UnpicklerObject *self)
6059 {
6060     Py_ssize_t i = marker(self);
6061     if (i < 0)
6062         return -1;
6063     return do_append(self, i);
6064 }
6065 
6066 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6067 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6068 {
6069     PyObject *value, *key;
6070     PyObject *dict;
6071     Py_ssize_t len, i;
6072     int status = 0;
6073 
6074     len = Py_SIZE(self->stack);
6075     if (x > len || x <= self->stack->fence)
6076         return Pdata_stack_underflow(self->stack);
6077     if (len == x)  /* nothing to do */
6078         return 0;
6079     if ((len - x) % 2 != 0) {
6080         PickleState *st = _Pickle_GetGlobalState();
6081         /* Currupt or hostile pickle -- we never write one like this. */
6082         PyErr_SetString(st->UnpicklingError,
6083                         "odd number of items for SETITEMS");
6084         return -1;
6085     }
6086 
6087     /* Here, dict does not actually need to be a PyDict; it could be anything
6088        that supports the __setitem__ attribute. */
6089     dict = self->stack->data[x - 1];
6090 
6091     for (i = x + 1; i < len; i += 2) {
6092         key = self->stack->data[i - 1];
6093         value = self->stack->data[i];
6094         if (PyObject_SetItem(dict, key, value) < 0) {
6095             status = -1;
6096             break;
6097         }
6098     }
6099 
6100     Pdata_clear(self->stack, x);
6101     return status;
6102 }
6103 
6104 static int
load_setitem(UnpicklerObject * self)6105 load_setitem(UnpicklerObject *self)
6106 {
6107     return do_setitems(self, Py_SIZE(self->stack) - 2);
6108 }
6109 
6110 static int
load_setitems(UnpicklerObject * self)6111 load_setitems(UnpicklerObject *self)
6112 {
6113     Py_ssize_t i = marker(self);
6114     if (i < 0)
6115         return -1;
6116     return do_setitems(self, i);
6117 }
6118 
6119 static int
load_additems(UnpicklerObject * self)6120 load_additems(UnpicklerObject *self)
6121 {
6122     PyObject *set;
6123     Py_ssize_t mark, len, i;
6124 
6125     mark =  marker(self);
6126     if (mark < 0)
6127         return -1;
6128     len = Py_SIZE(self->stack);
6129     if (mark > len || mark <= self->stack->fence)
6130         return Pdata_stack_underflow(self->stack);
6131     if (len == mark)  /* nothing to do */
6132         return 0;
6133 
6134     set = self->stack->data[mark - 1];
6135 
6136     if (PySet_Check(set)) {
6137         PyObject *items;
6138         int status;
6139 
6140         items = Pdata_poptuple(self->stack, mark);
6141         if (items == NULL)
6142             return -1;
6143 
6144         status = _PySet_Update(set, items);
6145         Py_DECREF(items);
6146         return status;
6147     }
6148     else {
6149         PyObject *add_func;
6150         _Py_IDENTIFIER(add);
6151 
6152         add_func = _PyObject_GetAttrId(set, &PyId_add);
6153         if (add_func == NULL)
6154             return -1;
6155         for (i = mark; i < len; i++) {
6156             PyObject *result;
6157             PyObject *item;
6158 
6159             item = self->stack->data[i];
6160             result = _Pickle_FastCall(add_func, item);
6161             if (result == NULL) {
6162                 Pdata_clear(self->stack, i + 1);
6163                 Py_SIZE(self->stack) = mark;
6164                 return -1;
6165             }
6166             Py_DECREF(result);
6167         }
6168         Py_SIZE(self->stack) = mark;
6169     }
6170 
6171     return 0;
6172 }
6173 
6174 static int
load_build(UnpicklerObject * self)6175 load_build(UnpicklerObject *self)
6176 {
6177     PyObject *state, *inst, *slotstate;
6178     PyObject *setstate;
6179     int status = 0;
6180     _Py_IDENTIFIER(__setstate__);
6181 
6182     /* Stack is ... instance, state.  We want to leave instance at
6183      * the stack top, possibly mutated via instance.__setstate__(state).
6184      */
6185     if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6186         return Pdata_stack_underflow(self->stack);
6187 
6188     PDATA_POP(self->stack, state);
6189     if (state == NULL)
6190         return -1;
6191 
6192     inst = self->stack->data[Py_SIZE(self->stack) - 1];
6193 
6194     if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6195         Py_DECREF(state);
6196         return -1;
6197     }
6198     if (setstate != NULL) {
6199         PyObject *result;
6200 
6201         /* The explicit __setstate__ is responsible for everything. */
6202         result = _Pickle_FastCall(setstate, state);
6203         Py_DECREF(setstate);
6204         if (result == NULL)
6205             return -1;
6206         Py_DECREF(result);
6207         return 0;
6208     }
6209 
6210     /* A default __setstate__.  First see whether state embeds a
6211      * slot state dict too (a proto 2 addition).
6212      */
6213     if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6214         PyObject *tmp = state;
6215 
6216         state = PyTuple_GET_ITEM(tmp, 0);
6217         slotstate = PyTuple_GET_ITEM(tmp, 1);
6218         Py_INCREF(state);
6219         Py_INCREF(slotstate);
6220         Py_DECREF(tmp);
6221     }
6222     else
6223         slotstate = NULL;
6224 
6225     /* Set inst.__dict__ from the state dict (if any). */
6226     if (state != Py_None) {
6227         PyObject *dict;
6228         PyObject *d_key, *d_value;
6229         Py_ssize_t i;
6230         _Py_IDENTIFIER(__dict__);
6231 
6232         if (!PyDict_Check(state)) {
6233             PickleState *st = _Pickle_GetGlobalState();
6234             PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6235             goto error;
6236         }
6237         dict = _PyObject_GetAttrId(inst, &PyId___dict__);
6238         if (dict == NULL)
6239             goto error;
6240 
6241         i = 0;
6242         while (PyDict_Next(state, &i, &d_key, &d_value)) {
6243             /* normally the keys for instance attributes are
6244                interned.  we should try to do that here. */
6245             Py_INCREF(d_key);
6246             if (PyUnicode_CheckExact(d_key))
6247                 PyUnicode_InternInPlace(&d_key);
6248             if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6249                 Py_DECREF(d_key);
6250                 goto error;
6251             }
6252             Py_DECREF(d_key);
6253         }
6254         Py_DECREF(dict);
6255     }
6256 
6257     /* Also set instance attributes from the slotstate dict (if any). */
6258     if (slotstate != NULL) {
6259         PyObject *d_key, *d_value;
6260         Py_ssize_t i;
6261 
6262         if (!PyDict_Check(slotstate)) {
6263             PickleState *st = _Pickle_GetGlobalState();
6264             PyErr_SetString(st->UnpicklingError,
6265                             "slot state is not a dictionary");
6266             goto error;
6267         }
6268         i = 0;
6269         while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6270             if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6271                 goto error;
6272         }
6273     }
6274 
6275     if (0) {
6276   error:
6277         status = -1;
6278     }
6279 
6280     Py_DECREF(state);
6281     Py_XDECREF(slotstate);
6282     return status;
6283 }
6284 
6285 static int
load_mark(UnpicklerObject * self)6286 load_mark(UnpicklerObject *self)
6287 {
6288 
6289     /* Note that we split the (pickle.py) stack into two stacks, an
6290      * object stack and a mark stack. Here we push a mark onto the
6291      * mark stack.
6292      */
6293 
6294     if ((self->num_marks + 1) >= self->marks_size) {
6295         size_t alloc;
6296 
6297         /* Use the size_t type to check for overflow. */
6298         alloc = ((size_t)self->num_marks << 1) + 20;
6299         if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
6300             alloc <= ((size_t)self->num_marks + 1)) {
6301             PyErr_NoMemory();
6302             return -1;
6303         }
6304 
6305         Py_ssize_t *marks_old = self->marks;
6306         PyMem_RESIZE(self->marks, Py_ssize_t, alloc);
6307         if (self->marks == NULL) {
6308             PyMem_FREE(marks_old);
6309             self->marks_size = 0;
6310             PyErr_NoMemory();
6311             return -1;
6312         }
6313         self->marks_size = (Py_ssize_t)alloc;
6314     }
6315 
6316     self->stack->mark_set = 1;
6317     self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6318 
6319     return 0;
6320 }
6321 
6322 static int
load_reduce(UnpicklerObject * self)6323 load_reduce(UnpicklerObject *self)
6324 {
6325     PyObject *callable = NULL;
6326     PyObject *argtup = NULL;
6327     PyObject *obj = NULL;
6328 
6329     PDATA_POP(self->stack, argtup);
6330     if (argtup == NULL)
6331         return -1;
6332     PDATA_POP(self->stack, callable);
6333     if (callable) {
6334         obj = PyObject_CallObject(callable, argtup);
6335         Py_DECREF(callable);
6336     }
6337     Py_DECREF(argtup);
6338 
6339     if (obj == NULL)
6340         return -1;
6341 
6342     PDATA_PUSH(self->stack, obj, -1);
6343     return 0;
6344 }
6345 
6346 /* Just raises an error if we don't know the protocol specified.  PROTO
6347  * is the first opcode for protocols >= 2.
6348  */
6349 static int
load_proto(UnpicklerObject * self)6350 load_proto(UnpicklerObject *self)
6351 {
6352     char *s;
6353     int i;
6354 
6355     if (_Unpickler_Read(self, &s, 1) < 0)
6356         return -1;
6357 
6358     i = (unsigned char)s[0];
6359     if (i <= HIGHEST_PROTOCOL) {
6360         self->proto = i;
6361         return 0;
6362     }
6363 
6364     PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6365     return -1;
6366 }
6367 
6368 static int
load_frame(UnpicklerObject * self)6369 load_frame(UnpicklerObject *self)
6370 {
6371     char *s;
6372     Py_ssize_t frame_len;
6373 
6374     if (_Unpickler_Read(self, &s, 8) < 0)
6375         return -1;
6376 
6377     frame_len = calc_binsize(s, 8);
6378     if (frame_len < 0) {
6379         PyErr_Format(PyExc_OverflowError,
6380                      "FRAME length exceeds system's maximum of %zd bytes",
6381                      PY_SSIZE_T_MAX);
6382         return -1;
6383     }
6384 
6385     if (_Unpickler_Read(self, &s, frame_len) < 0)
6386         return -1;
6387 
6388     /* Rewind to start of frame */
6389     self->next_read_idx -= frame_len;
6390     return 0;
6391 }
6392 
6393 static PyObject *
load(UnpicklerObject * self)6394 load(UnpicklerObject *self)
6395 {
6396     PyObject *value = NULL;
6397     char *s = NULL;
6398 
6399     self->num_marks = 0;
6400     self->stack->mark_set = 0;
6401     self->stack->fence = 0;
6402     self->proto = 0;
6403     if (Py_SIZE(self->stack))
6404         Pdata_clear(self->stack, 0);
6405 
6406     /* Convenient macros for the dispatch while-switch loop just below. */
6407 #define OP(opcode, load_func) \
6408     case opcode: if (load_func(self) < 0) break; continue;
6409 
6410 #define OP_ARG(opcode, load_func, arg) \
6411     case opcode: if (load_func(self, (arg)) < 0) break; continue;
6412 
6413     while (1) {
6414         if (_Unpickler_Read(self, &s, 1) < 0) {
6415             PickleState *st = _Pickle_GetGlobalState();
6416             if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6417                 PyErr_Format(PyExc_EOFError, "Ran out of input");
6418             }
6419             return NULL;
6420         }
6421 
6422         switch ((enum opcode)s[0]) {
6423         OP(NONE, load_none)
6424         OP(BININT, load_binint)
6425         OP(BININT1, load_binint1)
6426         OP(BININT2, load_binint2)
6427         OP(INT, load_int)
6428         OP(LONG, load_long)
6429         OP_ARG(LONG1, load_counted_long, 1)
6430         OP_ARG(LONG4, load_counted_long, 4)
6431         OP(FLOAT, load_float)
6432         OP(BINFLOAT, load_binfloat)
6433         OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6434         OP_ARG(BINBYTES, load_counted_binbytes, 4)
6435         OP_ARG(BINBYTES8, load_counted_binbytes, 8)
6436         OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6437         OP_ARG(BINSTRING, load_counted_binstring, 4)
6438         OP(STRING, load_string)
6439         OP(UNICODE, load_unicode)
6440         OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6441         OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6442         OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
6443         OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6444         OP_ARG(TUPLE1, load_counted_tuple, 1)
6445         OP_ARG(TUPLE2, load_counted_tuple, 2)
6446         OP_ARG(TUPLE3, load_counted_tuple, 3)
6447         OP(TUPLE, load_tuple)
6448         OP(EMPTY_LIST, load_empty_list)
6449         OP(LIST, load_list)
6450         OP(EMPTY_DICT, load_empty_dict)
6451         OP(DICT, load_dict)
6452         OP(EMPTY_SET, load_empty_set)
6453         OP(ADDITEMS, load_additems)
6454         OP(FROZENSET, load_frozenset)
6455         OP(OBJ, load_obj)
6456         OP(INST, load_inst)
6457         OP(NEWOBJ, load_newobj)
6458         OP(NEWOBJ_EX, load_newobj_ex)
6459         OP(GLOBAL, load_global)
6460         OP(STACK_GLOBAL, load_stack_global)
6461         OP(APPEND, load_append)
6462         OP(APPENDS, load_appends)
6463         OP(BUILD, load_build)
6464         OP(DUP, load_dup)
6465         OP(BINGET, load_binget)
6466         OP(LONG_BINGET, load_long_binget)
6467         OP(GET, load_get)
6468         OP(MARK, load_mark)
6469         OP(BINPUT, load_binput)
6470         OP(LONG_BINPUT, load_long_binput)
6471         OP(PUT, load_put)
6472         OP(MEMOIZE, load_memoize)
6473         OP(POP, load_pop)
6474         OP(POP_MARK, load_pop_mark)
6475         OP(SETITEM, load_setitem)
6476         OP(SETITEMS, load_setitems)
6477         OP(PERSID, load_persid)
6478         OP(BINPERSID, load_binpersid)
6479         OP(REDUCE, load_reduce)
6480         OP(PROTO, load_proto)
6481         OP(FRAME, load_frame)
6482         OP_ARG(EXT1, load_extension, 1)
6483         OP_ARG(EXT2, load_extension, 2)
6484         OP_ARG(EXT4, load_extension, 4)
6485         OP_ARG(NEWTRUE, load_bool, Py_True)
6486         OP_ARG(NEWFALSE, load_bool, Py_False)
6487 
6488         case STOP:
6489             break;
6490 
6491         default:
6492             {
6493                 PickleState *st = _Pickle_GetGlobalState();
6494                 unsigned char c = (unsigned char) *s;
6495                 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6496                     PyErr_Format(st->UnpicklingError,
6497                                  "invalid load key, '%c'.", c);
6498                 }
6499                 else {
6500                     PyErr_Format(st->UnpicklingError,
6501                                  "invalid load key, '\\x%02x'.", c);
6502                 }
6503                 return NULL;
6504             }
6505         }
6506 
6507         break;                  /* and we are done! */
6508     }
6509 
6510     if (PyErr_Occurred()) {
6511         return NULL;
6512     }
6513 
6514     if (_Unpickler_SkipConsumed(self) < 0)
6515         return NULL;
6516 
6517     PDATA_POP(self->stack, value);
6518     return value;
6519 }
6520 
6521 /*[clinic input]
6522 
6523 _pickle.Unpickler.load
6524 
6525 Load a pickle.
6526 
6527 Read a pickled object representation from the open file object given
6528 in the constructor, and return the reconstituted object hierarchy
6529 specified therein.
6530 [clinic start generated code]*/
6531 
6532 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)6533 _pickle_Unpickler_load_impl(UnpicklerObject *self)
6534 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
6535 {
6536     UnpicklerObject *unpickler = (UnpicklerObject*)self;
6537 
6538     /* Check whether the Unpickler was initialized correctly. This prevents
6539        segfaulting if a subclass overridden __init__ with a function that does
6540        not call Unpickler.__init__(). Here, we simply ensure that self->read
6541        is not NULL. */
6542     if (unpickler->read == NULL) {
6543         PickleState *st = _Pickle_GetGlobalState();
6544         PyErr_Format(st->UnpicklingError,
6545                      "Unpickler.__init__() was not called by %s.__init__()",
6546                      Py_TYPE(unpickler)->tp_name);
6547         return NULL;
6548     }
6549 
6550     return load(unpickler);
6551 }
6552 
6553 /* The name of find_class() is misleading. In newer pickle protocols, this
6554    function is used for loading any global (i.e., functions), not just
6555    classes. The name is kept only for backward compatibility. */
6556 
6557 /*[clinic input]
6558 
6559 _pickle.Unpickler.find_class
6560 
6561   module_name: object
6562   global_name: object
6563   /
6564 
6565 Return an object from a specified module.
6566 
6567 If necessary, the module will be imported. Subclasses may override
6568 this method (e.g. to restrict unpickling of arbitrary classes and
6569 functions).
6570 
6571 This method is called whenever a class or a function object is
6572 needed.  Both arguments passed are str objects.
6573 [clinic start generated code]*/
6574 
6575 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)6576 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
6577                                   PyObject *module_name,
6578                                   PyObject *global_name)
6579 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
6580 {
6581     PyObject *global;
6582     PyObject *module;
6583 
6584     /* Try to map the old names used in Python 2.x to the new ones used in
6585        Python 3.x.  We do this only with old pickle protocols and when the
6586        user has not disabled the feature. */
6587     if (self->proto < 3 && self->fix_imports) {
6588         PyObject *key;
6589         PyObject *item;
6590         PickleState *st = _Pickle_GetGlobalState();
6591 
6592         /* Check if the global (i.e., a function or a class) was renamed
6593            or moved to another module. */
6594         key = PyTuple_Pack(2, module_name, global_name);
6595         if (key == NULL)
6596             return NULL;
6597         item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
6598         Py_DECREF(key);
6599         if (item) {
6600             if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
6601                 PyErr_Format(PyExc_RuntimeError,
6602                              "_compat_pickle.NAME_MAPPING values should be "
6603                              "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
6604                 return NULL;
6605             }
6606             module_name = PyTuple_GET_ITEM(item, 0);
6607             global_name = PyTuple_GET_ITEM(item, 1);
6608             if (!PyUnicode_Check(module_name) ||
6609                 !PyUnicode_Check(global_name)) {
6610                 PyErr_Format(PyExc_RuntimeError,
6611                              "_compat_pickle.NAME_MAPPING values should be "
6612                              "pairs of str, not (%.200s, %.200s)",
6613                              Py_TYPE(module_name)->tp_name,
6614                              Py_TYPE(global_name)->tp_name);
6615                 return NULL;
6616             }
6617         }
6618         else if (PyErr_Occurred()) {
6619             return NULL;
6620         }
6621         else {
6622             /* Check if the module was renamed. */
6623             item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
6624             if (item) {
6625                 if (!PyUnicode_Check(item)) {
6626                     PyErr_Format(PyExc_RuntimeError,
6627                                 "_compat_pickle.IMPORT_MAPPING values should be "
6628                                 "strings, not %.200s", Py_TYPE(item)->tp_name);
6629                     return NULL;
6630                 }
6631                 module_name = item;
6632             }
6633             else if (PyErr_Occurred()) {
6634                 return NULL;
6635             }
6636         }
6637     }
6638 
6639     /*
6640      * we don't use PyImport_GetModule here, because it can return partially-
6641      * initialised modules, which then cause the getattribute to fail.
6642      */
6643     module = PyImport_Import(module_name);
6644     if (module == NULL) {
6645         return NULL;
6646     }
6647     global = getattribute(module, global_name, self->proto >= 4);
6648     Py_DECREF(module);
6649     return global;
6650 }
6651 
6652 /*[clinic input]
6653 
6654 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
6655 
6656 Returns size in memory, in bytes.
6657 [clinic start generated code]*/
6658 
6659 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)6660 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
6661 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
6662 {
6663     Py_ssize_t res;
6664 
6665     res = _PyObject_SIZE(Py_TYPE(self));
6666     if (self->memo != NULL)
6667         res += self->memo_size * sizeof(PyObject *);
6668     if (self->marks != NULL)
6669         res += self->marks_size * sizeof(Py_ssize_t);
6670     if (self->input_line != NULL)
6671         res += strlen(self->input_line) + 1;
6672     if (self->encoding != NULL)
6673         res += strlen(self->encoding) + 1;
6674     if (self->errors != NULL)
6675         res += strlen(self->errors) + 1;
6676     return res;
6677 }
6678 
/* Method table for _pickle.Unpickler (installed as tp_methods of
   Unpickler_Type).  The *_METHODDEF macros are defined outside this part of
   the file; they are written without separating commas, so each presumably
   expands to a complete entry including its trailing comma. */
static struct PyMethodDef Unpickler_methods[] = {
    _PICKLE_UNPICKLER_LOAD_METHODDEF
    _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
    _PICKLE_UNPICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
6685 
/* tp_dealloc for Unpickler: release everything the object owns, then free
   the object itself through the type's tp_free slot. */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Stop GC tracking first so the collector never sees the object while
       it is being torn down. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->read);
    Py_XDECREF(self->peek);
    Py_XDECREF(self->stack);
    /* NOTE(review): pers_func_self is not released here; presumably it is a
       borrowed reference -- confirm against init_method_ref(). */
    Py_XDECREF(self->pers_func);
    /* A non-NULL buf marks a Py_buffer that is still held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    /* Release the memo entries and the raw C allocations. */
    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    PyMem_Free(self->input_line);
    PyMem_Free(self->encoding);
    PyMem_Free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
6708 
/* tp_traverse for Unpickler: report the object references held directly in
   the struct to the cyclic garbage collector.  The visit and arg parameters
   are consumed implicitly by the Py_VISIT macro.  Always returns 0 unless a
   visit callback makes Py_VISIT return early. */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->read);
    Py_VISIT(self->peek);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    return 0;
}
6719 
/* tp_clear for Unpickler; also called by __init__() to drop the state left
   by a previous initialization.  Unlike the deallocator, every freed pointer
   is reset to NULL so the object stays safe to clear again or to
   re-initialize.  Always returns 0. */
static int
Unpickler_clear(UnpicklerObject *self)
{
    Py_CLEAR(self->readline);
    Py_CLEAR(self->read);
    Py_CLEAR(self->peek);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->pers_func);
    /* A non-NULL buf marks a Py_buffer that is still held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    /* Release the memo entries, then the raw C allocations. */
    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    self->marks = NULL;
    PyMem_Free(self->input_line);
    self->input_line = NULL;
    PyMem_Free(self->encoding);
    self->encoding = NULL;
    PyMem_Free(self->errors);
    self->errors = NULL;

    return 0;
}
6745 
6746 /*[clinic input]
6747 
6748 _pickle.Unpickler.__init__
6749 
6750   file: object
6751   *
6752   fix_imports: bool = True
6753   encoding: str = 'ASCII'
6754   errors: str = 'strict'
6755 
6756 This takes a binary file for reading a pickle data stream.
6757 
6758 The protocol version of the pickle is detected automatically, so no
6759 protocol argument is needed.  Bytes past the pickled object's
6760 representation are ignored.
6761 
6762 The argument *file* must have two methods, a read() method that takes
6763 an integer argument, and a readline() method that requires no
6764 arguments.  Both methods should return bytes.  Thus *file* can be a
6765 binary file object opened for reading, an io.BytesIO object, or any
6766 other custom object that meets this interface.
6767 
6768 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
6769 which are used to control compatibility support for pickle stream
6770 generated by Python 2.  If *fix_imports* is True, pickle will try to
6771 map the old Python 2 names to the new names used in Python 3.  The
6772 *encoding* and *errors* tell pickle how to decode 8-bit string
6773 instances pickled by Python 2; these default to 'ASCII' and 'strict',
6774 respectively.  The *encoding* can be 'bytes' to read these 8-bit
6775 string instances as bytes objects.
6776 [clinic start generated code]*/
6777 
6778 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors)6779 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
6780                                 int fix_imports, const char *encoding,
6781                                 const char *errors)
6782 /*[clinic end generated code: output=e2c8ce748edc57b0 input=f9b7da04f5f4f335]*/
6783 {
6784     _Py_IDENTIFIER(persistent_load);
6785 
6786     /* In case of multiple __init__() calls, clear previous content. */
6787     if (self->read != NULL)
6788         (void)Unpickler_clear(self);
6789 
6790     if (_Unpickler_SetInputStream(self, file) < 0)
6791         return -1;
6792 
6793     if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
6794         return -1;
6795 
6796     self->fix_imports = fix_imports;
6797 
6798     if (init_method_ref((PyObject *)self, &PyId_persistent_load,
6799                         &self->pers_func, &self->pers_func_self) < 0)
6800     {
6801         return -1;
6802     }
6803 
6804     self->stack = (Pdata *)Pdata_New();
6805     if (self->stack == NULL)
6806         return -1;
6807 
6808     self->memo_size = 32;
6809     self->memo = _Unpickler_NewMemo(self->memo_size);
6810     if (self->memo == NULL)
6811         return -1;
6812 
6813     self->proto = 0;
6814 
6815     return 0;
6816 }
6817 
6818 
6819 /* Define a proxy object for the Unpickler's internal memo object. This is to
6820  * avoid breaking code like:
6821  *  unpickler.memo.clear()
6822  * and
6823  *  unpickler.memo = saved_memo
6824  * Is this a good idea? Not really, but we don't want to break code that uses
6825  * it. Note that we don't implement the entire mapping API here. This is
6826  * intentional, as these should be treated as black-box implementation details.
6827  *
6828  * We do, however, have to implement pickling/unpickling support because of
6829  * real-world code like cvs2svn.
6830  */
6831 
6832 /*[clinic input]
6833 _pickle.UnpicklerMemoProxy.clear
6834 
6835 Remove all items from memo.
6836 [clinic start generated code]*/
6837 
6838 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)6839 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
6840 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
6841 {
6842     _Unpickler_MemoCleanup(self->unpickler);
6843     self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
6844     if (self->unpickler->memo == NULL)
6845         return NULL;
6846     Py_RETURN_NONE;
6847 }
6848 
6849 /*[clinic input]
6850 _pickle.UnpicklerMemoProxy.copy
6851 
6852 Copy the memo to a new object.
6853 [clinic start generated code]*/
6854 
6855 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)6856 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
6857 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
6858 {
6859     size_t i;
6860     PyObject *new_memo = PyDict_New();
6861     if (new_memo == NULL)
6862         return NULL;
6863 
6864     for (i = 0; i < self->unpickler->memo_size; i++) {
6865         int status;
6866         PyObject *key, *value;
6867 
6868         value = self->unpickler->memo[i];
6869         if (value == NULL)
6870             continue;
6871 
6872         key = PyLong_FromSsize_t(i);
6873         if (key == NULL)
6874             goto error;
6875         status = PyDict_SetItem(new_memo, key, value);
6876         Py_DECREF(key);
6877         if (status < 0)
6878             goto error;
6879     }
6880     return new_memo;
6881 
6882 error:
6883     Py_DECREF(new_memo);
6884     return NULL;
6885 }
6886 
6887 /*[clinic input]
6888 _pickle.UnpicklerMemoProxy.__reduce__
6889 
6890 Implement pickling support.
6891 [clinic start generated code]*/
6892 
6893 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)6894 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
6895 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
6896 {
6897     PyObject *reduce_value;
6898     PyObject *constructor_args;
6899     PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
6900     if (contents == NULL)
6901         return NULL;
6902 
6903     reduce_value = PyTuple_New(2);
6904     if (reduce_value == NULL) {
6905         Py_DECREF(contents);
6906         return NULL;
6907     }
6908     constructor_args = PyTuple_New(1);
6909     if (constructor_args == NULL) {
6910         Py_DECREF(contents);
6911         Py_DECREF(reduce_value);
6912         return NULL;
6913     }
6914     PyTuple_SET_ITEM(constructor_args, 0, contents);
6915     Py_INCREF((PyObject *)&PyDict_Type);
6916     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
6917     PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
6918     return reduce_value;
6919 }
6920 
/* Method table for _pickle.UnpicklerMemoProxy (installed as tp_methods of
   UnpicklerMemoProxyType).  The *_METHODDEF macros are defined outside this
   part of the file; they are written without separating commas, so each
   presumably expands to a complete entry including its trailing comma. */
static PyMethodDef unpicklerproxy_methods[] = {
    _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL}    /* sentinel */
};
6927 
/* tp_dealloc for UnpicklerMemoProxy: untrack the proxy from the GC, drop
   its reference to the wrapped unpickler and free the proxy itself. */
static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->unpickler);
    PyObject_GC_Del((PyObject *)self);
}
6935 
/* tp_traverse for UnpicklerMemoProxy: the wrapped unpickler is the only
   object reference the proxy holds.  The visit and arg parameters are
   consumed implicitly by the Py_VISIT macro. */
static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
                            visitproc visit, void *arg)
{
    Py_VISIT(self->unpickler);
    return 0;
}
6943 
/* tp_clear for UnpicklerMemoProxy: drop the reference to the wrapped
   unpickler and leave the field NULL.  Always returns 0. */
static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
{
    Py_CLEAR(self->unpickler);
    return 0;
}
6950 
/* Type object for the proxy returned by Unpickler.memo.  The proxy is
   GC-tracked, unhashable, and exposes only the methods listed in
   unpicklerproxy_methods.  Slots after tp_methods are left
   zero-initialized. */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /*tp_name*/
    sizeof(UnpicklerMemoProxyObject),           /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved (was tp_compare) */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};
6981 
6982 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)6983 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
6984 {
6985     UnpicklerMemoProxyObject *self;
6986 
6987     self = PyObject_GC_New(UnpicklerMemoProxyObject,
6988                            &UnpicklerMemoProxyType);
6989     if (self == NULL)
6990         return NULL;
6991     Py_INCREF(unpickler);
6992     self->unpickler = unpickler;
6993     PyObject_GC_Track(self);
6994     return (PyObject *)self;
6995 }
6996 
6997 /*****************************************************************************/
6998 
6999 
/* Getter for Unpickler.memo.  Every access builds a fresh
   UnpicklerMemoProxy bound to this unpickler; returns NULL if the proxy
   cannot be allocated. */
static PyObject *
Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
{
    return UnpicklerMemoProxy_New(self);
}
7005 
/* Setter for Unpickler.memo.
 *
 * Accepts either an UnpicklerMemoProxy (exact type; the memo array of the
 * unpickler it wraps is copied slot by slot) or a dict whose keys are
 * non-negative ints.  Deleting the attribute (obj == NULL) and assigning
 * any other type both raise TypeError.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
{
    PyObject **new_memo;
    size_t new_memo_size = 0;

    if (obj == NULL) {
        PyErr_SetString(PyExc_TypeError,
                        "attribute deletion is not supported");
        return -1;
    }

    if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
        UnpicklerObject *unpickler =
            ((UnpicklerMemoProxyObject *)obj)->unpickler;

        /* Build an array of the same capacity as the source memo and share
           its entries, taking a new reference to each occupied slot. */
        new_memo_size = unpickler->memo_size;
        new_memo = _Unpickler_NewMemo(new_memo_size);
        if (new_memo == NULL)
            return -1;

        for (size_t i = 0; i < new_memo_size; i++) {
            Py_XINCREF(unpickler->memo[i]);
            new_memo[i] = unpickler->memo[i];
        }
    }
    else if (PyDict_Check(obj)) {
        Py_ssize_t i = 0;
        PyObject *key, *value;

        /* The new array is sized by the number of dict items, not by the
           largest key. */
        new_memo_size = PyDict_GET_SIZE(obj);
        new_memo = _Unpickler_NewMemo(new_memo_size);
        if (new_memo == NULL)
            return -1;

        while (PyDict_Next(obj, &i, &key, &value)) {
            Py_ssize_t idx;
            if (!PyLong_Check(key)) {
                PyErr_SetString(PyExc_TypeError,
                                "memo key must be integers");
                goto error;
            }
            idx = PyLong_AsSsize_t(key);
            /* -1 is ambiguous: it is only an error if an exception is set. */
            if (idx == -1 && PyErr_Occurred())
                goto error;
            /* Zero is accepted, despite the wording of the message. */
            if (idx < 0) {
                PyErr_SetString(PyExc_ValueError,
                                "memo key must be positive integers.");
                goto error;
            }
            /* NOTE(review): this stores through self, i.e. presumably into
               the memo that is still installed on self, not into new_memo.
               That memo is released by _Unpickler_MemoCleanup() below and
               replaced by new_memo, which this loop never fills -- verify
               whether the dict contents are meant to survive the
               assignment. */
            if (_Unpickler_MemoPut(self, idx, value) < 0)
                goto error;
        }
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "'memo' attribute must be an UnpicklerMemoProxy object "
                     "or dict, not %.200s", Py_TYPE(obj)->tp_name);
        return -1;
    }

    /* Success: release the old memo and install the new one. */
    _Unpickler_MemoCleanup(self);
    self->memo_size = new_memo_size;
    self->memo = new_memo;

    return 0;

  error:
    /* Only reached from the dict branch.  The loop counts down and stops
       when i wraps around to SIZE_MAX after slot 0.  When new_memo_size is
       0 the array is not freed here. */
    if (new_memo_size) {
        for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
            Py_XDECREF(new_memo[i]);
        }
        PyMem_FREE(new_memo);
    }
    return -1;
}
7082 
7083 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7084 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7085 {
7086     if (self->pers_func == NULL) {
7087         PyErr_SetString(PyExc_AttributeError, "persistent_load");
7088         return NULL;
7089     }
7090     return reconstruct_method(self->pers_func, self->pers_func_self);
7091 }
7092 
7093 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))7094 Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7095 {
7096     if (value == NULL) {
7097         PyErr_SetString(PyExc_TypeError,
7098                         "attribute deletion is not supported");
7099         return -1;
7100     }
7101     if (!PyCallable_Check(value)) {
7102         PyErr_SetString(PyExc_TypeError,
7103                         "persistent_load must be a callable taking "
7104                         "one argument");
7105         return -1;
7106     }
7107 
7108     self->pers_func_self = NULL;
7109     Py_INCREF(value);
7110     Py_XSETREF(self->pers_func, value);
7111 
7112     return 0;
7113 }
7114 
/* Computed attributes of _pickle.Unpickler.  Each entry gives the attribute
   name, its getter and its setter; the remaining PyGetSetDef fields are left
   zero-initialized.  The table ends with an all-zero sentinel. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
                        (setter)Unpickler_set_persload},
    {NULL}
};
7121 
/* Type object for _pickle.Unpickler: a GC-tracked, subclassable type.  Its
   docstring is the one generated for __init__, instances are created with
   the generic alloc/new functions and initialized by the Argument Clinic
   wrapper _pickle_Unpickler___init__. */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    _pickle_Unpickler___init____doc__,  /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Unpickler___init__,         /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
7164 
7165 /*[clinic input]
7166 
7167 _pickle.dump
7168 
7169   obj: object
7170   file: object
7171   protocol: object = NULL
7172   *
7173   fix_imports: bool = True
7174 
7175 Write a pickled representation of obj to the open file object file.
7176 
7177 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7178 be more efficient.
7179 
7180 The optional *protocol* argument tells the pickler to use the given
protocol; supported protocols are 0, 1, 2, 3 and 4.  The default
7182 protocol is 3; a backward-incompatible protocol designed for Python 3.
7183 
7184 Specifying a negative protocol version selects the highest protocol
7185 version supported.  The higher the protocol used, the more recent the
7186 version of Python needed to read the pickle produced.
7187 
7188 The *file* argument must have a write() method that accepts a single
7189 bytes argument.  It can thus be a file object opened for binary
7190 writing, an io.BytesIO instance, or any other custom object that meets
7191 this interface.
7192 
7193 If *fix_imports* is True and protocol is less than 3, pickle will try
7194 to map the new Python 3 names to the old module names used in Python
7195 2, so that the pickle data stream is readable with Python 2.
7196 [clinic start generated code]*/
7197 
7198 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports)7199 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7200                   PyObject *protocol, int fix_imports)
7201 /*[clinic end generated code: output=a4774d5fde7d34de input=830f8a64cef6f042]*/
7202 {
7203     PicklerObject *pickler = _Pickler_New();
7204 
7205     if (pickler == NULL)
7206         return NULL;
7207 
7208     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7209         goto error;
7210 
7211     if (_Pickler_SetOutputStream(pickler, file) < 0)
7212         goto error;
7213 
7214     if (dump(pickler, obj) < 0)
7215         goto error;
7216 
7217     if (_Pickler_FlushToFile(pickler) < 0)
7218         goto error;
7219 
7220     Py_DECREF(pickler);
7221     Py_RETURN_NONE;
7222 
7223   error:
7224     Py_XDECREF(pickler);
7225     return NULL;
7226 }
7227 
7228 /*[clinic input]
7229 
7230 _pickle.dumps
7231 
7232   obj: object
7233   protocol: object = NULL
7234   *
7235   fix_imports: bool = True
7236 
7237 Return the pickled representation of the object as a bytes object.
7238 
7239 The optional *protocol* argument tells the pickler to use the given
7240 protocol; supported protocols are 0, 1, 2, 3 and 4.  The default
7241 protocol is 3; a backward-incompatible protocol designed for Python 3.
7242 
7243 Specifying a negative protocol version selects the highest protocol
7244 version supported.  The higher the protocol used, the more recent the
7245 version of Python needed to read the pickle produced.
7246 
7247 If *fix_imports* is True and *protocol* is less than 3, pickle will
7248 try to map the new Python 3 names to the old module names used in
7249 Python 2, so that the pickle data stream is readable with Python 2.
7250 [clinic start generated code]*/
7251 
7252 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports)7253 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7254                    int fix_imports)
7255 /*[clinic end generated code: output=d75d5cda456fd261 input=293dbeda181580b7]*/
7256 {
7257     PyObject *result;
7258     PicklerObject *pickler = _Pickler_New();
7259 
7260     if (pickler == NULL)
7261         return NULL;
7262 
7263     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7264         goto error;
7265 
7266     if (dump(pickler, obj) < 0)
7267         goto error;
7268 
7269     result = _Pickler_GetString(pickler);
7270     Py_DECREF(pickler);
7271     return result;
7272 
7273   error:
7274     Py_XDECREF(pickler);
7275     return NULL;
7276 }
7277 
7278 /*[clinic input]
7279 
7280 _pickle.load
7281 
7282   file: object
7283   *
7284   fix_imports: bool = True
7285   encoding: str = 'ASCII'
7286   errors: str = 'strict'
7287 
7288 Read and return an object from the pickle data stored in a file.
7289 
7290 This is equivalent to ``Unpickler(file).load()``, but may be more
7291 efficient.
7292 
7293 The protocol version of the pickle is detected automatically, so no
7294 protocol argument is needed.  Bytes past the pickled object's
7295 representation are ignored.
7296 
7297 The argument *file* must have two methods, a read() method that takes
7298 an integer argument, and a readline() method that requires no
7299 arguments.  Both methods should return bytes.  Thus *file* can be a
7300 binary file object opened for reading, an io.BytesIO object, or any
7301 other custom object that meets this interface.
7302 
7303 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7304 which are used to control compatibility support for pickle stream
7305 generated by Python 2.  If *fix_imports* is True, pickle will try to
7306 map the old Python 2 names to the new names used in Python 3.  The
7307 *encoding* and *errors* tell pickle how to decode 8-bit string
7308 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7309 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7310 string instances as bytes objects.
7311 [clinic start generated code]*/
7312 
7313 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors)7314 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7315                   const char *encoding, const char *errors)
7316 /*[clinic end generated code: output=69e298160285199e input=01b44dd3fc07afa7]*/
7317 {
7318     PyObject *result;
7319     UnpicklerObject *unpickler = _Unpickler_New();
7320 
7321     if (unpickler == NULL)
7322         return NULL;
7323 
7324     if (_Unpickler_SetInputStream(unpickler, file) < 0)
7325         goto error;
7326 
7327     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7328         goto error;
7329 
7330     unpickler->fix_imports = fix_imports;
7331 
7332     result = load(unpickler);
7333     Py_DECREF(unpickler);
7334     return result;
7335 
7336   error:
7337     Py_XDECREF(unpickler);
7338     return NULL;
7339 }
7340 
7341 /*[clinic input]
7342 
7343 _pickle.loads
7344 
7345   data: object
7346   *
7347   fix_imports: bool = True
7348   encoding: str = 'ASCII'
7349   errors: str = 'strict'
7350 
7351 Read and return an object from the given pickle data.
7352 
7353 The protocol version of the pickle is detected automatically, so no
7354 protocol argument is needed.  Bytes past the pickled object's
7355 representation are ignored.
7356 
7357 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7358 which are used to control compatibility support for pickle stream
7359 generated by Python 2.  If *fix_imports* is True, pickle will try to
7360 map the old Python 2 names to the new names used in Python 3.  The
7361 *encoding* and *errors* tell pickle how to decode 8-bit string
7362 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7363 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7364 string instances as bytes objects.
7365 [clinic start generated code]*/
7366 
7367 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors)7368 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7369                    const char *encoding, const char *errors)
7370 /*[clinic end generated code: output=1e7cb2343f2c440f input=70605948a719feb9]*/
7371 {
7372     PyObject *result;
7373     UnpicklerObject *unpickler = _Unpickler_New();
7374 
7375     if (unpickler == NULL)
7376         return NULL;
7377 
7378     if (_Unpickler_SetStringInput(unpickler, data) < 0)
7379         goto error;
7380 
7381     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7382         goto error;
7383 
7384     unpickler->fix_imports = fix_imports;
7385 
7386     result = load(unpickler);
7387     Py_DECREF(unpickler);
7388     return result;
7389 
7390   error:
7391     Py_XDECREF(unpickler);
7392     return NULL;
7393 }
7394 
7395 static struct PyMethodDef pickle_methods[] = {
7396     _PICKLE_DUMP_METHODDEF
7397     _PICKLE_DUMPS_METHODDEF
7398     _PICKLE_LOAD_METHODDEF
7399     _PICKLE_LOADS_METHODDEF
7400     {NULL, NULL} /* sentinel */
7401 };
7402 
7403 static int
pickle_clear(PyObject * m)7404 pickle_clear(PyObject *m)
7405 {
7406     _Pickle_ClearState(_Pickle_GetState(m));
7407     return 0;
7408 }
7409 
7410 static void
pickle_free(PyObject * m)7411 pickle_free(PyObject *m)
7412 {
7413     _Pickle_ClearState(_Pickle_GetState(m));
7414 }
7415 
7416 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7417 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7418 {
7419     PickleState *st = _Pickle_GetState(m);
7420     Py_VISIT(st->PickleError);
7421     Py_VISIT(st->PicklingError);
7422     Py_VISIT(st->UnpicklingError);
7423     Py_VISIT(st->dispatch_table);
7424     Py_VISIT(st->extension_registry);
7425     Py_VISIT(st->extension_cache);
7426     Py_VISIT(st->inverted_registry);
7427     Py_VISIT(st->name_mapping_2to3);
7428     Py_VISIT(st->import_mapping_2to3);
7429     Py_VISIT(st->name_mapping_3to2);
7430     Py_VISIT(st->import_mapping_3to2);
7431     Py_VISIT(st->codecs_encode);
7432     Py_VISIT(st->getattr);
7433     return 0;
7434 }
7435 
7436 static struct PyModuleDef _picklemodule = {
7437     PyModuleDef_HEAD_INIT,
7438     "_pickle",            /* m_name */
7439     pickle_module_doc,    /* m_doc */
7440     sizeof(PickleState),  /* m_size */
7441     pickle_methods,       /* m_methods */
7442     NULL,                 /* m_reload */
7443     pickle_traverse,      /* m_traverse */
7444     pickle_clear,         /* m_clear */
7445     (freefunc)pickle_free /* m_free */
7446 };
7447 
7448 PyMODINIT_FUNC
PyInit__pickle(void)7449 PyInit__pickle(void)
7450 {
7451     PyObject *m;
7452     PickleState *st;
7453 
7454     m = PyState_FindModule(&_picklemodule);
7455     if (m) {
7456         Py_INCREF(m);
7457         return m;
7458     }
7459 
7460     if (PyType_Ready(&Unpickler_Type) < 0)
7461         return NULL;
7462     if (PyType_Ready(&Pickler_Type) < 0)
7463         return NULL;
7464     if (PyType_Ready(&Pdata_Type) < 0)
7465         return NULL;
7466     if (PyType_Ready(&PicklerMemoProxyType) < 0)
7467         return NULL;
7468     if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7469         return NULL;
7470 
7471     /* Create the module and add the functions. */
7472     m = PyModule_Create(&_picklemodule);
7473     if (m == NULL)
7474         return NULL;
7475 
7476     Py_INCREF(&Pickler_Type);
7477     if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
7478         return NULL;
7479     Py_INCREF(&Unpickler_Type);
7480     if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
7481         return NULL;
7482 
7483     st = _Pickle_GetState(m);
7484 
7485     /* Initialize the exceptions. */
7486     st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7487     if (st->PickleError == NULL)
7488         return NULL;
7489     st->PicklingError = \
7490         PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7491     if (st->PicklingError == NULL)
7492         return NULL;
7493     st->UnpicklingError = \
7494         PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7495     if (st->UnpicklingError == NULL)
7496         return NULL;
7497 
7498     Py_INCREF(st->PickleError);
7499     if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
7500         return NULL;
7501     Py_INCREF(st->PicklingError);
7502     if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
7503         return NULL;
7504     Py_INCREF(st->UnpicklingError);
7505     if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
7506         return NULL;
7507 
7508     if (_Pickle_InitState(st) < 0)
7509         return NULL;
7510 
7511     return m;
7512 }
7513