1
2 /* Core extension modules are built-in on some platforms (e.g. Windows). */
3 #ifdef Py_BUILD_CORE
4 #define Py_BUILD_CORE_BUILTIN
5 #undef Py_BUILD_CORE
6 #endif
7
8 #include "Python.h"
9 #include "structmember.h"
10
/* Docstring text describing the _pickle module as a whole. */
PyDoc_STRVAR(pickle_module_doc,
"Optimized C implementation for the Python pickle module.");
13
14 /*[clinic input]
15 module _pickle
16 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
17 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
18 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
19 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
20 [clinic start generated code]*/
21 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
22
/* Pickle protocol versions known to this module.
   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol. */
enum {
    /* Newest protocol supported; _Pickler_SetProtocol() also selects it
       for negative protocol numbers. */
    HIGHEST_PROTOCOL = 4,
    /* Protocol selected by _Pickler_SetProtocol() when the caller passes
       NULL or None. */
    DEFAULT_PROTOCOL = 3
};
28
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Every opcode is a single byte, spelled here as a character constant.
   NOTE: in C a character constant has type int, and the value of an
   escape such as '\x80' depends on whether plain char is signed on the
   target platform. */
enum opcode {
    /* Opcodes listed ahead of the protocol 2 group. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95'
};
104
/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16,

    /* Frames whose payload is shorter than this are written without a
       frame header (see _Pickler_CommitFrame()). */
    FRAME_SIZE_MIN = 4,
    /* Payload size at which _Pickler_OpcodeBoundary() closes the current
       frame. */
    FRAME_SIZE_TARGET = 64 * 1024,
    /* Bytes reserved in front of each frame: one FRAME opcode byte followed
       by the 8-byte length written by _write_size64(). */
    FRAME_HEADER_SIZE = 9
};
126
127 /*************************************************************************/
128
/* State of the pickle module, per PEP 3121.

   Every member is an owned reference (or NULL).  The members are filled in
   by _Pickle_InitState() -- except the three exception classes, which are
   assigned outside the code shown here -- and all of them are released by
   _Pickle_ClearState(). */
typedef struct {
    /* Exception classes for pickle. */
    PyObject *PickleError;
    PyObject *PicklingError;
    PyObject *UnpicklingError;

    /* copyreg.dispatch_table, {type_object: pickling_function} */
    PyObject *dispatch_table;

    /* For the extension opcodes EXT1, EXT2 and EXT4. */

    /* copyreg._extension_registry, {(module_name, function_name): code} */
    PyObject *extension_registry;
    /* copyreg._extension_cache, {code: object} */
    PyObject *extension_cache;
    /* copyreg._inverted_registry, {code: (module_name, function_name)} */
    PyObject *inverted_registry;

    /* Import mappings for compatibility with Python 2.x */

    /* _compat_pickle.NAME_MAPPING,
       {(oldmodule, oldname): (newmodule, newname)} */
    PyObject *name_mapping_2to3;
    /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
    PyObject *import_mapping_2to3;
    /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
    PyObject *name_mapping_3to2;
    PyObject *import_mapping_3to2;

    /* codecs.encode, used for saving bytes in older protocols */
    PyObject *codecs_encode;
    /* builtins.getattr, used for saving nested names with protocol < 4 */
    PyObject *getattr;
    /* functools.partial, used for implementing __newobj_ex__ with protocols
       2 and 3 */
    PyObject *partial;
} PickleState;
167
168 /* Forward declaration of the _pickle module definition. */
169 static struct PyModuleDef _picklemodule;
170
171 /* Given a module object, get its per-module state. */
172 static PickleState *
_Pickle_GetState(PyObject * module)173 _Pickle_GetState(PyObject *module)
174 {
175 return (PickleState *)PyModule_GetState(module);
176 }
177
178 /* Find the module instance imported in the currently running sub-interpreter
179 and get its state. */
180 static PickleState *
_Pickle_GetGlobalState(void)181 _Pickle_GetGlobalState(void)
182 {
183 return _Pickle_GetState(PyState_FindModule(&_picklemodule));
184 }
185
186 /* Clear the given pickle module state. */
187 static void
_Pickle_ClearState(PickleState * st)188 _Pickle_ClearState(PickleState *st)
189 {
190 Py_CLEAR(st->PickleError);
191 Py_CLEAR(st->PicklingError);
192 Py_CLEAR(st->UnpicklingError);
193 Py_CLEAR(st->dispatch_table);
194 Py_CLEAR(st->extension_registry);
195 Py_CLEAR(st->extension_cache);
196 Py_CLEAR(st->inverted_registry);
197 Py_CLEAR(st->name_mapping_2to3);
198 Py_CLEAR(st->import_mapping_2to3);
199 Py_CLEAR(st->name_mapping_3to2);
200 Py_CLEAR(st->import_mapping_3to2);
201 Py_CLEAR(st->codecs_encode);
202 Py_CLEAR(st->getattr);
203 Py_CLEAR(st->partial);
204 }
205
206 /* Initialize the given pickle module state. */
207 static int
_Pickle_InitState(PickleState * st)208 _Pickle_InitState(PickleState *st)
209 {
210 PyObject *copyreg = NULL;
211 PyObject *compat_pickle = NULL;
212 PyObject *codecs = NULL;
213 PyObject *functools = NULL;
214 _Py_IDENTIFIER(getattr);
215
216 st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
217 if (st->getattr == NULL)
218 goto error;
219
220 copyreg = PyImport_ImportModule("copyreg");
221 if (!copyreg)
222 goto error;
223 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
224 if (!st->dispatch_table)
225 goto error;
226 if (!PyDict_CheckExact(st->dispatch_table)) {
227 PyErr_Format(PyExc_RuntimeError,
228 "copyreg.dispatch_table should be a dict, not %.200s",
229 Py_TYPE(st->dispatch_table)->tp_name);
230 goto error;
231 }
232 st->extension_registry = \
233 PyObject_GetAttrString(copyreg, "_extension_registry");
234 if (!st->extension_registry)
235 goto error;
236 if (!PyDict_CheckExact(st->extension_registry)) {
237 PyErr_Format(PyExc_RuntimeError,
238 "copyreg._extension_registry should be a dict, "
239 "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
240 goto error;
241 }
242 st->inverted_registry = \
243 PyObject_GetAttrString(copyreg, "_inverted_registry");
244 if (!st->inverted_registry)
245 goto error;
246 if (!PyDict_CheckExact(st->inverted_registry)) {
247 PyErr_Format(PyExc_RuntimeError,
248 "copyreg._inverted_registry should be a dict, "
249 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
250 goto error;
251 }
252 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
253 if (!st->extension_cache)
254 goto error;
255 if (!PyDict_CheckExact(st->extension_cache)) {
256 PyErr_Format(PyExc_RuntimeError,
257 "copyreg._extension_cache should be a dict, "
258 "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
259 goto error;
260 }
261 Py_CLEAR(copyreg);
262
263 /* Load the 2.x -> 3.x stdlib module mapping tables */
264 compat_pickle = PyImport_ImportModule("_compat_pickle");
265 if (!compat_pickle)
266 goto error;
267 st->name_mapping_2to3 = \
268 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
269 if (!st->name_mapping_2to3)
270 goto error;
271 if (!PyDict_CheckExact(st->name_mapping_2to3)) {
272 PyErr_Format(PyExc_RuntimeError,
273 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
274 Py_TYPE(st->name_mapping_2to3)->tp_name);
275 goto error;
276 }
277 st->import_mapping_2to3 = \
278 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
279 if (!st->import_mapping_2to3)
280 goto error;
281 if (!PyDict_CheckExact(st->import_mapping_2to3)) {
282 PyErr_Format(PyExc_RuntimeError,
283 "_compat_pickle.IMPORT_MAPPING should be a dict, "
284 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
285 goto error;
286 }
287 /* ... and the 3.x -> 2.x mapping tables */
288 st->name_mapping_3to2 = \
289 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
290 if (!st->name_mapping_3to2)
291 goto error;
292 if (!PyDict_CheckExact(st->name_mapping_3to2)) {
293 PyErr_Format(PyExc_RuntimeError,
294 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
295 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
296 goto error;
297 }
298 st->import_mapping_3to2 = \
299 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
300 if (!st->import_mapping_3to2)
301 goto error;
302 if (!PyDict_CheckExact(st->import_mapping_3to2)) {
303 PyErr_Format(PyExc_RuntimeError,
304 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
305 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
306 goto error;
307 }
308 Py_CLEAR(compat_pickle);
309
310 codecs = PyImport_ImportModule("codecs");
311 if (codecs == NULL)
312 goto error;
313 st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
314 if (st->codecs_encode == NULL) {
315 goto error;
316 }
317 if (!PyCallable_Check(st->codecs_encode)) {
318 PyErr_Format(PyExc_RuntimeError,
319 "codecs.encode should be a callable, not %.200s",
320 Py_TYPE(st->codecs_encode)->tp_name);
321 goto error;
322 }
323 Py_CLEAR(codecs);
324
325 functools = PyImport_ImportModule("functools");
326 if (!functools)
327 goto error;
328 st->partial = PyObject_GetAttrString(functools, "partial");
329 if (!st->partial)
330 goto error;
331 Py_CLEAR(functools);
332
333 return 0;
334
335 error:
336 Py_CLEAR(copyreg);
337 Py_CLEAR(compat_pickle);
338 Py_CLEAR(codecs);
339 Py_CLEAR(functools);
340 _Pickle_ClearState(st);
341 return -1;
342 }
343
344 /* Helper for calling a function with a single argument quickly.
345
346 This function steals the reference of the given argument. */
347 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)348 _Pickle_FastCall(PyObject *func, PyObject *obj)
349 {
350 PyObject *result;
351
352 result = PyObject_CallFunctionObjArgs(func, obj, NULL);
353 Py_DECREF(obj);
354 return result;
355 }
356
357 /*************************************************************************/
358
359 /* Retrieve and deconstruct a method for avoiding a reference cycle
360 (pickler -> bound method of pickler -> pickler) */
361 static int
init_method_ref(PyObject * self,_Py_Identifier * name,PyObject ** method_func,PyObject ** method_self)362 init_method_ref(PyObject *self, _Py_Identifier *name,
363 PyObject **method_func, PyObject **method_self)
364 {
365 PyObject *func, *func2;
366 int ret;
367
368 /* *method_func and *method_self should be consistent. All refcount decrements
369 should be occurred after setting *method_self and *method_func. */
370 ret = _PyObject_LookupAttrId(self, name, &func);
371 if (func == NULL) {
372 *method_self = NULL;
373 Py_CLEAR(*method_func);
374 return ret;
375 }
376
377 if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
378 /* Deconstruct a bound Python method */
379 func2 = PyMethod_GET_FUNCTION(func);
380 Py_INCREF(func2);
381 *method_self = self; /* borrowed */
382 Py_XSETREF(*method_func, func2);
383 Py_DECREF(func);
384 return 0;
385 }
386 else {
387 *method_self = NULL;
388 Py_XSETREF(*method_func, func);
389 return 0;
390 }
391 }
392
393 /* Bind a method if it was deconstructed */
394 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)395 reconstruct_method(PyObject *func, PyObject *self)
396 {
397 if (self) {
398 return PyMethod_New(func, self);
399 }
400 else {
401 Py_INCREF(func);
402 return func;
403 }
404 }
405
406 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)407 call_method(PyObject *func, PyObject *self, PyObject *obj)
408 {
409 if (self) {
410 return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
411 }
412 else {
413 return PyObject_CallFunctionObjArgs(func, obj, NULL);
414 }
415 }
416
417 /*************************************************************************/
418
/* Internal data type used as the unpickling stack.

   The number of items currently on the stack is kept in the object's
   ob_size field and accessed through Py_SIZE(). */
typedef struct {
    PyObject_VAR_HEAD
    PyObject **data;        /* array of owned references, bottom first */
    int mark_set;           /* is MARK set? */
    Py_ssize_t fence;       /* position of top MARK or 0; items below it
                               are never popped */
    Py_ssize_t allocated;   /* number of slots in data allocated */
} Pdata;
427
428 static void
Pdata_dealloc(Pdata * self)429 Pdata_dealloc(Pdata *self)
430 {
431 Py_ssize_t i = Py_SIZE(self);
432 while (--i >= 0) {
433 Py_DECREF(self->data[i]);
434 }
435 PyMem_FREE(self->data);
436 PyObject_Del(self);
437 }
438
/* Type object of the unpickling stack.  Only the name, the sizes and the
   deallocator are given; all remaining slots are left zero-initialized. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    sizeof(PyObject *),           /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
446
447 static PyObject *
Pdata_New(void)448 Pdata_New(void)
449 {
450 Pdata *self;
451
452 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
453 return NULL;
454 Py_SIZE(self) = 0;
455 self->mark_set = 0;
456 self->fence = 0;
457 self->allocated = 8;
458 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
459 if (self->data)
460 return (PyObject *)self;
461 Py_DECREF(self);
462 return PyErr_NoMemory();
463 }
464
465
466 /* Retain only the initial clearto items. If clearto >= the current
467 * number of items, this is a (non-erroneous) NOP.
468 */
469 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)470 Pdata_clear(Pdata *self, Py_ssize_t clearto)
471 {
472 Py_ssize_t i = Py_SIZE(self);
473
474 assert(clearto >= self->fence);
475 if (clearto >= i)
476 return 0;
477
478 while (--i >= clearto) {
479 Py_CLEAR(self->data[i]);
480 }
481 Py_SIZE(self) = clearto;
482 return 0;
483 }
484
485 static int
Pdata_grow(Pdata * self)486 Pdata_grow(Pdata *self)
487 {
488 PyObject **data = self->data;
489 size_t allocated = (size_t)self->allocated;
490 size_t new_allocated;
491
492 new_allocated = (allocated >> 3) + 6;
493 /* check for integer overflow */
494 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
495 goto nomemory;
496 new_allocated += allocated;
497 PyMem_RESIZE(data, PyObject *, new_allocated);
498 if (data == NULL)
499 goto nomemory;
500
501 self->data = data;
502 self->allocated = (Py_ssize_t)new_allocated;
503 return 0;
504
505 nomemory:
506 PyErr_NoMemory();
507 return -1;
508 }
509
510 static int
Pdata_stack_underflow(Pdata * self)511 Pdata_stack_underflow(Pdata *self)
512 {
513 PickleState *st = _Pickle_GetGlobalState();
514 PyErr_SetString(st->UnpicklingError,
515 self->mark_set ?
516 "unexpected MARK found" :
517 "unpickling stack underflow");
518 return -1;
519 }
520
521 /* D is a Pdata*. Pop the topmost element and store it into V, which
522 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
523 * is raised and V is set to NULL.
524 */
525 static PyObject *
Pdata_pop(Pdata * self)526 Pdata_pop(Pdata *self)
527 {
528 if (Py_SIZE(self) <= self->fence) {
529 Pdata_stack_underflow(self);
530 return NULL;
531 }
532 return self->data[--Py_SIZE(self)];
533 }
534 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
535
536 static int
Pdata_push(Pdata * self,PyObject * obj)537 Pdata_push(Pdata *self, PyObject *obj)
538 {
539 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
540 return -1;
541 }
542 self->data[Py_SIZE(self)++] = obj;
543 return 0;
544 }
545
/* Push an object on stack, transferring its ownership to the stack.

   D is a Pdata*, O the object, ER the value the *enclosing function*
   returns when the push fails.  O is evaluated exactly once.

   On failure the macro returns from the enclosing function straight away,
   so the caller gets no chance to dispose of the object; the reference is
   therefore released here instead of being leaked. */
#define PDATA_PUSH(D, O, ER) do {                               \
        PyObject *_pdata_obj = (PyObject *)(O);                 \
        if (Pdata_push((D), _pdata_obj) < 0) {                  \
            Py_DECREF(_pdata_obj);                              \
            return (ER);                                        \
        } } while(0)

/* Push an object on stack, adding a new reference to the object.

   Same parameters as PDATA_PUSH.  The reference added for the stack is
   dropped again when the push fails, leaving the caller's own reference
   untouched. */
#define PDATA_APPEND(D, O, ER) do {                             \
        PyObject *_pdata_obj = (PyObject *)(O);                 \
        Py_INCREF(_pdata_obj);                                  \
        if (Pdata_push((D), _pdata_obj) < 0) {                  \
            Py_DECREF(_pdata_obj);                              \
            return (ER);                                        \
        } } while(0)
554
555 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)556 Pdata_poptuple(Pdata *self, Py_ssize_t start)
557 {
558 PyObject *tuple;
559 Py_ssize_t len, i, j;
560
561 if (start < self->fence) {
562 Pdata_stack_underflow(self);
563 return NULL;
564 }
565 len = Py_SIZE(self) - start;
566 tuple = PyTuple_New(len);
567 if (tuple == NULL)
568 return NULL;
569 for (i = start, j = 0; j < len; i++, j++)
570 PyTuple_SET_ITEM(tuple, j, self->data[i]);
571
572 Py_SIZE(self) = start;
573 return tuple;
574 }
575
576 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)577 Pdata_poplist(Pdata *self, Py_ssize_t start)
578 {
579 PyObject *list;
580 Py_ssize_t len, i, j;
581
582 len = Py_SIZE(self) - start;
583 list = PyList_New(len);
584 if (list == NULL)
585 return NULL;
586 for (i = start, j = 0; j < len; i++, j++)
587 PyList_SET_ITEM(list, j, self->data[i]);
588
589 Py_SIZE(self) = start;
590 return list;
591 }
592
/* One slot of the pickler's memo hash table. */
typedef struct {
    PyObject *me_key;       /* memoized object (owned reference), or NULL
                               when the slot is empty */
    Py_ssize_t me_value;    /* value stored for me_key */
} PyMemoEntry;

/* Open-addressing hash table keyed by object identity; see the
   PyMemoTable_* functions. */
typedef struct {
    size_t mt_mask;         /* mt_allocated - 1, used to wrap probe indices */
    size_t mt_used;         /* number of occupied slots */
    size_t mt_allocated;    /* number of slots in mt_table */
    PyMemoEntry *mt_table;  /* the slot array */
} PyMemoTable;
604
/* Instance layout of Pickler objects. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyMemoTable *memo;          /* Memo table, keeps track of the objects
                                   already seen in order to support pickling
                                   of self-referential objects. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */
    PyObject *dispatch_table;   /* private dispatch_table, can be NULL */

    PyObject *write;            /* write() method of the output stream. */
    PyObject *output_buffer;    /* Write into a local bytes object before
                                   flushing to the stream. */
    Py_ssize_t output_len;      /* Length of output_buffer. */
    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int framing;                /* True when framing is enabled, proto >= 4 */
    Py_ssize_t frame_start;     /* Position in output_buffer where the
                                   current frame begins. -1 if there
                                   is no frame currently open. */

    Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
    int fast;                   /* Enable fast mode if set to a true value.
                                   Fast mode disables the use of the memo,
                                   therefore speeding up the pickling process
                                   by not generating superfluous PUT opcodes.
                                   It should not be used with
                                   self-referential objects. */
    int fast_nesting;           /* NOTE(review): presumably the current
                                   nesting depth in fast mode, compared with
                                   FAST_NESTING_LIMIT -- confirm */
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
    PyObject *fast_memo;        /* NOTE(review): presumably the objects being
                                   tracked once fast mode starts checking for
                                   self-references -- confirm */
} PicklerObject;
639
/* Instance layout of Unpickler objects. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */

    /* The unpickler memo is just an array of PyObject *s. Using a dict
       is unnecessary, since the keys are contiguous ints. */
    PyObject **memo;
    size_t memo_size;           /* Capacity of the memo array */
    size_t memo_len;            /* Number of objects in the memo */

    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */

    /* Input state.  NOTE(review): the roles below are inferred from the
       member names only -- confirm against the reading code. */
    Py_buffer buffer;
    char *input_buffer;
    char *input_line;
    Py_ssize_t input_len;
    Py_ssize_t next_read_idx;
    Py_ssize_t prefetched_idx;  /* index of first prefetched byte */

    PyObject *read;             /* read() method of the input stream. */
    PyObject *readline;         /* readline() method of the input stream. */
    PyObject *peek;             /* peek() method of the input stream, or NULL */

    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value
                                   is "strict". */
    Py_ssize_t *marks;          /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;
679
/* Instance layout of the object that exposes a Pickler's memo table. */
typedef struct {
    PyObject_HEAD
    PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
} PicklerMemoProxyObject;
684
/* Instance layout of the object that exposes an Unpickler's memo. */
typedef struct {
    PyObject_HEAD
    UnpicklerObject *unpickler; /* Unpickler whose memo we're proxying. */
} UnpicklerMemoProxyObject;
689
690 /* Forward declarations */
691 static int save(PicklerObject *, PyObject *, int);
692 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
693 static PyTypeObject Pickler_Type;
694 static PyTypeObject Unpickler_Type;
695
696 #include "clinic/_pickle.c.h"
697
698 /*************************************************************************
699 A custom hashtable mapping void* to Python ints. This is used by the pickler
700 for memoization. Using a custom hashtable rather than PyDict allows us to skip
701 a bunch of unnecessary object creation. This makes a huge performance
702 difference. */
703
/* Number of slots in a freshly created memo table, and the smallest size
   _PyMemoTable_ResizeTable() will choose. */
#define MT_MINSIZE 8
/* Number of bits shifted out of the perturbation value after each probe in
   _PyMemoTable_Lookup(). */
#define PERTURB_SHIFT 5
706
707
708 static PyMemoTable *
PyMemoTable_New(void)709 PyMemoTable_New(void)
710 {
711 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
712 if (memo == NULL) {
713 PyErr_NoMemory();
714 return NULL;
715 }
716
717 memo->mt_used = 0;
718 memo->mt_allocated = MT_MINSIZE;
719 memo->mt_mask = MT_MINSIZE - 1;
720 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
721 if (memo->mt_table == NULL) {
722 PyMem_FREE(memo);
723 PyErr_NoMemory();
724 return NULL;
725 }
726 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
727
728 return memo;
729 }
730
731 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)732 PyMemoTable_Copy(PyMemoTable *self)
733 {
734 PyMemoTable *new = PyMemoTable_New();
735 if (new == NULL)
736 return NULL;
737
738 new->mt_used = self->mt_used;
739 new->mt_allocated = self->mt_allocated;
740 new->mt_mask = self->mt_mask;
741 /* The table we get from _New() is probably smaller than we wanted.
742 Free it and allocate one that's the right size. */
743 PyMem_FREE(new->mt_table);
744 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
745 if (new->mt_table == NULL) {
746 PyMem_FREE(new);
747 PyErr_NoMemory();
748 return NULL;
749 }
750 for (size_t i = 0; i < self->mt_allocated; i++) {
751 Py_XINCREF(self->mt_table[i].me_key);
752 }
753 memcpy(new->mt_table, self->mt_table,
754 sizeof(PyMemoEntry) * self->mt_allocated);
755
756 return new;
757 }
758
759 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)760 PyMemoTable_Size(PyMemoTable *self)
761 {
762 return self->mt_used;
763 }
764
765 static int
PyMemoTable_Clear(PyMemoTable * self)766 PyMemoTable_Clear(PyMemoTable *self)
767 {
768 Py_ssize_t i = self->mt_allocated;
769
770 while (--i >= 0) {
771 Py_XDECREF(self->mt_table[i].me_key);
772 }
773 self->mt_used = 0;
774 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
775 return 0;
776 }
777
778 static void
PyMemoTable_Del(PyMemoTable * self)779 PyMemoTable_Del(PyMemoTable *self)
780 {
781 if (self == NULL)
782 return;
783 PyMemoTable_Clear(self);
784
785 PyMem_FREE(self->mt_table);
786 PyMem_FREE(self);
787 }
788
789 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
790 can be considerably simpler than dictobject.c's lookdict(). */
791 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)792 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
793 {
794 size_t i;
795 size_t perturb;
796 size_t mask = self->mt_mask;
797 PyMemoEntry *table = self->mt_table;
798 PyMemoEntry *entry;
799 Py_hash_t hash = (Py_hash_t)key >> 3;
800
801 i = hash & mask;
802 entry = &table[i];
803 if (entry->me_key == NULL || entry->me_key == key)
804 return entry;
805
806 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
807 i = (i << 2) + i + perturb + 1;
808 entry = &table[i & mask];
809 if (entry->me_key == NULL || entry->me_key == key)
810 return entry;
811 }
812 Py_UNREACHABLE();
813 }
814
815 /* Returns -1 on failure, 0 on success. */
816 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)817 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
818 {
819 PyMemoEntry *oldtable = NULL;
820 PyMemoEntry *oldentry, *newentry;
821 size_t new_size = MT_MINSIZE;
822 size_t to_process;
823
824 assert(min_size > 0);
825
826 if (min_size > PY_SSIZE_T_MAX) {
827 PyErr_NoMemory();
828 return -1;
829 }
830
831 /* Find the smallest valid table size >= min_size. */
832 while (new_size < min_size) {
833 new_size <<= 1;
834 }
835 /* new_size needs to be a power of two. */
836 assert((new_size & (new_size - 1)) == 0);
837
838 /* Allocate new table. */
839 oldtable = self->mt_table;
840 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
841 if (self->mt_table == NULL) {
842 self->mt_table = oldtable;
843 PyErr_NoMemory();
844 return -1;
845 }
846 self->mt_allocated = new_size;
847 self->mt_mask = new_size - 1;
848 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
849
850 /* Copy entries from the old table. */
851 to_process = self->mt_used;
852 for (oldentry = oldtable; to_process > 0; oldentry++) {
853 if (oldentry->me_key != NULL) {
854 to_process--;
855 /* newentry is a pointer to a chunk of the new
856 mt_table, so we're setting the key:value pair
857 in-place. */
858 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
859 newentry->me_key = oldentry->me_key;
860 newentry->me_value = oldentry->me_value;
861 }
862 }
863
864 /* Deallocate the old table. */
865 PyMem_FREE(oldtable);
866 return 0;
867 }
868
869 /* Returns NULL on failure, a pointer to the value otherwise. */
870 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)871 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
872 {
873 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
874 if (entry->me_key == NULL)
875 return NULL;
876 return &entry->me_value;
877 }
878
879 /* Returns -1 on failure, 0 on success. */
880 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)881 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
882 {
883 PyMemoEntry *entry;
884
885 assert(key != NULL);
886
887 entry = _PyMemoTable_Lookup(self, key);
888 if (entry->me_key != NULL) {
889 entry->me_value = value;
890 return 0;
891 }
892 Py_INCREF(key);
893 entry->me_key = key;
894 entry->me_value = value;
895 self->mt_used++;
896
897 /* If we added a key, we can safely resize. Otherwise just return!
898 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
899 *
900 * Quadrupling the size improves average table sparseness
901 * (reducing collisions) at the cost of some memory. It also halves
902 * the number of expensive resize operations in a growing memo table.
903 *
904 * Very large memo tables (over 50K items) use doubling instead.
905 * This may help applications with severe memory constraints.
906 */
907 if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
908 return 0;
909 }
910 // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
911 size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
912 return _PyMemoTable_ResizeTable(self, desired_size);
913 }
914
915 #undef MT_MINSIZE
916 #undef PERTURB_SHIFT
917
918 /*************************************************************************/
919
920
921 static int
_Pickler_ClearBuffer(PicklerObject * self)922 _Pickler_ClearBuffer(PicklerObject *self)
923 {
924 Py_XSETREF(self->output_buffer,
925 PyBytes_FromStringAndSize(NULL, self->max_output_len));
926 if (self->output_buffer == NULL)
927 return -1;
928 self->output_len = 0;
929 self->frame_start = -1;
930 return 0;
931 }
932
/* Store `value` into the 8 bytes at `out`, least significant byte first.
   When size_t is narrower than 8 bytes the remaining bytes are zero. */
static void
_write_size64(char *out, size_t value)
{
    size_t pos;

    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    /* Peel the low byte off on each round. */
    for (pos = 0; pos < sizeof(size_t); pos++) {
        out[pos] = (unsigned char)(value & 0xff);
        value >>= 8;
    }
    /* Pad up to the fixed 8-byte width. */
    while (pos < 8) {
        out[pos] = 0;
        pos++;
    }
}
947
948 static int
_Pickler_CommitFrame(PicklerObject * self)949 _Pickler_CommitFrame(PicklerObject *self)
950 {
951 size_t frame_len;
952 char *qdata;
953
954 if (!self->framing || self->frame_start == -1)
955 return 0;
956 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
957 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
958 if (frame_len >= FRAME_SIZE_MIN) {
959 qdata[0] = FRAME;
960 _write_size64(qdata + 1, frame_len);
961 }
962 else {
963 memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
964 self->output_len -= FRAME_HEADER_SIZE;
965 }
966 self->frame_start = -1;
967 return 0;
968 }
969
970 static PyObject *
_Pickler_GetString(PicklerObject * self)971 _Pickler_GetString(PicklerObject *self)
972 {
973 PyObject *output_buffer = self->output_buffer;
974
975 assert(self->output_buffer != NULL);
976
977 if (_Pickler_CommitFrame(self))
978 return NULL;
979
980 self->output_buffer = NULL;
981 /* Resize down to exact size */
982 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
983 return NULL;
984 return output_buffer;
985 }
986
987 static int
_Pickler_FlushToFile(PicklerObject * self)988 _Pickler_FlushToFile(PicklerObject *self)
989 {
990 PyObject *output, *result;
991
992 assert(self->write != NULL);
993
994 /* This will commit the frame first */
995 output = _Pickler_GetString(self);
996 if (output == NULL)
997 return -1;
998
999 result = _Pickle_FastCall(self->write, output);
1000 Py_XDECREF(result);
1001 return (result == NULL) ? -1 : 0;
1002 }
1003
1004 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1005 _Pickler_OpcodeBoundary(PicklerObject *self)
1006 {
1007 Py_ssize_t frame_len;
1008
1009 if (!self->framing || self->frame_start == -1) {
1010 return 0;
1011 }
1012 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1013 if (frame_len >= FRAME_SIZE_TARGET) {
1014 if(_Pickler_CommitFrame(self)) {
1015 return -1;
1016 }
1017 /* Flush the content of the committed frame to the underlying
1018 * file and reuse the pickler buffer for the next frame so as
1019 * to limit memory usage when dumping large complex objects to
1020 * a file.
1021 *
1022 * self->write is NULL when called via dumps.
1023 */
1024 if (self->write != NULL) {
1025 if (_Pickler_FlushToFile(self) < 0) {
1026 return -1;
1027 }
1028 if (_Pickler_ClearBuffer(self) < 0) {
1029 return -1;
1030 }
1031 }
1032 }
1033 return 0;
1034 }
1035
1036 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1037 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1038 {
1039 Py_ssize_t i, n, required;
1040 char *buffer;
1041 int need_new_frame;
1042
1043 assert(s != NULL);
1044 need_new_frame = (self->framing && self->frame_start == -1);
1045
1046 if (need_new_frame)
1047 n = data_len + FRAME_HEADER_SIZE;
1048 else
1049 n = data_len;
1050
1051 required = self->output_len + n;
1052 if (required > self->max_output_len) {
1053 /* Make place in buffer for the pickle chunk */
1054 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1055 PyErr_NoMemory();
1056 return -1;
1057 }
1058 self->max_output_len = (self->output_len + n) / 2 * 3;
1059 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1060 return -1;
1061 }
1062 buffer = PyBytes_AS_STRING(self->output_buffer);
1063 if (need_new_frame) {
1064 /* Setup new frame */
1065 Py_ssize_t frame_start = self->output_len;
1066 self->frame_start = frame_start;
1067 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1068 /* Write an invalid value, for debugging */
1069 buffer[frame_start + i] = 0xFE;
1070 }
1071 self->output_len += FRAME_HEADER_SIZE;
1072 }
1073 if (data_len < 8) {
1074 /* This is faster than memcpy when the string is short. */
1075 for (i = 0; i < data_len; i++) {
1076 buffer[self->output_len + i] = s[i];
1077 }
1078 }
1079 else {
1080 memcpy(buffer + self->output_len, s, data_len);
1081 }
1082 self->output_len += data_len;
1083 return data_len;
1084 }
1085
1086 static PicklerObject *
_Pickler_New(void)1087 _Pickler_New(void)
1088 {
1089 PicklerObject *self;
1090
1091 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1092 if (self == NULL)
1093 return NULL;
1094
1095 self->pers_func = NULL;
1096 self->dispatch_table = NULL;
1097 self->write = NULL;
1098 self->proto = 0;
1099 self->bin = 0;
1100 self->framing = 0;
1101 self->frame_start = -1;
1102 self->fast = 0;
1103 self->fast_nesting = 0;
1104 self->fix_imports = 0;
1105 self->fast_memo = NULL;
1106 self->max_output_len = WRITE_BUF_SIZE;
1107 self->output_len = 0;
1108
1109 self->memo = PyMemoTable_New();
1110 self->output_buffer = PyBytes_FromStringAndSize(NULL,
1111 self->max_output_len);
1112
1113 if (self->memo == NULL || self->output_buffer == NULL) {
1114 Py_DECREF(self);
1115 return NULL;
1116 }
1117 return self;
1118 }
1119
1120 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1121 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1122 {
1123 long proto;
1124
1125 if (protocol == NULL || protocol == Py_None) {
1126 proto = DEFAULT_PROTOCOL;
1127 }
1128 else {
1129 proto = PyLong_AsLong(protocol);
1130 if (proto < 0) {
1131 if (proto == -1 && PyErr_Occurred())
1132 return -1;
1133 proto = HIGHEST_PROTOCOL;
1134 }
1135 else if (proto > HIGHEST_PROTOCOL) {
1136 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1137 HIGHEST_PROTOCOL);
1138 return -1;
1139 }
1140 }
1141 self->proto = (int)proto;
1142 self->bin = proto > 0;
1143 self->fix_imports = fix_imports && proto < 3;
1144 return 0;
1145 }
1146
1147 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1148 be called once on a freshly created Pickler. */
1149 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1150 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1151 {
1152 _Py_IDENTIFIER(write);
1153 assert(file != NULL);
1154 if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1155 return -1;
1156 }
1157 if (self->write == NULL) {
1158 PyErr_SetString(PyExc_TypeError,
1159 "file must have a 'write' attribute");
1160 return -1;
1161 }
1162
1163 return 0;
1164 }
1165
1166 /* Returns the size of the input on success, -1 on failure. This takes its
1167 own reference to `input`. */
1168 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1169 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1170 {
1171 if (self->buffer.buf != NULL)
1172 PyBuffer_Release(&self->buffer);
1173 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1174 return -1;
1175 self->input_buffer = self->buffer.buf;
1176 self->input_len = self->buffer.len;
1177 self->next_read_idx = 0;
1178 self->prefetched_idx = self->input_len;
1179 return self->input_len;
1180 }
1181
1182 static int
bad_readline(void)1183 bad_readline(void)
1184 {
1185 PickleState *st = _Pickle_GetGlobalState();
1186 PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1187 return -1;
1188 }
1189
1190 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1191 _Unpickler_SkipConsumed(UnpicklerObject *self)
1192 {
1193 Py_ssize_t consumed;
1194 PyObject *r;
1195
1196 consumed = self->next_read_idx - self->prefetched_idx;
1197 if (consumed <= 0)
1198 return 0;
1199
1200 assert(self->peek); /* otherwise we did something wrong */
1201 /* This makes a useless copy... */
1202 r = PyObject_CallFunction(self->read, "n", consumed);
1203 if (r == NULL)
1204 return -1;
1205 Py_DECREF(r);
1206
1207 self->prefetched_idx = self->next_read_idx;
1208 return 0;
1209 }
1210
1211 static const Py_ssize_t READ_WHOLE_LINE = -1;
1212
1213 /* If reading from a file, we need to only pull the bytes we need, since there
1214 may be multiple pickle objects arranged contiguously in the same input
1215 buffer.
1216
1217 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1218 bytes from the input stream/buffer.
1219
1220 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1221 failure; on success, returns the number of bytes read from the file.
1222
1223 On success, self->input_len will be 0; this is intentional so that when
1224 unpickling from a file, the "we've run out of data" code paths will trigger,
1225 causing the Unpickler to go back to the file for more data. Use the returned
1226 size to tell you how much data you can process. */
1227 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1228 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1229 {
1230 PyObject *data;
1231 Py_ssize_t read_size;
1232
1233 assert(self->read != NULL);
1234
1235 if (_Unpickler_SkipConsumed(self) < 0)
1236 return -1;
1237
1238 if (n == READ_WHOLE_LINE) {
1239 data = _PyObject_CallNoArg(self->readline);
1240 }
1241 else {
1242 PyObject *len;
1243 /* Prefetch some data without advancing the file pointer, if possible */
1244 if (self->peek && n < PREFETCH) {
1245 len = PyLong_FromSsize_t(PREFETCH);
1246 if (len == NULL)
1247 return -1;
1248 data = _Pickle_FastCall(self->peek, len);
1249 if (data == NULL) {
1250 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1251 return -1;
1252 /* peek() is probably not supported by the given file object */
1253 PyErr_Clear();
1254 Py_CLEAR(self->peek);
1255 }
1256 else {
1257 read_size = _Unpickler_SetStringInput(self, data);
1258 Py_DECREF(data);
1259 self->prefetched_idx = 0;
1260 if (n <= read_size)
1261 return n;
1262 }
1263 }
1264 len = PyLong_FromSsize_t(n);
1265 if (len == NULL)
1266 return -1;
1267 data = _Pickle_FastCall(self->read, len);
1268 }
1269 if (data == NULL)
1270 return -1;
1271
1272 read_size = _Unpickler_SetStringInput(self, data);
1273 Py_DECREF(data);
1274 return read_size;
1275 }
1276
1277 /* Don't call it directly: use _Unpickler_Read() */
1278 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1279 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1280 {
1281 Py_ssize_t num_read;
1282
1283 *s = NULL;
1284 if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1285 PickleState *st = _Pickle_GetGlobalState();
1286 PyErr_SetString(st->UnpicklingError,
1287 "read would overflow (invalid bytecode)");
1288 return -1;
1289 }
1290
1291 /* This case is handled by the _Unpickler_Read() macro for efficiency */
1292 assert(self->next_read_idx + n > self->input_len);
1293
1294 if (!self->read)
1295 return bad_readline();
1296
1297 num_read = _Unpickler_ReadFromFile(self, n);
1298 if (num_read < 0)
1299 return -1;
1300 if (num_read < n)
1301 return bad_readline();
1302 *s = self->input_buffer;
1303 self->next_read_idx = n;
1304 return n;
1305 }
1306
/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly. This method deals correctly with reading from input
   streams, which the input buffer doesn't deal with.

   Fast path: when at least `n` unread bytes remain in the input buffer, `*s`
   is pointed at them, self->next_read_idx is advanced by `n`, and the
   expression evaluates to `n`.  Otherwise the call is forwarded to
   _Unpickler_ReadImpl().

   This is a macro: `self` and `n` are evaluated more than once, so the
   arguments must not have side effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n) \
    (((n) <= (self)->input_len - (self)->next_read_idx)      \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
        (self)->next_read_idx += (n),                        \
        (n))                                                 \
     : _Unpickler_ReadImpl(self, (s), (n)))
1326
1327 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1328 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1329 char **result)
1330 {
1331 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1332 if (input_line == NULL) {
1333 PyErr_NoMemory();
1334 return -1;
1335 }
1336
1337 memcpy(input_line, line, len);
1338 input_line[len] = '\0';
1339 self->input_line = input_line;
1340 *result = self->input_line;
1341 return len;
1342 }
1343
1344 /* Read a line from the input stream/buffer. If we run off the end of the input
1345 before hitting \n, raise an error.
1346
1347 Returns the number of chars read, or -1 on failure. */
1348 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1349 _Unpickler_Readline(UnpicklerObject *self, char **result)
1350 {
1351 Py_ssize_t i, num_read;
1352
1353 for (i = self->next_read_idx; i < self->input_len; i++) {
1354 if (self->input_buffer[i] == '\n') {
1355 char *line_start = self->input_buffer + self->next_read_idx;
1356 num_read = i - self->next_read_idx + 1;
1357 self->next_read_idx = i + 1;
1358 return _Unpickler_CopyLine(self, line_start, num_read, result);
1359 }
1360 }
1361 if (!self->read)
1362 return bad_readline();
1363
1364 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1365 if (num_read < 0)
1366 return -1;
1367 if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1368 return bad_readline();
1369 self->next_read_idx = num_read;
1370 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1371 }
1372
1373 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1374 will be modified in place. */
1375 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1376 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1377 {
1378 size_t i;
1379
1380 assert(new_size > self->memo_size);
1381
1382 PyObject **memo_new = self->memo;
1383 PyMem_RESIZE(memo_new, PyObject *, new_size);
1384 if (memo_new == NULL) {
1385 PyErr_NoMemory();
1386 return -1;
1387 }
1388 self->memo = memo_new;
1389 for (i = self->memo_size; i < new_size; i++)
1390 self->memo[i] = NULL;
1391 self->memo_size = new_size;
1392 return 0;
1393 }
1394
1395 /* Returns NULL if idx is out of bounds. */
1396 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1397 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1398 {
1399 if (idx >= self->memo_size)
1400 return NULL;
1401
1402 return self->memo[idx];
1403 }
1404
1405 /* Returns -1 (with an exception set) on failure, 0 on success.
1406 This takes its own reference to `value`. */
1407 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1408 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1409 {
1410 PyObject *old_item;
1411
1412 if (idx >= self->memo_size) {
1413 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1414 return -1;
1415 assert(idx < self->memo_size);
1416 }
1417 Py_INCREF(value);
1418 old_item = self->memo[idx];
1419 self->memo[idx] = value;
1420 if (old_item != NULL) {
1421 Py_DECREF(old_item);
1422 }
1423 else {
1424 self->memo_len++;
1425 }
1426 return 0;
1427 }
1428
1429 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1430 _Unpickler_NewMemo(Py_ssize_t new_size)
1431 {
1432 PyObject **memo = PyMem_NEW(PyObject *, new_size);
1433 if (memo == NULL) {
1434 PyErr_NoMemory();
1435 return NULL;
1436 }
1437 memset(memo, 0, new_size * sizeof(PyObject *));
1438 return memo;
1439 }
1440
1441 /* Free the unpickler's memo, taking care to decref any items left in it. */
1442 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1443 _Unpickler_MemoCleanup(UnpicklerObject *self)
1444 {
1445 Py_ssize_t i;
1446 PyObject **memo = self->memo;
1447
1448 if (self->memo == NULL)
1449 return;
1450 self->memo = NULL;
1451 i = self->memo_size;
1452 while (--i >= 0) {
1453 Py_XDECREF(memo[i]);
1454 }
1455 PyMem_FREE(memo);
1456 }
1457
1458 static UnpicklerObject *
_Unpickler_New(void)1459 _Unpickler_New(void)
1460 {
1461 UnpicklerObject *self;
1462
1463 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1464 if (self == NULL)
1465 return NULL;
1466
1467 self->pers_func = NULL;
1468 self->input_buffer = NULL;
1469 self->input_line = NULL;
1470 self->input_len = 0;
1471 self->next_read_idx = 0;
1472 self->prefetched_idx = 0;
1473 self->read = NULL;
1474 self->readline = NULL;
1475 self->peek = NULL;
1476 self->encoding = NULL;
1477 self->errors = NULL;
1478 self->marks = NULL;
1479 self->num_marks = 0;
1480 self->marks_size = 0;
1481 self->proto = 0;
1482 self->fix_imports = 0;
1483 memset(&self->buffer, 0, sizeof(Py_buffer));
1484 self->memo_size = 32;
1485 self->memo_len = 0;
1486 self->memo = _Unpickler_NewMemo(self->memo_size);
1487 self->stack = (Pdata *)Pdata_New();
1488
1489 if (self->memo == NULL || self->stack == NULL) {
1490 Py_DECREF(self);
1491 return NULL;
1492 }
1493
1494 return self;
1495 }
1496
1497 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1498 be called once on a freshly created Pickler. */
1499 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1500 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1501 {
1502 _Py_IDENTIFIER(peek);
1503 _Py_IDENTIFIER(read);
1504 _Py_IDENTIFIER(readline);
1505
1506 if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1507 return -1;
1508 }
1509 (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1510 (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
1511 if (self->readline == NULL || self->read == NULL) {
1512 if (!PyErr_Occurred()) {
1513 PyErr_SetString(PyExc_TypeError,
1514 "file must have 'read' and 'readline' attributes");
1515 }
1516 Py_CLEAR(self->read);
1517 Py_CLEAR(self->readline);
1518 Py_CLEAR(self->peek);
1519 return -1;
1520 }
1521 return 0;
1522 }
1523
1524 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1525 be called once on a freshly created Pickler. */
1526 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1527 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1528 const char *encoding,
1529 const char *errors)
1530 {
1531 if (encoding == NULL)
1532 encoding = "ASCII";
1533 if (errors == NULL)
1534 errors = "strict";
1535
1536 self->encoding = _PyMem_Strdup(encoding);
1537 self->errors = _PyMem_Strdup(errors);
1538 if (self->encoding == NULL || self->errors == NULL) {
1539 PyErr_NoMemory();
1540 return -1;
1541 }
1542 return 0;
1543 }
1544
1545 /* Generate a GET opcode for an object stored in the memo. */
1546 static int
memo_get(PicklerObject * self,PyObject * key)1547 memo_get(PicklerObject *self, PyObject *key)
1548 {
1549 Py_ssize_t *value;
1550 char pdata[30];
1551 Py_ssize_t len;
1552
1553 value = PyMemoTable_Get(self->memo, key);
1554 if (value == NULL) {
1555 PyErr_SetObject(PyExc_KeyError, key);
1556 return -1;
1557 }
1558
1559 if (!self->bin) {
1560 pdata[0] = GET;
1561 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1562 "%" PY_FORMAT_SIZE_T "d\n", *value);
1563 len = strlen(pdata);
1564 }
1565 else {
1566 if (*value < 256) {
1567 pdata[0] = BINGET;
1568 pdata[1] = (unsigned char)(*value & 0xff);
1569 len = 2;
1570 }
1571 else if ((size_t)*value <= 0xffffffffUL) {
1572 pdata[0] = LONG_BINGET;
1573 pdata[1] = (unsigned char)(*value & 0xff);
1574 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1575 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1576 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1577 len = 5;
1578 }
1579 else { /* unlikely */
1580 PickleState *st = _Pickle_GetGlobalState();
1581 PyErr_SetString(st->PicklingError,
1582 "memo id too large for LONG_BINGET");
1583 return -1;
1584 }
1585 }
1586
1587 if (_Pickler_Write(self, pdata, len) < 0)
1588 return -1;
1589
1590 return 0;
1591 }
1592
1593 /* Store an object in the memo, assign it a new unique ID based on the number
1594 of objects currently stored in the memo and generate a PUT opcode. */
1595 static int
memo_put(PicklerObject * self,PyObject * obj)1596 memo_put(PicklerObject *self, PyObject *obj)
1597 {
1598 char pdata[30];
1599 Py_ssize_t len;
1600 Py_ssize_t idx;
1601
1602 const char memoize_op = MEMOIZE;
1603
1604 if (self->fast)
1605 return 0;
1606
1607 idx = PyMemoTable_Size(self->memo);
1608 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1609 return -1;
1610
1611 if (self->proto >= 4) {
1612 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1613 return -1;
1614 return 0;
1615 }
1616 else if (!self->bin) {
1617 pdata[0] = PUT;
1618 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1619 "%" PY_FORMAT_SIZE_T "d\n", idx);
1620 len = strlen(pdata);
1621 }
1622 else {
1623 if (idx < 256) {
1624 pdata[0] = BINPUT;
1625 pdata[1] = (unsigned char)idx;
1626 len = 2;
1627 }
1628 else if ((size_t)idx <= 0xffffffffUL) {
1629 pdata[0] = LONG_BINPUT;
1630 pdata[1] = (unsigned char)(idx & 0xff);
1631 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1632 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1633 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1634 len = 5;
1635 }
1636 else { /* unlikely */
1637 PickleState *st = _Pickle_GetGlobalState();
1638 PyErr_SetString(st->PicklingError,
1639 "memo id too large for LONG_BINPUT");
1640 return -1;
1641 }
1642 }
1643 if (_Pickler_Write(self, pdata, len) < 0)
1644 return -1;
1645
1646 return 0;
1647 }
1648
1649 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1650 get_dotted_path(PyObject *obj, PyObject *name)
1651 {
1652 _Py_static_string(PyId_dot, ".");
1653 PyObject *dotted_path;
1654 Py_ssize_t i, n;
1655
1656 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1657 if (dotted_path == NULL)
1658 return NULL;
1659 n = PyList_GET_SIZE(dotted_path);
1660 assert(n >= 1);
1661 for (i = 0; i < n; i++) {
1662 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1663 if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1664 if (obj == NULL)
1665 PyErr_Format(PyExc_AttributeError,
1666 "Can't pickle local object %R", name);
1667 else
1668 PyErr_Format(PyExc_AttributeError,
1669 "Can't pickle local attribute %R on %R", name, obj);
1670 Py_DECREF(dotted_path);
1671 return NULL;
1672 }
1673 }
1674 return dotted_path;
1675 }
1676
1677 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1678 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1679 {
1680 Py_ssize_t i, n;
1681 PyObject *parent = NULL;
1682
1683 assert(PyList_CheckExact(names));
1684 Py_INCREF(obj);
1685 n = PyList_GET_SIZE(names);
1686 for (i = 0; i < n; i++) {
1687 PyObject *name = PyList_GET_ITEM(names, i);
1688 Py_XDECREF(parent);
1689 parent = obj;
1690 (void)_PyObject_LookupAttr(parent, name, &obj);
1691 if (obj == NULL) {
1692 Py_DECREF(parent);
1693 return NULL;
1694 }
1695 }
1696 if (pparent != NULL)
1697 *pparent = parent;
1698 else
1699 Py_XDECREF(parent);
1700 return obj;
1701 }
1702
1703
1704 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1705 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1706 {
1707 PyObject *dotted_path, *attr;
1708
1709 if (allow_qualname) {
1710 dotted_path = get_dotted_path(obj, name);
1711 if (dotted_path == NULL)
1712 return NULL;
1713 attr = get_deep_attribute(obj, dotted_path, NULL);
1714 Py_DECREF(dotted_path);
1715 }
1716 else {
1717 (void)_PyObject_LookupAttr(obj, name, &attr);
1718 }
1719 if (attr == NULL && !PyErr_Occurred()) {
1720 PyErr_Format(PyExc_AttributeError,
1721 "Can't get attribute %R on %R", name, obj);
1722 }
1723 return attr;
1724 }
1725
1726 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1727 _checkmodule(PyObject *module_name, PyObject *module,
1728 PyObject *global, PyObject *dotted_path)
1729 {
1730 if (module == Py_None) {
1731 return -1;
1732 }
1733 if (PyUnicode_Check(module_name) &&
1734 _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1735 return -1;
1736 }
1737
1738 PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1739 if (candidate == NULL) {
1740 return -1;
1741 }
1742 if (candidate != global) {
1743 Py_DECREF(candidate);
1744 return -1;
1745 }
1746 Py_DECREF(candidate);
1747 return 0;
1748 }
1749
1750 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1751 whichmodule(PyObject *global, PyObject *dotted_path)
1752 {
1753 PyObject *module_name;
1754 PyObject *module = NULL;
1755 Py_ssize_t i;
1756 PyObject *modules;
1757 _Py_IDENTIFIER(__module__);
1758 _Py_IDENTIFIER(modules);
1759 _Py_IDENTIFIER(__main__);
1760
1761 if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1762 return NULL;
1763 }
1764 if (module_name) {
1765 /* In some rare cases (e.g., bound methods of extension types),
1766 __module__ can be None. If it is so, then search sys.modules for
1767 the module of global. */
1768 if (module_name != Py_None)
1769 return module_name;
1770 Py_CLEAR(module_name);
1771 }
1772 assert(module_name == NULL);
1773
1774 /* Fallback on walking sys.modules */
1775 modules = _PySys_GetObjectId(&PyId_modules);
1776 if (modules == NULL) {
1777 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1778 return NULL;
1779 }
1780 if (PyDict_CheckExact(modules)) {
1781 i = 0;
1782 while (PyDict_Next(modules, &i, &module_name, &module)) {
1783 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1784 Py_INCREF(module_name);
1785 return module_name;
1786 }
1787 if (PyErr_Occurred()) {
1788 return NULL;
1789 }
1790 }
1791 }
1792 else {
1793 PyObject *iterator = PyObject_GetIter(modules);
1794 if (iterator == NULL) {
1795 return NULL;
1796 }
1797 while ((module_name = PyIter_Next(iterator))) {
1798 module = PyObject_GetItem(modules, module_name);
1799 if (module == NULL) {
1800 Py_DECREF(module_name);
1801 Py_DECREF(iterator);
1802 return NULL;
1803 }
1804 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1805 Py_DECREF(module);
1806 Py_DECREF(iterator);
1807 return module_name;
1808 }
1809 Py_DECREF(module);
1810 Py_DECREF(module_name);
1811 if (PyErr_Occurred()) {
1812 Py_DECREF(iterator);
1813 return NULL;
1814 }
1815 }
1816 Py_DECREF(iterator);
1817 }
1818
1819 /* If no module is found, use __main__. */
1820 module_name = _PyUnicode_FromId(&PyId___main__);
1821 Py_XINCREF(module_name);
1822 return module_name;
1823 }
1824
1825 /* fast_save_enter() and fast_save_leave() are guards against recursive
1826 objects when Pickler is used with the "fast mode" (i.e., with object
1827 memoization disabled). If the nesting of a list or dict object exceed
1828 FAST_NESTING_LIMIT, these guards will start keeping an internal
1829 reference to the seen list or dict objects and check whether these objects
1830 are recursive. These are not strictly necessary, since save() has a
1831 hard-coded recursion limit, but they give a nicer error message than the
1832 typical RuntimeError. */
1833 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1834 fast_save_enter(PicklerObject *self, PyObject *obj)
1835 {
1836 /* if fast_nesting < 0, we're doing an error exit. */
1837 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1838 PyObject *key = NULL;
1839 if (self->fast_memo == NULL) {
1840 self->fast_memo = PyDict_New();
1841 if (self->fast_memo == NULL) {
1842 self->fast_nesting = -1;
1843 return 0;
1844 }
1845 }
1846 key = PyLong_FromVoidPtr(obj);
1847 if (key == NULL) {
1848 self->fast_nesting = -1;
1849 return 0;
1850 }
1851 if (PyDict_GetItemWithError(self->fast_memo, key)) {
1852 Py_DECREF(key);
1853 PyErr_Format(PyExc_ValueError,
1854 "fast mode: can't pickle cyclic objects "
1855 "including object type %.200s at %p",
1856 obj->ob_type->tp_name, obj);
1857 self->fast_nesting = -1;
1858 return 0;
1859 }
1860 if (PyErr_Occurred()) {
1861 Py_DECREF(key);
1862 self->fast_nesting = -1;
1863 return 0;
1864 }
1865 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1866 Py_DECREF(key);
1867 self->fast_nesting = -1;
1868 return 0;
1869 }
1870 Py_DECREF(key);
1871 }
1872 return 1;
1873 }
1874
1875 static int
fast_save_leave(PicklerObject * self,PyObject * obj)1876 fast_save_leave(PicklerObject *self, PyObject *obj)
1877 {
1878 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1879 PyObject *key = PyLong_FromVoidPtr(obj);
1880 if (key == NULL)
1881 return 0;
1882 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1883 Py_DECREF(key);
1884 return 0;
1885 }
1886 Py_DECREF(key);
1887 }
1888 return 1;
1889 }
1890
1891 static int
save_none(PicklerObject * self,PyObject * obj)1892 save_none(PicklerObject *self, PyObject *obj)
1893 {
1894 const char none_op = NONE;
1895 if (_Pickler_Write(self, &none_op, 1) < 0)
1896 return -1;
1897
1898 return 0;
1899 }
1900
1901 static int
save_bool(PicklerObject * self,PyObject * obj)1902 save_bool(PicklerObject *self, PyObject *obj)
1903 {
1904 if (self->proto >= 2) {
1905 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
1906 if (_Pickler_Write(self, &bool_op, 1) < 0)
1907 return -1;
1908 }
1909 else {
1910 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
1911 * so that unpicklers written before bools were introduced unpickle them
1912 * as ints, but unpicklers after can recognize that bools were intended.
1913 * Note that protocol 2 added direct ways to pickle bools.
1914 */
1915 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
1916 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
1917 return -1;
1918 }
1919 return 0;
1920 }
1921
1922 static int
save_long(PicklerObject * self,PyObject * obj)1923 save_long(PicklerObject *self, PyObject *obj)
1924 {
1925 PyObject *repr = NULL;
1926 Py_ssize_t size;
1927 long val;
1928 int overflow;
1929 int status = 0;
1930
1931 val= PyLong_AsLongAndOverflow(obj, &overflow);
1932 if (!overflow && (sizeof(long) <= 4 ||
1933 (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
1934 {
1935 /* result fits in a signed 4-byte integer.
1936
1937 Note: we can't use -0x80000000L in the above condition because some
1938 compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
1939 before applying the unary minus when sizeof(long) <= 4. The
1940 resulting value stays unsigned which is commonly not what we want,
1941 so MSVC happily warns us about it. However, that result would have
1942 been fine because we guard for sizeof(long) <= 4 which turns the
1943 condition true in that particular case. */
1944 char pdata[32];
1945 Py_ssize_t len = 0;
1946
1947 if (self->bin) {
1948 pdata[1] = (unsigned char)(val & 0xff);
1949 pdata[2] = (unsigned char)((val >> 8) & 0xff);
1950 pdata[3] = (unsigned char)((val >> 16) & 0xff);
1951 pdata[4] = (unsigned char)((val >> 24) & 0xff);
1952
1953 if ((pdata[4] != 0) || (pdata[3] != 0)) {
1954 pdata[0] = BININT;
1955 len = 5;
1956 }
1957 else if (pdata[2] != 0) {
1958 pdata[0] = BININT2;
1959 len = 3;
1960 }
1961 else {
1962 pdata[0] = BININT1;
1963 len = 2;
1964 }
1965 }
1966 else {
1967 sprintf(pdata, "%c%ld\n", INT, val);
1968 len = strlen(pdata);
1969 }
1970 if (_Pickler_Write(self, pdata, len) < 0)
1971 return -1;
1972
1973 return 0;
1974 }
1975 assert(!PyErr_Occurred());
1976
1977 if (self->proto >= 2) {
1978 /* Linear-time pickling. */
1979 size_t nbits;
1980 size_t nbytes;
1981 unsigned char *pdata;
1982 char header[5];
1983 int i;
1984 int sign = _PyLong_Sign(obj);
1985
1986 if (sign == 0) {
1987 header[0] = LONG1;
1988 header[1] = 0; /* It's 0 -- an empty bytestring. */
1989 if (_Pickler_Write(self, header, 2) < 0)
1990 goto error;
1991 return 0;
1992 }
1993 nbits = _PyLong_NumBits(obj);
1994 if (nbits == (size_t)-1 && PyErr_Occurred())
1995 goto error;
1996 /* How many bytes do we need? There are nbits >> 3 full
1997 * bytes of data, and nbits & 7 leftover bits. If there
1998 * are any leftover bits, then we clearly need another
1999 * byte. Wnat's not so obvious is that we *probably*
2000 * need another byte even if there aren't any leftovers:
2001 * the most-significant bit of the most-significant byte
2002 * acts like a sign bit, and it's usually got a sense
2003 * opposite of the one we need. The exception is ints
2004 * of the form -(2**(8*j-1)) for j > 0. Such an int is
2005 * its own 256's-complement, so has the right sign bit
2006 * even without the extra byte. That's a pain to check
2007 * for in advance, though, so we always grab an extra
2008 * byte at the start, and cut it back later if possible.
2009 */
2010 nbytes = (nbits >> 3) + 1;
2011 if (nbytes > 0x7fffffffL) {
2012 PyErr_SetString(PyExc_OverflowError,
2013 "int too large to pickle");
2014 goto error;
2015 }
2016 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2017 if (repr == NULL)
2018 goto error;
2019 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2020 i = _PyLong_AsByteArray((PyLongObject *)obj,
2021 pdata, nbytes,
2022 1 /* little endian */ , 1 /* signed */ );
2023 if (i < 0)
2024 goto error;
2025 /* If the int is negative, this may be a byte more than
2026 * needed. This is so iff the MSB is all redundant sign
2027 * bits.
2028 */
2029 if (sign < 0 &&
2030 nbytes > 1 &&
2031 pdata[nbytes - 1] == 0xff &&
2032 (pdata[nbytes - 2] & 0x80) != 0) {
2033 nbytes--;
2034 }
2035
2036 if (nbytes < 256) {
2037 header[0] = LONG1;
2038 header[1] = (unsigned char)nbytes;
2039 size = 2;
2040 }
2041 else {
2042 header[0] = LONG4;
2043 size = (Py_ssize_t) nbytes;
2044 for (i = 1; i < 5; i++) {
2045 header[i] = (unsigned char)(size & 0xff);
2046 size >>= 8;
2047 }
2048 size = 5;
2049 }
2050 if (_Pickler_Write(self, header, size) < 0 ||
2051 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2052 goto error;
2053 }
2054 else {
2055 const char long_op = LONG;
2056 const char *string;
2057
2058 /* proto < 2: write the repr and newline. This is quadratic-time (in
2059 the number of digits), in both directions. We add a trailing 'L'
2060 to the repr, for compatibility with Python 2.x. */
2061
2062 repr = PyObject_Repr(obj);
2063 if (repr == NULL)
2064 goto error;
2065
2066 string = PyUnicode_AsUTF8AndSize(repr, &size);
2067 if (string == NULL)
2068 goto error;
2069
2070 if (_Pickler_Write(self, &long_op, 1) < 0 ||
2071 _Pickler_Write(self, string, size) < 0 ||
2072 _Pickler_Write(self, "L\n", 2) < 0)
2073 goto error;
2074 }
2075
2076 if (0) {
2077 error:
2078 status = -1;
2079 }
2080 Py_XDECREF(repr);
2081
2082 return status;
2083 }
2084
2085 static int
save_float(PicklerObject * self,PyObject * obj)2086 save_float(PicklerObject *self, PyObject *obj)
2087 {
2088 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2089
2090 if (self->bin) {
2091 char pdata[9];
2092 pdata[0] = BINFLOAT;
2093 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2094 return -1;
2095 if (_Pickler_Write(self, pdata, 9) < 0)
2096 return -1;
2097 }
2098 else {
2099 int result = -1;
2100 char *buf = NULL;
2101 char op = FLOAT;
2102
2103 if (_Pickler_Write(self, &op, 1) < 0)
2104 goto done;
2105
2106 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2107 if (!buf) {
2108 PyErr_NoMemory();
2109 goto done;
2110 }
2111
2112 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2113 goto done;
2114
2115 if (_Pickler_Write(self, "\n", 1) < 0)
2116 goto done;
2117
2118 result = 0;
2119 done:
2120 PyMem_Free(buf);
2121 return result;
2122 }
2123
2124 return 0;
2125 }
2126
2127 /* Perform direct write of the header and payload of the binary object.
2128
2129 The large contiguous data is written directly into the underlying file
2130 object, bypassing the output_buffer of the Pickler. We intentionally
2131 do not insert a protocol 4 frame opcode to make it possible to optimize
2132 file.read calls in the loader.
2133 */
2134 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2135 _Pickler_write_bytes(PicklerObject *self,
2136 const char *header, Py_ssize_t header_size,
2137 const char *data, Py_ssize_t data_size,
2138 PyObject *payload)
2139 {
2140 int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2141 int framing = self->framing;
2142
2143 if (bypass_buffer) {
2144 assert(self->output_buffer != NULL);
2145 /* Commit the previous frame. */
2146 if (_Pickler_CommitFrame(self)) {
2147 return -1;
2148 }
2149 /* Disable framing temporarily */
2150 self->framing = 0;
2151 }
2152
2153 if (_Pickler_Write(self, header, header_size) < 0) {
2154 return -1;
2155 }
2156
2157 if (bypass_buffer && self->write != NULL) {
2158 /* Bypass the in-memory buffer to directly stream large data
2159 into the underlying file object. */
2160 PyObject *result, *mem = NULL;
2161 /* Dump the output buffer to the file. */
2162 if (_Pickler_FlushToFile(self) < 0) {
2163 return -1;
2164 }
2165
2166 /* Stream write the payload into the file without going through the
2167 output buffer. */
2168 if (payload == NULL) {
2169 /* TODO: It would be better to use a memoryview with a linked
2170 original string if this is possible. */
2171 payload = mem = PyBytes_FromStringAndSize(data, data_size);
2172 if (payload == NULL) {
2173 return -1;
2174 }
2175 }
2176 result = PyObject_CallFunctionObjArgs(self->write, payload, NULL);
2177 Py_XDECREF(mem);
2178 if (result == NULL) {
2179 return -1;
2180 }
2181 Py_DECREF(result);
2182
2183 /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2184 if (_Pickler_ClearBuffer(self) < 0) {
2185 return -1;
2186 }
2187 }
2188 else {
2189 if (_Pickler_Write(self, data, data_size) < 0) {
2190 return -1;
2191 }
2192 }
2193
2194 /* Re-enable framing for subsequent calls to _Pickler_Write. */
2195 self->framing = framing;
2196
2197 return 0;
2198 }
2199
2200 static int
save_bytes(PicklerObject * self,PyObject * obj)2201 save_bytes(PicklerObject *self, PyObject *obj)
2202 {
2203 if (self->proto < 3) {
2204 /* Older pickle protocols do not have an opcode for pickling bytes
2205 objects. Therefore, we need to fake the copy protocol (i.e.,
2206 the __reduce__ method) to permit bytes object unpickling.
2207
2208 Here we use a hack to be compatible with Python 2. Since in Python
2209 2 'bytes' is just an alias for 'str' (which has different
2210 parameters than the actual bytes object), we use codecs.encode
2211 to create the appropriate 'str' object when unpickled using
2212 Python 2 *and* the appropriate 'bytes' object when unpickled
2213 using Python 3. Again this is a hack and we don't need to do this
2214 with newer protocols. */
2215 PyObject *reduce_value = NULL;
2216 int status;
2217
2218 if (PyBytes_GET_SIZE(obj) == 0) {
2219 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2220 }
2221 else {
2222 PickleState *st = _Pickle_GetGlobalState();
2223 PyObject *unicode_str =
2224 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2225 PyBytes_GET_SIZE(obj),
2226 "strict");
2227 _Py_IDENTIFIER(latin1);
2228
2229 if (unicode_str == NULL)
2230 return -1;
2231 reduce_value = Py_BuildValue("(O(OO))",
2232 st->codecs_encode, unicode_str,
2233 _PyUnicode_FromId(&PyId_latin1));
2234 Py_DECREF(unicode_str);
2235 }
2236
2237 if (reduce_value == NULL)
2238 return -1;
2239
2240 /* save_reduce() will memoize the object automatically. */
2241 status = save_reduce(self, reduce_value, obj);
2242 Py_DECREF(reduce_value);
2243 return status;
2244 }
2245 else {
2246 Py_ssize_t size;
2247 char header[9];
2248 Py_ssize_t len;
2249
2250 size = PyBytes_GET_SIZE(obj);
2251 if (size < 0)
2252 return -1;
2253
2254 if (size <= 0xff) {
2255 header[0] = SHORT_BINBYTES;
2256 header[1] = (unsigned char)size;
2257 len = 2;
2258 }
2259 else if ((size_t)size <= 0xffffffffUL) {
2260 header[0] = BINBYTES;
2261 header[1] = (unsigned char)(size & 0xff);
2262 header[2] = (unsigned char)((size >> 8) & 0xff);
2263 header[3] = (unsigned char)((size >> 16) & 0xff);
2264 header[4] = (unsigned char)((size >> 24) & 0xff);
2265 len = 5;
2266 }
2267 else if (self->proto >= 4) {
2268 header[0] = BINBYTES8;
2269 _write_size64(header + 1, size);
2270 len = 9;
2271 }
2272 else {
2273 PyErr_SetString(PyExc_OverflowError,
2274 "cannot serialize a bytes object larger than 4 GiB");
2275 return -1; /* string too large */
2276 }
2277
2278 if (_Pickler_write_bytes(self, header, len,
2279 PyBytes_AS_STRING(obj), size, obj) < 0)
2280 {
2281 return -1;
2282 }
2283
2284 if (memo_put(self, obj) < 0)
2285 return -1;
2286
2287 return 0;
2288 }
2289 }
2290
/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
   backslash and newline characters to \uXXXX escapes.

   obj: a str object; it is made ready with PyUnicode_READY() first.

   Returns the object produced by _PyBytesWriter_Finish(), or NULL on
   failure (after releasing the writer). */
static PyObject *
raw_unicode_escape(PyObject *obj)
{
    char *p;
    Py_ssize_t i, size;
    void *data;
    unsigned int kind;
    _PyBytesWriter writer;

    if (PyUnicode_READY(obj))
        return NULL;

    _PyBytesWriter_Init(&writer);

    size = PyUnicode_GET_LENGTH(obj);
    data = PyUnicode_DATA(obj);
    kind = PyUnicode_KIND(obj);

    /* Reserve one output byte per input character up front; escapes ask
       for the extra bytes they need below. */
    p = _PyBytesWriter_Alloc(&writer, size);
    if (p == NULL)
        goto error;
    writer.overallocate = 1;

    for (i=0; i < size; i++) {
        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
        /* Map 32-bit characters to '\Uxxxxxxxx' */
        if (ch >= 0x10000) {
            /* -1: subtract 1 preallocated byte */
            p = _PyBytesWriter_Prepare(&writer, p, 10-1);
            if (p == NULL)
                goto error;

            /* Backslash, 'U', then eight hex digits, most significant
               nibble first. */
            *p++ = '\\';
            *p++ = 'U';
            *p++ = Py_hexdigits[(ch >> 28) & 0xf];
            *p++ = Py_hexdigits[(ch >> 24) & 0xf];
            *p++ = Py_hexdigits[(ch >> 20) & 0xf];
            *p++ = Py_hexdigits[(ch >> 16) & 0xf];
            *p++ = Py_hexdigits[(ch >> 12) & 0xf];
            *p++ = Py_hexdigits[(ch >> 8) & 0xf];
            *p++ = Py_hexdigits[(ch >> 4) & 0xf];
            *p++ = Py_hexdigits[ch & 15];
        }
        /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
        else if (ch >= 256 || ch == '\\' || ch == '\n') {
            /* -1: subtract 1 preallocated byte */
            p = _PyBytesWriter_Prepare(&writer, p, 6-1);
            if (p == NULL)
                goto error;

            /* Backslash, 'u', then four hex digits, most significant
               nibble first. */
            *p++ = '\\';
            *p++ = 'u';
            *p++ = Py_hexdigits[(ch >> 12) & 0xf];
            *p++ = Py_hexdigits[(ch >> 8) & 0xf];
            *p++ = Py_hexdigits[(ch >> 4) & 0xf];
            *p++ = Py_hexdigits[ch & 15];
        }
        /* Copy everything else as-is */
        else
            *p++ = (char) ch;
    }

    return _PyBytesWriter_Finish(&writer, p);

  error:
    /* Release whatever the writer allocated before reporting failure. */
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
2361
2362 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2363 write_unicode_binary(PicklerObject *self, PyObject *obj)
2364 {
2365 char header[9];
2366 Py_ssize_t len;
2367 PyObject *encoded = NULL;
2368 Py_ssize_t size;
2369 const char *data;
2370
2371 if (PyUnicode_READY(obj))
2372 return -1;
2373
2374 data = PyUnicode_AsUTF8AndSize(obj, &size);
2375 if (data == NULL) {
2376 /* Issue #8383: for strings with lone surrogates, fallback on the
2377 "surrogatepass" error handler. */
2378 PyErr_Clear();
2379 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2380 if (encoded == NULL)
2381 return -1;
2382
2383 data = PyBytes_AS_STRING(encoded);
2384 size = PyBytes_GET_SIZE(encoded);
2385 }
2386
2387 assert(size >= 0);
2388 if (size <= 0xff && self->proto >= 4) {
2389 header[0] = SHORT_BINUNICODE;
2390 header[1] = (unsigned char)(size & 0xff);
2391 len = 2;
2392 }
2393 else if ((size_t)size <= 0xffffffffUL) {
2394 header[0] = BINUNICODE;
2395 header[1] = (unsigned char)(size & 0xff);
2396 header[2] = (unsigned char)((size >> 8) & 0xff);
2397 header[3] = (unsigned char)((size >> 16) & 0xff);
2398 header[4] = (unsigned char)((size >> 24) & 0xff);
2399 len = 5;
2400 }
2401 else if (self->proto >= 4) {
2402 header[0] = BINUNICODE8;
2403 _write_size64(header + 1, size);
2404 len = 9;
2405 }
2406 else {
2407 PyErr_SetString(PyExc_OverflowError,
2408 "cannot serialize a string larger than 4GiB");
2409 Py_XDECREF(encoded);
2410 return -1;
2411 }
2412
2413 if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2414 Py_XDECREF(encoded);
2415 return -1;
2416 }
2417 Py_XDECREF(encoded);
2418 return 0;
2419 }
2420
2421 static int
save_unicode(PicklerObject * self,PyObject * obj)2422 save_unicode(PicklerObject *self, PyObject *obj)
2423 {
2424 if (self->bin) {
2425 if (write_unicode_binary(self, obj) < 0)
2426 return -1;
2427 }
2428 else {
2429 PyObject *encoded;
2430 Py_ssize_t size;
2431 const char unicode_op = UNICODE;
2432
2433 encoded = raw_unicode_escape(obj);
2434 if (encoded == NULL)
2435 return -1;
2436
2437 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2438 Py_DECREF(encoded);
2439 return -1;
2440 }
2441
2442 size = PyBytes_GET_SIZE(encoded);
2443 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2444 Py_DECREF(encoded);
2445 return -1;
2446 }
2447 Py_DECREF(encoded);
2448
2449 if (_Pickler_Write(self, "\n", 1) < 0)
2450 return -1;
2451 }
2452 if (memo_put(self, obj) < 0)
2453 return -1;
2454
2455 return 0;
2456 }
2457
2458 /* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2459 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2460 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2461 {
2462 Py_ssize_t i;
2463
2464 assert(PyTuple_Size(t) == len);
2465
2466 for (i = 0; i < len; i++) {
2467 PyObject *element = PyTuple_GET_ITEM(t, i);
2468
2469 if (element == NULL)
2470 return -1;
2471 if (save(self, element, 0) < 0)
2472 return -1;
2473 }
2474
2475 return 0;
2476 }
2477
/* Tuples are ubiquitous in the pickle protocols, so many techniques are
 * used across protocols to minimize the space needed to pickle them.
 * Tuples are also the only builtin immutable type that can be recursive
 * (a tuple can be reached from itself), and that requires some subtle
 * magic so that it works in all cases.  IOW, this is a long routine.
 *
 * Three encodings are produced:
 *   - empty tuple: EMPTY_TUPLE when self->proto is non-zero, else MARK TUPLE
 *     (the empty tuple is not memoized here);
 *   - 1 to 3 items with proto >= 2: the items followed by TUPLE1/2/3;
 *   - everything else: MARK, the items, TUPLE.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
save_tuple(PicklerObject *self, PyObject *obj)
{
    Py_ssize_t len, i;

    const char mark_op = MARK;
    const char tuple_op = TUPLE;
    const char pop_op = POP;
    const char pop_mark_op = POP_MARK;
    /* Indexed by tuple length (0..3) to get the dedicated opcode. */
    const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};

    if ((len = PyTuple_Size(obj)) < 0)
        return -1;

    if (len == 0) {
        char pdata[2];

        /* From here on `len` is reused as the number of bytes to write. */
        if (self->proto) {
            pdata[0] = EMPTY_TUPLE;
            len = 1;
        }
        else {
            pdata[0] = MARK;
            pdata[1] = TUPLE;
            len = 2;
        }
        if (_Pickler_Write(self, pdata, len) < 0)
            return -1;
        return 0;
    }

    /* The tuple isn't in the memo now.  If it shows up there after
     * saving the tuple elements, the tuple must be recursive, in
     * which case we'll pop everything we put on the stack, and fetch
     * its value from the memo.
     */
    if (len <= 3 && self->proto >= 2) {
        /* Use TUPLE{1,2,3} opcodes. */
        if (store_tuple_elements(self, obj, len) < 0)
            return -1;

        if (PyMemoTable_Get(self->memo, obj)) {
            /* pop the len elements */
            for (i = 0; i < len; i++)
                if (_Pickler_Write(self, &pop_op, 1) < 0)
                    return -1;
            /* fetch from memo */
            if (memo_get(self, obj) < 0)
                return -1;

            return 0;
        }
        else { /* Not recursive. */
            if (_Pickler_Write(self, len2opcode + len, 1) < 0)
                return -1;
        }
        goto memoize;
    }

    /* proto < 2 and len > 0, or proto >= 2 and len > 3.
     * Generate MARK e1 e2 ... TUPLE
     */
    if (_Pickler_Write(self, &mark_op, 1) < 0)
        return -1;

    if (store_tuple_elements(self, obj, len) < 0)
        return -1;

    if (PyMemoTable_Get(self->memo, obj)) {
        /* pop the stack stuff we pushed */
        if (self->bin) {
            /* A single POP_MARK is written in place of the individual
               POPs used by the text protocol below. */
            if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
                return -1;
        }
        else {
            /* Note that we pop one more than len, to remove
             * the MARK too.
             */
            for (i = 0; i <= len; i++)
                if (_Pickler_Write(self, &pop_op, 1) < 0)
                    return -1;
        }
        /* fetch from memo */
        if (memo_get(self, obj) < 0)
            return -1;

        return 0;
    }
    else { /* Not recursive. */
        if (_Pickler_Write(self, &tuple_op, 1) < 0)
            return -1;
    }

  memoize:
    /* Shared tail of both non-recursive paths: record the tuple. */
    if (memo_put(self, obj) < 0)
        return -1;

    return 0;
}
2583
/* iter is an iterator giving items, and we batch up chunks of
 *     MARK item item ... item APPENDS
 * opcode sequences.  Calling code should have arranged to first create an
 * empty list, or list-like object, for the APPENDS to operate on.
 * Returns 0 on success, <0 on error.
 *
 * Protocol 0 writes one APPEND per item instead.  A batch that turns out
 * to hold a single item is also written with APPEND and no MARK.
 */
static int
batch_list(PicklerObject *self, PyObject *iter)
{
    /* Both are references obtained from PyIter_Next(); the error label
       releases whichever is still held. */
    PyObject *obj = NULL;
    PyObject *firstitem = NULL;
    int i, n;

    const char mark_op = MARK;
    const char append_op = APPEND;
    const char appends_op = APPENDS;

    assert(iter != NULL);

    /* XXX: I think this function could be made faster by avoiding the
       iterator interface and fetching objects directly from list using
       PyList_GET_ITEM.
    */

    if (self->proto == 0) {
        /* APPENDS isn't available; do one at a time. */
        for (;;) {
            obj = PyIter_Next(iter);
            if (obj == NULL) {
                /* NULL is either exhaustion or an error; tell them apart. */
                if (PyErr_Occurred())
                    return -1;
                break;
            }
            i = save(self, obj, 0);
            Py_DECREF(obj);
            if (i < 0)
                return -1;
            if (_Pickler_Write(self, &append_op, 1) < 0)
                return -1;
        }
        return 0;
    }

    /* proto > 0:  write in batches of BATCHSIZE. */
    do {
        /* Get first item */
        firstitem = PyIter_Next(iter);
        if (firstitem == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* nothing more to add */
            break;
        }

        /* Try to get a second item */
        obj = PyIter_Next(iter);
        if (obj == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* Only one item to write */
            if (save(self, firstitem, 0) < 0)
                goto error;
            if (_Pickler_Write(self, &append_op, 1) < 0)
                goto error;
            Py_CLEAR(firstitem);
            break;
        }

        /* More than one item to write */

        /* Pump out MARK, items, APPENDS. */
        if (_Pickler_Write(self, &mark_op, 1) < 0)
            goto error;

        if (save(self, firstitem, 0) < 0)
            goto error;
        Py_CLEAR(firstitem);
        n = 1;

        /* Fetch and save up to BATCHSIZE items */
        while (obj) {
            if (save(self, obj, 0) < 0)
                goto error;
            Py_CLEAR(obj);
            n += 1;

            if (n == BATCHSIZE)
                break;

            obj = PyIter_Next(iter);
            if (obj == NULL) {
                if (PyErr_Occurred())
                    goto error;
                break;
            }
        }

        if (_Pickler_Write(self, &appends_op, 1) < 0)
            goto error;

    /* A full batch means the iterator may have more; a short one means it
       is exhausted. */
    } while (n == BATCHSIZE);
    return 0;

  error:
    Py_XDECREF(firstitem);
    Py_XDECREF(obj);
    return -1;
}
2694
2695 /* This is a variant of batch_list() above, specialized for lists (with no
2696 * support for list subclasses). Like batch_list(), we batch up chunks of
2697 * MARK item item ... item APPENDS
2698 * opcode sequences. Calling code should have arranged to first create an
2699 * empty list, or list-like object, for the APPENDS to operate on.
2700 * Returns 0 on success, -1 on error.
2701 *
2702 * This version is considerably faster than batch_list(), if less general.
2703 *
2704 * Note that this only works for protocols > 0.
2705 */
2706 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2707 batch_list_exact(PicklerObject *self, PyObject *obj)
2708 {
2709 PyObject *item = NULL;
2710 Py_ssize_t this_batch, total;
2711
2712 const char append_op = APPEND;
2713 const char appends_op = APPENDS;
2714 const char mark_op = MARK;
2715
2716 assert(obj != NULL);
2717 assert(self->proto > 0);
2718 assert(PyList_CheckExact(obj));
2719
2720 if (PyList_GET_SIZE(obj) == 1) {
2721 item = PyList_GET_ITEM(obj, 0);
2722 if (save(self, item, 0) < 0)
2723 return -1;
2724 if (_Pickler_Write(self, &append_op, 1) < 0)
2725 return -1;
2726 return 0;
2727 }
2728
2729 /* Write in batches of BATCHSIZE. */
2730 total = 0;
2731 do {
2732 this_batch = 0;
2733 if (_Pickler_Write(self, &mark_op, 1) < 0)
2734 return -1;
2735 while (total < PyList_GET_SIZE(obj)) {
2736 item = PyList_GET_ITEM(obj, total);
2737 if (save(self, item, 0) < 0)
2738 return -1;
2739 total++;
2740 if (++this_batch == BATCHSIZE)
2741 break;
2742 }
2743 if (_Pickler_Write(self, &appends_op, 1) < 0)
2744 return -1;
2745
2746 } while (total < PyList_GET_SIZE(obj));
2747
2748 return 0;
2749 }
2750
2751 static int
save_list(PicklerObject * self,PyObject * obj)2752 save_list(PicklerObject *self, PyObject *obj)
2753 {
2754 char header[3];
2755 Py_ssize_t len;
2756 int status = 0;
2757
2758 if (self->fast && !fast_save_enter(self, obj))
2759 goto error;
2760
2761 /* Create an empty list. */
2762 if (self->bin) {
2763 header[0] = EMPTY_LIST;
2764 len = 1;
2765 }
2766 else {
2767 header[0] = MARK;
2768 header[1] = LIST;
2769 len = 2;
2770 }
2771
2772 if (_Pickler_Write(self, header, len) < 0)
2773 goto error;
2774
2775 /* Get list length, and bow out early if empty. */
2776 if ((len = PyList_Size(obj)) < 0)
2777 goto error;
2778
2779 if (memo_put(self, obj) < 0)
2780 goto error;
2781
2782 if (len != 0) {
2783 /* Materialize the list elements. */
2784 if (PyList_CheckExact(obj) && self->proto > 0) {
2785 if (Py_EnterRecursiveCall(" while pickling an object"))
2786 goto error;
2787 status = batch_list_exact(self, obj);
2788 Py_LeaveRecursiveCall();
2789 } else {
2790 PyObject *iter = PyObject_GetIter(obj);
2791 if (iter == NULL)
2792 goto error;
2793
2794 if (Py_EnterRecursiveCall(" while pickling an object")) {
2795 Py_DECREF(iter);
2796 goto error;
2797 }
2798 status = batch_list(self, iter);
2799 Py_LeaveRecursiveCall();
2800 Py_DECREF(iter);
2801 }
2802 }
2803 if (0) {
2804 error:
2805 status = -1;
2806 }
2807
2808 if (self->fast && !fast_save_leave(self, obj))
2809 status = -1;
2810
2811 return status;
2812 }
2813
2814 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2815 * MARK key value ... key value SETITEMS
2816 * opcode sequences. Calling code should have arranged to first create an
2817 * empty dict, or dict-like object, for the SETITEMS to operate on.
2818 * Returns 0 on success, <0 on error.
2819 *
2820 * This is very much like batch_list(). The difference between saving
2821 * elements directly, and picking apart two-tuples, is so long-winded at
2822 * the C level, though, that attempts to combine these routines were too
2823 * ugly to bear.
2824 */
2825 static int
batch_dict(PicklerObject * self,PyObject * iter)2826 batch_dict(PicklerObject *self, PyObject *iter)
2827 {
2828 PyObject *obj = NULL;
2829 PyObject *firstitem = NULL;
2830 int i, n;
2831
2832 const char mark_op = MARK;
2833 const char setitem_op = SETITEM;
2834 const char setitems_op = SETITEMS;
2835
2836 assert(iter != NULL);
2837
2838 if (self->proto == 0) {
2839 /* SETITEMS isn't available; do one at a time. */
2840 for (;;) {
2841 obj = PyIter_Next(iter);
2842 if (obj == NULL) {
2843 if (PyErr_Occurred())
2844 return -1;
2845 break;
2846 }
2847 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2848 PyErr_SetString(PyExc_TypeError, "dict items "
2849 "iterator must return 2-tuples");
2850 return -1;
2851 }
2852 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2853 if (i >= 0)
2854 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2855 Py_DECREF(obj);
2856 if (i < 0)
2857 return -1;
2858 if (_Pickler_Write(self, &setitem_op, 1) < 0)
2859 return -1;
2860 }
2861 return 0;
2862 }
2863
2864 /* proto > 0: write in batches of BATCHSIZE. */
2865 do {
2866 /* Get first item */
2867 firstitem = PyIter_Next(iter);
2868 if (firstitem == NULL) {
2869 if (PyErr_Occurred())
2870 goto error;
2871
2872 /* nothing more to add */
2873 break;
2874 }
2875 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2876 PyErr_SetString(PyExc_TypeError, "dict items "
2877 "iterator must return 2-tuples");
2878 goto error;
2879 }
2880
2881 /* Try to get a second item */
2882 obj = PyIter_Next(iter);
2883 if (obj == NULL) {
2884 if (PyErr_Occurred())
2885 goto error;
2886
2887 /* Only one item to write */
2888 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2889 goto error;
2890 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2891 goto error;
2892 if (_Pickler_Write(self, &setitem_op, 1) < 0)
2893 goto error;
2894 Py_CLEAR(firstitem);
2895 break;
2896 }
2897
2898 /* More than one item to write */
2899
2900 /* Pump out MARK, items, SETITEMS. */
2901 if (_Pickler_Write(self, &mark_op, 1) < 0)
2902 goto error;
2903
2904 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2905 goto error;
2906 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2907 goto error;
2908 Py_CLEAR(firstitem);
2909 n = 1;
2910
2911 /* Fetch and save up to BATCHSIZE items */
2912 while (obj) {
2913 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2914 PyErr_SetString(PyExc_TypeError, "dict items "
2915 "iterator must return 2-tuples");
2916 goto error;
2917 }
2918 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2919 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2920 goto error;
2921 Py_CLEAR(obj);
2922 n += 1;
2923
2924 if (n == BATCHSIZE)
2925 break;
2926
2927 obj = PyIter_Next(iter);
2928 if (obj == NULL) {
2929 if (PyErr_Occurred())
2930 goto error;
2931 break;
2932 }
2933 }
2934
2935 if (_Pickler_Write(self, &setitems_op, 1) < 0)
2936 goto error;
2937
2938 } while (n == BATCHSIZE);
2939 return 0;
2940
2941 error:
2942 Py_XDECREF(firstitem);
2943 Py_XDECREF(obj);
2944 return -1;
2945 }
2946
2947 /* This is a variant of batch_dict() above that specializes for dicts, with no
2948 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2949 * MARK key value ... key value SETITEMS
2950 * opcode sequences. Calling code should have arranged to first create an
2951 * empty dict, or dict-like object, for the SETITEMS to operate on.
2952 * Returns 0 on success, -1 on error.
2953 *
2954 * Note that this currently doesn't work for protocol 0.
2955 */
2956 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)2957 batch_dict_exact(PicklerObject *self, PyObject *obj)
2958 {
2959 PyObject *key = NULL, *value = NULL;
2960 int i;
2961 Py_ssize_t dict_size, ppos = 0;
2962
2963 const char mark_op = MARK;
2964 const char setitem_op = SETITEM;
2965 const char setitems_op = SETITEMS;
2966
2967 assert(obj != NULL && PyDict_CheckExact(obj));
2968 assert(self->proto > 0);
2969
2970 dict_size = PyDict_GET_SIZE(obj);
2971
2972 /* Special-case len(d) == 1 to save space. */
2973 if (dict_size == 1) {
2974 PyDict_Next(obj, &ppos, &key, &value);
2975 if (save(self, key, 0) < 0)
2976 return -1;
2977 if (save(self, value, 0) < 0)
2978 return -1;
2979 if (_Pickler_Write(self, &setitem_op, 1) < 0)
2980 return -1;
2981 return 0;
2982 }
2983
2984 /* Write in batches of BATCHSIZE. */
2985 do {
2986 i = 0;
2987 if (_Pickler_Write(self, &mark_op, 1) < 0)
2988 return -1;
2989 while (PyDict_Next(obj, &ppos, &key, &value)) {
2990 if (save(self, key, 0) < 0)
2991 return -1;
2992 if (save(self, value, 0) < 0)
2993 return -1;
2994 if (++i == BATCHSIZE)
2995 break;
2996 }
2997 if (_Pickler_Write(self, &setitems_op, 1) < 0)
2998 return -1;
2999 if (PyDict_GET_SIZE(obj) != dict_size) {
3000 PyErr_Format(
3001 PyExc_RuntimeError,
3002 "dictionary changed size during iteration");
3003 return -1;
3004 }
3005
3006 } while (i == BATCHSIZE);
3007 return 0;
3008 }
3009
3010 static int
save_dict(PicklerObject * self,PyObject * obj)3011 save_dict(PicklerObject *self, PyObject *obj)
3012 {
3013 PyObject *items, *iter;
3014 char header[3];
3015 Py_ssize_t len;
3016 int status = 0;
3017 assert(PyDict_Check(obj));
3018
3019 if (self->fast && !fast_save_enter(self, obj))
3020 goto error;
3021
3022 /* Create an empty dict. */
3023 if (self->bin) {
3024 header[0] = EMPTY_DICT;
3025 len = 1;
3026 }
3027 else {
3028 header[0] = MARK;
3029 header[1] = DICT;
3030 len = 2;
3031 }
3032
3033 if (_Pickler_Write(self, header, len) < 0)
3034 goto error;
3035
3036 if (memo_put(self, obj) < 0)
3037 goto error;
3038
3039 if (PyDict_GET_SIZE(obj)) {
3040 /* Save the dict items. */
3041 if (PyDict_CheckExact(obj) && self->proto > 0) {
3042 /* We can take certain shortcuts if we know this is a dict and
3043 not a dict subclass. */
3044 if (Py_EnterRecursiveCall(" while pickling an object"))
3045 goto error;
3046 status = batch_dict_exact(self, obj);
3047 Py_LeaveRecursiveCall();
3048 } else {
3049 _Py_IDENTIFIER(items);
3050
3051 items = _PyObject_CallMethodId(obj, &PyId_items, NULL);
3052 if (items == NULL)
3053 goto error;
3054 iter = PyObject_GetIter(items);
3055 Py_DECREF(items);
3056 if (iter == NULL)
3057 goto error;
3058 if (Py_EnterRecursiveCall(" while pickling an object")) {
3059 Py_DECREF(iter);
3060 goto error;
3061 }
3062 status = batch_dict(self, iter);
3063 Py_LeaveRecursiveCall();
3064 Py_DECREF(iter);
3065 }
3066 }
3067
3068 if (0) {
3069 error:
3070 status = -1;
3071 }
3072
3073 if (self->fast && !fast_save_leave(self, obj))
3074 status = -1;
3075
3076 return status;
3077 }
3078
3079 static int
save_set(PicklerObject * self,PyObject * obj)3080 save_set(PicklerObject *self, PyObject *obj)
3081 {
3082 PyObject *item;
3083 int i;
3084 Py_ssize_t set_size, ppos = 0;
3085 Py_hash_t hash;
3086
3087 const char empty_set_op = EMPTY_SET;
3088 const char mark_op = MARK;
3089 const char additems_op = ADDITEMS;
3090
3091 if (self->proto < 4) {
3092 PyObject *items;
3093 PyObject *reduce_value;
3094 int status;
3095
3096 items = PySequence_List(obj);
3097 if (items == NULL) {
3098 return -1;
3099 }
3100 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3101 Py_DECREF(items);
3102 if (reduce_value == NULL) {
3103 return -1;
3104 }
3105 /* save_reduce() will memoize the object automatically. */
3106 status = save_reduce(self, reduce_value, obj);
3107 Py_DECREF(reduce_value);
3108 return status;
3109 }
3110
3111 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3112 return -1;
3113
3114 if (memo_put(self, obj) < 0)
3115 return -1;
3116
3117 set_size = PySet_GET_SIZE(obj);
3118 if (set_size == 0)
3119 return 0; /* nothing to do */
3120
3121 /* Write in batches of BATCHSIZE. */
3122 do {
3123 i = 0;
3124 if (_Pickler_Write(self, &mark_op, 1) < 0)
3125 return -1;
3126 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3127 if (save(self, item, 0) < 0)
3128 return -1;
3129 if (++i == BATCHSIZE)
3130 break;
3131 }
3132 if (_Pickler_Write(self, &additems_op, 1) < 0)
3133 return -1;
3134 if (PySet_GET_SIZE(obj) != set_size) {
3135 PyErr_Format(
3136 PyExc_RuntimeError,
3137 "set changed size during iteration");
3138 return -1;
3139 }
3140 } while (i == BATCHSIZE);
3141
3142 return 0;
3143 }
3144
3145 static int
save_frozenset(PicklerObject * self,PyObject * obj)3146 save_frozenset(PicklerObject *self, PyObject *obj)
3147 {
3148 PyObject *iter;
3149
3150 const char mark_op = MARK;
3151 const char frozenset_op = FROZENSET;
3152
3153 if (self->fast && !fast_save_enter(self, obj))
3154 return -1;
3155
3156 if (self->proto < 4) {
3157 PyObject *items;
3158 PyObject *reduce_value;
3159 int status;
3160
3161 items = PySequence_List(obj);
3162 if (items == NULL) {
3163 return -1;
3164 }
3165 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3166 items);
3167 Py_DECREF(items);
3168 if (reduce_value == NULL) {
3169 return -1;
3170 }
3171 /* save_reduce() will memoize the object automatically. */
3172 status = save_reduce(self, reduce_value, obj);
3173 Py_DECREF(reduce_value);
3174 return status;
3175 }
3176
3177 if (_Pickler_Write(self, &mark_op, 1) < 0)
3178 return -1;
3179
3180 iter = PyObject_GetIter(obj);
3181 if (iter == NULL) {
3182 return -1;
3183 }
3184 for (;;) {
3185 PyObject *item;
3186
3187 item = PyIter_Next(iter);
3188 if (item == NULL) {
3189 if (PyErr_Occurred()) {
3190 Py_DECREF(iter);
3191 return -1;
3192 }
3193 break;
3194 }
3195 if (save(self, item, 0) < 0) {
3196 Py_DECREF(item);
3197 Py_DECREF(iter);
3198 return -1;
3199 }
3200 Py_DECREF(item);
3201 }
3202 Py_DECREF(iter);
3203
3204 /* If the object is already in the memo, this means it is
3205 recursive. In this case, throw away everything we put on the
3206 stack, and fetch the object back from the memo. */
3207 if (PyMemoTable_Get(self->memo, obj)) {
3208 const char pop_mark_op = POP_MARK;
3209
3210 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3211 return -1;
3212 if (memo_get(self, obj) < 0)
3213 return -1;
3214 return 0;
3215 }
3216
3217 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3218 return -1;
3219 if (memo_put(self, obj) < 0)
3220 return -1;
3221
3222 return 0;
3223 }
3224
3225 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3226 fix_imports(PyObject **module_name, PyObject **global_name)
3227 {
3228 PyObject *key;
3229 PyObject *item;
3230 PickleState *st = _Pickle_GetGlobalState();
3231
3232 key = PyTuple_Pack(2, *module_name, *global_name);
3233 if (key == NULL)
3234 return -1;
3235 item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3236 Py_DECREF(key);
3237 if (item) {
3238 PyObject *fixed_module_name;
3239 PyObject *fixed_global_name;
3240
3241 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3242 PyErr_Format(PyExc_RuntimeError,
3243 "_compat_pickle.REVERSE_NAME_MAPPING values "
3244 "should be 2-tuples, not %.200s",
3245 Py_TYPE(item)->tp_name);
3246 return -1;
3247 }
3248 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3249 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3250 if (!PyUnicode_Check(fixed_module_name) ||
3251 !PyUnicode_Check(fixed_global_name)) {
3252 PyErr_Format(PyExc_RuntimeError,
3253 "_compat_pickle.REVERSE_NAME_MAPPING values "
3254 "should be pairs of str, not (%.200s, %.200s)",
3255 Py_TYPE(fixed_module_name)->tp_name,
3256 Py_TYPE(fixed_global_name)->tp_name);
3257 return -1;
3258 }
3259
3260 Py_CLEAR(*module_name);
3261 Py_CLEAR(*global_name);
3262 Py_INCREF(fixed_module_name);
3263 Py_INCREF(fixed_global_name);
3264 *module_name = fixed_module_name;
3265 *global_name = fixed_global_name;
3266 return 0;
3267 }
3268 else if (PyErr_Occurred()) {
3269 return -1;
3270 }
3271
3272 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3273 if (item) {
3274 if (!PyUnicode_Check(item)) {
3275 PyErr_Format(PyExc_RuntimeError,
3276 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3277 "should be strings, not %.200s",
3278 Py_TYPE(item)->tp_name);
3279 return -1;
3280 }
3281 Py_INCREF(item);
3282 Py_XSETREF(*module_name, item);
3283 }
3284 else if (PyErr_Occurred()) {
3285 return -1;
3286 }
3287
3288 return 0;
3289 }
3290
3291 static int
save_global(PicklerObject * self,PyObject * obj,PyObject * name)3292 save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3293 {
3294 PyObject *global_name = NULL;
3295 PyObject *module_name = NULL;
3296 PyObject *module = NULL;
3297 PyObject *parent = NULL;
3298 PyObject *dotted_path = NULL;
3299 PyObject *lastname = NULL;
3300 PyObject *cls;
3301 PickleState *st = _Pickle_GetGlobalState();
3302 int status = 0;
3303 _Py_IDENTIFIER(__name__);
3304 _Py_IDENTIFIER(__qualname__);
3305
3306 const char global_op = GLOBAL;
3307
3308 if (name) {
3309 Py_INCREF(name);
3310 global_name = name;
3311 }
3312 else {
3313 if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
3314 goto error;
3315 if (global_name == NULL) {
3316 global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3317 if (global_name == NULL)
3318 goto error;
3319 }
3320 }
3321
3322 dotted_path = get_dotted_path(module, global_name);
3323 if (dotted_path == NULL)
3324 goto error;
3325 module_name = whichmodule(obj, dotted_path);
3326 if (module_name == NULL)
3327 goto error;
3328
3329 /* XXX: Change to use the import C API directly with level=0 to disallow
3330 relative imports.
3331
3332 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3333 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3334 custom import functions (IMHO, this would be a nice security
3335 feature). The import C API would need to be extended to support the
3336 extra parameters of __import__ to fix that. */
3337 module = PyImport_Import(module_name);
3338 if (module == NULL) {
3339 PyErr_Format(st->PicklingError,
3340 "Can't pickle %R: import of module %R failed",
3341 obj, module_name);
3342 goto error;
3343 }
3344 lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3345 Py_INCREF(lastname);
3346 cls = get_deep_attribute(module, dotted_path, &parent);
3347 Py_CLEAR(dotted_path);
3348 if (cls == NULL) {
3349 PyErr_Format(st->PicklingError,
3350 "Can't pickle %R: attribute lookup %S on %S failed",
3351 obj, global_name, module_name);
3352 goto error;
3353 }
3354 if (cls != obj) {
3355 Py_DECREF(cls);
3356 PyErr_Format(st->PicklingError,
3357 "Can't pickle %R: it's not the same object as %S.%S",
3358 obj, module_name, global_name);
3359 goto error;
3360 }
3361 Py_DECREF(cls);
3362
3363 if (self->proto >= 2) {
3364 /* See whether this is in the extension registry, and if
3365 * so generate an EXT opcode.
3366 */
3367 PyObject *extension_key;
3368 PyObject *code_obj; /* extension code as Python object */
3369 long code; /* extension code as C value */
3370 char pdata[5];
3371 Py_ssize_t n;
3372
3373 extension_key = PyTuple_Pack(2, module_name, global_name);
3374 if (extension_key == NULL) {
3375 goto error;
3376 }
3377 code_obj = PyDict_GetItemWithError(st->extension_registry,
3378 extension_key);
3379 Py_DECREF(extension_key);
3380 /* The object is not registered in the extension registry.
3381 This is the most likely code path. */
3382 if (code_obj == NULL) {
3383 if (PyErr_Occurred()) {
3384 goto error;
3385 }
3386 goto gen_global;
3387 }
3388
3389 /* XXX: pickle.py doesn't check neither the type, nor the range
3390 of the value returned by the extension_registry. It should for
3391 consistency. */
3392
3393 /* Verify code_obj has the right type and value. */
3394 if (!PyLong_Check(code_obj)) {
3395 PyErr_Format(st->PicklingError,
3396 "Can't pickle %R: extension code %R isn't an integer",
3397 obj, code_obj);
3398 goto error;
3399 }
3400 code = PyLong_AS_LONG(code_obj);
3401 if (code <= 0 || code > 0x7fffffffL) {
3402 if (!PyErr_Occurred())
3403 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3404 "code %ld is out of range", obj, code);
3405 goto error;
3406 }
3407
3408 /* Generate an EXT opcode. */
3409 if (code <= 0xff) {
3410 pdata[0] = EXT1;
3411 pdata[1] = (unsigned char)code;
3412 n = 2;
3413 }
3414 else if (code <= 0xffff) {
3415 pdata[0] = EXT2;
3416 pdata[1] = (unsigned char)(code & 0xff);
3417 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3418 n = 3;
3419 }
3420 else {
3421 pdata[0] = EXT4;
3422 pdata[1] = (unsigned char)(code & 0xff);
3423 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3424 pdata[3] = (unsigned char)((code >> 16) & 0xff);
3425 pdata[4] = (unsigned char)((code >> 24) & 0xff);
3426 n = 5;
3427 }
3428
3429 if (_Pickler_Write(self, pdata, n) < 0)
3430 goto error;
3431 }
3432 else {
3433 gen_global:
3434 if (parent == module) {
3435 Py_INCREF(lastname);
3436 Py_DECREF(global_name);
3437 global_name = lastname;
3438 }
3439 if (self->proto >= 4) {
3440 const char stack_global_op = STACK_GLOBAL;
3441
3442 if (save(self, module_name, 0) < 0)
3443 goto error;
3444 if (save(self, global_name, 0) < 0)
3445 goto error;
3446
3447 if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3448 goto error;
3449 }
3450 else if (parent != module) {
3451 PickleState *st = _Pickle_GetGlobalState();
3452 PyObject *reduce_value = Py_BuildValue("(O(OO))",
3453 st->getattr, parent, lastname);
3454 if (reduce_value == NULL)
3455 goto error;
3456 status = save_reduce(self, reduce_value, NULL);
3457 Py_DECREF(reduce_value);
3458 if (status < 0)
3459 goto error;
3460 }
3461 else {
3462 /* Generate a normal global opcode if we are using a pickle
3463 protocol < 4, or if the object is not registered in the
3464 extension registry. */
3465 PyObject *encoded;
3466 PyObject *(*unicode_encoder)(PyObject *);
3467
3468 if (_Pickler_Write(self, &global_op, 1) < 0)
3469 goto error;
3470
3471 /* For protocol < 3 and if the user didn't request against doing
3472 so, we convert module names to the old 2.x module names. */
3473 if (self->proto < 3 && self->fix_imports) {
3474 if (fix_imports(&module_name, &global_name) < 0) {
3475 goto error;
3476 }
3477 }
3478
3479 /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3480 both the module name and the global name using UTF-8. We do so
3481 only when we are using the pickle protocol newer than version
3482 3. This is to ensure compatibility with older Unpickler running
3483 on Python 2.x. */
3484 if (self->proto == 3) {
3485 unicode_encoder = PyUnicode_AsUTF8String;
3486 }
3487 else {
3488 unicode_encoder = PyUnicode_AsASCIIString;
3489 }
3490 encoded = unicode_encoder(module_name);
3491 if (encoded == NULL) {
3492 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3493 PyErr_Format(st->PicklingError,
3494 "can't pickle module identifier '%S' using "
3495 "pickle protocol %i",
3496 module_name, self->proto);
3497 goto error;
3498 }
3499 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3500 PyBytes_GET_SIZE(encoded)) < 0) {
3501 Py_DECREF(encoded);
3502 goto error;
3503 }
3504 Py_DECREF(encoded);
3505 if(_Pickler_Write(self, "\n", 1) < 0)
3506 goto error;
3507
3508 /* Save the name of the module. */
3509 encoded = unicode_encoder(global_name);
3510 if (encoded == NULL) {
3511 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3512 PyErr_Format(st->PicklingError,
3513 "can't pickle global identifier '%S' using "
3514 "pickle protocol %i",
3515 global_name, self->proto);
3516 goto error;
3517 }
3518 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3519 PyBytes_GET_SIZE(encoded)) < 0) {
3520 Py_DECREF(encoded);
3521 goto error;
3522 }
3523 Py_DECREF(encoded);
3524 if (_Pickler_Write(self, "\n", 1) < 0)
3525 goto error;
3526 }
3527 /* Memoize the object. */
3528 if (memo_put(self, obj) < 0)
3529 goto error;
3530 }
3531
3532 if (0) {
3533 error:
3534 status = -1;
3535 }
3536 Py_XDECREF(module_name);
3537 Py_XDECREF(global_name);
3538 Py_XDECREF(module);
3539 Py_XDECREF(parent);
3540 Py_XDECREF(dotted_path);
3541 Py_XDECREF(lastname);
3542
3543 return status;
3544 }
3545
3546 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3547 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3548 {
3549 PyObject *reduce_value;
3550 int status;
3551
3552 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3553 if (reduce_value == NULL) {
3554 return -1;
3555 }
3556 status = save_reduce(self, reduce_value, obj);
3557 Py_DECREF(reduce_value);
3558 return status;
3559 }
3560
3561 static int
save_type(PicklerObject * self,PyObject * obj)3562 save_type(PicklerObject *self, PyObject *obj)
3563 {
3564 if (obj == (PyObject *)&_PyNone_Type) {
3565 return save_singleton_type(self, obj, Py_None);
3566 }
3567 else if (obj == (PyObject *)&PyEllipsis_Type) {
3568 return save_singleton_type(self, obj, Py_Ellipsis);
3569 }
3570 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3571 return save_singleton_type(self, obj, Py_NotImplemented);
3572 }
3573 return save_global(self, obj, NULL);
3574 }
3575
3576 static int
save_pers(PicklerObject * self,PyObject * obj)3577 save_pers(PicklerObject *self, PyObject *obj)
3578 {
3579 PyObject *pid = NULL;
3580 int status = 0;
3581
3582 const char persid_op = PERSID;
3583 const char binpersid_op = BINPERSID;
3584
3585 pid = call_method(self->pers_func, self->pers_func_self, obj);
3586 if (pid == NULL)
3587 return -1;
3588
3589 if (pid != Py_None) {
3590 if (self->bin) {
3591 if (save(self, pid, 1) < 0 ||
3592 _Pickler_Write(self, &binpersid_op, 1) < 0)
3593 goto error;
3594 }
3595 else {
3596 PyObject *pid_str;
3597
3598 pid_str = PyObject_Str(pid);
3599 if (pid_str == NULL)
3600 goto error;
3601
3602 /* XXX: Should it check whether the pid contains embedded
3603 newlines? */
3604 if (!PyUnicode_IS_ASCII(pid_str)) {
3605 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3606 "persistent IDs in protocol 0 must be "
3607 "ASCII strings");
3608 Py_DECREF(pid_str);
3609 goto error;
3610 }
3611
3612 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3613 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3614 PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3615 _Pickler_Write(self, "\n", 1) < 0) {
3616 Py_DECREF(pid_str);
3617 goto error;
3618 }
3619 Py_DECREF(pid_str);
3620 }
3621 status = 1;
3622 }
3623
3624 if (0) {
3625 error:
3626 status = -1;
3627 }
3628 Py_XDECREF(pid);
3629
3630 return status;
3631 }
3632
3633 static PyObject *
get_class(PyObject * obj)3634 get_class(PyObject *obj)
3635 {
3636 PyObject *cls;
3637 _Py_IDENTIFIER(__class__);
3638
3639 if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3640 cls = (PyObject *) Py_TYPE(obj);
3641 Py_INCREF(cls);
3642 }
3643 return cls;
3644 }
3645
3646 /* We're saving obj, and args is the 2-thru-5 tuple returned by the
3647 * appropriate __reduce__ method for obj.
3648 */
3649 static int
save_reduce(PicklerObject * self,PyObject * args,PyObject * obj)3650 save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3651 {
3652 PyObject *callable;
3653 PyObject *argtup;
3654 PyObject *state = NULL;
3655 PyObject *listitems = Py_None;
3656 PyObject *dictitems = Py_None;
3657 PickleState *st = _Pickle_GetGlobalState();
3658 Py_ssize_t size;
3659 int use_newobj = 0, use_newobj_ex = 0;
3660
3661 const char reduce_op = REDUCE;
3662 const char build_op = BUILD;
3663 const char newobj_op = NEWOBJ;
3664 const char newobj_ex_op = NEWOBJ_EX;
3665
3666 size = PyTuple_Size(args);
3667 if (size < 2 || size > 5) {
3668 PyErr_SetString(st->PicklingError, "tuple returned by "
3669 "__reduce__ must contain 2 through 5 elements");
3670 return -1;
3671 }
3672
3673 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
3674 &callable, &argtup, &state, &listitems, &dictitems))
3675 return -1;
3676
3677 if (!PyCallable_Check(callable)) {
3678 PyErr_SetString(st->PicklingError, "first item of the tuple "
3679 "returned by __reduce__ must be callable");
3680 return -1;
3681 }
3682 if (!PyTuple_Check(argtup)) {
3683 PyErr_SetString(st->PicklingError, "second item of the tuple "
3684 "returned by __reduce__ must be a tuple");
3685 return -1;
3686 }
3687
3688 if (state == Py_None)
3689 state = NULL;
3690
3691 if (listitems == Py_None)
3692 listitems = NULL;
3693 else if (!PyIter_Check(listitems)) {
3694 PyErr_Format(st->PicklingError, "fourth element of the tuple "
3695 "returned by __reduce__ must be an iterator, not %s",
3696 Py_TYPE(listitems)->tp_name);
3697 return -1;
3698 }
3699
3700 if (dictitems == Py_None)
3701 dictitems = NULL;
3702 else if (!PyIter_Check(dictitems)) {
3703 PyErr_Format(st->PicklingError, "fifth element of the tuple "
3704 "returned by __reduce__ must be an iterator, not %s",
3705 Py_TYPE(dictitems)->tp_name);
3706 return -1;
3707 }
3708
3709 if (self->proto >= 2) {
3710 PyObject *name;
3711 _Py_IDENTIFIER(__name__);
3712
3713 if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
3714 return -1;
3715 }
3716 if (name != NULL && PyUnicode_Check(name)) {
3717 _Py_IDENTIFIER(__newobj_ex__);
3718 use_newobj_ex = _PyUnicode_EqualToASCIIId(
3719 name, &PyId___newobj_ex__);
3720 if (!use_newobj_ex) {
3721 _Py_IDENTIFIER(__newobj__);
3722 use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
3723 }
3724 }
3725 Py_XDECREF(name);
3726 }
3727
3728 if (use_newobj_ex) {
3729 PyObject *cls;
3730 PyObject *args;
3731 PyObject *kwargs;
3732
3733 if (PyTuple_GET_SIZE(argtup) != 3) {
3734 PyErr_Format(st->PicklingError,
3735 "length of the NEWOBJ_EX argument tuple must be "
3736 "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
3737 return -1;
3738 }
3739
3740 cls = PyTuple_GET_ITEM(argtup, 0);
3741 if (!PyType_Check(cls)) {
3742 PyErr_Format(st->PicklingError,
3743 "first item from NEWOBJ_EX argument tuple must "
3744 "be a class, not %.200s", Py_TYPE(cls)->tp_name);
3745 return -1;
3746 }
3747 args = PyTuple_GET_ITEM(argtup, 1);
3748 if (!PyTuple_Check(args)) {
3749 PyErr_Format(st->PicklingError,
3750 "second item from NEWOBJ_EX argument tuple must "
3751 "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
3752 return -1;
3753 }
3754 kwargs = PyTuple_GET_ITEM(argtup, 2);
3755 if (!PyDict_Check(kwargs)) {
3756 PyErr_Format(st->PicklingError,
3757 "third item from NEWOBJ_EX argument tuple must "
3758 "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
3759 return -1;
3760 }
3761
3762 if (self->proto >= 4) {
3763 if (save(self, cls, 0) < 0 ||
3764 save(self, args, 0) < 0 ||
3765 save(self, kwargs, 0) < 0 ||
3766 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
3767 return -1;
3768 }
3769 }
3770 else {
3771 PyObject *newargs;
3772 PyObject *cls_new;
3773 Py_ssize_t i;
3774 _Py_IDENTIFIER(__new__);
3775
3776 newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
3777 if (newargs == NULL)
3778 return -1;
3779
3780 cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
3781 if (cls_new == NULL) {
3782 Py_DECREF(newargs);
3783 return -1;
3784 }
3785 PyTuple_SET_ITEM(newargs, 0, cls_new);
3786 Py_INCREF(cls);
3787 PyTuple_SET_ITEM(newargs, 1, cls);
3788 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
3789 PyObject *item = PyTuple_GET_ITEM(args, i);
3790 Py_INCREF(item);
3791 PyTuple_SET_ITEM(newargs, i + 2, item);
3792 }
3793
3794 callable = PyObject_Call(st->partial, newargs, kwargs);
3795 Py_DECREF(newargs);
3796 if (callable == NULL)
3797 return -1;
3798
3799 newargs = PyTuple_New(0);
3800 if (newargs == NULL) {
3801 Py_DECREF(callable);
3802 return -1;
3803 }
3804
3805 if (save(self, callable, 0) < 0 ||
3806 save(self, newargs, 0) < 0 ||
3807 _Pickler_Write(self, &reduce_op, 1) < 0) {
3808 Py_DECREF(newargs);
3809 Py_DECREF(callable);
3810 return -1;
3811 }
3812 Py_DECREF(newargs);
3813 Py_DECREF(callable);
3814 }
3815 }
3816 else if (use_newobj) {
3817 PyObject *cls;
3818 PyObject *newargtup;
3819 PyObject *obj_class;
3820 int p;
3821
3822 /* Sanity checks. */
3823 if (PyTuple_GET_SIZE(argtup) < 1) {
3824 PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
3825 return -1;
3826 }
3827
3828 cls = PyTuple_GET_ITEM(argtup, 0);
3829 if (!PyType_Check(cls)) {
3830 PyErr_SetString(st->PicklingError, "args[0] from "
3831 "__newobj__ args is not a type");
3832 return -1;
3833 }
3834
3835 if (obj != NULL) {
3836 obj_class = get_class(obj);
3837 if (obj_class == NULL) {
3838 return -1;
3839 }
3840 p = obj_class != cls;
3841 Py_DECREF(obj_class);
3842 if (p) {
3843 PyErr_SetString(st->PicklingError, "args[0] from "
3844 "__newobj__ args has the wrong class");
3845 return -1;
3846 }
3847 }
3848 /* XXX: These calls save() are prone to infinite recursion. Imagine
3849 what happen if the value returned by the __reduce__() method of
3850 some extension type contains another object of the same type. Ouch!
3851
3852 Here is a quick example, that I ran into, to illustrate what I
3853 mean:
3854
3855 >>> import pickle, copyreg
3856 >>> copyreg.dispatch_table.pop(complex)
3857 >>> pickle.dumps(1+2j)
3858 Traceback (most recent call last):
3859 ...
3860 RecursionError: maximum recursion depth exceeded
3861
3862 Removing the complex class from copyreg.dispatch_table made the
3863 __reduce_ex__() method emit another complex object:
3864
3865 >>> (1+1j).__reduce_ex__(2)
3866 (<function __newobj__ at 0xb7b71c3c>,
3867 (<class 'complex'>, (1+1j)), None, None, None)
3868
3869 Thus when save() was called on newargstup (the 2nd item) recursion
3870 ensued. Of course, the bug was in the complex class which had a
3871 broken __getnewargs__() that emitted another complex object. But,
3872 the point, here, is it is quite easy to end up with a broken reduce
3873 function. */
3874
3875 /* Save the class and its __new__ arguments. */
3876 if (save(self, cls, 0) < 0)
3877 return -1;
3878
3879 newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
3880 if (newargtup == NULL)
3881 return -1;
3882
3883 p = save(self, newargtup, 0);
3884 Py_DECREF(newargtup);
3885 if (p < 0)
3886 return -1;
3887
3888 /* Add NEWOBJ opcode. */
3889 if (_Pickler_Write(self, &newobj_op, 1) < 0)
3890 return -1;
3891 }
3892 else { /* Not using NEWOBJ. */
3893 if (save(self, callable, 0) < 0 ||
3894 save(self, argtup, 0) < 0 ||
3895 _Pickler_Write(self, &reduce_op, 1) < 0)
3896 return -1;
3897 }
3898
3899 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3900 the caller do not want to memoize the object. Not particularly useful,
3901 but that is to mimic the behavior save_reduce() in pickle.py when
3902 obj is None. */
3903 if (obj != NULL) {
3904 /* If the object is already in the memo, this means it is
3905 recursive. In this case, throw away everything we put on the
3906 stack, and fetch the object back from the memo. */
3907 if (PyMemoTable_Get(self->memo, obj)) {
3908 const char pop_op = POP;
3909
3910 if (_Pickler_Write(self, &pop_op, 1) < 0)
3911 return -1;
3912 if (memo_get(self, obj) < 0)
3913 return -1;
3914
3915 return 0;
3916 }
3917 else if (memo_put(self, obj) < 0)
3918 return -1;
3919 }
3920
3921 if (listitems && batch_list(self, listitems) < 0)
3922 return -1;
3923
3924 if (dictitems && batch_dict(self, dictitems) < 0)
3925 return -1;
3926
3927 if (state) {
3928 if (save(self, state, 0) < 0 ||
3929 _Pickler_Write(self, &build_op, 1) < 0)
3930 return -1;
3931 }
3932
3933 return 0;
3934 }
3935
3936 static int
save(PicklerObject * self,PyObject * obj,int pers_save)3937 save(PicklerObject *self, PyObject *obj, int pers_save)
3938 {
3939 PyTypeObject *type;
3940 PyObject *reduce_func = NULL;
3941 PyObject *reduce_value = NULL;
3942 int status = 0;
3943
3944 if (_Pickler_OpcodeBoundary(self) < 0)
3945 return -1;
3946
3947 if (Py_EnterRecursiveCall(" while pickling an object"))
3948 return -1;
3949
3950 /* The extra pers_save argument is necessary to avoid calling save_pers()
3951 on its returned object. */
3952 if (!pers_save && self->pers_func) {
3953 /* save_pers() returns:
3954 -1 to signal an error;
3955 0 if it did nothing successfully;
3956 1 if a persistent id was saved.
3957 */
3958 if ((status = save_pers(self, obj)) != 0)
3959 goto done;
3960 }
3961
3962 type = Py_TYPE(obj);
3963
3964 /* The old cPickle had an optimization that used switch-case statement
3965 dispatching on the first letter of the type name. This has was removed
3966 since benchmarks shown that this optimization was actually slowing
3967 things down. */
3968
3969 /* Atom types; these aren't memoized, so don't check the memo. */
3970
3971 if (obj == Py_None) {
3972 status = save_none(self, obj);
3973 goto done;
3974 }
3975 else if (obj == Py_False || obj == Py_True) {
3976 status = save_bool(self, obj);
3977 goto done;
3978 }
3979 else if (type == &PyLong_Type) {
3980 status = save_long(self, obj);
3981 goto done;
3982 }
3983 else if (type == &PyFloat_Type) {
3984 status = save_float(self, obj);
3985 goto done;
3986 }
3987
3988 /* Check the memo to see if it has the object. If so, generate
3989 a GET (or BINGET) opcode, instead of pickling the object
3990 once again. */
3991 if (PyMemoTable_Get(self->memo, obj)) {
3992 if (memo_get(self, obj) < 0)
3993 goto error;
3994 goto done;
3995 }
3996
3997 if (type == &PyBytes_Type) {
3998 status = save_bytes(self, obj);
3999 goto done;
4000 }
4001 else if (type == &PyUnicode_Type) {
4002 status = save_unicode(self, obj);
4003 goto done;
4004 }
4005 else if (type == &PyDict_Type) {
4006 status = save_dict(self, obj);
4007 goto done;
4008 }
4009 else if (type == &PySet_Type) {
4010 status = save_set(self, obj);
4011 goto done;
4012 }
4013 else if (type == &PyFrozenSet_Type) {
4014 status = save_frozenset(self, obj);
4015 goto done;
4016 }
4017 else if (type == &PyList_Type) {
4018 status = save_list(self, obj);
4019 goto done;
4020 }
4021 else if (type == &PyTuple_Type) {
4022 status = save_tuple(self, obj);
4023 goto done;
4024 }
4025 else if (type == &PyType_Type) {
4026 status = save_type(self, obj);
4027 goto done;
4028 }
4029 else if (type == &PyFunction_Type) {
4030 status = save_global(self, obj, NULL);
4031 goto done;
4032 }
4033
4034 /* XXX: This part needs some unit tests. */
4035
4036 /* Get a reduction callable, and call it. This may come from
4037 * self.dispatch_table, copyreg.dispatch_table, the object's
4038 * __reduce_ex__ method, or the object's __reduce__ method.
4039 */
4040 if (self->dispatch_table == NULL) {
4041 PickleState *st = _Pickle_GetGlobalState();
4042 reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4043 (PyObject *)type);
4044 if (reduce_func == NULL) {
4045 if (PyErr_Occurred()) {
4046 goto error;
4047 }
4048 } else {
4049 /* PyDict_GetItemWithError() returns a borrowed reference.
4050 Increase the reference count to be consistent with
4051 PyObject_GetItem and _PyObject_GetAttrId used below. */
4052 Py_INCREF(reduce_func);
4053 }
4054 } else {
4055 reduce_func = PyObject_GetItem(self->dispatch_table,
4056 (PyObject *)type);
4057 if (reduce_func == NULL) {
4058 if (PyErr_ExceptionMatches(PyExc_KeyError))
4059 PyErr_Clear();
4060 else
4061 goto error;
4062 }
4063 }
4064 if (reduce_func != NULL) {
4065 Py_INCREF(obj);
4066 reduce_value = _Pickle_FastCall(reduce_func, obj);
4067 }
4068 else if (PyType_IsSubtype(type, &PyType_Type)) {
4069 status = save_global(self, obj, NULL);
4070 goto done;
4071 }
4072 else {
4073 _Py_IDENTIFIER(__reduce__);
4074 _Py_IDENTIFIER(__reduce_ex__);
4075
4076
4077 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4078 automatically defined as __reduce__. While this is convenient, this
4079 make it impossible to know which method was actually called. Of
4080 course, this is not a big deal. But still, it would be nice to let
4081 the user know which method was called when something go
4082 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4083 don't actually have to check for a __reduce__ method. */
4084
4085 /* Check for a __reduce_ex__ method. */
4086 if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
4087 goto error;
4088 }
4089 if (reduce_func != NULL) {
4090 PyObject *proto;
4091 proto = PyLong_FromLong(self->proto);
4092 if (proto != NULL) {
4093 reduce_value = _Pickle_FastCall(reduce_func, proto);
4094 }
4095 }
4096 else {
4097 PickleState *st = _Pickle_GetGlobalState();
4098
4099 /* Check for a __reduce__ method. */
4100 reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce__);
4101 if (reduce_func != NULL) {
4102 reduce_value = _PyObject_CallNoArg(reduce_func);
4103 }
4104 else {
4105 PyErr_Format(st->PicklingError,
4106 "can't pickle '%.200s' object: %R",
4107 type->tp_name, obj);
4108 goto error;
4109 }
4110 }
4111 }
4112
4113 if (reduce_value == NULL)
4114 goto error;
4115
4116 if (PyUnicode_Check(reduce_value)) {
4117 status = save_global(self, obj, reduce_value);
4118 goto done;
4119 }
4120
4121 if (!PyTuple_Check(reduce_value)) {
4122 PickleState *st = _Pickle_GetGlobalState();
4123 PyErr_SetString(st->PicklingError,
4124 "__reduce__ must return a string or tuple");
4125 goto error;
4126 }
4127
4128 status = save_reduce(self, reduce_value, obj);
4129
4130 if (0) {
4131 error:
4132 status = -1;
4133 }
4134 done:
4135
4136 Py_LeaveRecursiveCall();
4137 Py_XDECREF(reduce_func);
4138 Py_XDECREF(reduce_value);
4139
4140 return status;
4141 }
4142
4143 static int
dump(PicklerObject * self,PyObject * obj)4144 dump(PicklerObject *self, PyObject *obj)
4145 {
4146 const char stop_op = STOP;
4147
4148 if (self->proto >= 2) {
4149 char header[2];
4150
4151 header[0] = PROTO;
4152 assert(self->proto >= 0 && self->proto < 256);
4153 header[1] = (unsigned char)self->proto;
4154 if (_Pickler_Write(self, header, 2) < 0)
4155 return -1;
4156 if (self->proto >= 4)
4157 self->framing = 1;
4158 }
4159
4160 if (save(self, obj, 0) < 0 ||
4161 _Pickler_Write(self, &stop_op, 1) < 0 ||
4162 _Pickler_CommitFrame(self) < 0)
4163 return -1;
4164 self->framing = 0;
4165 return 0;
4166 }
4167
4168 /*[clinic input]
4169
4170 _pickle.Pickler.clear_memo
4171
4172 Clears the pickler's "memo".
4173
4174 The memo is the data structure that remembers which objects the
4175 pickler has already seen, so that shared or recursive objects are
4176 pickled by reference and not by value. This method is useful when
4177 re-using picklers.
4178 [clinic start generated code]*/
4179
4180 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4181 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4182 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4183 {
4184 if (self->memo)
4185 PyMemoTable_Clear(self->memo);
4186
4187 Py_RETURN_NONE;
4188 }
4189
4190 /*[clinic input]
4191
4192 _pickle.Pickler.dump
4193
4194 obj: object
4195 /
4196
4197 Write a pickled representation of the given object to the open file.
4198 [clinic start generated code]*/
4199
4200 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4201 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4202 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4203 {
4204 /* Check whether the Pickler was initialized correctly (issue3664).
4205 Developers often forget to call __init__() in their subclasses, which
4206 would trigger a segfault without this check. */
4207 if (self->write == NULL) {
4208 PickleState *st = _Pickle_GetGlobalState();
4209 PyErr_Format(st->PicklingError,
4210 "Pickler.__init__() was not called by %s.__init__()",
4211 Py_TYPE(self)->tp_name);
4212 return NULL;
4213 }
4214
4215 if (_Pickler_ClearBuffer(self) < 0)
4216 return NULL;
4217
4218 if (dump(self, obj) < 0)
4219 return NULL;
4220
4221 if (_Pickler_FlushToFile(self) < 0)
4222 return NULL;
4223
4224 Py_RETURN_NONE;
4225 }
4226
4227 /*[clinic input]
4228
4229 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4230
4231 Returns size in memory, in bytes.
4232 [clinic start generated code]*/
4233
4234 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4235 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4236 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4237 {
4238 Py_ssize_t res, s;
4239
4240 res = _PyObject_SIZE(Py_TYPE(self));
4241 if (self->memo != NULL) {
4242 res += sizeof(PyMemoTable);
4243 res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4244 }
4245 if (self->output_buffer != NULL) {
4246 s = _PySys_GetSizeOf(self->output_buffer);
4247 if (s == -1)
4248 return -1;
4249 res += s;
4250 }
4251 return res;
4252 }
4253
/* Method table of the Pickler type: dump(), clear_memo() and __sizeof__().
   Each entry is a *_METHODDEF macro defined outside this part of the file
   (NOTE(review): presumably generated by Argument Clinic -- confirm). */
static struct PyMethodDef Pickler_methods[] = {
    _PICKLE_PICKLER_DUMP_METHODDEF
    _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
    _PICKLE_PICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
4260
4261 static void
Pickler_dealloc(PicklerObject * self)4262 Pickler_dealloc(PicklerObject *self)
4263 {
4264 PyObject_GC_UnTrack(self);
4265
4266 Py_XDECREF(self->output_buffer);
4267 Py_XDECREF(self->write);
4268 Py_XDECREF(self->pers_func);
4269 Py_XDECREF(self->dispatch_table);
4270 Py_XDECREF(self->fast_memo);
4271
4272 PyMemoTable_Del(self->memo);
4273
4274 Py_TYPE(self)->tp_free((PyObject *)self);
4275 }
4276
/* Traversal function of Pickler objects: report the Python objects held in
   the write, pers_func, dispatch_table and fast_memo fields to the visit
   callback.  Py_VISIT returns early from this function if the callback
   returns a non-zero value; otherwise 0 is returned. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->dispatch_table);
    Py_VISIT(self->fast_memo);
    return 0;
}
4286
4287 static int
Pickler_clear(PicklerObject * self)4288 Pickler_clear(PicklerObject *self)
4289 {
4290 Py_CLEAR(self->output_buffer);
4291 Py_CLEAR(self->write);
4292 Py_CLEAR(self->pers_func);
4293 Py_CLEAR(self->dispatch_table);
4294 Py_CLEAR(self->fast_memo);
4295
4296 if (self->memo != NULL) {
4297 PyMemoTable *memo = self->memo;
4298 self->memo = NULL;
4299 PyMemoTable_Del(memo);
4300 }
4301 return 0;
4302 }
4303
4304
4305 /*[clinic input]
4306
4307 _pickle.Pickler.__init__
4308
4309 file: object
4310 protocol: object = NULL
4311 fix_imports: bool = True
4312
4313 This takes a binary file for writing a pickle data stream.
4314
4315 The optional *protocol* argument tells the pickler to use the given
4316 protocol; supported protocols are 0, 1, 2, 3 and 4. The default
4317 protocol is 3; a backward-incompatible protocol designed for Python 3.
4318
4319 Specifying a negative protocol version selects the highest protocol
4320 version supported. The higher the protocol used, the more recent the
4321 version of Python needed to read the pickle produced.
4322
4323 The *file* argument must have a write() method that accepts a single
4324 bytes argument. It can thus be a file object opened for binary
4325 writing, an io.BytesIO instance, or any other custom object that meets
4326 this interface.
4327
4328 If *fix_imports* is True and protocol is less than 3, pickle will try
4329 to map the new Python 3 names to the old module names used in Python
4330 2, so that the pickle data stream is readable with Python 2.
4331 [clinic start generated code]*/
4332
4333 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports)4334 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4335 PyObject *protocol, int fix_imports)
4336 /*[clinic end generated code: output=b5f31078dab17fb0 input=4faabdbc763c2389]*/
4337 {
4338 _Py_IDENTIFIER(persistent_id);
4339 _Py_IDENTIFIER(dispatch_table);
4340
4341 /* In case of multiple __init__() calls, clear previous content. */
4342 if (self->write != NULL)
4343 (void)Pickler_clear(self);
4344
4345 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4346 return -1;
4347
4348 if (_Pickler_SetOutputStream(self, file) < 0)
4349 return -1;
4350
4351 /* memo and output_buffer may have already been created in _Pickler_New */
4352 if (self->memo == NULL) {
4353 self->memo = PyMemoTable_New();
4354 if (self->memo == NULL)
4355 return -1;
4356 }
4357 self->output_len = 0;
4358 if (self->output_buffer == NULL) {
4359 self->max_output_len = WRITE_BUF_SIZE;
4360 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4361 self->max_output_len);
4362 if (self->output_buffer == NULL)
4363 return -1;
4364 }
4365
4366 self->fast = 0;
4367 self->fast_nesting = 0;
4368 self->fast_memo = NULL;
4369
4370 if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4371 &self->pers_func, &self->pers_func_self) < 0)
4372 {
4373 return -1;
4374 }
4375
4376 if (_PyObject_LookupAttrId((PyObject *)self,
4377 &PyId_dispatch_table, &self->dispatch_table) < 0) {
4378 return -1;
4379 }
4380
4381 return 0;
4382 }
4383
4384
4385 /* Define a proxy object for the Pickler's internal memo object. This is to
4386 * avoid breaking code like:
4387 * pickler.memo.clear()
4388 * and
4389 * pickler.memo = saved_memo
4390 * Is this a good idea? Not really, but we don't want to break code that uses
4391 * it. Note that we don't implement the entire mapping API here. This is
4392 * intentional, as these should be treated as black-box implementation details.
4393 */
4394
4395 /*[clinic input]
4396 _pickle.PicklerMemoProxy.clear
4397
4398 Remove all items from memo.
4399 [clinic start generated code]*/
4400
4401 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4402 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4403 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4404 {
4405 if (self->pickler->memo)
4406 PyMemoTable_Clear(self->pickler->memo);
4407 Py_RETURN_NONE;
4408 }
4409
4410 /*[clinic input]
4411 _pickle.PicklerMemoProxy.copy
4412
4413 Copy the memo to a new object.
4414 [clinic start generated code]*/
4415
4416 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4417 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4418 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4419 {
4420 PyMemoTable *memo;
4421 PyObject *new_memo = PyDict_New();
4422 if (new_memo == NULL)
4423 return NULL;
4424
4425 memo = self->pickler->memo;
4426 for (size_t i = 0; i < memo->mt_allocated; ++i) {
4427 PyMemoEntry entry = memo->mt_table[i];
4428 if (entry.me_key != NULL) {
4429 int status;
4430 PyObject *key, *value;
4431
4432 key = PyLong_FromVoidPtr(entry.me_key);
4433 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4434
4435 if (key == NULL || value == NULL) {
4436 Py_XDECREF(key);
4437 Py_XDECREF(value);
4438 goto error;
4439 }
4440 status = PyDict_SetItem(new_memo, key, value);
4441 Py_DECREF(key);
4442 Py_DECREF(value);
4443 if (status < 0)
4444 goto error;
4445 }
4446 }
4447 return new_memo;
4448
4449 error:
4450 Py_XDECREF(new_memo);
4451 return NULL;
4452 }
4453
4454 /*[clinic input]
4455 _pickle.PicklerMemoProxy.__reduce__
4456
4457 Implement pickle support.
4458 [clinic start generated code]*/
4459
4460 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4461 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4462 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4463 {
4464 PyObject *reduce_value, *dict_args;
4465 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4466 if (contents == NULL)
4467 return NULL;
4468
4469 reduce_value = PyTuple_New(2);
4470 if (reduce_value == NULL) {
4471 Py_DECREF(contents);
4472 return NULL;
4473 }
4474 dict_args = PyTuple_New(1);
4475 if (dict_args == NULL) {
4476 Py_DECREF(contents);
4477 Py_DECREF(reduce_value);
4478 return NULL;
4479 }
4480 PyTuple_SET_ITEM(dict_args, 0, contents);
4481 Py_INCREF((PyObject *)&PyDict_Type);
4482 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4483 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4484 return reduce_value;
4485 }
4486
4487 static PyMethodDef picklerproxy_methods[] = {
4488 _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
4489 _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
4490 _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
4491 {NULL, NULL} /* sentinel */
4492 };
4493
4494 static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject * self)4495 PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4496 {
4497 PyObject_GC_UnTrack(self);
4498 Py_XDECREF(self->pickler);
4499 PyObject_GC_Del((PyObject *)self);
4500 }
4501
4502 static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject * self,visitproc visit,void * arg)4503 PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4504 visitproc visit, void *arg)
4505 {
4506 Py_VISIT(self->pickler);
4507 return 0;
4508 }
4509
4510 static int
PicklerMemoProxy_clear(PicklerMemoProxyObject * self)4511 PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4512 {
4513 Py_CLEAR(self->pickler);
4514 return 0;
4515 }
4516
4517 static PyTypeObject PicklerMemoProxyType = {
4518 PyVarObject_HEAD_INIT(NULL, 0)
4519 "_pickle.PicklerMemoProxy", /*tp_name*/
4520 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
4521 0,
4522 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
4523 0, /* tp_print */
4524 0, /* tp_getattr */
4525 0, /* tp_setattr */
4526 0, /* tp_compare */
4527 0, /* tp_repr */
4528 0, /* tp_as_number */
4529 0, /* tp_as_sequence */
4530 0, /* tp_as_mapping */
4531 PyObject_HashNotImplemented, /* tp_hash */
4532 0, /* tp_call */
4533 0, /* tp_str */
4534 PyObject_GenericGetAttr, /* tp_getattro */
4535 PyObject_GenericSetAttr, /* tp_setattro */
4536 0, /* tp_as_buffer */
4537 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4538 0, /* tp_doc */
4539 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
4540 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
4541 0, /* tp_richcompare */
4542 0, /* tp_weaklistoffset */
4543 0, /* tp_iter */
4544 0, /* tp_iternext */
4545 picklerproxy_methods, /* tp_methods */
4546 };
4547
4548 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4549 PicklerMemoProxy_New(PicklerObject *pickler)
4550 {
4551 PicklerMemoProxyObject *self;
4552
4553 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4554 if (self == NULL)
4555 return NULL;
4556 Py_INCREF(pickler);
4557 self->pickler = pickler;
4558 PyObject_GC_Track(self);
4559 return (PyObject *)self;
4560 }
4561
4562 /*****************************************************************************/
4563
4564 static PyObject *
Pickler_get_memo(PicklerObject * self,void * Py_UNUSED (ignored))4565 Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
4566 {
4567 return PicklerMemoProxy_New(self);
4568 }
4569
4570 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4571 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4572 {
4573 PyMemoTable *new_memo = NULL;
4574
4575 if (obj == NULL) {
4576 PyErr_SetString(PyExc_TypeError,
4577 "attribute deletion is not supported");
4578 return -1;
4579 }
4580
4581 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4582 PicklerObject *pickler =
4583 ((PicklerMemoProxyObject *)obj)->pickler;
4584
4585 new_memo = PyMemoTable_Copy(pickler->memo);
4586 if (new_memo == NULL)
4587 return -1;
4588 }
4589 else if (PyDict_Check(obj)) {
4590 Py_ssize_t i = 0;
4591 PyObject *key, *value;
4592
4593 new_memo = PyMemoTable_New();
4594 if (new_memo == NULL)
4595 return -1;
4596
4597 while (PyDict_Next(obj, &i, &key, &value)) {
4598 Py_ssize_t memo_id;
4599 PyObject *memo_obj;
4600
4601 if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4602 PyErr_SetString(PyExc_TypeError,
4603 "'memo' values must be 2-item tuples");
4604 goto error;
4605 }
4606 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
4607 if (memo_id == -1 && PyErr_Occurred())
4608 goto error;
4609 memo_obj = PyTuple_GET_ITEM(value, 1);
4610 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
4611 goto error;
4612 }
4613 }
4614 else {
4615 PyErr_Format(PyExc_TypeError,
4616 "'memo' attribute must be a PicklerMemoProxy object "
4617 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
4618 return -1;
4619 }
4620
4621 PyMemoTable_Del(self->memo);
4622 self->memo = new_memo;
4623
4624 return 0;
4625
4626 error:
4627 if (new_memo)
4628 PyMemoTable_Del(new_memo);
4629 return -1;
4630 }
4631
4632 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))4633 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
4634 {
4635 if (self->pers_func == NULL) {
4636 PyErr_SetString(PyExc_AttributeError, "persistent_id");
4637 return NULL;
4638 }
4639 return reconstruct_method(self->pers_func, self->pers_func_self);
4640 }
4641
4642 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))4643 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
4644 {
4645 if (value == NULL) {
4646 PyErr_SetString(PyExc_TypeError,
4647 "attribute deletion is not supported");
4648 return -1;
4649 }
4650 if (!PyCallable_Check(value)) {
4651 PyErr_SetString(PyExc_TypeError,
4652 "persistent_id must be a callable taking one argument");
4653 return -1;
4654 }
4655
4656 self->pers_func_self = NULL;
4657 Py_INCREF(value);
4658 Py_XSETREF(self->pers_func, value);
4659
4660 return 0;
4661 }
4662
4663 static PyMemberDef Pickler_members[] = {
4664 {"bin", T_INT, offsetof(PicklerObject, bin)},
4665 {"fast", T_INT, offsetof(PicklerObject, fast)},
4666 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
4667 {NULL}
4668 };
4669
4670 static PyGetSetDef Pickler_getsets[] = {
4671 {"memo", (getter)Pickler_get_memo,
4672 (setter)Pickler_set_memo},
4673 {"persistent_id", (getter)Pickler_get_persid,
4674 (setter)Pickler_set_persid},
4675 {NULL}
4676 };
4677
4678 static PyTypeObject Pickler_Type = {
4679 PyVarObject_HEAD_INIT(NULL, 0)
4680 "_pickle.Pickler" , /*tp_name*/
4681 sizeof(PicklerObject), /*tp_basicsize*/
4682 0, /*tp_itemsize*/
4683 (destructor)Pickler_dealloc, /*tp_dealloc*/
4684 0, /*tp_print*/
4685 0, /*tp_getattr*/
4686 0, /*tp_setattr*/
4687 0, /*tp_reserved*/
4688 0, /*tp_repr*/
4689 0, /*tp_as_number*/
4690 0, /*tp_as_sequence*/
4691 0, /*tp_as_mapping*/
4692 0, /*tp_hash*/
4693 0, /*tp_call*/
4694 0, /*tp_str*/
4695 0, /*tp_getattro*/
4696 0, /*tp_setattro*/
4697 0, /*tp_as_buffer*/
4698 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4699 _pickle_Pickler___init____doc__, /*tp_doc*/
4700 (traverseproc)Pickler_traverse, /*tp_traverse*/
4701 (inquiry)Pickler_clear, /*tp_clear*/
4702 0, /*tp_richcompare*/
4703 0, /*tp_weaklistoffset*/
4704 0, /*tp_iter*/
4705 0, /*tp_iternext*/
4706 Pickler_methods, /*tp_methods*/
4707 Pickler_members, /*tp_members*/
4708 Pickler_getsets, /*tp_getset*/
4709 0, /*tp_base*/
4710 0, /*tp_dict*/
4711 0, /*tp_descr_get*/
4712 0, /*tp_descr_set*/
4713 0, /*tp_dictoffset*/
4714 _pickle_Pickler___init__, /*tp_init*/
4715 PyType_GenericAlloc, /*tp_alloc*/
4716 PyType_GenericNew, /*tp_new*/
4717 PyObject_GC_Del, /*tp_free*/
4718 0, /*tp_is_gc*/
4719 };
4720
4721 /* Temporary helper for calling self.find_class().
4722
4723 XXX: It would be nice to able to avoid Python function call overhead, by
4724 using directly the C version of find_class(), when find_class() is not
4725 overridden by a subclass. Although, this could become rather hackish. A
4726 simpler optimization would be to call the C function when self is not a
4727 subclass instance. */
4728 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)4729 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
4730 {
4731 _Py_IDENTIFIER(find_class);
4732
4733 return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
4734 module_name, global_name, NULL);
4735 }
4736
4737 static Py_ssize_t
marker(UnpicklerObject * self)4738 marker(UnpicklerObject *self)
4739 {
4740 Py_ssize_t mark;
4741
4742 if (self->num_marks < 1) {
4743 PickleState *st = _Pickle_GetGlobalState();
4744 PyErr_SetString(st->UnpicklingError, "could not find MARK");
4745 return -1;
4746 }
4747
4748 mark = self->marks[--self->num_marks];
4749 self->stack->mark_set = self->num_marks != 0;
4750 self->stack->fence = self->num_marks ?
4751 self->marks[self->num_marks - 1] : 0;
4752 return mark;
4753 }
4754
4755 static int
load_none(UnpicklerObject * self)4756 load_none(UnpicklerObject *self)
4757 {
4758 PDATA_APPEND(self->stack, Py_None, -1);
4759 return 0;
4760 }
4761
4762 static int
load_int(UnpicklerObject * self)4763 load_int(UnpicklerObject *self)
4764 {
4765 PyObject *value;
4766 char *endptr, *s;
4767 Py_ssize_t len;
4768 long x;
4769
4770 if ((len = _Unpickler_Readline(self, &s)) < 0)
4771 return -1;
4772 if (len < 2)
4773 return bad_readline();
4774
4775 errno = 0;
4776 /* XXX: Should the base argument of strtol() be explicitly set to 10?
4777 XXX(avassalotti): Should this uses PyOS_strtol()? */
4778 x = strtol(s, &endptr, 0);
4779
4780 if (errno || (*endptr != '\n' && *endptr != '\0')) {
4781 /* Hm, maybe we've got something long. Let's try reading
4782 * it as a Python int object. */
4783 errno = 0;
4784 /* XXX: Same thing about the base here. */
4785 value = PyLong_FromString(s, NULL, 0);
4786 if (value == NULL) {
4787 PyErr_SetString(PyExc_ValueError,
4788 "could not convert string to int");
4789 return -1;
4790 }
4791 }
4792 else {
4793 if (len == 3 && (x == 0 || x == 1)) {
4794 if ((value = PyBool_FromLong(x)) == NULL)
4795 return -1;
4796 }
4797 else {
4798 if ((value = PyLong_FromLong(x)) == NULL)
4799 return -1;
4800 }
4801 }
4802
4803 PDATA_PUSH(self->stack, value, -1);
4804 return 0;
4805 }
4806
4807 static int
load_bool(UnpicklerObject * self,PyObject * boolean)4808 load_bool(UnpicklerObject *self, PyObject *boolean)
4809 {
4810 assert(boolean == Py_True || boolean == Py_False);
4811 PDATA_APPEND(self->stack, boolean, -1);
4812 return 0;
4813 }
4814
4815 /* s contains x bytes of an unsigned little-endian integer. Return its value
4816 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
4817 */
4818 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)4819 calc_binsize(char *bytes, int nbytes)
4820 {
4821 unsigned char *s = (unsigned char *)bytes;
4822 int i;
4823 size_t x = 0;
4824
4825 if (nbytes > (int)sizeof(size_t)) {
4826 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes
4827 * have 64-bit size that can't be represented on 32-bit platform.
4828 */
4829 for (i = (int)sizeof(size_t); i < nbytes; i++) {
4830 if (s[i])
4831 return -1;
4832 }
4833 nbytes = (int)sizeof(size_t);
4834 }
4835 for (i = 0; i < nbytes; i++) {
4836 x |= (size_t) s[i] << (8 * i);
4837 }
4838
4839 if (x > PY_SSIZE_T_MAX)
4840 return -1;
4841 else
4842 return (Py_ssize_t) x;
4843 }
4844
/* Decode the first `nbytes` bytes of `bytes` as a little-endian integer and
 * return it as a C long.  Obscure: when nbytes is 1 or 2 the value is
 * unsigned, but when nbytes is 4 it is a signed 32-bit quantity.  This is a
 * historical source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that no shift ever
 * reaches the sign bit of a signed type (that would be undefined behaviour
 * where long is 32 bits wide).
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long acc = 0;
    int i;

    for (i = 0; i < nbytes; i++) {
        acc |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) is
     * signed.  When bit 31 is set the value is acc - 2**32; compute that
     * from the low 31 bits so every intermediate fits in a long of any
     * width.
     */
    if (nbytes == 4 && (acc & 0x80000000UL) != 0) {
        return (long)(acc & 0x7fffffffUL) - 0x7fffffffL - 1L;
    }

    return (long)acc;
}
4871
4872 static int
load_binintx(UnpicklerObject * self,char * s,int size)4873 load_binintx(UnpicklerObject *self, char *s, int size)
4874 {
4875 PyObject *value;
4876 long x;
4877
4878 x = calc_binint(s, size);
4879
4880 if ((value = PyLong_FromLong(x)) == NULL)
4881 return -1;
4882
4883 PDATA_PUSH(self->stack, value, -1);
4884 return 0;
4885 }
4886
4887 static int
load_binint(UnpicklerObject * self)4888 load_binint(UnpicklerObject *self)
4889 {
4890 char *s;
4891
4892 if (_Unpickler_Read(self, &s, 4) < 0)
4893 return -1;
4894
4895 return load_binintx(self, s, 4);
4896 }
4897
4898 static int
load_binint1(UnpicklerObject * self)4899 load_binint1(UnpicklerObject *self)
4900 {
4901 char *s;
4902
4903 if (_Unpickler_Read(self, &s, 1) < 0)
4904 return -1;
4905
4906 return load_binintx(self, s, 1);
4907 }
4908
4909 static int
load_binint2(UnpicklerObject * self)4910 load_binint2(UnpicklerObject *self)
4911 {
4912 char *s;
4913
4914 if (_Unpickler_Read(self, &s, 2) < 0)
4915 return -1;
4916
4917 return load_binintx(self, s, 2);
4918 }
4919
4920 static int
load_long(UnpicklerObject * self)4921 load_long(UnpicklerObject *self)
4922 {
4923 PyObject *value;
4924 char *s = NULL;
4925 Py_ssize_t len;
4926
4927 if ((len = _Unpickler_Readline(self, &s)) < 0)
4928 return -1;
4929 if (len < 2)
4930 return bad_readline();
4931
4932 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4933 the 'L' before calling PyLong_FromString. In order to maintain
4934 compatibility with Python 3.0.0, we don't actually *require*
4935 the 'L' to be present. */
4936 if (s[len-2] == 'L')
4937 s[len-2] = '\0';
4938 /* XXX: Should the base argument explicitly set to 10? */
4939 value = PyLong_FromString(s, NULL, 0);
4940 if (value == NULL)
4941 return -1;
4942
4943 PDATA_PUSH(self->stack, value, -1);
4944 return 0;
4945 }
4946
4947 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
4948 * data following.
4949 */
4950 static int
load_counted_long(UnpicklerObject * self,int size)4951 load_counted_long(UnpicklerObject *self, int size)
4952 {
4953 PyObject *value;
4954 char *nbytes;
4955 char *pdata;
4956
4957 assert(size == 1 || size == 4);
4958 if (_Unpickler_Read(self, &nbytes, size) < 0)
4959 return -1;
4960
4961 size = calc_binint(nbytes, size);
4962 if (size < 0) {
4963 PickleState *st = _Pickle_GetGlobalState();
4964 /* Corrupt or hostile pickle -- we never write one like this */
4965 PyErr_SetString(st->UnpicklingError,
4966 "LONG pickle has negative byte count");
4967 return -1;
4968 }
4969
4970 if (size == 0)
4971 value = PyLong_FromLong(0L);
4972 else {
4973 /* Read the raw little-endian bytes and convert. */
4974 if (_Unpickler_Read(self, &pdata, size) < 0)
4975 return -1;
4976 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4977 1 /* little endian */ , 1 /* signed */ );
4978 }
4979 if (value == NULL)
4980 return -1;
4981 PDATA_PUSH(self->stack, value, -1);
4982 return 0;
4983 }
4984
4985 static int
load_float(UnpicklerObject * self)4986 load_float(UnpicklerObject *self)
4987 {
4988 PyObject *value;
4989 char *endptr, *s;
4990 Py_ssize_t len;
4991 double d;
4992
4993 if ((len = _Unpickler_Readline(self, &s)) < 0)
4994 return -1;
4995 if (len < 2)
4996 return bad_readline();
4997
4998 errno = 0;
4999 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5000 if (d == -1.0 && PyErr_Occurred())
5001 return -1;
5002 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5003 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5004 return -1;
5005 }
5006 value = PyFloat_FromDouble(d);
5007 if (value == NULL)
5008 return -1;
5009
5010 PDATA_PUSH(self->stack, value, -1);
5011 return 0;
5012 }
5013
5014 static int
load_binfloat(UnpicklerObject * self)5015 load_binfloat(UnpicklerObject *self)
5016 {
5017 PyObject *value;
5018 double x;
5019 char *s;
5020
5021 if (_Unpickler_Read(self, &s, 8) < 0)
5022 return -1;
5023
5024 x = _PyFloat_Unpack8((unsigned char *)s, 0);
5025 if (x == -1.0 && PyErr_Occurred())
5026 return -1;
5027
5028 if ((value = PyFloat_FromDouble(x)) == NULL)
5029 return -1;
5030
5031 PDATA_PUSH(self->stack, value, -1);
5032 return 0;
5033 }
5034
5035 static int
load_string(UnpicklerObject * self)5036 load_string(UnpicklerObject *self)
5037 {
5038 PyObject *bytes;
5039 PyObject *obj;
5040 Py_ssize_t len;
5041 char *s, *p;
5042
5043 if ((len = _Unpickler_Readline(self, &s)) < 0)
5044 return -1;
5045 /* Strip the newline */
5046 len--;
5047 /* Strip outermost quotes */
5048 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5049 p = s + 1;
5050 len -= 2;
5051 }
5052 else {
5053 PickleState *st = _Pickle_GetGlobalState();
5054 PyErr_SetString(st->UnpicklingError,
5055 "the STRING opcode argument must be quoted");
5056 return -1;
5057 }
5058 assert(len >= 0);
5059
5060 /* Use the PyBytes API to decode the string, since that is what is used
5061 to encode, and then coerce the result to Unicode. */
5062 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5063 if (bytes == NULL)
5064 return -1;
5065
5066 /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5067 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5068 if (strcmp(self->encoding, "bytes") == 0) {
5069 obj = bytes;
5070 }
5071 else {
5072 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5073 Py_DECREF(bytes);
5074 if (obj == NULL) {
5075 return -1;
5076 }
5077 }
5078
5079 PDATA_PUSH(self->stack, obj, -1);
5080 return 0;
5081 }
5082
5083 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5084 load_counted_binstring(UnpicklerObject *self, int nbytes)
5085 {
5086 PyObject *obj;
5087 Py_ssize_t size;
5088 char *s;
5089
5090 if (_Unpickler_Read(self, &s, nbytes) < 0)
5091 return -1;
5092
5093 size = calc_binsize(s, nbytes);
5094 if (size < 0) {
5095 PickleState *st = _Pickle_GetGlobalState();
5096 PyErr_Format(st->UnpicklingError,
5097 "BINSTRING exceeds system's maximum size of %zd bytes",
5098 PY_SSIZE_T_MAX);
5099 return -1;
5100 }
5101
5102 if (_Unpickler_Read(self, &s, size) < 0)
5103 return -1;
5104
5105 /* Convert Python 2.x strings to bytes if the *encoding* given to the
5106 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5107 if (strcmp(self->encoding, "bytes") == 0) {
5108 obj = PyBytes_FromStringAndSize(s, size);
5109 }
5110 else {
5111 obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5112 }
5113 if (obj == NULL) {
5114 return -1;
5115 }
5116
5117 PDATA_PUSH(self->stack, obj, -1);
5118 return 0;
5119 }
5120
5121 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5122 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5123 {
5124 PyObject *bytes;
5125 Py_ssize_t size;
5126 char *s;
5127
5128 if (_Unpickler_Read(self, &s, nbytes) < 0)
5129 return -1;
5130
5131 size = calc_binsize(s, nbytes);
5132 if (size < 0) {
5133 PyErr_Format(PyExc_OverflowError,
5134 "BINBYTES exceeds system's maximum size of %zd bytes",
5135 PY_SSIZE_T_MAX);
5136 return -1;
5137 }
5138
5139 if (_Unpickler_Read(self, &s, size) < 0)
5140 return -1;
5141
5142 bytes = PyBytes_FromStringAndSize(s, size);
5143 if (bytes == NULL)
5144 return -1;
5145
5146 PDATA_PUSH(self->stack, bytes, -1);
5147 return 0;
5148 }
5149
5150 static int
load_unicode(UnpicklerObject * self)5151 load_unicode(UnpicklerObject *self)
5152 {
5153 PyObject *str;
5154 Py_ssize_t len;
5155 char *s = NULL;
5156
5157 if ((len = _Unpickler_Readline(self, &s)) < 0)
5158 return -1;
5159 if (len < 1)
5160 return bad_readline();
5161
5162 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5163 if (str == NULL)
5164 return -1;
5165
5166 PDATA_PUSH(self->stack, str, -1);
5167 return 0;
5168 }
5169
5170 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5171 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5172 {
5173 PyObject *str;
5174 Py_ssize_t size;
5175 char *s;
5176
5177 if (_Unpickler_Read(self, &s, nbytes) < 0)
5178 return -1;
5179
5180 size = calc_binsize(s, nbytes);
5181 if (size < 0) {
5182 PyErr_Format(PyExc_OverflowError,
5183 "BINUNICODE exceeds system's maximum size of %zd bytes",
5184 PY_SSIZE_T_MAX);
5185 return -1;
5186 }
5187
5188 if (_Unpickler_Read(self, &s, size) < 0)
5189 return -1;
5190
5191 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5192 if (str == NULL)
5193 return -1;
5194
5195 PDATA_PUSH(self->stack, str, -1);
5196 return 0;
5197 }
5198
5199 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5200 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5201 {
5202 PyObject *tuple;
5203
5204 if (Py_SIZE(self->stack) < len)
5205 return Pdata_stack_underflow(self->stack);
5206
5207 tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5208 if (tuple == NULL)
5209 return -1;
5210 PDATA_PUSH(self->stack, tuple, -1);
5211 return 0;
5212 }
5213
5214 static int
load_tuple(UnpicklerObject * self)5215 load_tuple(UnpicklerObject *self)
5216 {
5217 Py_ssize_t i;
5218
5219 if ((i = marker(self)) < 0)
5220 return -1;
5221
5222 return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5223 }
5224
5225 static int
load_empty_list(UnpicklerObject * self)5226 load_empty_list(UnpicklerObject *self)
5227 {
5228 PyObject *list;
5229
5230 if ((list = PyList_New(0)) == NULL)
5231 return -1;
5232 PDATA_PUSH(self->stack, list, -1);
5233 return 0;
5234 }
5235
5236 static int
load_empty_dict(UnpicklerObject * self)5237 load_empty_dict(UnpicklerObject *self)
5238 {
5239 PyObject *dict;
5240
5241 if ((dict = PyDict_New()) == NULL)
5242 return -1;
5243 PDATA_PUSH(self->stack, dict, -1);
5244 return 0;
5245 }
5246
5247 static int
load_empty_set(UnpicklerObject * self)5248 load_empty_set(UnpicklerObject *self)
5249 {
5250 PyObject *set;
5251
5252 if ((set = PySet_New(NULL)) == NULL)
5253 return -1;
5254 PDATA_PUSH(self->stack, set, -1);
5255 return 0;
5256 }
5257
5258 static int
load_list(UnpicklerObject * self)5259 load_list(UnpicklerObject *self)
5260 {
5261 PyObject *list;
5262 Py_ssize_t i;
5263
5264 if ((i = marker(self)) < 0)
5265 return -1;
5266
5267 list = Pdata_poplist(self->stack, i);
5268 if (list == NULL)
5269 return -1;
5270 PDATA_PUSH(self->stack, list, -1);
5271 return 0;
5272 }
5273
5274 static int
load_dict(UnpicklerObject * self)5275 load_dict(UnpicklerObject *self)
5276 {
5277 PyObject *dict, *key, *value;
5278 Py_ssize_t i, j, k;
5279
5280 if ((i = marker(self)) < 0)
5281 return -1;
5282 j = Py_SIZE(self->stack);
5283
5284 if ((dict = PyDict_New()) == NULL)
5285 return -1;
5286
5287 if ((j - i) % 2 != 0) {
5288 PickleState *st = _Pickle_GetGlobalState();
5289 PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5290 Py_DECREF(dict);
5291 return -1;
5292 }
5293
5294 for (k = i + 1; k < j; k += 2) {
5295 key = self->stack->data[k - 1];
5296 value = self->stack->data[k];
5297 if (PyDict_SetItem(dict, key, value) < 0) {
5298 Py_DECREF(dict);
5299 return -1;
5300 }
5301 }
5302 Pdata_clear(self->stack, i);
5303 PDATA_PUSH(self->stack, dict, -1);
5304 return 0;
5305 }
5306
5307 static int
load_frozenset(UnpicklerObject * self)5308 load_frozenset(UnpicklerObject *self)
5309 {
5310 PyObject *items;
5311 PyObject *frozenset;
5312 Py_ssize_t i;
5313
5314 if ((i = marker(self)) < 0)
5315 return -1;
5316
5317 items = Pdata_poptuple(self->stack, i);
5318 if (items == NULL)
5319 return -1;
5320
5321 frozenset = PyFrozenSet_New(items);
5322 Py_DECREF(items);
5323 if (frozenset == NULL)
5324 return -1;
5325
5326 PDATA_PUSH(self->stack, frozenset, -1);
5327 return 0;
5328 }
5329
5330 static PyObject *
instantiate(PyObject * cls,PyObject * args)5331 instantiate(PyObject *cls, PyObject *args)
5332 {
5333 /* Caller must assure args are a tuple. Normally, args come from
5334 Pdata_poptuple which packs objects from the top of the stack
5335 into a newly created tuple. */
5336 assert(PyTuple_Check(args));
5337 if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5338 _Py_IDENTIFIER(__getinitargs__);
5339 _Py_IDENTIFIER(__new__);
5340 PyObject *func;
5341 if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5342 return NULL;
5343 }
5344 if (func == NULL) {
5345 return _PyObject_CallMethodIdObjArgs(cls, &PyId___new__, cls, NULL);
5346 }
5347 Py_DECREF(func);
5348 }
5349 return PyObject_CallObject(cls, args);
5350 }
5351
5352 static int
load_obj(UnpicklerObject * self)5353 load_obj(UnpicklerObject *self)
5354 {
5355 PyObject *cls, *args, *obj = NULL;
5356 Py_ssize_t i;
5357
5358 if ((i = marker(self)) < 0)
5359 return -1;
5360
5361 if (Py_SIZE(self->stack) - i < 1)
5362 return Pdata_stack_underflow(self->stack);
5363
5364 args = Pdata_poptuple(self->stack, i + 1);
5365 if (args == NULL)
5366 return -1;
5367
5368 PDATA_POP(self->stack, cls);
5369 if (cls) {
5370 obj = instantiate(cls, args);
5371 Py_DECREF(cls);
5372 }
5373 Py_DECREF(args);
5374 if (obj == NULL)
5375 return -1;
5376
5377 PDATA_PUSH(self->stack, obj, -1);
5378 return 0;
5379 }
5380
5381 static int
load_inst(UnpicklerObject * self)5382 load_inst(UnpicklerObject *self)
5383 {
5384 PyObject *cls = NULL;
5385 PyObject *args = NULL;
5386 PyObject *obj = NULL;
5387 PyObject *module_name;
5388 PyObject *class_name;
5389 Py_ssize_t len;
5390 Py_ssize_t i;
5391 char *s;
5392
5393 if ((i = marker(self)) < 0)
5394 return -1;
5395 if ((len = _Unpickler_Readline(self, &s)) < 0)
5396 return -1;
5397 if (len < 2)
5398 return bad_readline();
5399
5400 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5401 identifiers are permitted in Python 3.0, since the INST opcode is only
5402 supported by older protocols on Python 2.x. */
5403 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5404 if (module_name == NULL)
5405 return -1;
5406
5407 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5408 if (len < 2) {
5409 Py_DECREF(module_name);
5410 return bad_readline();
5411 }
5412 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5413 if (class_name != NULL) {
5414 cls = find_class(self, module_name, class_name);
5415 Py_DECREF(class_name);
5416 }
5417 }
5418 Py_DECREF(module_name);
5419
5420 if (cls == NULL)
5421 return -1;
5422
5423 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5424 obj = instantiate(cls, args);
5425 Py_DECREF(args);
5426 }
5427 Py_DECREF(cls);
5428
5429 if (obj == NULL)
5430 return -1;
5431
5432 PDATA_PUSH(self->stack, obj, -1);
5433 return 0;
5434 }
5435
5436 static int
load_newobj(UnpicklerObject * self)5437 load_newobj(UnpicklerObject *self)
5438 {
5439 PyObject *args = NULL;
5440 PyObject *clsraw = NULL;
5441 PyTypeObject *cls; /* clsraw cast to its true type */
5442 PyObject *obj;
5443 PickleState *st = _Pickle_GetGlobalState();
5444
5445 /* Stack is ... cls argtuple, and we want to call
5446 * cls.__new__(cls, *argtuple).
5447 */
5448 PDATA_POP(self->stack, args);
5449 if (args == NULL)
5450 goto error;
5451 if (!PyTuple_Check(args)) {
5452 PyErr_SetString(st->UnpicklingError,
5453 "NEWOBJ expected an arg " "tuple.");
5454 goto error;
5455 }
5456
5457 PDATA_POP(self->stack, clsraw);
5458 cls = (PyTypeObject *)clsraw;
5459 if (cls == NULL)
5460 goto error;
5461 if (!PyType_Check(cls)) {
5462 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5463 "isn't a type object");
5464 goto error;
5465 }
5466 if (cls->tp_new == NULL) {
5467 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5468 "has NULL tp_new");
5469 goto error;
5470 }
5471
5472 /* Call __new__. */
5473 obj = cls->tp_new(cls, args, NULL);
5474 if (obj == NULL)
5475 goto error;
5476
5477 Py_DECREF(args);
5478 Py_DECREF(clsraw);
5479 PDATA_PUSH(self->stack, obj, -1);
5480 return 0;
5481
5482 error:
5483 Py_XDECREF(args);
5484 Py_XDECREF(clsraw);
5485 return -1;
5486 }
5487
5488 static int
load_newobj_ex(UnpicklerObject * self)5489 load_newobj_ex(UnpicklerObject *self)
5490 {
5491 PyObject *cls, *args, *kwargs;
5492 PyObject *obj;
5493 PickleState *st = _Pickle_GetGlobalState();
5494
5495 PDATA_POP(self->stack, kwargs);
5496 if (kwargs == NULL) {
5497 return -1;
5498 }
5499 PDATA_POP(self->stack, args);
5500 if (args == NULL) {
5501 Py_DECREF(kwargs);
5502 return -1;
5503 }
5504 PDATA_POP(self->stack, cls);
5505 if (cls == NULL) {
5506 Py_DECREF(kwargs);
5507 Py_DECREF(args);
5508 return -1;
5509 }
5510
5511 if (!PyType_Check(cls)) {
5512 Py_DECREF(kwargs);
5513 Py_DECREF(args);
5514 PyErr_Format(st->UnpicklingError,
5515 "NEWOBJ_EX class argument must be a type, not %.200s",
5516 Py_TYPE(cls)->tp_name);
5517 Py_DECREF(cls);
5518 return -1;
5519 }
5520
5521 if (((PyTypeObject *)cls)->tp_new == NULL) {
5522 Py_DECREF(kwargs);
5523 Py_DECREF(args);
5524 Py_DECREF(cls);
5525 PyErr_SetString(st->UnpicklingError,
5526 "NEWOBJ_EX class argument doesn't have __new__");
5527 return -1;
5528 }
5529 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5530 Py_DECREF(kwargs);
5531 Py_DECREF(args);
5532 Py_DECREF(cls);
5533 if (obj == NULL) {
5534 return -1;
5535 }
5536 PDATA_PUSH(self->stack, obj, -1);
5537 return 0;
5538 }
5539
5540 static int
load_global(UnpicklerObject * self)5541 load_global(UnpicklerObject *self)
5542 {
5543 PyObject *global = NULL;
5544 PyObject *module_name;
5545 PyObject *global_name;
5546 Py_ssize_t len;
5547 char *s;
5548
5549 if ((len = _Unpickler_Readline(self, &s)) < 0)
5550 return -1;
5551 if (len < 2)
5552 return bad_readline();
5553 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5554 if (!module_name)
5555 return -1;
5556
5557 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5558 if (len < 2) {
5559 Py_DECREF(module_name);
5560 return bad_readline();
5561 }
5562 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5563 if (global_name) {
5564 global = find_class(self, module_name, global_name);
5565 Py_DECREF(global_name);
5566 }
5567 }
5568 Py_DECREF(module_name);
5569
5570 if (global == NULL)
5571 return -1;
5572 PDATA_PUSH(self->stack, global, -1);
5573 return 0;
5574 }
5575
5576 static int
load_stack_global(UnpicklerObject * self)5577 load_stack_global(UnpicklerObject *self)
5578 {
5579 PyObject *global;
5580 PyObject *module_name;
5581 PyObject *global_name;
5582
5583 PDATA_POP(self->stack, global_name);
5584 PDATA_POP(self->stack, module_name);
5585 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
5586 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
5587 PickleState *st = _Pickle_GetGlobalState();
5588 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
5589 Py_XDECREF(global_name);
5590 Py_XDECREF(module_name);
5591 return -1;
5592 }
5593 global = find_class(self, module_name, global_name);
5594 Py_DECREF(global_name);
5595 Py_DECREF(module_name);
5596 if (global == NULL)
5597 return -1;
5598 PDATA_PUSH(self->stack, global, -1);
5599 return 0;
5600 }
5601
5602 static int
load_persid(UnpicklerObject * self)5603 load_persid(UnpicklerObject *self)
5604 {
5605 PyObject *pid, *obj;
5606 Py_ssize_t len;
5607 char *s;
5608
5609 if (self->pers_func) {
5610 if ((len = _Unpickler_Readline(self, &s)) < 0)
5611 return -1;
5612 if (len < 1)
5613 return bad_readline();
5614
5615 pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
5616 if (pid == NULL) {
5617 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
5618 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
5619 "persistent IDs in protocol 0 must be "
5620 "ASCII strings");
5621 }
5622 return -1;
5623 }
5624
5625 obj = call_method(self->pers_func, self->pers_func_self, pid);
5626 Py_DECREF(pid);
5627 if (obj == NULL)
5628 return -1;
5629
5630 PDATA_PUSH(self->stack, obj, -1);
5631 return 0;
5632 }
5633 else {
5634 PickleState *st = _Pickle_GetGlobalState();
5635 PyErr_SetString(st->UnpicklingError,
5636 "A load persistent id instruction was encountered,\n"
5637 "but no persistent_load function was specified.");
5638 return -1;
5639 }
5640 }
5641
5642 static int
load_binpersid(UnpicklerObject * self)5643 load_binpersid(UnpicklerObject *self)
5644 {
5645 PyObject *pid, *obj;
5646
5647 if (self->pers_func) {
5648 PDATA_POP(self->stack, pid);
5649 if (pid == NULL)
5650 return -1;
5651
5652 obj = call_method(self->pers_func, self->pers_func_self, pid);
5653 Py_DECREF(pid);
5654 if (obj == NULL)
5655 return -1;
5656
5657 PDATA_PUSH(self->stack, obj, -1);
5658 return 0;
5659 }
5660 else {
5661 PickleState *st = _Pickle_GetGlobalState();
5662 PyErr_SetString(st->UnpicklingError,
5663 "A load persistent id instruction was encountered,\n"
5664 "but no persistent_load function was specified.");
5665 return -1;
5666 }
5667 }
5668
5669 static int
load_pop(UnpicklerObject * self)5670 load_pop(UnpicklerObject *self)
5671 {
5672 Py_ssize_t len = Py_SIZE(self->stack);
5673
5674 /* Note that we split the (pickle.py) stack into two stacks,
5675 * an object stack and a mark stack. We have to be clever and
5676 * pop the right one. We do this by looking at the top of the
5677 * mark stack first, and only signalling a stack underflow if
5678 * the object stack is empty and the mark stack doesn't match
5679 * our expectations.
5680 */
5681 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
5682 self->num_marks--;
5683 self->stack->mark_set = self->num_marks != 0;
5684 self->stack->fence = self->num_marks ?
5685 self->marks[self->num_marks - 1] : 0;
5686 } else if (len <= self->stack->fence)
5687 return Pdata_stack_underflow(self->stack);
5688 else {
5689 len--;
5690 Py_DECREF(self->stack->data[len]);
5691 Py_SIZE(self->stack) = len;
5692 }
5693 return 0;
5694 }
5695
5696 static int
load_pop_mark(UnpicklerObject * self)5697 load_pop_mark(UnpicklerObject *self)
5698 {
5699 Py_ssize_t i;
5700
5701 if ((i = marker(self)) < 0)
5702 return -1;
5703
5704 Pdata_clear(self->stack, i);
5705
5706 return 0;
5707 }
5708
5709 static int
load_dup(UnpicklerObject * self)5710 load_dup(UnpicklerObject *self)
5711 {
5712 PyObject *last;
5713 Py_ssize_t len = Py_SIZE(self->stack);
5714
5715 if (len <= self->stack->fence)
5716 return Pdata_stack_underflow(self->stack);
5717 last = self->stack->data[len - 1];
5718 PDATA_APPEND(self->stack, last, -1);
5719 return 0;
5720 }
5721
5722 static int
load_get(UnpicklerObject * self)5723 load_get(UnpicklerObject *self)
5724 {
5725 PyObject *key, *value;
5726 Py_ssize_t idx;
5727 Py_ssize_t len;
5728 char *s;
5729
5730 if ((len = _Unpickler_Readline(self, &s)) < 0)
5731 return -1;
5732 if (len < 2)
5733 return bad_readline();
5734
5735 key = PyLong_FromString(s, NULL, 10);
5736 if (key == NULL)
5737 return -1;
5738 idx = PyLong_AsSsize_t(key);
5739 if (idx == -1 && PyErr_Occurred()) {
5740 Py_DECREF(key);
5741 return -1;
5742 }
5743
5744 value = _Unpickler_MemoGet(self, idx);
5745 if (value == NULL) {
5746 if (!PyErr_Occurred())
5747 PyErr_SetObject(PyExc_KeyError, key);
5748 Py_DECREF(key);
5749 return -1;
5750 }
5751 Py_DECREF(key);
5752
5753 PDATA_APPEND(self->stack, value, -1);
5754 return 0;
5755 }
5756
5757 static int
load_binget(UnpicklerObject * self)5758 load_binget(UnpicklerObject *self)
5759 {
5760 PyObject *value;
5761 Py_ssize_t idx;
5762 char *s;
5763
5764 if (_Unpickler_Read(self, &s, 1) < 0)
5765 return -1;
5766
5767 idx = Py_CHARMASK(s[0]);
5768
5769 value = _Unpickler_MemoGet(self, idx);
5770 if (value == NULL) {
5771 PyObject *key = PyLong_FromSsize_t(idx);
5772 if (key != NULL) {
5773 PyErr_SetObject(PyExc_KeyError, key);
5774 Py_DECREF(key);
5775 }
5776 return -1;
5777 }
5778
5779 PDATA_APPEND(self->stack, value, -1);
5780 return 0;
5781 }
5782
5783 static int
load_long_binget(UnpicklerObject * self)5784 load_long_binget(UnpicklerObject *self)
5785 {
5786 PyObject *value;
5787 Py_ssize_t idx;
5788 char *s;
5789
5790 if (_Unpickler_Read(self, &s, 4) < 0)
5791 return -1;
5792
5793 idx = calc_binsize(s, 4);
5794
5795 value = _Unpickler_MemoGet(self, idx);
5796 if (value == NULL) {
5797 PyObject *key = PyLong_FromSsize_t(idx);
5798 if (key != NULL) {
5799 PyErr_SetObject(PyExc_KeyError, key);
5800 Py_DECREF(key);
5801 }
5802 return -1;
5803 }
5804
5805 PDATA_APPEND(self->stack, value, -1);
5806 return 0;
5807 }
5808
5809 /* Push an object from the extension registry (EXT[124]). nbytes is
5810 * the number of bytes following the opcode, holding the index (code) value.
5811 */
5812 static int
load_extension(UnpicklerObject * self,int nbytes)5813 load_extension(UnpicklerObject *self, int nbytes)
5814 {
5815 char *codebytes; /* the nbytes bytes after the opcode */
5816 long code; /* calc_binint returns long */
5817 PyObject *py_code; /* code as a Python int */
5818 PyObject *obj; /* the object to push */
5819 PyObject *pair; /* (module_name, class_name) */
5820 PyObject *module_name, *class_name;
5821 PickleState *st = _Pickle_GetGlobalState();
5822
5823 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
5824 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
5825 return -1;
5826 code = calc_binint(codebytes, nbytes);
5827 if (code <= 0) { /* note that 0 is forbidden */
5828 /* Corrupt or hostile pickle. */
5829 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
5830 return -1;
5831 }
5832
5833 /* Look for the code in the cache. */
5834 py_code = PyLong_FromLong(code);
5835 if (py_code == NULL)
5836 return -1;
5837 obj = PyDict_GetItemWithError(st->extension_cache, py_code);
5838 if (obj != NULL) {
5839 /* Bingo. */
5840 Py_DECREF(py_code);
5841 PDATA_APPEND(self->stack, obj, -1);
5842 return 0;
5843 }
5844 if (PyErr_Occurred()) {
5845 Py_DECREF(py_code);
5846 return -1;
5847 }
5848
5849 /* Look up the (module_name, class_name) pair. */
5850 pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
5851 if (pair == NULL) {
5852 Py_DECREF(py_code);
5853 if (!PyErr_Occurred()) {
5854 PyErr_Format(PyExc_ValueError, "unregistered extension "
5855 "code %ld", code);
5856 }
5857 return -1;
5858 }
5859 /* Since the extension registry is manipulable via Python code,
5860 * confirm that pair is really a 2-tuple of strings.
5861 */
5862 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
5863 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
5864 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
5865 Py_DECREF(py_code);
5866 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
5867 "isn't a 2-tuple of strings", code);
5868 return -1;
5869 }
5870 /* Load the object. */
5871 obj = find_class(self, module_name, class_name);
5872 if (obj == NULL) {
5873 Py_DECREF(py_code);
5874 return -1;
5875 }
5876 /* Cache code -> obj. */
5877 code = PyDict_SetItem(st->extension_cache, py_code, obj);
5878 Py_DECREF(py_code);
5879 if (code < 0) {
5880 Py_DECREF(obj);
5881 return -1;
5882 }
5883 PDATA_PUSH(self->stack, obj, -1);
5884 return 0;
5885 }
5886
5887 static int
load_put(UnpicklerObject * self)5888 load_put(UnpicklerObject *self)
5889 {
5890 PyObject *key, *value;
5891 Py_ssize_t idx;
5892 Py_ssize_t len;
5893 char *s = NULL;
5894
5895 if ((len = _Unpickler_Readline(self, &s)) < 0)
5896 return -1;
5897 if (len < 2)
5898 return bad_readline();
5899 if (Py_SIZE(self->stack) <= self->stack->fence)
5900 return Pdata_stack_underflow(self->stack);
5901 value = self->stack->data[Py_SIZE(self->stack) - 1];
5902
5903 key = PyLong_FromString(s, NULL, 10);
5904 if (key == NULL)
5905 return -1;
5906 idx = PyLong_AsSsize_t(key);
5907 Py_DECREF(key);
5908 if (idx < 0) {
5909 if (!PyErr_Occurred())
5910 PyErr_SetString(PyExc_ValueError,
5911 "negative PUT argument");
5912 return -1;
5913 }
5914
5915 return _Unpickler_MemoPut(self, idx, value);
5916 }
5917
5918 static int
load_binput(UnpicklerObject * self)5919 load_binput(UnpicklerObject *self)
5920 {
5921 PyObject *value;
5922 Py_ssize_t idx;
5923 char *s;
5924
5925 if (_Unpickler_Read(self, &s, 1) < 0)
5926 return -1;
5927
5928 if (Py_SIZE(self->stack) <= self->stack->fence)
5929 return Pdata_stack_underflow(self->stack);
5930 value = self->stack->data[Py_SIZE(self->stack) - 1];
5931
5932 idx = Py_CHARMASK(s[0]);
5933
5934 return _Unpickler_MemoPut(self, idx, value);
5935 }
5936
5937 static int
load_long_binput(UnpicklerObject * self)5938 load_long_binput(UnpicklerObject *self)
5939 {
5940 PyObject *value;
5941 Py_ssize_t idx;
5942 char *s;
5943
5944 if (_Unpickler_Read(self, &s, 4) < 0)
5945 return -1;
5946
5947 if (Py_SIZE(self->stack) <= self->stack->fence)
5948 return Pdata_stack_underflow(self->stack);
5949 value = self->stack->data[Py_SIZE(self->stack) - 1];
5950
5951 idx = calc_binsize(s, 4);
5952 if (idx < 0) {
5953 PyErr_SetString(PyExc_ValueError,
5954 "negative LONG_BINPUT argument");
5955 return -1;
5956 }
5957
5958 return _Unpickler_MemoPut(self, idx, value);
5959 }
5960
5961 static int
load_memoize(UnpicklerObject * self)5962 load_memoize(UnpicklerObject *self)
5963 {
5964 PyObject *value;
5965
5966 if (Py_SIZE(self->stack) <= self->stack->fence)
5967 return Pdata_stack_underflow(self->stack);
5968 value = self->stack->data[Py_SIZE(self->stack) - 1];
5969
5970 return _Unpickler_MemoPut(self, self->memo_len, value);
5971 }
5972
5973 static int
do_append(UnpicklerObject * self,Py_ssize_t x)5974 do_append(UnpicklerObject *self, Py_ssize_t x)
5975 {
5976 PyObject *value;
5977 PyObject *slice;
5978 PyObject *list;
5979 PyObject *result;
5980 Py_ssize_t len, i;
5981
5982 len = Py_SIZE(self->stack);
5983 if (x > len || x <= self->stack->fence)
5984 return Pdata_stack_underflow(self->stack);
5985 if (len == x) /* nothing to do */
5986 return 0;
5987
5988 list = self->stack->data[x - 1];
5989
5990 if (PyList_CheckExact(list)) {
5991 Py_ssize_t list_len;
5992 int ret;
5993
5994 slice = Pdata_poplist(self->stack, x);
5995 if (!slice)
5996 return -1;
5997 list_len = PyList_GET_SIZE(list);
5998 ret = PyList_SetSlice(list, list_len, list_len, slice);
5999 Py_DECREF(slice);
6000 return ret;
6001 }
6002 else {
6003 PyObject *extend_func;
6004 _Py_IDENTIFIER(extend);
6005
6006 extend_func = _PyObject_GetAttrId(list, &PyId_extend);
6007 if (extend_func != NULL) {
6008 slice = Pdata_poplist(self->stack, x);
6009 if (!slice) {
6010 Py_DECREF(extend_func);
6011 return -1;
6012 }
6013 result = _Pickle_FastCall(extend_func, slice);
6014 Py_DECREF(extend_func);
6015 if (result == NULL)
6016 return -1;
6017 Py_DECREF(result);
6018 }
6019 else {
6020 PyObject *append_func;
6021 _Py_IDENTIFIER(append);
6022
6023 /* Even if the PEP 307 requires extend() and append() methods,
6024 fall back on append() if the object has no extend() method
6025 for backward compatibility. */
6026 PyErr_Clear();
6027 append_func = _PyObject_GetAttrId(list, &PyId_append);
6028 if (append_func == NULL)
6029 return -1;
6030 for (i = x; i < len; i++) {
6031 value = self->stack->data[i];
6032 result = _Pickle_FastCall(append_func, value);
6033 if (result == NULL) {
6034 Pdata_clear(self->stack, i + 1);
6035 Py_SIZE(self->stack) = x;
6036 Py_DECREF(append_func);
6037 return -1;
6038 }
6039 Py_DECREF(result);
6040 }
6041 Py_SIZE(self->stack) = x;
6042 Py_DECREF(append_func);
6043 }
6044 }
6045
6046 return 0;
6047 }
6048
6049 static int
load_append(UnpicklerObject * self)6050 load_append(UnpicklerObject *self)
6051 {
6052 if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6053 return Pdata_stack_underflow(self->stack);
6054 return do_append(self, Py_SIZE(self->stack) - 1);
6055 }
6056
6057 static int
load_appends(UnpicklerObject * self)6058 load_appends(UnpicklerObject *self)
6059 {
6060 Py_ssize_t i = marker(self);
6061 if (i < 0)
6062 return -1;
6063 return do_append(self, i);
6064 }
6065
6066 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6067 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6068 {
6069 PyObject *value, *key;
6070 PyObject *dict;
6071 Py_ssize_t len, i;
6072 int status = 0;
6073
6074 len = Py_SIZE(self->stack);
6075 if (x > len || x <= self->stack->fence)
6076 return Pdata_stack_underflow(self->stack);
6077 if (len == x) /* nothing to do */
6078 return 0;
6079 if ((len - x) % 2 != 0) {
6080 PickleState *st = _Pickle_GetGlobalState();
6081 /* Currupt or hostile pickle -- we never write one like this. */
6082 PyErr_SetString(st->UnpicklingError,
6083 "odd number of items for SETITEMS");
6084 return -1;
6085 }
6086
6087 /* Here, dict does not actually need to be a PyDict; it could be anything
6088 that supports the __setitem__ attribute. */
6089 dict = self->stack->data[x - 1];
6090
6091 for (i = x + 1; i < len; i += 2) {
6092 key = self->stack->data[i - 1];
6093 value = self->stack->data[i];
6094 if (PyObject_SetItem(dict, key, value) < 0) {
6095 status = -1;
6096 break;
6097 }
6098 }
6099
6100 Pdata_clear(self->stack, x);
6101 return status;
6102 }
6103
6104 static int
load_setitem(UnpicklerObject * self)6105 load_setitem(UnpicklerObject *self)
6106 {
6107 return do_setitems(self, Py_SIZE(self->stack) - 2);
6108 }
6109
6110 static int
load_setitems(UnpicklerObject * self)6111 load_setitems(UnpicklerObject *self)
6112 {
6113 Py_ssize_t i = marker(self);
6114 if (i < 0)
6115 return -1;
6116 return do_setitems(self, i);
6117 }
6118
6119 static int
load_additems(UnpicklerObject * self)6120 load_additems(UnpicklerObject *self)
6121 {
6122 PyObject *set;
6123 Py_ssize_t mark, len, i;
6124
6125 mark = marker(self);
6126 if (mark < 0)
6127 return -1;
6128 len = Py_SIZE(self->stack);
6129 if (mark > len || mark <= self->stack->fence)
6130 return Pdata_stack_underflow(self->stack);
6131 if (len == mark) /* nothing to do */
6132 return 0;
6133
6134 set = self->stack->data[mark - 1];
6135
6136 if (PySet_Check(set)) {
6137 PyObject *items;
6138 int status;
6139
6140 items = Pdata_poptuple(self->stack, mark);
6141 if (items == NULL)
6142 return -1;
6143
6144 status = _PySet_Update(set, items);
6145 Py_DECREF(items);
6146 return status;
6147 }
6148 else {
6149 PyObject *add_func;
6150 _Py_IDENTIFIER(add);
6151
6152 add_func = _PyObject_GetAttrId(set, &PyId_add);
6153 if (add_func == NULL)
6154 return -1;
6155 for (i = mark; i < len; i++) {
6156 PyObject *result;
6157 PyObject *item;
6158
6159 item = self->stack->data[i];
6160 result = _Pickle_FastCall(add_func, item);
6161 if (result == NULL) {
6162 Pdata_clear(self->stack, i + 1);
6163 Py_SIZE(self->stack) = mark;
6164 return -1;
6165 }
6166 Py_DECREF(result);
6167 }
6168 Py_SIZE(self->stack) = mark;
6169 }
6170
6171 return 0;
6172 }
6173
6174 static int
load_build(UnpicklerObject * self)6175 load_build(UnpicklerObject *self)
6176 {
6177 PyObject *state, *inst, *slotstate;
6178 PyObject *setstate;
6179 int status = 0;
6180 _Py_IDENTIFIER(__setstate__);
6181
6182 /* Stack is ... instance, state. We want to leave instance at
6183 * the stack top, possibly mutated via instance.__setstate__(state).
6184 */
6185 if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6186 return Pdata_stack_underflow(self->stack);
6187
6188 PDATA_POP(self->stack, state);
6189 if (state == NULL)
6190 return -1;
6191
6192 inst = self->stack->data[Py_SIZE(self->stack) - 1];
6193
6194 if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6195 Py_DECREF(state);
6196 return -1;
6197 }
6198 if (setstate != NULL) {
6199 PyObject *result;
6200
6201 /* The explicit __setstate__ is responsible for everything. */
6202 result = _Pickle_FastCall(setstate, state);
6203 Py_DECREF(setstate);
6204 if (result == NULL)
6205 return -1;
6206 Py_DECREF(result);
6207 return 0;
6208 }
6209
6210 /* A default __setstate__. First see whether state embeds a
6211 * slot state dict too (a proto 2 addition).
6212 */
6213 if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6214 PyObject *tmp = state;
6215
6216 state = PyTuple_GET_ITEM(tmp, 0);
6217 slotstate = PyTuple_GET_ITEM(tmp, 1);
6218 Py_INCREF(state);
6219 Py_INCREF(slotstate);
6220 Py_DECREF(tmp);
6221 }
6222 else
6223 slotstate = NULL;
6224
6225 /* Set inst.__dict__ from the state dict (if any). */
6226 if (state != Py_None) {
6227 PyObject *dict;
6228 PyObject *d_key, *d_value;
6229 Py_ssize_t i;
6230 _Py_IDENTIFIER(__dict__);
6231
6232 if (!PyDict_Check(state)) {
6233 PickleState *st = _Pickle_GetGlobalState();
6234 PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6235 goto error;
6236 }
6237 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
6238 if (dict == NULL)
6239 goto error;
6240
6241 i = 0;
6242 while (PyDict_Next(state, &i, &d_key, &d_value)) {
6243 /* normally the keys for instance attributes are
6244 interned. we should try to do that here. */
6245 Py_INCREF(d_key);
6246 if (PyUnicode_CheckExact(d_key))
6247 PyUnicode_InternInPlace(&d_key);
6248 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6249 Py_DECREF(d_key);
6250 goto error;
6251 }
6252 Py_DECREF(d_key);
6253 }
6254 Py_DECREF(dict);
6255 }
6256
6257 /* Also set instance attributes from the slotstate dict (if any). */
6258 if (slotstate != NULL) {
6259 PyObject *d_key, *d_value;
6260 Py_ssize_t i;
6261
6262 if (!PyDict_Check(slotstate)) {
6263 PickleState *st = _Pickle_GetGlobalState();
6264 PyErr_SetString(st->UnpicklingError,
6265 "slot state is not a dictionary");
6266 goto error;
6267 }
6268 i = 0;
6269 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6270 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6271 goto error;
6272 }
6273 }
6274
6275 if (0) {
6276 error:
6277 status = -1;
6278 }
6279
6280 Py_DECREF(state);
6281 Py_XDECREF(slotstate);
6282 return status;
6283 }
6284
6285 static int
load_mark(UnpicklerObject * self)6286 load_mark(UnpicklerObject *self)
6287 {
6288
6289 /* Note that we split the (pickle.py) stack into two stacks, an
6290 * object stack and a mark stack. Here we push a mark onto the
6291 * mark stack.
6292 */
6293
6294 if ((self->num_marks + 1) >= self->marks_size) {
6295 size_t alloc;
6296
6297 /* Use the size_t type to check for overflow. */
6298 alloc = ((size_t)self->num_marks << 1) + 20;
6299 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
6300 alloc <= ((size_t)self->num_marks + 1)) {
6301 PyErr_NoMemory();
6302 return -1;
6303 }
6304
6305 Py_ssize_t *marks_old = self->marks;
6306 PyMem_RESIZE(self->marks, Py_ssize_t, alloc);
6307 if (self->marks == NULL) {
6308 PyMem_FREE(marks_old);
6309 self->marks_size = 0;
6310 PyErr_NoMemory();
6311 return -1;
6312 }
6313 self->marks_size = (Py_ssize_t)alloc;
6314 }
6315
6316 self->stack->mark_set = 1;
6317 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6318
6319 return 0;
6320 }
6321
6322 static int
load_reduce(UnpicklerObject * self)6323 load_reduce(UnpicklerObject *self)
6324 {
6325 PyObject *callable = NULL;
6326 PyObject *argtup = NULL;
6327 PyObject *obj = NULL;
6328
6329 PDATA_POP(self->stack, argtup);
6330 if (argtup == NULL)
6331 return -1;
6332 PDATA_POP(self->stack, callable);
6333 if (callable) {
6334 obj = PyObject_CallObject(callable, argtup);
6335 Py_DECREF(callable);
6336 }
6337 Py_DECREF(argtup);
6338
6339 if (obj == NULL)
6340 return -1;
6341
6342 PDATA_PUSH(self->stack, obj, -1);
6343 return 0;
6344 }
6345
6346 /* Just raises an error if we don't know the protocol specified. PROTO
6347 * is the first opcode for protocols >= 2.
6348 */
6349 static int
load_proto(UnpicklerObject * self)6350 load_proto(UnpicklerObject *self)
6351 {
6352 char *s;
6353 int i;
6354
6355 if (_Unpickler_Read(self, &s, 1) < 0)
6356 return -1;
6357
6358 i = (unsigned char)s[0];
6359 if (i <= HIGHEST_PROTOCOL) {
6360 self->proto = i;
6361 return 0;
6362 }
6363
6364 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6365 return -1;
6366 }
6367
6368 static int
load_frame(UnpicklerObject * self)6369 load_frame(UnpicklerObject *self)
6370 {
6371 char *s;
6372 Py_ssize_t frame_len;
6373
6374 if (_Unpickler_Read(self, &s, 8) < 0)
6375 return -1;
6376
6377 frame_len = calc_binsize(s, 8);
6378 if (frame_len < 0) {
6379 PyErr_Format(PyExc_OverflowError,
6380 "FRAME length exceeds system's maximum of %zd bytes",
6381 PY_SSIZE_T_MAX);
6382 return -1;
6383 }
6384
6385 if (_Unpickler_Read(self, &s, frame_len) < 0)
6386 return -1;
6387
6388 /* Rewind to start of frame */
6389 self->next_read_idx -= frame_len;
6390 return 0;
6391 }
6392
6393 static PyObject *
load(UnpicklerObject * self)6394 load(UnpicklerObject *self)
6395 {
6396 PyObject *value = NULL;
6397 char *s = NULL;
6398
6399 self->num_marks = 0;
6400 self->stack->mark_set = 0;
6401 self->stack->fence = 0;
6402 self->proto = 0;
6403 if (Py_SIZE(self->stack))
6404 Pdata_clear(self->stack, 0);
6405
6406 /* Convenient macros for the dispatch while-switch loop just below. */
6407 #define OP(opcode, load_func) \
6408 case opcode: if (load_func(self) < 0) break; continue;
6409
6410 #define OP_ARG(opcode, load_func, arg) \
6411 case opcode: if (load_func(self, (arg)) < 0) break; continue;
6412
6413 while (1) {
6414 if (_Unpickler_Read(self, &s, 1) < 0) {
6415 PickleState *st = _Pickle_GetGlobalState();
6416 if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6417 PyErr_Format(PyExc_EOFError, "Ran out of input");
6418 }
6419 return NULL;
6420 }
6421
6422 switch ((enum opcode)s[0]) {
6423 OP(NONE, load_none)
6424 OP(BININT, load_binint)
6425 OP(BININT1, load_binint1)
6426 OP(BININT2, load_binint2)
6427 OP(INT, load_int)
6428 OP(LONG, load_long)
6429 OP_ARG(LONG1, load_counted_long, 1)
6430 OP_ARG(LONG4, load_counted_long, 4)
6431 OP(FLOAT, load_float)
6432 OP(BINFLOAT, load_binfloat)
6433 OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6434 OP_ARG(BINBYTES, load_counted_binbytes, 4)
6435 OP_ARG(BINBYTES8, load_counted_binbytes, 8)
6436 OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6437 OP_ARG(BINSTRING, load_counted_binstring, 4)
6438 OP(STRING, load_string)
6439 OP(UNICODE, load_unicode)
6440 OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6441 OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6442 OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
6443 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6444 OP_ARG(TUPLE1, load_counted_tuple, 1)
6445 OP_ARG(TUPLE2, load_counted_tuple, 2)
6446 OP_ARG(TUPLE3, load_counted_tuple, 3)
6447 OP(TUPLE, load_tuple)
6448 OP(EMPTY_LIST, load_empty_list)
6449 OP(LIST, load_list)
6450 OP(EMPTY_DICT, load_empty_dict)
6451 OP(DICT, load_dict)
6452 OP(EMPTY_SET, load_empty_set)
6453 OP(ADDITEMS, load_additems)
6454 OP(FROZENSET, load_frozenset)
6455 OP(OBJ, load_obj)
6456 OP(INST, load_inst)
6457 OP(NEWOBJ, load_newobj)
6458 OP(NEWOBJ_EX, load_newobj_ex)
6459 OP(GLOBAL, load_global)
6460 OP(STACK_GLOBAL, load_stack_global)
6461 OP(APPEND, load_append)
6462 OP(APPENDS, load_appends)
6463 OP(BUILD, load_build)
6464 OP(DUP, load_dup)
6465 OP(BINGET, load_binget)
6466 OP(LONG_BINGET, load_long_binget)
6467 OP(GET, load_get)
6468 OP(MARK, load_mark)
6469 OP(BINPUT, load_binput)
6470 OP(LONG_BINPUT, load_long_binput)
6471 OP(PUT, load_put)
6472 OP(MEMOIZE, load_memoize)
6473 OP(POP, load_pop)
6474 OP(POP_MARK, load_pop_mark)
6475 OP(SETITEM, load_setitem)
6476 OP(SETITEMS, load_setitems)
6477 OP(PERSID, load_persid)
6478 OP(BINPERSID, load_binpersid)
6479 OP(REDUCE, load_reduce)
6480 OP(PROTO, load_proto)
6481 OP(FRAME, load_frame)
6482 OP_ARG(EXT1, load_extension, 1)
6483 OP_ARG(EXT2, load_extension, 2)
6484 OP_ARG(EXT4, load_extension, 4)
6485 OP_ARG(NEWTRUE, load_bool, Py_True)
6486 OP_ARG(NEWFALSE, load_bool, Py_False)
6487
6488 case STOP:
6489 break;
6490
6491 default:
6492 {
6493 PickleState *st = _Pickle_GetGlobalState();
6494 unsigned char c = (unsigned char) *s;
6495 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6496 PyErr_Format(st->UnpicklingError,
6497 "invalid load key, '%c'.", c);
6498 }
6499 else {
6500 PyErr_Format(st->UnpicklingError,
6501 "invalid load key, '\\x%02x'.", c);
6502 }
6503 return NULL;
6504 }
6505 }
6506
6507 break; /* and we are done! */
6508 }
6509
6510 if (PyErr_Occurred()) {
6511 return NULL;
6512 }
6513
6514 if (_Unpickler_SkipConsumed(self) < 0)
6515 return NULL;
6516
6517 PDATA_POP(self->stack, value);
6518 return value;
6519 }
6520
6521 /*[clinic input]
6522
6523 _pickle.Unpickler.load
6524
6525 Load a pickle.
6526
6527 Read a pickled object representation from the open file object given
6528 in the constructor, and return the reconstituted object hierarchy
6529 specified therein.
6530 [clinic start generated code]*/
6531
6532 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)6533 _pickle_Unpickler_load_impl(UnpicklerObject *self)
6534 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
6535 {
6536 UnpicklerObject *unpickler = (UnpicklerObject*)self;
6537
6538 /* Check whether the Unpickler was initialized correctly. This prevents
6539 segfaulting if a subclass overridden __init__ with a function that does
6540 not call Unpickler.__init__(). Here, we simply ensure that self->read
6541 is not NULL. */
6542 if (unpickler->read == NULL) {
6543 PickleState *st = _Pickle_GetGlobalState();
6544 PyErr_Format(st->UnpicklingError,
6545 "Unpickler.__init__() was not called by %s.__init__()",
6546 Py_TYPE(unpickler)->tp_name);
6547 return NULL;
6548 }
6549
6550 return load(unpickler);
6551 }
6552
6553 /* The name of find_class() is misleading. In newer pickle protocols, this
6554 function is used for loading any global (i.e., functions), not just
6555 classes. The name is kept only for backward compatibility. */
6556
6557 /*[clinic input]
6558
6559 _pickle.Unpickler.find_class
6560
6561 module_name: object
6562 global_name: object
6563 /
6564
6565 Return an object from a specified module.
6566
6567 If necessary, the module will be imported. Subclasses may override
6568 this method (e.g. to restrict unpickling of arbitrary classes and
6569 functions).
6570
6571 This method is called whenever a class or a function object is
6572 needed. Both arguments passed are str objects.
6573 [clinic start generated code]*/
6574
6575 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)6576 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
6577 PyObject *module_name,
6578 PyObject *global_name)
6579 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
6580 {
6581 PyObject *global;
6582 PyObject *module;
6583
6584 /* Try to map the old names used in Python 2.x to the new ones used in
6585 Python 3.x. We do this only with old pickle protocols and when the
6586 user has not disabled the feature. */
6587 if (self->proto < 3 && self->fix_imports) {
6588 PyObject *key;
6589 PyObject *item;
6590 PickleState *st = _Pickle_GetGlobalState();
6591
6592 /* Check if the global (i.e., a function or a class) was renamed
6593 or moved to another module. */
6594 key = PyTuple_Pack(2, module_name, global_name);
6595 if (key == NULL)
6596 return NULL;
6597 item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
6598 Py_DECREF(key);
6599 if (item) {
6600 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
6601 PyErr_Format(PyExc_RuntimeError,
6602 "_compat_pickle.NAME_MAPPING values should be "
6603 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
6604 return NULL;
6605 }
6606 module_name = PyTuple_GET_ITEM(item, 0);
6607 global_name = PyTuple_GET_ITEM(item, 1);
6608 if (!PyUnicode_Check(module_name) ||
6609 !PyUnicode_Check(global_name)) {
6610 PyErr_Format(PyExc_RuntimeError,
6611 "_compat_pickle.NAME_MAPPING values should be "
6612 "pairs of str, not (%.200s, %.200s)",
6613 Py_TYPE(module_name)->tp_name,
6614 Py_TYPE(global_name)->tp_name);
6615 return NULL;
6616 }
6617 }
6618 else if (PyErr_Occurred()) {
6619 return NULL;
6620 }
6621 else {
6622 /* Check if the module was renamed. */
6623 item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
6624 if (item) {
6625 if (!PyUnicode_Check(item)) {
6626 PyErr_Format(PyExc_RuntimeError,
6627 "_compat_pickle.IMPORT_MAPPING values should be "
6628 "strings, not %.200s", Py_TYPE(item)->tp_name);
6629 return NULL;
6630 }
6631 module_name = item;
6632 }
6633 else if (PyErr_Occurred()) {
6634 return NULL;
6635 }
6636 }
6637 }
6638
6639 /*
6640 * we don't use PyImport_GetModule here, because it can return partially-
6641 * initialised modules, which then cause the getattribute to fail.
6642 */
6643 module = PyImport_Import(module_name);
6644 if (module == NULL) {
6645 return NULL;
6646 }
6647 global = getattribute(module, global_name, self->proto >= 4);
6648 Py_DECREF(module);
6649 return global;
6650 }
6651
6652 /*[clinic input]
6653
6654 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
6655
6656 Returns size in memory, in bytes.
6657 [clinic start generated code]*/
6658
6659 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)6660 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
6661 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
6662 {
6663 Py_ssize_t res;
6664
6665 res = _PyObject_SIZE(Py_TYPE(self));
6666 if (self->memo != NULL)
6667 res += self->memo_size * sizeof(PyObject *);
6668 if (self->marks != NULL)
6669 res += self->marks_size * sizeof(Py_ssize_t);
6670 if (self->input_line != NULL)
6671 res += strlen(self->input_line) + 1;
6672 if (self->encoding != NULL)
6673 res += strlen(self->encoding) + 1;
6674 if (self->errors != NULL)
6675 res += strlen(self->errors) + 1;
6676 return res;
6677 }
6678
6679 static struct PyMethodDef Unpickler_methods[] = {
6680 _PICKLE_UNPICKLER_LOAD_METHODDEF
6681 _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
6682 _PICKLE_UNPICKLER___SIZEOF___METHODDEF
6683 {NULL, NULL} /* sentinel */
6684 };
6685
6686 static void
Unpickler_dealloc(UnpicklerObject * self)6687 Unpickler_dealloc(UnpicklerObject *self)
6688 {
6689 PyObject_GC_UnTrack((PyObject *)self);
6690 Py_XDECREF(self->readline);
6691 Py_XDECREF(self->read);
6692 Py_XDECREF(self->peek);
6693 Py_XDECREF(self->stack);
6694 Py_XDECREF(self->pers_func);
6695 if (self->buffer.buf != NULL) {
6696 PyBuffer_Release(&self->buffer);
6697 self->buffer.buf = NULL;
6698 }
6699
6700 _Unpickler_MemoCleanup(self);
6701 PyMem_Free(self->marks);
6702 PyMem_Free(self->input_line);
6703 PyMem_Free(self->encoding);
6704 PyMem_Free(self->errors);
6705
6706 Py_TYPE(self)->tp_free((PyObject *)self);
6707 }
6708
6709 static int
Unpickler_traverse(UnpicklerObject * self,visitproc visit,void * arg)6710 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
6711 {
6712 Py_VISIT(self->readline);
6713 Py_VISIT(self->read);
6714 Py_VISIT(self->peek);
6715 Py_VISIT(self->stack);
6716 Py_VISIT(self->pers_func);
6717 return 0;
6718 }
6719
/* tp_clear for Unpickler objects (also called by __init__ to reset an
   instance that is being re-initialised).  Releases every held reference and
   heap buffer and leaves the corresponding fields NULL so the object can be
   cleared or deallocated again safely.  Always returns 0. */
static int
Unpickler_clear(UnpicklerObject *self)
{
    Py_CLEAR(self->readline);
    Py_CLEAR(self->read);
    Py_CLEAR(self->peek);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->pers_func);
    /* Only release the Py_buffer when one is actually held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    _Unpickler_MemoCleanup(self);
    /* Each raw buffer is freed and its pointer reset, so a later clear or
       dealloc does not free it a second time. */
    PyMem_Free(self->marks);
    self->marks = NULL;
    PyMem_Free(self->input_line);
    self->input_line = NULL;
    PyMem_Free(self->encoding);
    self->encoding = NULL;
    PyMem_Free(self->errors);
    self->errors = NULL;

    return 0;
}
6745
6746 /*[clinic input]
6747
6748 _pickle.Unpickler.__init__
6749
6750 file: object
6751 *
6752 fix_imports: bool = True
6753 encoding: str = 'ASCII'
6754 errors: str = 'strict'
6755
6756 This takes a binary file for reading a pickle data stream.
6757
6758 The protocol version of the pickle is detected automatically, so no
6759 protocol argument is needed. Bytes past the pickled object's
6760 representation are ignored.
6761
6762 The argument *file* must have two methods, a read() method that takes
6763 an integer argument, and a readline() method that requires no
6764 arguments. Both methods should return bytes. Thus *file* can be a
6765 binary file object opened for reading, an io.BytesIO object, or any
6766 other custom object that meets this interface.
6767
6768 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
6769 which are used to control compatibility support for pickle stream
6770 generated by Python 2. If *fix_imports* is True, pickle will try to
6771 map the old Python 2 names to the new names used in Python 3. The
6772 *encoding* and *errors* tell pickle how to decode 8-bit string
6773 instances pickled by Python 2; these default to 'ASCII' and 'strict',
6774 respectively. The *encoding* can be 'bytes' to read these 8-bit
6775 string instances as bytes objects.
6776 [clinic start generated code]*/
6777
6778 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors)6779 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
6780 int fix_imports, const char *encoding,
6781 const char *errors)
6782 /*[clinic end generated code: output=e2c8ce748edc57b0 input=f9b7da04f5f4f335]*/
6783 {
6784 _Py_IDENTIFIER(persistent_load);
6785
6786 /* In case of multiple __init__() calls, clear previous content. */
6787 if (self->read != NULL)
6788 (void)Unpickler_clear(self);
6789
6790 if (_Unpickler_SetInputStream(self, file) < 0)
6791 return -1;
6792
6793 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
6794 return -1;
6795
6796 self->fix_imports = fix_imports;
6797
6798 if (init_method_ref((PyObject *)self, &PyId_persistent_load,
6799 &self->pers_func, &self->pers_func_self) < 0)
6800 {
6801 return -1;
6802 }
6803
6804 self->stack = (Pdata *)Pdata_New();
6805 if (self->stack == NULL)
6806 return -1;
6807
6808 self->memo_size = 32;
6809 self->memo = _Unpickler_NewMemo(self->memo_size);
6810 if (self->memo == NULL)
6811 return -1;
6812
6813 self->proto = 0;
6814
6815 return 0;
6816 }
6817
6818
6819 /* Define a proxy object for the Unpickler's internal memo object. This is to
6820 * avoid breaking code like:
6821 * unpickler.memo.clear()
6822 * and
6823 * unpickler.memo = saved_memo
6824 * Is this a good idea? Not really, but we don't want to break code that uses
6825 * it. Note that we don't implement the entire mapping API here. This is
6826 * intentional, as these should be treated as black-box implementation details.
6827 *
6828 * We do, however, have to implement pickling/unpickling support because of
6829 * real-world code like cvs2svn.
6830 */
6831
6832 /*[clinic input]
6833 _pickle.UnpicklerMemoProxy.clear
6834
6835 Remove all items from memo.
6836 [clinic start generated code]*/
6837
6838 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)6839 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
6840 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
6841 {
6842 _Unpickler_MemoCleanup(self->unpickler);
6843 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
6844 if (self->unpickler->memo == NULL)
6845 return NULL;
6846 Py_RETURN_NONE;
6847 }
6848
6849 /*[clinic input]
6850 _pickle.UnpicklerMemoProxy.copy
6851
6852 Copy the memo to a new object.
6853 [clinic start generated code]*/
6854
6855 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)6856 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
6857 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
6858 {
6859 size_t i;
6860 PyObject *new_memo = PyDict_New();
6861 if (new_memo == NULL)
6862 return NULL;
6863
6864 for (i = 0; i < self->unpickler->memo_size; i++) {
6865 int status;
6866 PyObject *key, *value;
6867
6868 value = self->unpickler->memo[i];
6869 if (value == NULL)
6870 continue;
6871
6872 key = PyLong_FromSsize_t(i);
6873 if (key == NULL)
6874 goto error;
6875 status = PyDict_SetItem(new_memo, key, value);
6876 Py_DECREF(key);
6877 if (status < 0)
6878 goto error;
6879 }
6880 return new_memo;
6881
6882 error:
6883 Py_DECREF(new_memo);
6884 return NULL;
6885 }
6886
6887 /*[clinic input]
6888 _pickle.UnpicklerMemoProxy.__reduce__
6889
6890 Implement pickling support.
6891 [clinic start generated code]*/
6892
6893 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)6894 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
6895 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
6896 {
6897 PyObject *reduce_value;
6898 PyObject *constructor_args;
6899 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
6900 if (contents == NULL)
6901 return NULL;
6902
6903 reduce_value = PyTuple_New(2);
6904 if (reduce_value == NULL) {
6905 Py_DECREF(contents);
6906 return NULL;
6907 }
6908 constructor_args = PyTuple_New(1);
6909 if (constructor_args == NULL) {
6910 Py_DECREF(contents);
6911 Py_DECREF(reduce_value);
6912 return NULL;
6913 }
6914 PyTuple_SET_ITEM(constructor_args, 0, contents);
6915 Py_INCREF((PyObject *)&PyDict_Type);
6916 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
6917 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
6918 return reduce_value;
6919 }
6920
/* Method table installed as tp_methods of UnpicklerMemoProxyType.  The
   *_METHODDEF entries are macros defined outside this section (presumably
   generated by Argument Clinic for clear, copy and __reduce__ -- confirm in
   the generated header).  The {NULL, NULL} entry terminates the table. */
static PyMethodDef unpicklerproxy_methods[] = {
    _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL}    /* sentinel */
};
6927
/* tp_dealloc for the memo proxy: stops GC tracking, releases the reference
   to the proxied unpickler (which may already be NULL after tp_clear) and
   frees the proxy object. */
static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->unpickler);
    PyObject_GC_Del((PyObject *)self);
}
6935
/* tp_traverse for the memo proxy: the proxied unpickler is the only object
   reference it holds.  visit/arg are supplied by the garbage collector and
   consumed by the Py_VISIT macro.  Returns 0 unless the callback reports
   otherwise. */
static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
                            visitproc visit, void *arg)
{
    Py_VISIT(self->unpickler);
    return 0;
}
6943
/* tp_clear for the memo proxy: drops the reference to the proxied unpickler
   and leaves the field NULL.  Always returns 0. */
static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
{
    Py_CLEAR(self->unpickler);
    return 0;
}
6950
/* Type object for the proxy returned by Unpickler.memo.  It is a
   GC-tracked, unhashable type whose behaviour comes from
   unpicklerproxy_methods; slots after tp_methods are left to C's implicit
   zero-initialisation. */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /*tp_name*/
    sizeof(UnpicklerMemoProxyObject),           /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved (labelled as in Unpickler_Type) */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,  /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};
6981
6982 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)6983 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
6984 {
6985 UnpicklerMemoProxyObject *self;
6986
6987 self = PyObject_GC_New(UnpicklerMemoProxyObject,
6988 &UnpicklerMemoProxyType);
6989 if (self == NULL)
6990 return NULL;
6991 Py_INCREF(unpickler);
6992 self->unpickler = unpickler;
6993 PyObject_GC_Track(self);
6994 return (PyObject *)self;
6995 }
6996
6997 /*****************************************************************************/
6998
6999
7000 static PyObject *
Unpickler_get_memo(UnpicklerObject * self,void * Py_UNUSED (ignored))7001 Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
7002 {
7003 return UnpicklerMemoProxy_New(self);
7004 }
7005
7006 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7007 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7008 {
7009 PyObject **new_memo;
7010 size_t new_memo_size = 0;
7011
7012 if (obj == NULL) {
7013 PyErr_SetString(PyExc_TypeError,
7014 "attribute deletion is not supported");
7015 return -1;
7016 }
7017
7018 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
7019 UnpicklerObject *unpickler =
7020 ((UnpicklerMemoProxyObject *)obj)->unpickler;
7021
7022 new_memo_size = unpickler->memo_size;
7023 new_memo = _Unpickler_NewMemo(new_memo_size);
7024 if (new_memo == NULL)
7025 return -1;
7026
7027 for (size_t i = 0; i < new_memo_size; i++) {
7028 Py_XINCREF(unpickler->memo[i]);
7029 new_memo[i] = unpickler->memo[i];
7030 }
7031 }
7032 else if (PyDict_Check(obj)) {
7033 Py_ssize_t i = 0;
7034 PyObject *key, *value;
7035
7036 new_memo_size = PyDict_GET_SIZE(obj);
7037 new_memo = _Unpickler_NewMemo(new_memo_size);
7038 if (new_memo == NULL)
7039 return -1;
7040
7041 while (PyDict_Next(obj, &i, &key, &value)) {
7042 Py_ssize_t idx;
7043 if (!PyLong_Check(key)) {
7044 PyErr_SetString(PyExc_TypeError,
7045 "memo key must be integers");
7046 goto error;
7047 }
7048 idx = PyLong_AsSsize_t(key);
7049 if (idx == -1 && PyErr_Occurred())
7050 goto error;
7051 if (idx < 0) {
7052 PyErr_SetString(PyExc_ValueError,
7053 "memo key must be positive integers.");
7054 goto error;
7055 }
7056 if (_Unpickler_MemoPut(self, idx, value) < 0)
7057 goto error;
7058 }
7059 }
7060 else {
7061 PyErr_Format(PyExc_TypeError,
7062 "'memo' attribute must be an UnpicklerMemoProxy object "
7063 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7064 return -1;
7065 }
7066
7067 _Unpickler_MemoCleanup(self);
7068 self->memo_size = new_memo_size;
7069 self->memo = new_memo;
7070
7071 return 0;
7072
7073 error:
7074 if (new_memo_size) {
7075 for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7076 Py_XDECREF(new_memo[i]);
7077 }
7078 PyMem_FREE(new_memo);
7079 }
7080 return -1;
7081 }
7082
7083 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7084 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7085 {
7086 if (self->pers_func == NULL) {
7087 PyErr_SetString(PyExc_AttributeError, "persistent_load");
7088 return NULL;
7089 }
7090 return reconstruct_method(self->pers_func, self->pers_func_self);
7091 }
7092
7093 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))7094 Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7095 {
7096 if (value == NULL) {
7097 PyErr_SetString(PyExc_TypeError,
7098 "attribute deletion is not supported");
7099 return -1;
7100 }
7101 if (!PyCallable_Check(value)) {
7102 PyErr_SetString(PyExc_TypeError,
7103 "persistent_load must be a callable taking "
7104 "one argument");
7105 return -1;
7106 }
7107
7108 self->pers_func_self = NULL;
7109 Py_INCREF(value);
7110 Py_XSETREF(self->pers_func, value);
7111
7112 return 0;
7113 }
7114
/* Computed attributes of Unpickler, installed as tp_getset: "memo" and
   "persistent_load" are served by the Unpickler_get_* / Unpickler_set_*
   functions.  The {NULL} entry terminates the table. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
                        (setter)Unpickler_set_persload},
    {NULL}
};
7121
/* Type object for _pickle.Unpickler: a GC-tracked, subclassable type.  Its
   docstring is the one generated for __init__, instances are created with
   the generic alloc/new slots, initialised by _pickle_Unpickler___init__ and
   freed with PyObject_GC_Del. */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    _pickle_Unpickler___init____doc__,  /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Unpickler___init__,         /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
7164
7165 /*[clinic input]
7166
7167 _pickle.dump
7168
7169 obj: object
7170 file: object
7171 protocol: object = NULL
7172 *
7173 fix_imports: bool = True
7174
7175 Write a pickled representation of obj to the open file object file.
7176
7177 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7178 be more efficient.
7179
The optional *protocol* argument tells the pickler to use the given
protocol; supported protocols are 0, 1, 2, 3 and 4. The default
7182 protocol is 3; a backward-incompatible protocol designed for Python 3.
7183
7184 Specifying a negative protocol version selects the highest protocol
7185 version supported. The higher the protocol used, the more recent the
7186 version of Python needed to read the pickle produced.
7187
7188 The *file* argument must have a write() method that accepts a single
7189 bytes argument. It can thus be a file object opened for binary
7190 writing, an io.BytesIO instance, or any other custom object that meets
7191 this interface.
7192
7193 If *fix_imports* is True and protocol is less than 3, pickle will try
7194 to map the new Python 3 names to the old module names used in Python
7195 2, so that the pickle data stream is readable with Python 2.
7196 [clinic start generated code]*/
7197
7198 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports)7199 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7200 PyObject *protocol, int fix_imports)
7201 /*[clinic end generated code: output=a4774d5fde7d34de input=830f8a64cef6f042]*/
7202 {
7203 PicklerObject *pickler = _Pickler_New();
7204
7205 if (pickler == NULL)
7206 return NULL;
7207
7208 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7209 goto error;
7210
7211 if (_Pickler_SetOutputStream(pickler, file) < 0)
7212 goto error;
7213
7214 if (dump(pickler, obj) < 0)
7215 goto error;
7216
7217 if (_Pickler_FlushToFile(pickler) < 0)
7218 goto error;
7219
7220 Py_DECREF(pickler);
7221 Py_RETURN_NONE;
7222
7223 error:
7224 Py_XDECREF(pickler);
7225 return NULL;
7226 }
7227
7228 /*[clinic input]
7229
7230 _pickle.dumps
7231
7232 obj: object
7233 protocol: object = NULL
7234 *
7235 fix_imports: bool = True
7236
7237 Return the pickled representation of the object as a bytes object.
7238
7239 The optional *protocol* argument tells the pickler to use the given
7240 protocol; supported protocols are 0, 1, 2, 3 and 4. The default
7241 protocol is 3; a backward-incompatible protocol designed for Python 3.
7242
7243 Specifying a negative protocol version selects the highest protocol
7244 version supported. The higher the protocol used, the more recent the
7245 version of Python needed to read the pickle produced.
7246
7247 If *fix_imports* is True and *protocol* is less than 3, pickle will
7248 try to map the new Python 3 names to the old module names used in
7249 Python 2, so that the pickle data stream is readable with Python 2.
7250 [clinic start generated code]*/
7251
7252 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports)7253 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7254 int fix_imports)
7255 /*[clinic end generated code: output=d75d5cda456fd261 input=293dbeda181580b7]*/
7256 {
7257 PyObject *result;
7258 PicklerObject *pickler = _Pickler_New();
7259
7260 if (pickler == NULL)
7261 return NULL;
7262
7263 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7264 goto error;
7265
7266 if (dump(pickler, obj) < 0)
7267 goto error;
7268
7269 result = _Pickler_GetString(pickler);
7270 Py_DECREF(pickler);
7271 return result;
7272
7273 error:
7274 Py_XDECREF(pickler);
7275 return NULL;
7276 }
7277
7278 /*[clinic input]
7279
7280 _pickle.load
7281
7282 file: object
7283 *
7284 fix_imports: bool = True
7285 encoding: str = 'ASCII'
7286 errors: str = 'strict'
7287
7288 Read and return an object from the pickle data stored in a file.
7289
7290 This is equivalent to ``Unpickler(file).load()``, but may be more
7291 efficient.
7292
7293 The protocol version of the pickle is detected automatically, so no
7294 protocol argument is needed. Bytes past the pickled object's
7295 representation are ignored.
7296
7297 The argument *file* must have two methods, a read() method that takes
7298 an integer argument, and a readline() method that requires no
7299 arguments. Both methods should return bytes. Thus *file* can be a
7300 binary file object opened for reading, an io.BytesIO object, or any
7301 other custom object that meets this interface.
7302
7303 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7304 which are used to control compatibility support for pickle stream
7305 generated by Python 2. If *fix_imports* is True, pickle will try to
7306 map the old Python 2 names to the new names used in Python 3. The
7307 *encoding* and *errors* tell pickle how to decode 8-bit string
7308 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7309 respectively. The *encoding* can be 'bytes' to read these 8-bit
7310 string instances as bytes objects.
7311 [clinic start generated code]*/
7312
7313 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors)7314 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7315 const char *encoding, const char *errors)
7316 /*[clinic end generated code: output=69e298160285199e input=01b44dd3fc07afa7]*/
7317 {
7318 PyObject *result;
7319 UnpicklerObject *unpickler = _Unpickler_New();
7320
7321 if (unpickler == NULL)
7322 return NULL;
7323
7324 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7325 goto error;
7326
7327 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7328 goto error;
7329
7330 unpickler->fix_imports = fix_imports;
7331
7332 result = load(unpickler);
7333 Py_DECREF(unpickler);
7334 return result;
7335
7336 error:
7337 Py_XDECREF(unpickler);
7338 return NULL;
7339 }
7340
7341 /*[clinic input]
7342
7343 _pickle.loads
7344
7345 data: object
7346 *
7347 fix_imports: bool = True
7348 encoding: str = 'ASCII'
7349 errors: str = 'strict'
7350
7351 Read and return an object from the given pickle data.
7352
7353 The protocol version of the pickle is detected automatically, so no
7354 protocol argument is needed. Bytes past the pickled object's
7355 representation are ignored.
7356
7357 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7358 which are used to control compatibility support for pickle stream
7359 generated by Python 2. If *fix_imports* is True, pickle will try to
7360 map the old Python 2 names to the new names used in Python 3. The
7361 *encoding* and *errors* tell pickle how to decode 8-bit string
7362 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7363 respectively. The *encoding* can be 'bytes' to read these 8-bit
7364 string instances as bytes objects.
7365 [clinic start generated code]*/
7366
7367 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors)7368 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7369 const char *encoding, const char *errors)
7370 /*[clinic end generated code: output=1e7cb2343f2c440f input=70605948a719feb9]*/
7371 {
7372 PyObject *result;
7373 UnpicklerObject *unpickler = _Unpickler_New();
7374
7375 if (unpickler == NULL)
7376 return NULL;
7377
7378 if (_Unpickler_SetStringInput(unpickler, data) < 0)
7379 goto error;
7380
7381 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7382 goto error;
7383
7384 unpickler->fix_imports = fix_imports;
7385
7386 result = load(unpickler);
7387 Py_DECREF(unpickler);
7388 return result;
7389
7390 error:
7391 Py_XDECREF(unpickler);
7392 return NULL;
7393 }
7394
/* Module-level function table referenced by the module definition.  The
   *_METHODDEF entries are macros defined outside this section (presumably
   generated by Argument Clinic for dump, dumps, load and loads -- confirm in
   the generated header).  The {NULL, NULL} entry terminates the table. */
static struct PyMethodDef pickle_methods[] = {
    _PICKLE_DUMP_METHODDEF
    _PICKLE_DUMPS_METHODDEF
    _PICKLE_LOAD_METHODDEF
    _PICKLE_LOADS_METHODDEF
    {NULL, NULL} /* sentinel */
};
7402
/* m_clear slot of the module: clears the per-module PickleState through
   _Pickle_ClearState.  Always returns 0. */
static int
pickle_clear(PyObject *m)
{
    _Pickle_ClearState(_Pickle_GetState(m));
    return 0;
}
7409
/* m_free slot of the module: performs the same state clearing as
   pickle_clear when the module object is being freed. */
static void
pickle_free(PyObject *m)
{
    _Pickle_ClearState(_Pickle_GetState(m));
}
7415
/* m_traverse slot of the module: reports every object stored in the
   per-module PickleState to the cyclic garbage collector.  visit/arg are
   supplied by the collector and consumed by the Py_VISIT macro, which
   returns early when the callback yields a non-zero value.  Returns 0 once
   all fields have been visited. */
static int
pickle_traverse(PyObject *m, visitproc visit, void *arg)
{
    PickleState *st = _Pickle_GetState(m);
    /* Exception classes created during module initialisation. */
    Py_VISIT(st->PickleError);
    Py_VISIT(st->PicklingError);
    Py_VISIT(st->UnpicklingError);
    /* Remaining cached objects held in the module state. */
    Py_VISIT(st->dispatch_table);
    Py_VISIT(st->extension_registry);
    Py_VISIT(st->extension_cache);
    Py_VISIT(st->inverted_registry);
    Py_VISIT(st->name_mapping_2to3);
    Py_VISIT(st->import_mapping_2to3);
    Py_VISIT(st->name_mapping_3to2);
    Py_VISIT(st->import_mapping_3to2);
    Py_VISIT(st->codecs_encode);
    Py_VISIT(st->getattr);
    return 0;
}
7435
/* Definition of the _pickle module.  m_size reserves a PickleState block of
   per-module storage; the traverse/clear/free slots manage the objects kept
   in that state. */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",            /* m_name */
    pickle_module_doc,    /* m_doc */
    sizeof(PickleState),  /* m_size */
    pickle_methods,       /* m_methods */
    NULL,                 /* m_reload */
    pickle_traverse,      /* m_traverse */
    pickle_clear,         /* m_clear */
    (freefunc)pickle_free /* m_free */
};
7447
7448 PyMODINIT_FUNC
PyInit__pickle(void)7449 PyInit__pickle(void)
7450 {
7451 PyObject *m;
7452 PickleState *st;
7453
7454 m = PyState_FindModule(&_picklemodule);
7455 if (m) {
7456 Py_INCREF(m);
7457 return m;
7458 }
7459
7460 if (PyType_Ready(&Unpickler_Type) < 0)
7461 return NULL;
7462 if (PyType_Ready(&Pickler_Type) < 0)
7463 return NULL;
7464 if (PyType_Ready(&Pdata_Type) < 0)
7465 return NULL;
7466 if (PyType_Ready(&PicklerMemoProxyType) < 0)
7467 return NULL;
7468 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7469 return NULL;
7470
7471 /* Create the module and add the functions. */
7472 m = PyModule_Create(&_picklemodule);
7473 if (m == NULL)
7474 return NULL;
7475
7476 Py_INCREF(&Pickler_Type);
7477 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
7478 return NULL;
7479 Py_INCREF(&Unpickler_Type);
7480 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
7481 return NULL;
7482
7483 st = _Pickle_GetState(m);
7484
7485 /* Initialize the exceptions. */
7486 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7487 if (st->PickleError == NULL)
7488 return NULL;
7489 st->PicklingError = \
7490 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7491 if (st->PicklingError == NULL)
7492 return NULL;
7493 st->UnpicklingError = \
7494 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7495 if (st->UnpicklingError == NULL)
7496 return NULL;
7497
7498 Py_INCREF(st->PickleError);
7499 if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
7500 return NULL;
7501 Py_INCREF(st->PicklingError);
7502 if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
7503 return NULL;
7504 Py_INCREF(st->UnpicklingError);
7505 if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
7506 return NULL;
7507
7508 if (_Pickle_InitState(st) < 0)
7509 return NULL;
7510
7511 return m;
7512 }
7513