1 
2 /* Write Python objects to files and read them back.
3    This is primarily intended for writing and reading compiled Python code,
4    even though dicts, lists, sets and frozensets, not commonly seen in
5    code objects, are supported.
6    Version 3 of this protocol properly supports circular links
7    and sharing. */
8 
9 #define PY_SSIZE_T_CLEAN
10 
11 #include "Python.h"
12 #include "longintrepr.h"
13 #include "code.h"
14 #include "marshal.h"
15 #include "../Modules/hashtable.h"
16 
17 /*[clinic input]
18 module marshal
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
21 
22 #include "clinic/marshal.c.h"
23 
/* Upper bound on the nesting depth of a marshalled object.  When the object
 * stack gets this deep, the writer and the reader report an error instead of
 * recursing further and risking a crash of the interpreter.
 *
 * The value is reduced on every Windows build, not only on debug builds
 * (the guard used to be `defined(MS_WINDOWS) && defined(_DEBUG)`):
 * on Windows PGO builds the r_object function overallocates its stack and
 * can cause a stack overflow.
 * BUG: https://bugs.python.org/issue33720
 */
#if !defined(MS_WINDOWS)
#  define MAX_MARSHAL_STACK_DEPTH 2000
#else
#  define MAX_MARSHAL_STACK_DEPTH 1000
#endif
40 
/* One-byte type codes that introduce each object in the marshal stream. */

/* NULL marker (also terminates a dict) and singletons */
#define TYPE_NULL                 '0'
#define TYPE_NONE                 'N'
#define TYPE_FALSE                'F'
#define TYPE_TRUE                 'T'
#define TYPE_STOPITER             'S'
#define TYPE_ELLIPSIS             '.'

/* Numbers */
#define TYPE_INT                  'i'   /* value written with w_long() */
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64                'I'
#define TYPE_FLOAT                'f'   /* text form, format version <= 1 */
#define TYPE_BINARY_FLOAT         'g'   /* 8 packed bytes, version > 1 */
#define TYPE_COMPLEX              'x'   /* text form, format version <= 1 */
#define TYPE_BINARY_COMPLEX       'y'   /* 2 x 8 packed bytes, version > 1 */
#define TYPE_LONG                 'l'   /* 15-bit digits, see w_PyLong() */

/* Byte and text strings */
#define TYPE_STRING               's'   /* bytes and other buffer objects */
#define TYPE_INTERNED             't'   /* interned str as UTF-8, version >= 3 */
#define TYPE_UNICODE              'u'   /* str as UTF-8 */

/* Back-reference to an object written earlier (followed by its index) */
#define TYPE_REF                  'r'

/* Containers and code objects */
#define TYPE_TUPLE                '('
#define TYPE_LIST                 '['
#define TYPE_DICT                 '{'
#define TYPE_CODE                 'c'
#define TYPE_SET                  '<'
#define TYPE_FROZENSET            '>'

/* Written for objects that cannot be marshalled */
#define TYPE_UNKNOWN              '?'

/* OR-ed into a type code: with a type, add obj to index */
#define FLAG_REF                  '\x80'

/* Codes only written for format version >= 4 */
#define TYPE_ASCII                'a'
#define TYPE_ASCII_INTERNED       'A'
#define TYPE_SMALL_TUPLE          ')'   /* fewer than 256 items */
#define TYPE_SHORT_ASCII          'z'   /* fewer than 256 characters */
#define TYPE_SHORT_ASCII_INTERNED 'Z'
74 
/* Values stored in the `error` field of a WFILE. */
#define WFERR_OK             0   /* nothing went wrong */
#define WFERR_UNMARSHALLABLE 1   /* object or size cannot be written */
#define WFERR_NESTEDTOODEEP  2   /* depth exceeded MAX_MARSHAL_STACK_DEPTH */
#define WFERR_NOMEMORY       3   /* allocation failed or size would overflow */
79 
80 typedef struct {
81     FILE *fp;
82     int error;  /* see WFERR_* values */
83     int depth;
84     PyObject *str;
85     char *ptr;
86     char *end;
87     char *buf;
88     _Py_hashtable_t *hashtable;
89     int version;
90 } WFILE;
91 
/* Append the single byte `c` to the output of WFILE `p`.  When the buffer is
   full, w_reserve() is asked for one more byte; if that fails the byte is
   dropped.  `p` is evaluated more than once. */
#define w_byte(c, p) do {                                 \
        if ((p)->ptr == (p)->end && !w_reserve((p), 1))   \
            break;                                        \
        *(p)->ptr++ = (c);                                \
    } while(0)
96 
97 static void
w_flush(WFILE * p)98 w_flush(WFILE *p)
99 {
100     assert(p->fp != NULL);
101     fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
102     p->ptr = p->buf;
103 }
104 
105 static int
w_reserve(WFILE * p,Py_ssize_t needed)106 w_reserve(WFILE *p, Py_ssize_t needed)
107 {
108     Py_ssize_t pos, size, delta;
109     if (p->ptr == NULL)
110         return 0; /* An error already occurred */
111     if (p->fp != NULL) {
112         w_flush(p);
113         return needed <= p->end - p->ptr;
114     }
115     assert(p->str != NULL);
116     pos = p->ptr - p->buf;
117     size = PyBytes_Size(p->str);
118     if (size > 16*1024*1024)
119         delta = (size >> 3);            /* 12.5% overallocation */
120     else
121         delta = size + 1024;
122     delta = Py_MAX(delta, needed);
123     if (delta > PY_SSIZE_T_MAX - size) {
124         p->error = WFERR_NOMEMORY;
125         return 0;
126     }
127     size += delta;
128     if (_PyBytes_Resize(&p->str, size) != 0) {
129         p->ptr = p->buf = p->end = NULL;
130         return 0;
131     }
132     else {
133         p->buf = PyBytes_AS_STRING(p->str);
134         p->ptr = p->buf + pos;
135         p->end = p->buf + size;
136         return 1;
137     }
138 }
139 
140 static void
w_string(const char * s,Py_ssize_t n,WFILE * p)141 w_string(const char *s, Py_ssize_t n, WFILE *p)
142 {
143     Py_ssize_t m;
144     if (!n || p->ptr == NULL)
145         return;
146     m = p->end - p->ptr;
147     if (p->fp != NULL) {
148         if (n <= m) {
149             memcpy(p->ptr, s, n);
150             p->ptr += n;
151         }
152         else {
153             w_flush(p);
154             fwrite(s, 1, n, p->fp);
155         }
156     }
157     else {
158         if (n <= m || w_reserve(p, n - m)) {
159             memcpy(p->ptr, s, n);
160             p->ptr += n;
161         }
162     }
163 }
164 
165 static void
w_short(int x,WFILE * p)166 w_short(int x, WFILE *p)
167 {
168     w_byte((char)( x      & 0xff), p);
169     w_byte((char)((x>> 8) & 0xff), p);
170 }
171 
172 static void
w_long(long x,WFILE * p)173 w_long(long x, WFILE *p)
174 {
175     w_byte((char)( x      & 0xff), p);
176     w_byte((char)((x>> 8) & 0xff), p);
177     w_byte((char)((x>>16) & 0xff), p);
178     w_byte((char)((x>>24) & 0xff), p);
179 }
180 
/* Largest value that fits the signed 32-bit size field of the format. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size `n` as a 32-bit field.

   Where size_t is wider than 32 bits, a size above SIZE32_MAX cannot be
   represented: p->error is set to WFERR_UNMARSHALLABLE and the macro
   *returns from the enclosing function*, which must therefore return void.

   The macro leaves p->depth alone.  w_object() increments the depth before
   and decrements it after writing each object, so a second decrement here
   would unbalance the counter and weaken the MAX_MARSHAL_STACK_DEPTH
   guard. */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
195 
196 static void
w_pstring(const char * s,Py_ssize_t n,WFILE * p)197 w_pstring(const char *s, Py_ssize_t n, WFILE *p)
198 {
199         W_SIZE(n, p);
200         w_string(s, n, p);
201 }
202 
203 static void
w_short_pstring(const char * s,Py_ssize_t n,WFILE * p)204 w_short_pstring(const char *s, Py_ssize_t n, WFILE *p)
205 {
206     w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
207     w_string(s, n, p);
208 }
209 
/* Python ints are assumed to be stored internally in a base that is some
   power of 2**15.  For the sake of portability they are always read and
   written in base exactly 2**15, so one internal digit corresponds to
   PyLong_MARSHAL_RATIO marshal digits. */

#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE  ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK  (PyLong_MARSHAL_BASE - 1)
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)

#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
221 
/* Write type code `t` to `p`, OR-ed with the variable `flag` that must be
   in scope at the point of use (it carries FLAG_REF when set). */
#define W_TYPE(t, p)  do { w_byte((t) | flag, (p)); } while(0)
225 
226 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)227 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
228 {
229     Py_ssize_t i, j, n, l;
230     digit d;
231 
232     W_TYPE(TYPE_LONG, p);
233     if (Py_SIZE(ob) == 0) {
234         w_long((long)0, p);
235         return;
236     }
237 
238     /* set l to number of base PyLong_MARSHAL_BASE digits */
239     n = Py_ABS(Py_SIZE(ob));
240     l = (n-1) * PyLong_MARSHAL_RATIO;
241     d = ob->ob_digit[n-1];
242     assert(d != 0); /* a PyLong is always normalized */
243     do {
244         d >>= PyLong_MARSHAL_SHIFT;
245         l++;
246     } while (d != 0);
247     if (l > SIZE32_MAX) {
248         p->depth--;
249         p->error = WFERR_UNMARSHALLABLE;
250         return;
251     }
252     w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
253 
254     for (i=0; i < n-1; i++) {
255         d = ob->ob_digit[i];
256         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
257             w_short(d & PyLong_MARSHAL_MASK, p);
258             d >>= PyLong_MARSHAL_SHIFT;
259         }
260         assert (d == 0);
261     }
262     d = ob->ob_digit[n-1];
263     do {
264         w_short(d & PyLong_MARSHAL_MASK, p);
265         d >>= PyLong_MARSHAL_SHIFT;
266     } while (d != 0);
267 }
268 
269 static int
w_ref(PyObject * v,char * flag,WFILE * p)270 w_ref(PyObject *v, char *flag, WFILE *p)
271 {
272     _Py_hashtable_entry_t *entry;
273     int w;
274 
275     if (p->version < 3 || p->hashtable == NULL)
276         return 0; /* not writing object references */
277 
278     /* if it has only one reference, it definitely isn't shared */
279     if (Py_REFCNT(v) == 1)
280         return 0;
281 
282     entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
283     if (entry != NULL) {
284         /* write the reference index to the stream */
285         _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w);
286         /* we don't store "long" indices in the dict */
287         assert(0 <= w && w <= 0x7fffffff);
288         w_byte(TYPE_REF, p);
289         w_long(w, p);
290         return 1;
291     } else {
292         size_t s = p->hashtable->entries;
293         /* we don't support long indices */
294         if (s >= 0x7fffffff) {
295             PyErr_SetString(PyExc_ValueError, "too many objects");
296             goto err;
297         }
298         w = (int)s;
299         Py_INCREF(v);
300         if (_Py_HASHTABLE_SET(p->hashtable, v, w) < 0) {
301             Py_DECREF(v);
302             goto err;
303         }
304         *flag |= FLAG_REF;
305         return 0;
306     }
307 err:
308     p->error = WFERR_UNMARSHALLABLE;
309     return 1;
310 }
311 
312 static void
313 w_complex_object(PyObject *v, char flag, WFILE *p);
314 
315 static void
w_object(PyObject * v,WFILE * p)316 w_object(PyObject *v, WFILE *p)
317 {
318     char flag = '\0';
319 
320     p->depth++;
321 
322     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
323         p->error = WFERR_NESTEDTOODEEP;
324     }
325     else if (v == NULL) {
326         w_byte(TYPE_NULL, p);
327     }
328     else if (v == Py_None) {
329         w_byte(TYPE_NONE, p);
330     }
331     else if (v == PyExc_StopIteration) {
332         w_byte(TYPE_STOPITER, p);
333     }
334     else if (v == Py_Ellipsis) {
335         w_byte(TYPE_ELLIPSIS, p);
336     }
337     else if (v == Py_False) {
338         w_byte(TYPE_FALSE, p);
339     }
340     else if (v == Py_True) {
341         w_byte(TYPE_TRUE, p);
342     }
343     else if (!w_ref(v, &flag, p))
344         w_complex_object(v, flag, p);
345 
346     p->depth--;
347 }
348 
349 static void
w_complex_object(PyObject * v,char flag,WFILE * p)350 w_complex_object(PyObject *v, char flag, WFILE *p)
351 {
352     Py_ssize_t i, n;
353 
354     if (PyLong_CheckExact(v)) {
355         long x = PyLong_AsLong(v);
356         if ((x == -1)  && PyErr_Occurred()) {
357             PyLongObject *ob = (PyLongObject *)v;
358             PyErr_Clear();
359             w_PyLong(ob, flag, p);
360         }
361         else {
362 #if SIZEOF_LONG > 4
363             long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
364             if (y && y != -1) {
365                 /* Too large for TYPE_INT */
366                 w_PyLong((PyLongObject*)v, flag, p);
367             }
368             else
369 #endif
370             {
371                 W_TYPE(TYPE_INT, p);
372                 w_long(x, p);
373             }
374         }
375     }
376     else if (PyFloat_CheckExact(v)) {
377         if (p->version > 1) {
378             unsigned char buf[8];
379             if (_PyFloat_Pack8(PyFloat_AsDouble(v),
380                                buf, 1) < 0) {
381                 p->error = WFERR_UNMARSHALLABLE;
382                 return;
383             }
384             W_TYPE(TYPE_BINARY_FLOAT, p);
385             w_string((char*)buf, 8, p);
386         }
387         else {
388             char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
389                                               'g', 17, 0, NULL);
390             if (!buf) {
391                 p->error = WFERR_NOMEMORY;
392                 return;
393             }
394             n = strlen(buf);
395             W_TYPE(TYPE_FLOAT, p);
396             w_byte((int)n, p);
397             w_string(buf, n, p);
398             PyMem_Free(buf);
399         }
400     }
401     else if (PyComplex_CheckExact(v)) {
402         if (p->version > 1) {
403             unsigned char buf[8];
404             if (_PyFloat_Pack8(PyComplex_RealAsDouble(v),
405                                buf, 1) < 0) {
406                 p->error = WFERR_UNMARSHALLABLE;
407                 return;
408             }
409             W_TYPE(TYPE_BINARY_COMPLEX, p);
410             w_string((char*)buf, 8, p);
411             if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),
412                                buf, 1) < 0) {
413                 p->error = WFERR_UNMARSHALLABLE;
414                 return;
415             }
416             w_string((char*)buf, 8, p);
417         }
418         else {
419             char *buf;
420             W_TYPE(TYPE_COMPLEX, p);
421             buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
422                                         'g', 17, 0, NULL);
423             if (!buf) {
424                 p->error = WFERR_NOMEMORY;
425                 return;
426             }
427             n = strlen(buf);
428             w_byte((int)n, p);
429             w_string(buf, n, p);
430             PyMem_Free(buf);
431             buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
432                                         'g', 17, 0, NULL);
433             if (!buf) {
434                 p->error = WFERR_NOMEMORY;
435                 return;
436             }
437             n = strlen(buf);
438             w_byte((int)n, p);
439             w_string(buf, n, p);
440             PyMem_Free(buf);
441         }
442     }
443     else if (PyBytes_CheckExact(v)) {
444         W_TYPE(TYPE_STRING, p);
445         w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
446     }
447     else if (PyUnicode_CheckExact(v)) {
448         if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
449             int is_short = PyUnicode_GET_LENGTH(v) < 256;
450             if (is_short) {
451                 if (PyUnicode_CHECK_INTERNED(v))
452                     W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
453                 else
454                     W_TYPE(TYPE_SHORT_ASCII, p);
455                 w_short_pstring((char *) PyUnicode_1BYTE_DATA(v),
456                                 PyUnicode_GET_LENGTH(v), p);
457             }
458             else {
459                 if (PyUnicode_CHECK_INTERNED(v))
460                     W_TYPE(TYPE_ASCII_INTERNED, p);
461                 else
462                     W_TYPE(TYPE_ASCII, p);
463                 w_pstring((char *) PyUnicode_1BYTE_DATA(v),
464                           PyUnicode_GET_LENGTH(v), p);
465             }
466         }
467         else {
468             PyObject *utf8;
469             utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
470             if (utf8 == NULL) {
471                 p->depth--;
472                 p->error = WFERR_UNMARSHALLABLE;
473                 return;
474             }
475             if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
476                 W_TYPE(TYPE_INTERNED, p);
477             else
478                 W_TYPE(TYPE_UNICODE, p);
479             w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
480             Py_DECREF(utf8);
481         }
482     }
483     else if (PyTuple_CheckExact(v)) {
484         n = PyTuple_Size(v);
485         if (p->version >= 4 && n < 256) {
486             W_TYPE(TYPE_SMALL_TUPLE, p);
487             w_byte((unsigned char)n, p);
488         }
489         else {
490             W_TYPE(TYPE_TUPLE, p);
491             W_SIZE(n, p);
492         }
493         for (i = 0; i < n; i++) {
494             w_object(PyTuple_GET_ITEM(v, i), p);
495         }
496     }
497     else if (PyList_CheckExact(v)) {
498         W_TYPE(TYPE_LIST, p);
499         n = PyList_GET_SIZE(v);
500         W_SIZE(n, p);
501         for (i = 0; i < n; i++) {
502             w_object(PyList_GET_ITEM(v, i), p);
503         }
504     }
505     else if (PyDict_CheckExact(v)) {
506         Py_ssize_t pos;
507         PyObject *key, *value;
508         W_TYPE(TYPE_DICT, p);
509         /* This one is NULL object terminated! */
510         pos = 0;
511         while (PyDict_Next(v, &pos, &key, &value)) {
512             w_object(key, p);
513             w_object(value, p);
514         }
515         w_object((PyObject *)NULL, p);
516     }
517     else if (PyAnySet_CheckExact(v)) {
518         PyObject *value, *it;
519 
520         if (PyObject_TypeCheck(v, &PySet_Type))
521             W_TYPE(TYPE_SET, p);
522         else
523             W_TYPE(TYPE_FROZENSET, p);
524         n = PyObject_Size(v);
525         if (n == -1) {
526             p->depth--;
527             p->error = WFERR_UNMARSHALLABLE;
528             return;
529         }
530         W_SIZE(n, p);
531         it = PyObject_GetIter(v);
532         if (it == NULL) {
533             p->depth--;
534             p->error = WFERR_UNMARSHALLABLE;
535             return;
536         }
537         while ((value = PyIter_Next(it)) != NULL) {
538             w_object(value, p);
539             Py_DECREF(value);
540         }
541         Py_DECREF(it);
542         if (PyErr_Occurred()) {
543             p->depth--;
544             p->error = WFERR_UNMARSHALLABLE;
545             return;
546         }
547     }
548     else if (PyCode_Check(v)) {
549         PyCodeObject *co = (PyCodeObject *)v;
550         W_TYPE(TYPE_CODE, p);
551         w_long(co->co_argcount, p);
552         w_long(co->co_kwonlyargcount, p);
553         w_long(co->co_nlocals, p);
554         w_long(co->co_stacksize, p);
555         w_long(co->co_flags, p);
556         w_object(co->co_code, p);
557         w_object(co->co_consts, p);
558         w_object(co->co_names, p);
559         w_object(co->co_varnames, p);
560         w_object(co->co_freevars, p);
561         w_object(co->co_cellvars, p);
562         w_object(co->co_filename, p);
563         w_object(co->co_name, p);
564         w_long(co->co_firstlineno, p);
565         w_object(co->co_lnotab, p);
566     }
567     else if (PyObject_CheckBuffer(v)) {
568         /* Write unknown bytes-like objects as a bytes object */
569         Py_buffer view;
570         if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
571             w_byte(TYPE_UNKNOWN, p);
572             p->depth--;
573             p->error = WFERR_UNMARSHALLABLE;
574             return;
575         }
576         W_TYPE(TYPE_STRING, p);
577         w_pstring(view.buf, view.len, p);
578         PyBuffer_Release(&view);
579     }
580     else {
581         W_TYPE(TYPE_UNKNOWN, p);
582         p->error = WFERR_UNMARSHALLABLE;
583     }
584 }
585 
586 static int
w_init_refs(WFILE * wf,int version)587 w_init_refs(WFILE *wf, int version)
588 {
589     if (version >= 3) {
590         wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
591                                           _Py_hashtable_hash_ptr,
592                                           _Py_hashtable_compare_direct);
593         if (wf->hashtable == NULL) {
594             PyErr_NoMemory();
595             return -1;
596         }
597     }
598     return 0;
599 }
600 
601 static int
w_decref_entry(_Py_hashtable_t * ht,_Py_hashtable_entry_t * entry,void * Py_UNUSED (data))602 w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
603                void *Py_UNUSED(data))
604 {
605     PyObject *entry_key;
606 
607     _Py_HASHTABLE_ENTRY_READ_KEY(ht, entry, entry_key);
608     Py_XDECREF(entry_key);
609     return 0;
610 }
611 
612 static void
w_clear_refs(WFILE * wf)613 w_clear_refs(WFILE *wf)
614 {
615     if (wf->hashtable != NULL) {
616         _Py_hashtable_foreach(wf->hashtable, w_decref_entry, NULL);
617         _Py_hashtable_destroy(wf->hashtable);
618     }
619 }
620 
621 /* version currently has no effect for writing ints. */
622 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)623 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
624 {
625     char buf[4];
626     WFILE wf;
627     memset(&wf, 0, sizeof(wf));
628     wf.fp = fp;
629     wf.ptr = wf.buf = buf;
630     wf.end = wf.ptr + sizeof(buf);
631     wf.error = WFERR_OK;
632     wf.version = version;
633     w_long(x, &wf);
634     w_flush(&wf);
635 }
636 
637 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)638 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
639 {
640     char buf[BUFSIZ];
641     WFILE wf;
642     memset(&wf, 0, sizeof(wf));
643     wf.fp = fp;
644     wf.ptr = wf.buf = buf;
645     wf.end = wf.ptr + sizeof(buf);
646     wf.error = WFERR_OK;
647     wf.version = version;
648     if (w_init_refs(&wf, version))
649         return; /* caller mush check PyErr_Occurred() */
650     w_object(x, &wf);
651     w_clear_refs(&wf);
652     w_flush(&wf);
653 }
654 
655 typedef struct {
656     FILE *fp;
657     int depth;
658     PyObject *readable;  /* Stream-like object being read from */
659     char *ptr;
660     char *end;
661     char *buf;
662     Py_ssize_t buf_size;
663     PyObject *refs;  /* a list */
664 } RFILE;
665 
666 static const char *
r_string(Py_ssize_t n,RFILE * p)667 r_string(Py_ssize_t n, RFILE *p)
668 {
669     Py_ssize_t read = -1;
670 
671     if (p->ptr != NULL) {
672         /* Fast path for loads() */
673         char *res = p->ptr;
674         Py_ssize_t left = p->end - p->ptr;
675         if (left < n) {
676             PyErr_SetString(PyExc_EOFError,
677                             "marshal data too short");
678             return NULL;
679         }
680         p->ptr += n;
681         return res;
682     }
683     if (p->buf == NULL) {
684         p->buf = PyMem_MALLOC(n);
685         if (p->buf == NULL) {
686             PyErr_NoMemory();
687             return NULL;
688         }
689         p->buf_size = n;
690     }
691     else if (p->buf_size < n) {
692         char *tmp = PyMem_REALLOC(p->buf, n);
693         if (tmp == NULL) {
694             PyErr_NoMemory();
695             return NULL;
696         }
697         p->buf = tmp;
698         p->buf_size = n;
699     }
700 
701     if (!p->readable) {
702         assert(p->fp != NULL);
703         read = fread(p->buf, 1, n, p->fp);
704     }
705     else {
706         _Py_IDENTIFIER(readinto);
707         PyObject *res, *mview;
708         Py_buffer buf;
709 
710         if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
711             return NULL;
712         mview = PyMemoryView_FromBuffer(&buf);
713         if (mview == NULL)
714             return NULL;
715 
716         res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
717         if (res != NULL) {
718             read = PyNumber_AsSsize_t(res, PyExc_ValueError);
719             Py_DECREF(res);
720         }
721     }
722     if (read != n) {
723         if (!PyErr_Occurred()) {
724             if (read > n)
725                 PyErr_Format(PyExc_ValueError,
726                              "read() returned too much data: "
727                              "%zd bytes requested, %zd returned",
728                              n, read);
729             else
730                 PyErr_SetString(PyExc_EOFError,
731                                 "EOF read where not expected");
732         }
733         return NULL;
734     }
735     return p->buf;
736 }
737 
738 static int
r_byte(RFILE * p)739 r_byte(RFILE *p)
740 {
741     int c = EOF;
742 
743     if (p->ptr != NULL) {
744         if (p->ptr < p->end)
745             c = (unsigned char) *p->ptr++;
746         return c;
747     }
748     if (!p->readable) {
749         assert(p->fp);
750         c = getc(p->fp);
751     }
752     else {
753         const char *ptr = r_string(1, p);
754         if (ptr != NULL)
755             c = *(unsigned char *) ptr;
756     }
757     return c;
758 }
759 
760 static int
r_short(RFILE * p)761 r_short(RFILE *p)
762 {
763     short x = -1;
764     const unsigned char *buffer;
765 
766     buffer = (const unsigned char *) r_string(2, p);
767     if (buffer != NULL) {
768         x = buffer[0];
769         x |= buffer[1] << 8;
770         /* Sign-extension, in case short greater than 16 bits */
771         x |= -(x & 0x8000);
772     }
773     return x;
774 }
775 
776 static long
r_long(RFILE * p)777 r_long(RFILE *p)
778 {
779     long x = -1;
780     const unsigned char *buffer;
781 
782     buffer = (const unsigned char *) r_string(4, p);
783     if (buffer != NULL) {
784         x = buffer[0];
785         x |= (long)buffer[1] << 8;
786         x |= (long)buffer[2] << 16;
787         x |= (long)buffer[3] << 24;
788 #if SIZEOF_LONG > 4
789         /* Sign extension for 64-bit machines */
790         x |= -(x & 0x80000000L);
791 #endif
792     }
793     return x;
794 }
795 
796 /* r_long64 deals with the TYPE_INT64 code. */
797 static PyObject *
r_long64(RFILE * p)798 r_long64(RFILE *p)
799 {
800     const unsigned char *buffer = (const unsigned char *) r_string(8, p);
801     if (buffer == NULL) {
802         return NULL;
803     }
804     return _PyLong_FromByteArray(buffer, 8,
805                                  1 /* little endian */,
806                                  1 /* signed */);
807 }
808 
809 static PyObject *
r_PyLong(RFILE * p)810 r_PyLong(RFILE *p)
811 {
812     PyLongObject *ob;
813     long n, size, i;
814     int j, md, shorts_in_top_digit;
815     digit d;
816 
817     n = r_long(p);
818     if (PyErr_Occurred())
819         return NULL;
820     if (n == 0)
821         return (PyObject *)_PyLong_New(0);
822     if (n < -SIZE32_MAX || n > SIZE32_MAX) {
823         PyErr_SetString(PyExc_ValueError,
824                        "bad marshal data (long size out of range)");
825         return NULL;
826     }
827 
828     size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
829     shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
830     ob = _PyLong_New(size);
831     if (ob == NULL)
832         return NULL;
833 
834     Py_SIZE(ob) = n > 0 ? size : -size;
835 
836     for (i = 0; i < size-1; i++) {
837         d = 0;
838         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
839             md = r_short(p);
840             if (PyErr_Occurred()) {
841                 Py_DECREF(ob);
842                 return NULL;
843             }
844             if (md < 0 || md > PyLong_MARSHAL_BASE)
845                 goto bad_digit;
846             d += (digit)md << j*PyLong_MARSHAL_SHIFT;
847         }
848         ob->ob_digit[i] = d;
849     }
850 
851     d = 0;
852     for (j=0; j < shorts_in_top_digit; j++) {
853         md = r_short(p);
854         if (PyErr_Occurred()) {
855             Py_DECREF(ob);
856             return NULL;
857         }
858         if (md < 0 || md > PyLong_MARSHAL_BASE)
859             goto bad_digit;
860         /* topmost marshal digit should be nonzero */
861         if (md == 0 && j == shorts_in_top_digit - 1) {
862             Py_DECREF(ob);
863             PyErr_SetString(PyExc_ValueError,
864                 "bad marshal data (unnormalized long data)");
865             return NULL;
866         }
867         d += (digit)md << j*PyLong_MARSHAL_SHIFT;
868     }
869     if (PyErr_Occurred()) {
870         Py_DECREF(ob);
871         return NULL;
872     }
873     /* top digit should be nonzero, else the resulting PyLong won't be
874        normalized */
875     ob->ob_digit[size-1] = d;
876     return (PyObject *)ob;
877   bad_digit:
878     Py_DECREF(ob);
879     PyErr_SetString(PyExc_ValueError,
880                     "bad marshal data (digit out of range in long)");
881     return NULL;
882 }
883 
884 /* allocate the reflist index for a new object. Return -1 on failure */
885 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)886 r_ref_reserve(int flag, RFILE *p)
887 {
888     if (flag) { /* currently only FLAG_REF is defined */
889         Py_ssize_t idx = PyList_GET_SIZE(p->refs);
890         if (idx >= 0x7ffffffe) {
891             PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
892             return -1;
893         }
894         if (PyList_Append(p->refs, Py_None) < 0)
895             return -1;
896         return idx;
897     } else
898         return 0;
899 }
900 
901 /* insert the new object 'o' to the reflist at previously
902  * allocated index 'idx'.
903  * 'o' can be NULL, in which case nothing is done.
904  * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
905  * if 'o' was non-NULL, and the function fails, 'o' is released and
906  * NULL returned. This simplifies error checking at the call site since
907  * a single test for NULL for the function result is enough.
908  */
909 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)910 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
911 {
912     if (o != NULL && flag) { /* currently only FLAG_REF is defined */
913         PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
914         Py_INCREF(o);
915         PyList_SET_ITEM(p->refs, idx, o);
916         Py_DECREF(tmp);
917     }
918     return o;
919 }
920 
921 /* combination of both above, used when an object can be
922  * created whenever it is seen in the file, as opposed to
923  * after having loaded its sub-objects.
924  */
925 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)926 r_ref(PyObject *o, int flag, RFILE *p)
927 {
928     assert(flag & FLAG_REF);
929     if (o == NULL)
930         return NULL;
931     if (PyList_Append(p->refs, o) < 0) {
932         Py_DECREF(o); /* release the new object */
933         return NULL;
934     }
935     return o;
936 }
937 
938 static PyObject *
r_object(RFILE * p)939 r_object(RFILE *p)
940 {
941     /* NULL is a valid return value, it does not necessarily means that
942        an exception is set. */
943     PyObject *v, *v2;
944     Py_ssize_t idx = 0;
945     long i, n;
946     int type, code = r_byte(p);
947     int flag, is_interned = 0;
948     PyObject *retval = NULL;
949 
950     if (code == EOF) {
951         PyErr_SetString(PyExc_EOFError,
952                         "EOF read where object expected");
953         return NULL;
954     }
955 
956     p->depth++;
957 
958     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
959         p->depth--;
960         PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
961         return NULL;
962     }
963 
964     flag = code & FLAG_REF;
965     type = code & ~FLAG_REF;
966 
967 #define R_REF(O) do{\
968     if (flag) \
969         O = r_ref(O, flag, p);\
970 } while (0)
971 
972     switch (type) {
973 
974     case TYPE_NULL:
975         break;
976 
977     case TYPE_NONE:
978         Py_INCREF(Py_None);
979         retval = Py_None;
980         break;
981 
982     case TYPE_STOPITER:
983         Py_INCREF(PyExc_StopIteration);
984         retval = PyExc_StopIteration;
985         break;
986 
987     case TYPE_ELLIPSIS:
988         Py_INCREF(Py_Ellipsis);
989         retval = Py_Ellipsis;
990         break;
991 
992     case TYPE_FALSE:
993         Py_INCREF(Py_False);
994         retval = Py_False;
995         break;
996 
997     case TYPE_TRUE:
998         Py_INCREF(Py_True);
999         retval = Py_True;
1000         break;
1001 
1002     case TYPE_INT:
1003         n = r_long(p);
1004         retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
1005         R_REF(retval);
1006         break;
1007 
1008     case TYPE_INT64:
1009         retval = r_long64(p);
1010         R_REF(retval);
1011         break;
1012 
1013     case TYPE_LONG:
1014         retval = r_PyLong(p);
1015         R_REF(retval);
1016         break;
1017 
1018     case TYPE_FLOAT:
1019         {
1020             char buf[256];
1021             const char *ptr;
1022             double dx;
1023             n = r_byte(p);
1024             if (n == EOF) {
1025                 PyErr_SetString(PyExc_EOFError,
1026                     "EOF read where object expected");
1027                 break;
1028             }
1029             ptr = r_string(n, p);
1030             if (ptr == NULL)
1031                 break;
1032             memcpy(buf, ptr, n);
1033             buf[n] = '\0';
1034             dx = PyOS_string_to_double(buf, NULL, NULL);
1035             if (dx == -1.0 && PyErr_Occurred())
1036                 break;
1037             retval = PyFloat_FromDouble(dx);
1038             R_REF(retval);
1039             break;
1040         }
1041 
1042     case TYPE_BINARY_FLOAT:
1043         {
1044             const unsigned char *buf;
1045             double x;
1046             buf = (const unsigned char *) r_string(8, p);
1047             if (buf == NULL)
1048                 break;
1049             x = _PyFloat_Unpack8(buf, 1);
1050             if (x == -1.0 && PyErr_Occurred())
1051                 break;
1052             retval = PyFloat_FromDouble(x);
1053             R_REF(retval);
1054             break;
1055         }
1056 
1057     case TYPE_COMPLEX:
1058         {
1059             char buf[256];
1060             const char *ptr;
1061             Py_complex c;
1062             n = r_byte(p);
1063             if (n == EOF) {
1064                 PyErr_SetString(PyExc_EOFError,
1065                     "EOF read where object expected");
1066                 break;
1067             }
1068             ptr = r_string(n, p);
1069             if (ptr == NULL)
1070                 break;
1071             memcpy(buf, ptr, n);
1072             buf[n] = '\0';
1073             c.real = PyOS_string_to_double(buf, NULL, NULL);
1074             if (c.real == -1.0 && PyErr_Occurred())
1075                 break;
1076             n = r_byte(p);
1077             if (n == EOF) {
1078                 PyErr_SetString(PyExc_EOFError,
1079                     "EOF read where object expected");
1080                 break;
1081             }
1082             ptr = r_string(n, p);
1083             if (ptr == NULL)
1084                 break;
1085             memcpy(buf, ptr, n);
1086             buf[n] = '\0';
1087             c.imag = PyOS_string_to_double(buf, NULL, NULL);
1088             if (c.imag == -1.0 && PyErr_Occurred())
1089                 break;
1090             retval = PyComplex_FromCComplex(c);
1091             R_REF(retval);
1092             break;
1093         }
1094 
1095     case TYPE_BINARY_COMPLEX:
1096         {
1097             const unsigned char *buf;
1098             Py_complex c;
1099             buf = (const unsigned char *) r_string(8, p);
1100             if (buf == NULL)
1101                 break;
1102             c.real = _PyFloat_Unpack8(buf, 1);
1103             if (c.real == -1.0 && PyErr_Occurred())
1104                 break;
1105             buf = (const unsigned char *) r_string(8, p);
1106             if (buf == NULL)
1107                 break;
1108             c.imag = _PyFloat_Unpack8(buf, 1);
1109             if (c.imag == -1.0 && PyErr_Occurred())
1110                 break;
1111             retval = PyComplex_FromCComplex(c);
1112             R_REF(retval);
1113             break;
1114         }
1115 
1116     case TYPE_STRING:
1117         {
1118             const char *ptr;
1119             n = r_long(p);
1120             if (PyErr_Occurred())
1121                 break;
1122             if (n < 0 || n > SIZE32_MAX) {
1123                 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1124                 break;
1125             }
1126             v = PyBytes_FromStringAndSize((char *)NULL, n);
1127             if (v == NULL)
1128                 break;
1129             ptr = r_string(n, p);
1130             if (ptr == NULL) {
1131                 Py_DECREF(v);
1132                 break;
1133             }
1134             memcpy(PyBytes_AS_STRING(v), ptr, n);
1135             retval = v;
1136             R_REF(retval);
1137             break;
1138         }
1139 
1140     case TYPE_ASCII_INTERNED:
1141         is_interned = 1;
1142         /* fall through */
1143     case TYPE_ASCII:
1144         n = r_long(p);
1145         if (PyErr_Occurred())
1146             break;
1147         if (n < 0 || n > SIZE32_MAX) {
1148             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1149             break;
1150         }
1151         goto _read_ascii;
1152 
1153     case TYPE_SHORT_ASCII_INTERNED:
1154         is_interned = 1;
1155         /* fall through */
1156     case TYPE_SHORT_ASCII:
1157         n = r_byte(p);
1158         if (n == EOF) {
1159             PyErr_SetString(PyExc_EOFError,
1160                 "EOF read where object expected");
1161             break;
1162         }
1163     _read_ascii:
1164         {
1165             const char *ptr;
1166             ptr = r_string(n, p);
1167             if (ptr == NULL)
1168                 break;
1169             v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1170             if (v == NULL)
1171                 break;
1172             if (is_interned)
1173                 PyUnicode_InternInPlace(&v);
1174             retval = v;
1175             R_REF(retval);
1176             break;
1177         }
1178 
1179     case TYPE_INTERNED:
1180         is_interned = 1;
1181         /* fall through */
1182     case TYPE_UNICODE:
1183         {
1184         const char *buffer;
1185 
1186         n = r_long(p);
1187         if (PyErr_Occurred())
1188             break;
1189         if (n < 0 || n > SIZE32_MAX) {
1190             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1191             break;
1192         }
1193         if (n != 0) {
1194             buffer = r_string(n, p);
1195             if (buffer == NULL)
1196                 break;
1197             v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1198         }
1199         else {
1200             v = PyUnicode_New(0, 0);
1201         }
1202         if (v == NULL)
1203             break;
1204         if (is_interned)
1205             PyUnicode_InternInPlace(&v);
1206         retval = v;
1207         R_REF(retval);
1208         break;
1209         }
1210 
1211     case TYPE_SMALL_TUPLE:
1212         n = (unsigned char) r_byte(p);
1213         if (PyErr_Occurred())
1214             break;
1215         goto _read_tuple;
1216     case TYPE_TUPLE:
1217         n = r_long(p);
1218         if (PyErr_Occurred())
1219             break;
1220         if (n < 0 || n > SIZE32_MAX) {
1221             PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1222             break;
1223         }
1224     _read_tuple:
1225         v = PyTuple_New(n);
1226         R_REF(v);
1227         if (v == NULL)
1228             break;
1229 
1230         for (i = 0; i < n; i++) {
1231             v2 = r_object(p);
1232             if ( v2 == NULL ) {
1233                 if (!PyErr_Occurred())
1234                     PyErr_SetString(PyExc_TypeError,
1235                         "NULL object in marshal data for tuple");
1236                 Py_DECREF(v);
1237                 v = NULL;
1238                 break;
1239             }
1240             PyTuple_SET_ITEM(v, i, v2);
1241         }
1242         retval = v;
1243         break;
1244 
1245     case TYPE_LIST:
1246         n = r_long(p);
1247         if (PyErr_Occurred())
1248             break;
1249         if (n < 0 || n > SIZE32_MAX) {
1250             PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1251             break;
1252         }
1253         v = PyList_New(n);
1254         R_REF(v);
1255         if (v == NULL)
1256             break;
1257         for (i = 0; i < n; i++) {
1258             v2 = r_object(p);
1259             if ( v2 == NULL ) {
1260                 if (!PyErr_Occurred())
1261                     PyErr_SetString(PyExc_TypeError,
1262                         "NULL object in marshal data for list");
1263                 Py_DECREF(v);
1264                 v = NULL;
1265                 break;
1266             }
1267             PyList_SET_ITEM(v, i, v2);
1268         }
1269         retval = v;
1270         break;
1271 
1272     case TYPE_DICT:
1273         v = PyDict_New();
1274         R_REF(v);
1275         if (v == NULL)
1276             break;
1277         for (;;) {
1278             PyObject *key, *val;
1279             key = r_object(p);
1280             if (key == NULL)
1281                 break;
1282             val = r_object(p);
1283             if (val == NULL) {
1284                 Py_DECREF(key);
1285                 break;
1286             }
1287             if (PyDict_SetItem(v, key, val) < 0) {
1288                 Py_DECREF(key);
1289                 Py_DECREF(val);
1290                 break;
1291             }
1292             Py_DECREF(key);
1293             Py_DECREF(val);
1294         }
1295         if (PyErr_Occurred()) {
1296             Py_DECREF(v);
1297             v = NULL;
1298         }
1299         retval = v;
1300         break;
1301 
1302     case TYPE_SET:
1303     case TYPE_FROZENSET:
1304         n = r_long(p);
1305         if (PyErr_Occurred())
1306             break;
1307         if (n < 0 || n > SIZE32_MAX) {
1308             PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1309             break;
1310         }
1311 
1312         if (n == 0 && type == TYPE_FROZENSET) {
1313             /* call frozenset() to get the empty frozenset singleton */
1314             v = _PyObject_CallNoArg((PyObject*)&PyFrozenSet_Type);
1315             if (v == NULL)
1316                 break;
1317             R_REF(v);
1318             retval = v;
1319         }
1320         else {
1321             v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1322             if (type == TYPE_SET) {
1323                 R_REF(v);
1324             } else {
1325                 /* must use delayed registration of frozensets because they must
1326                  * be init with a refcount of 1
1327                  */
1328                 idx = r_ref_reserve(flag, p);
1329                 if (idx < 0)
1330                     Py_CLEAR(v); /* signal error */
1331             }
1332             if (v == NULL)
1333                 break;
1334 
1335             for (i = 0; i < n; i++) {
1336                 v2 = r_object(p);
1337                 if ( v2 == NULL ) {
1338                     if (!PyErr_Occurred())
1339                         PyErr_SetString(PyExc_TypeError,
1340                             "NULL object in marshal data for set");
1341                     Py_DECREF(v);
1342                     v = NULL;
1343                     break;
1344                 }
1345                 if (PySet_Add(v, v2) == -1) {
1346                     Py_DECREF(v);
1347                     Py_DECREF(v2);
1348                     v = NULL;
1349                     break;
1350                 }
1351                 Py_DECREF(v2);
1352             }
1353             if (type != TYPE_SET)
1354                 v = r_ref_insert(v, idx, flag, p);
1355             retval = v;
1356         }
1357         break;
1358 
1359     case TYPE_CODE:
1360         {
1361             int argcount;
1362             int kwonlyargcount;
1363             int nlocals;
1364             int stacksize;
1365             int flags;
1366             PyObject *code = NULL;
1367             PyObject *consts = NULL;
1368             PyObject *names = NULL;
1369             PyObject *varnames = NULL;
1370             PyObject *freevars = NULL;
1371             PyObject *cellvars = NULL;
1372             PyObject *filename = NULL;
1373             PyObject *name = NULL;
1374             int firstlineno;
1375             PyObject *lnotab = NULL;
1376 
1377             idx = r_ref_reserve(flag, p);
1378             if (idx < 0)
1379                 break;
1380 
1381             v = NULL;
1382 
1383             /* XXX ignore long->int overflows for now */
1384             argcount = (int)r_long(p);
1385             if (PyErr_Occurred())
1386                 goto code_error;
1387             kwonlyargcount = (int)r_long(p);
1388             if (PyErr_Occurred())
1389                 goto code_error;
1390             nlocals = (int)r_long(p);
1391             if (PyErr_Occurred())
1392                 goto code_error;
1393             stacksize = (int)r_long(p);
1394             if (PyErr_Occurred())
1395                 goto code_error;
1396             flags = (int)r_long(p);
1397             if (PyErr_Occurred())
1398                 goto code_error;
1399             code = r_object(p);
1400             if (code == NULL)
1401                 goto code_error;
1402             consts = r_object(p);
1403             if (consts == NULL)
1404                 goto code_error;
1405             names = r_object(p);
1406             if (names == NULL)
1407                 goto code_error;
1408             varnames = r_object(p);
1409             if (varnames == NULL)
1410                 goto code_error;
1411             freevars = r_object(p);
1412             if (freevars == NULL)
1413                 goto code_error;
1414             cellvars = r_object(p);
1415             if (cellvars == NULL)
1416                 goto code_error;
1417             filename = r_object(p);
1418             if (filename == NULL)
1419                 goto code_error;
1420             name = r_object(p);
1421             if (name == NULL)
1422                 goto code_error;
1423             firstlineno = (int)r_long(p);
1424             if (firstlineno == -1 && PyErr_Occurred())
1425                 break;
1426             lnotab = r_object(p);
1427             if (lnotab == NULL)
1428                 goto code_error;
1429 
1430             v = (PyObject *) PyCode_New(
1431                             argcount, kwonlyargcount,
1432                             nlocals, stacksize, flags,
1433                             code, consts, names, varnames,
1434                             freevars, cellvars, filename, name,
1435                             firstlineno, lnotab);
1436             v = r_ref_insert(v, idx, flag, p);
1437 
1438           code_error:
1439             Py_XDECREF(code);
1440             Py_XDECREF(consts);
1441             Py_XDECREF(names);
1442             Py_XDECREF(varnames);
1443             Py_XDECREF(freevars);
1444             Py_XDECREF(cellvars);
1445             Py_XDECREF(filename);
1446             Py_XDECREF(name);
1447             Py_XDECREF(lnotab);
1448         }
1449         retval = v;
1450         break;
1451 
1452     case TYPE_REF:
1453         n = r_long(p);
1454         if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1455             if (n == -1 && PyErr_Occurred())
1456                 break;
1457             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1458             break;
1459         }
1460         v = PyList_GET_ITEM(p->refs, n);
1461         if (v == Py_None) {
1462             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1463             break;
1464         }
1465         Py_INCREF(v);
1466         retval = v;
1467         break;
1468 
1469     default:
1470         /* Bogus data got written, which isn't ideal.
1471            This will let you keep working and recover. */
1472         PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1473         break;
1474 
1475     }
1476     p->depth--;
1477     return retval;
1478 }
1479 
1480 static PyObject *
read_object(RFILE * p)1481 read_object(RFILE *p)
1482 {
1483     PyObject *v;
1484     if (PyErr_Occurred()) {
1485         fprintf(stderr, "XXX readobject called with exception set\n");
1486         return NULL;
1487     }
1488     v = r_object(p);
1489     if (v == NULL && !PyErr_Occurred())
1490         PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1491     return v;
1492 }
1493 
1494 int
PyMarshal_ReadShortFromFile(FILE * fp)1495 PyMarshal_ReadShortFromFile(FILE *fp)
1496 {
1497     RFILE rf;
1498     int res;
1499     assert(fp);
1500     rf.readable = NULL;
1501     rf.fp = fp;
1502     rf.end = rf.ptr = NULL;
1503     rf.buf = NULL;
1504     res = r_short(&rf);
1505     if (rf.buf != NULL)
1506         PyMem_FREE(rf.buf);
1507     return res;
1508 }
1509 
1510 long
PyMarshal_ReadLongFromFile(FILE * fp)1511 PyMarshal_ReadLongFromFile(FILE *fp)
1512 {
1513     RFILE rf;
1514     long res;
1515     rf.fp = fp;
1516     rf.readable = NULL;
1517     rf.ptr = rf.end = NULL;
1518     rf.buf = NULL;
1519     res = r_long(&rf);
1520     if (rf.buf != NULL)
1521         PyMem_FREE(rf.buf);
1522     return res;
1523 }
1524 
1525 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1526 static off_t
getfilesize(FILE * fp)1527 getfilesize(FILE *fp)
1528 {
1529     struct _Py_stat_struct st;
1530     if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1531         return -1;
1532 #if SIZEOF_OFF_T == 4
1533     else if (st.st_size >= INT_MAX)
1534         return (off_t)INT_MAX;
1535 #endif
1536     else
1537         return (off_t)st.st_size;
1538 }
1539 
1540 /* If we can get the size of the file up-front, and it's reasonably small,
1541  * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1542  * than reading a byte at a time from file; speeds .pyc imports.
1543  * CAUTION:  since this may read the entire remainder of the file, don't
1544  * call it unless you know you're done with the file.
1545  */
1546 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1547 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1548 {
1549 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1550 #define REASONABLE_FILE_LIMIT (1L << 18)
1551     off_t filesize;
1552     filesize = getfilesize(fp);
1553     if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1554         char* pBuf = (char *)PyMem_MALLOC(filesize);
1555         if (pBuf != NULL) {
1556             size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1557             PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1558             PyMem_FREE(pBuf);
1559             return v;
1560         }
1561 
1562     }
1563     /* We don't have fstat, or we do but the file is larger than
1564      * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1565      */
1566     return PyMarshal_ReadObjectFromFile(fp);
1567 
1568 #undef REASONABLE_FILE_LIMIT
1569 }
1570 
1571 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1572 PyMarshal_ReadObjectFromFile(FILE *fp)
1573 {
1574     RFILE rf;
1575     PyObject *result;
1576     rf.fp = fp;
1577     rf.readable = NULL;
1578     rf.depth = 0;
1579     rf.ptr = rf.end = NULL;
1580     rf.buf = NULL;
1581     rf.refs = PyList_New(0);
1582     if (rf.refs == NULL)
1583         return NULL;
1584     result = r_object(&rf);
1585     Py_DECREF(rf.refs);
1586     if (rf.buf != NULL)
1587         PyMem_FREE(rf.buf);
1588     return result;
1589 }
1590 
1591 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1592 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1593 {
1594     RFILE rf;
1595     PyObject *result;
1596     rf.fp = NULL;
1597     rf.readable = NULL;
1598     rf.ptr = (char *)str;
1599     rf.end = (char *)str + len;
1600     rf.buf = NULL;
1601     rf.depth = 0;
1602     rf.refs = PyList_New(0);
1603     if (rf.refs == NULL)
1604         return NULL;
1605     result = r_object(&rf);
1606     Py_DECREF(rf.refs);
1607     if (rf.buf != NULL)
1608         PyMem_FREE(rf.buf);
1609     return result;
1610 }
1611 
1612 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1613 PyMarshal_WriteObjectToString(PyObject *x, int version)
1614 {
1615     WFILE wf;
1616 
1617     memset(&wf, 0, sizeof(wf));
1618     wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1619     if (wf.str == NULL)
1620         return NULL;
1621     wf.ptr = wf.buf = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1622     wf.end = wf.ptr + PyBytes_Size(wf.str);
1623     wf.error = WFERR_OK;
1624     wf.version = version;
1625     if (w_init_refs(&wf, version)) {
1626         Py_DECREF(wf.str);
1627         return NULL;
1628     }
1629     w_object(x, &wf);
1630     w_clear_refs(&wf);
1631     if (wf.str != NULL) {
1632         char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1633         if (wf.ptr - base > PY_SSIZE_T_MAX) {
1634             Py_DECREF(wf.str);
1635             PyErr_SetString(PyExc_OverflowError,
1636                             "too much marshal data for a bytes object");
1637             return NULL;
1638         }
1639         if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1640             return NULL;
1641     }
1642     if (wf.error != WFERR_OK) {
1643         Py_XDECREF(wf.str);
1644         if (wf.error == WFERR_NOMEMORY)
1645             PyErr_NoMemory();
1646         else
1647             PyErr_SetString(PyExc_ValueError,
1648               (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1649                :"object too deeply nested to marshal");
1650         return NULL;
1651     }
1652     return wf.str;
1653 }
1654 
1655 /* And an interface for Python programs... */
1656 /*[clinic input]
1657 marshal.dump
1658 
1659     value: object
1660         Must be a supported type.
1661     file: object
1662         Must be a writeable binary file.
1663     version: int(c_default="Py_MARSHAL_VERSION") = version
1664         Indicates the data format that dump should use.
1665     /
1666 
1667 Write the value on the open file.
1668 
1669 If the value has (or contains an object that has) an unsupported type, a
1670 ValueError exception is raised - but garbage data will also be written
1671 to the file. The object will not be properly read back by load().
1672 [clinic start generated code]*/
1673 
1674 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1675 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1676                   int version)
1677 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1678 {
1679     /* XXX Quick hack -- need to do this differently */
1680     PyObject *s;
1681     PyObject *res;
1682     _Py_IDENTIFIER(write);
1683 
1684     s = PyMarshal_WriteObjectToString(value, version);
1685     if (s == NULL)
1686         return NULL;
1687     res = _PyObject_CallMethodIdObjArgs(file, &PyId_write, s, NULL);
1688     Py_DECREF(s);
1689     return res;
1690 }
1691 
1692 /*[clinic input]
1693 marshal.load
1694 
1695     file: object
1696         Must be readable binary file.
1697     /
1698 
1699 Read one value from the open file and return it.
1700 
1701 If no valid value is read (e.g. because the data has a different Python
1702 version's incompatible marshal format), raise EOFError, ValueError or
1703 TypeError.
1704 
1705 Note: If an object containing an unsupported type was marshalled with
1706 dump(), load() will substitute None for the unmarshallable type.
1707 [clinic start generated code]*/
1708 
1709 static PyObject *
marshal_load(PyObject * module,PyObject * file)1710 marshal_load(PyObject *module, PyObject *file)
1711 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1712 {
1713     PyObject *data, *result;
1714     _Py_IDENTIFIER(read);
1715     RFILE rf;
1716 
1717     /*
1718      * Make a call to the read method, but read zero bytes.
1719      * This is to ensure that the object passed in at least
1720      * has a read method which returns bytes.
1721      * This can be removed if we guarantee good error handling
1722      * for r_string()
1723      */
1724     data = _PyObject_CallMethodId(file, &PyId_read, "i", 0);
1725     if (data == NULL)
1726         return NULL;
1727     if (!PyBytes_Check(data)) {
1728         PyErr_Format(PyExc_TypeError,
1729                      "file.read() returned not bytes but %.100s",
1730                      data->ob_type->tp_name);
1731         result = NULL;
1732     }
1733     else {
1734         rf.depth = 0;
1735         rf.fp = NULL;
1736         rf.readable = file;
1737         rf.ptr = rf.end = NULL;
1738         rf.buf = NULL;
1739         if ((rf.refs = PyList_New(0)) != NULL) {
1740             result = read_object(&rf);
1741             Py_DECREF(rf.refs);
1742             if (rf.buf != NULL)
1743                 PyMem_FREE(rf.buf);
1744         } else
1745             result = NULL;
1746     }
1747     Py_DECREF(data);
1748     return result;
1749 }
1750 
1751 /*[clinic input]
1752 marshal.dumps
1753 
1754     value: object
1755         Must be a supported type.
1756     version: int(c_default="Py_MARSHAL_VERSION") = version
1757         Indicates the data format that dumps should use.
1758     /
1759 
1760 Return the bytes object that would be written to a file by dump(value, file).
1761 
1762 Raise a ValueError exception if value has (or contains an object that has) an
1763 unsupported type.
1764 [clinic start generated code]*/
1765 
1766 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1767 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1768 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1769 {
1770     return PyMarshal_WriteObjectToString(value, version);
1771 }
1772 
1773 /*[clinic input]
1774 marshal.loads
1775 
1776     bytes: Py_buffer
1777     /
1778 
1779 Convert the bytes-like object to a value.
1780 
1781 If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
1782 bytes in the input are ignored.
1783 [clinic start generated code]*/
1784 
1785 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1786 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1787 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1788 {
1789     RFILE rf;
1790     char *s = bytes->buf;
1791     Py_ssize_t n = bytes->len;
1792     PyObject* result;
1793     rf.fp = NULL;
1794     rf.readable = NULL;
1795     rf.ptr = s;
1796     rf.end = s + n;
1797     rf.depth = 0;
1798     if ((rf.refs = PyList_New(0)) == NULL)
1799         return NULL;
1800     result = read_object(&rf);
1801     Py_DECREF(rf.refs);
1802     return result;
1803 }
1804 
/* Method table handed to the module definition below.  The *_METHODDEF
   macros are presumably supplied by the Argument Clinic header
   "clinic/marshal.c.h" included near the top of this file, one per
   Python-visible function. */
static PyMethodDef marshal_methods[] = {
    MARSHAL_DUMP_METHODDEF
    MARSHAL_LOAD_METHODDEF
    MARSHAL_DUMPS_METHODDEF
    MARSHAL_LOADS_METHODDEF
    {NULL,              NULL}           /* sentinel */
};
1812 
1813 
/* Docstring of the marshal module; referenced from the marshalmodule
   definition below.  The text is user-visible at runtime. */
PyDoc_STRVAR(module_doc,
"This module contains functions that can read and write Python values in\n\
a binary format. The format is specific to Python, but independent of\n\
machine architecture issues.\n\
\n\
Not all Python object types are supported; in general, only objects\n\
whose value is independent from a particular invocation of Python can be\n\
written and read by this module. The following types are supported:\n\
None, integers, floating point numbers, strings, bytes, bytearrays,\n\
tuples, lists, sets, dictionaries, and code objects, where it\n\
should be understood that tuples, lists and dictionaries are only\n\
supported as long as the values contained therein are themselves\n\
supported; and recursive lists and dictionaries should not be written\n\
(they will cause infinite loops).\n\
\n\
Variables:\n\
\n\
version -- indicates the format that the module uses. Version 0 is the\n\
    historical format, version 1 shares interned strings and version 2\n\
    uses a binary format for floating point numbers.\n\
    Version 3 shares common object references (New in version 3.4).\n\
\n\
Functions:\n\
\n\
dump() -- write value to a file\n\
load() -- read value from a file\n\
dumps() -- marshal value as a bytes object\n\
loads() -- read value from a bytes-like object");
1842 
1843 
1844 
/* Module definition passed to PyModule_Create() in PyMarshal_Init().
   Fields are given positionally in PyModuleDef order. */
static struct PyModuleDef marshalmodule = {
    PyModuleDef_HEAD_INIT,
    "marshal",          /* module name */
    module_doc,         /* module docstring */
    0,                  /* per-module state size */
    marshal_methods,    /* method table */
    NULL,               /* remaining optional members are unused */
    NULL,
    NULL,
    NULL
};
1856 
1857 PyMODINIT_FUNC
PyMarshal_Init(void)1858 PyMarshal_Init(void)
1859 {
1860     PyObject *mod = PyModule_Create(&marshalmodule);
1861     if (mod == NULL)
1862         return NULL;
1863     PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);
1864     return mod;
1865 }
1866