1import collections
2import copyreg
3import dbm
4import io
5import functools
6import os
7import math
8import pickle
9import pickletools
10import shutil
11import struct
12import sys
13import threading
14import unittest
15import weakref
16from textwrap import dedent
17from http.cookies import SimpleCookie
18
19try:
20    import _testbuffer
21except ImportError:
22    _testbuffer = None
23
24from test import support
25from test.support import (
26    TestFailed, TESTFN, run_with_locale, no_tracing,
27    _2G, _4G, bigmemtest, reap_threads, forget,
28    save_restore_warnings_filters
29    )
30
31from pickle import bytes_types
32
33
34# bpo-41003: Save/restore warnings filters to leave them unchanged.
35# Ignore filters installed by numpy.
36try:
37    with save_restore_warnings_filters():
38        import numpy as np
39except ImportError:
40    np = None
41
42
# Decorator that skips a test unless sys.maxsize is below 2**32.
requires_32b = unittest.skipUnless(
    sys.maxsize <= 0xFFFFFFFF,
    "test is only meaningful on 32-bit builds",
)

# Every protocol number from 0 up to and including pickle.HIGHEST_PROTOCOL.
# Tests that try a number of pickle protocols should have a
#     for proto in protocols:
# kind of outer loop.
protocols = range(0, pickle.HIGHEST_PROTOCOL + 1)
50
51
def opcode_in_pickle(code, pickle):
    """Return True if opcode *code* appears in *pickle*, else False.

    *code* is a bytes opcode; it is decoded as latin-1 and compared with
    the ``code`` attribute of every opcode yielded by pickletools.genops().
    """
    return any(opinfo.code == code.decode("latin-1")
               for opinfo, _arg, _pos in pickletools.genops(pickle))
58
def count_opcode(code, pickle):
    """Return the number of times opcode *code* appears in *pickle*.

    *code* is a bytes opcode; it is decoded as latin-1 and compared with
    the ``code`` attribute of every opcode yielded by pickletools.genops().
    """
    return sum(1 for opinfo, _arg, _pos in pickletools.genops(pickle)
               if opinfo.code == code.decode("latin-1"))
66
67
class UnseekableIO(io.BytesIO):
    """BytesIO variant that presents itself as an unseekable stream.

    seekable() returns False, seek() and tell() raise
    io.UnsupportedOperation, and peek() raises NotImplementedError.
    """

    def seekable(self):
        return False

    def peek(self, *args):
        raise NotImplementedError

    def tell(self):
        raise io.UnsupportedOperation

    def seek(self, *args):
        raise io.UnsupportedOperation
80
81
class MinimalIO(object):
    """
    A file-like object that doesn't support readinto().

    Only getvalue, read, readline and write are exposed; each is the bound
    method of an internal io.BytesIO built from the constructor arguments.
    """
    def __init__(self, *args):
        backing = io.BytesIO(*args)
        self._bio = backing
        for method_name in ("getvalue", "read", "readline", "write"):
            setattr(self, method_name, getattr(backing, method_name))
92
93
94# We can't very well test the extension registry without putting known stuff
95# in it, but we have to be careful to restore its original state.  Code
96# should do this:
97#
98#     e = ExtensionSaver(extension_code)
99#     try:
100#         fiddle w/ the extension registry's stuff for extension_code
101#     finally:
102#         e.restore()
103
class ExtensionSaver:
    """Save, and later restore, copyreg's registration for one extension code.

    On construction the (module, name) pair currently registered for *code*
    (if any) is remembered and removed from the registry.  restore() removes
    whatever is registered for the code at that time and then re-registers
    the remembered pair, if there was one.

    Instances also work as context managers, in which case restore() runs
    on exit even when the body raises:

        with ExtensionSaver(extension_code):
            fiddle w/ the extension registry's stuff for extension_code
    """

    # Remember current registration for code (if any), and remove it (if
    # there is one).
    def __init__(self, code):
        self.code = code
        # Saved (module, name) pair, or None when code was not registered.
        self.pair = copyreg._inverted_registry.get(code)
        if self.pair is not None:
            copyreg.remove_extension(self.pair[0], self.pair[1], code)

    # Restore previous registration for code.
    def restore(self):
        """Put the registry entry for self.code back to its saved state."""
        code = self.code
        # Drop anything registered for the code since construction first;
        # copyreg.add_extension() refuses a code that is already in use.
        curpair = copyreg._inverted_registry.get(code)
        if curpair is not None:
            copyreg.remove_extension(curpair[0], curpair[1], code)
        pair = self.pair
        if pair is not None:
            copyreg.add_extension(pair[0], pair[1], code)

    def __enter__(self):
        """Return self; the registration was already saved by __init__."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Restore the saved registration; never suppress exceptions."""
        self.restore()
        return False
124
class C:
    """Simple class whose instances compare equal when their __dict__s do."""

    def __eq__(self, other):
        mine = self.__dict__
        theirs = other.__dict__
        return mine == theirs
128
class D(C):
    """C subclass whose constructor takes one argument and ignores it."""

    def __init__(self, arg):
        # The argument is accepted but deliberately not stored.
        return None
132
class E(C):
    """C subclass that defines __getinitargs__ returning an empty tuple."""

    def __getinitargs__(self):
        no_args = ()
        return no_args
136
class H(object):
    """Empty class; instances carry only what callers assign to them."""
139
class K(object):
    """Hashable mutable key.

    The wrapped object is kept in ``value``; __reduce__ describes the
    instance as ``K(value)``.
    """

    def __init__(self, value):
        self.value = value

    def __reduce__(self):
        # Shouldn't support the recursion itself
        ctor_args = (self.value,)
        return (K, ctor_args)
148
import __main__
# Publish the helper classes as attributes of __main__ and relabel their
# __module__ accordingly, so that they can be looked up by the name
# '__main__ <class>' (the form the reference pickles use, e.g. '__main__ C').
for _cls in (C, D, E, H, K):
    setattr(__main__, _cls.__name__, _cls)
    _cls.__module__ = "__main__"
del _cls
160
class myint(int):
    """int subclass that also stores str(x) in the ``str`` attribute."""

    def __init__(self, x):
        text = str(x)
        self.str = text
164
class initarg(C):
    """C subclass built from two values, reported back by __getinitargs__."""

    def __init__(self, a, b):
        self.a, self.b = a, b

    def __getinitargs__(self):
        return (self.a, self.b)
173
class metaclass(type):
    """Trivial metaclass that adds nothing to ``type``."""
176
class use_metaclass(metaclass=metaclass):
    """Empty class whose type is the custom ``metaclass``."""
179
class pickling_metaclass(type):
    """Metaclass whose classes compare and reduce via ``reduce_args``.

    Two classes are equal when they have the same type and equal
    ``reduce_args``; __reduce__ describes a class as
    ``create_dynamic_class(*reduce_args)``.
    """

    def __eq__(self, other):
        if not (type(self) == type(other)):
            return False
        return self.reduce_args == other.reduce_args

    def __reduce__(self):
        rebuild_args = self.reduce_args
        return (create_dynamic_class, rebuild_args)
187
def create_dynamic_class(name, bases):
    """Build a class with pickling_metaclass and record how it was built.

    The new class has an empty namespace; its ``reduce_args`` attribute is
    set to ``(name, bases)``.
    """
    cls = pickling_metaclass(name, bases, {})
    cls.reduce_args = (name, bases)
    return cls
192
193
class ZeroCopyBytes(bytes):
    """bytes subclass that reduces to a PickleBuffer under protocol 5+.

    For older protocols the reduce arguments carry a plain bytes copy.
    _reconstruct() returns the very object backing the buffer when that
    object is already an instance of this class, and builds a new instance
    otherwise.
    """
    readonly = True
    c_contiguous = True
    f_contiguous = True
    zero_copy_reconstruct = True

    def __reduce_ex__(self, protocol):
        rebuild = type(self)._reconstruct
        if protocol >= 5:
            return rebuild, (pickle.PickleBuffer(self),), None
        return rebuild, (bytes(self),)

    def __repr__(self):
        return f"{self.__class__.__name__}({bytes(self)!r})"

    __str__ = __repr__

    @classmethod
    def _reconstruct(cls, obj):
        with memoryview(obj) as view:
            backing = view.obj
            # Zero-copy when the buffer is backed by an instance of cls.
            return backing if type(backing) is cls else cls(backing)
220
221
class ZeroCopyBytearray(bytearray):
    """bytearray subclass that reduces to a PickleBuffer under protocol 5+.

    For older protocols the reduce arguments carry a plain bytes copy.
    _reconstruct() returns the very object backing the buffer when that
    object is already an instance of this class, and builds a new instance
    otherwise.
    """
    readonly = False
    c_contiguous = True
    f_contiguous = True
    zero_copy_reconstruct = True

    def __reduce_ex__(self, protocol):
        rebuild = type(self)._reconstruct
        if protocol >= 5:
            return rebuild, (pickle.PickleBuffer(self),), None
        return rebuild, (bytes(self),)

    def __repr__(self):
        return f"{self.__class__.__name__}({bytes(self)!r})"

    __str__ = __repr__

    @classmethod
    def _reconstruct(cls, obj):
        with memoryview(obj) as view:
            backing = view.obj
            # Zero-copy when the buffer is backed by an instance of cls.
            return backing if type(backing) is cls else cls(backing)
248
249
if _testbuffer is not None:

    class PicklableNDArray:
        """Picklable wrapper around a _testbuffer.ndarray.

        A not-really-zero-copy picklable ndarray, as the ndarray()
        constructor doesn't allow for it: reconstruction always rebuilds
        the array from a list of items.
        """

        zero_copy_reconstruct = False

        def __init__(self, *args, **kwargs):
            # All arguments are forwarded to _testbuffer.ndarray().
            self.array = _testbuffer.ndarray(*args, **kwargs)

        def __getitem__(self, idx):
            # Wrap the indexed ndarray in a new instance without going
            # through __init__ (which would build a fresh ndarray).
            cls = type(self)
            new = cls.__new__(cls)
            new.array = self.array[idx]
            return new

        @property
        def readonly(self):
            return self.array.readonly

        @property
        def c_contiguous(self):
            return self.array.c_contiguous

        @property
        def f_contiguous(self):
            return self.array.f_contiguous

        def __eq__(self, other):
            # Equal when format, shape, strides, readonly flag and raw
            # bytes all match.
            if not isinstance(other, PicklableNDArray):
                return NotImplemented
            return (other.array.format == self.array.format and
                    other.array.shape == self.array.shape and
                    other.array.strides == self.array.strides and
                    other.array.readonly == self.array.readonly and
                    other.array.tobytes() == self.array.tobytes())

        def __ne__(self, other):
            if not isinstance(other, PicklableNDArray):
                return NotImplemented
            return not (self == other)

        def __repr__(self):
            # Use the bare class name and uniform ", " separators, like
            # the other helper classes' reprs.
            return (f"{type(self).__name__}(shape={self.array.shape}, "
                    f"strides={self.array.strides}, "
                    f"bytes={self.array.tobytes()})")

        def __reduce_ex__(self, protocol):
            """Reduce to ``_reconstruct(buffer_or_bytes, ndarray_kwargs)``.

            Raises NotImplementedError for a non-contiguous array.
            """
            if not self.array.contiguous:
                raise NotImplementedError("Reconstructing a non-contiguous "
                                          "ndarray does not seem possible")
            ndarray_kwargs = {"shape": self.array.shape,
                              "strides": self.array.strides,
                              "format": self.array.format,
                              "flags": (0 if self.readonly
                                        else _testbuffer.ND_WRITABLE)}
            pb = pickle.PickleBuffer(self.array)
            if protocol >= 5:
                return (type(self)._reconstruct,
                        (pb, ndarray_kwargs))
            else:
                # Need to serialize the bytes in physical order
                with pb.raw() as m:
                    return (type(self)._reconstruct,
                            (m.tobytes(), ndarray_kwargs))

        @classmethod
        def _reconstruct(cls, obj, kwargs):
            """Build an instance from a buffer-like *obj* and ndarray kwargs."""
            with memoryview(obj) as m:
                # For some reason, ndarray() wants a list of integers...
                # XXX This only works if format == 'B'
                items = list(m.tobytes())
            return cls(items, **kwargs)
324
325
326# DATA0 .. DATA4 are the pickles we expect under the various protocols, for
327# the object returned by create_data().
328
329DATA0 = (
330    b'(lp0\nL0L\naL1L\naF2.0\n'
331    b'ac__builtin__\ncomple'
332    b'x\np1\n(F3.0\nF0.0\ntp2\n'
333    b'Rp3\naL1L\naL-1L\naL255'
334    b'L\naL-255L\naL-256L\naL'
335    b'65535L\naL-65535L\naL-'
336    b'65536L\naL2147483647L'
337    b'\naL-2147483647L\naL-2'
338    b'147483648L\na(Vabc\np4'
339    b'\ng4\nccopy_reg\n_recon'
340    b'structor\np5\n(c__main'
341    b'__\nC\np6\nc__builtin__'
342    b'\nobject\np7\nNtp8\nRp9\n'
343    b'(dp10\nVfoo\np11\nL1L\ns'
344    b'Vbar\np12\nL2L\nsbg9\ntp'
345    b'13\nag13\naL5L\na.'
346)
347
348# Disassembly of DATA0
349DATA0_DIS = """\
350    0: (    MARK
351    1: l        LIST       (MARK at 0)
352    2: p    PUT        0
353    5: L    LONG       0
354    9: a    APPEND
355   10: L    LONG       1
356   14: a    APPEND
357   15: F    FLOAT      2.0
358   20: a    APPEND
359   21: c    GLOBAL     '__builtin__ complex'
360   42: p    PUT        1
361   45: (    MARK
362   46: F        FLOAT      3.0
363   51: F        FLOAT      0.0
364   56: t        TUPLE      (MARK at 45)
365   57: p    PUT        2
366   60: R    REDUCE
367   61: p    PUT        3
368   64: a    APPEND
369   65: L    LONG       1
370   69: a    APPEND
371   70: L    LONG       -1
372   75: a    APPEND
373   76: L    LONG       255
374   82: a    APPEND
375   83: L    LONG       -255
376   90: a    APPEND
377   91: L    LONG       -256
378   98: a    APPEND
379   99: L    LONG       65535
380  107: a    APPEND
381  108: L    LONG       -65535
382  117: a    APPEND
383  118: L    LONG       -65536
384  127: a    APPEND
385  128: L    LONG       2147483647
386  141: a    APPEND
387  142: L    LONG       -2147483647
388  156: a    APPEND
389  157: L    LONG       -2147483648
390  171: a    APPEND
391  172: (    MARK
392  173: V        UNICODE    'abc'
393  178: p        PUT        4
394  181: g        GET        4
395  184: c        GLOBAL     'copy_reg _reconstructor'
396  209: p        PUT        5
397  212: (        MARK
398  213: c            GLOBAL     '__main__ C'
399  225: p            PUT        6
400  228: c            GLOBAL     '__builtin__ object'
401  248: p            PUT        7
402  251: N            NONE
403  252: t            TUPLE      (MARK at 212)
404  253: p        PUT        8
405  256: R        REDUCE
406  257: p        PUT        9
407  260: (        MARK
408  261: d            DICT       (MARK at 260)
409  262: p        PUT        10
410  266: V        UNICODE    'foo'
411  271: p        PUT        11
412  275: L        LONG       1
413  279: s        SETITEM
414  280: V        UNICODE    'bar'
415  285: p        PUT        12
416  289: L        LONG       2
417  293: s        SETITEM
418  294: b        BUILD
419  295: g        GET        9
420  298: t        TUPLE      (MARK at 172)
421  299: p    PUT        13
422  303: a    APPEND
423  304: g    GET        13
424  308: a    APPEND
425  309: L    LONG       5
426  313: a    APPEND
427  314: .    STOP
428highest protocol among opcodes = 0
429"""
430
431DATA1 = (
432    b']q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c__'
433    b'builtin__\ncomplex\nq\x01'
434    b'(G@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00t'
435    b'q\x02Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xffJ'
436    b'\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff'
437    b'\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00ab'
438    b'cq\x04h\x04ccopy_reg\n_reco'
439    b'nstructor\nq\x05(c__main'
440    b'__\nC\nq\x06c__builtin__\n'
441    b'object\nq\x07Ntq\x08Rq\t}q\n('
442    b'X\x03\x00\x00\x00fooq\x0bK\x01X\x03\x00\x00\x00bar'
443    b'q\x0cK\x02ubh\ttq\rh\rK\x05e.'
444)
445
446# Disassembly of DATA1
447DATA1_DIS = """\
448    0: ]    EMPTY_LIST
449    1: q    BINPUT     0
450    3: (    MARK
451    4: K        BININT1    0
452    6: K        BININT1    1
453    8: G        BINFLOAT   2.0
454   17: c        GLOBAL     '__builtin__ complex'
455   38: q        BINPUT     1
456   40: (        MARK
457   41: G            BINFLOAT   3.0
458   50: G            BINFLOAT   0.0
459   59: t            TUPLE      (MARK at 40)
460   60: q        BINPUT     2
461   62: R        REDUCE
462   63: q        BINPUT     3
463   65: K        BININT1    1
464   67: J        BININT     -1
465   72: K        BININT1    255
466   74: J        BININT     -255
467   79: J        BININT     -256
468   84: M        BININT2    65535
469   87: J        BININT     -65535
470   92: J        BININT     -65536
471   97: J        BININT     2147483647
472  102: J        BININT     -2147483647
473  107: J        BININT     -2147483648
474  112: (        MARK
475  113: X            BINUNICODE 'abc'
476  121: q            BINPUT     4
477  123: h            BINGET     4
478  125: c            GLOBAL     'copy_reg _reconstructor'
479  150: q            BINPUT     5
480  152: (            MARK
481  153: c                GLOBAL     '__main__ C'
482  165: q                BINPUT     6
483  167: c                GLOBAL     '__builtin__ object'
484  187: q                BINPUT     7
485  189: N                NONE
486  190: t                TUPLE      (MARK at 152)
487  191: q            BINPUT     8
488  193: R            REDUCE
489  194: q            BINPUT     9
490  196: }            EMPTY_DICT
491  197: q            BINPUT     10
492  199: (            MARK
493  200: X                BINUNICODE 'foo'
494  208: q                BINPUT     11
495  210: K                BININT1    1
496  212: X                BINUNICODE 'bar'
497  220: q                BINPUT     12
498  222: K                BININT1    2
499  224: u                SETITEMS   (MARK at 199)
500  225: b            BUILD
501  226: h            BINGET     9
502  228: t            TUPLE      (MARK at 112)
503  229: q        BINPUT     13
504  231: h        BINGET     13
505  233: K        BININT1    5
506  235: e        APPENDS    (MARK at 3)
507  236: .    STOP
508highest protocol among opcodes = 1
509"""
510
511DATA2 = (
512    b'\x80\x02]q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c'
513    b'__builtin__\ncomplex\n'
514    b'q\x01G@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00'
515    b'\x86q\x02Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xff'
516    b'J\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff'
517    b'\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00a'
518    b'bcq\x04h\x04c__main__\nC\nq\x05'
519    b')\x81q\x06}q\x07(X\x03\x00\x00\x00fooq\x08K\x01'
520    b'X\x03\x00\x00\x00barq\tK\x02ubh\x06tq\nh'
521    b'\nK\x05e.'
522)
523
524# Disassembly of DATA2
525DATA2_DIS = """\
526    0: \x80 PROTO      2
527    2: ]    EMPTY_LIST
528    3: q    BINPUT     0
529    5: (    MARK
530    6: K        BININT1    0
531    8: K        BININT1    1
532   10: G        BINFLOAT   2.0
533   19: c        GLOBAL     '__builtin__ complex'
534   40: q        BINPUT     1
535   42: G        BINFLOAT   3.0
536   51: G        BINFLOAT   0.0
537   60: \x86     TUPLE2
538   61: q        BINPUT     2
539   63: R        REDUCE
540   64: q        BINPUT     3
541   66: K        BININT1    1
542   68: J        BININT     -1
543   73: K        BININT1    255
544   75: J        BININT     -255
545   80: J        BININT     -256
546   85: M        BININT2    65535
547   88: J        BININT     -65535
548   93: J        BININT     -65536
549   98: J        BININT     2147483647
550  103: J        BININT     -2147483647
551  108: J        BININT     -2147483648
552  113: (        MARK
553  114: X            BINUNICODE 'abc'
554  122: q            BINPUT     4
555  124: h            BINGET     4
556  126: c            GLOBAL     '__main__ C'
557  138: q            BINPUT     5
558  140: )            EMPTY_TUPLE
559  141: \x81         NEWOBJ
560  142: q            BINPUT     6
561  144: }            EMPTY_DICT
562  145: q            BINPUT     7
563  147: (            MARK
564  148: X                BINUNICODE 'foo'
565  156: q                BINPUT     8
566  158: K                BININT1    1
567  160: X                BINUNICODE 'bar'
568  168: q                BINPUT     9
569  170: K                BININT1    2
570  172: u                SETITEMS   (MARK at 147)
571  173: b            BUILD
572  174: h            BINGET     6
573  176: t            TUPLE      (MARK at 113)
574  177: q        BINPUT     10
575  179: h        BINGET     10
576  181: K        BININT1    5
577  183: e        APPENDS    (MARK at 5)
578  184: .    STOP
579highest protocol among opcodes = 2
580"""
581
582DATA3 = (
583    b'\x80\x03]q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c'
584    b'builtins\ncomplex\nq\x01G'
585    b'@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00\x86q\x02'
586    b'Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xffJ\x00\xff'
587    b'\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff\xff\x7f'
588    b'J\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00abcq'
589    b'\x04h\x04c__main__\nC\nq\x05)\x81q'
590    b'\x06}q\x07(X\x03\x00\x00\x00barq\x08K\x02X\x03\x00'
591    b'\x00\x00fooq\tK\x01ubh\x06tq\nh\nK\x05'
592    b'e.'
593)
594
595# Disassembly of DATA3
596DATA3_DIS = """\
597    0: \x80 PROTO      3
598    2: ]    EMPTY_LIST
599    3: q    BINPUT     0
600    5: (    MARK
601    6: K        BININT1    0
602    8: K        BININT1    1
603   10: G        BINFLOAT   2.0
604   19: c        GLOBAL     'builtins complex'
605   37: q        BINPUT     1
606   39: G        BINFLOAT   3.0
607   48: G        BINFLOAT   0.0
608   57: \x86     TUPLE2
609   58: q        BINPUT     2
610   60: R        REDUCE
611   61: q        BINPUT     3
612   63: K        BININT1    1
613   65: J        BININT     -1
614   70: K        BININT1    255
615   72: J        BININT     -255
616   77: J        BININT     -256
617   82: M        BININT2    65535
618   85: J        BININT     -65535
619   90: J        BININT     -65536
620   95: J        BININT     2147483647
621  100: J        BININT     -2147483647
622  105: J        BININT     -2147483648
623  110: (        MARK
624  111: X            BINUNICODE 'abc'
625  119: q            BINPUT     4
626  121: h            BINGET     4
627  123: c            GLOBAL     '__main__ C'
628  135: q            BINPUT     5
629  137: )            EMPTY_TUPLE
630  138: \x81         NEWOBJ
631  139: q            BINPUT     6
632  141: }            EMPTY_DICT
633  142: q            BINPUT     7
634  144: (            MARK
635  145: X                BINUNICODE 'bar'
636  153: q                BINPUT     8
637  155: K                BININT1    2
638  157: X                BINUNICODE 'foo'
639  165: q                BINPUT     9
640  167: K                BININT1    1
641  169: u                SETITEMS   (MARK at 144)
642  170: b            BUILD
643  171: h            BINGET     6
644  173: t            TUPLE      (MARK at 110)
645  174: q        BINPUT     10
646  176: h        BINGET     10
647  178: K        BININT1    5
648  180: e        APPENDS    (MARK at 5)
649  181: .    STOP
650highest protocol among opcodes = 2
651"""
652
653DATA4 = (
654    b'\x80\x04\x95\xa8\x00\x00\x00\x00\x00\x00\x00]\x94(K\x00K\x01G@'
655    b'\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x94\x8c\x07'
656    b'complex\x94\x93\x94G@\x08\x00\x00\x00\x00\x00\x00G'
657    b'\x00\x00\x00\x00\x00\x00\x00\x00\x86\x94R\x94K\x01J\xff\xff\xff\xffK'
658    b'\xffJ\x01\xff\xff\xffJ\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ'
659    b'\x00\x00\xff\xffJ\xff\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80('
660    b'\x8c\x03abc\x94h\x06\x8c\x08__main__\x94\x8c'
661    b'\x01C\x94\x93\x94)\x81\x94}\x94(\x8c\x03bar\x94K\x02\x8c'
662    b'\x03foo\x94K\x01ubh\nt\x94h\x0eK\x05e.'
663)
664
665# Disassembly of DATA4
666DATA4_DIS = """\
667    0: \x80 PROTO      4
668    2: \x95 FRAME      168
669   11: ]    EMPTY_LIST
670   12: \x94 MEMOIZE
671   13: (    MARK
672   14: K        BININT1    0
673   16: K        BININT1    1
674   18: G        BINFLOAT   2.0
675   27: \x8c     SHORT_BINUNICODE 'builtins'
676   37: \x94     MEMOIZE
677   38: \x8c     SHORT_BINUNICODE 'complex'
678   47: \x94     MEMOIZE
679   48: \x93     STACK_GLOBAL
680   49: \x94     MEMOIZE
681   50: G        BINFLOAT   3.0
682   59: G        BINFLOAT   0.0
683   68: \x86     TUPLE2
684   69: \x94     MEMOIZE
685   70: R        REDUCE
686   71: \x94     MEMOIZE
687   72: K        BININT1    1
688   74: J        BININT     -1
689   79: K        BININT1    255
690   81: J        BININT     -255
691   86: J        BININT     -256
692   91: M        BININT2    65535
693   94: J        BININT     -65535
694   99: J        BININT     -65536
695  104: J        BININT     2147483647
696  109: J        BININT     -2147483647
697  114: J        BININT     -2147483648
698  119: (        MARK
699  120: \x8c         SHORT_BINUNICODE 'abc'
700  125: \x94         MEMOIZE
701  126: h            BINGET     6
702  128: \x8c         SHORT_BINUNICODE '__main__'
703  138: \x94         MEMOIZE
704  139: \x8c         SHORT_BINUNICODE 'C'
705  142: \x94         MEMOIZE
706  143: \x93         STACK_GLOBAL
707  144: \x94         MEMOIZE
708  145: )            EMPTY_TUPLE
709  146: \x81         NEWOBJ
710  147: \x94         MEMOIZE
711  148: }            EMPTY_DICT
712  149: \x94         MEMOIZE
713  150: (            MARK
714  151: \x8c             SHORT_BINUNICODE 'bar'
715  156: \x94             MEMOIZE
716  157: K                BININT1    2
717  159: \x8c             SHORT_BINUNICODE 'foo'
718  164: \x94             MEMOIZE
719  165: K                BININT1    1
720  167: u                SETITEMS   (MARK at 150)
721  168: b            BUILD
722  169: h            BINGET     10
723  171: t            TUPLE      (MARK at 119)
724  172: \x94     MEMOIZE
725  173: h        BINGET     14
726  175: K        BININT1    5
727  177: e        APPENDS    (MARK at 13)
728  178: .    STOP
729highest protocol among opcodes = 4
730"""
731
732# set([1,2]) pickled from 2.x with protocol 2
733DATA_SET = b'\x80\x02c__builtin__\nset\nq\x00]q\x01(K\x01K\x02e\x85q\x02Rq\x03.'
734
735# xrange(5) pickled from 2.x with protocol 2
736DATA_XRANGE = b'\x80\x02c__builtin__\nxrange\nq\x00K\x00K\x05K\x01\x87q\x01Rq\x02.'
737
738# a SimpleCookie() object pickled from 2.x with protocol 2
739DATA_COOKIE = (b'\x80\x02cCookie\nSimpleCookie\nq\x00)\x81q\x01U\x03key'
740               b'q\x02cCookie\nMorsel\nq\x03)\x81q\x04(U\x07commentq\x05U'
741               b'\x00q\x06U\x06domainq\x07h\x06U\x06secureq\x08h\x06U\x07'
742               b'expiresq\th\x06U\x07max-ageq\nh\x06U\x07versionq\x0bh\x06U'
743               b'\x04pathq\x0ch\x06U\x08httponlyq\rh\x06u}q\x0e(U\x0b'
744               b'coded_valueq\x0fU\x05valueq\x10h\x10h\x10h\x02h\x02ubs}q\x11b.')
745
746# set([3]) pickled from 2.x with protocol 2
747DATA_SET2 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01K\x03a\x85q\x02Rq\x03.'
748
# Exception classes whose names are substituted, one at a time, for the b'?'
# placeholder in exception_pickle; test_unpickle_from_2x checks that each
# resulting pickle loads as an instance of exactly that class.
python2_exceptions_without_args = (
    ArithmeticError,
    AssertionError,
    AttributeError,
    BaseException,
    BufferError,
    BytesWarning,
    DeprecationWarning,
    EOFError,
    EnvironmentError,
    Exception,
    FloatingPointError,
    FutureWarning,
    GeneratorExit,
    IOError,
    ImportError,
    ImportWarning,
    IndentationError,
    IndexError,
    KeyError,
    KeyboardInterrupt,
    LookupError,
    MemoryError,
    NameError,
    NotImplementedError,
    OSError,
    OverflowError,
    PendingDeprecationWarning,
    ReferenceError,
    RuntimeError,
    RuntimeWarning,
    # StandardError is gone in Python 3; it is mapped to Exception and is
    # therefore tested separately instead of being listed here.
    StopIteration,
    SyntaxError,
    SyntaxWarning,
    SystemError,
    SystemExit,
    TabError,
    TypeError,
    UnboundLocalError,
    UnicodeError,
    UnicodeWarning,
    UserWarning,
    ValueError,
    Warning,
    ZeroDivisionError,
)

# Protocol 2 pickle template for an exception created without arguments:
# GLOBAL 'exceptions ?' followed by an empty tuple and REDUCE.  The b'?'
# byte is a placeholder that tests replace with an exception class name.
exception_pickle = b'\x80\x02cexceptions\n?\nq\x00)Rq\x01.'
798
799# UnicodeEncodeError object pickled from 2.x with protocol 2
800DATA_UEERR = (b'\x80\x02cexceptions\nUnicodeEncodeError\n'
801              b'q\x00(U\x05asciiq\x01X\x03\x00\x00\x00fooq\x02K\x00K\x01'
802              b'U\x03badq\x03tq\x04Rq\x05.')
803
804
def create_data():
    """Build the object graph whose pickles are stored in DATA0 .. DATA4.

    The result is a list holding a few numbers, integers around size
    boundaries, the same tuple twice (which itself repeats a string and a
    C instance), and a trailing 5.
    """
    inst = C()
    inst.foo = 1
    inst.bar = 2
    data = [0, 1, 2.0, 3.0+0j]
    # Append some integer test cases at cPickle.c's internal size
    # cutoffs: 1-byte unsigned, 2-byte unsigned and 4-byte signed maxima.
    data += [1, -1]
    for limit in (0xff, 0xffff, 0x7fffffff):
        data += [limit, -limit, -limit - 1]
    shared = ('abc', 'abc', inst, inst)
    # The same tuple object is appended twice on purpose.
    data.append(shared)
    data.append(shared)
    data.append(5)
    return data
824
825
826class AbstractUnpickleTests(unittest.TestCase):
827    # Subclass must define self.loads.
828
829    _testdata = create_data()
830
831    def assert_is_copy(self, obj, objcopy, msg=None):
832        """Utility method to verify if two objects are copies of each others.
833        """
834        if msg is None:
835            msg = "{!r} is not a copy of {!r}".format(obj, objcopy)
836        self.assertEqual(obj, objcopy, msg=msg)
837        self.assertIs(type(obj), type(objcopy), msg=msg)
838        if hasattr(obj, '__dict__'):
839            self.assertDictEqual(obj.__dict__, objcopy.__dict__, msg=msg)
840            self.assertIsNot(obj.__dict__, objcopy.__dict__, msg=msg)
841        if hasattr(obj, '__slots__'):
842            self.assertListEqual(obj.__slots__, objcopy.__slots__, msg=msg)
843            for slot in obj.__slots__:
844                self.assertEqual(
845                    hasattr(obj, slot), hasattr(objcopy, slot), msg=msg)
846                self.assertEqual(getattr(obj, slot, None),
847                                 getattr(objcopy, slot, None), msg=msg)
848
849    def check_unpickling_error(self, errors, data):
850        with self.subTest(data=data), \
851             self.assertRaises(errors):
852            try:
853                self.loads(data)
854            except BaseException as exc:
855                if support.verbose > 1:
856                    print('%-32r - %s: %s' %
857                          (data, exc.__class__.__name__, exc))
858                raise
859
860    def test_load_from_data0(self):
861        self.assert_is_copy(self._testdata, self.loads(DATA0))
862
863    def test_load_from_data1(self):
864        self.assert_is_copy(self._testdata, self.loads(DATA1))
865
866    def test_load_from_data2(self):
867        self.assert_is_copy(self._testdata, self.loads(DATA2))
868
869    def test_load_from_data3(self):
870        self.assert_is_copy(self._testdata, self.loads(DATA3))
871
872    def test_load_from_data4(self):
873        self.assert_is_copy(self._testdata, self.loads(DATA4))
874
875    def test_load_classic_instance(self):
876        # See issue5180.  Test loading 2.x pickles that
877        # contain an instance of old style class.
878        for X, args in [(C, ()), (D, ('x',)), (E, ())]:
879            xname = X.__name__.encode('ascii')
880            # Protocol 0 (text mode pickle):
881            """
882             0: (    MARK
883             1: i        INST       '__main__ X' (MARK at 0)
884            13: p    PUT        0
885            16: (    MARK
886            17: d        DICT       (MARK at 16)
887            18: p    PUT        1
888            21: b    BUILD
889            22: .    STOP
890            """
891            pickle0 = (b"(i__main__\n"
892                       b"X\n"
893                       b"p0\n"
894                       b"(dp1\nb.").replace(b'X', xname)
895            self.assert_is_copy(X(*args), self.loads(pickle0))
896
897            # Protocol 1 (binary mode pickle)
898            """
899             0: (    MARK
900             1: c        GLOBAL     '__main__ X'
901            13: q        BINPUT     0
902            15: o        OBJ        (MARK at 0)
903            16: q    BINPUT     1
904            18: }    EMPTY_DICT
905            19: q    BINPUT     2
906            21: b    BUILD
907            22: .    STOP
908            """
909            pickle1 = (b'(c__main__\n'
910                       b'X\n'
911                       b'q\x00oq\x01}q\x02b.').replace(b'X', xname)
912            self.assert_is_copy(X(*args), self.loads(pickle1))
913
914            # Protocol 2 (pickle2 = b'\x80\x02' + pickle1)
915            """
916             0: \x80 PROTO      2
917             2: (    MARK
918             3: c        GLOBAL     '__main__ X'
919            15: q        BINPUT     0
920            17: o        OBJ        (MARK at 2)
921            18: q    BINPUT     1
922            20: }    EMPTY_DICT
923            21: q    BINPUT     2
924            23: b    BUILD
925            24: .    STOP
926            """
927            pickle2 = (b'\x80\x02(c__main__\n'
928                       b'X\n'
929                       b'q\x00oq\x01}q\x02b.').replace(b'X', xname)
930            self.assert_is_copy(X(*args), self.loads(pickle2))
931
932    def test_maxint64(self):
933        maxint64 = (1 << 63) - 1
934        data = b'I' + str(maxint64).encode("ascii") + b'\n.'
935        got = self.loads(data)
936        self.assert_is_copy(maxint64, got)
937
938        # Try too with a bogus literal.
939        data = b'I' + str(maxint64).encode("ascii") + b'JUNK\n.'
940        self.check_unpickling_error(ValueError, data)
941
942    def test_unpickle_from_2x(self):
943        # Unpickle non-trivial data from Python 2.x.
944        loaded = self.loads(DATA_SET)
945        self.assertEqual(loaded, set([1, 2]))
946        loaded = self.loads(DATA_XRANGE)
947        self.assertEqual(type(loaded), type(range(0)))
948        self.assertEqual(list(loaded), list(range(5)))
949        loaded = self.loads(DATA_COOKIE)
950        self.assertEqual(type(loaded), SimpleCookie)
951        self.assertEqual(list(loaded.keys()), ["key"])
952        self.assertEqual(loaded["key"].value, "value")
953
954        # Exception objects without arguments pickled from 2.x with protocol 2
955        for exc in python2_exceptions_without_args:
956            data = exception_pickle.replace(b'?', exc.__name__.encode("ascii"))
957            loaded = self.loads(data)
958            self.assertIs(type(loaded), exc)
959
960        # StandardError is mapped to Exception, test that separately
961        loaded = self.loads(exception_pickle.replace(b'?', b'StandardError'))
962        self.assertIs(type(loaded), Exception)
963
964        loaded = self.loads(DATA_UEERR)
965        self.assertIs(type(loaded), UnicodeEncodeError)
966        self.assertEqual(loaded.object, "foo")
967        self.assertEqual(loaded.encoding, "ascii")
968        self.assertEqual(loaded.start, 0)
969        self.assertEqual(loaded.end, 1)
970        self.assertEqual(loaded.reason, "bad")
971
972    def test_load_python2_str_as_bytes(self):
973        # From Python 2: pickle.dumps('a\x00\xa0', protocol=0)
974        self.assertEqual(self.loads(b"S'a\\x00\\xa0'\n.",
975                                    encoding="bytes"), b'a\x00\xa0')
976        # From Python 2: pickle.dumps('a\x00\xa0', protocol=1)
977        self.assertEqual(self.loads(b'U\x03a\x00\xa0.',
978                                    encoding="bytes"), b'a\x00\xa0')
979        # From Python 2: pickle.dumps('a\x00\xa0', protocol=2)
980        self.assertEqual(self.loads(b'\x80\x02U\x03a\x00\xa0.',
981                                    encoding="bytes"), b'a\x00\xa0')
982
983    def test_load_python2_unicode_as_str(self):
984        # From Python 2: pickle.dumps(u'π', protocol=0)
985        self.assertEqual(self.loads(b'V\\u03c0\n.',
986                                    encoding='bytes'), 'π')
987        # From Python 2: pickle.dumps(u'π', protocol=1)
988        self.assertEqual(self.loads(b'X\x02\x00\x00\x00\xcf\x80.',
989                                    encoding="bytes"), 'π')
990        # From Python 2: pickle.dumps(u'π', protocol=2)
991        self.assertEqual(self.loads(b'\x80\x02X\x02\x00\x00\x00\xcf\x80.',
992                                    encoding="bytes"), 'π')
993
994    def test_load_long_python2_str_as_bytes(self):
995        # From Python 2: pickle.dumps('x' * 300, protocol=1)
996        self.assertEqual(self.loads(pickle.BINSTRING +
997                                    struct.pack("<I", 300) +
998                                    b'x' * 300 + pickle.STOP,
999                                    encoding='bytes'), b'x' * 300)
1000
1001    def test_constants(self):
1002        self.assertIsNone(self.loads(b'N.'))
1003        self.assertIs(self.loads(b'\x88.'), True)
1004        self.assertIs(self.loads(b'\x89.'), False)
1005        self.assertIs(self.loads(b'I01\n.'), True)
1006        self.assertIs(self.loads(b'I00\n.'), False)
1007
1008    def test_empty_bytestring(self):
1009        # issue 11286
1010        empty = self.loads(b'\x80\x03U\x00q\x00.', encoding='koi8-r')
1011        self.assertEqual(empty, '')
1012
1013    def test_short_binbytes(self):
1014        dumped = b'\x80\x03C\x04\xe2\x82\xac\x00.'
1015        self.assertEqual(self.loads(dumped), b'\xe2\x82\xac\x00')
1016
1017    def test_binbytes(self):
1018        dumped = b'\x80\x03B\x04\x00\x00\x00\xe2\x82\xac\x00.'
1019        self.assertEqual(self.loads(dumped), b'\xe2\x82\xac\x00')
1020
1021    @requires_32b
1022    def test_negative_32b_binbytes(self):
1023        # On 32-bit builds, a BINBYTES of 2**31 or more is refused
1024        dumped = b'\x80\x03B\xff\xff\xff\xffxyzq\x00.'
1025        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1026                                    dumped)
1027
1028    @requires_32b
1029    def test_negative_32b_binunicode(self):
1030        # On 32-bit builds, a BINUNICODE of 2**31 or more is refused
1031        dumped = b'\x80\x03X\xff\xff\xff\xffxyzq\x00.'
1032        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1033                                    dumped)
1034
1035    def test_short_binunicode(self):
1036        dumped = b'\x80\x04\x8c\x04\xe2\x82\xac\x00.'
1037        self.assertEqual(self.loads(dumped), '\u20ac\x00')
1038
1039    def test_misc_get(self):
1040        self.check_unpickling_error(pickle.UnpicklingError, b'g0\np0')
1041        self.check_unpickling_error(pickle.UnpicklingError, b'jens:')
1042        self.check_unpickling_error(pickle.UnpicklingError, b'hens:')
1043        self.assert_is_copy([(100,), (100,)],
1044                            self.loads(b'((Kdtp0\nh\x00l.))'))
1045
1046    def test_binbytes8(self):
1047        dumped = b'\x80\x04\x8e\4\0\0\0\0\0\0\0\xe2\x82\xac\x00.'
1048        self.assertEqual(self.loads(dumped), b'\xe2\x82\xac\x00')
1049
1050    def test_binunicode8(self):
1051        dumped = b'\x80\x04\x8d\4\0\0\0\0\0\0\0\xe2\x82\xac\x00.'
1052        self.assertEqual(self.loads(dumped), '\u20ac\x00')
1053
1054    def test_bytearray8(self):
1055        dumped = b'\x80\x05\x96\x03\x00\x00\x00\x00\x00\x00\x00xxx.'
1056        self.assertEqual(self.loads(dumped), bytearray(b'xxx'))
1057
1058    @requires_32b
1059    def test_large_32b_binbytes8(self):
1060        dumped = b'\x80\x04\x8e\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
1061        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1062                                    dumped)
1063
1064    @requires_32b
1065    def test_large_32b_bytearray8(self):
1066        dumped = b'\x80\x05\x96\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
1067        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1068                                    dumped)
1069
1070    @requires_32b
1071    def test_large_32b_binunicode8(self):
1072        dumped = b'\x80\x04\x8d\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
1073        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1074                                    dumped)
1075
1076    def test_get(self):
1077        pickled = b'((lp100000\ng100000\nt.'
1078        unpickled = self.loads(pickled)
1079        self.assertEqual(unpickled, ([],)*2)
1080        self.assertIs(unpickled[0], unpickled[1])
1081
1082    def test_binget(self):
1083        pickled = b'(]q\xffh\xfft.'
1084        unpickled = self.loads(pickled)
1085        self.assertEqual(unpickled, ([],)*2)
1086        self.assertIs(unpickled[0], unpickled[1])
1087
1088    def test_long_binget(self):
1089        pickled = b'(]r\x00\x00\x01\x00j\x00\x00\x01\x00t.'
1090        unpickled = self.loads(pickled)
1091        self.assertEqual(unpickled, ([],)*2)
1092        self.assertIs(unpickled[0], unpickled[1])
1093
1094    def test_dup(self):
1095        pickled = b'((l2t.'
1096        unpickled = self.loads(pickled)
1097        self.assertEqual(unpickled, ([],)*2)
1098        self.assertIs(unpickled[0], unpickled[1])
1099
1100    def test_negative_put(self):
1101        # Issue #12847
1102        dumped = b'Va\np-1\n.'
1103        self.check_unpickling_error(ValueError, dumped)
1104
1105    @requires_32b
1106    def test_negative_32b_binput(self):
1107        # Issue #12847
1108        dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.'
1109        self.check_unpickling_error(ValueError, dumped)
1110
1111    def test_badly_escaped_string(self):
1112        self.check_unpickling_error(ValueError, b"S'\\'\n.")
1113
1114    def test_badly_quoted_string(self):
1115        # Issue #17710
1116        badpickles = [b"S'\n.",
1117                      b'S"\n.',
1118                      b'S\' \n.',
1119                      b'S" \n.',
1120                      b'S\'"\n.',
1121                      b'S"\'\n.',
1122                      b"S' ' \n.",
1123                      b'S" " \n.',
1124                      b"S ''\n.",
1125                      b'S ""\n.',
1126                      b'S \n.',
1127                      b'S\n.',
1128                      b'S.']
1129        for p in badpickles:
1130            self.check_unpickling_error(pickle.UnpicklingError, p)
1131
1132    def test_correctly_quoted_string(self):
1133        goodpickles = [(b"S''\n.", ''),
1134                       (b'S""\n.', ''),
1135                       (b'S"\\n"\n.', '\n'),
1136                       (b"S'\\n'\n.", '\n')]
1137        for p, expected in goodpickles:
1138            self.assertEqual(self.loads(p), expected)
1139
1140    def test_frame_readline(self):
1141        pickled = b'\x80\x04\x95\x05\x00\x00\x00\x00\x00\x00\x00I42\n.'
1142        #    0: \x80 PROTO      4
1143        #    2: \x95 FRAME      5
1144        #   11: I    INT        42
1145        #   15: .    STOP
1146        self.assertEqual(self.loads(pickled), 42)
1147
1148    def test_compat_unpickle(self):
1149        # xrange(1, 7)
1150        pickled = b'\x80\x02c__builtin__\nxrange\nK\x01K\x07K\x01\x87R.'
1151        unpickled = self.loads(pickled)
1152        self.assertIs(type(unpickled), range)
1153        self.assertEqual(unpickled, range(1, 7))
1154        self.assertEqual(list(unpickled), [1, 2, 3, 4, 5, 6])
1155        # reduce
1156        pickled = b'\x80\x02c__builtin__\nreduce\n.'
1157        self.assertIs(self.loads(pickled), functools.reduce)
1158        # whichdb.whichdb
1159        pickled = b'\x80\x02cwhichdb\nwhichdb\n.'
1160        self.assertIs(self.loads(pickled), dbm.whichdb)
1161        # Exception(), StandardError()
1162        for name in (b'Exception', b'StandardError'):
1163            pickled = (b'\x80\x02cexceptions\n' + name + b'\nU\x03ugh\x85R.')
1164            unpickled = self.loads(pickled)
1165            self.assertIs(type(unpickled), Exception)
1166            self.assertEqual(str(unpickled), 'ugh')
1167        # UserDict.UserDict({1: 2}), UserDict.IterableUserDict({1: 2})
1168        for name in (b'UserDict', b'IterableUserDict'):
1169            pickled = (b'\x80\x02(cUserDict\n' + name +
1170                       b'\no}U\x04data}K\x01K\x02ssb.')
1171            unpickled = self.loads(pickled)
1172            self.assertIs(type(unpickled), collections.UserDict)
1173            self.assertEqual(unpickled, collections.UserDict({1: 2}))
1174
1175    def test_bad_reduce(self):
1176        self.assertEqual(self.loads(b'cbuiltins\nint\n)R.'), 0)
1177        self.check_unpickling_error(TypeError, b'N)R.')
1178        self.check_unpickling_error(TypeError, b'cbuiltins\nint\nNR.')
1179
1180    def test_bad_newobj(self):
1181        error = (pickle.UnpicklingError, TypeError)
1182        self.assertEqual(self.loads(b'cbuiltins\nint\n)\x81.'), 0)
1183        self.check_unpickling_error(error, b'cbuiltins\nlen\n)\x81.')
1184        self.check_unpickling_error(error, b'cbuiltins\nint\nN\x81.')
1185
1186    def test_bad_newobj_ex(self):
1187        error = (pickle.UnpicklingError, TypeError)
1188        self.assertEqual(self.loads(b'cbuiltins\nint\n)}\x92.'), 0)
1189        self.check_unpickling_error(error, b'cbuiltins\nlen\n)}\x92.')
1190        self.check_unpickling_error(error, b'cbuiltins\nint\nN}\x92.')
1191        self.check_unpickling_error(error, b'cbuiltins\nint\n)N\x92.')
1192
1193    def test_bad_stack(self):
1194        badpickles = [
1195            b'.',                       # STOP
1196            b'0',                       # POP
1197            b'1',                       # POP_MARK
1198            b'2',                       # DUP
1199            b'(2',
1200            b'R',                       # REDUCE
1201            b')R',
1202            b'a',                       # APPEND
1203            b'Na',
1204            b'b',                       # BUILD
1205            b'Nb',
1206            b'd',                       # DICT
1207            b'e',                       # APPENDS
1208            b'(e',
1209            b'ibuiltins\nlist\n',       # INST
1210            b'l',                       # LIST
1211            b'o',                       # OBJ
1212            b'(o',
1213            b'p1\n',                    # PUT
1214            b'q\x00',                   # BINPUT
1215            b'r\x00\x00\x00\x00',       # LONG_BINPUT
1216            b's',                       # SETITEM
1217            b'Ns',
1218            b'NNs',
1219            b't',                       # TUPLE
1220            b'u',                       # SETITEMS
1221            b'(u',
1222            b'}(Nu',
1223            b'\x81',                    # NEWOBJ
1224            b')\x81',
1225            b'\x85',                    # TUPLE1
1226            b'\x86',                    # TUPLE2
1227            b'N\x86',
1228            b'\x87',                    # TUPLE3
1229            b'N\x87',
1230            b'NN\x87',
1231            b'\x90',                    # ADDITEMS
1232            b'(\x90',
1233            b'\x91',                    # FROZENSET
1234            b'\x92',                    # NEWOBJ_EX
1235            b')}\x92',
1236            b'\x93',                    # STACK_GLOBAL
1237            b'Vlist\n\x93',
1238            b'\x94',                    # MEMOIZE
1239        ]
1240        for p in badpickles:
1241            self.check_unpickling_error(self.bad_stack_errors, p)
1242
1243    def test_bad_mark(self):
1244        badpickles = [
1245            b'N(.',                     # STOP
1246            b'N(2',                     # DUP
1247            b'cbuiltins\nlist\n)(R',    # REDUCE
1248            b'cbuiltins\nlist\n()R',
1249            b']N(a',                    # APPEND
1250                                        # BUILD
1251            b'cbuiltins\nValueError\n)R}(b',
1252            b'cbuiltins\nValueError\n)R(}b',
1253            b'(Nd',                     # DICT
1254            b'N(p1\n',                  # PUT
1255            b'N(q\x00',                 # BINPUT
1256            b'N(r\x00\x00\x00\x00',     # LONG_BINPUT
1257            b'}NN(s',                   # SETITEM
1258            b'}N(Ns',
1259            b'}(NNs',
1260            b'}((u',                    # SETITEMS
1261            b'cbuiltins\nlist\n)(\x81', # NEWOBJ
1262            b'cbuiltins\nlist\n()\x81',
1263            b'N(\x85',                  # TUPLE1
1264            b'NN(\x86',                 # TUPLE2
1265            b'N(N\x86',
1266            b'NNN(\x87',                # TUPLE3
1267            b'NN(N\x87',
1268            b'N(NN\x87',
1269            b']((\x90',                 # ADDITEMS
1270                                        # NEWOBJ_EX
1271            b'cbuiltins\nlist\n)}(\x92',
1272            b'cbuiltins\nlist\n)(}\x92',
1273            b'cbuiltins\nlist\n()}\x92',
1274                                        # STACK_GLOBAL
1275            b'Vbuiltins\n(Vlist\n\x93',
1276            b'Vbuiltins\nVlist\n(\x93',
1277            b'N(\x94',                  # MEMOIZE
1278        ]
1279        for p in badpickles:
1280            self.check_unpickling_error(self.bad_stack_errors, p)
1281
1282    def test_truncated_data(self):
1283        self.check_unpickling_error(EOFError, b'')
1284        self.check_unpickling_error(EOFError, b'N')
1285        badpickles = [
1286            b'B',                       # BINBYTES
1287            b'B\x03\x00\x00',
1288            b'B\x03\x00\x00\x00',
1289            b'B\x03\x00\x00\x00ab',
1290            b'C',                       # SHORT_BINBYTES
1291            b'C\x03',
1292            b'C\x03ab',
1293            b'F',                       # FLOAT
1294            b'F0.0',
1295            b'F0.00',
1296            b'G',                       # BINFLOAT
1297            b'G\x00\x00\x00\x00\x00\x00\x00',
1298            b'I',                       # INT
1299            b'I0',
1300            b'J',                       # BININT
1301            b'J\x00\x00\x00',
1302            b'K',                       # BININT1
1303            b'L',                       # LONG
1304            b'L0',
1305            b'L10',
1306            b'L0L',
1307            b'L10L',
1308            b'M',                       # BININT2
1309            b'M\x00',
1310            # b'P',                       # PERSID
1311            # b'Pabc',
1312            b'S',                       # STRING
1313            b"S'abc'",
1314            b'T',                       # BINSTRING
1315            b'T\x03\x00\x00',
1316            b'T\x03\x00\x00\x00',
1317            b'T\x03\x00\x00\x00ab',
1318            b'U',                       # SHORT_BINSTRING
1319            b'U\x03',
1320            b'U\x03ab',
1321            b'V',                       # UNICODE
1322            b'Vabc',
1323            b'X',                       # BINUNICODE
1324            b'X\x03\x00\x00',
1325            b'X\x03\x00\x00\x00',
1326            b'X\x03\x00\x00\x00ab',
1327            b'(c',                      # GLOBAL
1328            b'(cbuiltins',
1329            b'(cbuiltins\n',
1330            b'(cbuiltins\nlist',
1331            b'Ng',                      # GET
1332            b'Ng0',
1333            b'(i',                      # INST
1334            b'(ibuiltins',
1335            b'(ibuiltins\n',
1336            b'(ibuiltins\nlist',
1337            b'Nh',                      # BINGET
1338            b'Nj',                      # LONG_BINGET
1339            b'Nj\x00\x00\x00',
1340            b'Np',                      # PUT
1341            b'Np0',
1342            b'Nq',                      # BINPUT
1343            b'Nr',                      # LONG_BINPUT
1344            b'Nr\x00\x00\x00',
1345            b'\x80',                    # PROTO
1346            b'\x82',                    # EXT1
1347            b'\x83',                    # EXT2
1348            b'\x84\x01',
1349            b'\x84',                    # EXT4
1350            b'\x84\x01\x00\x00',
1351            b'\x8a',                    # LONG1
1352            b'\x8b',                    # LONG4
1353            b'\x8b\x00\x00\x00',
1354            b'\x8c',                    # SHORT_BINUNICODE
1355            b'\x8c\x03',
1356            b'\x8c\x03ab',
1357            b'\x8d',                    # BINUNICODE8
1358            b'\x8d\x03\x00\x00\x00\x00\x00\x00',
1359            b'\x8d\x03\x00\x00\x00\x00\x00\x00\x00',
1360            b'\x8d\x03\x00\x00\x00\x00\x00\x00\x00ab',
1361            b'\x8e',                    # BINBYTES8
1362            b'\x8e\x03\x00\x00\x00\x00\x00\x00',
1363            b'\x8e\x03\x00\x00\x00\x00\x00\x00\x00',
1364            b'\x8e\x03\x00\x00\x00\x00\x00\x00\x00ab',
1365            b'\x96',                    # BYTEARRAY8
1366            b'\x96\x03\x00\x00\x00\x00\x00\x00',
1367            b'\x96\x03\x00\x00\x00\x00\x00\x00\x00',
1368            b'\x96\x03\x00\x00\x00\x00\x00\x00\x00ab',
1369            b'\x95',                    # FRAME
1370            b'\x95\x02\x00\x00\x00\x00\x00\x00',
1371            b'\x95\x02\x00\x00\x00\x00\x00\x00\x00',
1372            b'\x95\x02\x00\x00\x00\x00\x00\x00\x00N',
1373        ]
1374        for p in badpickles:
1375            self.check_unpickling_error(self.truncated_errors, p)
1376
    @reap_threads
    def test_unpickle_module_race(self):
        # https://bugs.python.org/issue34572
        # Two threads unpickle the same class while the module defining it is
        # still in the middle of being imported; both loads must end up with
        # an instance of that class.

        # Helper module exposing a two-party barrier.
        locker_module = dedent("""
        import threading
        barrier = threading.Barrier(2)
        """)
        # Module whose import blocks on that barrier *before* the class to be
        # unpickled is defined.
        locking_import_module = dedent("""
        import locker
        locker.barrier.wait()
        class ToBeUnpickled(object):
            pass
        """)

        # Write both modules into a scratch directory placed first on
        # sys.path; the cleanups undo the directory, the path entry and the
        # imported modules.
        os.mkdir(TESTFN)
        self.addCleanup(shutil.rmtree, TESTFN)
        sys.path.insert(0, TESTFN)
        self.addCleanup(sys.path.remove, TESTFN)
        with open(os.path.join(TESTFN, "locker.py"), "wb") as f:
            f.write(locker_module.encode('utf-8'))
        with open(os.path.join(TESTFN, "locking_import.py"), "wb") as f:
            f.write(locking_import_module.encode('utf-8'))
        self.addCleanup(forget, "locker")
        self.addCleanup(forget, "locking_import")

        # Imported here so that this thread can be the second party of
        # locker.barrier further down.
        import locker

        # Protocol-3 pickle creating a locking_import.ToBeUnpickled instance.
        pickle_bytes = (
            b'\x80\x03clocking_import\nToBeUnpickled\nq\x00)\x81q\x01.')

        # Then try to unpickle two of these simultaneously
        # One of them will cause the module import, and we want it to block
        # until the other one either:
        #   - fails (before the patch for this issue)
        #   - blocks on the import lock for the module, as it should
        results = []
        # Three parties: the two worker threads plus this thread.
        barrier = threading.Barrier(3)
        def t():
            # This ensures the threads have all started
            # presumably barrier release is faster than thread startup
            barrier.wait()
            # NOTE(review): this calls pickle.loads directly rather than
            # self.loads -- confirm that is intended.
            results.append(pickle.loads(pickle_bytes))

        t1 = threading.Thread(target=t)
        t2 = threading.Thread(target=t)
        t1.start()
        t2.start()

        # Release both workers at once ...
        barrier.wait()
        # could have delay here
        # ... then let the pending import of locking_import proceed past its
        # own locker.barrier.wait().
        locker.barrier.wait()

        t1.join()
        t2.join()

        # Both workers must have produced an instance of the imported class.
        from locking_import import ToBeUnpickled
        self.assertEqual(
            [type(x) for x in results],
            [ToBeUnpickled] * 2)
1436
1437
1438
1439class AbstractPickleTests(unittest.TestCase):
1440    # Subclass must define self.dumps, self.loads.
1441
1442    optimized = False
1443
1444    _testdata = AbstractUnpickleTests._testdata
1445
1446    def setUp(self):
1447        pass
1448
1449    assert_is_copy = AbstractUnpickleTests.assert_is_copy
1450
1451    def test_misc(self):
1452        # test various datatypes not tested by testdata
1453        for proto in protocols:
1454            x = myint(4)
1455            s = self.dumps(x, proto)
1456            y = self.loads(s)
1457            self.assert_is_copy(x, y)
1458
1459            x = (1, ())
1460            s = self.dumps(x, proto)
1461            y = self.loads(s)
1462            self.assert_is_copy(x, y)
1463
1464            x = initarg(1, x)
1465            s = self.dumps(x, proto)
1466            y = self.loads(s)
1467            self.assert_is_copy(x, y)
1468
1469        # XXX test __reduce__ protocol?
1470
1471    def test_roundtrip_equality(self):
1472        expected = self._testdata
1473        for proto in protocols:
1474            s = self.dumps(expected, proto)
1475            got = self.loads(s)
1476            self.assert_is_copy(expected, got)
1477
1478    # There are gratuitous differences between pickles produced by
1479    # pickle and cPickle, largely because cPickle starts PUT indices at
1480    # 1 and pickle starts them at 0.  See XXX comment in cPickle's put2() --
1481    # there's a comment with an exclamation point there whose meaning
1482    # is a mystery.  cPickle also suppresses PUT for objects with a refcount
1483    # of 1.
1484    def dont_test_disassembly(self):
1485        from io import StringIO
1486        from pickletools import dis
1487
1488        for proto, expected in (0, DATA0_DIS), (1, DATA1_DIS):
1489            s = self.dumps(self._testdata, proto)
1490            filelike = StringIO()
1491            dis(s, out=filelike)
1492            got = filelike.getvalue()
1493            self.assertEqual(expected, got)
1494
1495    def test_recursive_list(self):
1496        l = []
1497        l.append(l)
1498        for proto in protocols:
1499            s = self.dumps(l, proto)
1500            x = self.loads(s)
1501            self.assertIsInstance(x, list)
1502            self.assertEqual(len(x), 1)
1503            self.assertIs(x[0], x)
1504
1505    def test_recursive_tuple_and_list(self):
1506        t = ([],)
1507        t[0].append(t)
1508        for proto in protocols:
1509            s = self.dumps(t, proto)
1510            x = self.loads(s)
1511            self.assertIsInstance(x, tuple)
1512            self.assertEqual(len(x), 1)
1513            self.assertIsInstance(x[0], list)
1514            self.assertEqual(len(x[0]), 1)
1515            self.assertIs(x[0][0], x)
1516
1517    def test_recursive_dict(self):
1518        d = {}
1519        d[1] = d
1520        for proto in protocols:
1521            s = self.dumps(d, proto)
1522            x = self.loads(s)
1523            self.assertIsInstance(x, dict)
1524            self.assertEqual(list(x.keys()), [1])
1525            self.assertIs(x[1], x)
1526
1527    def test_recursive_dict_key(self):
1528        d = {}
1529        k = K(d)
1530        d[k] = 1
1531        for proto in protocols:
1532            s = self.dumps(d, proto)
1533            x = self.loads(s)
1534            self.assertIsInstance(x, dict)
1535            self.assertEqual(len(x.keys()), 1)
1536            self.assertIsInstance(list(x.keys())[0], K)
1537            self.assertIs(list(x.keys())[0].value, x)
1538
1539    def test_recursive_set(self):
1540        y = set()
1541        k = K(y)
1542        y.add(k)
1543        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
1544            s = self.dumps(y, proto)
1545            x = self.loads(s)
1546            self.assertIsInstance(x, set)
1547            self.assertEqual(len(x), 1)
1548            self.assertIsInstance(list(x)[0], K)
1549            self.assertIs(list(x)[0].value, x)
1550
1551    def test_recursive_list_subclass(self):
1552        y = MyList()
1553        y.append(y)
1554        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
1555            s = self.dumps(y, proto)
1556            x = self.loads(s)
1557            self.assertIsInstance(x, MyList)
1558            self.assertEqual(len(x), 1)
1559            self.assertIs(x[0], x)
1560
1561    def test_recursive_dict_subclass(self):
1562        d = MyDict()
1563        d[1] = d
1564        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
1565            s = self.dumps(d, proto)
1566            x = self.loads(s)
1567            self.assertIsInstance(x, MyDict)
1568            self.assertEqual(list(x.keys()), [1])
1569            self.assertIs(x[1], x)
1570
1571    def test_recursive_dict_subclass_key(self):
1572        d = MyDict()
1573        k = K(d)
1574        d[k] = 1
1575        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
1576            s = self.dumps(d, proto)
1577            x = self.loads(s)
1578            self.assertIsInstance(x, MyDict)
1579            self.assertEqual(len(list(x.keys())), 1)
1580            self.assertIsInstance(list(x.keys())[0], K)
1581            self.assertIs(list(x.keys())[0].value, x)
1582
1583    def test_recursive_inst(self):
1584        i = C()
1585        i.attr = i
1586        for proto in protocols:
1587            s = self.dumps(i, proto)
1588            x = self.loads(s)
1589            self.assertIsInstance(x, C)
1590            self.assertEqual(dir(x), dir(i))
1591            self.assertIs(x.attr, x)
1592
1593    def test_recursive_multi(self):
1594        l = []
1595        d = {1:l}
1596        i = C()
1597        i.attr = d
1598        l.append(i)
1599        for proto in protocols:
1600            s = self.dumps(l, proto)
1601            x = self.loads(s)
1602            self.assertIsInstance(x, list)
1603            self.assertEqual(len(x), 1)
1604            self.assertEqual(dir(x[0]), dir(i))
1605            self.assertEqual(list(x[0].attr.keys()), [1])
1606            self.assertTrue(x[0].attr[1] is x)
1607
1608    def check_recursive_collection_and_inst(self, factory):
1609        h = H()
1610        y = factory([h])
1611        h.attr = y
1612        for proto in protocols:
1613            s = self.dumps(y, proto)
1614            x = self.loads(s)
1615            self.assertIsInstance(x, type(y))
1616            self.assertEqual(len(x), 1)
1617            self.assertIsInstance(list(x)[0], H)
1618            self.assertIs(list(x)[0].attr, x)
1619
1620    def test_recursive_list_and_inst(self):
1621        self.check_recursive_collection_and_inst(list)
1622
1623    def test_recursive_tuple_and_inst(self):
1624        self.check_recursive_collection_and_inst(tuple)
1625
1626    def test_recursive_dict_and_inst(self):
1627        self.check_recursive_collection_and_inst(dict.fromkeys)
1628
1629    def test_recursive_set_and_inst(self):
1630        self.check_recursive_collection_and_inst(set)
1631
1632    def test_recursive_frozenset_and_inst(self):
1633        self.check_recursive_collection_and_inst(frozenset)
1634
1635    def test_recursive_list_subclass_and_inst(self):
1636        self.check_recursive_collection_and_inst(MyList)
1637
1638    def test_recursive_tuple_subclass_and_inst(self):
1639        self.check_recursive_collection_and_inst(MyTuple)
1640
1641    def test_recursive_dict_subclass_and_inst(self):
1642        self.check_recursive_collection_and_inst(MyDict.fromkeys)
1643
1644    def test_recursive_set_subclass_and_inst(self):
1645        self.check_recursive_collection_and_inst(MySet)
1646
1647    def test_recursive_frozenset_subclass_and_inst(self):
1648        self.check_recursive_collection_and_inst(MyFrozenSet)
1649
1650    def test_unicode(self):
1651        endcases = ['', '<\\u>', '<\\\u1234>', '<\n>',
1652                    '<\\>', '<\\\U00012345>',
1653                    # surrogates
1654                    '<\udc80>']
1655        for proto in protocols:
1656            for u in endcases:
1657                p = self.dumps(u, proto)
1658                u2 = self.loads(p)
1659                self.assert_is_copy(u, u2)
1660
1661    def test_unicode_high_plane(self):
1662        t = '\U00012345'
1663        for proto in protocols:
1664            p = self.dumps(t, proto)
1665            t2 = self.loads(p)
1666            self.assert_is_copy(t, t2)
1667
1668    def test_bytes(self):
1669        for proto in protocols:
1670            for s in b'', b'xyz', b'xyz'*100:
1671                p = self.dumps(s, proto)
1672                self.assert_is_copy(s, self.loads(p))
1673            for s in [bytes([i]) for i in range(256)]:
1674                p = self.dumps(s, proto)
1675                self.assert_is_copy(s, self.loads(p))
1676            for s in [bytes([i, i]) for i in range(256)]:
1677                p = self.dumps(s, proto)
1678                self.assert_is_copy(s, self.loads(p))
1679
1680    def test_bytearray(self):
1681        for proto in protocols:
1682            for s in b'', b'xyz', b'xyz'*100:
1683                b = bytearray(s)
1684                p = self.dumps(b, proto)
1685                bb = self.loads(p)
1686                self.assertIsNot(bb, b)
1687                self.assert_is_copy(b, bb)
1688                if proto <= 3:
1689                    # bytearray is serialized using a global reference
1690                    self.assertIn(b'bytearray', p)
1691                    self.assertTrue(opcode_in_pickle(pickle.GLOBAL, p))
1692                elif proto == 4:
1693                    self.assertIn(b'bytearray', p)
1694                    self.assertTrue(opcode_in_pickle(pickle.STACK_GLOBAL, p))
1695                elif proto == 5:
1696                    self.assertNotIn(b'bytearray', p)
1697                    self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p))
1698
1699    def test_ints(self):
1700        for proto in protocols:
1701            n = sys.maxsize
1702            while n:
1703                for expected in (-n, n):
1704                    s = self.dumps(expected, proto)
1705                    n2 = self.loads(s)
1706                    self.assert_is_copy(expected, n2)
1707                n = n >> 1
1708
1709    def test_long(self):
1710        for proto in protocols:
1711            # 256 bytes is where LONG4 begins.
1712            for nbits in 1, 8, 8*254, 8*255, 8*256, 8*257:
1713                nbase = 1 << nbits
1714                for npos in nbase-1, nbase, nbase+1:
1715                    for n in npos, -npos:
1716                        pickle = self.dumps(n, proto)
1717                        got = self.loads(pickle)
1718                        self.assert_is_copy(n, got)
1719        # Try a monster.  This is quadratic-time in protos 0 & 1, so don't
1720        # bother with those.
1721        nbase = int("deadbeeffeedface", 16)
1722        nbase += nbase << 1000000
1723        for n in nbase, -nbase:
1724            p = self.dumps(n, 2)
1725            got = self.loads(p)
1726            # assert_is_copy is very expensive here as it precomputes
1727            # a failure message by computing the repr() of n and got,
1728            # we just do the check ourselves.
1729            self.assertIs(type(got), int)
1730            self.assertEqual(n, got)
1731
1732    def test_float(self):
1733        test_values = [0.0, 4.94e-324, 1e-310, 7e-308, 6.626e-34, 0.1, 0.5,
1734                       3.14, 263.44582062374053, 6.022e23, 1e30]
1735        test_values = test_values + [-x for x in test_values]
1736        for proto in protocols:
1737            for value in test_values:
1738                pickle = self.dumps(value, proto)
1739                got = self.loads(pickle)
1740                self.assert_is_copy(value, got)
1741
1742    @run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
1743    def test_float_format(self):
1744        # make sure that floats are formatted locale independent with proto 0
1745        self.assertEqual(self.dumps(1.2, 0)[0:3], b'F1.')
1746
1747    def test_reduce(self):
1748        for proto in protocols:
1749            inst = AAA()
1750            dumped = self.dumps(inst, proto)
1751            loaded = self.loads(dumped)
1752            self.assertEqual(loaded, REDUCE_A)
1753
1754    def test_getinitargs(self):
1755        for proto in protocols:
1756            inst = initarg(1, 2)
1757            dumped = self.dumps(inst, proto)
1758            loaded = self.loads(dumped)
1759            self.assert_is_copy(inst, loaded)
1760
1761    def test_metaclass(self):
1762        a = use_metaclass()
1763        for proto in protocols:
1764            s = self.dumps(a, proto)
1765            b = self.loads(s)
1766            self.assertEqual(a.__class__, b.__class__)
1767
1768    def test_dynamic_class(self):
1769        a = create_dynamic_class("my_dynamic_class", (object,))
1770        copyreg.pickle(pickling_metaclass, pickling_metaclass.__reduce__)
1771        for proto in protocols:
1772            s = self.dumps(a, proto)
1773            b = self.loads(s)
1774            self.assertEqual(a, b)
1775            self.assertIs(type(a), type(b))
1776
1777    def test_structseq(self):
1778        import time
1779        import os
1780
1781        t = time.localtime()
1782        for proto in protocols:
1783            s = self.dumps(t, proto)
1784            u = self.loads(s)
1785            self.assert_is_copy(t, u)
1786            t = os.stat(os.curdir)
1787            s = self.dumps(t, proto)
1788            u = self.loads(s)
1789            self.assert_is_copy(t, u)
1790            if hasattr(os, "statvfs"):
1791                t = os.statvfs(os.curdir)
1792                s = self.dumps(t, proto)
1793                u = self.loads(s)
1794                self.assert_is_copy(t, u)
1795
1796    def test_ellipsis(self):
1797        for proto in protocols:
1798            s = self.dumps(..., proto)
1799            u = self.loads(s)
1800            self.assertIs(..., u)
1801
1802    def test_notimplemented(self):
1803        for proto in protocols:
1804            s = self.dumps(NotImplemented, proto)
1805            u = self.loads(s)
1806            self.assertIs(NotImplemented, u)
1807
1808    def test_singleton_types(self):
1809        # Issue #6477: Test that types of built-in singletons can be pickled.
1810        singletons = [None, ..., NotImplemented]
1811        for singleton in singletons:
1812            for proto in protocols:
1813                s = self.dumps(type(singleton), proto)
1814                u = self.loads(s)
1815                self.assertIs(type(singleton), u)
1816
1817    # Tests for protocol 2
1818
1819    def test_proto(self):
1820        for proto in protocols:
1821            pickled = self.dumps(None, proto)
1822            if proto >= 2:
1823                proto_header = pickle.PROTO + bytes([proto])
1824                self.assertTrue(pickled.startswith(proto_header))
1825            else:
1826                self.assertEqual(count_opcode(pickle.PROTO, pickled), 0)
1827
1828        oob = protocols[-1] + 1     # a future protocol
1829        build_none = pickle.NONE + pickle.STOP
1830        badpickle = pickle.PROTO + bytes([oob]) + build_none
1831        try:
1832            self.loads(badpickle)
1833        except ValueError as err:
1834            self.assertIn("unsupported pickle protocol", str(err))
1835        else:
1836            self.fail("expected bad protocol number to raise ValueError")
1837
1838    def test_long1(self):
1839        x = 12345678910111213141516178920
1840        for proto in protocols:
1841            s = self.dumps(x, proto)
1842            y = self.loads(s)
1843            self.assert_is_copy(x, y)
1844            self.assertEqual(opcode_in_pickle(pickle.LONG1, s), proto >= 2)
1845
1846    def test_long4(self):
1847        x = 12345678910111213141516178920 << (256*8)
1848        for proto in protocols:
1849            s = self.dumps(x, proto)
1850            y = self.loads(s)
1851            self.assert_is_copy(x, y)
1852            self.assertEqual(opcode_in_pickle(pickle.LONG4, s), proto >= 2)
1853
1854    def test_short_tuples(self):
1855        # Map (proto, len(tuple)) to expected opcode.
1856        expected_opcode = {(0, 0): pickle.TUPLE,
1857                           (0, 1): pickle.TUPLE,
1858                           (0, 2): pickle.TUPLE,
1859                           (0, 3): pickle.TUPLE,
1860                           (0, 4): pickle.TUPLE,
1861
1862                           (1, 0): pickle.EMPTY_TUPLE,
1863                           (1, 1): pickle.TUPLE,
1864                           (1, 2): pickle.TUPLE,
1865                           (1, 3): pickle.TUPLE,
1866                           (1, 4): pickle.TUPLE,
1867
1868                           (2, 0): pickle.EMPTY_TUPLE,
1869                           (2, 1): pickle.TUPLE1,
1870                           (2, 2): pickle.TUPLE2,
1871                           (2, 3): pickle.TUPLE3,
1872                           (2, 4): pickle.TUPLE,
1873
1874                           (3, 0): pickle.EMPTY_TUPLE,
1875                           (3, 1): pickle.TUPLE1,
1876                           (3, 2): pickle.TUPLE2,
1877                           (3, 3): pickle.TUPLE3,
1878                           (3, 4): pickle.TUPLE,
1879                          }
1880        a = ()
1881        b = (1,)
1882        c = (1, 2)
1883        d = (1, 2, 3)
1884        e = (1, 2, 3, 4)
1885        for proto in protocols:
1886            for x in a, b, c, d, e:
1887                s = self.dumps(x, proto)
1888                y = self.loads(s)
1889                self.assert_is_copy(x, y)
1890                expected = expected_opcode[min(proto, 3), len(x)]
1891                self.assertTrue(opcode_in_pickle(expected, s))
1892
1893    def test_singletons(self):
1894        # Map (proto, singleton) to expected opcode.
1895        expected_opcode = {(0, None): pickle.NONE,
1896                           (1, None): pickle.NONE,
1897                           (2, None): pickle.NONE,
1898                           (3, None): pickle.NONE,
1899
1900                           (0, True): pickle.INT,
1901                           (1, True): pickle.INT,
1902                           (2, True): pickle.NEWTRUE,
1903                           (3, True): pickle.NEWTRUE,
1904
1905                           (0, False): pickle.INT,
1906                           (1, False): pickle.INT,
1907                           (2, False): pickle.NEWFALSE,
1908                           (3, False): pickle.NEWFALSE,
1909                          }
1910        for proto in protocols:
1911            for x in None, False, True:
1912                s = self.dumps(x, proto)
1913                y = self.loads(s)
1914                self.assertTrue(x is y, (proto, x, s, y))
1915                expected = expected_opcode[min(proto, 3), x]
1916                self.assertTrue(opcode_in_pickle(expected, s))
1917
1918    def test_newobj_tuple(self):
1919        x = MyTuple([1, 2, 3])
1920        x.foo = 42
1921        x.bar = "hello"
1922        for proto in protocols:
1923            s = self.dumps(x, proto)
1924            y = self.loads(s)
1925            self.assert_is_copy(x, y)
1926
1927    def test_newobj_list(self):
1928        x = MyList([1, 2, 3])
1929        x.foo = 42
1930        x.bar = "hello"
1931        for proto in protocols:
1932            s = self.dumps(x, proto)
1933            y = self.loads(s)
1934            self.assert_is_copy(x, y)
1935
1936    def test_newobj_generic(self):
1937        for proto in protocols:
1938            for C in myclasses:
1939                B = C.__base__
1940                x = C(C.sample)
1941                x.foo = 42
1942                s = self.dumps(x, proto)
1943                y = self.loads(s)
1944                detail = (proto, C, B, x, y, type(y))
1945                self.assert_is_copy(x, y) # XXX revisit
1946                self.assertEqual(B(x), B(y), detail)
1947                self.assertEqual(x.__dict__, y.__dict__, detail)
1948
1949    def test_newobj_proxies(self):
1950        # NEWOBJ should use the __class__ rather than the raw type
1951        classes = myclasses[:]
1952        # Cannot create weakproxies to these classes
1953        for c in (MyInt, MyTuple):
1954            classes.remove(c)
1955        for proto in protocols:
1956            for C in classes:
1957                B = C.__base__
1958                x = C(C.sample)
1959                x.foo = 42
1960                p = weakref.proxy(x)
1961                s = self.dumps(p, proto)
1962                y = self.loads(s)
1963                self.assertEqual(type(y), type(x))  # rather than type(p)
1964                detail = (proto, C, B, x, y, type(y))
1965                self.assertEqual(B(x), B(y), detail)
1966                self.assertEqual(x.__dict__, y.__dict__, detail)
1967
1968    def test_newobj_overridden_new(self):
1969        # Test that Python class with C implemented __new__ is pickleable
1970        for proto in protocols:
1971            x = MyIntWithNew2(1)
1972            x.foo = 42
1973            s = self.dumps(x, proto)
1974            y = self.loads(s)
1975            self.assertIs(type(y), MyIntWithNew2)
1976            self.assertEqual(int(y), 1)
1977            self.assertEqual(y.foo, 42)
1978
1979    def test_newobj_not_class(self):
1980        # Issue 24552
1981        global SimpleNewObj
1982        save = SimpleNewObj
1983        o = SimpleNewObj.__new__(SimpleNewObj)
1984        b = self.dumps(o, 4)
1985        try:
1986            SimpleNewObj = 42
1987            self.assertRaises((TypeError, pickle.UnpicklingError), self.loads, b)
1988        finally:
1989            SimpleNewObj = save
1990
1991    # Register a type with copyreg, with extension code extcode.  Pickle
1992    # an object of that type.  Check that the resulting pickle uses opcode
1993    # (EXT[124]) under proto 2, and not in proto 1.
1994
1995    def produce_global_ext(self, extcode, opcode):
1996        e = ExtensionSaver(extcode)
1997        try:
1998            copyreg.add_extension(__name__, "MyList", extcode)
1999            x = MyList([1, 2, 3])
2000            x.foo = 42
2001            x.bar = "hello"
2002
2003            # Dump using protocol 1 for comparison.
2004            s1 = self.dumps(x, 1)
2005            self.assertIn(__name__.encode("utf-8"), s1)
2006            self.assertIn(b"MyList", s1)
2007            self.assertFalse(opcode_in_pickle(opcode, s1))
2008
2009            y = self.loads(s1)
2010            self.assert_is_copy(x, y)
2011
2012            # Dump using protocol 2 for test.
2013            s2 = self.dumps(x, 2)
2014            self.assertNotIn(__name__.encode("utf-8"), s2)
2015            self.assertNotIn(b"MyList", s2)
2016            self.assertEqual(opcode_in_pickle(opcode, s2), True, repr(s2))
2017
2018            y = self.loads(s2)
2019            self.assert_is_copy(x, y)
2020        finally:
2021            e.restore()
2022
2023    def test_global_ext1(self):
2024        self.produce_global_ext(0x00000001, pickle.EXT1)  # smallest EXT1 code
2025        self.produce_global_ext(0x000000ff, pickle.EXT1)  # largest EXT1 code
2026
2027    def test_global_ext2(self):
2028        self.produce_global_ext(0x00000100, pickle.EXT2)  # smallest EXT2 code
2029        self.produce_global_ext(0x0000ffff, pickle.EXT2)  # largest EXT2 code
2030        self.produce_global_ext(0x0000abcd, pickle.EXT2)  # check endianness
2031
2032    def test_global_ext4(self):
2033        self.produce_global_ext(0x00010000, pickle.EXT4)  # smallest EXT4 code
2034        self.produce_global_ext(0x7fffffff, pickle.EXT4)  # largest EXT4 code
2035        self.produce_global_ext(0x12abcdef, pickle.EXT4)  # check endianness
2036
2037    def test_list_chunking(self):
2038        n = 10  # too small to chunk
2039        x = list(range(n))
2040        for proto in protocols:
2041            s = self.dumps(x, proto)
2042            y = self.loads(s)
2043            self.assert_is_copy(x, y)
2044            num_appends = count_opcode(pickle.APPENDS, s)
2045            self.assertEqual(num_appends, proto > 0)
2046
2047        n = 2500  # expect at least two chunks when proto > 0
2048        x = list(range(n))
2049        for proto in protocols:
2050            s = self.dumps(x, proto)
2051            y = self.loads(s)
2052            self.assert_is_copy(x, y)
2053            num_appends = count_opcode(pickle.APPENDS, s)
2054            if proto == 0:
2055                self.assertEqual(num_appends, 0)
2056            else:
2057                self.assertTrue(num_appends >= 2)
2058
2059    def test_dict_chunking(self):
2060        n = 10  # too small to chunk
2061        x = dict.fromkeys(range(n))
2062        for proto in protocols:
2063            s = self.dumps(x, proto)
2064            self.assertIsInstance(s, bytes_types)
2065            y = self.loads(s)
2066            self.assert_is_copy(x, y)
2067            num_setitems = count_opcode(pickle.SETITEMS, s)
2068            self.assertEqual(num_setitems, proto > 0)
2069
2070        n = 2500  # expect at least two chunks when proto > 0
2071        x = dict.fromkeys(range(n))
2072        for proto in protocols:
2073            s = self.dumps(x, proto)
2074            y = self.loads(s)
2075            self.assert_is_copy(x, y)
2076            num_setitems = count_opcode(pickle.SETITEMS, s)
2077            if proto == 0:
2078                self.assertEqual(num_setitems, 0)
2079            else:
2080                self.assertTrue(num_setitems >= 2)
2081
2082    def test_set_chunking(self):
2083        n = 10  # too small to chunk
2084        x = set(range(n))
2085        for proto in protocols:
2086            s = self.dumps(x, proto)
2087            y = self.loads(s)
2088            self.assert_is_copy(x, y)
2089            num_additems = count_opcode(pickle.ADDITEMS, s)
2090            if proto < 4:
2091                self.assertEqual(num_additems, 0)
2092            else:
2093                self.assertEqual(num_additems, 1)
2094
2095        n = 2500  # expect at least two chunks when proto >= 4
2096        x = set(range(n))
2097        for proto in protocols:
2098            s = self.dumps(x, proto)
2099            y = self.loads(s)
2100            self.assert_is_copy(x, y)
2101            num_additems = count_opcode(pickle.ADDITEMS, s)
2102            if proto < 4:
2103                self.assertEqual(num_additems, 0)
2104            else:
2105                self.assertGreaterEqual(num_additems, 2)
2106
2107    def test_simple_newobj(self):
2108        x = SimpleNewObj.__new__(SimpleNewObj, 0xface)  # avoid __init__
2109        x.abc = 666
2110        for proto in protocols:
2111            with self.subTest(proto=proto):
2112                s = self.dumps(x, proto)
2113                if proto < 1:
2114                    self.assertIn(b'\nI64206', s)  # INT
2115                else:
2116                    self.assertIn(b'M\xce\xfa', s)  # BININT2
2117                self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s),
2118                                 2 <= proto)
2119                self.assertFalse(opcode_in_pickle(pickle.NEWOBJ_EX, s))
2120                y = self.loads(s)   # will raise TypeError if __init__ called
2121                self.assert_is_copy(x, y)
2122
2123    def test_complex_newobj(self):
2124        x = ComplexNewObj.__new__(ComplexNewObj, 0xface)  # avoid __init__
2125        x.abc = 666
2126        for proto in protocols:
2127            with self.subTest(proto=proto):
2128                s = self.dumps(x, proto)
2129                if proto < 1:
2130                    self.assertIn(b'\nI64206', s)  # INT
2131                elif proto < 2:
2132                    self.assertIn(b'M\xce\xfa', s)  # BININT2
2133                elif proto < 4:
2134                    self.assertIn(b'X\x04\x00\x00\x00FACE', s)  # BINUNICODE
2135                else:
2136                    self.assertIn(b'\x8c\x04FACE', s)  # SHORT_BINUNICODE
2137                self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s),
2138                                 2 <= proto)
2139                self.assertFalse(opcode_in_pickle(pickle.NEWOBJ_EX, s))
2140                y = self.loads(s)   # will raise TypeError if __init__ called
2141                self.assert_is_copy(x, y)
2142
2143    def test_complex_newobj_ex(self):
2144        x = ComplexNewObjEx.__new__(ComplexNewObjEx, 0xface)  # avoid __init__
2145        x.abc = 666
2146        for proto in protocols:
2147            with self.subTest(proto=proto):
2148                s = self.dumps(x, proto)
2149                if proto < 1:
2150                    self.assertIn(b'\nI64206', s)  # INT
2151                elif proto < 2:
2152                    self.assertIn(b'M\xce\xfa', s)  # BININT2
2153                elif proto < 4:
2154                    self.assertIn(b'X\x04\x00\x00\x00FACE', s)  # BINUNICODE
2155                else:
2156                    self.assertIn(b'\x8c\x04FACE', s)  # SHORT_BINUNICODE
2157                self.assertFalse(opcode_in_pickle(pickle.NEWOBJ, s))
2158                self.assertEqual(opcode_in_pickle(pickle.NEWOBJ_EX, s),
2159                                 4 <= proto)
2160                y = self.loads(s)   # will raise TypeError if __init__ called
2161                self.assert_is_copy(x, y)
2162
2163    def test_newobj_list_slots(self):
2164        x = SlotList([1, 2, 3])
2165        x.foo = 42
2166        x.bar = "hello"
2167        s = self.dumps(x, 2)
2168        y = self.loads(s)
2169        self.assert_is_copy(x, y)
2170
2171    def test_reduce_overrides_default_reduce_ex(self):
2172        for proto in protocols:
2173            x = REX_one()
2174            self.assertEqual(x._reduce_called, 0)
2175            s = self.dumps(x, proto)
2176            self.assertEqual(x._reduce_called, 1)
2177            y = self.loads(s)
2178            self.assertEqual(y._reduce_called, 0)
2179
2180    def test_reduce_ex_called(self):
2181        for proto in protocols:
2182            x = REX_two()
2183            self.assertEqual(x._proto, None)
2184            s = self.dumps(x, proto)
2185            self.assertEqual(x._proto, proto)
2186            y = self.loads(s)
2187            self.assertEqual(y._proto, None)
2188
2189    def test_reduce_ex_overrides_reduce(self):
2190        for proto in protocols:
2191            x = REX_three()
2192            self.assertEqual(x._proto, None)
2193            s = self.dumps(x, proto)
2194            self.assertEqual(x._proto, proto)
2195            y = self.loads(s)
2196            self.assertEqual(y._proto, None)
2197
2198    def test_reduce_ex_calls_base(self):
2199        for proto in protocols:
2200            x = REX_four()
2201            self.assertEqual(x._proto, None)
2202            s = self.dumps(x, proto)
2203            self.assertEqual(x._proto, proto)
2204            y = self.loads(s)
2205            self.assertEqual(y._proto, proto)
2206
2207    def test_reduce_calls_base(self):
2208        for proto in protocols:
2209            x = REX_five()
2210            self.assertEqual(x._reduce_called, 0)
2211            s = self.dumps(x, proto)
2212            self.assertEqual(x._reduce_called, 1)
2213            y = self.loads(s)
2214            self.assertEqual(y._reduce_called, 1)
2215
2216    @no_tracing
2217    def test_bad_getattr(self):
2218        # Issue #3514: crash when there is an infinite loop in __getattr__
2219        x = BadGetattr()
2220        for proto in protocols:
2221            self.assertRaises(RuntimeError, self.dumps, x, proto)
2222
2223    def test_reduce_bad_iterator(self):
2224        # Issue4176: crash when 4th and 5th items of __reduce__()
2225        # are not iterators
2226        class C(object):
2227            def __reduce__(self):
2228                # 4th item is not an iterator
2229                return list, (), None, [], None
2230        class D(object):
2231            def __reduce__(self):
2232                # 5th item is not an iterator
2233                return dict, (), None, None, []
2234
2235        # Python implementation is less strict and also accepts iterables.
2236        for proto in protocols:
2237            try:
2238                self.dumps(C(), proto)
2239            except pickle.PicklingError:
2240                pass
2241            try:
2242                self.dumps(D(), proto)
2243            except pickle.PicklingError:
2244                pass
2245
2246    def test_many_puts_and_gets(self):
2247        # Test that internal data structures correctly deal with lots of
2248        # puts/gets.
2249        keys = ("aaa" + str(i) for i in range(100))
2250        large_dict = dict((k, [4, 5, 6]) for k in keys)
2251        obj = [dict(large_dict), dict(large_dict), dict(large_dict)]
2252
2253        for proto in protocols:
2254            with self.subTest(proto=proto):
2255                dumped = self.dumps(obj, proto)
2256                loaded = self.loads(dumped)
2257                self.assert_is_copy(obj, loaded)
2258
2259    def test_attribute_name_interning(self):
2260        # Test that attribute names of pickled objects are interned when
2261        # unpickling.
2262        for proto in protocols:
2263            x = C()
2264            x.foo = 42
2265            x.bar = "hello"
2266            s = self.dumps(x, proto)
2267            y = self.loads(s)
2268            x_keys = sorted(x.__dict__)
2269            y_keys = sorted(y.__dict__)
2270            for x_key, y_key in zip(x_keys, y_keys):
2271                self.assertIs(x_key, y_key)
2272
2273    def test_pickle_to_2x(self):
2274        # Pickle non-trivial data with protocol 2, expecting that it yields
2275        # the same result as Python 2.x did.
2276        # NOTE: this test is a bit too strong since we can produce different
2277        # bytecode that 2.x will still understand.
2278        dumped = self.dumps(range(5), 2)
2279        self.assertEqual(dumped, DATA_XRANGE)
2280        dumped = self.dumps(set([3]), 2)
2281        self.assertEqual(dumped, DATA_SET2)
2282
2283    def test_large_pickles(self):
2284        # Test the correctness of internal buffering routines when handling
2285        # large data.
2286        for proto in protocols:
2287            data = (1, min, b'xy' * (30 * 1024), len)
2288            dumped = self.dumps(data, proto)
2289            loaded = self.loads(dumped)
2290            self.assertEqual(len(loaded), len(data))
2291            self.assertEqual(loaded, data)
2292
2293    def test_int_pickling_efficiency(self):
2294        # Test compacity of int representation (see issue #12744)
2295        for proto in protocols:
2296            with self.subTest(proto=proto):
2297                pickles = [self.dumps(2**n, proto) for n in range(70)]
2298                sizes = list(map(len, pickles))
2299                # the size function is monotonic
2300                self.assertEqual(sorted(sizes), sizes)
2301                if proto >= 2:
2302                    for p in pickles:
2303                        self.assertFalse(opcode_in_pickle(pickle.LONG, p))
2304
2305    def _check_pickling_with_opcode(self, obj, opcode, proto):
2306        pickled = self.dumps(obj, proto)
2307        self.assertTrue(opcode_in_pickle(opcode, pickled))
2308        unpickled = self.loads(pickled)
2309        self.assertEqual(obj, unpickled)
2310
2311    def test_appends_on_non_lists(self):
2312        # Issue #17720
2313        obj = REX_six([1, 2, 3])
2314        for proto in protocols:
2315            if proto == 0:
2316                self._check_pickling_with_opcode(obj, pickle.APPEND, proto)
2317            else:
2318                self._check_pickling_with_opcode(obj, pickle.APPENDS, proto)
2319
2320    def test_setitems_on_non_dicts(self):
2321        obj = REX_seven({1: -1, 2: -2, 3: -3})
2322        for proto in protocols:
2323            if proto == 0:
2324                self._check_pickling_with_opcode(obj, pickle.SETITEM, proto)
2325            else:
2326                self._check_pickling_with_opcode(obj, pickle.SETITEMS, proto)
2327
2328    # Exercise framing (proto >= 4) for significant workloads
2329
    # Lower bound that check_frame_opcodes() asserts on every FRAME opcode's
    # length argument, and strict upper bound on the number of bytes it
    # tolerates in a run of opcodes written outside any frame.
    FRAME_SIZE_MIN = 4
    # Size above which check_frame_opcodes() expects bytes/str/bytearray
    # payloads to be written outside a frame; the framing tests also use it
    # as the upper bound on the average frame size.
    FRAME_SIZE_TARGET = 64 * 1024
2332
    def check_frame_opcodes(self, pickled):
        """
        Check the arguments of FRAME opcodes in a protocol 4+ pickle.

        Note that binary objects that are larger than FRAME_SIZE_TARGET are not
        framed by default and are therefore considered a frame by themselves in
        the following consistency check.

        *pickled* is the pickle data to scan with pickletools.genops().
        Nothing is returned; any inconsistency is reported through the
        TestCase assertion methods.
        """
        # frame_end: offset just past the current frame, or None when the
        #   scan is outside any frame.
        # frameless_start: offset of the first opcode of the current run of
        #   unframed opcodes, or None when no such run is open.
        frame_end = frameless_start = None
        # Opcodes whose payload may be written outside a frame when large.
        frameless_opcodes = {'BINBYTES', 'BINUNICODE', 'BINBYTES8',
                             'BINUNICODE8', 'BYTEARRAY8'}
        for op, arg, pos in pickletools.genops(pickled):
            if frame_end is not None:
                # No opcode may straddle the end of the frame; reaching the
                # end exactly closes the frame.
                self.assertLessEqual(pos, frame_end)
                if pos == frame_end:
                    frame_end = None

            if frame_end is not None:  # framed
                # Frames must not be nested.
                self.assertNotEqual(op.name, 'FRAME')
                if op.name in frameless_opcodes:
                    # Only short bytes and str objects should be written
                    # in a frame
                    self.assertLessEqual(len(arg), self.FRAME_SIZE_TARGET)

            else:  # not framed
                if (op.name == 'FRAME' or
                    (op.name in frameless_opcodes and
                     len(arg) > self.FRAME_SIZE_TARGET)):
                    # Frame or large bytes or str object
                    if frameless_start is not None:
                        # Only short data should be written outside of a frame
                        self.assertLess(pos - frameless_start,
                                        self.FRAME_SIZE_MIN)
                        frameless_start = None
                elif frameless_start is None and op.name != 'PROTO':
                    # First unframed opcode of a new run (PROTO is ignored).
                    frameless_start = pos

            if op.name == 'FRAME':
                self.assertGreaterEqual(arg, self.FRAME_SIZE_MIN)
                # 9 = 1 byte for the FRAME opcode + 8 for its length argument.
                frame_end = pos + 9 + arg

        # End of data: an open frame must end exactly here, and a trailing
        # unframed run must be short.
        pos = len(pickled)
        if frame_end is not None:
            self.assertEqual(frame_end, pos)
        elif frameless_start is not None:
            self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN)
2379
2380    @support.skip_if_pgo_task
2381    def test_framing_many_objects(self):
2382        obj = list(range(10**5))
2383        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2384            with self.subTest(proto=proto):
2385                pickled = self.dumps(obj, proto)
2386                unpickled = self.loads(pickled)
2387                self.assertEqual(obj, unpickled)
2388                bytes_per_frame = (len(pickled) /
2389                                   count_opcode(pickle.FRAME, pickled))
2390                self.assertGreater(bytes_per_frame,
2391                                   self.FRAME_SIZE_TARGET / 2)
2392                self.assertLessEqual(bytes_per_frame,
2393                                     self.FRAME_SIZE_TARGET * 1)
2394                self.check_frame_opcodes(pickled)
2395
2396    def test_framing_large_objects(self):
2397        N = 1024 * 1024
2398        small_items = [[i] for i in range(10)]
2399        obj = [b'x' * N, *small_items, b'y' * N, 'z' * N]
2400        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2401            for fast in [False, True]:
2402                with self.subTest(proto=proto, fast=fast):
2403                    if not fast:
2404                        # fast=False by default.
2405                        # This covers in-memory pickling with pickle.dumps().
2406                        pickled = self.dumps(obj, proto)
2407                    else:
2408                        # Pickler is required when fast=True.
2409                        if not hasattr(self, 'pickler'):
2410                            continue
2411                        buf = io.BytesIO()
2412                        pickler = self.pickler(buf, protocol=proto)
2413                        pickler.fast = fast
2414                        pickler.dump(obj)
2415                        pickled = buf.getvalue()
2416                    unpickled = self.loads(pickled)
2417                    # More informative error message in case of failure.
2418                    self.assertEqual([len(x) for x in obj],
2419                                     [len(x) for x in unpickled])
2420                    # Perform full equality check if the lengths match.
2421                    self.assertEqual(obj, unpickled)
2422                    n_frames = count_opcode(pickle.FRAME, pickled)
2423                    # A single frame for small objects between
2424                    # first two large objects.
2425                    self.assertEqual(n_frames, 1)
2426                    self.check_frame_opcodes(pickled)
2427
2428    def test_optional_frames(self):
2429        if pickle.HIGHEST_PROTOCOL < 4:
2430            return
2431
2432        def remove_frames(pickled, keep_frame=None):
2433            """Remove frame opcodes from the given pickle."""
2434            frame_starts = []
2435            # 1 byte for the opcode and 8 for the argument
2436            frame_opcode_size = 9
2437            for opcode, _, pos in pickletools.genops(pickled):
2438                if opcode.name == 'FRAME':
2439                    frame_starts.append(pos)
2440
2441            newpickle = bytearray()
2442            last_frame_end = 0
2443            for i, pos in enumerate(frame_starts):
2444                if keep_frame and keep_frame(i):
2445                    continue
2446                newpickle += pickled[last_frame_end:pos]
2447                last_frame_end = pos + frame_opcode_size
2448            newpickle += pickled[last_frame_end:]
2449            return newpickle
2450
2451        frame_size = self.FRAME_SIZE_TARGET
2452        num_frames = 20
2453        # Large byte objects (dict values) intermittent with small objects
2454        # (dict keys)
2455        for bytes_type in (bytes, bytearray):
2456            obj = {i: bytes_type([i]) * frame_size for i in range(num_frames)}
2457
2458            for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2459                pickled = self.dumps(obj, proto)
2460
2461                frameless_pickle = remove_frames(pickled)
2462                self.assertEqual(count_opcode(pickle.FRAME, frameless_pickle), 0)
2463                self.assertEqual(obj, self.loads(frameless_pickle))
2464
2465                some_frames_pickle = remove_frames(pickled, lambda i: i % 2)
2466                self.assertLess(count_opcode(pickle.FRAME, some_frames_pickle),
2467                                count_opcode(pickle.FRAME, pickled))
2468                self.assertEqual(obj, self.loads(some_frames_pickle))
2469
2470    @support.skip_if_pgo_task
2471    def test_framed_write_sizes_with_delayed_writer(self):
2472        class ChunkAccumulator:
2473            """Accumulate pickler output in a list of raw chunks."""
2474            def __init__(self):
2475                self.chunks = []
2476            def write(self, chunk):
2477                self.chunks.append(chunk)
2478            def concatenate_chunks(self):
2479                return b"".join(self.chunks)
2480
2481        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2482            objects = [(str(i).encode('ascii'), i % 42, {'i': str(i)})
2483                       for i in range(int(1e4))]
2484            # Add a large unique ASCII string
2485            objects.append('0123456789abcdef' *
2486                           (self.FRAME_SIZE_TARGET // 16 + 1))
2487
2488            # Protocol 4 packs groups of small objects into frames and issues
2489            # calls to write only once or twice per frame:
2490            # The C pickler issues one call to write per-frame (header and
2491            # contents) while Python pickler issues two calls to write: one for
2492            # the frame header and one for the frame binary contents.
2493            writer = ChunkAccumulator()
2494            self.pickler(writer, proto).dump(objects)
2495
2496            # Actually read the binary content of the chunks after the end
2497            # of the call to dump: any memoryview passed to write should not
2498            # be released otherwise this delayed access would not be possible.
2499            pickled = writer.concatenate_chunks()
2500            reconstructed = self.loads(pickled)
2501            self.assertEqual(reconstructed, objects)
2502            self.assertGreater(len(writer.chunks), 1)
2503
2504            # memoryviews should own the memory.
2505            del objects
2506            support.gc_collect()
2507            self.assertEqual(writer.concatenate_chunks(), pickled)
2508
2509            n_frames = (len(pickled) - 1) // self.FRAME_SIZE_TARGET + 1
2510            # There should be at least one call to write per frame
2511            self.assertGreaterEqual(len(writer.chunks), n_frames)
2512
2513            # but not too many either: there can be one for the proto,
2514            # one per-frame header, one per frame for the actual contents,
2515            # and two for the header.
2516            self.assertLessEqual(len(writer.chunks), 2 * n_frames + 3)
2517
2518            chunk_sizes = [len(c) for c in writer.chunks]
2519            large_sizes = [s for s in chunk_sizes
2520                           if s >= self.FRAME_SIZE_TARGET]
2521            medium_sizes = [s for s in chunk_sizes
2522                           if 9 < s < self.FRAME_SIZE_TARGET]
2523            small_sizes = [s for s in chunk_sizes if s <= 9]
2524
2525            # Large chunks should not be too large:
2526            for chunk_size in large_sizes:
2527                self.assertLess(chunk_size, 2 * self.FRAME_SIZE_TARGET,
2528                                chunk_sizes)
2529            # There shouldn't bee too many small chunks: the protocol header,
2530            # the frame headers and the large string headers are written
2531            # in small chunks.
2532            self.assertLessEqual(len(small_sizes),
2533                                 len(large_sizes) + len(medium_sizes) + 3,
2534                                 chunk_sizes)
2535
2536    def test_nested_names(self):
2537        global Nested
2538        class Nested:
2539            class A:
2540                class B:
2541                    class C:
2542                        pass
2543        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
2544            for obj in [Nested.A, Nested.A.B, Nested.A.B.C]:
2545                with self.subTest(proto=proto, obj=obj):
2546                    unpickled = self.loads(self.dumps(obj, proto))
2547                    self.assertIs(obj, unpickled)
2548
2549    def test_recursive_nested_names(self):
2550        global Recursive
2551        class Recursive:
2552            pass
2553        Recursive.mod = sys.modules[Recursive.__module__]
2554        Recursive.__qualname__ = 'Recursive.mod.Recursive'
2555        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
2556            with self.subTest(proto=proto):
2557                unpickled = self.loads(self.dumps(Recursive, proto))
2558                self.assertIs(unpickled, Recursive)
2559        del Recursive.mod # break reference loop
2560
2561    def test_py_methods(self):
2562        global PyMethodsTest
2563        class PyMethodsTest:
2564            @staticmethod
2565            def cheese():
2566                return "cheese"
2567            @classmethod
2568            def wine(cls):
2569                assert cls is PyMethodsTest
2570                return "wine"
2571            def biscuits(self):
2572                assert isinstance(self, PyMethodsTest)
2573                return "biscuits"
2574            class Nested:
2575                "Nested class"
2576                @staticmethod
2577                def ketchup():
2578                    return "ketchup"
2579                @classmethod
2580                def maple(cls):
2581                    assert cls is PyMethodsTest.Nested
2582                    return "maple"
2583                def pie(self):
2584                    assert isinstance(self, PyMethodsTest.Nested)
2585                    return "pie"
2586
2587        py_methods = (
2588            PyMethodsTest.cheese,
2589            PyMethodsTest.wine,
2590            PyMethodsTest().biscuits,
2591            PyMethodsTest.Nested.ketchup,
2592            PyMethodsTest.Nested.maple,
2593            PyMethodsTest.Nested().pie
2594        )
2595        py_unbound_methods = (
2596            (PyMethodsTest.biscuits, PyMethodsTest),
2597            (PyMethodsTest.Nested.pie, PyMethodsTest.Nested)
2598        )
2599        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
2600            for method in py_methods:
2601                with self.subTest(proto=proto, method=method):
2602                    unpickled = self.loads(self.dumps(method, proto))
2603                    self.assertEqual(method(), unpickled())
2604            for method, cls in py_unbound_methods:
2605                obj = cls()
2606                with self.subTest(proto=proto, method=method):
2607                    unpickled = self.loads(self.dumps(method, proto))
2608                    self.assertEqual(method(obj), unpickled(obj))
2609
2610    def test_c_methods(self):
2611        global Subclass
2612        class Subclass(tuple):
2613            class Nested(str):
2614                pass
2615
2616        c_methods = (
2617            # bound built-in method
2618            ("abcd".index, ("c",)),
2619            # unbound built-in method
2620            (str.index, ("abcd", "c")),
2621            # bound "slot" method
2622            ([1, 2, 3].__len__, ()),
2623            # unbound "slot" method
2624            (list.__len__, ([1, 2, 3],)),
2625            # bound "coexist" method
2626            ({1, 2}.__contains__, (2,)),
2627            # unbound "coexist" method
2628            (set.__contains__, ({1, 2}, 2)),
2629            # built-in class method
2630            (dict.fromkeys, (("a", 1), ("b", 2))),
2631            # built-in static method
2632            (bytearray.maketrans, (b"abc", b"xyz")),
2633            # subclass methods
2634            (Subclass([1,2,2]).count, (2,)),
2635            (Subclass.count, (Subclass([1,2,2]), 2)),
2636            (Subclass.Nested("sweet").count, ("e",)),
2637            (Subclass.Nested.count, (Subclass.Nested("sweet"), "e")),
2638        )
2639        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
2640            for method, args in c_methods:
2641                with self.subTest(proto=proto, method=method):
2642                    unpickled = self.loads(self.dumps(method, proto))
2643                    self.assertEqual(method(*args), unpickled(*args))
2644
2645    def test_compat_pickle(self):
2646        tests = [
2647            (range(1, 7), '__builtin__', 'xrange'),
2648            (map(int, '123'), 'itertools', 'imap'),
2649            (functools.reduce, '__builtin__', 'reduce'),
2650            (dbm.whichdb, 'whichdb', 'whichdb'),
2651            (Exception(), 'exceptions', 'Exception'),
2652            (collections.UserDict(), 'UserDict', 'IterableUserDict'),
2653            (collections.UserList(), 'UserList', 'UserList'),
2654            (collections.defaultdict(), 'collections', 'defaultdict'),
2655        ]
2656        for val, mod, name in tests:
2657            for proto in range(3):
2658                with self.subTest(type=type(val), proto=proto):
2659                    pickled = self.dumps(val, proto)
2660                    self.assertIn(('c%s\n%s' % (mod, name)).encode(), pickled)
2661                    self.assertIs(type(self.loads(pickled)), type(val))
2662
2663    def test_local_lookup_error(self):
2664        # Test that whichmodule() errors out cleanly when looking up
2665        # an assumed globally-reachable object fails.
2666        def f():
2667            pass
2668        # Since the function is local, lookup will fail
2669        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
2670            with self.assertRaises((AttributeError, pickle.PicklingError)):
2671                pickletools.dis(self.dumps(f, proto))
2672        # Same without a __module__ attribute (exercises a different path
2673        # in _pickle.c).
2674        del f.__module__
2675        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
2676            with self.assertRaises((AttributeError, pickle.PicklingError)):
2677                pickletools.dis(self.dumps(f, proto))
2678        # Yet a different path.
2679        f.__name__ = f.__qualname__
2680        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
2681            with self.assertRaises((AttributeError, pickle.PicklingError)):
2682                pickletools.dis(self.dumps(f, proto))
2683
2684    #
2685    # PEP 574 tests below
2686    #
2687
2688    def buffer_like_objects(self):
2689        # Yield buffer-like objects with the bytestring "abcdef" in them
2690        bytestring = b"abcdefgh"
2691        yield ZeroCopyBytes(bytestring)
2692        yield ZeroCopyBytearray(bytestring)
2693        if _testbuffer is not None:
2694            items = list(bytestring)
2695            value = int.from_bytes(bytestring, byteorder='little')
2696            for flags in (0, _testbuffer.ND_WRITABLE):
2697                # 1-D, contiguous
2698                yield PicklableNDArray(items, format='B', shape=(8,),
2699                                       flags=flags)
2700                # 2-D, C-contiguous
2701                yield PicklableNDArray(items, format='B', shape=(4, 2),
2702                                       strides=(2, 1), flags=flags)
2703                # 2-D, Fortran-contiguous
2704                yield PicklableNDArray(items, format='B',
2705                                       shape=(4, 2), strides=(1, 4),
2706                                       flags=flags)
2707
2708    def test_in_band_buffers(self):
2709        # Test in-band buffers (PEP 574)
2710        for obj in self.buffer_like_objects():
2711            for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
2712                data = self.dumps(obj, proto)
2713                if obj.c_contiguous and proto >= 5:
2714                    # The raw memory bytes are serialized in physical order
2715                    self.assertIn(b"abcdefgh", data)
2716                self.assertEqual(count_opcode(pickle.NEXT_BUFFER, data), 0)
2717                if proto >= 5:
2718                    self.assertEqual(count_opcode(pickle.SHORT_BINBYTES, data),
2719                                     1 if obj.readonly else 0)
2720                    self.assertEqual(count_opcode(pickle.BYTEARRAY8, data),
2721                                     0 if obj.readonly else 1)
2722                    # Return a true value from buffer_callback should have
2723                    # the same effect
2724                    def buffer_callback(obj):
2725                        return True
2726                    data2 = self.dumps(obj, proto,
2727                                       buffer_callback=buffer_callback)
2728                    self.assertEqual(data2, data)
2729
2730                new = self.loads(data)
2731                # It's a copy
2732                self.assertIsNot(new, obj)
2733                self.assertIs(type(new), type(obj))
2734                self.assertEqual(new, obj)
2735
2736    # XXX Unfortunately cannot test non-contiguous array
2737    # (see comment in PicklableNDArray.__reduce_ex__)
2738
2739    def test_oob_buffers(self):
2740        # Test out-of-band buffers (PEP 574)
2741        for obj in self.buffer_like_objects():
2742            for proto in range(0, 5):
2743                # Need protocol >= 5 for buffer_callback
2744                with self.assertRaises(ValueError):
2745                    self.dumps(obj, proto,
2746                               buffer_callback=[].append)
2747            for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2748                buffers = []
2749                buffer_callback = lambda pb: buffers.append(pb.raw())
2750                data = self.dumps(obj, proto,
2751                                  buffer_callback=buffer_callback)
2752                self.assertNotIn(b"abcdefgh", data)
2753                self.assertEqual(count_opcode(pickle.SHORT_BINBYTES, data), 0)
2754                self.assertEqual(count_opcode(pickle.BYTEARRAY8, data), 0)
2755                self.assertEqual(count_opcode(pickle.NEXT_BUFFER, data), 1)
2756                self.assertEqual(count_opcode(pickle.READONLY_BUFFER, data),
2757                                 1 if obj.readonly else 0)
2758
2759                if obj.c_contiguous:
2760                    self.assertEqual(bytes(buffers[0]), b"abcdefgh")
2761                # Need buffers argument to unpickle properly
2762                with self.assertRaises(pickle.UnpicklingError):
2763                    self.loads(data)
2764
2765                new = self.loads(data, buffers=buffers)
2766                if obj.zero_copy_reconstruct:
2767                    # Zero-copy achieved
2768                    self.assertIs(new, obj)
2769                else:
2770                    self.assertIs(type(new), type(obj))
2771                    self.assertEqual(new, obj)
2772                # Non-sequence buffers accepted too
2773                new = self.loads(data, buffers=iter(buffers))
2774                if obj.zero_copy_reconstruct:
2775                    # Zero-copy achieved
2776                    self.assertIs(new, obj)
2777                else:
2778                    self.assertIs(type(new), type(obj))
2779                    self.assertEqual(new, obj)
2780
2781    def test_oob_buffers_writable_to_readonly(self):
2782        # Test reconstructing readonly object from writable buffer
2783        obj = ZeroCopyBytes(b"foobar")
2784        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2785            buffers = []
2786            buffer_callback = buffers.append
2787            data = self.dumps(obj, proto, buffer_callback=buffer_callback)
2788
2789            buffers = map(bytearray, buffers)
2790            new = self.loads(data, buffers=buffers)
2791            self.assertIs(type(new), type(obj))
2792            self.assertEqual(new, obj)
2793
2794    def test_picklebuffer_error(self):
2795        # PickleBuffer forbidden with protocol < 5
2796        pb = pickle.PickleBuffer(b"foobar")
2797        for proto in range(0, 5):
2798            with self.assertRaises(pickle.PickleError):
2799                self.dumps(pb, proto)
2800
2801    def test_buffer_callback_error(self):
2802        def buffer_callback(buffers):
2803            1/0
2804        pb = pickle.PickleBuffer(b"foobar")
2805        with self.assertRaises(ZeroDivisionError):
2806            self.dumps(pb, 5, buffer_callback=buffer_callback)
2807
2808    def test_buffers_error(self):
2809        pb = pickle.PickleBuffer(b"foobar")
2810        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2811            data = self.dumps(pb, proto, buffer_callback=[].append)
2812            # Non iterable buffers
2813            with self.assertRaises(TypeError):
2814                self.loads(data, buffers=object())
2815            # Buffer iterable exhausts too early
2816            with self.assertRaises(pickle.UnpicklingError):
2817                self.loads(data, buffers=[])
2818
2819    def test_inband_accept_default_buffers_argument(self):
2820        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2821            data_pickled = self.dumps(1, proto, buffer_callback=None)
2822            data = self.loads(data_pickled, buffers=None)
2823
2824    @unittest.skipIf(np is None, "Test needs Numpy")
2825    def test_buffers_numpy(self):
2826        def check_no_copy(x, y):
2827            np.testing.assert_equal(x, y)
2828            self.assertEqual(x.ctypes.data, y.ctypes.data)
2829
2830        def check_copy(x, y):
2831            np.testing.assert_equal(x, y)
2832            self.assertNotEqual(x.ctypes.data, y.ctypes.data)
2833
2834        def check_array(arr):
2835            # In-band
2836            for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
2837                data = self.dumps(arr, proto)
2838                new = self.loads(data)
2839                check_copy(arr, new)
2840            for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2841                buffer_callback = lambda _: True
2842                data = self.dumps(arr, proto, buffer_callback=buffer_callback)
2843                new = self.loads(data)
2844                check_copy(arr, new)
2845            # Out-of-band
2846            for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
2847                buffers = []
2848                buffer_callback = buffers.append
2849                data = self.dumps(arr, proto, buffer_callback=buffer_callback)
2850                new = self.loads(data, buffers=buffers)
2851                if arr.flags.c_contiguous or arr.flags.f_contiguous:
2852                    check_no_copy(arr, new)
2853                else:
2854                    check_copy(arr, new)
2855
2856        # 1-D
2857        arr = np.arange(6)
2858        check_array(arr)
2859        # 1-D, non-contiguous
2860        check_array(arr[::2])
2861        # 2-D, C-contiguous
2862        arr = np.arange(12).reshape((3, 4))
2863        check_array(arr)
2864        # 2-D, F-contiguous
2865        check_array(arr.T)
2866        # 2-D, non-contiguous
2867        check_array(arr[::2])
2868
2869
class BigmemPickleTests(unittest.TestCase):
    # Pickling of objects whose size sits at the 2 GiB / 4 GiB boundaries.
    # self.dumps is used but not defined in this class.  Large objects are
    # rebound to None in finally clauses as soon as they are no longer
    # needed (presumably to release their memory early).

    # Binary protocols can serialize longs of up to 2 GiB-1

    @bigmemtest(size=_2G, memuse=3.6, dry_run=False)
    def test_huge_long_32b(self, size):
        data = 1 << (8 * size)
        try:
            for proto in (p for p in protocols if p >= 2):
                with self.subTest(proto=proto):
                    with self.assertRaises((ValueError, OverflowError)):
                        self.dumps(data, protocol=proto)
        finally:
            data = None

    # Protocol 3 can serialize up to 4 GiB-1 as a bytes object
    # (older protocols don't have a dedicated opcode for bytes and are
    # too inefficient)

    @bigmemtest(size=_2G, memuse=2.5, dry_run=False)
    def test_huge_bytes_32b(self, size):
        data = b"abcd" * (size // 4)
        try:
            for proto in (p for p in protocols if p >= 3):
                with self.subTest(proto=proto):
                    try:
                        pickled = self.dumps(data, protocol=proto)
                        expected_header = (pickle.BINBYTES +
                                           struct.pack("<I", len(data)))
                        start = pickled.index(data)
                        # The payload must be immediately preceded by a
                        # BINBYTES opcode carrying its 4-byte length.
                        self.assertEqual(
                            expected_header,
                            pickled[start - len(expected_header):start])
                    finally:
                        pickled = None
        finally:
            data = None

    @bigmemtest(size=_4G, memuse=2.5, dry_run=False)
    def test_huge_bytes_64b(self, size):
        data = b"acbd" * (size // 4)
        try:
            for proto in (p for p in protocols if p >= 3):
                with self.subTest(proto=proto):
                    if proto == 3:
                        # Protocol 3 does not support large bytes objects.
                        # Verify that we do not crash when processing one.
                        with self.assertRaises((ValueError, OverflowError)):
                            self.dumps(data, protocol=proto)
                    else:
                        try:
                            pickled = self.dumps(data, protocol=proto)
                            expected_header = (pickle.BINBYTES8 +
                                               struct.pack("<Q", len(data)))
                            start = pickled.index(data)
                            # The payload must be immediately preceded by a
                            # BINBYTES8 opcode carrying its 8-byte length.
                            self.assertEqual(
                                expected_header,
                                pickled[start - len(expected_header):start])
                        finally:
                            pickled = None
        finally:
            data = None

    # All protocols use 1-byte per printable ASCII character; we add another
    # byte because the encoded form has to be copied into the internal buffer.

    @bigmemtest(size=_2G, memuse=8, dry_run=False)
    def test_huge_str_32b(self, size):
        data = "abcd" * (size // 4)
        try:
            for proto in (p for p in protocols if p != 0):
                with self.subTest(proto=proto):
                    try:
                        pickled = self.dumps(data, protocol=proto)
                        expected_header = (pickle.BINUNICODE +
                                           struct.pack("<I", len(data)))
                        start = pickled.index(b'abcd')
                        self.assertEqual(
                            expected_header,
                            pickled[start - len(expected_header):start])
                        # Distance from the first to the end of the last
                        # b"abcd" occurrence must equal the string length.
                        payload_span = (pickled.rindex(b"abcd") +
                                        len(b"abcd") - start)
                        self.assertEqual(payload_span, len(data))
                    finally:
                        pickled = None
        finally:
            data = None

    # BINUNICODE (protocols 1, 2 and 3) cannot carry more than 2**32 - 1 bytes
    # of utf-8 encoded unicode. BINUNICODE8 (protocol 4) supports these huge
    # unicode strings however.

    @bigmemtest(size=_4G, memuse=8, dry_run=False)
    def test_huge_str_64b(self, size):
        data = "abcd" * (size // 4)
        try:
            for proto in (p for p in protocols if p != 0):
                with self.subTest(proto=proto):
                    if proto < 4:
                        with self.assertRaises((ValueError, OverflowError)):
                            self.dumps(data, protocol=proto)
                    else:
                        try:
                            pickled = self.dumps(data, protocol=proto)
                            expected_header = (pickle.BINUNICODE8 +
                                               struct.pack("<Q", len(data)))
                            start = pickled.index(b'abcd')
                            self.assertEqual(
                                expected_header,
                                pickled[start - len(expected_header):start])
                            # Distance from the first to the end of the last
                            # b"abcd" occurrence must equal the string length.
                            payload_span = (pickled.rindex(b"abcd") +
                                            len(b"abcd") - start)
                            self.assertEqual(payload_span, len(data))
                        finally:
                            pickled = None
        finally:
            data = None
2995
2996
2997# Test classes for reduce_ex
2998
class REX_one:
    """Defines only __reduce__; __reduce_ex__ is inherited from object.

    Calling __reduce__ sets the instance's _reduce_called flag to 1.
    """

    _reduce_called = 0

    def __reduce__(self):
        self._reduce_called = 1
        return (REX_one, ())
3005
class REX_two:
    """Defines only __reduce_ex__; __reduce__ is inherited from object.

    The protocol passed to __reduce_ex__ is stored on the instance as _proto.
    """

    _proto = None

    def __reduce_ex__(self, proto):
        self._proto = proto
        return (REX_two, ())
3012
class REX_three:
    """Defines both hooks; only __reduce_ex__ is supposed to be called.

    __reduce_ex__ records the protocol in _proto and reduces to REX_two,
    while __reduce__ raises TestFailed.
    """

    _proto = None

    def __reduce_ex__(self, proto):
        self._proto = proto
        return (REX_two, ())

    def __reduce__(self):
        raise TestFailed("This __reduce__ shouldn't be called")
3020
class REX_four:
    """Delegating to the base class __reduce_ex__ should succeed.

    The requested protocol is stored in _proto before delegating.
    """

    _proto = None

    def __reduce_ex__(self, proto):
        self._proto = proto
        reduced = object.__reduce_ex__(self, proto)
        return reduced
3027
class REX_five:
    """Delegating to object.__reduce__ used to recurse infinitely.

    Calling __reduce__ sets the instance's _reduce_called flag to 1.
    """

    _reduce_called = 0

    def __reduce__(self):
        self._reduce_called = 1
        reduced = object.__reduce__(self)
        return reduced
3034
class REX_six:
    """Fixture for the 4th item of the reduce value (the list-item iterator).

    Instances wrap a list, compare equal when they are of exactly the same
    type and hold equal lists, and reduce to an empty constructor call plus
    an iterator over the stored items.
    """

    def __init__(self, items=None):
        if items is None:
            items = []
        self.items = items

    def __eq__(self, other):
        if type(self) is not type(other):
            return False
        return self.items == other.items

    def append(self, item):
        self.items.append(item)

    def __reduce__(self):
        item_iterator = iter(self.items)
        return (type(self), (), None, item_iterator, None)
3047
class REX_seven:
    """Fixture for the 5th item of the reduce value (the dict-item iterator).

    Instances wrap a dict, compare equal when they are of exactly the same
    type and hold equal dicts, and reduce to an empty constructor call plus
    an iterator over the stored (key, value) pairs.
    """

    def __init__(self, table=None):
        if table is None:
            table = {}
        self.table = table

    def __eq__(self, other):
        if type(self) is not type(other):
            return False
        return self.table == other.table

    def __setitem__(self, key, value):
        self.table[key] = value

    def __reduce__(self):
        pair_iterator = iter(self.table.items())
        return (type(self), (), None, None, pair_iterator)
3060
3061
3062# Test classes for newobj
3063
# Each class below subclasses one built-in type and exposes, as ``sample``,
# a representative value of that built-in type.

class MyInt(int):
    sample = 1


class MyFloat(float):
    sample = 1.0


class MyComplex(complex):
    sample = 1.0 + 0.0j


class MyStr(str):
    sample = "hello"


class MyUnicode(str):
    # Same base as MyStr, but the sample contains a non-ASCII character.
    sample = "hello \u1234"


class MyTuple(tuple):
    sample = (1, 2, 3)


class MyList(list):
    sample = [1, 2, 3]


class MyDict(dict):
    sample = {"a": 1, "b": 2}


class MySet(set):
    sample = {"a", "b"}


class MyFrozenSet(frozenset):
    sample = frozenset({"a", "b"})


# All of the subclasses above, in definition order.
myclasses = [
    MyInt,
    MyFloat,
    MyComplex,
    MyStr,
    MyUnicode,
    MyTuple,
    MyList,
    MyDict,
    MySet,
    MyFrozenSet,
]
3098
class MyIntWithNew(int):
    # Construction through this __new__ always fails.
    def __new__(cls, value):
        raise AssertionError()


class MyIntWithNew2(MyIntWithNew):
    # Restores the plain int constructor on top of the failing parent.
    __new__ = int.__new__
3105
3106
class SlotList(MyList):
    # MyList subclass that additionally declares a single slot named "foo".
    __slots__ = ["foo"]
3109
class SimpleNewObj(int):
    """int subclass whose __init__ must never run.

    Two instances are equal when both their integer values and their
    instance dictionaries are equal.
    """

    def __init__(self, *args, **kwargs):
        # raise an error, to make sure this isn't called
        raise TypeError("SimpleNewObj.__init__() didn't expect to get called")

    def __eq__(self, other):
        if int(self) != int(other):
            return False
        return self.__dict__ == other.__dict__
3116
class ComplexNewObj(SimpleNewObj):
    # Reconstruction arguments: the value as upper-case hex digits, plus
    # the base (16) needed to parse them back.
    def __getnewargs__(self):
        hex_digits = '%X' % self
        return (hex_digits, 16)
3120
class ComplexNewObjEx(SimpleNewObj):
    # Reconstruction arguments: the value as upper-case hex digits passed
    # positionally, and the base (16) passed as a keyword argument.
    def __getnewargs_ex__(self):
        positional = ('%X' % self,)
        keywords = {'base': 16}
        return positional, keywords
3124
class BadGetattr:
    # __getattr__ itself looks up self.foo.  Nothing in this class defines
    # "foo", so that lookup falls back to __getattr__ again: any missing
    # attribute access on an instance therefore recurses without end.
    def __getattr__(self, key):
        self.foo
3128
3129
class AbstractPickleModuleTests(unittest.TestCase):
    # Tests for the module-level pickle API.  The attributes dump, dumps,
    # load, loads, Pickler and Unpickler are used below but not defined in
    # this class; concrete subclasses are expected to provide them.

    def test_dump_closed_file(self):
        # dump() to an already closed file must raise ValueError.
        f = open(TESTFN, "wb")
        try:
            f.close()
            self.assertRaises(ValueError, self.dump, 123, f)
        finally:
            support.unlink(TESTFN)

    def test_load_closed_file(self):
        # load() from an already closed file must raise ValueError.
        # (This test previously repeated the dump() check verbatim and
        # never called load() at all.)
        try:
            # Make sure the file exists before reopening it for reading.
            with open(TESTFN, "wb"):
                pass
            f = open(TESTFN, "rb")
            f.close()
            self.assertRaises(ValueError, self.load, f)
        finally:
            support.unlink(TESTFN)

    def test_load_from_and_dump_to_file(self):
        # dump() followed by load() on the same in-memory stream must give
        # back an equal object.
        stream = io.BytesIO()
        data = [123, {}, 124]
        self.dump(data, stream)
        stream.seek(0)
        unpickled = self.load(stream)
        self.assertEqual(unpickled, data)

    def test_highest_protocol(self):
        # Of course this needs to be changed when HIGHEST_PROTOCOL changes.
        self.assertEqual(pickle.HIGHEST_PROTOCOL, 5)

    def test_callapi(self):
        # The protocol may be given positionally or by keyword; this only
        # checks that the calls are accepted.
        f = io.BytesIO()
        # With and without keyword arguments
        self.dump(123, f, -1)
        self.dump(123, file=f, protocol=-1)
        self.dumps(123, -1)
        self.dumps(123, protocol=-1)
        self.Pickler(f, -1)
        self.Pickler(f, protocol=-1)

    def test_dump_text_file(self):
        # dump() to a file opened in text mode must raise TypeError under
        # every protocol.
        f = open(TESTFN, "w")
        try:
            for proto in protocols:
                self.assertRaises(TypeError, self.dump, 123, f, proto)
        finally:
            f.close()
            support.unlink(TESTFN)

    def test_incomplete_input(self):
        # A pickle that ends too early must raise one of the accepted
        # errors.
        s = io.BytesIO(b"X''.")
        self.assertRaises((EOFError, struct.error, pickle.UnpicklingError), self.load, s)

    def test_bad_init(self):
        # Test issue3664 (pickle can segfault from a badly initialized Pickler).
        # Override initialization without calling __init__() of the superclass.
        class BadPickler(self.Pickler):
            def __init__(self): pass

        class BadUnpickler(self.Unpickler):
            def __init__(self): pass

        self.assertRaises(pickle.PicklingError, BadPickler().dump, 0)
        self.assertRaises(pickle.UnpicklingError, BadUnpickler().load)

    def check_dumps_loads_oob_buffers(self, dumps, loads):
        # Helper shared by the two out-of-band tests below; *dumps* and
        # *loads* are the callables under test.
        # No need to do the full gamut of tests here, just enough to
        # check that dumps() and loads() redirect their arguments
        # to the underlying Pickler and Unpickler, respectively.
        obj = ZeroCopyBytes(b"foo")

        for proto in range(0, 5):
            # Need protocol >= 5 for buffer_callback
            with self.assertRaises(ValueError):
                dumps(obj, protocol=proto,
                      buffer_callback=[].append)
        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
            buffers = []
            buffer_callback = buffers.append
            data = dumps(obj, protocol=proto,
                         buffer_callback=buffer_callback)
            # The payload must have been handed to the callback instead of
            # being written into the pickle stream.
            self.assertNotIn(b"foo", data)
            self.assertEqual(bytes(buffers[0]), b"foo")
            # Need buffers argument to unpickle properly
            with self.assertRaises(pickle.UnpicklingError):
                loads(data)
            new = loads(data, buffers=buffers)
            self.assertIs(new, obj)

    def test_dumps_loads_oob_buffers(self):
        # Test out-of-band buffers (PEP 574) with top-level dumps() and loads()
        self.check_dumps_loads_oob_buffers(self.dumps, self.loads)

    def test_dump_load_oob_buffers(self):
        # Test out-of-band buffers (PEP 574) with top-level dump() and load()
        def dumps(obj, **kwargs):
            # Adapt the file-based dump() to the dumps() calling convention.
            f = io.BytesIO()
            self.dump(obj, f, **kwargs)
            return f.getvalue()

        def loads(data, **kwargs):
            # Adapt the file-based load() to the loads() calling convention.
            f = io.BytesIO(data)
            return self.load(f, **kwargs)

        self.check_dumps_loads_oob_buffers(dumps, loads)
3235
3236
class AbstractPersistentPicklerTests(unittest.TestCase):
    """Round-trip test for the persistent_id() / persistent_load() hooks.

    This class defines persistent_id() and persistent_load() functions that
    should be used by the pickler.  All even integers, plus the string
    "test_false_value", are pickled using persistent ids.  ``self.dumps``
    and ``self.loads`` are not defined here; they are presumably supplied
    by concrete subclasses.
    """

    def persistent_id(self, object):
        """Return a persistent id for *object*, or None to pickle it normally.

        Even integers map to their decimal string; "test_false_value" maps
        to the empty string.  Each case bumps its own counter.
        """
        if isinstance(object, int) and object % 2 == 0:
            self.id_count += 1
            return str(object)
        if object == "test_false_value":
            # A falsy persistent id that is nevertheless not None.
            self.false_count += 1
            return ""
        return None

    def persistent_load(self, oid):
        """Return the object designated by the persistent id *oid*.

        A falsy *oid* yields "test_false_value"; anything else is parsed as
        an integer, which must be even.
        """
        if not oid:
            self.load_false_count += 1
            return "test_false_value"
        self.load_count += 1
        value = int(oid)
        # Use a unittest assertion rather than a bare ``assert`` so the
        # check is not compiled away when running under -O.
        self.assertEqual(value % 2, 0)
        return value

    def test_persistence(self):
        """Round-trip a list and check how many times each hook fired."""
        L = list(range(10)) + ["test_false_value"]
        for proto in protocols:
            with self.subTest(proto=proto):
                # Fresh counters for every protocol.
                self.id_count = 0
                self.false_count = 0
                self.load_false_count = 0
                self.load_count = 0
                self.assertEqual(self.loads(self.dumps(L, proto)), L)
                # range(10) holds five even integers: 0, 2, 4, 6 and 8.
                self.assertEqual(self.id_count, 5)
                self.assertEqual(self.false_count, 1)
                self.assertEqual(self.load_count, 5)
                self.assertEqual(self.load_false_count, 1)
3275
3276
class AbstractIdentityPersistentPicklerTests(unittest.TestCase):
    """Tests in which every object is used as its own persistent id.

    ``self.dumps`` and ``self.loads`` are not defined here; they are
    presumably supplied by concrete subclasses.
    """

    def persistent_id(self, obj):
        # Identity mapping: the object is its own persistent id.
        return obj

    def persistent_load(self, pid):
        # Identity mapping: the persistent id is the object.
        return pid

    def _check_return_correct_type(self, obj, proto):
        """Round-trip *obj* with protocol *proto*; check its type and value."""
        roundtripped = self.loads(self.dumps(obj, proto))
        self.assertIsInstance(roundtripped, type(obj))
        self.assertEqual(roundtripped, obj)

    def test_return_correct_type(self):
        samples = [b"abc\n", "abc\n", -1, -1.1 * 0.1, str]
        for proto in protocols:
            if proto == 0:
                # Protocol 0 supports only ASCII strings.
                self._check_return_correct_type("abc", 0)
                continue
            for obj in samples:
                self._check_return_correct_type(obj, proto)

    def test_protocol0_is_ascii_only(self):
        non_ascii_str = "\N{EMPTY SET}"
        with self.assertRaises(pickle.PicklingError):
            self.dumps(non_ascii_str, 0)
        # Hand-build a stream whose PERSID argument is not ASCII.
        pickled = b"".join(
            [pickle.PERSID, non_ascii_str.encode('utf-8'), b'\n.'])
        with self.assertRaises(pickle.UnpicklingError):
            self.loads(pickled)
3304
3305
class AbstractPicklerUnpicklerObjectTests(unittest.TestCase):
    """Tests for reusing Pickler/Unpickler objects and for their memo attribute.

    ``pickler_class`` and ``unpickler_class`` default to None here and are
    checked in setUp(); they are meant to be overridden with real classes.
    """

    pickler_class = None
    unpickler_class = None

    def setUp(self):
        # Use unittest assertions rather than bare ``assert`` statements so
        # the sanity check still runs when the interpreter is started with -O.
        self.assertTrue(self.pickler_class, "pickler_class is not set")
        self.assertTrue(self.unpickler_class, "unpickler_class is not set")

    def test_clear_pickler_memo(self):
        # To test whether clear_memo() has any effect, we pickle an object,
        # then pickle it again without clearing the memo; the two serialized
        # forms should be different. If we clear_memo() and then pickle the
        # object again, the third serialized form should be identical to the
        # first one we obtained.
        data = ["abcdefg", "abcdefg", 44]
        for proto in protocols:
            with self.subTest(proto=proto):
                f = io.BytesIO()
                pickler = self.pickler_class(f, proto)

                pickler.dump(data)
                first_pickled = f.getvalue()

                # Reset BytesIO object.
                f.seek(0)
                f.truncate()

                pickler.dump(data)
                second_pickled = f.getvalue()

                # Reset the Pickler and BytesIO objects.
                pickler.clear_memo()
                f.seek(0)
                f.truncate()

                pickler.dump(data)
                third_pickled = f.getvalue()

                self.assertNotEqual(first_pickled, second_pickled)
                self.assertEqual(first_pickled, third_pickled)

    def test_priming_pickler_memo(self):
        # Verify that we can set the Pickler's memo attribute.
        data = ["abcdefg", "abcdefg", 44]
        f = io.BytesIO()
        pickler = self.pickler_class(f)

        pickler.dump(data)
        first_pickled = f.getvalue()

        # A second pickler that inherits the first one's memo must produce
        # a different stream for the same data.
        f = io.BytesIO()
        primed = self.pickler_class(f)
        primed.memo = pickler.memo

        primed.dump(data)
        primed_pickled = f.getvalue()

        self.assertNotEqual(first_pickled, primed_pickled)

    def test_priming_unpickler_memo(self):
        # Verify that we can set the Unpickler's memo attribute.
        data = ["abcdefg", "abcdefg", 44]
        f = io.BytesIO()
        pickler = self.pickler_class(f)

        pickler.dump(data)
        first_pickled = f.getvalue()

        f = io.BytesIO()
        primed = self.pickler_class(f)
        primed.memo = pickler.memo

        primed.dump(data)
        primed_pickled = f.getvalue()

        unpickler = self.unpickler_class(io.BytesIO(first_pickled))
        unpickled_data1 = unpickler.load()

        self.assertEqual(unpickled_data1, data)

        # Load the primed pickle with an unpickler that shares the memo of
        # the unpickler that loaded the first pickle.
        primed = self.unpickler_class(io.BytesIO(primed_pickled))
        primed.memo = unpickler.memo
        unpickled_data2 = primed.load()

        primed.memo.clear()

        self.assertEqual(unpickled_data2, data)
        # assertIs reports both operands on failure, unlike assertTrue(a is b).
        self.assertIs(unpickled_data2, unpickled_data1)

    def test_reusing_unpickler_objects(self):
        # A single Unpickler must be able to load a second, different
        # pickle after its underlying file has been rewritten.
        data1 = ["abcdefg", "abcdefg", 44]
        f = io.BytesIO()
        pickler = self.pickler_class(f)
        pickler.dump(data1)
        pickled1 = f.getvalue()

        data2 = ["abcdefg", 44, 44]
        f = io.BytesIO()
        pickler = self.pickler_class(f)
        pickler.dump(data2)
        pickled2 = f.getvalue()

        f = io.BytesIO()
        f.write(pickled1)
        f.seek(0)
        unpickler = self.unpickler_class(f)
        self.assertEqual(unpickler.load(), data1)

        f.seek(0)
        f.truncate()
        f.write(pickled2)
        f.seek(0)
        self.assertEqual(unpickler.load(), data2)

    def _check_multiple_unpicklings(self, ioclass, *, seekable=True):
        """Load N concatenated pickles from one *ioclass* stream.

        When *seekable* is true, also check that each load() advances the
        stream position by exactly the length of one pickle.  After the
        last pickle, a further load() must raise EOFError.
        """
        for proto in protocols:
            with self.subTest(proto=proto):
                data1 = [(x, str(x)) for x in range(2000)] + [b"abcde", len]
                f = ioclass()
                pickler = self.pickler_class(f, protocol=proto)
                pickler.dump(data1)
                pickled = f.getvalue()

                N = 5
                f = ioclass(pickled * N)
                unpickler = self.unpickler_class(f)
                for i in range(N):
                    if seekable:
                        pos = f.tell()
                    self.assertEqual(unpickler.load(), data1)
                    if seekable:
                        self.assertEqual(f.tell(), pos + len(pickled))
                self.assertRaises(EOFError, unpickler.load)

    def test_multiple_unpicklings_seekable(self):
        self._check_multiple_unpicklings(io.BytesIO)

    def test_multiple_unpicklings_unseekable(self):
        self._check_multiple_unpicklings(UnseekableIO, seekable=False)

    def test_multiple_unpicklings_minimal(self):
        # File-like object that doesn't support peek() and readinto()
        # (bpo-39681)
        self._check_multiple_unpicklings(MinimalIO, seekable=False)

    def test_unpickling_buffering_readline(self):
        # Issue #12687: the unpickler's buffering logic could fail with
        # text mode opcodes.
        data = list(range(10))
        for proto in protocols:
            for buf_size in range(1, 11):
                with self.subTest(proto=proto, buf_size=buf_size):
                    f = io.BufferedRandom(io.BytesIO(), buffer_size=buf_size)
                    pickler = self.pickler_class(f, protocol=proto)
                    pickler.dump(data)
                    f.seek(0)
                    unpickler = self.unpickler_class(f)
                    self.assertEqual(unpickler.load(), data)
3463
3464
3465# Tests for dispatch_table attribute
3466
REDUCE_A = 'reduce_A'


class AAA(object):
    """Object whose __reduce__() makes it unpickle as the string REDUCE_A."""

    def __reduce__(self):
        constructor_args = (REDUCE_A,)
        return (str, constructor_args)
3472
class BBB(object):
    """Object with instance state and a __setstate__() that ignores it."""

    def __init__(self):
        # Add an instance attribute to enable state-saving routines at pickling
        # time.
        self.a = "some attribute"

    def __setstate__(self, state):
        # *state* is deliberately discarded: the attribute records that this
        # method was the one that ran.
        self.a = "BBB.__setstate__"
3481
3482
def setstate_bbb(obj, state):
    """Custom state setter for BBB objects.

    A callable like this may be written by someone other than the author of
    the BBB class.  When passed as the state_setter item of a custom reducer,
    it allows custom state-setting behavior for BBB objects.  It can be seen
    as the analogue of list_setitems or dict_setitems for foreign
    classes/functions.

    *state* is ignored; the attribute ``a`` is overwritten with a marker.
    """
    setattr(obj, "a", "custom state_setter")
3493
3494
3495
class AbstractCustomPicklerClass:
    """Pickler implementing a reducing hook using reducer_override."""

    def reducer_override(self, obj):
        """Return a custom reduction for *obj*, chosen by its ``__name__``.

        Objects named 'f' or 'MyClass' get a replacement reduce tuple, 'g'
        gets an invalid result, 'h' makes the hook raise ValueError, and
        anything else gets NotImplemented.
        """
        name = getattr(obj, "__name__", None)

        if name == 'f':
            # asking the pickler to save f as 5
            return (int, (5,))

        if name == 'MyClass':
            return (str, ('some str',))

        if name == 'g':
            # in this case, the callback returns an invalid result (not a 2-5
            # tuple or a string), the pickler should raise a proper error.
            return False

        if name == 'h':
            # Simulate a case when the reducer fails. The error should
            # be propagated to the original ``dump`` call.
            raise ValueError('The reducer just failed')

        # Every other object is left to the default pickling machinery.
        return NotImplemented
3519
class AbstractHookTests(unittest.TestCase):
    """Tests for the ``reducer_override`` hook of Pickler subclasses.

    ``self.pickler_class`` is not defined here; it is presumably supplied by
    concrete subclasses and is expected to implement ``reducer_override``
    the way AbstractCustomPicklerClass does.
    """

    def test_pickler_hook(self):
        # test the ability of a custom, user-defined CPickler subclass to
        # override the default reducing routines of any type using the method
        # reducer_override

        def f():
            pass

        def g():
            pass

        def h():
            pass

        class MyClass:
            pass

        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
            with self.subTest(proto=proto):
                bio = io.BytesIO()
                p = self.pickler_class(bio, proto)

                p.dump([f, MyClass, math.log])
                new_f, some_str, math_log = pickle.loads(bio.getvalue())

                self.assertEqual(new_f, 5)
                self.assertEqual(some_str, 'some str')
                # math.log does not have its usual reducer overridden, so the
                # custom reduction callback should silently direct the pickler
                # to the default pickling by attribute, by returning
                # NotImplemented
                self.assertIs(math_log, math.log)

                with self.assertRaises(pickle.PicklingError):
                    p.dump(g)

                with self.assertRaisesRegex(
                        ValueError, 'The reducer just failed'):
                    p.dump(h)

    @support.cpython_only
    def test_reducer_override_no_reference_cycle(self):
        # bpo-39492: reducer_override used to induce a spurious reference cycle
        # inside the Pickler object, that could prevent all serialized objects
        # from being garbage-collected without explicitly invoking gc.collect.

        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
            with self.subTest(proto=proto):
                def f():
                    pass

                wr = weakref.ref(f)

                bio = io.BytesIO()
                p = self.pickler_class(bio, proto)
                p.dump(f)
                new_f = pickle.loads(bio.getvalue())
                # A unittest assertion, unlike a bare ``assert``, is still
                # checked when the interpreter runs with -O.
                self.assertEqual(new_f, 5)

                # Drop the only strong references; without a cycle the
                # function must be freed immediately.
                del p
                del f

                self.assertIsNone(wr())
3584
3585
class AbstractDispatchTableTests(unittest.TestCase):
    """Tests for the ``dispatch_table`` attribute of pickler objects.

    ``self.pickler_class`` and ``self.get_dispatch_table()`` are not defined
    here; they are presumably supplied by concrete subclasses.
    """

    def test_default_dispatch_table(self):
        # No dispatch_table attribute by default
        pickler = self.pickler_class(io.BytesIO(), 0)
        with self.assertRaises(AttributeError):
            pickler.dispatch_table
        self.assertFalse(hasattr(pickler, 'dispatch_table'))

    def test_class_dispatch_table(self):
        # A dispatch_table attribute can be specified class-wide
        table = self.get_dispatch_table()

        class MyPickler(self.pickler_class):
            dispatch_table = table

        def dumps(obj, protocol=None):
            buf = io.BytesIO()
            pickler = MyPickler(buf, protocol)
            self.assertEqual(pickler.dispatch_table, table)
            pickler.dump(obj)
            return buf.getvalue()

        self._test_dispatch_table(dumps, table)

    def test_instance_dispatch_table(self):
        # A dispatch_table attribute can also be specified instance-wide
        table = self.get_dispatch_table()

        def dumps(obj, protocol=None):
            buf = io.BytesIO()
            pickler = self.pickler_class(buf, protocol)
            pickler.dispatch_table = table
            self.assertEqual(pickler.dispatch_table, table)
            pickler.dump(obj)
            return buf.getvalue()

        self._test_dispatch_table(dumps, table)

    def _test_dispatch_table(self, dumps, dispatch_table):
        """Mutate *dispatch_table* step by step and check its effect.

        *dumps* pickles with a pickler that uses *dispatch_table*; the
        module-level pickle.dumps() serves as the unaffected reference.
        """
        def via_custom(obj):
            return pickle.loads(dumps(obj, 0))

        def via_default(obj):
            return pickle.loads(pickle.dumps(obj, 0))

        def make_str_reducer(marker):
            # Build a reducer that makes any object unpickle as *marker*.
            def reducer(obj):
                return str, (marker,)
            return reducer

        # pickling complex numbers using protocol 0 relies on copyreg
        # so check pickling a complex number still works
        z = 1 + 2j
        self.assertEqual(via_custom(z), z)
        self.assertEqual(via_default(z), z)

        # modify pickling of complex
        REDUCE_1 = 'reduce_1'
        reduce_1 = make_str_reducer(REDUCE_1)
        dispatch_table[complex] = reduce_1
        self.assertEqual(via_custom(z), REDUCE_1)
        self.assertEqual(via_default(z), z)

        a = AAA()
        b = BBB()

        def check_default_unaffected():
            # The default pickler must never see the custom table.
            self.assertEqual(via_default(a), REDUCE_A)
            self.assertIsInstance(via_default(b), BBB)

        # check picklability of AAA and BBB
        self.assertEqual(via_custom(a), REDUCE_A)
        self.assertIsInstance(via_custom(b), BBB)
        check_default_unaffected()

        # modify pickling of BBB
        dispatch_table[BBB] = reduce_1
        self.assertEqual(via_custom(a), REDUCE_A)
        self.assertEqual(via_custom(b), REDUCE_1)
        check_default_unaffected()

        # revert pickling of BBB and modify pickling of AAA
        REDUCE_2 = 'reduce_2'
        dispatch_table[AAA] = make_str_reducer(REDUCE_2)
        del dispatch_table[BBB]
        self.assertEqual(via_custom(a), REDUCE_2)
        self.assertIsInstance(via_custom(b), BBB)
        check_default_unaffected()

        # End-to-end testing of save_reduce with the state_setter keyword
        # argument. This is a dispatch_table test as the primary goal of
        # state_setter is to tweak objects reduction behavior.
        # In particular, state_setter is useful when the default __setstate__
        # behavior is not flexible enough.

        # No custom reducer for b has been registered for now, so
        # BBB.__setstate__ should be used at unpickling time
        self.assertEqual(via_default(b).a, "BBB.__setstate__")

        def reduce_bbb(obj):
            # Six-item reduce value; the last item is the state setter.
            return BBB, (), obj.__dict__, None, None, setstate_bbb

        dispatch_table[BBB] = reduce_bbb

        # The custom reducer reduce_bbb includes a state setter, that should
        # have priority over BBB.__setstate__
        self.assertEqual(via_custom(b).a, "custom state_setter")
3691
3692
if __name__ == "__main__":
    # Print some stuff that can be used to rewrite DATA{0,1,2}: for every
    # protocol, the pickle as a bytes literal in 20-byte rows, followed by
    # its disassembly.
    from pickletools import dis
    sample = create_data()
    row_width = 20
    for proto in range(pickle.HIGHEST_PROTOCOL+1):
        pickled = pickle.dumps(sample, proto)
        print("DATA{0} = (".format(proto))
        for start in range(0, len(pickled), row_width):
            row = bytes(pickled[start:start+row_width])
            print("    {0!r}".format(row))
        print(")")
        print()
        print("# Disassembly of DATA{0}".format(proto))
        print("DATA{0}_DIS = \"\"\"\\".format(proto))
        dis(pickled)
        print("\"\"\"")
        print()
3710