1import collections
2import copyreg
3import dbm
4import io
5import functools
6import os
7import pickle
8import pickletools
9import shutil
10import struct
11import sys
12import threading
13import unittest
14import weakref
15from textwrap import dedent
16from http.cookies import SimpleCookie
17
18from test import support
19from test.support import (
20    TestFailed, TESTFN, run_with_locale, no_tracing,
21    _2G, _4G, bigmemtest, reap_threads, forget,
22    )
23
24from pickle import bytes_types
25
# Skip decorator for tests that only make sense when sys.maxsize < 2**32.
requires_32b = unittest.skipUnless(sys.maxsize < 2**32,
                                   "test is only meaningful on 32-bit builds")

# Every protocol number from 0 up to and including pickle.HIGHEST_PROTOCOL.
# Tests that try a number of pickle protocols should have a
#     for proto in protocols:
# kind of outer loop.
protocols = range(pickle.HIGHEST_PROTOCOL + 1)
33
34
def opcode_in_pickle(code, pickle):
    """Return True if opcode *code* appears in the pickle, else False.

    code   -- the opcode as bytes (e.g. ``pickle.PROTO``); it is decoded as
              latin-1 so it can be compared with the ``code`` attribute of
              the opcode objects yielded by ``pickletools.genops()``.
    pickle -- the pickled data, passed unchanged to ``pickletools.genops()``.

    Scanning stops at the first matching opcode.
    """
    # Decode once up front rather than once per opcode in the stream.
    wanted = code.decode("latin-1")
    return any(op.code == wanted for op, _, _ in pickletools.genops(pickle))
41
def count_opcode(code, pickle):
    """Return the number of times opcode *code* appears in pickle.

    code   -- the opcode as bytes (e.g. ``pickle.BININT1``); it is decoded as
              latin-1 so it can be compared with the ``code`` attribute of
              the opcode objects yielded by ``pickletools.genops()``.
    pickle -- the pickled data, passed unchanged to ``pickletools.genops()``.
    """
    # Decode once up front rather than once per opcode in the stream.
    wanted = code.decode("latin-1")
    return sum(1 for op, _, _ in pickletools.genops(pickle)
               if op.code == wanted)
49
50
class UnseekableIO(io.BytesIO):
    """BytesIO variant that refuses seeking, telling and peeking.

    seekable() reports False, seek() and tell() raise
    io.UnsupportedOperation, and peek() raises NotImplementedError.
    """

    def seekable(self):
        return False

    def tell(self):
        raise io.UnsupportedOperation

    def seek(self, *_ignored):
        raise io.UnsupportedOperation

    def peek(self, *_ignored):
        raise NotImplementedError
63
64
65# We can't very well test the extension registry without putting known stuff
66# in it, but we have to be careful to restore its original state.  Code
67# should do this:
68#
69#     e = ExtensionSaver(extension_code)
70#     try:
71#         fiddle w/ the extension registry's stuff for extension_code
72#     finally:
73#         e.restore()
74
class ExtensionSaver:
    """Save, and later restore, the copyreg registration of one code.

    Attributes:
        code -- the extension code being guarded.
        pair -- the pair that was registered for the code when the saver
                was created, or None if there was no registration.
    """

    def __init__(self, code):
        # Remember current registration for code (if any), and remove it
        # (if there is one).
        self.code = code
        self.pair = copyreg._inverted_registry.get(code)
        if self.pair is not None:
            module, name = self.pair
            copyreg.remove_extension(module, name, code)

    def restore(self):
        # Drop whatever is registered for the code right now, then put the
        # remembered registration (if any) back.
        code = self.code
        current = copyreg._inverted_registry.get(code)
        if current is not None:
            copyreg.remove_extension(current[0], current[1], code)
        saved = self.pair
        if saved is not None:
            copyreg.add_extension(saved[0], saved[1], code)
95
class C:
    # Two instances compare equal when their instance dictionaries are equal.
    def __eq__(self, other):
        mine, theirs = self.__dict__, other.__dict__
        return mine == theirs
99
class D(C):
    # Same as C, except that the constructor demands one argument,
    # which it then ignores.
    def __init__(self, arg):
        return None
103
class E(C):
    # Same as C, plus a __getinitargs__ hook that reports no arguments.
    def __getinitargs__(self):
        no_args = ()
        return no_args
107
class H:
    """Plain class with no behaviour of its own."""
110
111# Hashable mutable key
class K:
    """Holder of a single ``value`` that reduces to ``K(value)``."""

    def __init__(self, value):
        self.value = value

    def __reduce__(self):
        # Shouldn't support the recursion itself
        rebuild_args = (self.value,)
        return (K, rebuild_args)
119
import __main__
# Expose the helper classes as attributes of __main__ and make each of them
# claim "__main__" as its module.
for _helper_class in (C, D, E, H, K):
    setattr(__main__, _helper_class.__name__, _helper_class)
    _helper_class.__module__ = "__main__"
del _helper_class
131
class myint(int):
    # int subclass that additionally keeps str() of its constructor
    # argument in the ``str`` attribute.
    def __init__(self, x):
        text = str(x)
        self.str = text
135
class initarg(C):
    # Stores its two constructor arguments and hands them back, in the
    # same order, from __getinitargs__.

    def __init__(self, a, b):
        self.a, self.b = a, b

    def __getinitargs__(self):
        return (self.a, self.b)
144
class metaclass(type):
    """Trivial metaclass: a subclass of ``type`` that adds nothing."""
147
class use_metaclass(metaclass=metaclass):
    """Empty class whose type is ``metaclass``."""
150
class pickling_metaclass(type):
    # Classes built with this metaclass are expected to carry a
    # ``reduce_args`` attribute; it drives both equality and reduction.

    def __eq__(self, other):
        if not (type(self) == type(other)):
            return False
        return self.reduce_args == other.reduce_args

    def __reduce__(self):
        factory = create_dynamic_class
        return (factory, self.reduce_args)
158
def create_dynamic_class(name, bases):
    """Build a class named *name* with *bases* using pickling_metaclass.

    The new class has an empty namespace and records ``(name, bases)`` in
    its ``reduce_args`` attribute.
    """
    reduce_args = (name, bases)
    cls = pickling_metaclass(name, bases, {})
    cls.reduce_args = reduce_args
    return cls
163
164# DATA0 .. DATA4 are the pickles we expect under the various protocols, for
165# the object returned by create_data().
166
167DATA0 = (
168    b'(lp0\nL0L\naL1L\naF2.0\n'
169    b'ac__builtin__\ncomple'
170    b'x\np1\n(F3.0\nF0.0\ntp2\n'
171    b'Rp3\naL1L\naL-1L\naL255'
172    b'L\naL-255L\naL-256L\naL'
173    b'65535L\naL-65535L\naL-'
174    b'65536L\naL2147483647L'
175    b'\naL-2147483647L\naL-2'
176    b'147483648L\na(Vabc\np4'
177    b'\ng4\nccopy_reg\n_recon'
178    b'structor\np5\n(c__main'
179    b'__\nC\np6\nc__builtin__'
180    b'\nobject\np7\nNtp8\nRp9\n'
181    b'(dp10\nVfoo\np11\nL1L\ns'
182    b'Vbar\np12\nL2L\nsbg9\ntp'
183    b'13\nag13\naL5L\na.'
184)
185
186# Disassembly of DATA0
187DATA0_DIS = """\
188    0: (    MARK
189    1: l        LIST       (MARK at 0)
190    2: p    PUT        0
191    5: L    LONG       0
192    9: a    APPEND
193   10: L    LONG       1
194   14: a    APPEND
195   15: F    FLOAT      2.0
196   20: a    APPEND
197   21: c    GLOBAL     '__builtin__ complex'
198   42: p    PUT        1
199   45: (    MARK
200   46: F        FLOAT      3.0
201   51: F        FLOAT      0.0
202   56: t        TUPLE      (MARK at 45)
203   57: p    PUT        2
204   60: R    REDUCE
205   61: p    PUT        3
206   64: a    APPEND
207   65: L    LONG       1
208   69: a    APPEND
209   70: L    LONG       -1
210   75: a    APPEND
211   76: L    LONG       255
212   82: a    APPEND
213   83: L    LONG       -255
214   90: a    APPEND
215   91: L    LONG       -256
216   98: a    APPEND
217   99: L    LONG       65535
218  107: a    APPEND
219  108: L    LONG       -65535
220  117: a    APPEND
221  118: L    LONG       -65536
222  127: a    APPEND
223  128: L    LONG       2147483647
224  141: a    APPEND
225  142: L    LONG       -2147483647
226  156: a    APPEND
227  157: L    LONG       -2147483648
228  171: a    APPEND
229  172: (    MARK
230  173: V        UNICODE    'abc'
231  178: p        PUT        4
232  181: g        GET        4
233  184: c        GLOBAL     'copy_reg _reconstructor'
234  209: p        PUT        5
235  212: (        MARK
236  213: c            GLOBAL     '__main__ C'
237  225: p            PUT        6
238  228: c            GLOBAL     '__builtin__ object'
239  248: p            PUT        7
240  251: N            NONE
241  252: t            TUPLE      (MARK at 212)
242  253: p        PUT        8
243  256: R        REDUCE
244  257: p        PUT        9
245  260: (        MARK
246  261: d            DICT       (MARK at 260)
247  262: p        PUT        10
248  266: V        UNICODE    'foo'
249  271: p        PUT        11
250  275: L        LONG       1
251  279: s        SETITEM
252  280: V        UNICODE    'bar'
253  285: p        PUT        12
254  289: L        LONG       2
255  293: s        SETITEM
256  294: b        BUILD
257  295: g        GET        9
258  298: t        TUPLE      (MARK at 172)
259  299: p    PUT        13
260  303: a    APPEND
261  304: g    GET        13
262  308: a    APPEND
263  309: L    LONG       5
264  313: a    APPEND
265  314: .    STOP
266highest protocol among opcodes = 0
267"""
268
269DATA1 = (
270    b']q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c__'
271    b'builtin__\ncomplex\nq\x01'
272    b'(G@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00t'
273    b'q\x02Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xffJ'
274    b'\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff'
275    b'\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00ab'
276    b'cq\x04h\x04ccopy_reg\n_reco'
277    b'nstructor\nq\x05(c__main'
278    b'__\nC\nq\x06c__builtin__\n'
279    b'object\nq\x07Ntq\x08Rq\t}q\n('
280    b'X\x03\x00\x00\x00fooq\x0bK\x01X\x03\x00\x00\x00bar'
281    b'q\x0cK\x02ubh\ttq\rh\rK\x05e.'
282)
283
284# Disassembly of DATA1
285DATA1_DIS = """\
286    0: ]    EMPTY_LIST
287    1: q    BINPUT     0
288    3: (    MARK
289    4: K        BININT1    0
290    6: K        BININT1    1
291    8: G        BINFLOAT   2.0
292   17: c        GLOBAL     '__builtin__ complex'
293   38: q        BINPUT     1
294   40: (        MARK
295   41: G            BINFLOAT   3.0
296   50: G            BINFLOAT   0.0
297   59: t            TUPLE      (MARK at 40)
298   60: q        BINPUT     2
299   62: R        REDUCE
300   63: q        BINPUT     3
301   65: K        BININT1    1
302   67: J        BININT     -1
303   72: K        BININT1    255
304   74: J        BININT     -255
305   79: J        BININT     -256
306   84: M        BININT2    65535
307   87: J        BININT     -65535
308   92: J        BININT     -65536
309   97: J        BININT     2147483647
310  102: J        BININT     -2147483647
311  107: J        BININT     -2147483648
312  112: (        MARK
313  113: X            BINUNICODE 'abc'
314  121: q            BINPUT     4
315  123: h            BINGET     4
316  125: c            GLOBAL     'copy_reg _reconstructor'
317  150: q            BINPUT     5
318  152: (            MARK
319  153: c                GLOBAL     '__main__ C'
320  165: q                BINPUT     6
321  167: c                GLOBAL     '__builtin__ object'
322  187: q                BINPUT     7
323  189: N                NONE
324  190: t                TUPLE      (MARK at 152)
325  191: q            BINPUT     8
326  193: R            REDUCE
327  194: q            BINPUT     9
328  196: }            EMPTY_DICT
329  197: q            BINPUT     10
330  199: (            MARK
331  200: X                BINUNICODE 'foo'
332  208: q                BINPUT     11
333  210: K                BININT1    1
334  212: X                BINUNICODE 'bar'
335  220: q                BINPUT     12
336  222: K                BININT1    2
337  224: u                SETITEMS   (MARK at 199)
338  225: b            BUILD
339  226: h            BINGET     9
340  228: t            TUPLE      (MARK at 112)
341  229: q        BINPUT     13
342  231: h        BINGET     13
343  233: K        BININT1    5
344  235: e        APPENDS    (MARK at 3)
345  236: .    STOP
346highest protocol among opcodes = 1
347"""
348
349DATA2 = (
350    b'\x80\x02]q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c'
351    b'__builtin__\ncomplex\n'
352    b'q\x01G@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00'
353    b'\x86q\x02Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xff'
354    b'J\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff'
355    b'\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00a'
356    b'bcq\x04h\x04c__main__\nC\nq\x05'
357    b')\x81q\x06}q\x07(X\x03\x00\x00\x00fooq\x08K\x01'
358    b'X\x03\x00\x00\x00barq\tK\x02ubh\x06tq\nh'
359    b'\nK\x05e.'
360)
361
362# Disassembly of DATA2
363DATA2_DIS = """\
364    0: \x80 PROTO      2
365    2: ]    EMPTY_LIST
366    3: q    BINPUT     0
367    5: (    MARK
368    6: K        BININT1    0
369    8: K        BININT1    1
370   10: G        BINFLOAT   2.0
371   19: c        GLOBAL     '__builtin__ complex'
372   40: q        BINPUT     1
373   42: G        BINFLOAT   3.0
374   51: G        BINFLOAT   0.0
375   60: \x86     TUPLE2
376   61: q        BINPUT     2
377   63: R        REDUCE
378   64: q        BINPUT     3
379   66: K        BININT1    1
380   68: J        BININT     -1
381   73: K        BININT1    255
382   75: J        BININT     -255
383   80: J        BININT     -256
384   85: M        BININT2    65535
385   88: J        BININT     -65535
386   93: J        BININT     -65536
387   98: J        BININT     2147483647
388  103: J        BININT     -2147483647
389  108: J        BININT     -2147483648
390  113: (        MARK
391  114: X            BINUNICODE 'abc'
392  122: q            BINPUT     4
393  124: h            BINGET     4
394  126: c            GLOBAL     '__main__ C'
395  138: q            BINPUT     5
396  140: )            EMPTY_TUPLE
397  141: \x81         NEWOBJ
398  142: q            BINPUT     6
399  144: }            EMPTY_DICT
400  145: q            BINPUT     7
401  147: (            MARK
402  148: X                BINUNICODE 'foo'
403  156: q                BINPUT     8
404  158: K                BININT1    1
405  160: X                BINUNICODE 'bar'
406  168: q                BINPUT     9
407  170: K                BININT1    2
408  172: u                SETITEMS   (MARK at 147)
409  173: b            BUILD
410  174: h            BINGET     6
411  176: t            TUPLE      (MARK at 113)
412  177: q        BINPUT     10
413  179: h        BINGET     10
414  181: K        BININT1    5
415  183: e        APPENDS    (MARK at 5)
416  184: .    STOP
417highest protocol among opcodes = 2
418"""
419
420DATA3 = (
421    b'\x80\x03]q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c'
422    b'builtins\ncomplex\nq\x01G'
423    b'@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00\x86q\x02'
424    b'Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xffJ\x00\xff'
425    b'\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff\xff\x7f'
426    b'J\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00abcq'
427    b'\x04h\x04c__main__\nC\nq\x05)\x81q'
428    b'\x06}q\x07(X\x03\x00\x00\x00barq\x08K\x02X\x03\x00'
429    b'\x00\x00fooq\tK\x01ubh\x06tq\nh\nK\x05'
430    b'e.'
431)
432
433# Disassembly of DATA3
434DATA3_DIS = """\
435    0: \x80 PROTO      3
436    2: ]    EMPTY_LIST
437    3: q    BINPUT     0
438    5: (    MARK
439    6: K        BININT1    0
440    8: K        BININT1    1
441   10: G        BINFLOAT   2.0
442   19: c        GLOBAL     'builtins complex'
443   37: q        BINPUT     1
444   39: G        BINFLOAT   3.0
445   48: G        BINFLOAT   0.0
446   57: \x86     TUPLE2
447   58: q        BINPUT     2
448   60: R        REDUCE
449   61: q        BINPUT     3
450   63: K        BININT1    1
451   65: J        BININT     -1
452   70: K        BININT1    255
453   72: J        BININT     -255
454   77: J        BININT     -256
455   82: M        BININT2    65535
456   85: J        BININT     -65535
457   90: J        BININT     -65536
458   95: J        BININT     2147483647
459  100: J        BININT     -2147483647
460  105: J        BININT     -2147483648
461  110: (        MARK
462  111: X            BINUNICODE 'abc'
463  119: q            BINPUT     4
464  121: h            BINGET     4
465  123: c            GLOBAL     '__main__ C'
466  135: q            BINPUT     5
467  137: )            EMPTY_TUPLE
468  138: \x81         NEWOBJ
469  139: q            BINPUT     6
470  141: }            EMPTY_DICT
471  142: q            BINPUT     7
472  144: (            MARK
473  145: X                BINUNICODE 'bar'
474  153: q                BINPUT     8
475  155: K                BININT1    2
476  157: X                BINUNICODE 'foo'
477  165: q                BINPUT     9
478  167: K                BININT1    1
479  169: u                SETITEMS   (MARK at 144)
480  170: b            BUILD
481  171: h            BINGET     6
482  173: t            TUPLE      (MARK at 110)
483  174: q        BINPUT     10
484  176: h        BINGET     10
485  178: K        BININT1    5
486  180: e        APPENDS    (MARK at 5)
487  181: .    STOP
488highest protocol among opcodes = 2
489"""
490
491DATA4 = (
492    b'\x80\x04\x95\xa8\x00\x00\x00\x00\x00\x00\x00]\x94(K\x00K\x01G@'
493    b'\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x94\x8c\x07'
494    b'complex\x94\x93\x94G@\x08\x00\x00\x00\x00\x00\x00G'
495    b'\x00\x00\x00\x00\x00\x00\x00\x00\x86\x94R\x94K\x01J\xff\xff\xff\xffK'
496    b'\xffJ\x01\xff\xff\xffJ\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ'
497    b'\x00\x00\xff\xffJ\xff\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80('
498    b'\x8c\x03abc\x94h\x06\x8c\x08__main__\x94\x8c'
499    b'\x01C\x94\x93\x94)\x81\x94}\x94(\x8c\x03bar\x94K\x02\x8c'
500    b'\x03foo\x94K\x01ubh\nt\x94h\x0eK\x05e.'
501)
502
503# Disassembly of DATA4
504DATA4_DIS = """\
505    0: \x80 PROTO      4
506    2: \x95 FRAME      168
507   11: ]    EMPTY_LIST
508   12: \x94 MEMOIZE
509   13: (    MARK
510   14: K        BININT1    0
511   16: K        BININT1    1
512   18: G        BINFLOAT   2.0
513   27: \x8c     SHORT_BINUNICODE 'builtins'
514   37: \x94     MEMOIZE
515   38: \x8c     SHORT_BINUNICODE 'complex'
516   47: \x94     MEMOIZE
517   48: \x93     STACK_GLOBAL
518   49: \x94     MEMOIZE
519   50: G        BINFLOAT   3.0
520   59: G        BINFLOAT   0.0
521   68: \x86     TUPLE2
522   69: \x94     MEMOIZE
523   70: R        REDUCE
524   71: \x94     MEMOIZE
525   72: K        BININT1    1
526   74: J        BININT     -1
527   79: K        BININT1    255
528   81: J        BININT     -255
529   86: J        BININT     -256
530   91: M        BININT2    65535
531   94: J        BININT     -65535
532   99: J        BININT     -65536
533  104: J        BININT     2147483647
534  109: J        BININT     -2147483647
535  114: J        BININT     -2147483648
536  119: (        MARK
537  120: \x8c         SHORT_BINUNICODE 'abc'
538  125: \x94         MEMOIZE
539  126: h            BINGET     6
540  128: \x8c         SHORT_BINUNICODE '__main__'
541  138: \x94         MEMOIZE
542  139: \x8c         SHORT_BINUNICODE 'C'
543  142: \x94         MEMOIZE
544  143: \x93         STACK_GLOBAL
545  144: \x94         MEMOIZE
546  145: )            EMPTY_TUPLE
547  146: \x81         NEWOBJ
548  147: \x94         MEMOIZE
549  148: }            EMPTY_DICT
550  149: \x94         MEMOIZE
551  150: (            MARK
552  151: \x8c             SHORT_BINUNICODE 'bar'
553  156: \x94             MEMOIZE
554  157: K                BININT1    2
555  159: \x8c             SHORT_BINUNICODE 'foo'
556  164: \x94             MEMOIZE
557  165: K                BININT1    1
558  167: u                SETITEMS   (MARK at 150)
559  168: b            BUILD
560  169: h            BINGET     10
561  171: t            TUPLE      (MARK at 119)
562  172: \x94     MEMOIZE
563  173: h        BINGET     14
564  175: K        BININT1    5
565  177: e        APPENDS    (MARK at 13)
566  178: .    STOP
567highest protocol among opcodes = 4
568"""
569
570# set([1,2]) pickled from 2.x with protocol 2
571DATA_SET = b'\x80\x02c__builtin__\nset\nq\x00]q\x01(K\x01K\x02e\x85q\x02Rq\x03.'
572
573# xrange(5) pickled from 2.x with protocol 2
574DATA_XRANGE = b'\x80\x02c__builtin__\nxrange\nq\x00K\x00K\x05K\x01\x87q\x01Rq\x02.'
575
576# a SimpleCookie() object pickled from 2.x with protocol 2
577DATA_COOKIE = (b'\x80\x02cCookie\nSimpleCookie\nq\x00)\x81q\x01U\x03key'
578               b'q\x02cCookie\nMorsel\nq\x03)\x81q\x04(U\x07commentq\x05U'
579               b'\x00q\x06U\x06domainq\x07h\x06U\x06secureq\x08h\x06U\x07'
580               b'expiresq\th\x06U\x07max-ageq\nh\x06U\x07versionq\x0bh\x06U'
581               b'\x04pathq\x0ch\x06U\x08httponlyq\rh\x06u}q\x0e(U\x0b'
582               b'coded_valueq\x0fU\x05valueq\x10h\x10h\x10h\x02h\x02ubs}q\x11b.')
583
584# set([3]) pickled from 2.x with protocol 2
585DATA_SET2 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01K\x03a\x85q\x02Rq\x03.'
586
# Exception classes whose names are substituted into exception_pickle by
# test_unpickle_from_2x; each resulting pickle must load as an instance of
# exactly that class.
python2_exceptions_without_args = (
    ArithmeticError,
    AssertionError,
    AttributeError,
    BaseException,
    BufferError,
    BytesWarning,
    DeprecationWarning,
    EOFError,
    EnvironmentError,
    Exception,
    FloatingPointError,
    FutureWarning,
    GeneratorExit,
    IOError,
    ImportError,
    ImportWarning,
    IndentationError,
    IndexError,
    KeyError,
    KeyboardInterrupt,
    LookupError,
    MemoryError,
    NameError,
    NotImplementedError,
    OSError,
    OverflowError,
    PendingDeprecationWarning,
    ReferenceError,
    RuntimeError,
    RuntimeWarning,
    # StandardError is gone in Python 3, we map it to Exception
    StopIteration,
    SyntaxError,
    SyntaxWarning,
    SystemError,
    SystemExit,
    TabError,
    TypeError,
    UnboundLocalError,
    UnicodeError,
    UnicodeWarning,
    UserWarning,
    ValueError,
    Warning,
    ZeroDivisionError,
)
634
# Pickle template naming a global from the "exceptions" module; the b'?'
# placeholder is replaced with an exception class name by the tests.
exception_pickle = b'\x80\x02cexceptions\n?\nq\x00)Rq\x01.'
636
637# UnicodeEncodeError object pickled from 2.x with protocol 2
638DATA_UEERR = (b'\x80\x02cexceptions\nUnicodeEncodeError\n'
639              b'q\x00(U\x05asciiq\x01X\x03\x00\x00\x00fooq\x02K\x00K\x01'
640              b'U\x03badq\x03tq\x04Rq\x05.')
641
642
def create_data():
    """Build the reference object graph used by the pickle tests.

    The result is a list holding small ints, a float and a complex, a run
    of integers at size boundaries, the same tuple twice (that tuple holds
    the string 'abc' twice and one C instance twice), and finally 5.
    """
    instance = C()
    instance.foo = 1
    instance.bar = 2

    # Integer test cases at the C pickle implementation's internal size
    # cutoffs (historically cPickle.c).
    uint1max = 0xff
    uint2max = 0xffff
    int4max = 0x7fffffff
    boundary_ints = [1, -1]
    for limit in (uint1max, uint2max, int4max):
        boundary_ints += [limit, -limit, -limit - 1]

    # The very same tuple object is stored twice on purpose.
    shared = ('abc', 'abc', instance, instance)
    return [0, 1, 2.0, 3.0+0j, *boundary_ints, shared, shared, 5]
662
663
664class AbstractUnpickleTests(unittest.TestCase):
665    # Subclass must define self.loads.
666
667    _testdata = create_data()
668
669    def assert_is_copy(self, obj, objcopy, msg=None):
670        """Utility method to verify if two objects are copies of each others.
671        """
672        if msg is None:
673            msg = "{!r} is not a copy of {!r}".format(obj, objcopy)
674        self.assertEqual(obj, objcopy, msg=msg)
675        self.assertIs(type(obj), type(objcopy), msg=msg)
676        if hasattr(obj, '__dict__'):
677            self.assertDictEqual(obj.__dict__, objcopy.__dict__, msg=msg)
678            self.assertIsNot(obj.__dict__, objcopy.__dict__, msg=msg)
679        if hasattr(obj, '__slots__'):
680            self.assertListEqual(obj.__slots__, objcopy.__slots__, msg=msg)
681            for slot in obj.__slots__:
682                self.assertEqual(
683                    hasattr(obj, slot), hasattr(objcopy, slot), msg=msg)
684                self.assertEqual(getattr(obj, slot, None),
685                                 getattr(objcopy, slot, None), msg=msg)
686
687    def check_unpickling_error(self, errors, data):
688        with self.subTest(data=data), \
689             self.assertRaises(errors):
690            try:
691                self.loads(data)
692            except BaseException as exc:
693                if support.verbose > 1:
694                    print('%-32r - %s: %s' %
695                          (data, exc.__class__.__name__, exc))
696                raise
697
698    def test_load_from_data0(self):
699        self.assert_is_copy(self._testdata, self.loads(DATA0))
700
701    def test_load_from_data1(self):
702        self.assert_is_copy(self._testdata, self.loads(DATA1))
703
704    def test_load_from_data2(self):
705        self.assert_is_copy(self._testdata, self.loads(DATA2))
706
707    def test_load_from_data3(self):
708        self.assert_is_copy(self._testdata, self.loads(DATA3))
709
710    def test_load_from_data4(self):
711        self.assert_is_copy(self._testdata, self.loads(DATA4))
712
713    def test_load_classic_instance(self):
714        # See issue5180.  Test loading 2.x pickles that
715        # contain an instance of old style class.
716        for X, args in [(C, ()), (D, ('x',)), (E, ())]:
717            xname = X.__name__.encode('ascii')
718            # Protocol 0 (text mode pickle):
719            """
720             0: (    MARK
721             1: i        INST       '__main__ X' (MARK at 0)
722            13: p    PUT        0
723            16: (    MARK
724            17: d        DICT       (MARK at 16)
725            18: p    PUT        1
726            21: b    BUILD
727            22: .    STOP
728            """
729            pickle0 = (b"(i__main__\n"
730                       b"X\n"
731                       b"p0\n"
732                       b"(dp1\nb.").replace(b'X', xname)
733            self.assert_is_copy(X(*args), self.loads(pickle0))
734
735            # Protocol 1 (binary mode pickle)
736            """
737             0: (    MARK
738             1: c        GLOBAL     '__main__ X'
739            13: q        BINPUT     0
740            15: o        OBJ        (MARK at 0)
741            16: q    BINPUT     1
742            18: }    EMPTY_DICT
743            19: q    BINPUT     2
744            21: b    BUILD
745            22: .    STOP
746            """
747            pickle1 = (b'(c__main__\n'
748                       b'X\n'
749                       b'q\x00oq\x01}q\x02b.').replace(b'X', xname)
750            self.assert_is_copy(X(*args), self.loads(pickle1))
751
752            # Protocol 2 (pickle2 = b'\x80\x02' + pickle1)
753            """
754             0: \x80 PROTO      2
755             2: (    MARK
756             3: c        GLOBAL     '__main__ X'
757            15: q        BINPUT     0
758            17: o        OBJ        (MARK at 2)
759            18: q    BINPUT     1
760            20: }    EMPTY_DICT
761            21: q    BINPUT     2
762            23: b    BUILD
763            24: .    STOP
764            """
765            pickle2 = (b'\x80\x02(c__main__\n'
766                       b'X\n'
767                       b'q\x00oq\x01}q\x02b.').replace(b'X', xname)
768            self.assert_is_copy(X(*args), self.loads(pickle2))
769
770    def test_maxint64(self):
771        maxint64 = (1 << 63) - 1
772        data = b'I' + str(maxint64).encode("ascii") + b'\n.'
773        got = self.loads(data)
774        self.assert_is_copy(maxint64, got)
775
776        # Try too with a bogus literal.
777        data = b'I' + str(maxint64).encode("ascii") + b'JUNK\n.'
778        self.check_unpickling_error(ValueError, data)
779
780    def test_unpickle_from_2x(self):
781        # Unpickle non-trivial data from Python 2.x.
782        loaded = self.loads(DATA_SET)
783        self.assertEqual(loaded, set([1, 2]))
784        loaded = self.loads(DATA_XRANGE)
785        self.assertEqual(type(loaded), type(range(0)))
786        self.assertEqual(list(loaded), list(range(5)))
787        loaded = self.loads(DATA_COOKIE)
788        self.assertEqual(type(loaded), SimpleCookie)
789        self.assertEqual(list(loaded.keys()), ["key"])
790        self.assertEqual(loaded["key"].value, "value")
791
792        # Exception objects without arguments pickled from 2.x with protocol 2
793        for exc in python2_exceptions_without_args:
794            data = exception_pickle.replace(b'?', exc.__name__.encode("ascii"))
795            loaded = self.loads(data)
796            self.assertIs(type(loaded), exc)
797
798        # StandardError is mapped to Exception, test that separately
799        loaded = self.loads(exception_pickle.replace(b'?', b'StandardError'))
800        self.assertIs(type(loaded), Exception)
801
802        loaded = self.loads(DATA_UEERR)
803        self.assertIs(type(loaded), UnicodeEncodeError)
804        self.assertEqual(loaded.object, "foo")
805        self.assertEqual(loaded.encoding, "ascii")
806        self.assertEqual(loaded.start, 0)
807        self.assertEqual(loaded.end, 1)
808        self.assertEqual(loaded.reason, "bad")
809
810    def test_load_python2_str_as_bytes(self):
811        # From Python 2: pickle.dumps('a\x00\xa0', protocol=0)
812        self.assertEqual(self.loads(b"S'a\\x00\\xa0'\n.",
813                                    encoding="bytes"), b'a\x00\xa0')
814        # From Python 2: pickle.dumps('a\x00\xa0', protocol=1)
815        self.assertEqual(self.loads(b'U\x03a\x00\xa0.',
816                                    encoding="bytes"), b'a\x00\xa0')
817        # From Python 2: pickle.dumps('a\x00\xa0', protocol=2)
818        self.assertEqual(self.loads(b'\x80\x02U\x03a\x00\xa0.',
819                                    encoding="bytes"), b'a\x00\xa0')
820
821    def test_load_python2_unicode_as_str(self):
822        # From Python 2: pickle.dumps(u'π', protocol=0)
823        self.assertEqual(self.loads(b'V\\u03c0\n.',
824                                    encoding='bytes'), 'π')
825        # From Python 2: pickle.dumps(u'π', protocol=1)
826        self.assertEqual(self.loads(b'X\x02\x00\x00\x00\xcf\x80.',
827                                    encoding="bytes"), 'π')
828        # From Python 2: pickle.dumps(u'π', protocol=2)
829        self.assertEqual(self.loads(b'\x80\x02X\x02\x00\x00\x00\xcf\x80.',
830                                    encoding="bytes"), 'π')
831
832    def test_load_long_python2_str_as_bytes(self):
833        # From Python 2: pickle.dumps('x' * 300, protocol=1)
834        self.assertEqual(self.loads(pickle.BINSTRING +
835                                    struct.pack("<I", 300) +
836                                    b'x' * 300 + pickle.STOP,
837                                    encoding='bytes'), b'x' * 300)
838
839    def test_constants(self):
840        self.assertIsNone(self.loads(b'N.'))
841        self.assertIs(self.loads(b'\x88.'), True)
842        self.assertIs(self.loads(b'\x89.'), False)
843        self.assertIs(self.loads(b'I01\n.'), True)
844        self.assertIs(self.loads(b'I00\n.'), False)
845
846    def test_empty_bytestring(self):
847        # issue 11286
848        empty = self.loads(b'\x80\x03U\x00q\x00.', encoding='koi8-r')
849        self.assertEqual(empty, '')
850
851    def test_short_binbytes(self):
852        dumped = b'\x80\x03C\x04\xe2\x82\xac\x00.'
853        self.assertEqual(self.loads(dumped), b'\xe2\x82\xac\x00')
854
855    def test_binbytes(self):
856        dumped = b'\x80\x03B\x04\x00\x00\x00\xe2\x82\xac\x00.'
857        self.assertEqual(self.loads(dumped), b'\xe2\x82\xac\x00')
858
859    @requires_32b
860    def test_negative_32b_binbytes(self):
861        # On 32-bit builds, a BINBYTES of 2**31 or more is refused
862        dumped = b'\x80\x03B\xff\xff\xff\xffxyzq\x00.'
863        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
864                                    dumped)
865
866    @requires_32b
867    def test_negative_32b_binunicode(self):
868        # On 32-bit builds, a BINUNICODE of 2**31 or more is refused
869        dumped = b'\x80\x03X\xff\xff\xff\xffxyzq\x00.'
870        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
871                                    dumped)
872
873    def test_short_binunicode(self):
874        dumped = b'\x80\x04\x8c\x04\xe2\x82\xac\x00.'
875        self.assertEqual(self.loads(dumped), '\u20ac\x00')
876
877    def test_misc_get(self):
878        self.check_unpickling_error(KeyError, b'g0\np0')
879        self.assert_is_copy([(100,), (100,)],
880                            self.loads(b'((Kdtp0\nh\x00l.))'))
881
882    def test_binbytes8(self):
883        dumped = b'\x80\x04\x8e\4\0\0\0\0\0\0\0\xe2\x82\xac\x00.'
884        self.assertEqual(self.loads(dumped), b'\xe2\x82\xac\x00')
885
886    def test_binunicode8(self):
887        dumped = b'\x80\x04\x8d\4\0\0\0\0\0\0\0\xe2\x82\xac\x00.'
888        self.assertEqual(self.loads(dumped), '\u20ac\x00')
889
890    @requires_32b
891    def test_large_32b_binbytes8(self):
892        dumped = b'\x80\x04\x8e\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
893        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
894                                    dumped)
895
896    @requires_32b
897    def test_large_32b_binunicode8(self):
898        dumped = b'\x80\x04\x8d\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
899        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
900                                    dumped)
901
902    def test_get(self):
903        pickled = b'((lp100000\ng100000\nt.'
904        unpickled = self.loads(pickled)
905        self.assertEqual(unpickled, ([],)*2)
906        self.assertIs(unpickled[0], unpickled[1])
907
908    def test_binget(self):
909        pickled = b'(]q\xffh\xfft.'
910        unpickled = self.loads(pickled)
911        self.assertEqual(unpickled, ([],)*2)
912        self.assertIs(unpickled[0], unpickled[1])
913
914    def test_long_binget(self):
915        pickled = b'(]r\x00\x00\x01\x00j\x00\x00\x01\x00t.'
916        unpickled = self.loads(pickled)
917        self.assertEqual(unpickled, ([],)*2)
918        self.assertIs(unpickled[0], unpickled[1])
919
920    def test_dup(self):
921        pickled = b'((l2t.'
922        unpickled = self.loads(pickled)
923        self.assertEqual(unpickled, ([],)*2)
924        self.assertIs(unpickled[0], unpickled[1])
925
926    def test_negative_put(self):
927        # Issue #12847
928        dumped = b'Va\np-1\n.'
929        self.check_unpickling_error(ValueError, dumped)
930
931    @requires_32b
932    def test_negative_32b_binput(self):
933        # Issue #12847
934        dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.'
935        self.check_unpickling_error(ValueError, dumped)
936
937    def test_badly_escaped_string(self):
938        self.check_unpickling_error(ValueError, b"S'\\'\n.")
939
940    def test_badly_quoted_string(self):
941        # Issue #17710
942        badpickles = [b"S'\n.",
943                      b'S"\n.',
944                      b'S\' \n.',
945                      b'S" \n.',
946                      b'S\'"\n.',
947                      b'S"\'\n.',
948                      b"S' ' \n.",
949                      b'S" " \n.',
950                      b"S ''\n.",
951                      b'S ""\n.',
952                      b'S \n.',
953                      b'S\n.',
954                      b'S.']
955        for p in badpickles:
956            self.check_unpickling_error(pickle.UnpicklingError, p)
957
958    def test_correctly_quoted_string(self):
959        goodpickles = [(b"S''\n.", ''),
960                       (b'S""\n.', ''),
961                       (b'S"\\n"\n.', '\n'),
962                       (b"S'\\n'\n.", '\n')]
963        for p, expected in goodpickles:
964            self.assertEqual(self.loads(p), expected)
965
966    def test_frame_readline(self):
967        pickled = b'\x80\x04\x95\x05\x00\x00\x00\x00\x00\x00\x00I42\n.'
968        #    0: \x80 PROTO      4
969        #    2: \x95 FRAME      5
970        #   11: I    INT        42
971        #   15: .    STOP
972        self.assertEqual(self.loads(pickled), 42)
973
974    def test_compat_unpickle(self):
975        # xrange(1, 7)
976        pickled = b'\x80\x02c__builtin__\nxrange\nK\x01K\x07K\x01\x87R.'
977        unpickled = self.loads(pickled)
978        self.assertIs(type(unpickled), range)
979        self.assertEqual(unpickled, range(1, 7))
980        self.assertEqual(list(unpickled), [1, 2, 3, 4, 5, 6])
981        # reduce
982        pickled = b'\x80\x02c__builtin__\nreduce\n.'
983        self.assertIs(self.loads(pickled), functools.reduce)
984        # whichdb.whichdb
985        pickled = b'\x80\x02cwhichdb\nwhichdb\n.'
986        self.assertIs(self.loads(pickled), dbm.whichdb)
987        # Exception(), StandardError()
988        for name in (b'Exception', b'StandardError'):
989            pickled = (b'\x80\x02cexceptions\n' + name + b'\nU\x03ugh\x85R.')
990            unpickled = self.loads(pickled)
991            self.assertIs(type(unpickled), Exception)
992            self.assertEqual(str(unpickled), 'ugh')
993        # UserDict.UserDict({1: 2}), UserDict.IterableUserDict({1: 2})
994        for name in (b'UserDict', b'IterableUserDict'):
995            pickled = (b'\x80\x02(cUserDict\n' + name +
996                       b'\no}U\x04data}K\x01K\x02ssb.')
997            unpickled = self.loads(pickled)
998            self.assertIs(type(unpickled), collections.UserDict)
999            self.assertEqual(unpickled, collections.UserDict({1: 2}))
1000
    def test_bad_stack(self):
        # Each pickle below runs the opcode named in its group comment with
        # too few items on the stack (or without a MARK to pop back to).
        # Every one of them must be reported as one of self.bad_stack_errors,
        # which the concrete test class is expected to provide.
        # Entries without a trailing comment belong to the opcode named on
        # the closest commented entry above them.
        badpickles = [
            b'.',                       # STOP
            b'0',                       # POP
            b'1',                       # POP_MARK
            b'2',                       # DUP
            b'(2',
            b'R',                       # REDUCE
            b')R',
            b'a',                       # APPEND
            b'Na',
            b'b',                       # BUILD
            b'Nb',
            b'd',                       # DICT
            b'e',                       # APPENDS
            b'(e',
            b'ibuiltins\nlist\n',       # INST
            b'l',                       # LIST
            b'o',                       # OBJ
            b'(o',
            b'p1\n',                    # PUT
            b'q\x00',                   # BINPUT
            b'r\x00\x00\x00\x00',       # LONG_BINPUT
            b's',                       # SETITEM
            b'Ns',
            b'NNs',
            b't',                       # TUPLE
            b'u',                       # SETITEMS
            b'(u',
            b'}(Nu',
            b'\x81',                    # NEWOBJ
            b')\x81',
            b'\x85',                    # TUPLE1
            b'\x86',                    # TUPLE2
            b'N\x86',
            b'\x87',                    # TUPLE3
            b'N\x87',
            b'NN\x87',
            b'\x90',                    # ADDITEMS
            b'(\x90',
            b'\x91',                    # FROZENSET
            b'\x92',                    # NEWOBJ_EX
            b')}\x92',
            b'\x93',                    # STACK_GLOBAL
            b'Vlist\n\x93',
            b'\x94',                    # MEMOIZE
        ]
        for p in badpickles:
            self.check_unpickling_error(self.bad_stack_errors, p)
1050
    def test_bad_mark(self):
        # Each pickle below places a MARK ('(') where the opcode named in
        # its group comment needs an ordinary stack item.  Every one of them
        # must be reported as one of self.bad_stack_errors, which the
        # concrete test class is expected to provide.
        # Entries without a trailing comment belong to the opcode named on
        # the closest comment above them.
        badpickles = [
            b'N(.',                     # STOP
            b'N(2',                     # DUP
            b'cbuiltins\nlist\n)(R',    # REDUCE
            b'cbuiltins\nlist\n()R',
            b']N(a',                    # APPEND
                                        # BUILD
            b'cbuiltins\nValueError\n)R}(b',
            b'cbuiltins\nValueError\n)R(}b',
            b'(Nd',                     # DICT
            b'N(p1\n',                  # PUT
            b'N(q\x00',                 # BINPUT
            b'N(r\x00\x00\x00\x00',     # LONG_BINPUT
            b'}NN(s',                   # SETITEM
            b'}N(Ns',
            b'}(NNs',
            b'}((u',                    # SETITEMS
            b'cbuiltins\nlist\n)(\x81', # NEWOBJ
            b'cbuiltins\nlist\n()\x81',
            b'N(\x85',                  # TUPLE1
            b'NN(\x86',                 # TUPLE2
            b'N(N\x86',
            b'NNN(\x87',                # TUPLE3
            b'NN(N\x87',
            b'N(NN\x87',
            b']((\x90',                 # ADDITEMS
                                        # NEWOBJ_EX
            b'cbuiltins\nlist\n)}(\x92',
            b'cbuiltins\nlist\n)(}\x92',
            b'cbuiltins\nlist\n()}\x92',
                                        # STACK_GLOBAL
            b'Vbuiltins\n(Vlist\n\x93',
            b'Vbuiltins\nVlist\n(\x93',
            b'N(\x94',                  # MEMOIZE
        ]
        for p in badpickles:
            self.check_unpickling_error(self.bad_stack_errors, p)
1089
1090    def test_truncated_data(self):
1091        self.check_unpickling_error(EOFError, b'')
1092        self.check_unpickling_error(EOFError, b'N')
1093        badpickles = [
1094            b'B',                       # BINBYTES
1095            b'B\x03\x00\x00',
1096            b'B\x03\x00\x00\x00',
1097            b'B\x03\x00\x00\x00ab',
1098            b'C',                       # SHORT_BINBYTES
1099            b'C\x03',
1100            b'C\x03ab',
1101            b'F',                       # FLOAT
1102            b'F0.0',
1103            b'F0.00',
1104            b'G',                       # BINFLOAT
1105            b'G\x00\x00\x00\x00\x00\x00\x00',
1106            b'I',                       # INT
1107            b'I0',
1108            b'J',                       # BININT
1109            b'J\x00\x00\x00',
1110            b'K',                       # BININT1
1111            b'L',                       # LONG
1112            b'L0',
1113            b'L10',
1114            b'L0L',
1115            b'L10L',
1116            b'M',                       # BININT2
1117            b'M\x00',
1118            # b'P',                       # PERSID
1119            # b'Pabc',
1120            b'S',                       # STRING
1121            b"S'abc'",
1122            b'T',                       # BINSTRING
1123            b'T\x03\x00\x00',
1124            b'T\x03\x00\x00\x00',
1125            b'T\x03\x00\x00\x00ab',
1126            b'U',                       # SHORT_BINSTRING
1127            b'U\x03',
1128            b'U\x03ab',
1129            b'V',                       # UNICODE
1130            b'Vabc',
1131            b'X',                       # BINUNICODE
1132            b'X\x03\x00\x00',
1133            b'X\x03\x00\x00\x00',
1134            b'X\x03\x00\x00\x00ab',
1135            b'(c',                      # GLOBAL
1136            b'(cbuiltins',
1137            b'(cbuiltins\n',
1138            b'(cbuiltins\nlist',
1139            b'Ng',                      # GET
1140            b'Ng0',
1141            b'(i',                      # INST
1142            b'(ibuiltins',
1143            b'(ibuiltins\n',
1144            b'(ibuiltins\nlist',
1145            b'Nh',                      # BINGET
1146            b'Nj',                      # LONG_BINGET
1147            b'Nj\x00\x00\x00',
1148            b'Np',                      # PUT
1149            b'Np0',
1150            b'Nq',                      # BINPUT
1151            b'Nr',                      # LONG_BINPUT
1152            b'Nr\x00\x00\x00',
1153            b'\x80',                    # PROTO
1154            b'\x82',                    # EXT1
1155            b'\x83',                    # EXT2
1156            b'\x84\x01',
1157            b'\x84',                    # EXT4
1158            b'\x84\x01\x00\x00',
1159            b'\x8a',                    # LONG1
1160            b'\x8b',                    # LONG4
1161            b'\x8b\x00\x00\x00',
1162            b'\x8c',                    # SHORT_BINUNICODE
1163            b'\x8c\x03',
1164            b'\x8c\x03ab',
1165            b'\x8d',                    # BINUNICODE8
1166            b'\x8d\x03\x00\x00\x00\x00\x00\x00',
1167            b'\x8d\x03\x00\x00\x00\x00\x00\x00\x00',
1168            b'\x8d\x03\x00\x00\x00\x00\x00\x00\x00ab',
1169            b'\x8e',                    # BINBYTES8
1170            b'\x8e\x03\x00\x00\x00\x00\x00\x00',
1171            b'\x8e\x03\x00\x00\x00\x00\x00\x00\x00',
1172            b'\x8e\x03\x00\x00\x00\x00\x00\x00\x00ab',
1173            b'\x95',                    # FRAME
1174            b'\x95\x02\x00\x00\x00\x00\x00\x00',
1175            b'\x95\x02\x00\x00\x00\x00\x00\x00\x00',
1176            b'\x95\x02\x00\x00\x00\x00\x00\x00\x00N',
1177        ]
1178        for p in badpickles:
1179            self.check_unpickling_error(self.truncated_errors, p)
1180
    @reap_threads
    def test_unpickle_module_race(self):
        # https://bugs.python.org/issue34572
        # Two threads unpickle an instance of a class whose module has not
        # been imported yet; both must end up with a ToBeUnpickled instance.

        # Helper module holding a barrier shared by the importing thread and
        # this (main) thread.
        locker_module = dedent("""
        import threading
        barrier = threading.Barrier(2)
        """)
        # Module whose import blocks on that barrier *before* the class is
        # defined, keeping the module half-initialized until the main thread
        # releases it.
        locking_import_module = dedent("""
        import locker
        locker.barrier.wait()
        class ToBeUnpickled(object):
            pass
        """)

        # Write both modules into a scratch directory placed at the front of
        # sys.path; the cleanups undo the directory, the path entry and the
        # imported modules.
        os.mkdir(TESTFN)
        self.addCleanup(shutil.rmtree, TESTFN)
        sys.path.insert(0, TESTFN)
        self.addCleanup(sys.path.remove, TESTFN)
        with open(os.path.join(TESTFN, "locker.py"), "wb") as f:
            f.write(locker_module.encode('utf-8'))
        with open(os.path.join(TESTFN, "locking_import.py"), "wb") as f:
            f.write(locking_import_module.encode('utf-8'))
        self.addCleanup(forget, "locker")
        self.addCleanup(forget, "locking_import")

        # Imported here so the main thread can reach locker.barrier below.
        import locker

        # GLOBAL locking_import.ToBeUnpickled followed by NEWOBJ with an
        # empty argument tuple.
        pickle_bytes = (
            b'\x80\x03clocking_import\nToBeUnpickled\nq\x00)\x81q\x01.')

        # Then try to unpickle two of these simultaneously
        # One of them will cause the module import, and we want it to block
        # until the other one either:
        #   - fails (before the patch for this issue)
        #   - blocks on the import lock for the module, as it should
        results = []
        # Three parties: the two worker threads plus the main thread.
        barrier = threading.Barrier(3)
        def t():
            # This ensures the threads have all started
            # presumably barrier release is faster than thread startup
            barrier.wait()
            results.append(pickle.loads(pickle_bytes))

        t1 = threading.Thread(target=t)
        t2 = threading.Thread(target=t)
        t1.start()
        t2.start()

        barrier.wait()
        # could have delay here
        # Release the thread that is blocked inside the import of
        # locking_import.
        locker.barrier.wait()

        t1.join()
        t2.join()

        # A worker that raised appended nothing, so the length of results is
        # checked here as well as the types.
        from locking_import import ToBeUnpickled
        self.assertEqual(
            [type(x) for x in results],
            [ToBeUnpickled] * 2)
1240
1241
1242
1243class AbstractPickleTests(unittest.TestCase):
1244    # Subclass must define self.dumps, self.loads.
1245
1246    optimized = False
1247
1248    _testdata = AbstractUnpickleTests._testdata
1249
    def setUp(self):
        # No per-test fixture is prepared at this level.
        pass
1252
1253    assert_is_copy = AbstractUnpickleTests.assert_is_copy
1254
1255    def test_misc(self):
1256        # test various datatypes not tested by testdata
1257        for proto in protocols:
1258            x = myint(4)
1259            s = self.dumps(x, proto)
1260            y = self.loads(s)
1261            self.assert_is_copy(x, y)
1262
1263            x = (1, ())
1264            s = self.dumps(x, proto)
1265            y = self.loads(s)
1266            self.assert_is_copy(x, y)
1267
1268            x = initarg(1, x)
1269            s = self.dumps(x, proto)
1270            y = self.loads(s)
1271            self.assert_is_copy(x, y)
1272
1273        # XXX test __reduce__ protocol?
1274
1275    def test_roundtrip_equality(self):
1276        expected = self._testdata
1277        for proto in protocols:
1278            s = self.dumps(expected, proto)
1279            got = self.loads(s)
1280            self.assert_is_copy(expected, got)
1281
1282    # There are gratuitous differences between pickles produced by
1283    # pickle and cPickle, largely because cPickle starts PUT indices at
1284    # 1 and pickle starts them at 0.  See XXX comment in cPickle's put2() --
1285    # there's a comment with an exclamation point there whose meaning
1286    # is a mystery.  cPickle also suppresses PUT for objects with a refcount
1287    # of 1.
1288    def dont_test_disassembly(self):
1289        from io import StringIO
1290        from pickletools import dis
1291
1292        for proto, expected in (0, DATA0_DIS), (1, DATA1_DIS):
1293            s = self.dumps(self._testdata, proto)
1294            filelike = StringIO()
1295            dis(s, out=filelike)
1296            got = filelike.getvalue()
1297            self.assertEqual(expected, got)
1298
1299    def test_recursive_list(self):
1300        l = []
1301        l.append(l)
1302        for proto in protocols:
1303            s = self.dumps(l, proto)
1304            x = self.loads(s)
1305            self.assertIsInstance(x, list)
1306            self.assertEqual(len(x), 1)
1307            self.assertIs(x[0], x)
1308
1309    def test_recursive_tuple_and_list(self):
1310        t = ([],)
1311        t[0].append(t)
1312        for proto in protocols:
1313            s = self.dumps(t, proto)
1314            x = self.loads(s)
1315            self.assertIsInstance(x, tuple)
1316            self.assertEqual(len(x), 1)
1317            self.assertIsInstance(x[0], list)
1318            self.assertEqual(len(x[0]), 1)
1319            self.assertIs(x[0][0], x)
1320
1321    def test_recursive_dict(self):
1322        d = {}
1323        d[1] = d
1324        for proto in protocols:
1325            s = self.dumps(d, proto)
1326            x = self.loads(s)
1327            self.assertIsInstance(x, dict)
1328            self.assertEqual(list(x.keys()), [1])
1329            self.assertIs(x[1], x)
1330
1331    def test_recursive_dict_key(self):
1332        d = {}
1333        k = K(d)
1334        d[k] = 1
1335        for proto in protocols:
1336            s = self.dumps(d, proto)
1337            x = self.loads(s)
1338            self.assertIsInstance(x, dict)
1339            self.assertEqual(len(x.keys()), 1)
1340            self.assertIsInstance(list(x.keys())[0], K)
1341            self.assertIs(list(x.keys())[0].value, x)
1342
1343    def test_recursive_set(self):
1344        y = set()
1345        k = K(y)
1346        y.add(k)
1347        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
1348            s = self.dumps(y, proto)
1349            x = self.loads(s)
1350            self.assertIsInstance(x, set)
1351            self.assertEqual(len(x), 1)
1352            self.assertIsInstance(list(x)[0], K)
1353            self.assertIs(list(x)[0].value, x)
1354
1355    def test_recursive_list_subclass(self):
1356        y = MyList()
1357        y.append(y)
1358        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
1359            s = self.dumps(y, proto)
1360            x = self.loads(s)
1361            self.assertIsInstance(x, MyList)
1362            self.assertEqual(len(x), 1)
1363            self.assertIs(x[0], x)
1364
1365    def test_recursive_dict_subclass(self):
1366        d = MyDict()
1367        d[1] = d
1368        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
1369            s = self.dumps(d, proto)
1370            x = self.loads(s)
1371            self.assertIsInstance(x, MyDict)
1372            self.assertEqual(list(x.keys()), [1])
1373            self.assertIs(x[1], x)
1374
1375    def test_recursive_dict_subclass_key(self):
1376        d = MyDict()
1377        k = K(d)
1378        d[k] = 1
1379        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
1380            s = self.dumps(d, proto)
1381            x = self.loads(s)
1382            self.assertIsInstance(x, MyDict)
1383            self.assertEqual(len(list(x.keys())), 1)
1384            self.assertIsInstance(list(x.keys())[0], K)
1385            self.assertIs(list(x.keys())[0].value, x)
1386
1387    def test_recursive_inst(self):
1388        i = C()
1389        i.attr = i
1390        for proto in protocols:
1391            s = self.dumps(i, proto)
1392            x = self.loads(s)
1393            self.assertIsInstance(x, C)
1394            self.assertEqual(dir(x), dir(i))
1395            self.assertIs(x.attr, x)
1396
1397    def test_recursive_multi(self):
1398        l = []
1399        d = {1:l}
1400        i = C()
1401        i.attr = d
1402        l.append(i)
1403        for proto in protocols:
1404            s = self.dumps(l, proto)
1405            x = self.loads(s)
1406            self.assertIsInstance(x, list)
1407            self.assertEqual(len(x), 1)
1408            self.assertEqual(dir(x[0]), dir(i))
1409            self.assertEqual(list(x[0].attr.keys()), [1])
1410            self.assertTrue(x[0].attr[1] is x)
1411
1412    def check_recursive_collection_and_inst(self, factory):
1413        h = H()
1414        y = factory([h])
1415        h.attr = y
1416        for proto in protocols:
1417            s = self.dumps(y, proto)
1418            x = self.loads(s)
1419            self.assertIsInstance(x, type(y))
1420            self.assertEqual(len(x), 1)
1421            self.assertIsInstance(list(x)[0], H)
1422            self.assertIs(list(x)[0].attr, x)
1423
1424    def test_recursive_list_and_inst(self):
1425        self.check_recursive_collection_and_inst(list)
1426
1427    def test_recursive_tuple_and_inst(self):
1428        self.check_recursive_collection_and_inst(tuple)
1429
1430    def test_recursive_dict_and_inst(self):
1431        self.check_recursive_collection_and_inst(dict.fromkeys)
1432
1433    def test_recursive_set_and_inst(self):
1434        self.check_recursive_collection_and_inst(set)
1435
1436    def test_recursive_frozenset_and_inst(self):
1437        self.check_recursive_collection_and_inst(frozenset)
1438
1439    def test_recursive_list_subclass_and_inst(self):
1440        self.check_recursive_collection_and_inst(MyList)
1441
1442    def test_recursive_tuple_subclass_and_inst(self):
1443        self.check_recursive_collection_and_inst(MyTuple)
1444
1445    def test_recursive_dict_subclass_and_inst(self):
1446        self.check_recursive_collection_and_inst(MyDict.fromkeys)
1447
1448    def test_recursive_set_subclass_and_inst(self):
1449        self.check_recursive_collection_and_inst(MySet)
1450
1451    def test_recursive_frozenset_subclass_and_inst(self):
1452        self.check_recursive_collection_and_inst(MyFrozenSet)
1453
1454    def test_unicode(self):
1455        endcases = ['', '<\\u>', '<\\\u1234>', '<\n>',
1456                    '<\\>', '<\\\U00012345>',
1457                    # surrogates
1458                    '<\udc80>']
1459        for proto in protocols:
1460            for u in endcases:
1461                p = self.dumps(u, proto)
1462                u2 = self.loads(p)
1463                self.assert_is_copy(u, u2)
1464
1465    def test_unicode_high_plane(self):
1466        t = '\U00012345'
1467        for proto in protocols:
1468            p = self.dumps(t, proto)
1469            t2 = self.loads(p)
1470            self.assert_is_copy(t, t2)
1471
1472    def test_bytes(self):
1473        for proto in protocols:
1474            for s in b'', b'xyz', b'xyz'*100:
1475                p = self.dumps(s, proto)
1476                self.assert_is_copy(s, self.loads(p))
1477            for s in [bytes([i]) for i in range(256)]:
1478                p = self.dumps(s, proto)
1479                self.assert_is_copy(s, self.loads(p))
1480            for s in [bytes([i, i]) for i in range(256)]:
1481                p = self.dumps(s, proto)
1482                self.assert_is_copy(s, self.loads(p))
1483
1484    def test_ints(self):
1485        for proto in protocols:
1486            n = sys.maxsize
1487            while n:
1488                for expected in (-n, n):
1489                    s = self.dumps(expected, proto)
1490                    n2 = self.loads(s)
1491                    self.assert_is_copy(expected, n2)
1492                n = n >> 1
1493
1494    def test_long(self):
1495        for proto in protocols:
1496            # 256 bytes is where LONG4 begins.
1497            for nbits in 1, 8, 8*254, 8*255, 8*256, 8*257:
1498                nbase = 1 << nbits
1499                for npos in nbase-1, nbase, nbase+1:
1500                    for n in npos, -npos:
1501                        pickle = self.dumps(n, proto)
1502                        got = self.loads(pickle)
1503                        self.assert_is_copy(n, got)
1504        # Try a monster.  This is quadratic-time in protos 0 & 1, so don't
1505        # bother with those.
1506        nbase = int("deadbeeffeedface", 16)
1507        nbase += nbase << 1000000
1508        for n in nbase, -nbase:
1509            p = self.dumps(n, 2)
1510            got = self.loads(p)
1511            # assert_is_copy is very expensive here as it precomputes
1512            # a failure message by computing the repr() of n and got,
1513            # we just do the check ourselves.
1514            self.assertIs(type(got), int)
1515            self.assertEqual(n, got)
1516
1517    def test_float(self):
1518        test_values = [0.0, 4.94e-324, 1e-310, 7e-308, 6.626e-34, 0.1, 0.5,
1519                       3.14, 263.44582062374053, 6.022e23, 1e30]
1520        test_values = test_values + [-x for x in test_values]
1521        for proto in protocols:
1522            for value in test_values:
1523                pickle = self.dumps(value, proto)
1524                got = self.loads(pickle)
1525                self.assert_is_copy(value, got)
1526
1527    @run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
1528    def test_float_format(self):
1529        # make sure that floats are formatted locale independent with proto 0
1530        self.assertEqual(self.dumps(1.2, 0)[0:3], b'F1.')
1531
1532    def test_reduce(self):
1533        for proto in protocols:
1534            inst = AAA()
1535            dumped = self.dumps(inst, proto)
1536            loaded = self.loads(dumped)
1537            self.assertEqual(loaded, REDUCE_A)
1538
1539    def test_getinitargs(self):
1540        for proto in protocols:
1541            inst = initarg(1, 2)
1542            dumped = self.dumps(inst, proto)
1543            loaded = self.loads(dumped)
1544            self.assert_is_copy(inst, loaded)
1545
1546    def test_metaclass(self):
1547        a = use_metaclass()
1548        for proto in protocols:
1549            s = self.dumps(a, proto)
1550            b = self.loads(s)
1551            self.assertEqual(a.__class__, b.__class__)
1552
1553    def test_dynamic_class(self):
1554        a = create_dynamic_class("my_dynamic_class", (object,))
1555        copyreg.pickle(pickling_metaclass, pickling_metaclass.__reduce__)
1556        for proto in protocols:
1557            s = self.dumps(a, proto)
1558            b = self.loads(s)
1559            self.assertEqual(a, b)
1560            self.assertIs(type(a), type(b))
1561
1562    def test_structseq(self):
1563        import time
1564        import os
1565
1566        t = time.localtime()
1567        for proto in protocols:
1568            s = self.dumps(t, proto)
1569            u = self.loads(s)
1570            self.assert_is_copy(t, u)
1571            if hasattr(os, "stat"):
1572                t = os.stat(os.curdir)
1573                s = self.dumps(t, proto)
1574                u = self.loads(s)
1575                self.assert_is_copy(t, u)
1576            if hasattr(os, "statvfs"):
1577                t = os.statvfs(os.curdir)
1578                s = self.dumps(t, proto)
1579                u = self.loads(s)
1580                self.assert_is_copy(t, u)
1581
1582    def test_ellipsis(self):
1583        for proto in protocols:
1584            s = self.dumps(..., proto)
1585            u = self.loads(s)
1586            self.assertIs(..., u)
1587
1588    def test_notimplemented(self):
1589        for proto in protocols:
1590            s = self.dumps(NotImplemented, proto)
1591            u = self.loads(s)
1592            self.assertIs(NotImplemented, u)
1593
1594    def test_singleton_types(self):
1595        # Issue #6477: Test that types of built-in singletons can be pickled.
1596        singletons = [None, ..., NotImplemented]
1597        for singleton in singletons:
1598            for proto in protocols:
1599                s = self.dumps(type(singleton), proto)
1600                u = self.loads(s)
1601                self.assertIs(type(singleton), u)
1602
1603    # Tests for protocol 2
1604
1605    def test_proto(self):
1606        for proto in protocols:
1607            pickled = self.dumps(None, proto)
1608            if proto >= 2:
1609                proto_header = pickle.PROTO + bytes([proto])
1610                self.assertTrue(pickled.startswith(proto_header))
1611            else:
1612                self.assertEqual(count_opcode(pickle.PROTO, pickled), 0)
1613
1614        oob = protocols[-1] + 1     # a future protocol
1615        build_none = pickle.NONE + pickle.STOP
1616        badpickle = pickle.PROTO + bytes([oob]) + build_none
1617        try:
1618            self.loads(badpickle)
1619        except ValueError as err:
1620            self.assertIn("unsupported pickle protocol", str(err))
1621        else:
1622            self.fail("expected bad protocol number to raise ValueError")
1623
1624    def test_long1(self):
1625        x = 12345678910111213141516178920
1626        for proto in protocols:
1627            s = self.dumps(x, proto)
1628            y = self.loads(s)
1629            self.assert_is_copy(x, y)
1630            self.assertEqual(opcode_in_pickle(pickle.LONG1, s), proto >= 2)
1631
1632    def test_long4(self):
1633        x = 12345678910111213141516178920 << (256*8)
1634        for proto in protocols:
1635            s = self.dumps(x, proto)
1636            y = self.loads(s)
1637            self.assert_is_copy(x, y)
1638            self.assertEqual(opcode_in_pickle(pickle.LONG4, s), proto >= 2)
1639
1640    def test_short_tuples(self):
1641        # Map (proto, len(tuple)) to expected opcode.
1642        expected_opcode = {(0, 0): pickle.TUPLE,
1643                           (0, 1): pickle.TUPLE,
1644                           (0, 2): pickle.TUPLE,
1645                           (0, 3): pickle.TUPLE,
1646                           (0, 4): pickle.TUPLE,
1647
1648                           (1, 0): pickle.EMPTY_TUPLE,
1649                           (1, 1): pickle.TUPLE,
1650                           (1, 2): pickle.TUPLE,
1651                           (1, 3): pickle.TUPLE,
1652                           (1, 4): pickle.TUPLE,
1653
1654                           (2, 0): pickle.EMPTY_TUPLE,
1655                           (2, 1): pickle.TUPLE1,
1656                           (2, 2): pickle.TUPLE2,
1657                           (2, 3): pickle.TUPLE3,
1658                           (2, 4): pickle.TUPLE,
1659
1660                           (3, 0): pickle.EMPTY_TUPLE,
1661                           (3, 1): pickle.TUPLE1,
1662                           (3, 2): pickle.TUPLE2,
1663                           (3, 3): pickle.TUPLE3,
1664                           (3, 4): pickle.TUPLE,
1665                          }
1666        a = ()
1667        b = (1,)
1668        c = (1, 2)
1669        d = (1, 2, 3)
1670        e = (1, 2, 3, 4)
1671        for proto in protocols:
1672            for x in a, b, c, d, e:
1673                s = self.dumps(x, proto)
1674                y = self.loads(s)
1675                self.assert_is_copy(x, y)
1676                expected = expected_opcode[min(proto, 3), len(x)]
1677                self.assertTrue(opcode_in_pickle(expected, s))
1678
1679    def test_singletons(self):
1680        # Map (proto, singleton) to expected opcode.
1681        expected_opcode = {(0, None): pickle.NONE,
1682                           (1, None): pickle.NONE,
1683                           (2, None): pickle.NONE,
1684                           (3, None): pickle.NONE,
1685
1686                           (0, True): pickle.INT,
1687                           (1, True): pickle.INT,
1688                           (2, True): pickle.NEWTRUE,
1689                           (3, True): pickle.NEWTRUE,
1690
1691                           (0, False): pickle.INT,
1692                           (1, False): pickle.INT,
1693                           (2, False): pickle.NEWFALSE,
1694                           (3, False): pickle.NEWFALSE,
1695                          }
1696        for proto in protocols:
1697            for x in None, False, True:
1698                s = self.dumps(x, proto)
1699                y = self.loads(s)
1700                self.assertTrue(x is y, (proto, x, s, y))
1701                expected = expected_opcode[min(proto, 3), x]
1702                self.assertTrue(opcode_in_pickle(expected, s))
1703
1704    def test_newobj_tuple(self):
1705        x = MyTuple([1, 2, 3])
1706        x.foo = 42
1707        x.bar = "hello"
1708        for proto in protocols:
1709            s = self.dumps(x, proto)
1710            y = self.loads(s)
1711            self.assert_is_copy(x, y)
1712
1713    def test_newobj_list(self):
1714        x = MyList([1, 2, 3])
1715        x.foo = 42
1716        x.bar = "hello"
1717        for proto in protocols:
1718            s = self.dumps(x, proto)
1719            y = self.loads(s)
1720            self.assert_is_copy(x, y)
1721
1722    def test_newobj_generic(self):
1723        for proto in protocols:
1724            for C in myclasses:
1725                B = C.__base__
1726                x = C(C.sample)
1727                x.foo = 42
1728                s = self.dumps(x, proto)
1729                y = self.loads(s)
1730                detail = (proto, C, B, x, y, type(y))
1731                self.assert_is_copy(x, y) # XXX revisit
1732                self.assertEqual(B(x), B(y), detail)
1733                self.assertEqual(x.__dict__, y.__dict__, detail)
1734
1735    def test_newobj_proxies(self):
1736        # NEWOBJ should use the __class__ rather than the raw type
1737        classes = myclasses[:]
1738        # Cannot create weakproxies to these classes
1739        for c in (MyInt, MyTuple):
1740            classes.remove(c)
1741        for proto in protocols:
1742            for C in classes:
1743                B = C.__base__
1744                x = C(C.sample)
1745                x.foo = 42
1746                p = weakref.proxy(x)
1747                s = self.dumps(p, proto)
1748                y = self.loads(s)
1749                self.assertEqual(type(y), type(x))  # rather than type(p)
1750                detail = (proto, C, B, x, y, type(y))
1751                self.assertEqual(B(x), B(y), detail)
1752                self.assertEqual(x.__dict__, y.__dict__, detail)
1753
1754    def test_newobj_not_class(self):
1755        # Issue 24552
1756        global SimpleNewObj
1757        save = SimpleNewObj
1758        o = SimpleNewObj.__new__(SimpleNewObj)
1759        b = self.dumps(o, 4)
1760        try:
1761            SimpleNewObj = 42
1762            self.assertRaises((TypeError, pickle.UnpicklingError), self.loads, b)
1763        finally:
1764            SimpleNewObj = save
1765
1766    # Register a type with copyreg, with extension code extcode.  Pickle
1767    # an object of that type.  Check that the resulting pickle uses opcode
1768    # (EXT[124]) under proto 2, and not in proto 1.
1769
1770    def produce_global_ext(self, extcode, opcode):
1771        e = ExtensionSaver(extcode)
1772        try:
1773            copyreg.add_extension(__name__, "MyList", extcode)
1774            x = MyList([1, 2, 3])
1775            x.foo = 42
1776            x.bar = "hello"
1777
1778            # Dump using protocol 1 for comparison.
1779            s1 = self.dumps(x, 1)
1780            self.assertIn(__name__.encode("utf-8"), s1)
1781            self.assertIn(b"MyList", s1)
1782            self.assertFalse(opcode_in_pickle(opcode, s1))
1783
1784            y = self.loads(s1)
1785            self.assert_is_copy(x, y)
1786
1787            # Dump using protocol 2 for test.
1788            s2 = self.dumps(x, 2)
1789            self.assertNotIn(__name__.encode("utf-8"), s2)
1790            self.assertNotIn(b"MyList", s2)
1791            self.assertEqual(opcode_in_pickle(opcode, s2), True, repr(s2))
1792
1793            y = self.loads(s2)
1794            self.assert_is_copy(x, y)
1795        finally:
1796            e.restore()
1797
1798    def test_global_ext1(self):
1799        self.produce_global_ext(0x00000001, pickle.EXT1)  # smallest EXT1 code
1800        self.produce_global_ext(0x000000ff, pickle.EXT1)  # largest EXT1 code
1801
1802    def test_global_ext2(self):
1803        self.produce_global_ext(0x00000100, pickle.EXT2)  # smallest EXT2 code
1804        self.produce_global_ext(0x0000ffff, pickle.EXT2)  # largest EXT2 code
1805        self.produce_global_ext(0x0000abcd, pickle.EXT2)  # check endianness
1806
1807    def test_global_ext4(self):
1808        self.produce_global_ext(0x00010000, pickle.EXT4)  # smallest EXT4 code
1809        self.produce_global_ext(0x7fffffff, pickle.EXT4)  # largest EXT4 code
1810        self.produce_global_ext(0x12abcdef, pickle.EXT4)  # check endianness
1811
1812    def test_list_chunking(self):
1813        n = 10  # too small to chunk
1814        x = list(range(n))
1815        for proto in protocols:
1816            s = self.dumps(x, proto)
1817            y = self.loads(s)
1818            self.assert_is_copy(x, y)
1819            num_appends = count_opcode(pickle.APPENDS, s)
1820            self.assertEqual(num_appends, proto > 0)
1821
1822        n = 2500  # expect at least two chunks when proto > 0
1823        x = list(range(n))
1824        for proto in protocols:
1825            s = self.dumps(x, proto)
1826            y = self.loads(s)
1827            self.assert_is_copy(x, y)
1828            num_appends = count_opcode(pickle.APPENDS, s)
1829            if proto == 0:
1830                self.assertEqual(num_appends, 0)
1831            else:
1832                self.assertTrue(num_appends >= 2)
1833
1834    def test_dict_chunking(self):
1835        n = 10  # too small to chunk
1836        x = dict.fromkeys(range(n))
1837        for proto in protocols:
1838            s = self.dumps(x, proto)
1839            self.assertIsInstance(s, bytes_types)
1840            y = self.loads(s)
1841            self.assert_is_copy(x, y)
1842            num_setitems = count_opcode(pickle.SETITEMS, s)
1843            self.assertEqual(num_setitems, proto > 0)
1844
1845        n = 2500  # expect at least two chunks when proto > 0
1846        x = dict.fromkeys(range(n))
1847        for proto in protocols:
1848            s = self.dumps(x, proto)
1849            y = self.loads(s)
1850            self.assert_is_copy(x, y)
1851            num_setitems = count_opcode(pickle.SETITEMS, s)
1852            if proto == 0:
1853                self.assertEqual(num_setitems, 0)
1854            else:
1855                self.assertTrue(num_setitems >= 2)
1856
1857    def test_set_chunking(self):
1858        n = 10  # too small to chunk
1859        x = set(range(n))
1860        for proto in protocols:
1861            s = self.dumps(x, proto)
1862            y = self.loads(s)
1863            self.assert_is_copy(x, y)
1864            num_additems = count_opcode(pickle.ADDITEMS, s)
1865            if proto < 4:
1866                self.assertEqual(num_additems, 0)
1867            else:
1868                self.assertEqual(num_additems, 1)
1869
1870        n = 2500  # expect at least two chunks when proto >= 4
1871        x = set(range(n))
1872        for proto in protocols:
1873            s = self.dumps(x, proto)
1874            y = self.loads(s)
1875            self.assert_is_copy(x, y)
1876            num_additems = count_opcode(pickle.ADDITEMS, s)
1877            if proto < 4:
1878                self.assertEqual(num_additems, 0)
1879            else:
1880                self.assertGreaterEqual(num_additems, 2)
1881
1882    def test_simple_newobj(self):
1883        x = SimpleNewObj.__new__(SimpleNewObj, 0xface)  # avoid __init__
1884        x.abc = 666
1885        for proto in protocols:
1886            with self.subTest(proto=proto):
1887                s = self.dumps(x, proto)
1888                if proto < 1:
1889                    self.assertIn(b'\nI64206', s)  # INT
1890                else:
1891                    self.assertIn(b'M\xce\xfa', s)  # BININT2
1892                self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s),
1893                                 2 <= proto)
1894                self.assertFalse(opcode_in_pickle(pickle.NEWOBJ_EX, s))
1895                y = self.loads(s)   # will raise TypeError if __init__ called
1896                self.assert_is_copy(x, y)
1897
1898    def test_complex_newobj(self):
1899        x = ComplexNewObj.__new__(ComplexNewObj, 0xface)  # avoid __init__
1900        x.abc = 666
1901        for proto in protocols:
1902            with self.subTest(proto=proto):
1903                s = self.dumps(x, proto)
1904                if proto < 1:
1905                    self.assertIn(b'\nI64206', s)  # INT
1906                elif proto < 2:
1907                    self.assertIn(b'M\xce\xfa', s)  # BININT2
1908                elif proto < 4:
1909                    self.assertIn(b'X\x04\x00\x00\x00FACE', s)  # BINUNICODE
1910                else:
1911                    self.assertIn(b'\x8c\x04FACE', s)  # SHORT_BINUNICODE
1912                self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s),
1913                                 2 <= proto)
1914                self.assertFalse(opcode_in_pickle(pickle.NEWOBJ_EX, s))
1915                y = self.loads(s)   # will raise TypeError if __init__ called
1916                self.assert_is_copy(x, y)
1917
1918    def test_complex_newobj_ex(self):
1919        x = ComplexNewObjEx.__new__(ComplexNewObjEx, 0xface)  # avoid __init__
1920        x.abc = 666
1921        for proto in protocols:
1922            with self.subTest(proto=proto):
1923                s = self.dumps(x, proto)
1924                if proto < 1:
1925                    self.assertIn(b'\nI64206', s)  # INT
1926                elif proto < 2:
1927                    self.assertIn(b'M\xce\xfa', s)  # BININT2
1928                elif proto < 4:
1929                    self.assertIn(b'X\x04\x00\x00\x00FACE', s)  # BINUNICODE
1930                else:
1931                    self.assertIn(b'\x8c\x04FACE', s)  # SHORT_BINUNICODE
1932                self.assertFalse(opcode_in_pickle(pickle.NEWOBJ, s))
1933                self.assertEqual(opcode_in_pickle(pickle.NEWOBJ_EX, s),
1934                                 4 <= proto)
1935                y = self.loads(s)   # will raise TypeError if __init__ called
1936                self.assert_is_copy(x, y)
1937
1938    def test_newobj_list_slots(self):
1939        x = SlotList([1, 2, 3])
1940        x.foo = 42
1941        x.bar = "hello"
1942        s = self.dumps(x, 2)
1943        y = self.loads(s)
1944        self.assert_is_copy(x, y)
1945
1946    def test_reduce_overrides_default_reduce_ex(self):
1947        for proto in protocols:
1948            x = REX_one()
1949            self.assertEqual(x._reduce_called, 0)
1950            s = self.dumps(x, proto)
1951            self.assertEqual(x._reduce_called, 1)
1952            y = self.loads(s)
1953            self.assertEqual(y._reduce_called, 0)
1954
1955    def test_reduce_ex_called(self):
1956        for proto in protocols:
1957            x = REX_two()
1958            self.assertEqual(x._proto, None)
1959            s = self.dumps(x, proto)
1960            self.assertEqual(x._proto, proto)
1961            y = self.loads(s)
1962            self.assertEqual(y._proto, None)
1963
1964    def test_reduce_ex_overrides_reduce(self):
1965        for proto in protocols:
1966            x = REX_three()
1967            self.assertEqual(x._proto, None)
1968            s = self.dumps(x, proto)
1969            self.assertEqual(x._proto, proto)
1970            y = self.loads(s)
1971            self.assertEqual(y._proto, None)
1972
1973    def test_reduce_ex_calls_base(self):
1974        for proto in protocols:
1975            x = REX_four()
1976            self.assertEqual(x._proto, None)
1977            s = self.dumps(x, proto)
1978            self.assertEqual(x._proto, proto)
1979            y = self.loads(s)
1980            self.assertEqual(y._proto, proto)
1981
1982    def test_reduce_calls_base(self):
1983        for proto in protocols:
1984            x = REX_five()
1985            self.assertEqual(x._reduce_called, 0)
1986            s = self.dumps(x, proto)
1987            self.assertEqual(x._reduce_called, 1)
1988            y = self.loads(s)
1989            self.assertEqual(y._reduce_called, 1)
1990
1991    @no_tracing
1992    def test_bad_getattr(self):
1993        # Issue #3514: crash when there is an infinite loop in __getattr__
1994        x = BadGetattr()
1995        for proto in protocols:
1996            self.assertRaises(RuntimeError, self.dumps, x, proto)
1997
1998    def test_reduce_bad_iterator(self):
1999        # Issue4176: crash when 4th and 5th items of __reduce__()
2000        # are not iterators
2001        class C(object):
2002            def __reduce__(self):
2003                # 4th item is not an iterator
2004                return list, (), None, [], None
2005        class D(object):
2006            def __reduce__(self):
2007                # 5th item is not an iterator
2008                return dict, (), None, None, []
2009
2010        # Python implementation is less strict and also accepts iterables.
2011        for proto in protocols:
2012            try:
2013                self.dumps(C(), proto)
2014            except pickle.PicklingError:
2015                pass
2016            try:
2017                self.dumps(D(), proto)
2018            except pickle.PicklingError:
2019                pass
2020
2021    def test_many_puts_and_gets(self):
2022        # Test that internal data structures correctly deal with lots of
2023        # puts/gets.
2024        keys = ("aaa" + str(i) for i in range(100))
2025        large_dict = dict((k, [4, 5, 6]) for k in keys)
2026        obj = [dict(large_dict), dict(large_dict), dict(large_dict)]
2027
2028        for proto in protocols:
2029            with self.subTest(proto=proto):
2030                dumped = self.dumps(obj, proto)
2031                loaded = self.loads(dumped)
2032                self.assert_is_copy(obj, loaded)
2033
2034    def test_attribute_name_interning(self):
2035        # Test that attribute names of pickled objects are interned when
2036        # unpickling.
2037        for proto in protocols:
2038            x = C()
2039            x.foo = 42
2040            x.bar = "hello"
2041            s = self.dumps(x, proto)
2042            y = self.loads(s)
2043            x_keys = sorted(x.__dict__)
2044            y_keys = sorted(y.__dict__)
2045            for x_key, y_key in zip(x_keys, y_keys):
2046                self.assertIs(x_key, y_key)
2047
2048    def test_pickle_to_2x(self):
2049        # Pickle non-trivial data with protocol 2, expecting that it yields
2050        # the same result as Python 2.x did.
2051        # NOTE: this test is a bit too strong since we can produce different
2052        # bytecode that 2.x will still understand.
2053        dumped = self.dumps(range(5), 2)
2054        self.assertEqual(dumped, DATA_XRANGE)
2055        dumped = self.dumps(set([3]), 2)
2056        self.assertEqual(dumped, DATA_SET2)
2057
2058    def test_large_pickles(self):
2059        # Test the correctness of internal buffering routines when handling
2060        # large data.
2061        for proto in protocols:
2062            data = (1, min, b'xy' * (30 * 1024), len)
2063            dumped = self.dumps(data, proto)
2064            loaded = self.loads(dumped)
2065            self.assertEqual(len(loaded), len(data))
2066            self.assertEqual(loaded, data)
2067
2068    def test_int_pickling_efficiency(self):
2069        # Test compacity of int representation (see issue #12744)
2070        for proto in protocols:
2071            with self.subTest(proto=proto):
2072                pickles = [self.dumps(2**n, proto) for n in range(70)]
2073                sizes = list(map(len, pickles))
2074                # the size function is monotonic
2075                self.assertEqual(sorted(sizes), sizes)
2076                if proto >= 2:
2077                    for p in pickles:
2078                        self.assertFalse(opcode_in_pickle(pickle.LONG, p))
2079
2080    def _check_pickling_with_opcode(self, obj, opcode, proto):
2081        pickled = self.dumps(obj, proto)
2082        self.assertTrue(opcode_in_pickle(opcode, pickled))
2083        unpickled = self.loads(pickled)
2084        self.assertEqual(obj, unpickled)
2085
2086    def test_appends_on_non_lists(self):
2087        # Issue #17720
2088        obj = REX_six([1, 2, 3])
2089        for proto in protocols:
2090            if proto == 0:
2091                self._check_pickling_with_opcode(obj, pickle.APPEND, proto)
2092            else:
2093                self._check_pickling_with_opcode(obj, pickle.APPENDS, proto)
2094
2095    def test_setitems_on_non_dicts(self):
2096        obj = REX_seven({1: -1, 2: -2, 3: -3})
2097        for proto in protocols:
2098            if proto == 0:
2099                self._check_pickling_with_opcode(obj, pickle.SETITEM, proto)
2100            else:
2101                self._check_pickling_with_opcode(obj, pickle.SETITEMS, proto)
2102
2103    # Exercise framing (proto >= 4) for significant workloads
2104
    # Lower bound that check_frame_opcodes() enforces on every FRAME
    # argument; a run of opcodes written outside any frame must be strictly
    # shorter than this many bytes.
    FRAME_SIZE_MIN = 4
    # Length threshold used by the framing checks: bytes/str arguments found
    # inside a frame must not exceed it, and test_framing_many_objects
    # expects the average frame to be between half of it and all of it.
    FRAME_SIZE_TARGET = 64 * 1024
2107
2108    def check_frame_opcodes(self, pickled):
2109        """
2110        Check the arguments of FRAME opcodes in a protocol 4+ pickle.
2111
2112        Note that binary objects that are larger than FRAME_SIZE_TARGET are not
2113        framed by default and are therefore considered a frame by themselves in
2114        the following consistency check.
2115        """
2116        frame_end = frameless_start = None
2117        frameless_opcodes = {'BINBYTES', 'BINUNICODE', 'BINBYTES8', 'BINUNICODE8'}
2118        for op, arg, pos in pickletools.genops(pickled):
2119            if frame_end is not None:
2120                self.assertLessEqual(pos, frame_end)
2121                if pos == frame_end:
2122                    frame_end = None
2123
2124            if frame_end is not None:  # framed
2125                self.assertNotEqual(op.name, 'FRAME')
2126                if op.name in frameless_opcodes:
2127                    # Only short bytes and str objects should be written
2128                    # in a frame
2129                    self.assertLessEqual(len(arg), self.FRAME_SIZE_TARGET)
2130
2131            else:  # not framed
2132                if (op.name == 'FRAME' or
2133                    (op.name in frameless_opcodes and
2134                     len(arg) > self.FRAME_SIZE_TARGET)):
2135                    # Frame or large bytes or str object
2136                    if frameless_start is not None:
2137                        # Only short data should be written outside of a frame
2138                        self.assertLess(pos - frameless_start,
2139                                        self.FRAME_SIZE_MIN)
2140                        frameless_start = None
2141                elif frameless_start is None and op.name != 'PROTO':
2142                    frameless_start = pos
2143
2144            if op.name == 'FRAME':
2145                self.assertGreaterEqual(arg, self.FRAME_SIZE_MIN)
2146                frame_end = pos + 9 + arg
2147
2148        pos = len(pickled)
2149        if frame_end is not None:
2150            self.assertEqual(frame_end, pos)
2151        elif frameless_start is not None:
2152            self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN)
2153
2154    def test_framing_many_objects(self):
2155        obj = list(range(10**5))
2156        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2157            with self.subTest(proto=proto):
2158                pickled = self.dumps(obj, proto)
2159                unpickled = self.loads(pickled)
2160                self.assertEqual(obj, unpickled)
2161                bytes_per_frame = (len(pickled) /
2162                                   count_opcode(pickle.FRAME, pickled))
2163                self.assertGreater(bytes_per_frame,
2164                                   self.FRAME_SIZE_TARGET / 2)
2165                self.assertLessEqual(bytes_per_frame,
2166                                     self.FRAME_SIZE_TARGET * 1)
2167                self.check_frame_opcodes(pickled)
2168
2169    def test_framing_large_objects(self):
2170        N = 1024 * 1024
2171        small_items = [[i] for i in range(10)]
2172        obj = [b'x' * N, *small_items, b'y' * N, 'z' * N]
2173        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2174            for fast in [False, True]:
2175                with self.subTest(proto=proto, fast=fast):
2176                    if not fast:
2177                        # fast=False by default.
2178                        # This covers in-memory pickling with pickle.dumps().
2179                        pickled = self.dumps(obj, proto)
2180                    else:
2181                        # Pickler is required when fast=True.
2182                        if not hasattr(self, 'pickler'):
2183                            continue
2184                        buf = io.BytesIO()
2185                        pickler = self.pickler(buf, protocol=proto)
2186                        pickler.fast = fast
2187                        pickler.dump(obj)
2188                        pickled = buf.getvalue()
2189                    unpickled = self.loads(pickled)
2190                    # More informative error message in case of failure.
2191                    self.assertEqual([len(x) for x in obj],
2192                                     [len(x) for x in unpickled])
2193                    # Perform full equality check if the lengths match.
2194                    self.assertEqual(obj, unpickled)
2195                    n_frames = count_opcode(pickle.FRAME, pickled)
2196                    # A single frame for small objects between
2197                    # first two large objects.
2198                    self.assertEqual(n_frames, 1)
2199                    self.check_frame_opcodes(pickled)
2200
2201    def test_optional_frames(self):
2202        if pickle.HIGHEST_PROTOCOL < 4:
2203            return
2204
2205        def remove_frames(pickled, keep_frame=None):
2206            """Remove frame opcodes from the given pickle."""
2207            frame_starts = []
2208            # 1 byte for the opcode and 8 for the argument
2209            frame_opcode_size = 9
2210            for opcode, _, pos in pickletools.genops(pickled):
2211                if opcode.name == 'FRAME':
2212                    frame_starts.append(pos)
2213
2214            newpickle = bytearray()
2215            last_frame_end = 0
2216            for i, pos in enumerate(frame_starts):
2217                if keep_frame and keep_frame(i):
2218                    continue
2219                newpickle += pickled[last_frame_end:pos]
2220                last_frame_end = pos + frame_opcode_size
2221            newpickle += pickled[last_frame_end:]
2222            return newpickle
2223
2224        frame_size = self.FRAME_SIZE_TARGET
2225        num_frames = 20
2226        # Large byte objects (dict values) intermitted with small objects
2227        # (dict keys)
2228        obj = {i: bytes([i]) * frame_size for i in range(num_frames)}
2229
2230        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2231            pickled = self.dumps(obj, proto)
2232
2233            frameless_pickle = remove_frames(pickled)
2234            self.assertEqual(count_opcode(pickle.FRAME, frameless_pickle), 0)
2235            self.assertEqual(obj, self.loads(frameless_pickle))
2236
2237            some_frames_pickle = remove_frames(pickled, lambda i: i % 2)
2238            self.assertLess(count_opcode(pickle.FRAME, some_frames_pickle),
2239                            count_opcode(pickle.FRAME, pickled))
2240            self.assertEqual(obj, self.loads(some_frames_pickle))
2241
2242    def test_framed_write_sizes_with_delayed_writer(self):
2243        class ChunkAccumulator:
2244            """Accumulate pickler output in a list of raw chunks."""
2245            def __init__(self):
2246                self.chunks = []
2247            def write(self, chunk):
2248                self.chunks.append(chunk)
2249            def concatenate_chunks(self):
2250                return b"".join(self.chunks)
2251
2252        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2253            objects = [(str(i).encode('ascii'), i % 42, {'i': str(i)})
2254                       for i in range(int(1e4))]
2255            # Add a large unique ASCII string
2256            objects.append('0123456789abcdef' *
2257                           (self.FRAME_SIZE_TARGET // 16 + 1))
2258
2259            # Protocol 4 packs groups of small objects into frames and issues
2260            # calls to write only once or twice per frame:
2261            # The C pickler issues one call to write per-frame (header and
2262            # contents) while Python pickler issues two calls to write: one for
2263            # the frame header and one for the frame binary contents.
2264            writer = ChunkAccumulator()
2265            self.pickler(writer, proto).dump(objects)
2266
2267            # Actually read the binary content of the chunks after the end
2268            # of the call to dump: any memoryview passed to write should not
2269            # be released otherwise this delayed access would not be possible.
2270            pickled = writer.concatenate_chunks()
2271            reconstructed = self.loads(pickled)
2272            self.assertEqual(reconstructed, objects)
2273            self.assertGreater(len(writer.chunks), 1)
2274
2275            # memoryviews should own the memory.
2276            del objects
2277            support.gc_collect()
2278            self.assertEqual(writer.concatenate_chunks(), pickled)
2279
2280            n_frames = (len(pickled) - 1) // self.FRAME_SIZE_TARGET + 1
2281            # There should be at least one call to write per frame
2282            self.assertGreaterEqual(len(writer.chunks), n_frames)
2283
2284            # but not too many either: there can be one for the proto,
2285            # one per-frame header, one per frame for the actual contents,
2286            # and two for the header.
2287            self.assertLessEqual(len(writer.chunks), 2 * n_frames + 3)
2288
2289            chunk_sizes = [len(c) for c in writer.chunks]
2290            large_sizes = [s for s in chunk_sizes
2291                           if s >= self.FRAME_SIZE_TARGET]
2292            medium_sizes = [s for s in chunk_sizes
2293                           if 9 < s < self.FRAME_SIZE_TARGET]
2294            small_sizes = [s for s in chunk_sizes if s <= 9]
2295
2296            # Large chunks should not be too large:
2297            for chunk_size in large_sizes:
2298                self.assertLess(chunk_size, 2 * self.FRAME_SIZE_TARGET,
2299                                chunk_sizes)
2300            # There shouldn't bee too many small chunks: the protocol header,
2301            # the frame headers and the large string headers are written
2302            # in small chunks.
2303            self.assertLessEqual(len(small_sizes),
2304                                 len(large_sizes) + len(medium_sizes) + 3,
2305                                 chunk_sizes)
2306
2307    def test_nested_names(self):
2308        global Nested
2309        class Nested:
2310            class A:
2311                class B:
2312                    class C:
2313                        pass
2314        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
2315            for obj in [Nested.A, Nested.A.B, Nested.A.B.C]:
2316                with self.subTest(proto=proto, obj=obj):
2317                    unpickled = self.loads(self.dumps(obj, proto))
2318                    self.assertIs(obj, unpickled)
2319
2320    def test_recursive_nested_names(self):
2321        global Recursive
2322        class Recursive:
2323            pass
2324        Recursive.mod = sys.modules[Recursive.__module__]
2325        Recursive.__qualname__ = 'Recursive.mod.Recursive'
2326        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
2327            with self.subTest(proto=proto):
2328                unpickled = self.loads(self.dumps(Recursive, proto))
2329                self.assertIs(unpickled, Recursive)
2330        del Recursive.mod # break reference loop
2331
2332    def test_py_methods(self):
2333        global PyMethodsTest
2334        class PyMethodsTest:
2335            @staticmethod
2336            def cheese():
2337                return "cheese"
2338            @classmethod
2339            def wine(cls):
2340                assert cls is PyMethodsTest
2341                return "wine"
2342            def biscuits(self):
2343                assert isinstance(self, PyMethodsTest)
2344                return "biscuits"
2345            class Nested:
2346                "Nested class"
2347                @staticmethod
2348                def ketchup():
2349                    return "ketchup"
2350                @classmethod
2351                def maple(cls):
2352                    assert cls is PyMethodsTest.Nested
2353                    return "maple"
2354                def pie(self):
2355                    assert isinstance(self, PyMethodsTest.Nested)
2356                    return "pie"
2357
2358        py_methods = (
2359            PyMethodsTest.cheese,
2360            PyMethodsTest.wine,
2361            PyMethodsTest().biscuits,
2362            PyMethodsTest.Nested.ketchup,
2363            PyMethodsTest.Nested.maple,
2364            PyMethodsTest.Nested().pie
2365        )
2366        py_unbound_methods = (
2367            (PyMethodsTest.biscuits, PyMethodsTest),
2368            (PyMethodsTest.Nested.pie, PyMethodsTest.Nested)
2369        )
2370        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
2371            for method in py_methods:
2372                with self.subTest(proto=proto, method=method):
2373                    unpickled = self.loads(self.dumps(method, proto))
2374                    self.assertEqual(method(), unpickled())
2375            for method, cls in py_unbound_methods:
2376                obj = cls()
2377                with self.subTest(proto=proto, method=method):
2378                    unpickled = self.loads(self.dumps(method, proto))
2379                    self.assertEqual(method(obj), unpickled(obj))
2380
2381    def test_c_methods(self):
2382        global Subclass
2383        class Subclass(tuple):
2384            class Nested(str):
2385                pass
2386
2387        c_methods = (
2388            # bound built-in method
2389            ("abcd".index, ("c",)),
2390            # unbound built-in method
2391            (str.index, ("abcd", "c")),
2392            # bound "slot" method
2393            ([1, 2, 3].__len__, ()),
2394            # unbound "slot" method
2395            (list.__len__, ([1, 2, 3],)),
2396            # bound "coexist" method
2397            ({1, 2}.__contains__, (2,)),
2398            # unbound "coexist" method
2399            (set.__contains__, ({1, 2}, 2)),
2400            # built-in class method
2401            (dict.fromkeys, (("a", 1), ("b", 2))),
2402            # built-in static method
2403            (bytearray.maketrans, (b"abc", b"xyz")),
2404            # subclass methods
2405            (Subclass([1,2,2]).count, (2,)),
2406            (Subclass.count, (Subclass([1,2,2]), 2)),
2407            (Subclass.Nested("sweet").count, ("e",)),
2408            (Subclass.Nested.count, (Subclass.Nested("sweet"), "e")),
2409        )
2410        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
2411            for method, args in c_methods:
2412                with self.subTest(proto=proto, method=method):
2413                    unpickled = self.loads(self.dumps(method, proto))
2414                    self.assertEqual(method(*args), unpickled(*args))
2415
2416    def test_compat_pickle(self):
2417        tests = [
2418            (range(1, 7), '__builtin__', 'xrange'),
2419            (map(int, '123'), 'itertools', 'imap'),
2420            (functools.reduce, '__builtin__', 'reduce'),
2421            (dbm.whichdb, 'whichdb', 'whichdb'),
2422            (Exception(), 'exceptions', 'Exception'),
2423            (collections.UserDict(), 'UserDict', 'IterableUserDict'),
2424            (collections.UserList(), 'UserList', 'UserList'),
2425            (collections.defaultdict(), 'collections', 'defaultdict'),
2426        ]
2427        for val, mod, name in tests:
2428            for proto in range(3):
2429                with self.subTest(type=type(val), proto=proto):
2430                    pickled = self.dumps(val, proto)
2431                    self.assertIn(('c%s\n%s' % (mod, name)).encode(), pickled)
2432                    self.assertIs(type(self.loads(pickled)), type(val))
2433
2434    def test_local_lookup_error(self):
2435        # Test that whichmodule() errors out cleanly when looking up
2436        # an assumed globally-reachable object fails.
2437        def f():
2438            pass
2439        # Since the function is local, lookup will fail
2440        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
2441            with self.assertRaises((AttributeError, pickle.PicklingError)):
2442                pickletools.dis(self.dumps(f, proto))
2443        # Same without a __module__ attribute (exercises a different path
2444        # in _pickle.c).
2445        del f.__module__
2446        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
2447            with self.assertRaises((AttributeError, pickle.PicklingError)):
2448                pickletools.dis(self.dumps(f, proto))
2449        # Yet a different path.
2450        f.__name__ = f.__qualname__
2451        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
2452            with self.assertRaises((AttributeError, pickle.PicklingError)):
2453                pickletools.dis(self.dumps(f, proto))
2454
2455
class BigmemPickleTests(unittest.TestCase):
    # Pickling of objects whose size sits at the 2 GiB / 4 GiB boundaries.
    #
    # self.dumps is used throughout but is not defined in this class;
    # presumably concrete subclasses supply it -- verify against the users
    # of this class.
    #
    # Every test rebinds its huge objects to None in a ``finally`` block so
    # that the references are dropped even when an assertion fails.

    # Binary protocols can serialize longs of up to 2 GiB-1

    @bigmemtest(size=_2G, memuse=3.6, dry_run=False)
    def test_huge_long_32b(self, size):
        # An integer with more than 8*size bits must be rejected by every
        # protocol >= 2 with ValueError or OverflowError.
        data = 1 << (8 * size)
        try:
            for proto in protocols:
                # Protocols 0 and 1 are not exercised here.
                if proto < 2:
                    continue
                with self.subTest(proto=proto):
                    with self.assertRaises((ValueError, OverflowError)):
                        self.dumps(data, protocol=proto)
        finally:
            data = None

    # Protocol 3 can serialize up to 4 GiB-1 as a bytes object
    # (older protocols don't have a dedicated opcode for bytes and are
    # too inefficient)

    @bigmemtest(size=_2G, memuse=2.5, dry_run=False)
    def test_huge_bytes_32b(self, size):
        # A bytes object of about `size` bytes must be emitted as BINBYTES
        # followed by a 4-byte little-endian length, for protocols >= 3.
        data = b"abcd" * (size // 4)
        try:
            for proto in protocols:
                if proto < 3:
                    continue
                with self.subTest(proto=proto):
                    try:
                        pickled = self.dumps(data, protocol=proto)
                        header = (pickle.BINBYTES +
                                  struct.pack("<I", len(data)))
                        # The opcode header must sit immediately before the
                        # payload inside the pickle.
                        data_start = pickled.index(data)
                        self.assertEqual(
                            header,
                            pickled[data_start-len(header):data_start])
                    finally:
                        pickled = None
        finally:
            data = None

    @bigmemtest(size=_4G, memuse=2.5, dry_run=False)
    def test_huge_bytes_64b(self, size):
        # Same check as the 32b variant, but with a payload of about `size`
        # bytes built at the 4 GiB scale: protocol 3 must refuse it, later
        # protocols must use BINBYTES8 with an 8-byte little-endian length.
        data = b"acbd" * (size // 4)
        try:
            for proto in protocols:
                if proto < 3:
                    continue
                with self.subTest(proto=proto):
                    if proto == 3:
                        # Protocol 3 does not support large bytes objects.
                        # Verify that we do not crash when processing one.
                        with self.assertRaises((ValueError, OverflowError)):
                            self.dumps(data, protocol=proto)
                        continue
                    try:
                        pickled = self.dumps(data, protocol=proto)
                        header = (pickle.BINBYTES8 +
                                  struct.pack("<Q", len(data)))
                        data_start = pickled.index(data)
                        self.assertEqual(
                            header,
                            pickled[data_start-len(header):data_start])
                    finally:
                        pickled = None
        finally:
            data = None

    # All protocols use 1-byte per printable ASCII character; we add another
    # byte because the encoded form has to be copied into the internal buffer.

    @bigmemtest(size=_2G, memuse=8, dry_run=False)
    def test_huge_str_32b(self, size):
        # An ASCII str of about `size` characters must be emitted as
        # BINUNICODE followed by a 4-byte little-endian length, for every
        # protocol except 0.
        data = "abcd" * (size // 4)
        try:
            for proto in protocols:
                if proto == 0:
                    continue
                with self.subTest(proto=proto):
                    try:
                        pickled = self.dumps(data, protocol=proto)
                        header = (pickle.BINUNICODE +
                                  struct.pack("<I", len(data)))
                        data_start = pickled.index(b'abcd')
                        self.assertEqual(
                            header,
                            pickled[data_start-len(header):data_start])
                        # Distance from the first to the end of the last
                        # b"abcd" occurrence must equal the payload length.
                        self.assertEqual((pickled.rindex(b"abcd") + len(b"abcd") -
                                          pickled.index(b"abcd")), len(data))
                    finally:
                        pickled = None
        finally:
            data = None

    # BINUNICODE (protocols 1, 2 and 3) cannot carry more than 2**32 - 1 bytes
    # of utf-8 encoded unicode. BINUNICODE8 (protocol 4) supports these huge
    # unicode strings however.

    @bigmemtest(size=_4G, memuse=8, dry_run=False)
    def test_huge_str_64b(self, size):
        # Protocols 1-3 must refuse the string with ValueError or
        # OverflowError; protocols >= 4 must use BINUNICODE8 with an 8-byte
        # little-endian length.
        data = "abcd" * (size // 4)
        try:
            for proto in protocols:
                if proto == 0:
                    continue
                with self.subTest(proto=proto):
                    if proto < 4:
                        with self.assertRaises((ValueError, OverflowError)):
                            self.dumps(data, protocol=proto)
                        continue
                    try:
                        pickled = self.dumps(data, protocol=proto)
                        header = (pickle.BINUNICODE8 +
                                  struct.pack("<Q", len(data)))
                        data_start = pickled.index(b'abcd')
                        self.assertEqual(
                            header,
                            pickled[data_start-len(header):data_start])
                        self.assertEqual((pickled.rindex(b"abcd") + len(b"abcd") -
                                          pickled.index(b"abcd")), len(data))
                    finally:
                        pickled = None
        finally:
            data = None
2581
2582
2583# Test classes for reduce_ex
2584
class REX_one:
    """Defines __reduce__ only; __reduce_ex__ is the one inherited from object.

    The instance attribute _reduce_called is set to 1 once __reduce__ has
    run (the class-level default is 0).
    """

    _reduce_called = 0

    def __reduce__(self):
        self._reduce_called = 1
        return (REX_one, ())
2591
class REX_two:
    """Defines __reduce_ex__ only; __reduce__ is the one inherited from object.

    The instance attribute _proto records the protocol number passed to the
    most recent __reduce_ex__ call (the class-level default is None).
    """

    _proto = None

    def __reduce_ex__(self, proto):
        self._proto = proto
        return (REX_two, ())
2598
class REX_three:
    """Defines both reduction hooks.

    __reduce_ex__ records the protocol in _proto and reduces to a call of
    REX_two (not REX_three); __reduce__ raises TestFailed whenever it is
    called.
    """

    _proto = None

    def __reduce__(self):
        raise TestFailed("This __reduce__ shouldn't be called")

    def __reduce_ex__(self, proto):
        self._proto = proto
        return (REX_two, ())
2606
class REX_four:
    """__reduce_ex__ that delegates to object.__reduce_ex__.

    The protocol number is stored in _proto before delegating; the base
    class result is returned unchanged.
    """

    _proto = None

    def __reduce_ex__(self, proto):
        self._proto = proto
        base_result = object.__reduce_ex__(self, proto)
        return base_result
2613
class REX_five:
    """__reduce__ that delegates to object.__reduce__.

    Per the original note, this arrangement used to fail with infinite
    recursion.  _reduce_called is set to 1 on the instance before
    delegating.
    """

    _reduce_called = 0

    def __reduce__(self):
        self._reduce_called = 1
        base_result = object.__reduce__(self)
        return base_result
2620
class REX_six:
    """Fixture for the 4th item (list-items iterator) of a reduce value.

    Instances wrap a list in ``items``; __reduce__ hands an iterator over
    that list back as the fourth element, and append() adds to the list.
    Two instances are equal when they have exactly the same type and
    equal ``items``.
    """

    def __init__(self, items=None):
        # A new list per instance when none (None) is supplied.
        if items is None:
            items = []
        self.items = items

    def __eq__(self, other):
        if type(self) is not type(other):
            return False
        return self.items == other.items

    def append(self, item):
        self.items.append(item)

    def __reduce__(self):
        # (callable, args, state, list items, dict items)
        return (type(self), (), None, iter(self.items), None)
2633
class REX_seven:
    """Fixture for the 5th item (dict-items iterator) of a reduce value.

    Instances wrap a dict in ``table``; __reduce__ hands an iterator over
    its (key, value) pairs back as the fifth element, and item assignment
    stores into the dict.  Two instances are equal when they have exactly
    the same type and equal ``table``.
    """

    def __init__(self, table=None):
        # A new dict per instance when none (None) is supplied.
        if table is None:
            table = {}
        self.table = table

    def __eq__(self, other):
        if type(self) is not type(other):
            return False
        return self.table == other.table

    def __setitem__(self, key, value):
        self.table[key] = value

    def __reduce__(self):
        # (callable, args, state, list items, dict items)
        return (type(self), (), None, None, iter(self.table.items()))
2646
2647
2648# Test classes for newobj
2649
# Each class below subclasses one built-in type and carries a ``sample``
# class attribute holding a value of that built-in type.

class MyInt(int):
    """int subclass; ``sample`` is a plain int."""
    sample = 1


class MyFloat(float):
    """float subclass; ``sample`` is a plain float."""
    sample = 1.0


class MyComplex(complex):
    """complex subclass; ``sample`` is a plain complex."""
    sample = complex(1.0, 0.0)


class MyStr(str):
    """str subclass; ``sample`` is an ASCII-only str."""
    sample = "hello"


class MyUnicode(str):
    """str subclass; ``sample`` contains a non-ASCII character."""
    sample = "hello \u1234"


class MyTuple(tuple):
    """tuple subclass; ``sample`` is a plain tuple."""
    sample = (1, 2, 3)


class MyList(list):
    """list subclass; ``sample`` is a plain list."""
    sample = [1, 2, 3]


class MyDict(dict):
    """dict subclass; ``sample`` is a plain dict."""
    sample = dict(a=1, b=2)


class MySet(set):
    """set subclass; ``sample`` is a plain set."""
    sample = {"a", "b"}


class MyFrozenSet(frozenset):
    """frozenset subclass; ``sample`` is a plain frozenset."""
    sample = frozenset(("a", "b"))


# All of the built-in subclasses above, in definition order.
myclasses = [
    MyInt,
    MyFloat,
    MyComplex,
    MyStr,
    MyUnicode,
    MyTuple,
    MyList,
    MyDict,
    MySet,
    MyFrozenSet,
]


class SlotList(MyList):
    """MyList subclass that additionally declares a ``foo`` slot."""
    __slots__ = ["foo"]
2688
class SimpleNewObj(int):
    """int subclass whose __init__ must never run.

    __init__ unconditionally raises TypeError, so instances can only be
    created through __new__.  Equality requires both the integer values
    and the instance __dict__s to match.
    """

    def __init__(self, *args, **kwargs):
        # raise an error, to make sure this isn't called
        raise TypeError("SimpleNewObj.__init__() didn't expect to get called")

    def __eq__(self, other):
        if int(self) != int(other):
            return False
        return self.__dict__ == other.__dict__


class ComplexNewObj(SimpleNewObj):
    """SimpleNewObj supplying positional __new__ arguments.

    The value is given as an upper-case hexadecimal string plus the
    base 16.
    """

    def __getnewargs__(self):
        hex_digits = '%X' % self
        return (hex_digits, 16)


class ComplexNewObjEx(SimpleNewObj):
    """SimpleNewObj supplying positional and keyword __new__ arguments.

    The value is given as an upper-case hexadecimal string, with the
    base passed through the ``base`` keyword.
    """

    def __getnewargs_ex__(self):
        hex_digits = '%X' % self
        return (hex_digits,), {'base': 16}
2703
class BadGetattr:
    # Fixture whose attribute fallback never terminates: __getattr__ looks
    # up ``foo`` on the instance, and since this class never defines
    # ``foo`` that lookup falls back to __getattr__ again, and so on.
    def __getattr__(self, key):
        # ``key`` is ignored; the lookup below re-enters this method.
        self.foo
2707
2708
class AbstractPickleModuleTests(unittest.TestCase):
    # Tests of the module-level API.  dump, dumps, load, Pickler and
    # Unpickler are looked up on self; none of them is defined in this
    # class (presumably concrete subclasses supply them).

    def test_dump_closed_file(self):
        # dump() to a closed file must raise ValueError.
        f = open(TESTFN, "wb")
        try:
            f.close()
            self.assertRaises(ValueError, self.dump, 123, f)
        finally:
            os.remove(TESTFN)

    def test_load_closed_file(self):
        # load() from a closed file must raise ValueError.
        # Create the file first so that it can be opened for reading; a
        # file opened for reading is used so the error can only come from
        # the file being closed, not from it being write-only.
        with open(TESTFN, "wb"):
            pass
        try:
            f = open(TESTFN, "rb")
            f.close()
            self.assertRaises(ValueError, self.load, f)
        finally:
            os.remove(TESTFN)

    def test_load_from_and_dump_to_file(self):
        # A dump() followed by a load() on the same in-memory stream must
        # round-trip the data.
        stream = io.BytesIO()
        data = [123, {}, 124]
        self.dump(data, stream)
        stream.seek(0)
        unpickled = self.load(stream)
        self.assertEqual(unpickled, data)

    def test_highest_protocol(self):
        # Of course this needs to be changed when HIGHEST_PROTOCOL changes.
        self.assertEqual(pickle.HIGHEST_PROTOCOL, 4)

    def test_callapi(self):
        # The protocol (and, for dump, the file) must be accepted both
        # positionally and by keyword.
        f = io.BytesIO()
        # With and without keyword arguments
        self.dump(123, f, -1)
        self.dump(123, file=f, protocol=-1)
        self.dumps(123, -1)
        self.dumps(123, protocol=-1)
        self.Pickler(f, -1)
        self.Pickler(f, protocol=-1)

    def test_bad_init(self):
        # Test issue3664 (pickle can segfault from a badly initialized Pickler).
        # Override initialization without calling __init__() of the superclass.
        class BadPickler(self.Pickler):
            def __init__(self): pass

        class BadUnpickler(self.Unpickler):
            def __init__(self): pass

        self.assertRaises(pickle.PicklingError, BadPickler().dump, 0)
        self.assertRaises(pickle.UnpicklingError, BadUnpickler().load)
2762
2763
class AbstractPersistentPicklerTests(unittest.TestCase):

    # This class defines persistent_id() and persistent_load()
    # functions that should be used by the pickler.  All even integers
    # are pickled using persistent ids; the string "test_false_value" is
    # given the empty (false) persistent id "".  Four counters on self
    # record how often each branch is taken.

    def persistent_id(self, object):
        # Even integers: persistent id is their decimal string.
        if isinstance(object, int) and object % 2 == 0:
            self.id_count += 1
            return str(object)
        # The marker string: persistent id is the empty string.
        if object == "test_false_value":
            self.false_count += 1
            return ""
        # Everything else is pickled normally.
        return None

    def persistent_load(self, oid):
        if oid:
            self.load_count += 1
            value = int(oid)
            assert value % 2 == 0
            return value
        # A false id maps back to the marker string.
        self.load_false_count += 1
        return "test_false_value"

    def test_persistence(self):
        values = list(range(10)) + ["test_false_value"]
        for proto in protocols:
            # Reset all counters before each round trip.
            self.id_count = self.false_count = 0
            self.load_false_count = self.load_count = 0
            round_tripped = self.loads(self.dumps(values, proto))
            self.assertEqual(round_tripped, values)
            # Five even integers in range(10), one marker string.
            self.assertEqual(self.id_count, 5)
            self.assertEqual(self.false_count, 1)
            self.assertEqual(self.load_count, 5)
            self.assertEqual(self.load_false_count, 1)
2802
2803
class AbstractIdentityPersistentPicklerTests(unittest.TestCase):

    # Both persistent hooks are identity functions: every object is its
    # own persistent id, and every persistent id loads as itself.

    def persistent_id(self, obj):
        return obj

    def persistent_load(self, pid):
        return pid

    def _check_return_correct_type(self, obj, proto):
        # A dumps()/loads() round trip must give back an equal object of
        # (a subclass of) the same type.
        result = self.loads(self.dumps(obj, proto))
        self.assertIsInstance(result, type(obj))
        self.assertEqual(result, obj)

    def test_return_correct_type(self):
        binary_proto_samples = [b"abc\n", "abc\n", -1, -1.1 * 0.1, str]
        for proto in protocols:
            # Protocol 0 supports only ASCII strings.
            samples = ["abc"] if proto == 0 else binary_proto_samples
            for obj in samples:
                self._check_return_correct_type(obj, proto)

    def test_protocol0_is_ascii_only(self):
        non_ascii_str = "\N{EMPTY SET}"
        with self.assertRaises(pickle.PicklingError):
            self.dumps(non_ascii_str, 0)
        # Hand-built protocol 0 pickle: PERSID opcode, the UTF-8 encoded
        # id, a newline and the STOP opcode.
        pickled = pickle.PERSID + non_ascii_str.encode('utf-8') + b'\n.'
        with self.assertRaises(pickle.UnpicklingError):
            self.loads(pickled)
2831
2832
class AbstractPicklerUnpicklerObjectTests(unittest.TestCase):

    # Pickler / Unpickler classes under test.  Both default to None here;
    # setUp() asserts that each has been replaced by something truthy.
    pickler_class = None
    unpickler_class = None

    def setUp(self):
        assert self.pickler_class
        assert self.unpickler_class

    def test_clear_pickler_memo(self):
        # To test whether clear_memo() has any effect, we pickle an object,
        # then pickle it again without clearing the memo; the two serialized
        # forms should be different. If we clear_memo() and then pickle the
        # object again, the third serialized form should be identical to the
        # first one we obtained.
        data = ["abcdefg", "abcdefg", 44]
        for proto in protocols:
            buf = io.BytesIO()
            pickler = self.pickler_class(buf, proto)

            def dump_after_emptying_buffer():
                # Reset the BytesIO object, then dump into it again.
                buf.seek(0)
                buf.truncate()
                pickler.dump(data)
                return buf.getvalue()

            pickler.dump(data)
            first_pickled = buf.getvalue()

            # Memo still populated from the first dump.
            second_pickled = dump_after_emptying_buffer()

            # Reset the Pickler as well before the third dump.
            pickler.clear_memo()
            third_pickled = dump_after_emptying_buffer()

            self.assertNotEqual(first_pickled, second_pickled)
            self.assertEqual(first_pickled, third_pickled)

    def test_priming_pickler_memo(self):
        # Verify that we can set the Pickler's memo attribute.
        data = ["abcdefg", "abcdefg", 44]

        plain_out = io.BytesIO()
        pickler = self.pickler_class(plain_out)
        pickler.dump(data)
        first_pickled = plain_out.getvalue()

        # A second pickler that starts out with the first one's memo.
        primed_out = io.BytesIO()
        primed = self.pickler_class(primed_out)
        primed.memo = pickler.memo
        primed.dump(data)
        primed_pickled = primed_out.getvalue()

        self.assertNotEqual(first_pickled, primed_pickled)

    def test_priming_unpickler_memo(self):
        # Verify that we can set the Unpickler's memo attribute.
        data = ["abcdefg", "abcdefg", 44]

        plain_out = io.BytesIO()
        pickler = self.pickler_class(plain_out)
        pickler.dump(data)
        first_pickled = plain_out.getvalue()

        # Pickle the same data again with a pickler primed with the first
        # pickler's memo.
        primed_out = io.BytesIO()
        primed_pickler = self.pickler_class(primed_out)
        primed_pickler.memo = pickler.memo
        primed_pickler.dump(data)
        primed_pickled = primed_out.getvalue()

        unpickler = self.unpickler_class(io.BytesIO(first_pickled))
        unpickled_data1 = unpickler.load()

        self.assertEqual(unpickled_data1, data)

        # Load the primed pickle with an unpickler that has been given the
        # first unpickler's memo.
        primed_unpickler = self.unpickler_class(io.BytesIO(primed_pickled))
        primed_unpickler.memo = unpickler.memo
        unpickled_data2 = primed_unpickler.load()

        primed_unpickler.memo.clear()

        self.assertEqual(unpickled_data2, data)
        self.assertTrue(unpickled_data2 is unpickled_data1)

    def test_reusing_unpickler_objects(self):
        def pickle_with_new_pickler(obj):
            # Dump obj through a brand-new pickler and return the bytes.
            out = io.BytesIO()
            self.pickler_class(out).dump(obj)
            return out.getvalue()

        data1 = ["abcdefg", "abcdefg", 44]
        pickled1 = pickle_with_new_pickler(data1)

        data2 = ["abcdefg", 44, 44]
        pickled2 = pickle_with_new_pickler(data2)

        stream = io.BytesIO()
        stream.write(pickled1)
        stream.seek(0)
        unpickler = self.unpickler_class(stream)
        self.assertEqual(unpickler.load(), data1)

        # Replace the stream's content and load again with the very same
        # unpickler object.
        stream.seek(0)
        stream.truncate()
        stream.write(pickled2)
        stream.seek(0)
        self.assertEqual(unpickler.load(), data2)

    def _check_multiple_unpicklings(self, ioclass):
        # Load the same pickle several times in a row from one stream of
        # type ioclass, using a single unpickler; afterwards the stream
        # must be exhausted (EOFError).
        repeat_count = 5
        for proto in protocols:
            with self.subTest(proto=proto):
                data1 = [(x, str(x)) for x in range(2000)] + [b"abcde", len]
                sink = ioclass()
                self.pickler_class(sink, protocol=proto).dump(data1)
                pickled = sink.getvalue()

                source = ioclass(pickled * repeat_count)
                unpickler = self.unpickler_class(source)
                for _ in range(repeat_count):
                    if source.seekable():
                        pos = source.tell()
                    self.assertEqual(unpickler.load(), data1)
                    # On seekable streams each load must consume exactly
                    # one pickle's worth of bytes.
                    if source.seekable():
                        self.assertEqual(source.tell(), pos + len(pickled))
                with self.assertRaises(EOFError):
                    unpickler.load()

    def test_multiple_unpicklings_seekable(self):
        self._check_multiple_unpicklings(io.BytesIO)

    def test_multiple_unpicklings_unseekable(self):
        self._check_multiple_unpicklings(UnseekableIO)

    def test_unpickling_buffering_readline(self):
        # Issue #12687: the unpickler's buffering logic could fail with
        # text mode opcodes.
        data = list(range(10))
        for proto in protocols:
            for buffer_size in range(1, 11):
                stream = io.BufferedRandom(io.BytesIO(),
                                           buffer_size=buffer_size)
                self.pickler_class(stream, protocol=proto).dump(data)
                stream.seek(0)
                loaded = self.unpickler_class(stream).load()
                self.assertEqual(loaded, data)
2985
2986
2987# Tests for dispatch_table attribute
2988
# Marker string that AAA instances reduce to.
REDUCE_A = 'reduce_A'


class AAA:
    """Object whose __reduce__ rebuilds it as ``str(REDUCE_A)``."""

    def __reduce__(self):
        return (str, (REDUCE_A,))


class BBB:
    """Plain object with no pickling customisation of its own."""
2997
class AbstractDispatchTableTests(unittest.TestCase):

    # pickler_class and get_dispatch_table() are looked up on self; neither
    # is defined in this class (presumably concrete subclasses supply them).

    def test_default_dispatch_table(self):
        # No dispatch_table attribute by default
        pickler = self.pickler_class(io.BytesIO(), 0)
        with self.assertRaises(AttributeError):
            pickler.dispatch_table
        self.assertFalse(hasattr(pickler, 'dispatch_table'))

    def test_class_dispatch_table(self):
        # A dispatch_table attribute can be specified class-wide
        table = self.get_dispatch_table()

        class MyPickler(self.pickler_class):
            dispatch_table = table

        def dumps(obj, protocol=None):
            out = io.BytesIO()
            pickler = MyPickler(out, protocol)
            self.assertEqual(pickler.dispatch_table, table)
            pickler.dump(obj)
            return out.getvalue()

        self._test_dispatch_table(dumps, table)

    def test_instance_dispatch_table(self):
        # A dispatch_table attribute can also be specified instance-wide
        table = self.get_dispatch_table()

        def dumps(obj, protocol=None):
            out = io.BytesIO()
            pickler = self.pickler_class(out, protocol)
            pickler.dispatch_table = table
            self.assertEqual(pickler.dispatch_table, table)
            pickler.dump(obj)
            return out.getvalue()

        self._test_dispatch_table(dumps, table)

    def _test_dispatch_table(self, dumps, dispatch_table):
        # dumps pickles with the customised pickler; dispatch_table is the
        # very table that pickler consults, and is mutated step by step
        # below.  After every step the default pickle.dumps() behaviour
        # must remain unaffected.
        def custom_load_dump(obj):
            return pickle.loads(dumps(obj, 0))

        def default_load_dump(obj):
            return pickle.loads(pickle.dumps(obj, 0))

        def make_reducer(marker):
            # Reduction function that rebuilds any object as str(marker).
            def reducer(obj):
                return str, (marker,)
            return reducer

        def assert_defaults_untouched():
            self.assertEqual(default_load_dump(a), REDUCE_A)
            self.assertIsInstance(default_load_dump(b), BBB)

        # pickling complex numbers using protocol 0 relies on copyreg
        # so check pickling a complex number still works
        z = 1 + 2j
        self.assertEqual(custom_load_dump(z), z)
        self.assertEqual(default_load_dump(z), z)

        # modify pickling of complex
        REDUCE_1 = 'reduce_1'
        reduce_1 = make_reducer(REDUCE_1)
        dispatch_table[complex] = reduce_1
        self.assertEqual(custom_load_dump(z), REDUCE_1)
        self.assertEqual(default_load_dump(z), z)

        # check picklability of AAA and BBB
        a = AAA()
        b = BBB()
        self.assertEqual(custom_load_dump(a), REDUCE_A)
        self.assertIsInstance(custom_load_dump(b), BBB)
        assert_defaults_untouched()

        # modify pickling of BBB
        dispatch_table[BBB] = reduce_1
        self.assertEqual(custom_load_dump(a), REDUCE_A)
        self.assertEqual(custom_load_dump(b), REDUCE_1)
        assert_defaults_untouched()

        # revert pickling of BBB and modify pickling of AAA
        REDUCE_2 = 'reduce_2'
        reduce_2 = make_reducer(REDUCE_2)
        dispatch_table[AAA] = reduce_2
        del dispatch_table[BBB]
        self.assertEqual(custom_load_dump(a), REDUCE_2)
        self.assertIsInstance(custom_load_dump(b), BBB)
        assert_defaults_untouched()
3084
3085
if __name__ == "__main__":
    # Print some stuff that can be used to rewrite the DATA<n> constants:
    # for each protocol, the pickle of create_data() as a parenthesised
    # run of bytes literals, followed by its disassembly as a DATA<n>_DIS
    # triple-quoted string.
    from pickletools import dis
    sample = create_data()
    chunk_len = 20
    for proto in range(pickle.HIGHEST_PROTOCOL+1):
        pickled = pickle.dumps(sample, proto)
        print("DATA{0} = (".format(proto))
        # One bytes literal per chunk_len bytes of pickle.
        for start in range(0, len(pickled), chunk_len):
            chunk = bytes(pickled[start:start+chunk_len])
            print("    {0!r}".format(chunk))
        print(")")
        print()
        print("# Disassembly of DATA{0}".format(proto))
        print("DATA{0}_DIS = \"\"\"\\".format(proto))
        dis(pickled)
        print("\"\"\"")
        print()
3103