1# -*- coding: utf-8 -*-
2import unittest
3import pickle
4import cPickle
5import StringIO
6import cStringIO
7import pickletools
8import copy_reg
9import sys
10
11from test import test_support as support
12from test.test_support import TestFailed, verbose, have_unicode, TESTFN
try:
    from test.test_support import _2G, _1M, precisionbigmemtest
except ImportError:
    # this import might fail when run on older Python versions by test_xpickle
    # Fallbacks so the module still imports: both size constants become 0.
    _2G = _1M = 0
    def precisionbigmemtest(*args, **kwargs):
        # Accepts and ignores any arguments, and returns a one-argument
        # callable that always returns None.
        # NOTE(review): if this is applied as ``@precisionbigmemtest(...)``,
        # the decorated name ends up bound to None -- confirm that this is
        # the intended way of disabling those tests.
        return lambda self: None
20
21# Tests that try a number of pickle protocols should have a
22#     for proto in protocols:
23# kind of outer loop.
# Both pickle implementations must agree that 2 is the highest protocol;
# the canned DATA0..DATA2 fixtures in this file cover exactly protocols 0-2.
assert pickle.HIGHEST_PROTOCOL == cPickle.HIGHEST_PROTOCOL == 2
# Every protocol number from 0 up to and including the highest one.
protocols = range(pickle.HIGHEST_PROTOCOL + 1)
26
27# Copy of test.test_support.run_with_locale. This is needed to support Python
28# 2.4, which didn't include it. This is all to support test_xpickle, which
29# bounces pickled objects through older Python versions to test backwards
30# compatibility.
def run_with_locale(catstr, *locales):
    """Return a decorator that runs the wrapped callable under another locale.

    catstr is the name of a category attribute of the locale module (for
    example 'LC_ALL').  The entries of locales are tried in order and the
    first one that setlocale() accepts is used; if none is accepted the call
    simply proceeds under the current locale.  The setting that was active
    before the call is put back afterwards, whether the call returns or
    raises.  An unknown category name surfaces as AttributeError when the
    wrapped callable is invoked.
    """
    def decorator(func):
        def inner(*args, **kwds):
            try:
                import locale
                category = getattr(locale, catstr)
                saved = locale.setlocale(category)
            except AttributeError:
                # An invalid category string is the test author's mistake;
                # let it propagate.
                raise
            except:
                # The current locale cannot be retrieved, so leave it alone
                # and remember that there is nothing to restore.
                locale = saved = None
            else:
                for candidate in locales:
                    try:
                        locale.setlocale(category, candidate)
                    except:
                        # Not available on this system; try the next one.
                        continue
                    break

            # Run the function; the finally clause restores the locale even
            # when the function raises.
            try:
                return func(*args, **kwds)
            finally:
                if locale and saved:
                    locale.setlocale(category, saved)
        # Keep the wrapped function's identity visible on the wrapper.
        inner.func_name = func.func_name
        inner.__doc__ = func.__doc__
        return inner
    return decorator
62
def no_tracing(func):
    """Decorator that switches tracing off while the wrapped test runs.

    The trace function reported by sys.gettrace() is saved, tracing is
    disabled with sys.settrace(None) for the duration of the call, and the
    saved trace function is reinstalled afterwards even if the call raises.
    On interpreters without sys.gettrace() the function is returned as is.
    """
    if hasattr(sys, 'gettrace'):
        def wrapper(*args, **kwargs):
            saved_trace = sys.gettrace()
            try:
                sys.settrace(None)
                return func(*args, **kwargs)
            finally:
                sys.settrace(saved_trace)
        # Keep the original name on the wrapper.
        wrapper.__name__ = func.__name__
        return wrapper
    # Nothing to save or restore on this interpreter.
    return func
77
78
# Report whether a given opcode occurs anywhere in a pickle.
def opcode_in_pickle(code, pickle):
    """Return True if an opcode whose code is `code` appears in `pickle`.

    The pickle is scanned with pickletools.genops(); scanning stops at the
    first matching opcode.  Returns False when no opcode matches.
    """
    return any(opcode.code == code
               for opcode, _arg, _pos in pickletools.genops(pickle))
85
# Count the occurrences of a given opcode in a pickle.
def count_opcode(code, pickle):
    """Return how many opcodes in `pickle` have the code `code`.

    The pickle is walked completely with pickletools.genops(); the result
    is 0 when the opcode never occurs.
    """
    return sum(1 for opcode, _arg, _pos in pickletools.genops(pickle)
               if opcode.code == code)
93
# StringIO variant whose positioning methods are disabled: peek(), seek()
# and tell() each raise NotImplementedError instead of doing any work.
class UnseekableIO(StringIO.StringIO):
    def peek(self, *args):
        # Any arguments are accepted and ignored.
        raise NotImplementedError

    def seek(self, *args):
        # Any arguments are accepted and ignored.
        raise NotImplementedError

    def tell(self):
        raise NotImplementedError
103
104# We can't very well test the extension registry without putting known stuff
105# in it, but we have to be careful to restore its original state.  Code
106# should do this:
107#
108#     e = ExtensionSaver(extension_code)
109#     try:
110#         fiddle w/ the extension registry's stuff for extension_code
111#     finally:
112#         e.restore()
113
class ExtensionSaver:
    """Save and later restore the copy_reg registration of one extension code.

    Creating an instance records the (module, name) pair registered for
    `code`, if any, and unregisters it so the caller can use the code
    freely.  restore() discards whatever is registered for the code at that
    point and reinstates the recorded pair.
    """

    def __init__(self, code):
        # Remember the current registration for code (None if there is
        # none) and remove it from the registry.
        self.code = code
        self.pair = copy_reg._inverted_registry.get(code)
        if self.pair is not None:
            module, name = self.pair[0], self.pair[1]
            copy_reg.remove_extension(module, name, code)

    def restore(self):
        # Drop any registration made since __init__, then put the saved
        # registration (if there was one) back in place.
        registry = copy_reg._inverted_registry
        if self.code in registry:
            current = registry[self.code]
            copy_reg.remove_extension(current[0], current[1], self.code)
        if self.pair is not None:
            copy_reg.add_extension(self.pair[0], self.pair[1], self.code)
134
# Classic (old-style) fixture class.  Instances carry arbitrary attributes
# and compare by the contents of their attribute dictionaries.
class C:
    def __cmp__(self, other):
        # Delegate the comparison to the two instances' __dict__ objects.
        return cmp(self.__dict__, other.__dict__)
138
# Variant of C whose constructor requires one positional argument; the
# argument is accepted and not stored.
class D(C):
    def __init__(self, arg):
        pass
142
# Variant of C that defines __getinitargs__, returning an empty tuple.
class E(C):
    def __getinitargs__(self):
        return ()
146
# Empty new-style fixture class (derives from object, defines nothing).
class H(object):
    pass
149
# Exception type raised by the I fixture class.  Its constructor takes no
# arguments and does not call Exception.__init__.
class MyErr(Exception):
    def __init__(self):
        pass
153
# Classic fixture class whose constructor always fails with MyErr,
# whatever arguments it is given.
class I:
    def __init__(self, *args, **kwargs):
        raise MyErr()

    def __getinitargs__(self):
        # An empty argument tuple.
        return ()
160
161# Hashable mutable key
class K(object):
    # Wraps a single value; the recursive-dict-key tests use instances as
    # dictionary keys whose value refers back to the dictionary.
    def __init__(self, value):
        self.value = value

    def __reduce__(self):
        # Shouldn't support the recursion itself
        # Reduces to a call of K with the wrapped value as sole argument.
        return K, (self.value,)
169
# Publish the fixture classes as attributes of the __main__ module and
# relabel each one as belonging to it, so that pickles which name them as
# '__main__ C' and so on (see DATA0..DATA2) can be resolved.
__main__ = sys.modules['__main__']
for _fixture_class in (C, D, E, H, I, K):
    setattr(__main__, _fixture_class.__name__, _fixture_class)
    _fixture_class.__module__ = "__main__"
# Do not leave the loop variable behind as a module global.
del _fixture_class
183
# int subclass that additionally keeps the string form of its constructor
# argument in the instance attribute ``str``.
class myint(int):
    def __init__(self, x):
        self.str = str(x)
187
# Variant of C built from two arguments, which __getinitargs__ reports back
# in the same order.
class initarg(C):

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __getinitargs__(self):
        # The same (a, b) pair the constructor received.
        return self.a, self.b
196
# Trivial metaclass: a subclass of type that adds nothing.
class metaclass(type):
    pass
199
# New-style class that names `metaclass` as its metaclass through the
# Python 2 __metaclass__ class attribute.
class use_metaclass(object):
    __metaclass__ = metaclass
202
class pickling_metaclass(type):
    """Metaclass for classes that reduce to a create_dynamic_class() call.

    Classes of this metaclass are expected to carry a `reduce_args`
    attribute (create_dynamic_class() sets it).  Two such classes are equal
    when they have the same type and the same reduce_args; they are
    unhashable.
    """

    # Equality is defined by value, so hashing is switched off.
    __hash__ = None

    def __reduce__(self):
        # Rebuild by calling create_dynamic_class with the stored arguments.
        return (create_dynamic_class, self.reduce_args)

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        return self.reduce_args == other.reduce_args
212
def create_dynamic_class(name, bases):
    """Create a class through pickling_metaclass and tag it for pickling.

    The new class has the given name and bases and an empty namespace.  The
    (name, bases) pair is stored on it as `reduce_args`, which is what
    pickling_metaclass uses for equality and for __reduce__.
    """
    cls = pickling_metaclass(name, bases, {})
    cls.reduce_args = (name, bases)
    return cls
217
218# DATA0 .. DATA2 are the pickles we expect under the various protocols, for
219# the object returned by create_data().
220
221# break into multiple strings to avoid confusing font-lock-mode
222DATA0 = """(lp1
223I0
224aL1L
225aF2
226ac__builtin__
227complex
228p2
229""" + \
230"""(F3
231F0
232tRp3
233aI1
234aI-1
235aI255
236aI-255
237aI-256
238aI65535
239aI-65535
240aI-65536
241aI2147483647
242aI-2147483647
243aI-2147483648
244a""" + \
245"""(S'abc'
246p4
247g4
248""" + \
249"""(i__main__
250C
251p5
252""" + \
253"""(dp6
254S'foo'
255p7
256I1
257sS'bar'
258p8
259I2
260sbg5
261tp9
262ag9
263aI5
264a.
265"""
266
267# Disassembly of DATA0.
268DATA0_DIS = """\
269    0: (    MARK
270    1: l        LIST       (MARK at 0)
271    2: p    PUT        1
272    5: I    INT        0
273    8: a    APPEND
274    9: L    LONG       1L
275   13: a    APPEND
276   14: F    FLOAT      2.0
277   17: a    APPEND
278   18: c    GLOBAL     '__builtin__ complex'
279   39: p    PUT        2
280   42: (    MARK
281   43: F        FLOAT      3.0
282   46: F        FLOAT      0.0
283   49: t        TUPLE      (MARK at 42)
284   50: R    REDUCE
285   51: p    PUT        3
286   54: a    APPEND
287   55: I    INT        1
288   58: a    APPEND
289   59: I    INT        -1
290   63: a    APPEND
291   64: I    INT        255
292   69: a    APPEND
293   70: I    INT        -255
294   76: a    APPEND
295   77: I    INT        -256
296   83: a    APPEND
297   84: I    INT        65535
298   91: a    APPEND
299   92: I    INT        -65535
300  100: a    APPEND
301  101: I    INT        -65536
302  109: a    APPEND
303  110: I    INT        2147483647
304  122: a    APPEND
305  123: I    INT        -2147483647
306  136: a    APPEND
307  137: I    INT        -2147483648
308  150: a    APPEND
309  151: (    MARK
310  152: S        STRING     'abc'
311  159: p        PUT        4
312  162: g        GET        4
313  165: (        MARK
314  166: i            INST       '__main__ C' (MARK at 165)
315  178: p        PUT        5
316  181: (        MARK
317  182: d            DICT       (MARK at 181)
318  183: p        PUT        6
319  186: S        STRING     'foo'
320  193: p        PUT        7
321  196: I        INT        1
322  199: s        SETITEM
323  200: S        STRING     'bar'
324  207: p        PUT        8
325  210: I        INT        2
326  213: s        SETITEM
327  214: b        BUILD
328  215: g        GET        5
329  218: t        TUPLE      (MARK at 151)
330  219: p    PUT        9
331  222: a    APPEND
332  223: g    GET        9
333  226: a    APPEND
334  227: I    INT        5
335  230: a    APPEND
336  231: .    STOP
337highest protocol among opcodes = 0
338"""
339
340DATA1 = (']q\x01(K\x00L1L\nG@\x00\x00\x00\x00\x00\x00\x00'
341         'c__builtin__\ncomplex\nq\x02(G@\x08\x00\x00\x00\x00\x00'
342         '\x00G\x00\x00\x00\x00\x00\x00\x00\x00tRq\x03K\x01J\xff\xff'
343         '\xff\xffK\xffJ\x01\xff\xff\xffJ\x00\xff\xff\xffM\xff\xff'
344         'J\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff\xff\x7fJ\x01\x00'
345         '\x00\x80J\x00\x00\x00\x80(U\x03abcq\x04h\x04(c__main__\n'
346         'C\nq\x05oq\x06}q\x07(U\x03fooq\x08K\x01U\x03barq\tK\x02ubh'
347         '\x06tq\nh\nK\x05e.'
348        )
349
350# Disassembly of DATA1.
351DATA1_DIS = """\
352    0: ]    EMPTY_LIST
353    1: q    BINPUT     1
354    3: (    MARK
355    4: K        BININT1    0
356    6: L        LONG       1L
357   10: G        BINFLOAT   2.0
358   19: c        GLOBAL     '__builtin__ complex'
359   40: q        BINPUT     2
360   42: (        MARK
361   43: G            BINFLOAT   3.0
362   52: G            BINFLOAT   0.0
363   61: t            TUPLE      (MARK at 42)
364   62: R        REDUCE
365   63: q        BINPUT     3
366   65: K        BININT1    1
367   67: J        BININT     -1
368   72: K        BININT1    255
369   74: J        BININT     -255
370   79: J        BININT     -256
371   84: M        BININT2    65535
372   87: J        BININT     -65535
373   92: J        BININT     -65536
374   97: J        BININT     2147483647
375  102: J        BININT     -2147483647
376  107: J        BININT     -2147483648
377  112: (        MARK
378  113: U            SHORT_BINSTRING 'abc'
379  118: q            BINPUT     4
380  120: h            BINGET     4
381  122: (            MARK
382  123: c                GLOBAL     '__main__ C'
383  135: q                BINPUT     5
384  137: o                OBJ        (MARK at 122)
385  138: q            BINPUT     6
386  140: }            EMPTY_DICT
387  141: q            BINPUT     7
388  143: (            MARK
389  144: U                SHORT_BINSTRING 'foo'
390  149: q                BINPUT     8
391  151: K                BININT1    1
392  153: U                SHORT_BINSTRING 'bar'
393  158: q                BINPUT     9
394  160: K                BININT1    2
395  162: u                SETITEMS   (MARK at 143)
396  163: b            BUILD
397  164: h            BINGET     6
398  166: t            TUPLE      (MARK at 112)
399  167: q        BINPUT     10
400  169: h        BINGET     10
401  171: K        BININT1    5
402  173: e        APPENDS    (MARK at 3)
403  174: .    STOP
404highest protocol among opcodes = 1
405"""
406
407DATA2 = ('\x80\x02]q\x01(K\x00\x8a\x01\x01G@\x00\x00\x00\x00\x00\x00\x00'
408         'c__builtin__\ncomplex\nq\x02G@\x08\x00\x00\x00\x00\x00\x00G\x00'
409         '\x00\x00\x00\x00\x00\x00\x00\x86Rq\x03K\x01J\xff\xff\xff\xffK'
410         '\xffJ\x01\xff\xff\xffJ\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xff'
411         'J\x00\x00\xff\xffJ\xff\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00'
412         '\x80(U\x03abcq\x04h\x04(c__main__\nC\nq\x05oq\x06}q\x07(U\x03foo'
413         'q\x08K\x01U\x03barq\tK\x02ubh\x06tq\nh\nK\x05e.')
414
415# Disassembly of DATA2.
416DATA2_DIS = """\
417    0: \x80 PROTO      2
418    2: ]    EMPTY_LIST
419    3: q    BINPUT     1
420    5: (    MARK
421    6: K        BININT1    0
422    8: \x8a     LONG1      1L
423   11: G        BINFLOAT   2.0
424   20: c        GLOBAL     '__builtin__ complex'
425   41: q        BINPUT     2
426   43: G        BINFLOAT   3.0
427   52: G        BINFLOAT   0.0
428   61: \x86     TUPLE2
429   62: R        REDUCE
430   63: q        BINPUT     3
431   65: K        BININT1    1
432   67: J        BININT     -1
433   72: K        BININT1    255
434   74: J        BININT     -255
435   79: J        BININT     -256
436   84: M        BININT2    65535
437   87: J        BININT     -65535
438   92: J        BININT     -65536
439   97: J        BININT     2147483647
440  102: J        BININT     -2147483647
441  107: J        BININT     -2147483648
442  112: (        MARK
443  113: U            SHORT_BINSTRING 'abc'
444  118: q            BINPUT     4
445  120: h            BINGET     4
446  122: (            MARK
447  123: c                GLOBAL     '__main__ C'
448  135: q                BINPUT     5
449  137: o                OBJ        (MARK at 122)
450  138: q            BINPUT     6
451  140: }            EMPTY_DICT
452  141: q            BINPUT     7
453  143: (            MARK
454  144: U                SHORT_BINSTRING 'foo'
455  149: q                BINPUT     8
456  151: K                BININT1    1
457  153: U                SHORT_BINSTRING 'bar'
458  158: q                BINPUT     9
459  160: K                BININT1    2
460  162: u                SETITEMS   (MARK at 143)
461  163: b            BUILD
462  164: h            BINGET     6
463  166: t            TUPLE      (MARK at 112)
464  167: q        BINPUT     10
465  169: h        BINGET     10
466  171: K        BININT1    5
467  173: e        APPENDS    (MARK at 5)
468  174: .    STOP
469highest protocol among opcodes = 2
470"""
471
472def create_data():
473    c = C()
474    c.foo = 1
475    c.bar = 2
476    x = [0, 1L, 2.0, 3.0+0j]
477    # Append some integer test cases at cPickle.c's internal size
478    # cutoffs.
479    uint1max = 0xff
480    uint2max = 0xffff
481    int4max = 0x7fffffff
482    x.extend([1, -1,
483              uint1max, -uint1max, -uint1max-1,
484              uint2max, -uint2max, -uint2max-1,
485               int4max,  -int4max,  -int4max-1])
486    y = ('abc', 'abc', c, c)
487    x.append(y)
488    x.append(y)
489    x.append(5)
490    return x
491
492
493class AbstractUnpickleTests(unittest.TestCase):
    # Subclass must define self.loads, self.error, self.bad_stack_errors,
    # self.bad_mark_errors and self.truncated_errors.
495
496    _testdata = create_data()
497
498    def assert_is_copy(self, obj, objcopy, msg=None):
499        """Utility method to verify if two objects are copies of each others.
500        """
501        if msg is None:
502            msg = "{!r} is not a copy of {!r}".format(obj, objcopy)
503        self.assertEqual(obj, objcopy, msg=msg)
504        self.assertIs(type(obj), type(objcopy), msg=msg)
505        if hasattr(obj, '__dict__'):
506            self.assertDictEqual(obj.__dict__, objcopy.__dict__, msg=msg)
507            self.assertIsNot(obj.__dict__, objcopy.__dict__, msg=msg)
508        if hasattr(obj, '__slots__'):
509            self.assertListEqual(obj.__slots__, objcopy.__slots__, msg=msg)
510            for slot in obj.__slots__:
511                self.assertEqual(
512                    hasattr(obj, slot), hasattr(objcopy, slot), msg=msg)
513                self.assertEqual(getattr(obj, slot, None),
514                                 getattr(objcopy, slot, None), msg=msg)
515
516    def check_unpickling_error(self, errors, data):
517        try:
518            try:
519                self.loads(data)
520            except:
521                if support.verbose > 1:
522                    exc_type, exc, tb = sys.exc_info()
523                    print '%-32r - %s: %s' % (data, exc_type.__name__, exc)
524                raise
525        except errors:
526            pass
527        else:
528            try:
529                exc_name = errors.__name__
530            except AttributeError:
531                exc_name = str(errors)
532            raise self.failureException('%s not raised' % exc_name)
533
534    def test_load_from_canned_string(self):
535        expected = self._testdata
536        for canned in DATA0, DATA1, DATA2:
537            got = self.loads(canned)
538            self.assert_is_copy(expected, got)
539
    def test_garyp(self):
        # Loading the string 'garyp' must fail with self.error.
        self.check_unpickling_error(self.error, 'garyp')
542
543    def test_maxint64(self):
544        maxint64 = (1L << 63) - 1
545        data = 'I' + str(maxint64) + '\n.'
546        got = self.loads(data)
547        self.assertEqual(got, maxint64)
548
549        # Try too with a bogus literal.
550        data = 'I' + str(maxint64) + 'JUNK\n.'
551        self.check_unpickling_error(ValueError, data)
552
    def test_insecure_strings(self):
        # Each entry is a malformed argument for the STRING opcode; loading
        # it must raise ValueError.
        insecure = ["abc", "2 + 2", # not quoted
                    #"'abc' + 'def'", # not a single quoted string
                    "'abc", # quote is not closed
                    "'abc\"", # open quote and close quote don't match
                    "'abc'   ?", # junk after close quote
                    "'\\'", # trailing backslash
                    # issue #17710
                    "'", '"',
                    "' ", '" ',
                    '\'"', '"\'',
                    " ''", ' ""',
                    ' ',
                    # some tests of the quoting rules
                    #"'abc\"\''",
                    #"'\\\\a\'\'\'\\\'\\\\\''",
                    ]
        for s in insecure:
            # Wrap the candidate as "S<arg>\n." -- a STRING opcode followed
            # by STOP.
            buf = "S" + s + "\n."
            self.check_unpickling_error(ValueError, buf)
573
574    def test_correctly_quoted_string(self):
575        goodpickles = [("S''\n.", ''),
576                       ('S""\n.', ''),
577                       ('S"\\n"\n.', '\n'),
578                       ("S'\\n'\n.", '\n')]
579        for p, expected in goodpickles:
580            self.assertEqual(self.loads(p), expected)
581
    def test_load_classic_instance(self):
        # See issue5180.  Test loading 2.x pickles that
        # contain an instance of old style class.
        # Each class is paired with the constructor arguments used to build
        # the instance that the unpickled object is compared against.
        for X, args in [(C, ()), (D, ('x',)), (E, ())]:
            # The fixtures below are written for a class called 'X'; the
            # real class name is substituted into each of them.
            xname = X.__name__.encode('ascii')
            # Protocol 0 (text mode pickle):
            """
             0: (    MARK
             1: i        INST       '__main__ X' (MARK at 0)
            13: p    PUT        0
            16: (    MARK
            17: d        DICT       (MARK at 16)
            18: p    PUT        1
            21: b    BUILD
            22: .    STOP
            """
            pickle0 = ("(i__main__\n"
                       "X\n"
                       "p0\n"
                       "(dp1\nb.").replace('X', xname)
            self.assert_is_copy(X(*args), self.loads(pickle0))

            # Protocol 1 (binary mode pickle)
            """
             0: (    MARK
             1: c        GLOBAL     '__main__ X'
            13: q        BINPUT     0
            15: o        OBJ        (MARK at 0)
            16: q    BINPUT     1
            18: }    EMPTY_DICT
            19: q    BINPUT     2
            21: b    BUILD
            22: .    STOP
            """
            pickle1 = ('(c__main__\n'
                       'X\n'
                       'q\x00oq\x01}q\x02b.').replace('X', xname)
            self.assert_is_copy(X(*args), self.loads(pickle1))

            # Protocol 2 (pickle2 = '\x80\x02' + pickle1)
            """
             0: \x80 PROTO      2
             2: (    MARK
             3: c        GLOBAL     '__main__ X'
            15: q        BINPUT     0
            17: o        OBJ        (MARK at 2)
            18: q    BINPUT     1
            20: }    EMPTY_DICT
            21: q    BINPUT     2
            23: b    BUILD
            24: .    STOP
            """
            pickle2 = ('\x80\x02(c__main__\n'
                       'X\n'
                       'q\x00oq\x01}q\x02b.').replace('X', xname)
            self.assert_is_copy(X(*args), self.loads(pickle2))
638
    def test_load_classic_instance_error(self):
        # Issue #28925.
        # The fixture class I raises MyErr from its constructor; loading a
        # pickle that instantiates it must let that MyErr propagate.
        # Protocol 0 (text mode pickle):
        """
         0: (    MARK
         1: i        INST       '__main__ I' (MARK at 0)
        13: (    MARK
        14: d        DICT       (MARK at 13)
        15: b    BUILD
        16: .    STOP
        """
        pickle0 = ('(i__main__\n'
                   'I\n'
                   '(db.')
        self.assertRaises(MyErr, self.loads, pickle0)

        # Protocol 1 (binary mode pickle)
        """
         0: (    MARK
         1: c        GLOBAL     '__main__ I'
        13: o        OBJ        (MARK at 0)
        14: }    EMPTY_DICT
        15: b    BUILD
        16: .    STOP
        """
        pickle1 = ('(c__main__\n'
                   'I\n'
                   'o}b.')
        self.assertRaises(MyErr, self.loads, pickle1)
668
669    def test_load_str(self):
670        # From Python 2: pickle.dumps('a\x00\xa0', protocol=0)
671        self.assertEqual(self.loads("S'a\\x00\\xa0'\n."), 'a\x00\xa0')
672        # From Python 2: pickle.dumps('a\x00\xa0', protocol=1)
673        self.assertEqual(self.loads('U\x03a\x00\xa0.'), 'a\x00\xa0')
674        # From Python 2: pickle.dumps('a\x00\xa0', protocol=2)
675        self.assertEqual(self.loads('\x80\x02U\x03a\x00\xa0.'), 'a\x00\xa0')
676
677    def test_load_unicode(self):
678        # From Python 2: pickle.dumps(u'π', protocol=0)
679        self.assertEqual(self.loads('V\\u03c0\n.'), u'π')
680        # From Python 2: pickle.dumps(u'π', protocol=1)
681        self.assertEqual(self.loads('X\x02\x00\x00\x00\xcf\x80.'), u'π')
682        # From Python 2: pickle.dumps(u'π', protocol=2)
683        self.assertEqual(self.loads('\x80\x02X\x02\x00\x00\x00\xcf\x80.'), u'π')
684
685    def test_constants(self):
686        self.assertIsNone(self.loads('N.'))
687        self.assertIs(self.loads('\x88.'), True)
688        self.assertIs(self.loads('\x89.'), False)
689        self.assertIs(self.loads('I01\n.'), True)
690        self.assertIs(self.loads('I00\n.'), False)
691
692    def test_misc_get(self):
693        self.check_unpickling_error(self.error, 'g0\np0\n')
694        self.check_unpickling_error(self.error, 'h\x00q\x00')
695
696    def test_get(self):
697        pickled = '((lp100000\ng100000\nt.'
698        unpickled = self.loads(pickled)
699        self.assertEqual(unpickled, ([],)*2)
700        self.assertIs(unpickled[0], unpickled[1])
701
702    def test_binget(self):
703        pickled = '(]q\xffh\xfft.'
704        unpickled = self.loads(pickled)
705        self.assertEqual(unpickled, ([],)*2)
706        self.assertIs(unpickled[0], unpickled[1])
707
708    def test_long_binget(self):
709        pickled = '(]r\x00\x00\x01\x00j\x00\x00\x01\x00t.'
710        unpickled = self.loads(pickled)
711        self.assertEqual(unpickled, ([],)*2)
712        self.assertIs(unpickled[0], unpickled[1])
713
714    def test_dup(self):
715        pickled = '((l2t.'
716        unpickled = self.loads(pickled)
717        self.assertEqual(unpickled, ([],)*2)
718        self.assertIs(unpickled[0], unpickled[1])
719
    def test_bad_stack(self):
        # Each pickle below is expected to fail with one of
        # self.bad_stack_errors.  The trailing comment names the opcode under
        # test; entries without a comment belong to the opcode named above
        # them.
        badpickles = [
            '.',                        # STOP
            '0',                        # POP
            '1',                        # POP_MARK
            '2',                        # DUP
            # '(2',                     # PyUnpickler doesn't raise
            'R',                        # REDUCE
            ')R',
            'a',                        # APPEND
            'Na',
            'b',                        # BUILD
            'Nb',
            'd',                        # DICT
            'e',                        # APPENDS
            # '(e',                     # PyUnpickler raises AttributeError
            'i__builtin__\nlist\n',     # INST
            'l',                        # LIST
            'o',                        # OBJ
            '(o',
            'p1\n',                     # PUT
            'q\x00',                    # BINPUT
            'r\x00\x00\x00\x00',        # LONG_BINPUT
            's',                        # SETITEM
            'Ns',
            'NNs',
            't',                        # TUPLE
            'u',                        # SETITEMS
            # '(u',                     # PyUnpickler doesn't raise
            '}(Nu',
            '\x81',                     # NEWOBJ
            ')\x81',
            '\x85',                     # TUPLE1
            '\x86',                     # TUPLE2
            'N\x86',
            '\x87',                     # TUPLE3
            'N\x87',
            'NN\x87',
        ]
        for p in badpickles:
            self.check_unpickling_error(self.bad_stack_errors, p)
761
    def test_bad_mark(self):
        # Each pickle below contains a MARK ('(') and is expected to fail
        # with one of self.bad_mark_errors.  The trailing comment names the
        # opcode under test; entries without a comment belong to the opcode
        # named above them.
        badpickles = [
            # 'N(.',                      # STOP
            'N(2',                      # DUP
            'c__builtin__\nlist\n)(R',  # REDUCE
            'c__builtin__\nlist\n()R',
            ']N(a',                     # APPEND
                                        # BUILD
            'c__builtin__\nValueError\n)R}(b',
            'c__builtin__\nValueError\n)R(}b',
            '(Nd',                      # DICT
            'N(p1\n',                   # PUT
            'N(q\x00',                  # BINPUT
            'N(r\x00\x00\x00\x00',      # LONG_BINPUT
            '}NN(s',                    # SETITEM
            '}N(Ns',
            '}(NNs',
            '}((u',                     # SETITEMS
                                        # NEWOBJ
            'c__builtin__\nlist\n)(\x81',
            'c__builtin__\nlist\n()\x81',
            'N(\x85',                   # TUPLE1
            'NN(\x86',                  # TUPLE2
            'N(N\x86',
            'NNN(\x87',                 # TUPLE3
            'NN(N\x87',
            'N(NN\x87',
        ]
        for p in badpickles:
            self.check_unpickling_error(self.bad_mark_errors, p)
792
793    def test_truncated_data(self):
794        self.check_unpickling_error(EOFError, '')
795        self.check_unpickling_error(EOFError, 'N')
796        badpickles = [
797            'F',                        # FLOAT
798            'F0.0',
799            'F0.00',
800            'G',                        # BINFLOAT
801            'G\x00\x00\x00\x00\x00\x00\x00',
802            'I',                        # INT
803            'I0',
804            'J',                        # BININT
805            'J\x00\x00\x00',
806            'K',                        # BININT1
807            'L',                        # LONG
808            'L0',
809            'L10',
810            'L0L',
811            'L10L',
812            'M',                        # BININT2
813            'M\x00',
814            # 'P',                        # PERSID
815            # 'Pabc',
816            'S',                        # STRING
817            "S'abc'",
818            'T',                        # BINSTRING
819            'T\x03\x00\x00',
820            'T\x03\x00\x00\x00',
821            'T\x03\x00\x00\x00ab',
822            'U',                        # SHORT_BINSTRING
823            'U\x03',
824            'U\x03ab',
825            'V',                        # UNICODE
826            'Vabc',
827            'X',                        # BINUNICODE
828            'X\x03\x00\x00',
829            'X\x03\x00\x00\x00',
830            'X\x03\x00\x00\x00ab',
831            '(c',                       # GLOBAL
832            '(c__builtin__',
833            '(c__builtin__\n',
834            '(c__builtin__\nlist',
835            'Ng',                       # GET
836            'Ng0',
837            '(i',                       # INST
838            '(i__builtin__',
839            '(i__builtin__\n',
840            '(i__builtin__\nlist',
841            'Nh',                       # BINGET
842            'Nj',                       # LONG_BINGET
843            'Nj\x00\x00\x00',
844            'Np',                       # PUT
845            'Np0',
846            'Nq',                       # BINPUT
847            'Nr',                       # LONG_BINPUT
848            'Nr\x00\x00\x00',
849            '\x80',                     # PROTO
850            '\x82',                     # EXT1
851            '\x83',                     # EXT2
852            '\x84\x01',
853            '\x84',                     # EXT4
854            '\x84\x01\x00\x00',
855            '\x8a',                     # LONG1
856            '\x8b',                     # LONG4
857            '\x8b\x00\x00\x00',
858        ]
859        for p in badpickles:
860            self.check_unpickling_error(self.truncated_errors, p)
861
862
863class AbstractPickleTests(unittest.TestCase):
864    # Subclass must define self.dumps, self.loads.
865
866    _testdata = AbstractUnpickleTests._testdata
867
    def setUp(self):
        # No per-test setup is done here.
        pass
870
871    def test_misc(self):
872        # test various datatypes not tested by testdata
873        for proto in protocols:
874            x = myint(4)
875            s = self.dumps(x, proto)
876            y = self.loads(s)
877            self.assertEqual(x, y)
878
879            x = (1, ())
880            s = self.dumps(x, proto)
881            y = self.loads(s)
882            self.assertEqual(x, y)
883
884            x = initarg(1, x)
885            s = self.dumps(x, proto)
886            y = self.loads(s)
887            self.assertEqual(x, y)
888
889        # XXX test __reduce__ protocol?
890
891    def test_roundtrip_equality(self):
892        expected = self._testdata
893        for proto in protocols:
894            s = self.dumps(expected, proto)
895            got = self.loads(s)
896            self.assertEqual(expected, got)
897
898    # There are gratuitous differences between pickles produced by
899    # pickle and cPickle, largely because cPickle starts PUT indices at
900    # 1 and pickle starts them at 0.  See XXX comment in cPickle's put2() --
901    # there's a comment with an exclamation point there whose meaning
902    # is a mystery.  cPickle also suppresses PUT for objects with a refcount
903    # of 1.
904    def dont_test_disassembly(self):
905        from pickletools import dis
906
907        for proto, expected in (0, DATA0_DIS), (1, DATA1_DIS):
908            s = self.dumps(self._testdata, proto)
909            filelike = cStringIO.StringIO()
910            dis(s, out=filelike)
911            got = filelike.getvalue()
912            self.assertEqual(expected, got)
913
914    def test_recursive_list(self):
915        l = []
916        l.append(l)
917        for proto in protocols:
918            s = self.dumps(l, proto)
919            x = self.loads(s)
920            self.assertIsInstance(x, list)
921            self.assertEqual(len(x), 1)
922            self.assertIs(x[0], x)
923
924    def test_recursive_tuple_and_list(self):
925        t = ([],)
926        t[0].append(t)
927        for proto in protocols:
928            s = self.dumps(t, proto)
929            x = self.loads(s)
930            self.assertIsInstance(x, tuple)
931            self.assertEqual(len(x), 1)
932            self.assertIsInstance(x[0], list)
933            self.assertEqual(len(x[0]), 1)
934            self.assertIs(x[0][0], x)
935
936    def test_recursive_dict(self):
937        d = {}
938        d[1] = d
939        for proto in protocols:
940            s = self.dumps(d, proto)
941            x = self.loads(s)
942            self.assertIsInstance(x, dict)
943            self.assertEqual(x.keys(), [1])
944            self.assertIs(x[1], x)
945
946    def test_recursive_dict_key(self):
947        d = {}
948        k = K(d)
949        d[k] = 1
950        for proto in protocols:
951            s = self.dumps(d, proto)
952            x = self.loads(s)
953            self.assertIsInstance(x, dict)
954            self.assertEqual(len(x.keys()), 1)
955            self.assertIsInstance(x.keys()[0], K)
956            self.assertIs(x.keys()[0].value, x)
957
958    def test_recursive_list_subclass(self):
959        y = MyList()
960        y.append(y)
961        s = self.dumps(y, 2)
962        x = self.loads(s)
963        self.assertIsInstance(x, MyList)
964        self.assertEqual(len(x), 1)
965        self.assertIs(x[0], x)
966
967    def test_recursive_dict_subclass(self):
968        d = MyDict()
969        d[1] = d
970        s = self.dumps(d, 2)
971        x = self.loads(s)
972        self.assertIsInstance(x, MyDict)
973        self.assertEqual(x.keys(), [1])
974        self.assertIs(x[1], x)
975
976    def test_recursive_dict_subclass_key(self):
977        d = MyDict()
978        k = K(d)
979        d[k] = 1
980        s = self.dumps(d, 2)
981        x = self.loads(s)
982        self.assertIsInstance(x, MyDict)
983        self.assertEqual(len(x.keys()), 1)
984        self.assertIsInstance(x.keys()[0], K)
985        self.assertIs(x.keys()[0].value, x)
986
987    def test_recursive_inst(self):
988        i = C()
989        i.attr = i
990        for proto in protocols:
991            s = self.dumps(i, proto)
992            x = self.loads(s)
993            self.assertIsInstance(x, C)
994            self.assertEqual(dir(x), dir(i))
995            self.assertIs(x.attr, x)
996
997    def test_recursive_multi(self):
998        l = []
999        d = {1:l}
1000        i = C()
1001        i.attr = d
1002        l.append(i)
1003        for proto in protocols:
1004            s = self.dumps(l, proto)
1005            x = self.loads(s)
1006            self.assertIsInstance(x, list)
1007            self.assertEqual(len(x), 1)
1008            self.assertEqual(dir(x[0]), dir(i))
1009            self.assertEqual(x[0].attr.keys(), [1])
1010            self.assertTrue(x[0].attr[1] is x)
1011
1012    def check_recursive_collection_and_inst(self, factory):
1013        h = H()
1014        y = factory([h])
1015        h.attr = y
1016        for proto in protocols:
1017            s = self.dumps(y, proto)
1018            x = self.loads(s)
1019            self.assertIsInstance(x, type(y))
1020            self.assertEqual(len(x), 1)
1021            self.assertIsInstance(list(x)[0], H)
1022            self.assertIs(list(x)[0].attr, x)
1023
    def test_recursive_list_and_inst(self):
        # Cycle between a plain list and the H instance it contains.
        self.check_recursive_collection_and_inst(list)
1026
    def test_recursive_tuple_and_inst(self):
        # Cycle between a plain tuple and the H instance it contains.
        self.check_recursive_collection_and_inst(tuple)
1029
    def test_recursive_dict_and_inst(self):
        # Cycle between a dict (built with dict.fromkeys, so the H instance
        # is the key) and that H instance.
        self.check_recursive_collection_and_inst(dict.fromkeys)
1032
    def test_recursive_set_and_inst(self):
        # Cycle between a set and the H instance it contains.
        self.check_recursive_collection_and_inst(set)
1035
    def test_recursive_frozenset_and_inst(self):
        # Cycle between a frozenset and the H instance it contains.
        self.check_recursive_collection_and_inst(frozenset)
1038
    def test_recursive_list_subclass_and_inst(self):
        # Cycle between a MyList (list subclass) and the H instance it contains.
        self.check_recursive_collection_and_inst(MyList)
1041
    def test_recursive_tuple_subclass_and_inst(self):
        # Cycle between a MyTuple (tuple subclass) and the H instance it contains.
        self.check_recursive_collection_and_inst(MyTuple)
1044
    def test_recursive_dict_subclass_and_inst(self):
        # Cycle between a MyDict (built with MyDict.fromkeys, so the H
        # instance is the key) and that H instance.
        self.check_recursive_collection_and_inst(MyDict.fromkeys)
1047
1048    if have_unicode:
1049        def test_unicode(self):
1050            endcases = [u'', u'<\\u>', u'<\\\u1234>', u'<\n>',
1051                        u'<\\>', u'<\\\U00012345>',
1052                        # surrogates
1053                        u'<\udc80>']
1054            for proto in protocols:
1055                for u in endcases:
1056                    p = self.dumps(u, proto)
1057                    u2 = self.loads(p)
1058                    self.assertEqual(u2, u)
1059
1060        def test_unicode_high_plane(self):
1061            t = u'\U00012345'
1062            for proto in protocols:
1063                p = self.dumps(t, proto)
1064                t2 = self.loads(p)
1065                self.assertEqual(t2, t)
1066
1067    def test_ints(self):
1068        import sys
1069        for proto in protocols:
1070            n = sys.maxint
1071            while n:
1072                for expected in (-n, n):
1073                    s = self.dumps(expected, proto)
1074                    n2 = self.loads(s)
1075                    self.assertEqual(expected, n2)
1076                n = n >> 1
1077
1078    def test_long(self):
1079        for proto in protocols:
1080            # 256 bytes is where LONG4 begins.
1081            for nbits in 1, 8, 8*254, 8*255, 8*256, 8*257:
1082                nbase = 1L << nbits
1083                for npos in nbase-1, nbase, nbase+1:
1084                    for n in npos, -npos:
1085                        pickle = self.dumps(n, proto)
1086                        got = self.loads(pickle)
1087                        self.assertEqual(n, got)
1088        # Try a monster.  This is quadratic-time in protos 0 & 1, so don't
1089        # bother with those.
1090        nbase = long("deadbeeffeedface", 16)
1091        nbase += nbase << 1000000
1092        for n in nbase, -nbase:
1093            p = self.dumps(n, 2)
1094            got = self.loads(p)
1095            self.assertEqual(n, got)
1096
1097    def test_float(self):
1098        test_values = [0.0, 4.94e-324, 1e-310, 7e-308, 6.626e-34, 0.1, 0.5,
1099                       3.14, 263.44582062374053, 6.022e23, 1e30]
1100        test_values = test_values + [-x for x in test_values]
1101        for proto in protocols:
1102            for value in test_values:
1103                pickle = self.dumps(value, proto)
1104                got = self.loads(pickle)
1105                self.assertEqual(value, got)
1106
1107    @run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
1108    def test_float_format(self):
1109        # make sure that floats are formatted locale independent
1110        self.assertEqual(self.dumps(1.2)[0:3], 'F1.')
1111
    def test_reduce(self):
        # Placeholder: the body is empty, so nothing is verified here.
        pass
1114
    def test_getinitargs(self):
        # Placeholder: the body is empty, so nothing is verified here.
        pass
1117
1118    def test_metaclass(self):
1119        a = use_metaclass()
1120        for proto in protocols:
1121            s = self.dumps(a, proto)
1122            b = self.loads(s)
1123            self.assertEqual(a.__class__, b.__class__)
1124
1125    def test_dynamic_class(self):
1126        a = create_dynamic_class("my_dynamic_class", (object,))
1127        copy_reg.pickle(pickling_metaclass, pickling_metaclass.__reduce__)
1128        for proto in protocols:
1129            s = self.dumps(a, proto)
1130            b = self.loads(s)
1131            self.assertEqual(a, b)
1132            self.assertIs(a.__class__, b.__class__)
1133
1134    def test_structseq(self):
1135        import time
1136        import os
1137
1138        t = time.localtime()
1139        for proto in protocols:
1140            s = self.dumps(t, proto)
1141            u = self.loads(s)
1142            self.assertEqual(t, u)
1143            if hasattr(os, "stat"):
1144                t = os.stat(os.curdir)
1145                s = self.dumps(t, proto)
1146                u = self.loads(s)
1147                self.assertEqual(t, u)
1148            if hasattr(os, "statvfs"):
1149                t = os.statvfs(os.curdir)
1150                s = self.dumps(t, proto)
1151                u = self.loads(s)
1152                self.assertEqual(t, u)
1153
1154    # Tests for protocol 2
1155
1156    def test_proto(self):
1157        build_none = pickle.NONE + pickle.STOP
1158        for proto in protocols:
1159            expected = build_none
1160            if proto >= 2:
1161                expected = pickle.PROTO + chr(proto) + expected
1162            p = self.dumps(None, proto)
1163            self.assertEqual(p, expected)
1164
1165        oob = protocols[-1] + 1     # a future protocol
1166        badpickle = pickle.PROTO + chr(oob) + build_none
1167        try:
1168            self.loads(badpickle)
1169        except ValueError, detail:
1170            self.assertTrue(str(detail).startswith(
1171                                            "unsupported pickle protocol"))
1172        else:
1173            self.fail("expected bad protocol number to raise ValueError")
1174
1175    def test_long1(self):
1176        x = 12345678910111213141516178920L
1177        for proto in protocols:
1178            s = self.dumps(x, proto)
1179            y = self.loads(s)
1180            self.assertEqual(x, y)
1181            self.assertEqual(opcode_in_pickle(pickle.LONG1, s), proto >= 2)
1182
1183    def test_long4(self):
1184        x = 12345678910111213141516178920L << (256*8)
1185        for proto in protocols:
1186            s = self.dumps(x, proto)
1187            y = self.loads(s)
1188            self.assertEqual(x, y)
1189            self.assertEqual(opcode_in_pickle(pickle.LONG4, s), proto >= 2)
1190
1191    def test_short_tuples(self):
1192        # Map (proto, len(tuple)) to expected opcode.
1193        expected_opcode = {(0, 0): pickle.TUPLE,
1194                           (0, 1): pickle.TUPLE,
1195                           (0, 2): pickle.TUPLE,
1196                           (0, 3): pickle.TUPLE,
1197                           (0, 4): pickle.TUPLE,
1198
1199                           (1, 0): pickle.EMPTY_TUPLE,
1200                           (1, 1): pickle.TUPLE,
1201                           (1, 2): pickle.TUPLE,
1202                           (1, 3): pickle.TUPLE,
1203                           (1, 4): pickle.TUPLE,
1204
1205                           (2, 0): pickle.EMPTY_TUPLE,
1206                           (2, 1): pickle.TUPLE1,
1207                           (2, 2): pickle.TUPLE2,
1208                           (2, 3): pickle.TUPLE3,
1209                           (2, 4): pickle.TUPLE,
1210                          }
1211        a = ()
1212        b = (1,)
1213        c = (1, 2)
1214        d = (1, 2, 3)
1215        e = (1, 2, 3, 4)
1216        for proto in protocols:
1217            for x in a, b, c, d, e:
1218                s = self.dumps(x, proto)
1219                y = self.loads(s)
1220                self.assertEqual(x, y, (proto, x, s, y))
1221                expected = expected_opcode[proto, len(x)]
1222                self.assertEqual(opcode_in_pickle(expected, s), True)
1223
1224    def test_singletons(self):
1225        # Map (proto, singleton) to expected opcode.
1226        expected_opcode = {(0, None): pickle.NONE,
1227                           (1, None): pickle.NONE,
1228                           (2, None): pickle.NONE,
1229
1230                           (0, True): pickle.INT,
1231                           (1, True): pickle.INT,
1232                           (2, True): pickle.NEWTRUE,
1233
1234                           (0, False): pickle.INT,
1235                           (1, False): pickle.INT,
1236                           (2, False): pickle.NEWFALSE,
1237                          }
1238        for proto in protocols:
1239            for x in None, False, True:
1240                s = self.dumps(x, proto)
1241                y = self.loads(s)
1242                self.assertTrue(x is y, (proto, x, s, y))
1243                expected = expected_opcode[proto, x]
1244                self.assertEqual(opcode_in_pickle(expected, s), True)
1245
1246    def test_newobj_tuple(self):
1247        x = MyTuple([1, 2, 3])
1248        x.foo = 42
1249        x.bar = "hello"
1250        for proto in protocols:
1251            s = self.dumps(x, proto)
1252            y = self.loads(s)
1253            self.assertEqual(tuple(x), tuple(y))
1254            self.assertEqual(x.__dict__, y.__dict__)
1255
1256    def test_newobj_list(self):
1257        x = MyList([1, 2, 3])
1258        x.foo = 42
1259        x.bar = "hello"
1260        for proto in protocols:
1261            s = self.dumps(x, proto)
1262            y = self.loads(s)
1263            self.assertEqual(list(x), list(y))
1264            self.assertEqual(x.__dict__, y.__dict__)
1265
1266    def test_newobj_generic(self):
1267        for proto in protocols:
1268            for C in myclasses:
1269                B = C.__base__
1270                x = C(C.sample)
1271                x.foo = 42
1272                s = self.dumps(x, proto)
1273                y = self.loads(s)
1274                detail = (proto, C, B, x, y, type(y))
1275                self.assertEqual(B(x), B(y), detail)
1276                self.assertEqual(x.__dict__, y.__dict__, detail)
1277
1278    def test_newobj_proxies(self):
1279        # NEWOBJ should use the __class__ rather than the raw type
1280        import weakref
1281        classes = myclasses[:]
1282        # Cannot create weakproxies to these classes
1283        for c in (MyInt, MyLong, MyStr, MyTuple):
1284            classes.remove(c)
1285        for proto in protocols:
1286            for C in classes:
1287                B = C.__base__
1288                x = C(C.sample)
1289                x.foo = 42
1290                p = weakref.proxy(x)
1291                s = self.dumps(p, proto)
1292                y = self.loads(s)
1293                self.assertEqual(type(y), type(x))  # rather than type(p)
1294                detail = (proto, C, B, x, y, type(y))
1295                self.assertEqual(B(x), B(y), detail)
1296                self.assertEqual(x.__dict__, y.__dict__, detail)
1297
1298    # Register a type with copy_reg, with extension code extcode.  Pickle
1299    # an object of that type.  Check that the resulting pickle uses opcode
1300    # (EXT[124]) under proto 2, and not in proto 1.
1301
1302    def produce_global_ext(self, extcode, opcode):
1303        e = ExtensionSaver(extcode)
1304        try:
1305            copy_reg.add_extension(__name__, "MyList", extcode)
1306            x = MyList([1, 2, 3])
1307            x.foo = 42
1308            x.bar = "hello"
1309
1310            # Dump using protocol 1 for comparison.
1311            s1 = self.dumps(x, 1)
1312            self.assertIn(__name__, s1)
1313            self.assertIn("MyList", s1)
1314            self.assertEqual(opcode_in_pickle(opcode, s1), False)
1315
1316            y = self.loads(s1)
1317            self.assertEqual(list(x), list(y))
1318            self.assertEqual(x.__dict__, y.__dict__)
1319
1320            # Dump using protocol 2 for test.
1321            s2 = self.dumps(x, 2)
1322            self.assertNotIn(__name__, s2)
1323            self.assertNotIn("MyList", s2)
1324            self.assertEqual(opcode_in_pickle(opcode, s2), True)
1325
1326            y = self.loads(s2)
1327            self.assertEqual(list(x), list(y))
1328            self.assertEqual(x.__dict__, y.__dict__)
1329
1330        finally:
1331            e.restore()
1332
1333    def test_global_ext1(self):
1334        self.produce_global_ext(0x00000001, pickle.EXT1)  # smallest EXT1 code
1335        self.produce_global_ext(0x000000ff, pickle.EXT1)  # largest EXT1 code
1336
1337    def test_global_ext2(self):
1338        self.produce_global_ext(0x00000100, pickle.EXT2)  # smallest EXT2 code
1339        self.produce_global_ext(0x0000ffff, pickle.EXT2)  # largest EXT2 code
1340        self.produce_global_ext(0x0000abcd, pickle.EXT2)  # check endianness
1341
1342    def test_global_ext4(self):
1343        self.produce_global_ext(0x00010000, pickle.EXT4)  # smallest EXT4 code
1344        self.produce_global_ext(0x7fffffff, pickle.EXT4)  # largest EXT4 code
1345        self.produce_global_ext(0x12abcdef, pickle.EXT4)  # check endianness
1346
1347    def test_list_chunking(self):
1348        n = 10  # too small to chunk
1349        x = range(n)
1350        for proto in protocols:
1351            s = self.dumps(x, proto)
1352            y = self.loads(s)
1353            self.assertEqual(x, y)
1354            num_appends = count_opcode(pickle.APPENDS, s)
1355            self.assertEqual(num_appends, proto > 0)
1356
1357        n = 2500  # expect at least two chunks when proto > 0
1358        x = range(n)
1359        for proto in protocols:
1360            s = self.dumps(x, proto)
1361            y = self.loads(s)
1362            self.assertEqual(x, y)
1363            num_appends = count_opcode(pickle.APPENDS, s)
1364            if proto == 0:
1365                self.assertEqual(num_appends, 0)
1366            else:
1367                self.assertTrue(num_appends >= 2)
1368
1369    def test_dict_chunking(self):
1370        n = 10  # too small to chunk
1371        x = dict.fromkeys(range(n))
1372        for proto in protocols:
1373            s = self.dumps(x, proto)
1374            y = self.loads(s)
1375            self.assertEqual(x, y)
1376            num_setitems = count_opcode(pickle.SETITEMS, s)
1377            self.assertEqual(num_setitems, proto > 0)
1378
1379        n = 2500  # expect at least two chunks when proto > 0
1380        x = dict.fromkeys(range(n))
1381        for proto in protocols:
1382            s = self.dumps(x, proto)
1383            y = self.loads(s)
1384            self.assertEqual(x, y)
1385            num_setitems = count_opcode(pickle.SETITEMS, s)
1386            if proto == 0:
1387                self.assertEqual(num_setitems, 0)
1388            else:
1389                self.assertTrue(num_setitems >= 2)
1390
1391    def test_simple_newobj(self):
1392        x = SimpleNewObj.__new__(SimpleNewObj, 0xface)  # avoid __init__
1393        x.abc = 666
1394        for proto in protocols:
1395            s = self.dumps(x, proto)
1396            if proto < 1:
1397                self.assertIn('\nI64206', s)  # INT
1398            else:
1399                self.assertIn('M\xce\xfa', s)  # BININT2
1400            self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s), proto >= 2)
1401            y = self.loads(s)   # will raise TypeError if __init__ called
1402            self.assertEqual(y.abc, 666)
1403            self.assertEqual(x.__dict__, y.__dict__)
1404
1405    def test_complex_newobj(self):
1406        x = ComplexNewObj.__new__(ComplexNewObj, 0xface)  # avoid __init__
1407        x.abc = 666
1408        for proto in protocols:
1409            s = self.dumps(x, proto)
1410            if proto < 1:
1411                self.assertIn('\nI64206', s)  # INT
1412            elif proto < 2:
1413                self.assertIn('M\xce\xfa', s)  # BININT2
1414            else:
1415                self.assertIn('U\x04FACE', s)  # SHORT_BINSTRING
1416            self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s), proto >= 2)
1417            y = self.loads(s)   # will raise TypeError if __init__ called
1418            self.assertEqual(y.abc, 666)
1419            self.assertEqual(x.__dict__, y.__dict__)
1420
1421    def test_newobj_list_slots(self):
1422        x = SlotList([1, 2, 3])
1423        x.foo = 42
1424        x.bar = "hello"
1425        s = self.dumps(x, 2)
1426        y = self.loads(s)
1427        self.assertEqual(list(x), list(y))
1428        self.assertEqual(x.__dict__, y.__dict__)
1429        self.assertEqual(x.foo, y.foo)
1430        self.assertEqual(x.bar, y.bar)
1431
1432    def test_reduce_overrides_default_reduce_ex(self):
1433        for proto in protocols:
1434            x = REX_one()
1435            self.assertEqual(x._reduce_called, 0)
1436            s = self.dumps(x, proto)
1437            self.assertEqual(x._reduce_called, 1)
1438            y = self.loads(s)
1439            self.assertEqual(y._reduce_called, 0)
1440
1441    def test_reduce_ex_called(self):
1442        for proto in protocols:
1443            x = REX_two()
1444            self.assertEqual(x._proto, None)
1445            s = self.dumps(x, proto)
1446            self.assertEqual(x._proto, proto)
1447            y = self.loads(s)
1448            self.assertEqual(y._proto, None)
1449
1450    def test_reduce_ex_overrides_reduce(self):
1451        for proto in protocols:
1452            x = REX_three()
1453            self.assertEqual(x._proto, None)
1454            s = self.dumps(x, proto)
1455            self.assertEqual(x._proto, proto)
1456            y = self.loads(s)
1457            self.assertEqual(y._proto, None)
1458
1459    def test_reduce_ex_calls_base(self):
1460        for proto in protocols:
1461            x = REX_four()
1462            self.assertEqual(x._proto, None)
1463            s = self.dumps(x, proto)
1464            self.assertEqual(x._proto, proto)
1465            y = self.loads(s)
1466            self.assertEqual(y._proto, proto)
1467
1468    def test_reduce_calls_base(self):
1469        for proto in protocols:
1470            x = REX_five()
1471            self.assertEqual(x._reduce_called, 0)
1472            s = self.dumps(x, proto)
1473            self.assertEqual(x._reduce_called, 1)
1474            y = self.loads(s)
1475            self.assertEqual(y._reduce_called, 1)
1476
    @no_tracing
    def test_bad_getattr(self):
        # Issue #3514: crash when there is an infinite loop in __getattr__
        # Dumping such an object must raise RuntimeError under every
        # protocol instead of crashing the interpreter.
        x = BadGetattr()
        for proto in protocols:
            self.assertRaises(RuntimeError, self.dumps, x, proto)
1483
1484    def test_reduce_bad_iterator(self):
1485        # Issue4176: crash when 4th and 5th items of __reduce__()
1486        # are not iterators
1487        class C(object):
1488            def __reduce__(self):
1489                # 4th item is not an iterator
1490                return list, (), None, [], None
1491        class D(object):
1492            def __reduce__(self):
1493                # 5th item is not an iterator
1494                return dict, (), None, None, []
1495
1496        # Protocol 0 in Python implementation is less strict and also accepts
1497        # iterables.
1498        for proto in protocols:
1499            try:
1500                self.dumps(C(), proto)
1501            except (AttributeError, pickle.PicklingError, cPickle.PicklingError):
1502                pass
1503            try:
1504                self.dumps(D(), proto)
1505            except (AttributeError, pickle.PicklingError, cPickle.PicklingError):
1506                pass
1507
1508    def test_many_puts_and_gets(self):
1509        # Test that internal data structures correctly deal with lots of
1510        # puts/gets.
1511        keys = ("aaa" + str(i) for i in xrange(100))
1512        large_dict = dict((k, [4, 5, 6]) for k in keys)
1513        obj = [dict(large_dict), dict(large_dict), dict(large_dict)]
1514
1515        for proto in protocols:
1516            dumped = self.dumps(obj, proto)
1517            loaded = self.loads(dumped)
1518            self.assertEqual(loaded, obj,
1519                             "Failed protocol %d: %r != %r"
1520                             % (proto, obj, loaded))
1521
1522    def test_attribute_name_interning(self):
1523        # Test that attribute names of pickled objects are interned when
1524        # unpickling.
1525        for proto in protocols:
1526            x = C()
1527            x.foo = 42
1528            x.bar = "hello"
1529            s = self.dumps(x, proto)
1530            y = self.loads(s)
1531            x_keys = sorted(x.__dict__)
1532            y_keys = sorted(y.__dict__)
1533            for x_key, y_key in zip(x_keys, y_keys):
1534                self.assertIs(x_key, y_key)
1535
1536    def test_large_pickles(self):
1537        # Test the correctness of internal buffering routines when handling
1538        # large data.
1539        for proto in protocols:
1540            data = (1, min, 'xy' * (30 * 1024), len)
1541            dumped = self.dumps(data, proto)
1542            loaded = self.loads(dumped)
1543            self.assertEqual(len(loaded), len(data))
1544            self.assertEqual(loaded, data)
1545
1546    def _check_pickling_with_opcode(self, obj, opcode, proto):
1547        pickled = self.dumps(obj, proto)
1548        self.assertTrue(opcode_in_pickle(opcode, pickled))
1549        unpickled = self.loads(pickled)
1550        self.assertEqual(obj, unpickled)
1551
1552    def test_appends_on_non_lists(self):
1553        # Issue #17720
1554        obj = REX_six([1, 2, 3])
1555        for proto in protocols:
1556            if proto == 0:
1557                self._check_pickling_with_opcode(obj, pickle.APPEND, proto)
1558            else:
1559                self._check_pickling_with_opcode(obj, pickle.APPENDS, proto)
1560
1561    def test_setitems_on_non_dicts(self):
1562        obj = REX_seven({1: -1, 2: -2, 3: -3})
1563        for proto in protocols:
1564            if proto == 0:
1565                self._check_pickling_with_opcode(obj, pickle.SETITEM, proto)
1566            else:
1567                self._check_pickling_with_opcode(obj, pickle.SETITEMS, proto)
1568
1569
1570# Test classes for reduce_ex
1571
class REX_one(object):
    """Defines only __reduce__; __reduce_ex__ is inherited from object.

    _reduce_called flips from 0 to 1 on the instance when __reduce__ runs.
    """
    _reduce_called = 0

    def __reduce__(self):
        self._reduce_called = 1
        rebuild_args = ()
        return REX_one, rebuild_args
1578
class REX_two(object):
    """Defines only __reduce_ex__; __reduce__ is inherited from object.

    _proto records the protocol number passed to __reduce_ex__.
    """
    _proto = None

    def __reduce_ex__(self, proto):
        self._proto = proto
        rebuild_args = ()
        return REX_two, rebuild_args
1585
class REX_three(object):
    """Defines both hooks; only __reduce_ex__ is supposed to be used.

    _proto records the protocol number passed to __reduce_ex__.
    __reduce__ raises TestFailed so that any call to it is noticed.
    """
    _proto = None
    def __reduce_ex__(self, proto):
        self._proto = proto
        # Rebuild as REX_three (not REX_two) so the round trip yields an
        # object of the class that was actually pickled.
        return REX_three, ()
    def __reduce__(self):
        raise TestFailed("This __reduce__ shouldn't be called")
1593
class REX_four(object):
    """Records the protocol, then defers to object.__reduce_ex__.

    Calling the base class method should succeed.
    """
    _proto = None

    def __reduce_ex__(self, proto):
        self._proto = proto
        reduced = object.__reduce_ex__(self, proto)
        return reduced
1600
class REX_five(object):
    """Sets a flag, then defers to object.__reduce__.

    This one used to fail with infinite recursion.
    """
    _reduce_called = 0

    def __reduce__(self):
        self._reduce_called = 1
        reduced = object.__reduce__(self)
        return reduced
1607
class REX_six(object):
    """Exercises the 4th item (list-items iterator) of a __reduce__ result.

    Wraps a list in `items` and offers append()/extend().  Instances are
    unhashable and compare equal only to instances of exactly the same
    type with equal `items`.
    """
    __hash__ = None

    def __init__(self, items=None):
        if items is not None:
            self.items = items
        else:
            self.items = []

    def __eq__(self, other):
        if type(self) is not type(other):
            return False
        return self.items == other.items

    def append(self, item):
        self.items.append(item)

    def extend(self, items):
        # Routed through append() one element at a time.
        for element in items:
            self.append(element)

    def __reduce__(self):
        # (callable, args, state, list-items iterator, dict-items iterator)
        return type(self), (), None, iter(self.items), None
1626
class REX_seven(object):
    """Exercises the 5th item (dict-items iterator) of a __reduce__ result.

    Wraps a dict in `table` and supports item assignment.  Instances are
    unhashable and compare equal only to instances of exactly the same
    type with equal `table`.
    """
    __hash__ = None

    def __init__(self, table=None):
        if table is not None:
            self.table = table
        else:
            self.table = {}

    def __eq__(self, other):
        if type(self) is not type(other):
            return False
        return self.table == other.table

    def __setitem__(self, key, value):
        self.table[key] = value

    def __reduce__(self):
        # (callable, args, state, list-items iterator, dict-items iterator)
        return type(self), (), None, None, iter(self.table.items())
1642
1643# Test classes for newobj
1644
class MyInt(int):
    # int subclass; `sample` is the value the newobj tests construct it from.
    sample = 1
1647
class MyLong(long):
    # long subclass; `sample` is the value the newobj tests construct it from.
    sample = 1L
1650
class MyFloat(float):
    # float subclass; `sample` is the value the newobj tests construct it from.
    sample = 1.0
1653
class MyComplex(complex):
    # complex subclass; `sample` is the value the newobj tests construct it from.
    sample = 1.0 + 0.0j
1656
class MyStr(str):
    # str subclass; `sample` is the value the newobj tests construct it from.
    sample = "hello"
1659
class MyUnicode(unicode):
    # unicode subclass; `sample` (which includes a non-ASCII character) is
    # the value the newobj tests construct it from.
    sample = u"hello \u1234"
1662
class MyTuple(tuple):
    # tuple subclass; `sample` is the value the newobj tests construct it from.
    sample = (1, 2, 3)
1665
class MyList(list):
    # list subclass; `sample` is the value the newobj tests construct it from.
    sample = [1, 2, 3]
1668
class MyDict(dict):
    # dict subclass; `sample` is the value the newobj tests construct it from.
    sample = {"a": 1, "b": 2}
1671
# Classes iterated by test_newobj_generic and test_newobj_proxies; each one
# is instantiated there as cls(cls.sample).
myclasses = [MyInt, MyLong, MyFloat,
             MyComplex,
             MyStr, MyUnicode,
             MyTuple, MyList, MyDict]
1676
1677
class SlotList(MyList):
    # MyList subclass declaring a slot named "foo"; used by
    # test_newobj_list_slots, which pickles it with protocol 2.
    __slots__ = ["foo"]
1680
class SimpleNewObj(int):
    """int subclass whose __init__ always raises.

    Instances must be created with __new__ directly.  Instances are
    unhashable, and equality requires both the integer value and the
    instance __dict__ to match.
    """
    __hash__ = None

    def __init__(self, *args, **kwargs):
        # raise an error, to make sure this isn't called
        raise TypeError("SimpleNewObj.__init__() didn't expect to get called")

    def __eq__(self, other):
        if int(self) != int(other):
            return False
        return self.__dict__ == other.__dict__
1688
class ComplexNewObj(SimpleNewObj):
    """SimpleNewObj whose __getnewargs__ returns (uppercase hex string, 16)."""

    def __getnewargs__(self):
        hex_digits = '%X' % self
        return (hex_digits, 16)
1692
class BadGetattr:
    # __getattr__ looks up self.foo, which is itself missing and therefore
    # re-enters __getattr__ without end.  test_bad_getattr expects
    # RuntimeError when such an object is dumped.
    def __getattr__(self, key):
        self.foo
1696
1697class AbstractPickleModuleTests(unittest.TestCase):
1698
1699    def test_dump_closed_file(self):
1700        import os
1701        f = open(TESTFN, "w")
1702        try:
1703            f.close()
1704            self.assertRaises(ValueError, self.module.dump, 123, f)
1705        finally:
1706            os.remove(TESTFN)
1707
1708    def test_load_closed_file(self):
1709        import os
1710        f = open(TESTFN, "w")
1711        try:
1712            f.close()
1713            self.assertRaises(ValueError, self.module.dump, 123, f)
1714        finally:
1715            os.remove(TESTFN)
1716
1717    def test_load_from_and_dump_to_file(self):
1718        stream = cStringIO.StringIO()
1719        data = [123, {}, 124]
1720        self.module.dump(data, stream)
1721        stream.seek(0)
1722        unpickled = self.module.load(stream)
1723        self.assertEqual(unpickled, data)
1724
1725    def test_highest_protocol(self):
1726        # Of course this needs to be changed when HIGHEST_PROTOCOL changes.
1727        self.assertEqual(self.module.HIGHEST_PROTOCOL, 2)
1728
1729    def test_callapi(self):
1730        f = cStringIO.StringIO()
1731        # With and without keyword arguments
1732        self.module.dump(123, f, -1)
1733        self.module.dump(123, file=f, protocol=-1)
1734        self.module.dumps(123, -1)
1735        self.module.dumps(123, protocol=-1)
1736        self.module.Pickler(f, -1)
1737        self.module.Pickler(f, protocol=-1)
1738
1739    def test_incomplete_input(self):
1740        s = StringIO.StringIO("X''.")
1741        self.assertRaises(EOFError, self.module.load, s)
1742
1743    def test_restricted(self):
1744        # issue7128: cPickle failed in restricted mode
1745        builtins = {self.module.__name__: self.module,
1746                    '__import__': __import__}
1747        d = {}
1748        teststr = "def f(): {0}.dumps(0)".format(self.module.__name__)
1749        exec teststr in {'__builtins__': builtins}, d
1750        d['f']()
1751
1752    def test_bad_input(self):
1753        # Test issue4298
1754        s = '\x58\0\0\0\x54'
1755        self.assertRaises(EOFError, self.module.loads, s)
1756
1757
class AbstractPersistentPicklerTests(unittest.TestCase):

    # This class defines persistent_id() and persistent_load()
    # functions that should be used by the pickler.  All even integers
    # are pickled using persistent ids.  The string "test_false_value" is
    # given the empty string as its id, to cover an id that is false.

    def persistent_id(self, object):
        # Returns the id to pickle in place of `object`, or None to pickle
        # it normally; bumps the matching counter.
        if isinstance(object, int) and object % 2 == 0:
            self.id_count += 1
            return str(object)
        if object == "test_false_value":
            self.false_count += 1
            return ""
        return None

    def persistent_load(self, oid):
        # Inverse of persistent_id(): an empty id maps back to the marker
        # string, any other id to the even integer it spells.
        if oid:
            self.load_count += 1
            value = int(oid)
            assert value % 2 == 0
            return value
        self.load_false_count += 1
        return "test_false_value"

    def test_persistence(self):
        # range(10) holds five even integers, so five ids are expected in
        # each direction, plus one false-valued id for the marker string.
        L = range(10) + ["test_false_value"]
        for proto in protocols:
            self.id_count = self.false_count = 0
            self.load_count = self.load_false_count = 0
            self.assertEqual(self.loads(self.dumps(L, proto)), L)
            self.assertEqual(self.id_count, 5)
            self.assertEqual(self.false_count, 1)
            self.assertEqual(self.load_count, 5)
            self.assertEqual(self.load_false_count, 1)
1796
class AbstractPicklerUnpicklerObjectTests(unittest.TestCase):

    # Tests that drive Pickler and Unpickler objects directly.  The two
    # class attributes below must be overridden with the classes under test.

    pickler_class = None
    unpickler_class = None

    def setUp(self):
        # Refuse to run with the None placeholders still in place.
        assert self.pickler_class
        assert self.unpickler_class

    def test_clear_pickler_memo(self):
        # The same object is dumped three times through one Pickler:
        #   1. with a fresh memo,
        #   2. with the memo left behind by the first dump,
        #   3. after clear_memo().
        # If clear_memo() has any effect, dump 2 must differ from dump 1 and
        # dump 3 must be identical to dump 1 again.
        payload = ["abcdefg", "abcdefg", 44]
        buf = cStringIO.StringIO()
        pickler = self.pickler_class(buf)

        def dump_payload():
            pickler.dump(payload)
            return buf.getvalue()

        def empty_buffer():
            buf.seek(0)
            buf.truncate()

        fresh = dump_payload()
        empty_buffer()

        memoized = dump_payload()

        # Reset the Pickler and the StringIO object.
        pickler.clear_memo()
        empty_buffer()

        after_clear = dump_payload()

        self.assertNotEqual(fresh, memoized)
        self.assertEqual(fresh, after_clear)

    def test_priming_pickler_memo(self):
        # The Pickler's memo attribute must be assignable: a second pickler
        # that was handed the first one's memo has to emit different output
        # for the same data.
        payload = ["abcdefg", "abcdefg", 44]

        source_buf = cStringIO.StringIO()
        source_pickler = self.pickler_class(source_buf)
        source_pickler.dump(payload)
        plain_output = source_buf.getvalue()

        primed_buf = cStringIO.StringIO()
        primed_pickler = self.pickler_class(primed_buf)
        primed_pickler.memo = source_pickler.memo
        primed_pickler.dump(payload)
        primed_output = primed_buf.getvalue()

        self.assertNotEqual(plain_output, primed_output)

    def test_priming_unpickler_memo(self):
        # The Unpickler's memo attribute must be assignable as well.
        payload = ["abcdefg", "abcdefg", 44]

        # Produce one pickle with a fresh memo and one with a primed memo.
        source_buf = cStringIO.StringIO()
        source_pickler = self.pickler_class(source_buf)
        source_pickler.dump(payload)
        plain_output = source_buf.getvalue()

        primed_buf = cStringIO.StringIO()
        primed_pickler = self.pickler_class(primed_buf)
        primed_pickler.memo = source_pickler.memo
        primed_pickler.dump(payload)
        primed_output = primed_buf.getvalue()

        # Load the plain pickle first ...
        plain_unpickler = self.unpickler_class(
            cStringIO.StringIO(plain_output))
        first_result = plain_unpickler.load()

        self.assertEqual(first_result, payload)

        # ... then load the primed pickle through an unpickler that was
        # given the first unpickler's memo.
        primed_unpickler = self.unpickler_class(
            cStringIO.StringIO(primed_output))
        primed_unpickler.memo = plain_unpickler.memo
        second_result = primed_unpickler.load()

        primed_unpickler.memo.clear()

        # The second load must hand back the very object the first one built.
        self.assertEqual(second_result, payload)
        self.assertTrue(second_result is first_result)

    def test_reusing_unpickler_objects(self):
        # A single Unpickler must be able to load a second, different pickle
        # after the file it reads from has been rewritten in place.
        def pickled_form(obj):
            out = cStringIO.StringIO()
            pickler = self.pickler_class(out)
            pickler.dump(obj)
            return out.getvalue()

        first = ["abcdefg", "abcdefg", 44]
        first_pickle = pickled_form(first)

        second = ["abcdefg", 44, 44]
        second_pickle = pickled_form(second)

        stream = cStringIO.StringIO()
        stream.write(first_pickle)
        stream.seek(0)
        unpickler = self.unpickler_class(stream)
        self.assertEqual(unpickler.load(), first)

        # Swap the stream's contents for the second pickle and load again
        # with the same unpickler.
        stream.seek(0)
        stream.truncate()
        stream.write(second_pickle)
        stream.seek(0)
        self.assertEqual(unpickler.load(), second)

    def _check_multiple_unpicklings(self, ioclass, seekable):
        # For every protocol: pickle one object, put several back-to-back
        # copies of that pickle into a single `ioclass` stream and load them
        # one after another through a single Unpickler.  When `seekable` is
        # true, additionally check that each load() advances the stream by
        # exactly one pickle.  One load() too many must raise EOFError.
        copies = 5
        for proto in protocols:
            data1 = [(x, str(x)) for x in xrange(2000)]
            data1.extend(["abcde", len])

            out = ioclass()
            pickler = self.pickler_class(out, protocol=proto)
            pickler.dump(data1)
            pickled = out.getvalue()

            stream = ioclass(pickled * copies)
            unpickler = self.unpickler_class(stream)
            for _ in xrange(copies):
                start = stream.tell() if seekable else None
                self.assertEqual(unpickler.load(), data1)
                if seekable:
                    self.assertEqual(stream.tell(), start + len(pickled))
            self.assertRaises(EOFError, unpickler.load)

    def test_multiple_unpicklings_seekable(self):
        # Seekable variant: stream positions are checked too.
        self._check_multiple_unpicklings(StringIO.StringIO, True)

    def test_multiple_unpicklings_unseekable(self):
        # Unseekable variant: only the loaded values are checked.
        self._check_multiple_unpicklings(UnseekableIO, False)

    def test_unpickling_buffering_readline(self):
        # Issue #12687: the unpickler's buffering logic could fail with
        # text mode opcodes.
        # Round-trip through BufferedRandom files with buffer sizes 1..10
        # for every protocol.
        import io
        expected = list(xrange(10))
        for proto in protocols:
            for buf_size in xrange(1, 11):
                raw = io.BytesIO()
                buffered = io.BufferedRandom(raw, buffer_size=buf_size)
                pickler = self.pickler_class(buffered, protocol=proto)
                pickler.dump(expected)
                buffered.seek(0)
                unpickler = self.unpickler_class(buffered)
                self.assertEqual(unpickler.load(), expected)
1948
1949
1950class BigmemPickleTests(unittest.TestCase):
1951
1952    # Memory requirements: 1 byte per character for input strings, 1 byte
1953    # for pickled data, 1 byte for unpickled strings, 1 byte for internal
1954    # buffer and 1 byte of free space for resizing of internal buffer.
1955
1956    @precisionbigmemtest(size=_2G + 100*_1M, memuse=5)
1957    def test_huge_strlist(self, size):
1958        chunksize = 2**20
1959        data = []
1960        while size > chunksize:
1961            data.append('x' * chunksize)
1962            size -= chunksize
1963            chunksize += 1
1964        data.append('y' * size)
1965
1966        try:
1967            for proto in protocols:
1968                try:
1969                    pickled = self.dumps(data, proto)
1970                    res = self.loads(pickled)
1971                    self.assertEqual(res, data)
1972                finally:
1973                    res = None
1974                    pickled = None
1975        finally:
1976            data = None
1977