1from test.test_support import verbose, TESTFN 2import random 3import os 4 5# From SF bug #422121: Insecurities in dict comparison. 6 7# Safety of code doing comparisons has been a historical Python weak spot. 8# The problem is that comparison of structures written in C *naturally* 9# wants to hold on to things like the size of the container, or "the 10# biggest" containee so far, across a traversal of the container; but 11# code to do containee comparisons can call back into Python and mutate 12# the container in arbitrary ways while the C loop is in midstream. If the 13# C code isn't extremely paranoid about digging things out of memory on 14# each trip, and artificially boosting refcounts for the duration, anything 15# from infinite loops to OS crashes can result (yes, I use Windows <wink>). 16# 17# The other problem is that code designed to provoke a weakness is usually 18# white-box code, and so catches only the particular vulnerabilities the 19# author knew to protect against. For example, Python's list.sort() code 20# went thru many iterations as one "new" vulnerability after another was 21# discovered. 22# 23# So the dict comparison test here uses a black-box approach instead, 24# generating dicts of various sizes at random, and performing random 25# mutations on them at random times. This proved very effective, 26# triggering at least six distinct failure modes the first 20 times I 27# ran it. Indeed, at the start, the driver never got beyond 6 iterations 28# before the test died. 29 30# The dicts are global to make it easy to mutate tham from within functions. 31dict1 = {} 32dict2 = {} 33 34# The current set of keys in dict1 and dict2. These are materialized as 35# lists to make it easy to pick a dict key at random. 36dict1keys = [] 37dict2keys = [] 38 39# Global flag telling maybe_mutate() whether to *consider* mutating. 40mutate = 0 41 42# If global mutate is true, consider mutating a dict. May or may not 43# mutate a dict even if mutate is true. If it does decide to mutate a 44# dict, it picks one of {dict1, dict2} at random, and deletes a random 45# entry from it; or, more rarely, adds a random element. 46 47def maybe_mutate(): 48 global mutate 49 if not mutate: 50 return 51 if random.random() < 0.5: 52 return 53 54 if random.random() < 0.5: 55 target, keys = dict1, dict1keys 56 else: 57 target, keys = dict2, dict2keys 58 59 if random.random() < 0.2: 60 # Insert a new key. 61 mutate = 0 # disable mutation until key inserted 62 while 1: 63 newkey = Horrid(random.randrange(100)) 64 if newkey not in target: 65 break 66 target[newkey] = Horrid(random.randrange(100)) 67 keys.append(newkey) 68 mutate = 1 69 70 elif keys: 71 # Delete a key at random. 72 mutate = 0 # disable mutation until key deleted 73 i = random.randrange(len(keys)) 74 key = keys[i] 75 del target[key] 76 del keys[i] 77 mutate = 1 78 79# A horrid class that triggers random mutations of dict1 and dict2 when 80# instances are compared. 81 82class Horrid: 83 def __init__(self, i): 84 # Comparison outcomes are determined by the value of i. 85 self.i = i 86 87 # An artificial hashcode is selected at random so that we don't 88 # have any systematic relationship between comparison outcomes 89 # (based on self.i and other.i) and relative position within the 90 # hash vector (based on hashcode). 91 self.hashcode = random.randrange(1000000000) 92 93 def __hash__(self): 94 return 42 95 return self.hashcode 96 97 def __cmp__(self, other): 98 maybe_mutate() # The point of the test. 99 return cmp(self.i, other.i) 100 101 def __eq__(self, other): 102 maybe_mutate() # The point of the test. 103 return self.i == other.i 104 105 def __repr__(self): 106 return "Horrid(%d)" % self.i 107 108# Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs, 109# where i and j are selected at random from the candidates list. 110# Return d.keys() after filling. 111 112def fill_dict(d, candidates, numentries): 113 d.clear() 114 for i in xrange(numentries): 115 d[Horrid(random.choice(candidates))] = \ 116 Horrid(random.choice(candidates)) 117 return d.keys() 118 119# Test one pair of randomly generated dicts, each with n entries. 120# Note that dict comparison is trivial if they don't have the same number 121# of entires (then the "shorter" dict is instantly considered to be the 122# smaller one, without even looking at the entries). 123 124def test_one(n): 125 global mutate, dict1, dict2, dict1keys, dict2keys 126 127 # Fill the dicts without mutating them. 128 mutate = 0 129 dict1keys = fill_dict(dict1, range(n), n) 130 dict2keys = fill_dict(dict2, range(n), n) 131 132 # Enable mutation, then compare the dicts so long as they have the 133 # same size. 134 mutate = 1 135 if verbose: 136 print "trying w/ lengths", len(dict1), len(dict2), 137 while dict1 and len(dict1) == len(dict2): 138 if verbose: 139 print ".", 140 if random.random() < 0.5: 141 c = cmp(dict1, dict2) 142 else: 143 c = dict1 == dict2 144 if verbose: 145 print 146 147# Run test_one n times. At the start (before the bugs were fixed), 20 148# consecutive runs of this test each blew up on or before the sixth time 149# test_one was run. So n doesn't have to be large to get an interesting 150# test. 151# OTOH, calling with large n is also interesting, to ensure that the fixed 152# code doesn't hold on to refcounts *too* long (in which case memory would 153# leak). 154 155def test(n): 156 for i in xrange(n): 157 test_one(random.randrange(1, 100)) 158 159# See last comment block for clues about good values for n. 160test(100) 161 162########################################################################## 163# Another segfault bug, distilled by Michael Hudson from a c.l.py post. 164 165class Child: 166 def __init__(self, parent): 167 self.__dict__['parent'] = parent 168 def __getattr__(self, attr): 169 self.parent.a = 1 170 self.parent.b = 1 171 self.parent.c = 1 172 self.parent.d = 1 173 self.parent.e = 1 174 self.parent.f = 1 175 self.parent.g = 1 176 self.parent.h = 1 177 self.parent.i = 1 178 return getattr(self.parent, attr) 179 180class Parent: 181 def __init__(self): 182 self.a = Child(self) 183 184# Hard to say what this will print! May vary from time to time. But 185# we're specifically trying to test the tp_print slot here, and this is 186# the clearest way to do it. We print the result to a temp file so that 187# the expected-output file doesn't need to change. 188 189f = open(TESTFN, "w") 190print >> f, Parent().__dict__ 191f.close() 192os.unlink(TESTFN) 193 194########################################################################## 195# And another core-dumper from Michael Hudson. 196 197dict = {} 198 199# Force dict to malloc its table. 200for i in range(1, 10): 201 dict[i] = i 202 203f = open(TESTFN, "w") 204 205class Machiavelli: 206 def __repr__(self): 207 dict.clear() 208 209 # Michael sez: "doesn't crash without this. don't know why." 210 # Tim sez: "luck of the draw; crashes with or without for me." 211 print >> f 212 213 return repr("machiavelli") 214 215 def __hash__(self): 216 return 0 217 218dict[Machiavelli()] = Machiavelli() 219 220print >> f, str(dict) 221f.close() 222os.unlink(TESTFN) 223del f, dict 224 225 226########################################################################## 227# And another core-dumper from Michael Hudson. 228 229dict = {} 230 231# let's force dict to malloc its table 232for i in range(1, 10): 233 dict[i] = i 234 235class Machiavelli2: 236 def __eq__(self, other): 237 dict.clear() 238 return 1 239 240 def __hash__(self): 241 return 0 242 243dict[Machiavelli2()] = Machiavelli2() 244 245try: 246 dict[Machiavelli2()] 247except KeyError: 248 pass 249 250del dict 251 252########################################################################## 253# And another core-dumper from Michael Hudson. 254 255dict = {} 256 257# let's force dict to malloc its table 258for i in range(1, 10): 259 dict[i] = i 260 261class Machiavelli3: 262 def __init__(self, id): 263 self.id = id 264 265 def __eq__(self, other): 266 if self.id == other.id: 267 dict.clear() 268 return 1 269 else: 270 return 0 271 272 def __repr__(self): 273 return "%s(%s)"%(self.__class__.__name__, self.id) 274 275 def __hash__(self): 276 return 0 277 278dict[Machiavelli3(1)] = Machiavelli3(0) 279dict[Machiavelli3(2)] = Machiavelli3(0) 280 281f = open(TESTFN, "w") 282try: 283 try: 284 print >> f, dict[Machiavelli3(2)] 285 except KeyError: 286 pass 287finally: 288 f.close() 289 os.unlink(TESTFN) 290 291del dict 292del dict1, dict2, dict1keys, dict2keys 293