#!/usr/bin/python
# Copyright (c) 2012 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A library to assist automatically downloading files.

This library is used by scripts that download tarballs, zipfiles, etc. as part
of the build process.
"""

import hashlib
import http_download
import os.path
import re
import shutil
import sys
import time
import urllib2

SOURCE_STAMP = 'SOURCE_URL'
HASH_STAMP = 'SOURCE_SHA1'


# Designed to handle more general inputs than sys.platform because the platform
# name may come from the command line.
PLATFORM_COLLAPSE = {
    'windows': 'windows',
    'win32': 'windows',
    'cygwin': 'windows',
    'linux': 'linux',
    'linux2': 'linux',
    'linux3': 'linux',
    'darwin': 'mac',
    'mac': 'mac',
}

ARCH_COLLAPSE = {
    'i386': 'x86',
    'i686': 'x86',
    'x86_64': 'x86',
    'armv7l': 'arm',
}


class HashError(Exception):
  """Raised when a download's SHA1 hash differs from the expected hash."""

  def __init__(self, download_url, expected_hash, actual_hash):
    self.download_url = download_url
    self.expected_hash = expected_hash
    self.actual_hash = actual_hash

  def __str__(self):
    return 'Got hash "%s" but expected hash "%s" for "%s"' % (
        self.actual_hash, self.expected_hash, self.download_url)


def PlatformName(name=None):
  """Collapse a platform name (default: sys.platform) to windows/linux/mac.

  Raises:
    KeyError: if the name is not one of the known platform names.
  """
  if name is None:
    name = sys.platform
  return PLATFORM_COLLAPSE[name]


def ArchName(name=None):
  """Collapse a machine architecture name to x86/arm.

  Arguments:
    name: architecture name to collapse; auto-detected when None.
  Raises:
    KeyError: if the name is not one of the known architecture names.
  """
  if name is None:
    if PlatformName() == 'windows':
      # TODO(pdox): Figure out how to auto-detect 32-bit vs 64-bit Windows.
      name = 'i386'
    else:
      import platform
      name = platform.machine()
  return ARCH_COLLAPSE[name]


def EnsureFileCanBeWritten(filename):
  """Create the parent directory of filename if it does not exist yet."""
  directory = os.path.dirname(filename)
  if not os.path.exists(directory):
    os.makedirs(directory)


def WriteData(filename, data):
  """Write data to filename in binary mode, creating directories as needed."""
  EnsureFileCanBeWritten(filename)
  f = open(filename, 'wb')
  try:
    f.write(data)
  finally:
    # Close even if the write fails so the handle is not leaked.
    f.close()


def WriteDataFromStream(filename, stream, chunk_size, verbose=True):
  """Copy a stream to a file in fixed-size chunks.

  Arguments:
    filename: destination file to create.
    stream: file-like object to read from until EOF.
    chunk_size: number of bytes to read per iteration.
    verbose: if True, write a dot per chunk as a progress indicator.
  """
  EnsureFileCanBeWritten(filename)
  dst = open(filename, 'wb')
  try:
    while True:
      data = stream.read(chunk_size)
      if len(data) == 0:
        break
      dst.write(data)
      if verbose:
        # Indicate that we're still writing.
        sys.stdout.write('.')
        sys.stdout.flush()
  finally:
    if verbose:
      sys.stdout.write('\n')
    dst.close()


def DoesStampMatch(stampfile, expected, index):
  """Check the contents of a stamp file against an expected value.

  Arguments:
    stampfile: path of the stamp file to read.
    expected: value the selected stamp line must equal.
    index: newline-separated line within the stamp file to compare.
  Returns:
    A truthy reason string when the stamp matches (or is a manual
    override), False otherwise (including when the file is unreadable).
  """
  try:
    f = open(stampfile, 'r')
    try:
      stamp = f.read()
    finally:
      f.close()
    fields = stamp.split('\n')
    # Guard against stamp files with fewer lines than requested; the
    # unguarded fields[index] would raise IndexError for index > 0.
    if index < len(fields) and fields[index] == expected:
      return "already up-to-date."
    elif stamp.startswith('manual'):
      return "manual override."
    return False
  except IOError:
    return False


def WriteStamp(stampfile, data):
  """Write data to stampfile, creating its directory if necessary."""
  EnsureFileCanBeWritten(stampfile)
  f = open(stampfile, 'w')
  try:
    f.write(data)
  finally:
    # Close even if the write fails so the handle is not leaked.
    f.close()


def StampIsCurrent(path, stamp_name, stamp_contents, min_time=None, index=0):
  """Check whether a stamp file under path is present, fresh, and matching.

  Arguments:
    path: directory containing the stamp file.
    stamp_name: filename of the stamp within path.
    stamp_contents: expected stamp value.
    min_time: if set, the stamp's mtime must be newer than this.
    index: line within the stamp file to compare.
  Returns:
    A truthy reason string when the stamp is current, False otherwise.
  """
  stampfile = os.path.join(path, stamp_name)

  # Check if the stampfile is older than the minimum last mod time
  if min_time:
    try:
      stamp_time = os.stat(stampfile).st_mtime
      if stamp_time <= min_time:
        return False
    except OSError:
      # A missing stamp file means the stamp cannot be current.
      return False

  return DoesStampMatch(stampfile, stamp_contents, index)


def WriteSourceStamp(path, url):
  """Record url in the SOURCE_URL stamp file under path."""
  stampfile = os.path.join(path, SOURCE_STAMP)
  WriteStamp(stampfile, url)


def WriteHashStamp(path, hash_val):
  """Record hash_val in the SOURCE_SHA1 stamp file under path."""
  hash_stampfile = os.path.join(path, HASH_STAMP)
  WriteStamp(hash_stampfile, hash_val)


def Retry(op, *args):
  """Invoke op(*args), retrying with exponential backoff on Windows.

  Windows seems to be prone to having commands that delete files or
  directories fail. We currently do not have a complete understanding why,
  and as a workaround we simply retry the command a few times.
  It appears that file locks are hanging around longer than they should. This
  may be a secondary effect of processes hanging around longer than they
  should. This may be because when we kill a browser sel_ldr does not exit
  immediately, etc.
  Virus checkers can also accidently prevent files from being deleted, but
  that shouldn't be a problem on the bots.
  """
  if sys.platform in ('win32', 'cygwin'):
    count = 0
    while True:
      try:
        op(*args)
        break
      except Exception:
        sys.stdout.write("FAILED: %s %s\n" % (op.__name__, repr(args)))
        count += 1
        if count < 5:
          sys.stdout.write("RETRY: %s %s\n" % (op.__name__, repr(args)))
          time.sleep(pow(2, count))
        else:
          # Don't mask the exception.
          raise
  else:
    op(*args)


def MoveDirCleanly(src, dst):
  """Move src to dst, removing any pre-existing dst first."""
  RemoveDir(dst)
  MoveDir(src, dst)


def MoveDir(src, dst):
  """Move a directory, retrying on Windows."""
  Retry(shutil.move, src, dst)


def RemoveDir(path):
  """Recursively delete path if it exists, retrying on Windows."""
  if os.path.exists(path):
    Retry(shutil.rmtree, path)


def RemoveFile(path):
  """Delete the file at path if it exists, retrying on Windows."""
  if os.path.exists(path):
    Retry(os.unlink, path)


def _HashFileHandle(fh):
  """sha1 of a file like object.

  Arguments:
    fh: file handle like object to hash; always closed on return.
  Returns:
    sha1 as a string.
  """
  hasher = hashlib.sha1()
  try:
    while True:
      data = fh.read(4096)
      if not data:
        break
      hasher.update(data)
  finally:
    fh.close()
  return hasher.hexdigest()


def HashFile(filename):
  """sha1 a file on disk.

  Arguments:
    filename: filename to hash.
  Returns:
    sha1 as a string.
  """
  fh = open(filename, 'rb')
  return _HashFileHandle(fh)


def HashUrlByDownloading(url):
  """sha1 the data at an url.

  Arguments:
    url: url to download from.
  Returns:
    sha1 of the data at the url.
  """
  try:
    fh = urllib2.urlopen(url)
  except Exception:
    # Log which URL failed, then let the original error propagate.
    # (Narrowed from a bare except so KeyboardInterrupt is not logged.)
    sys.stderr.write("Failed fetching URL: %s\n" % url)
    raise
  return _HashFileHandle(fh)


# Attempts to get the SHA1 hash of a file given a URL by looking for
# an adjacent file with a ".sha1hash" suffix. This saves having to
# download a large tarball just to get its hash. Otherwise, we fall
# back to downloading the main file.
249def HashUrl(url): 250 hash_url = '%s.sha1hash' % url 251 try: 252 fh = urllib2.urlopen(hash_url) 253 data = fh.read(100) 254 fh.close() 255 except urllib2.HTTPError, exn: 256 if exn.code == 404: 257 return HashUrlByDownloading(url) 258 raise 259 else: 260 if not re.match('[0-9a-f]{40}\n?$', data): 261 raise AssertionError('Bad SHA1 hash file: %r' % data) 262 return data.strip() 263 264 265def SyncURL(url, filename=None, stamp_dir=None, min_time=None, 266 hash_val=None, keep=False, verbose=False, stamp_index=0): 267 """Synchronize a destination file with a URL 268 269 if the URL does not match the URL stamp, then we must re-download it. 270 271 Arugments: 272 url: the url which will to compare against and download 273 filename: the file to create on download 274 path: the download path 275 stamp_dir: the filename containing the URL stamp to check against 276 hash_val: if set, the expected hash which must be matched 277 verbose: prints out status as it runs 278 stamp_index: index within the stamp file to check. 279 Returns: 280 True if the file is replaced 281 False if the file is not replaced 282 Exception: 283 HashError: if the hash does not match 284 """ 285 286 assert url and filename 287 288 # If we are not keeping the tarball, or we already have it, we can 289 # skip downloading it for this reason. If we are keeping it, 290 # it must exist. 291 if keep: 292 tarball_ok = os.path.isfile(filename) 293 else: 294 tarball_ok = True 295 296 # If we don't need the tarball and the stamp_file matches the url, then 297 # we must be up to date. If the URL differs but the recorded hash matches 298 # the one we'll insist the tarball has, then that's good enough too. 299 # TODO(mcgrathr): Download the .sha1sum file first to compare with 300 # the cached hash, in case --file-hash options weren't used. 301 if tarball_ok and stamp_dir is not None: 302 if StampIsCurrent(stamp_dir, SOURCE_STAMP, url, min_time): 303 if verbose: 304 print '%s is already up to date.' 
% filename 305 return False 306 if (hash_val is not None and 307 StampIsCurrent(stamp_dir, HASH_STAMP, hash_val, min_time, stamp_index)): 308 if verbose: 309 print '%s is identical to the up to date file.' % filename 310 return False 311 312 if verbose: 313 print 'Updating %s\n\tfrom %s.' % (filename, url) 314 EnsureFileCanBeWritten(filename) 315 http_download.HttpDownload(url, filename) 316 317 if hash_val: 318 tar_hash = HashFile(filename) 319 if hash_val != tar_hash: 320 raise HashError(actual_hash=tar_hash, expected_hash=hash_val, 321 download_url=url) 322 323 return True 324