#!/usr/bin/python
# Copyright (c) 2012 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Download a file from a URL to a file on disk.

This module supports username and password with basic authentication.
"""

import base64
import os
import os.path
import socket
import sys
import urllib2

import download_utils


def _CreateDirectory(path):
  """Create a directory tree, ignore if it's already there.

  Args:
    path: Directory to create, including any missing parent directories.
  Returns:
    True if os.makedirs succeeded, False if it raised os.error (for example
    because the directory already exists).
  """
  try:
    os.makedirs(path)
    return True
  except os.error:
    return False


def HttpDownload(url, target, username=None, password=None, verbose=True,
                 logger=None):
  """Download a file from a remote server.

  The download is attempted up to 10 times. A 404 response aborts
  immediately; any other HTTP/URL/socket error, or a downloaded file whose
  size does not match the Content-Length header, triggers a retry.

  Args:
    url: A URL to download from.
    target: Filename to write download to.
    username: Optional username for download.
    password: Optional password for download (ignored if no username).
    verbose: Passed through to download_utils.WriteDataFromStream.
    logger: Function to log events to. Defaults to sys.stdout.write.
  Raises:
    urllib2.HTTPError: Immediately on a 404, or when the last attempt failed
        with another HTTP error.
    urllib2.URLError: When the last attempt failed with a URL error or with
        a Content-Length mismatch.
    socket.error: When the last attempt failed with a socket-level error
        (including socket.timeout) while transferring data.
  """

  # Log to stdout by default.
  if logger is None:
    logger = sys.stdout.write
  headers = [('Accept', '*/*')]
  if username:
    if password:
      auth_code = base64.b64encode(username + ':' + password)
    else:
      auth_code = base64.b64encode(username)
    headers.append(('Authorization', 'Basic ' + auth_code))
  proxy = os.environ.get('http_proxy')
  if proxy:
    # The single http_proxy setting is used for both schemes.
    proxy_handler = urllib2.ProxyHandler({
        'http': proxy,
        'https': proxy})
    opener = urllib2.build_opener(proxy_handler)
  else:
    opener = urllib2.build_opener()
  opener.addheaders = headers
  # Note: this replaces the process-wide default opener used by urlopen().
  urllib2.install_opener(opener)
  _CreateDirectory(os.path.split(target)[0])
  # Remember why the most recent attempt failed so that giving up raises a
  # meaningful exception even when no exception was caught (size mismatch).
  last_error = None
  # Retry up to 10 times (appengine logger is flaky).
  for i in xrange(10):
    if i:
      logger('Download failed on %s, retrying... (%d)\n' % (url, i))
    try:
      # 30 second timeout to ensure we fail and retry on stalled connections.
      src = urllib2.urlopen(url, timeout=30)
      try:
        download_utils.WriteDataFromStream(target, src, chunk_size=2**20,
                                           verbose=verbose)
        content_len = src.headers.get('Content-Length')
        if content_len:
          content_len = int(content_len)
          file_size = os.path.getsize(target)
          if content_len != file_size:
            message = 'Filesize:%d does not match Content-Length:%d' % (
                file_size, content_len)
            logger(message + '\n')
            last_error = urllib2.URLError(message)
            continue
      finally:
        src.close()
      break
    except urllib2.HTTPError as e:
      # HTTPError subclasses URLError, so it must be handled first.
      if e.code == 404:
        logger('Resource does not exist.\n')
        raise
      logger('Failed to open.\n')
      last_error = e
    except (urllib2.URLError, socket.error) as e:
      # A timeout while reading the body surfaces as socket.timeout (a
      # socket.error) rather than URLError; retry on that as well.
      logger('Failed mid stream.\n')
      last_error = e
  else:
    logger('Download failed on %s, giving up.\n' % url)
    raise last_error