"""Compare the speed of downloading URLs sequentially vs. using futures."""

import functools
import sys
import time

try:
    # Python 2 keeps urlopen in urllib2; Python 3 moved it to urllib.request.
    from urllib2 import urlopen
except ImportError:
    from urllib.request import urlopen

from concurrent.futures import (as_completed, ThreadPoolExecutor,
                                ProcessPoolExecutor)

URLS = ['http://www.google.com/',
        'http://www.apple.com/',
        'http://www.ibm.com',
        'http://www.thisurlprobablydoesnotexist.com',
        'http://www.slashdot.org/',
        'http://www.python.org/',
        'http://www.bing.com/',
        'http://www.facebook.com/',
        'http://www.yahoo.com/',
        'http://www.youtube.com/',
        'http://www.blogger.com/']


def load_url(url, timeout):
    """Fetch *url* and return the response body as bytes.

    ``urlopen`` has accepted ``timeout`` since Python 2.6, which is also the
    minimum version concurrent.futures supports, so the old version check
    around the keyword argument was dead code and has been dropped.
    """
    return urlopen(url, timeout=timeout).read()


def download_urls_sequential(urls, timeout=60):
    """Download every URL one at a time.

    Returns a dict mapping each successfully fetched URL to its content.
    Failed downloads are skipped (best-effort benchmark); unlike the old
    bare ``except:``, KeyboardInterrupt/SystemExit are no longer swallowed.
    """
    url_to_content = {}
    for url in urls:
        try:
            url_to_content[url] = load_url(url, timeout=timeout)
        except Exception:
            pass  # dead/unreachable URL: skip it and keep benchmarking
    return url_to_content


def download_urls_with_executor(urls, executor, timeout=60):
    """Download every URL concurrently using *executor*.

    Returns a dict mapping each successfully fetched URL to its content.
    The executor is always shut down in ``finally`` — even if submission
    raises — so worker threads/processes are never leaked.
    """
    try:
        url_to_content = {}
        # dict() over a generator rather than a dict comprehension to keep
        # the file's Python 2.6 compatibility.
        future_to_url = dict((executor.submit(load_url, url, timeout), url)
                             for url in urls)
        for future in as_completed(future_to_url):
            try:
                url_to_content[future_to_url[future]] = future.result()
            except Exception:
                pass  # same best-effort policy as the sequential version
        return url_to_content
    finally:
        executor.shutdown()


def main():
    """Time each download strategy against the same URL list and print it."""
    for name, fn in [('sequential',
                      functools.partial(download_urls_sequential, URLS)),
                     ('processes',
                      functools.partial(download_urls_with_executor,
                                        URLS,
                                        ProcessPoolExecutor(10))),
                     ('threads',
                      functools.partial(download_urls_with_executor,
                                        URLS,
                                        ThreadPoolExecutor(10)))]:
        sys.stdout.write('%s: ' % name.ljust(12))
        start = time.time()
        url_map = fn()
        sys.stdout.write('%.2f seconds (%d of %d downloaded)\n' %
                         (time.time() - start, len(url_map), len(URLS)))


if __name__ == '__main__':
    main()