# Copyright 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""HTTP server that serves a fixed set of files out of an in-memory cache."""

import BaseHTTPServer
from collections import namedtuple
import errno
import gzip
import mimetypes
import os
import SimpleHTTPServer
import socket
import SocketServer
import StringIO
import sys
import urlparse

from telemetry.core import local_server

# Inclusive byte offsets of a requested range within a resource.
ByteRange = namedtuple('ByteRange', ['from_byte', 'to_byte'])
# A resource_map entry paired with the (possibly None) range requested for it.
ResourceAndRange = namedtuple('ResourceAndRange', ['resource', 'byte_range'])


def _IsPathUnderDirectory(path, directory):
  """Returns True if |path| is |directory| itself or is located inside it.

  The comparison is done on whole path components, so '/a/bar2' is NOT
  considered to be under '/a/bar' (a plain string-prefix test would say it is).
  Both arguments are normalized with os.path.normpath first; symlinks are not
  resolved here, callers that care should pass real paths.
  """
  path = os.path.normpath(path)
  directory = os.path.normpath(directory)
  # os.path.join(directory, '') appends a separator unless one is already
  # there (e.g. for the filesystem root).
  return path == directory or path.startswith(os.path.join(directory, ''))


class MemoryCacheHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
  """Request handler that answers GET/HEAD from the server's resource_map.

  The handler expects self.server to expose a |resource_map| dict keyed by
  real file path (see _MemoryCacheHTTPServerImpl).
  """

  protocol_version = 'HTTP/1.1'  # override BaseHTTPServer setting
  wbufsize = -1  # override StreamRequestHandler (a base class) setting

  def handle(self):
    """Handles requests on the connection, ignoring connection resets."""
    try:
      BaseHTTPServer.BaseHTTPRequestHandler.handle(self)
    except socket.error as e:
      # Connection reset errors happen all the time due to the browser closing
      # without terminating the connection properly. They can be safely
      # ignored.
      # Use the errno attribute rather than indexing the exception: indexing
      # raises IndexError when the error carries no arguments.
      if e.errno != errno.ECONNRESET:
        raise

  def do_GET(self):
    """Serve a GET request."""
    resource_range = self.SendHead()

    # SendHead returns None when it already answered with an error.
    if not resource_range or not resource_range.resource:
      return
    response = resource_range.resource['response']

    if not resource_range.byte_range:
      self.wfile.write(response)
      return

    # Range offsets are inclusive, hence the + 1 on the slice end.
    start_index = resource_range.byte_range.from_byte
    end_index = resource_range.byte_range.to_byte
    self.wfile.write(response[start_index:end_index + 1])

  def do_HEAD(self):
    """Serve a HEAD request."""
    self.SendHead()

  def log_error(self, fmt, *args):
    """Intentionally discards error log messages."""
    pass

  def log_request(self, code='-', size='-'):
    """Intentionally discards per-request log messages."""
    # Don't spam the console unless it is important.
    pass

  def SendHead(self):
    """Sends the status line and headers for the requested resource.

    Returns:
      A ResourceAndRange for the resource that was found, whose byte_range is
      None when the whole resource should be sent. Returns None after sending
      a 404 if the requested path is not in the server's resource map.
    """
    path = os.path.realpath(self.translate_path(self.path))
    if path not in self.server.resource_map:
      self.send_error(404, 'File not found')
      return None

    resource = self.server.resource_map[path]
    total_num_of_bytes = resource['content-length']
    byte_range = self.GetByteRange(total_num_of_bytes)
    if byte_range:
      # request specified a range, so set response code to 206.
      self.send_response(206)
      self.send_header('Content-Range', 'bytes %d-%d/%d' %
                       (byte_range.from_byte, byte_range.to_byte,
                        total_num_of_bytes))
      # From here on the length describes only the bytes actually sent.
      total_num_of_bytes = byte_range.to_byte - byte_range.from_byte + 1
    else:
      self.send_response(200)

    self.send_header('Content-Length', str(total_num_of_bytes))
    self.send_header('Content-Type', resource['content-type'])
    self.send_header('Last-Modified',
                     self.date_time_string(resource['last-modified']))
    if resource['zipped']:
      self.send_header('Content-Encoding', 'gzip')
    self.end_headers()
    return ResourceAndRange(resource, byte_range)

  def GetByteRange(self, total_num_of_bytes):
    """Parse the header and get the range values specified.

    Only a single range of the form "bytes=FIRST-LAST", "bytes=FIRST-" or
    "bytes=-SUFFIX_LENGTH" is understood.

    Args:
      total_num_of_bytes: Total # of bytes in requested resource,
      used to calculate upper range limit.
    Returns:
      A ByteRange namedtuple object with the requested (inclusive) byte-range
      values.
      If no Range is explicitly requested, there is a failure parsing, or the
      range starts past the end of the resource, return None (the caller then
      serves the whole resource).
      If range specified is in the format "N-", return N-END. Refer to
      http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html for details.
      If range specified is in the format "-N", return the last N bytes (the
      whole resource when N exceeds its size).
      If the upper limit is smaller than the lower limit or lies past the end
      of the resource, it is replaced by the index of the last byte.
    """

    range_header = self.headers.getheader('Range')
    if range_header is None:
      return None
    if not range_header.startswith('bytes='):
      return None

    # The range header is expected to be a string in this format:
    # bytes=0-1
    # Get the upper and lower limits of the specified byte-range.
    # We've already confirmed that range_header starts with 'bytes='.
    byte_range_values = range_header[len('bytes='):].split('-')
    if len(byte_range_values) != 2:
      return None
    first, last = [value.strip() for value in byte_range_values]

    last_index = total_num_of_bytes - 1
    try:
      if first:
        from_byte = int(first)
        # If the upper limit is not defined return all bytes starting from
        # from_byte.
        to_byte = int(last) if last else last_index
      elif last:
        # Suffix range: |last| is a byte COUNT measured from the end, not an
        # index. Clamp so an over-long suffix yields the whole resource.
        from_byte = max(total_num_of_bytes - int(last), 0)
        to_byte = last_index
      else:
        # 'bytes=-' carries no information at all.
        return None
    except ValueError:
      # Non-numeric limits count as a parsing failure.
      return None

    # Do some validation. A start offset outside the resource cannot be
    # satisfied and would otherwise produce a negative Content-Length.
    if from_byte < 0 or from_byte >= total_num_of_bytes:
      return None

    # Make to_byte the end byte by default in edge cases.
    if to_byte < from_byte or to_byte >= total_num_of_bytes:
      to_byte = last_index

    return ByteRange(from_byte, to_byte)


class _MemoryCacheHTTPServerImpl(SocketServer.ThreadingMixIn,
                                 BaseHTTPServer.HTTPServer):
  """Threaded HTTP server holding every served file in |resource_map|.

  |resource_map| maps a real file path to a dict with the keys
  'content-type', 'content-length', 'last-modified', 'response' and 'zipped'.
  """

  # Increase the request queue size. The default value, 5, is set in
  # SocketServer.TCPServer (the parent of BaseHTTPServer.HTTPServer).
  # Since we're intercepting many domains through this single server,
  # it is quite possible to get more than 5 concurrent requests.
  request_queue_size = 128

  # Don't prevent python from exiting when there is thread activity.
  daemon_threads = True

  def __init__(self, host_port, handler, paths):
    """Binds the server and loads |paths| into memory.

    Args:
      host_port: (host, port) tuple passed to BaseHTTPServer.HTTPServer.
      handler: Request handler class.
      paths: Iterable of file and/or directory paths to load.
    """
    BaseHTTPServer.HTTPServer.__init__(self, host_port, handler)
    self.resource_map = {}
    for path in paths:
      if os.path.isdir(path):
        self.AddDirectoryToResourceMap(path)
      else:
        self.AddFileToResourceMap(path)

  def AddDirectoryToResourceMap(self, directory_path):
    """Loads all files in directory_path into the in-memory resource map."""
    for root, dirs, files in os.walk(directory_path):
      # Skip hidden files and folders (like .svn and .git).
      files = [f for f in files if not f.startswith('.')]
      # Assign in place so os.walk does not descend into the hidden folders.
      dirs[:] = [d for d in dirs if not d.startswith('.')]

      for f in files:
        file_path = os.path.join(root, f)
        if not os.path.exists(file_path):  # Allow for '.#' files
          continue
        self.AddFileToResourceMap(file_path)

  def AddFileToResourceMap(self, file_path):
    """Loads file_path into the in-memory resource map.

    HTML, CSS and JavaScript content is stored gzip-compressed and flagged as
    'zipped'. A file named index.html is additionally registered under the
    path of its directory.
    """
    file_path = os.path.realpath(file_path)
    if file_path in self.resource_map:
      return

    with open(file_path, 'rb') as fd:
      response = fd.read()
      fs = os.fstat(fd.fileno())
    content_type = mimetypes.guess_type(file_path)[0]
    zipped = False
    if content_type in ['text/html', 'text/css', 'application/javascript']:
      zipped = True
      sio = StringIO.StringIO()
      gzf = gzip.GzipFile(fileobj=sio, compresslevel=9, mode='wb')
      try:
        gzf.write(response)
      finally:
        # Closing flushes the gzip trailer into |sio|.
        gzf.close()
      response = sio.getvalue()
      sio.close()
    self.resource_map[file_path] = {
        'content-type': content_type,
        # Length of the stored (possibly compressed) payload.
        'content-length': len(response),
        'last-modified': fs.st_mtime,
        'response': response,
        'zipped': zipped
        }

    index = 'index.html'
    if os.path.basename(file_path) == index:
      dir_path = os.path.dirname(file_path)
      self.resource_map[dir_path] = self.resource_map[file_path]


class MemoryCacheHTTPServerBackend(local_server.LocalServerBackend):
  """Backend that creates and runs the in-memory HTTP server."""

  def __init__(self):
    super(MemoryCacheHTTPServerBackend, self).__init__()
    self._httpd = None

  def StartAndGetNamedPorts(self, args):
    """Changes into the base directory and starts the HTTP server.

    Exits the process with status 1 if any requested path is outside the
    base directory.

    Args:
      args: Dict with the keys 'base_dir', 'paths', 'host' and 'port'.
    Returns:
      A list holding one local_server.NamedPort named 'http' with the port
      the server is bound to.
    """
    base_dir = args['base_dir']
    os.chdir(base_dir)

    paths = args['paths']
    cwd = os.path.realpath(os.getcwd())
    for path in paths:
      # Compare whole path components: a bare startswith() would accept a
      # sibling such as '<cwd>2/file'.
      if not _IsPathUnderDirectory(os.path.realpath(path), cwd):
        print >> sys.stderr, '"%s" is not under the cwd.' % path
        sys.exit(1)

    server_address = (args['host'], args['port'])
    MemoryCacheHTTPRequestHandler.protocol_version = 'HTTP/1.1'
    self._httpd = _MemoryCacheHTTPServerImpl(
        server_address, MemoryCacheHTTPRequestHandler, paths)
    return [local_server.NamedPort('http', self._httpd.server_address[1])]

  def ServeForever(self):
    """Serves requests until the server is shut down."""
    return self._httpd.serve_forever()


class MemoryCacheHTTPServer(local_server.LocalServer):
  """Local server front end for a set of files served from memory."""

  def __init__(self, paths):
    """Records |paths| and derives the directory they are served from.

    Args:
      paths: Iterable of existing file and/or directory paths.
    """
    super(MemoryCacheHTTPServer, self).__init__(MemoryCacheHTTPServerBackend)
    self._base_dir = None

    for path in paths:
      assert os.path.exists(path), '%s does not exist.' % path

    paths = list(paths)
    self._paths = paths

    self._paths_as_set = set(map(os.path.realpath, paths))

    # os.path.commonprefix works character by character, so its result may
    # end in the middle of a name ('/a/ba' for '/a/bar' and '/a/baz') or may
    # name a directory that does not contain every path ('/a/bar' for '/a/bar'
    # and '/a/bar2'). Fall back to the parent directory in both cases.
    common_prefix = os.path.commonprefix(paths)
    if (os.path.isdir(common_prefix) and
        all(_IsPathUnderDirectory(path, common_prefix) for path in paths)):
      self._base_dir = common_prefix
    else:
      self._base_dir = os.path.dirname(common_prefix)

  def GetBackendStartupArgs(self):
    """Returns the args dict consumed by the backend on startup."""
    # Port 0 lets the operating system choose a free port.
    return {'base_dir': self._base_dir,
            'paths': self._paths,
            'host': self.host_ip,
            'port': 0}

  @property
  def paths(self):
    """The set of real paths this server was created with."""
    return self._paths_as_set

  @property
  def url(self):
    """The URL reported by this server's forwarder."""
    return self.forwarder.url

  def UrlOf(self, path):
    """Returns the URL under which |path| is served."""
    relative_path = os.path.relpath(path, self._base_dir)
    # Preserve trailing slash or backslash.
    # It doesn't matter in a file path, but it does matter in a URL.
    if path.endswith(os.sep) or (os.altsep and path.endswith(os.altsep)):
      relative_path += '/'
    return urlparse.urljoin(self.url, relative_path.replace(os.sep, '/'))