1# Copyright 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import BaseHTTPServer
6from collections import namedtuple
7import errno
8import gzip
9import mimetypes
10import os
11import SimpleHTTPServer
12import socket
13import SocketServer
14import StringIO
15import sys
16import urlparse
17
18from telemetry.core import local_server
19
# Inclusive byte offsets (first and last byte) of a requested HTTP range.
ByteRange = namedtuple('ByteRange', 'from_byte to_byte')
# Pairs a resource_map entry with the byte range requested for it; the
# request handler stores None as byte_range when no range applies.
ResourceAndRange = namedtuple('ResourceAndRange', 'resource byte_range')
22
23
class MemoryCacheHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
  """Request handler that answers GET/HEAD from the server's resource map.

  Resources are looked up in self.server.resource_map, keyed by the real
  filesystem path that the request URL translates to. Single byte-range
  requests ("Range: bytes=...") are answered with a 206 response.
  """

  protocol_version = 'HTTP/1.1'  # override BaseHTTPServer setting
  wbufsize = -1  # override StreamRequestHandler (a base class) setting

  def handle(self):
    """Handles requests on the connection, ignoring connection resets."""
    try:
      BaseHTTPServer.BaseHTTPRequestHandler.handle(self)
    except socket.error as e:
      # Connection reset errors happen all the time due to the browser closing
      # without terminating the connection properly.  They can be safely
      # ignored.
      # Use e.errno rather than e[0]: indexing raises IndexError for an error
      # constructed without arguments, which would mask the original error.
      if e.errno != errno.ECONNRESET:
        raise

  def do_GET(self):
    """Serve a GET request."""
    resource_range = self.SendHead()

    # SendHead returns None after it has already replied with an error.
    if not resource_range or not resource_range.resource:
      return
    response = resource_range.resource['response']

    if not resource_range.byte_range:
      self.wfile.write(response)
      return

    # Byte ranges are inclusive on both ends, hence the + 1 for slicing.
    start_index = resource_range.byte_range.from_byte
    end_index = resource_range.byte_range.to_byte
    self.wfile.write(response[start_index:end_index + 1])

  def do_HEAD(self):
    """Serve a HEAD request."""
    self.SendHead()

  def log_error(self, fmt, *args):
    """Deliberately discards error log messages."""
    pass

  def log_request(self, code='-', size='-'):
    """Deliberately discards per-request log messages."""
    # Don't spam the console unless it is important.
    pass

  def SendHead(self):
    """Sends the status line and headers for the requested resource.

    Returns:
      A ResourceAndRange with the resource_map entry and the requested
      ByteRange (None when the whole resource is served), or None if the
      path is not in the resource map and a 404 was sent instead.
    """
    path = os.path.realpath(self.translate_path(self.path))
    resource = self.server.resource_map.get(path)
    if resource is None:
      self.send_error(404, 'File not found')
      return None

    total_num_of_bytes = resource['content-length']
    byte_range = self.GetByteRange(total_num_of_bytes)
    if byte_range:
      # request specified a range, so set response code to 206.
      self.send_response(206)
      self.send_header('Content-Range', 'bytes %d-%d/%d' %
                       (byte_range.from_byte, byte_range.to_byte,
                        total_num_of_bytes))
      # From here on this is the length of the partial body being sent.
      total_num_of_bytes = byte_range.to_byte - byte_range.from_byte + 1
    else:
      self.send_response(200)

    self.send_header('Content-Length', str(total_num_of_bytes))
    self.send_header('Content-Type', resource['content-type'])
    self.send_header('Last-Modified',
                     self.date_time_string(resource['last-modified']))
    if resource['zipped']:
      self.send_header('Content-Encoding', 'gzip')
    self.end_headers()
    return ResourceAndRange(resource, byte_range)

  def GetByteRange(self, total_num_of_bytes):
    """Parse the Range header and get the byte range it specifies.

    Args:
      total_num_of_bytes: Total # of bytes in requested resource,
      used to calculate upper range limit.
    Returns:
      A ByteRange namedtuple object with the requested (inclusive) byte-range
      values. Returns None, meaning "serve the whole resource", if no Range is
      requested, if the header cannot be parsed (wrong unit, multiple ranges,
      non-numeric values, "bytes=-"), or if the range starts at or beyond the
      end of the resource.
      If range specified is in the format "N-", return N-END. Refer to
      http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html for details.
      If range specified is in the format "-N", return the last N bytes, or
      the whole resource if it is shorter than N bytes.
      If the upper limit is smaller than the lower limit or not smaller than
      the total # of bytes, the upper limit becomes the last byte.
    """
    range_header = self.headers.getheader('Range')
    if range_header is None:
      return None
    if not range_header.startswith('bytes='):
      return None

    # The range header is expected to be a string in this format:
    # bytes=0-1
    # Anything that does not split into exactly two parts (for example a
    # multi-range request) is not supported and is ignored.
    byte_range_values = range_header[len('bytes='):].split('-')
    if len(byte_range_values) != 2:
      return None
    first_value, last_value = byte_range_values
    if not first_value and not last_value:
      return None

    last_byte = total_num_of_bytes - 1
    try:
      if first_value:
        from_byte = int(first_value)
        # If the upper limit is not defined return all bytes from from_byte.
        to_byte = int(last_value) if last_value else last_byte
      else:
        # Suffix range "-N": the final N bytes of the resource.
        from_byte = max(total_num_of_bytes - int(last_value), 0)
        to_byte = last_byte
    except ValueError:
      return None

    # A range that starts outside the resource cannot be satisfied; ignore it
    # so that Content-Length can never become zero or negative.
    if from_byte < 0 or from_byte >= total_num_of_bytes:
      return None

    # Make to_byte the end byte by default in edge cases.
    if to_byte < from_byte or to_byte >= total_num_of_bytes:
      to_byte = last_byte

    return ByteRange(from_byte, to_byte)
142
143
class _MemoryCacheHTTPServerImpl(SocketServer.ThreadingMixIn,
                                 BaseHTTPServer.HTTPServer):
  """Threaded HTTP server holding every served file in memory.

  self.resource_map maps a real file path to a dict with the keys
  'content-type', 'content-length', 'last-modified', 'response' and 'zipped'.
  """

  # The default queue size, 5, comes from SocketServer.TCPServer (the parent
  # of BaseHTTPServer.HTTPServer). Many domains are intercepted through this
  # single server, so more than 5 concurrent requests are quite possible.
  request_queue_size = 128

  # Don't prevent python from exiting when there is thread activity.
  daemon_threads = True

  def __init__(self, host_port, handler, paths):
    """Binds the server and preloads every file or directory in paths."""
    BaseHTTPServer.HTTPServer.__init__(self, host_port, handler)
    self.resource_map = {}
    for path in paths:
      if not os.path.isdir(path):
        self.AddFileToResourceMap(path)
        continue
      self.AddDirectoryToResourceMap(path)

  def AddDirectoryToResourceMap(self, directory_path):
    """Loads all files in directory_path into the in-memory resource map."""
    for root, dirs, files in os.walk(directory_path):
      # Prune hidden folders (like .svn and .git) in place so that os.walk
      # does not descend into them.
      dirs[:] = [name for name in dirs if not name.startswith('.')]

      for name in files:
        # Hidden files are skipped as well.
        if name.startswith('.'):
          continue
        candidate = os.path.join(root, name)
        if os.path.exists(candidate):  # Allow for '.#' files
          self.AddFileToResourceMap(candidate)

  def AddFileToResourceMap(self, file_path):
    """Loads file_path into the in-memory resource map."""
    real_path = os.path.realpath(file_path)
    if real_path in self.resource_map:
      return

    with open(real_path, 'rb') as fd:
      body = fd.read()
      stat_result = os.fstat(fd.fileno())

    content_type = mimetypes.guess_type(real_path)[0]
    # Only these textual types are stored gzip-compressed.
    zipped = content_type in ('text/html', 'text/css',
                              'application/javascript')
    if zipped:
      buf = StringIO.StringIO()
      compressor = gzip.GzipFile(fileobj=buf, compresslevel=9, mode='wb')
      compressor.write(body)
      compressor.close()
      body = buf.getvalue()
      buf.close()

    entry = {
        'content-type': content_type,
        'content-length': len(body),
        'last-modified': stat_result.st_mtime,
        'response': body,
        'zipped': zipped
    }
    self.resource_map[real_path] = entry

    # An index.html is also registered under its directory, sharing the entry.
    if os.path.basename(real_path) == 'index.html':
      self.resource_map[os.path.dirname(real_path)] = entry
208
209
class MemoryCacheHTTPServerBackend(local_server.LocalServerBackend):
  """Backend that creates and runs the in-memory HTTP server."""

  def __init__(self):
    super(MemoryCacheHTTPServerBackend, self).__init__()
    # Set by StartAndGetNamedPorts; None until the server has been created.
    self._httpd = None

  def StartAndGetNamedPorts(self, args):
    """Changes into the base directory, loads the paths and binds the server.

    Args:
      args: Dict with the keys 'base_dir' (directory to chdir into), 'paths'
          (files and directories to serve), 'host' and 'port' (address to
          bind to).
    Returns:
      A single-element list with the local_server.NamedPort named 'http'
      carrying the port the server is bound to.

    Exits the process with status 1 if any path is outside the base directory.
    """
    base_dir = args['base_dir']
    os.chdir(base_dir)

    paths = args['paths']
    cwd = os.path.realpath(os.getcwd())
    # Compare against the cwd with a trailing separator so that a sibling
    # sharing only a name prefix (cwd "/a/foo", path "/a/foobar") is rejected.
    # os.path.join(cwd, '') does not double the separator of a root directory.
    cwd_with_sep = os.path.join(cwd, '')
    for path in paths:
      real_path = os.path.realpath(path)
      if real_path != cwd and not real_path.startswith(cwd_with_sep):
        sys.stderr.write('"%s" is not under the cwd.\n' % path)
        sys.exit(1)

    server_address = (args['host'], args['port'])
    MemoryCacheHTTPRequestHandler.protocol_version = 'HTTP/1.1'
    self._httpd = _MemoryCacheHTTPServerImpl(
        server_address, MemoryCacheHTTPRequestHandler, paths)
    # Report the port actually bound, not the one that was requested.
    return [local_server.NamedPort('http', self._httpd.server_address[1])]

  def ServeForever(self):
    """Runs the request loop of the server created by StartAndGetNamedPorts."""
    return self._httpd.serve_forever()
234
235
class MemoryCacheHTTPServer(local_server.LocalServer):
  """Local server that serves a set of files and directories from memory.

  The base directory is a directory containing all of the given paths; URLs
  returned by UrlOf are relative to it.
  """

  def __init__(self, paths):
    """Validates paths and derives the base directory they are served from.

    Args:
      paths: Iterable of existing file or directory paths to serve.
    """
    super(MemoryCacheHTTPServer, self).__init__(MemoryCacheHTTPServerBackend)
    self._base_dir = None

    # Copy first: a one-shot iterable would otherwise be exhausted by the
    # existence check below and leave nothing to serve.
    paths = list(paths)
    for path in paths:
      assert os.path.exists(path), '%s does not exist.' % path

    self._paths = paths

    self._paths_as_set = set(map(os.path.realpath, paths))

    # os.path.commonprefix compares character by character, so the result may
    # end in the middle of a path component. It is only usable as the base
    # directory if it is a directory AND every path either equals it or
    # continues with a separator; otherwise "/a/foo" would be picked for
    # ["/a/foo", "/a/foobar"] although it does not contain the second path.
    common_prefix = os.path.commonprefix(paths)
    separators = tuple(sep for sep in (os.sep, os.altsep) if sep)
    prefix_length = len(common_prefix)
    ends_on_component = all(
        len(path) == prefix_length or
        common_prefix.endswith(separators) or
        path[prefix_length] in separators
        for path in paths)
    if os.path.isdir(common_prefix) and ends_on_component:
      self._base_dir = common_prefix
    else:
      self._base_dir = os.path.dirname(common_prefix)

  def GetBackendStartupArgs(self):
    """Returns the args dict consumed by the backend's startup method."""
    # Port 0 is requested here; the backend reports back the bound port.
    # NOTE(review): host_ip is not defined in this class; presumably it is
    # provided by local_server.LocalServer -- confirm.
    return {'base_dir': self._base_dir,
            'paths': self._paths,
            'host': self.host_ip,
            'port': 0}

  @property
  def paths(self):
    """The set of real paths of everything given to the constructor."""
    return self._paths_as_set

  @property
  def url(self):
    """The URL of the server as exposed by self.forwarder."""
    return self.forwarder.url

  def UrlOf(self, path):
    """Returns the URL under which the given file path is served."""
    relative_path = os.path.relpath(path, self._base_dir)
    # Preserve trailing slash or backslash.
    # It doesn't matter in a file path, but it does matter in a URL.
    if path.endswith(os.sep) or (os.altsep and path.endswith(os.altsep)):
      relative_path += '/'
    return urlparse.urljoin(self.url, relative_path.replace(os.sep, '/'))
277