1#!/usr/bin/env python
2# Copyright 2010 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Replays web pages under simulated network conditions.
17
18Must be run as administrator (sudo).
19
20To record web pages:
21  1. Start the program in record mode.
22     $ sudo ./replay.py --record archive.wpr
23  2. Load the web pages you want to record in a web browser. It is important to
24     clear browser caches before this so that all subresources are requested
25     from the network.
26  3. Kill the process to stop recording.
27
28To replay web pages:
29  1. Start the program in replay mode with a previously recorded archive.
30     $ sudo ./replay.py archive.wpr
31  2. Load recorded pages in a web browser. A 404 will be served for any pages or
32     resources not in the recorded archive.
33
34Network simulation examples:
  # 128KByte/s uplink bandwidth, 4Mbit/s downlink bandwidth with 100ms RTT
36  $ sudo ./replay.py --up 128KByte/s --down 4Mbit/s --delay_ms=100 archive.wpr
37
38  # 1% packet loss rate
39  $ sudo ./replay.py --packet_loss_rate=0.01 archive.wpr
40"""
41
42import json
43import logging
44import optparse
45import os
46import socket
47import sys
48import traceback
49
50import customhandlers
51import dnsproxy
52import httparchive
53import httpclient
54import httpproxy
55import net_configs
56import platformsettings
57import rules_parser
58import script_injector
59import servermanager
60import trafficshaper
61
# Compare version tuples rather than strings: sys.version is free-form text
# and string comparison is lexicographic (e.g. '10.0' < '2.6' is True).
if sys.version_info < (2, 6):
  # Parenthesized single-argument print parses on both Python 2 and 3.
  print('Need Python 2.6 or greater.')
  sys.exit(1)
65
66
def configure_logging(log_level_name, log_file_name=None):
  """Set up the root logger's level, format, and handlers.

  A critical message is logged first if the root logger already has handlers
  when this function is entered.

  Args:
    log_level_name: 'debug', 'info', 'warning', 'error', or 'critical'.
    log_file_name: optional path of a file that also receives log records.
  """
  root_logger = logging.getLogger()
  if root_logger.handlers:
    logging.critical(
        'A logging method (e.g. "logging.warn(...)") was called before '
        'logging was configured.')
  # The level name maps onto the logging module's upper-case constants.
  level = getattr(logging, log_level_name.upper())
  record_format = ('(%(levelname)s) %(asctime)s '
                   '%(module)s.%(funcName)s:%(lineno)d  %(message)s')

  logging.basicConfig(level=level, format=record_format)
  if log_file_name:
    file_handler = logging.FileHandler(log_file_name)
    file_handler.setLevel(level)
    file_handler.setFormatter(logging.Formatter(record_format))
    root_logger.addHandler(file_handler)
  # The platform may or may not supply an additional handler.
  platform_handler = platformsettings.get_system_logging_handler()
  if platform_handler:
    root_logger.addHandler(platform_handler)
93
94
def AddDnsForward(server_manager, host):
  """Register DNS forwarding to |host| with the server manager.

  Args:
    server_manager: object whose Append() receives the nameserver setter.
    host: argument passed along for
        platformsettings.set_temporary_primary_nameserver.
  """
  nameserver_setter = platformsettings.set_temporary_primary_nameserver
  server_manager.Append(nameserver_setter, host)
98
99
def AddDnsProxy(server_manager, options, host, port, real_dns_lookup,
                http_archive):
  """Register a DNS proxy server, with optional filters, on |host|:|port|.

  Args:
    server_manager: receives the server and the record/replay callbacks.
    options: supplies dns_private_passthrough, shaping_dns, and record.
    host: address given to the DNS proxy and to the replay DNS lookup.
    port: port given to the DNS proxy.
    real_dns_lookup: passed to the private-IP filter.
    http_archive: passed to the private-IP filter.
  """
  filters = []

  if options.dns_private_passthrough:
    passthrough = dnsproxy.PrivateIpFilter(real_dns_lookup, http_archive)
    filters.append(passthrough)
    # The same initializer runs on both record and replay transitions.
    for register in (server_manager.AppendRecordCallback,
                     server_manager.AppendReplayCallback):
      register(passthrough.InitializeArchiveHosts)

  if options.shaping_dns:
    delayer = dnsproxy.DelayFilter(options.record, **options.shaping_dns)
    filters.append(delayer)
    server_manager.AppendRecordCallback(delayer.SetRecordMode)
    server_manager.AppendReplayCallback(delayer.SetReplayMode)

  server_manager.Append(
      dnsproxy.DnsProxyServer, host, port,
      dns_lookup=dnsproxy.ReplayDnsLookup(host, filters))
115
116
def AddWebProxy(server_manager, options, host, real_dns_lookup, http_archive):
  """Register the HTTP proxy and the optional HTTPS / HTTP-to-HTTPS proxies.

  Args:
    server_manager: receives the proxy servers and record/replay callbacks.
    options: supplies rules, script injection, port, ssl, and shaping values.
    host: address every proxy server is created with.
    real_dns_lookup: passed to the archive fetcher.
    http_archive: passed to the custom handlers and the archive fetcher.
  """
  if not options.rules_path:
    rules = rules_parser.Rules()
  else:
    with open(options.rules_path) as rules_file:
      permitted_imports = [
          item.strip() for item in options.allowed_rule_imports.split(',')]
      rules = rules_parser.Rules(rules_file, permitted_imports)
    logging.info('Parsed %s rules:\n%s', options.rules_path, rules)

  inject_script = script_injector.GetInjectScript(options.inject_scripts)
  custom_handlers = customhandlers.CustomHandlers(options, http_archive)
  custom_handlers.add_server_manager_handler(server_manager)
  archive_fetch = httpclient.ControllableHttpArchiveFetch(
      http_archive,
      real_dns_lookup,
      inject_script,
      options.diff_unknown_requests,
      options.record,
      use_closest_match=options.use_closest_match,
      scramble_images=options.scramble_images)
  server_manager.AppendRecordCallback(archive_fetch.SetRecordMode)
  server_manager.AppendReplayCallback(archive_fetch.SetReplayMode)

  def AppendProxy(server_class, port, *extra_args):
    """Append |server_class| with the arguments shared by every proxy."""
    server_manager.Append(
        server_class, archive_fetch, custom_handlers, rules, *extra_args,
        host=host, port=port, use_delays=options.use_server_delay,
        **options.shaping_http)

  AppendProxy(httpproxy.HttpProxyServer, options.port)
  if options.ssl:
    # Both HTTPS variants take the root CA cert path as an extra positional.
    https_server_class = (
        httpproxy.HttpsProxyServer if options.should_generate_certs
        else httpproxy.SingleCertHttpsProxyServer)
    AppendProxy(https_server_class, options.ssl_port,
                options.https_root_ca_cert_path)
  if options.http_to_https_port:
    AppendProxy(httpproxy.HttpToHttpsProxyServer, options.http_to_https_port)
161
162
def AddTrafficShaper(server_manager, options, host):
  """Register a traffic shaper when dummynet shaping arguments are present.

  Does nothing when options.shaping_dummynet is empty.

  Args:
    server_manager: object whose AppendTrafficShaper() receives the shaper.
    options: supplies shaping_dummynet (keyword args) and server_mode.
    host: address the shaper is created with.
  """
  shaper_kwargs = options.shaping_dummynet
  if not shaper_kwargs:
    return
  # Loopback is used only outside server mode and only for 127.0.0.1.
  on_loopback = not (options.server_mode or host != '127.0.0.1')
  server_manager.AppendTrafficShaper(
      trafficshaper.TrafficShaper,
      host=host,
      use_loopback=on_loopback,
      **shaper_kwargs)
169
170
class OptionsWrapper(object):
  """Add checks, updates, and methods to option values.

  Attribute lookups that the wrapper itself cannot satisfy fall through to the
  wrapped option values, so parsed options read as plain attributes.

  Example:
    options, args = option_parser.parse_args()
    options = OptionsWrapper(options, option_parser)  # run checks and updates
    if options.record and options.IsRootRequired():
       [...]
  """
  _TRAFFICSHAPING_OPTIONS = {
      'down', 'up', 'delay_ms', 'packet_loss_rate', 'init_cwnd', 'net'}
  # Each entry is (option, options that may not be combined with it).
  _CONFLICTING_OPTIONS = (
      ('record', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'spdy', 'use_server_delay')),
      ('append', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'use_server_delay')),  # same as --record
      ('net', ('down', 'up', 'delay_ms')),
      ('server', ('server_mode',)),
  )

  def __init__(self, options, parser):
    """Wrap |options|, then run every check and derived-value update.

    Args:
      options: the values object returned by parser.parse_args().
      parser: the option parser; supplies defaults and error reporting.
    """
    self._options = options
    self._parser = parser
    # Names of the options whose value differs from the parser default.
    self._nondefaults = {
        name for name, value in parser.defaults.items()
        if getattr(options, name) != value}
    self._CheckConflicts()
    self._CheckValidIp('host')
    self._CheckFeatureSupport()
    self._MassageValues()

  def _CheckConflicts(self):
    """Give an error if mutually exclusive options are used."""
    for option, bad_options in self._CONFLICTING_OPTIONS:
      if option not in self._nondefaults:
        continue
      for bad_option in bad_options:
        if bad_option in self._nondefaults:
          self._parser.error('Option --%s cannot be used with --%s.' %
                             (bad_option, option))

  def _CheckValidIp(self, name):
    """Give an error if option |name| is not a valid IPv4 address."""
    value = getattr(self._options, name)
    if value:
      try:
        socket.inet_aton(value)
      except (socket.error, TypeError, ValueError):
        # socket.error: malformed address; TypeError/ValueError: a value that
        # cannot be converted for the call (wrong type, bad characters).
        self._parser.error('Option --%s must be a valid IPv4 address.' % name)

  def _CheckFeatureSupport(self):
    """Give an error if a requested feature is unsupported on this platform."""
    if (self._options.should_generate_certs and
        not platformsettings.HasSniSupport()):
      self._parser.error('Option --should_generate_certs requires pyOpenSSL '
                         '0.13 or greater for SNI support.')

  def _ShapingKeywordArgs(self, shaping_key):
    """Return the shaping keyword args for |shaping_key|.

    Only options with non-default values are included.

    Args:
      shaping_key: one of 'dummynet', 'dns', 'http'.
    Returns:
      {}  # if shaping_key does not apply, or options have default values.
      {k: v, ...}
    """
    kwargs = {}
    def AddItemIfSet(d, kw_key, opt_key=None):
      # Copy option |opt_key| (default: same name as |kw_key|) into |d| only
      # when it was given a non-default value.
      opt_key = opt_key or kw_key
      if opt_key in self._nondefaults:
        d[kw_key] = getattr(self, opt_key)
    # 'proxy' shaping applies to both the dns and http servers; otherwise the
    # shaping type must match the key exactly.
    if ((self.shaping_type == 'proxy' and shaping_key in ('dns', 'http')) or
        self.shaping_type == shaping_key):
      AddItemIfSet(kwargs, 'delay_ms')
      if shaping_key in ('dummynet', 'http'):
        AddItemIfSet(kwargs, 'down_bandwidth', opt_key='down')
        AddItemIfSet(kwargs, 'up_bandwidth', opt_key='up')
        if shaping_key == 'dummynet':
          AddItemIfSet(kwargs, 'packet_loss_rate')
          AddItemIfSet(kwargs, 'init_cwnd')
        elif self.shaping_type != 'none':
          # Only the dummynet branch above passes these two options along, so
          # warn that they are being dropped.
          if 'packet_loss_rate' in self._nondefaults:
            logging.warning('Shaping type, %s, ignores --packet_loss_rate=%s',
                            self.shaping_type, self.packet_loss_rate)
          if 'init_cwnd' in self._nondefaults:
            logging.warning('Shaping type, %s, ignores --init_cwnd=%s',
                            self.shaping_type, self.init_cwnd)
    return kwargs

  def _MassageValues(self):
    """Set options that depend on the values of other options."""
    if self.append and not self.record:
      self._options.record = True
    if self.net:
      # A named network config supplies down, up, and delay_ms together; mark
      # them as set so they reach the shaping keyword args.
      self._options.down, self._options.up, self._options.delay_ms = \
          net_configs.GetNetConfig(self.net)
      self._nondefaults.update(['down', 'up', 'delay_ms'])
    if not self.ssl:
      self._options.https_root_ca_cert_path = None
    self.shaping_dns = self._ShapingKeywordArgs('dns')
    self.shaping_http = self._ShapingKeywordArgs('http')
    self.shaping_dummynet = self._ShapingKeywordArgs('dummynet')

  def __getattr__(self, name):
    """Make the original option values available.

    Raises:
      AttributeError: if neither the wrapper nor the wrapped options has
          |name|.
    """
    # __getattr__ only runs after normal lookup fails. Read _options from the
    # instance dict directly: going through self._options would re-enter this
    # method forever on an instance whose __init__ has not run (e.g. while
    # being copied or unpickled).
    try:
      options = self.__dict__['_options']
    except KeyError:
      raise AttributeError(name)
    return getattr(options, name)

  def __repr__(self):
    """Return a json representation of the original options dictionary."""
    return json.dumps(self._options.__dict__)

  def IsRootRequired(self):
    """Returns True iff the options require whole program root access."""
    if self.server:
      return True

    def IsPrivilegedPort(port):
      # An unset (None) or zero port is never treated as privileged.
      return bool(port) and port < 1024

    if IsPrivilegedPort(self.port) or (self.ssl and
                                       IsPrivilegedPort(self.ssl_port)):
      return True

    if self.dns_forwarding:
      if IsPrivilegedPort(self.dns_port):
        return True
      if not self.server_mode and self.host == '127.0.0.1':
        return True

    return False
299
300
def replay(options, replay_filename):
  """Set up the requested servers, run them, and save any recording.

  Args:
    options: an OptionsWrapper holding the parsed command-line options.
    replay_filename: path of the archive to load or record to; unused when
        options.server is set.
  Returns:
    0 after a normal run or a keyboard interrupt, 1 after one of the known
    DNS / traffic shaping / administrator errors, 2 after any other exception.
  """
  if options.admin_check and options.IsRootRequired():
    platformsettings.rerun_as_administrator()
  configure_logging(options.log_level, options.log_file)
  server_manager = servermanager.ServerManager(options.record)
  # Stays None when options.server is set: that mode only forwards DNS and
  # never loads or creates an archive.
  http_archive = None
  if options.server:
    AddDnsForward(server_manager, options.server)
  else:
    real_dns_lookup = dnsproxy.RealDnsLookup(
        name_servers=[platformsettings.get_original_primary_nameserver()])
    if options.record:
      httparchive.HttpArchive.AssertWritable(replay_filename)
      if options.append and os.path.exists(replay_filename):
        http_archive = httparchive.HttpArchive.Load(replay_filename)
        logging.info('Appending to %s (loaded %d existing responses)',
                     replay_filename, len(http_archive))
      else:
        http_archive = httparchive.HttpArchive()
    else:
      http_archive = httparchive.HttpArchive.Load(replay_filename)
      logging.info('Loaded %d responses from %s',
                   len(http_archive), replay_filename)
    server_manager.AppendRecordCallback(real_dns_lookup.ClearCache)
    server_manager.AppendRecordCallback(http_archive.clear)

    ipfw_dns_host = None
    if options.dns_forwarding or options.shaping_dummynet:
      # compute the ip/host used for the DNS server and traffic shaping
      ipfw_dns_host = options.host
      if not ipfw_dns_host:
        ipfw_dns_host = platformsettings.get_server_ip_address(
            options.server_mode)

    if options.dns_forwarding:
      if not options.server_mode and ipfw_dns_host == '127.0.0.1':
        AddDnsForward(server_manager, ipfw_dns_host)
      AddDnsProxy(server_manager, options, ipfw_dns_host, options.dns_port,
                  real_dns_lookup, http_archive)
    if options.ssl and options.https_root_ca_cert_path is None:
      # Fall back to the certificate file that sits next to this script.
      options.https_root_ca_cert_path = os.path.join(os.path.dirname(__file__),
                                                     'wpr_cert.pem')
    http_proxy_address = options.host
    if not http_proxy_address:
      http_proxy_address = platformsettings.get_httpproxy_ip_address(
          options.server_mode)
    AddWebProxy(server_manager, options, http_proxy_address, real_dns_lookup,
                http_archive)
    AddTrafficShaper(server_manager, options, ipfw_dns_host)

  exit_status = 0
  try:
    server_manager.Run()
  except KeyboardInterrupt:
    logging.info('Shutting down.')
  except (dnsproxy.DnsProxyException,
          trafficshaper.TrafficShaperException,
          platformsettings.NotAdministratorError,
          platformsettings.DnsUpdateError) as e:
    logging.critical('%s: %s', e.__class__.__name__, e)
    exit_status = 1
  except Exception:
    logging.critical(traceback.format_exc())
    exit_status = 2

  # Compare against None rather than truthiness: an archive with no responses
  # may be falsy (len() is used on it above) and must still be saved. The None
  # check keeps --server combined with --record/--append from failing on an
  # archive that was never created.
  if options.record and http_archive is not None:
    http_archive.Persist(replay_filename)
    logging.info('Saved %d responses to %s', len(http_archive), replay_filename)
  return exit_status
369
370
def GetOptionParser():
  """Build the optparse parser for the replay command line.

  Returns:
    An optparse.OptionParser with the general options plus the
    'Network Simulation Options' and 'Replay Harness Options' groups.
  """
  class PlainHelpFormatter(optparse.IndentedHelpFormatter):
    """Help formatter that emits the description text without re-wrapping."""

    def format_description(self, description):
      if description:
        return description + '\n'
      else:
        return ''
  option_parser = optparse.OptionParser(
      usage='%prog [options] replay_file',
      formatter=PlainHelpFormatter(),
      description=__doc__,
      epilog='http://code.google.com/p/web-page-replay/')

  option_parser.add_option('-r', '--record', default=False,
      action='store_true',
      help='Download real responses and record them to replay_file')
  option_parser.add_option('--append', default=False,
      action='store_true',
      help='Append responses to replay_file.')
  option_parser.add_option('-l', '--log_level', default='debug',
      action='store',
      type='choice',
      choices=('debug', 'info', 'warning', 'error', 'critical'),
      help='Minimum verbosity level to log')
  option_parser.add_option('-f', '--log_file', default=None,
      action='store',
      type='string',
      help='Log file to use in addition to writing logs to stderr.')

  network_group = optparse.OptionGroup(option_parser,
      'Network Simulation Options',
      'These options configure the network simulation in replay mode')
  network_group.add_option('-u', '--up', default='0',
      action='store',
      type='string',
      help='Upload Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_option('-d', '--down', default='0',
      action='store',
      type='string',
      help='Download Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_option('-m', '--delay_ms', default='0',
      action='store',
      type='string',
      help='Propagation delay (latency) in milliseconds. Zero means no delay.')
  network_group.add_option('-p', '--packet_loss_rate', default='0',
      action='store',
      type='string',
      help='Packet loss rate in range [0..1]. Zero means no loss.')
  network_group.add_option('-w', '--init_cwnd', default='0',
      action='store',
      type='string',
      help='Set initial cwnd (linux only, requires kernel patch)')
  network_group.add_option('--net', default=None,
      action='store',
      type='choice',
      choices=net_configs.NET_CONFIG_NAMES,
      help='Select a set of network options: %s.' % ', '.join(
          net_configs.NET_CONFIG_NAMES))
  # optparse infers type='choice' whenever choices is given; it is spelled out
  # here to match the other choice options.
  network_group.add_option('--shaping_type', default='dummynet',
      action='store',
      type='choice',
      choices=('dummynet', 'proxy'),
      help='When shaping is configured (i.e. --up, --down, etc.) decides '
           'whether to use |dummynet| (default), or |proxy| servers.')
  option_parser.add_option_group(network_group)

  harness_group = optparse.OptionGroup(option_parser,
      'Replay Harness Options',
      'These advanced options configure various aspects of the replay harness')
  harness_group.add_option('-S', '--server', default=None,
      action='store',
      type='string',
      help='IP address of host running "replay.py --server_mode". '
           'This only changes the primary DNS nameserver to use the given IP.')
  harness_group.add_option('-M', '--server_mode', default=False,
      action='store_true',
      help='Run replay DNS & http proxies, and trafficshaping on --port '
           'without changing the primary DNS nameserver. '
           'Other hosts may connect to this using "replay.py --server" '
           'or by pointing their DNS to this server.')
  harness_group.add_option('-i', '--inject_scripts', default='deterministic.js',
      action='store',
      dest='inject_scripts',
      help='A comma separated list of JavaScript sources to inject in all '
           'pages. By default a script is injected that makes sources '
           'of entropy such as Date() and Math.random() deterministic. '
           'CAUTION: Without deterministic.js, many pages will not replay.')
  harness_group.add_option('-D', '--no-diff_unknown_requests', default=True,
      action='store_false',
      dest='diff_unknown_requests',
      help='During replay, do not show a diff of unknown requests against '
           'their nearest match in the archive.')
  harness_group.add_option('-C', '--use_closest_match', default=False,
      action='store_true',
      dest='use_closest_match',
      help='During replay, if a request is not found, serve the closest match '
           'in the archive instead of giving a 404.')
  harness_group.add_option('-U', '--use_server_delay', default=False,
      action='store_true',
      dest='use_server_delay',
      help='During replay, simulate server delay by delaying response time to '
           'requests.')
  harness_group.add_option('-I', '--screenshot_dir', default=None,
      action='store',
      type='string',
      help='Save PNG images of the loaded page in the given directory.')
  harness_group.add_option('-P', '--no-dns_private_passthrough', default=True,
      action='store_false',
      dest='dns_private_passthrough',
      help='Don\'t forward DNS requests that resolve to private network '
           'addresses. CAUTION: With this option important services like '
           'Kerberos will resolve to the HTTP proxy address.')
  harness_group.add_option('-x', '--no-dns_forwarding', default=True,
      action='store_false',
      dest='dns_forwarding',
      help='Don\'t forward DNS requests to the local replay server. '
           'CAUTION: With this option an external mechanism must be used to '
           'forward traffic to the replay server.')
  harness_group.add_option('--host', default=None,
      action='store',
      type='string',
      help='The IP address to bind all servers to. Defaults to 0.0.0.0 or '
           '127.0.0.1, depending on --server_mode and platform.')
  harness_group.add_option('-o', '--port', default=80,
      action='store',
      type='int',
      help='Port number to listen on.')
  harness_group.add_option('--ssl_port', default=443,
      action='store',
      type='int',
      help='SSL port number to listen on.')
  harness_group.add_option('--http_to_https_port', default=None,
      action='store',
      type='int',
      help='Port on which WPR will listen for HTTP requests that it will send '
           'along as HTTPS requests.')
  harness_group.add_option('--dns_port', default=53,
      action='store',
      type='int',
      help='DNS port number to listen on.')
  harness_group.add_option('-c', '--https_root_ca_cert_path', default=None,
      action='store',
      type='string',
      help='Certificate file to use with SSL (gets auto-generated if needed).')
  harness_group.add_option('--no-ssl', default=True,
      action='store_false',
      dest='ssl',
      help='Do not setup an SSL proxy.')
  harness_group.add_option('--should_generate_certs', default=False,
      action='store_true',
      help='Use OpenSSL to generate certificate files for requested hosts.')
  harness_group.add_option('--no-admin-check', default=True,
      action='store_false',
      dest='admin_check',
      help='Do not check if administrator access is needed.')
  harness_group.add_option('--scramble_images', default=False,
      action='store_true',
      dest='scramble_images',
      help='Scramble image responses.')
  harness_group.add_option('--rules_path', default=None,
      action='store',
      help='Path of file containing Python rules.')
  harness_group.add_option('--allowed_rule_imports', default='rules',
      action='store',
      help='A comma-separated list of allowed rule imports, or \'*\' to allow'
           ' all packages.  Defaults to \'%default\'.')
  # Attach the group once every harness option has been defined.
  option_parser.add_option_group(harness_group)
  return option_parser
538
539
def main():
  """Parse the command line and run replay().

  A single positional replay_file argument is required unless --server is
  given, in which case no replay file is used.

  Returns:
    The exit status returned by replay().
  """
  parser = GetOptionParser()
  parsed_values, positional = parser.parse_args()
  options = OptionsWrapper(parsed_values, parser)

  replay_filename = None
  if not options.server:
    if len(positional) != 1:
      # parser.error() reports the problem and exits.
      parser.error('Must specify a replay_file')
    replay_filename = positional[0]

  return replay(options, replay_filename)
553
554
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == '__main__':
  exit_code = main()
  sys.exit(exit_code)
557