1#!/usr/bin/env python
2# Copyright 2010 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Replays web pages under simulated network conditions.
17
18Must be run as administrator (sudo).
19
20To record web pages:
21  1. Start the program in record mode.
22     $ sudo ./replay.py --record archive.wpr
23  2. Load the web pages you want to record in a web browser. It is important to
24     clear browser caches before this so that all subresources are requested
25     from the network.
26  3. Kill the process to stop recording.
27
28To replay web pages:
29  1. Start the program in replay mode with a previously recorded archive.
30     $ sudo ./replay.py archive.wpr
31  2. Load recorded pages in a web browser. A 404 will be served for any pages or
32     resources not in the recorded archive.
33
34Network simulation examples:
  # 128KByte/s uplink bandwidth, 4Mbit/s downlink bandwidth with 100ms RTT
36  $ sudo ./replay.py --up 128KByte/s --down 4Mbit/s --delay_ms=100 archive.wpr
37
38  # 1% packet loss rate
39  $ sudo ./replay.py --packet_loss_rate=0.01 archive.wpr
40"""
41
42import argparse
43import json
44import logging
45import os
46import socket
47import sys
48import traceback
49
50import customhandlers
51import dnsproxy
52import httparchive
53import httpclient
54import httpproxy
55import net_configs
56import platformsettings
57import rules_parser
58import script_injector
59import servermanager
60import trafficshaper
61
# Refuse to run on interpreters older than 2.6.  Compare the structured
# version tuple: comparing the sys.version string is lexicographic, so a
# two-digit component would sort incorrectly.
if sys.version_info < (2, 6):
  # Call form prints the same text on Python 2 and also parses on Python 3.
  print('Need Python 2.6 or greater.')
  sys.exit(1)
65
66
def configure_logging(log_level_name, log_file_name=None):
  """Set the root logger's level and format, and attach extra handlers.

  Args:
    log_level_name: 'debug', 'info', 'warning', 'error', or 'critical'.
    log_file_name: optional name of a file that also receives log records.
  """
  record_format = (
      '(%(levelname)s) %(asctime)s '
      '%(module)s.%(funcName)s:%(lineno)d  %(message)s')
  root_logger = logging.getLogger()
  if root_logger.handlers:
    # Handlers already exist, so something logged before this call.
    logging.critical('A logging method (e.g. "logging.warn(...)")'
                     ' was called before logging was configured.')
  # Level names map onto the upper-case constants of the logging module.
  level = getattr(logging, log_level_name.upper())
  logging.basicConfig(level=level, format=record_format)

  if log_file_name:
    file_handler = logging.FileHandler(log_file_name)
    file_handler.setLevel(level)
    file_handler.setFormatter(logging.Formatter(record_format))
    root_logger.addHandler(file_handler)

  platform_handler = platformsettings.get_system_logging_handler()
  if platform_handler:
    root_logger.addHandler(platform_handler)
93
94
def AddDnsForward(server_manager, host):
  """Forward DNS traffic.

  Appends platformsettings.set_temporary_primary_nameserver, with |host| as
  its argument, to |server_manager|.
  """
  nameserver_setter = platformsettings.set_temporary_primary_nameserver
  server_manager.Append(nameserver_setter, host)
98
99
def AddDnsProxy(server_manager, options, host, port, real_dns_lookup,
                http_archive):
  """Append a DNS proxy server, with its lookup filters, to |server_manager|.

  A private-IP filter is added when options.dns_private_passthrough is set,
  and a delay filter when options.shaping_dns is non-empty.  Each filter
  registers its record/replay callbacks with |server_manager|.
  """
  filters = []

  if options.dns_private_passthrough:
    passthrough = dnsproxy.PrivateIpFilter(real_dns_lookup, http_archive)
    filters.append(passthrough)
    # The same initializer runs when entering either mode.
    server_manager.AppendRecordCallback(passthrough.InitializeArchiveHosts)
    server_manager.AppendReplayCallback(passthrough.InitializeArchiveHosts)

  if options.shaping_dns:
    delayer = dnsproxy.DelayFilter(options.record, **options.shaping_dns)
    filters.append(delayer)
    server_manager.AppendRecordCallback(delayer.SetRecordMode)
    server_manager.AppendReplayCallback(delayer.SetReplayMode)

  server_manager.Append(
      dnsproxy.DnsProxyServer, host, port,
      dns_lookup=dnsproxy.ReplayDnsLookup(host, filters))
115
116
def AddWebProxy(server_manager, options, host, real_dns_lookup, http_archive):
  """Append the HTTP proxy, and optional HTTPS proxies, to |server_manager|.

  An HTTP proxy is always appended on options.port.  An HTTPS proxy is added
  on options.ssl_port when options.ssl is set, and an HTTP-to-HTTPS proxy
  when options.http_to_https_port is set.  All of them share one archive
  fetcher, one set of custom handlers and one set of rules.
  """
  if not options.rules_path:
    rules = rules_parser.Rules()
  else:
    with open(options.rules_path) as rules_file:
      import_names = options.allowed_rule_imports.split(',')
      allowed_imports = [item.strip() for item in import_names]
      rules = rules_parser.Rules(rules_file, allowed_imports)
    logging.info('Parsed %s rules:\n%s', options.rules_path, rules)

  injector = script_injector.GetScriptInjector(options.inject_scripts)
  custom_handlers = customhandlers.CustomHandlers(options, http_archive)
  custom_handlers.add_server_manager_handler(server_manager)

  archive_fetch = httpclient.ControllableHttpArchiveFetch(
      http_archive, real_dns_lookup, injector,
      options.diff_unknown_requests, options.record,
      use_closest_match=options.use_closest_match,
      scramble_images=options.scramble_images)
  server_manager.AppendRecordCallback(archive_fetch.SetRecordMode)
  server_manager.AppendReplayCallback(archive_fetch.SetReplayMode)

  # Generated 304 responses are only allowed when not recording.
  allow_generate_304 = not options.record

  server_manager.Append(
      httpproxy.HttpProxyServer, archive_fetch, custom_handlers, rules,
      host=host, port=options.port,
      use_delays=options.use_server_delay,
      allow_generate_304=allow_generate_304,
      **options.shaping_http)

  if options.ssl:
    # Both HTTPS variants take the same arguments; only the class differs.
    if options.should_generate_certs:
      https_server_class = httpproxy.HttpsProxyServer
    else:
      https_server_class = httpproxy.SingleCertHttpsProxyServer
    server_manager.Append(
        https_server_class, archive_fetch, custom_handlers, rules,
        options.https_root_ca_cert_path,
        host=host, port=options.ssl_port,
        use_delays=options.use_server_delay,
        allow_generate_304=allow_generate_304,
        **options.shaping_http)

  if options.http_to_https_port:
    server_manager.Append(
        httpproxy.HttpToHttpsProxyServer,
        archive_fetch, custom_handlers, rules,
        host=host, port=options.http_to_https_port,
        use_delays=options.use_server_delay,
        allow_generate_304=allow_generate_304,
        **options.shaping_http)
166
167
def AddTrafficShaper(server_manager, options, host):
  """Append a dummynet traffic shaper when dummynet shaping is configured.

  Does nothing when options.shaping_dummynet is empty.
  """
  if not options.shaping_dummynet:
    return
  # Loopback is used only for a local 127.0.0.1 setup outside server mode.
  loopback = not options.server_mode and host == '127.0.0.1'
  server_manager.AppendTrafficShaper(
      trafficshaper.TrafficShaper,
      host=host,
      use_loopback=loopback,
      **options.shaping_dummynet)
174
175
class OptionsWrapper(object):
  """Add checks, updates, and methods to option values.

  Wraps the namespace produced by an argparse parser.  Construction validates
  the options (problems are reported through parser.error), sets the values
  that depend on other options, and computes the shaping_dns, shaping_http
  and shaping_dummynet keyword-argument dicts.  Attributes that are not found
  on the wrapper itself are looked up on the wrapped namespace.

  Example:
    options = arg_parser.parse_args()
    options = OptionsWrapper(options, arg_parser)  # run checks and updates
    if options.admin_check and options.IsRootRequired():
       [...]
  """
  _TRAFFICSHAPING_OPTIONS = {
      'down', 'up', 'delay_ms', 'packet_loss_rate', 'init_cwnd', 'net'}
  # Each entry is (option, options that may not be combined with it).
  _CONFLICTING_OPTIONS = (
      ('record', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'spdy', 'use_server_delay')),
      ('append', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'use_server_delay')),  # same as --record
      ('net', ('down', 'up', 'delay_ms')),
      ('server', ('server_mode',)),
  )

  def __init__(self, options, parser):
    """Validate |options| and compute the derived values.

    Args:
      options: the namespace returned by parser.parse_args().
      parser: the argparse parser that produced |options|; used to find the
          declared defaults and to report errors.
    """
    self._options = options
    self._parser = parser
    # Names of the options whose value differs from the declared default.
    # Compare by value: an identity test depends on which objects the
    # interpreter happens to cache (e.g. small integers, short strings).
    self._nondefaults = {
        action.dest for action in parser._optionals._actions
        if getattr(options, action.dest, action.default) != action.default}
    self._CheckConflicts()
    self._CheckValidIp('host')
    self._CheckFeatureSupport()
    self._MassageValues()

  def _CheckConflicts(self):
    """Give an error if mutually exclusive options are used."""
    for option, bad_options in self._CONFLICTING_OPTIONS:
      if option not in self._nondefaults:
        continue
      for bad_option in bad_options:
        if bad_option in self._nondefaults:
          self._parser.error('Option --%s cannot be used with --%s.' %
                             (bad_option, option))

  def _CheckValidIp(self, name):
    """Give an error if option |name| is not a valid IPv4 address.

    An unset (falsy) value is accepted without checking.
    """
    value = getattr(self._options, name)
    if value:
      try:
        socket.inet_aton(value)
      except (socket.error, TypeError, ValueError):
        # inet_aton rejects the text (socket.error) or the argument type.
        self._parser.error('Option --%s must be a valid IPv4 address.' % name)

  def _CheckFeatureSupport(self):
    """Give an error if a requested feature is unavailable on this platform."""
    if (self._options.should_generate_certs and
        not platformsettings.HasSniSupport()):
      self._parser.error('Option --should_generate_certs requires pyOpenSSL '
                         '0.13 or greater for SNI support.')

  def _ShapingKeywordArgs(self, shaping_key):
    """Return the shaping keyword args for |shaping_key|.

    Only options that differ from their defaults are included.

    Args:
      shaping_key: one of 'dummynet', 'dns', 'http'.
    Returns:
      {}  # if shaping_key does not apply, or options have default values.
      {k: v, ...}
    """
    kwargs = {}

    def AddItemIfSet(d, kw_key, opt_key=None):
      """Copy option |opt_key| into d[kw_key] if it has a non-default value."""
      opt_key = opt_key or kw_key
      if opt_key in self._nondefaults:
        d[kw_key] = getattr(self, opt_key)

    # 'proxy' shaping is applied by both the DNS and the HTTP proxy; any
    # other shaping type only applies to the key of the same name.
    applies = (
        (self.shaping_type == 'proxy' and shaping_key in ('dns', 'http')) or
        self.shaping_type == shaping_key)
    if not applies:
      return kwargs

    AddItemIfSet(kwargs, 'delay_ms')
    if shaping_key in ('dummynet', 'http'):
      AddItemIfSet(kwargs, 'down_bandwidth', opt_key='down')
      AddItemIfSet(kwargs, 'up_bandwidth', opt_key='up')
      if shaping_key == 'dummynet':
        AddItemIfSet(kwargs, 'packet_loss_rate')
        AddItemIfSet(kwargs, 'init_cwnd')
      elif self.shaping_type != 'none':
        # These two options are only passed to dummynet; tell the user when
        # they were given but will have no effect.
        if 'packet_loss_rate' in self._nondefaults:
          logging.warning('Shaping type, %s, ignores --packet_loss_rate=%s',
                          self.shaping_type, self.packet_loss_rate)
        if 'init_cwnd' in self._nondefaults:
          logging.warning('Shaping type, %s, ignores --init_cwnd=%s',
                          self.shaping_type, self.init_cwnd)
    return kwargs

  def _MassageValues(self):
    """Set options that depend on the values of other options."""
    if self.append and not self.record:
      # --append implies --record.
      self._options.record = True
    if self.net:
      # A named network config supplies all three basic shaping values.
      self._options.down, self._options.up, self._options.delay_ms = \
          net_configs.GetNetConfig(self.net)
      self._nondefaults.update(['down', 'up', 'delay_ms'])
    if not self.ssl:
      self._options.https_root_ca_cert_path = None
    self.shaping_dns = self._ShapingKeywordArgs('dns')
    self.shaping_http = self._ShapingKeywordArgs('http')
    self.shaping_dummynet = self._ShapingKeywordArgs('dummynet')

  def __getattr__(self, name):
    """Make the original option values available.

    Raises:
      AttributeError: if neither the wrapper nor the wrapped options define
          |name|.
    """
    # __getattr__ is only called after normal lookup fails.  If '_options'
    # itself is missing (instance created without __init__, e.g. by copy or
    # pickle), delegating would recurse forever, so fail cleanly instead.
    if name == '_options':
      raise AttributeError(name)
    return getattr(self._options, name)

  def __repr__(self):
    """Return a json representation of the original options dictionary."""
    return json.dumps(self._options.__dict__)

  def IsRootRequired(self):
    """Returns True iff the options require whole program root access."""
    if self.server:
      return True

    def IsPrivilegedPort(port):
      # An unset port (None or 0) is not privileged.
      return port and port < 1024

    if IsPrivilegedPort(self.port) or (self.ssl and
                                       IsPrivilegedPort(self.ssl_port)):
      return True

    if self.dns_forwarding:
      if IsPrivilegedPort(self.dns_port):
        return True
      if not self.server_mode and self.host == '127.0.0.1':
        return True

    return False
304
305
def replay(options, replay_filename):
  """Set up and run the servers selected by |options|.

  With options.server set, only DNS forwarding to that address is set up.
  Otherwise an archive is loaded (replay) or created/loaded (record), and the
  DNS proxy, web proxies and traffic shaper are added as configured.  In
  record mode the archive is written to |replay_filename| once the servers
  stop, whether they stopped cleanly or with an error.

  Args:
    options: wrapped command line options; must provide IsRootRequired() and
        the shaping_* attributes in addition to the parsed option values.
    replay_filename: name of the archive file to load from or record to.
        Not used when options.server is set.
  Returns:
    Exit status: 0 after a normal run or a KeyboardInterrupt, 1 after a DNS
    proxy, traffic shaper, administrator or DNS update error, 2 after any
    other exception.
  """
  if options.record and sys.version_info < (2, 7, 9):
    # NOTE(review): this only prints a message; execution continues.
    print ('Need Python 2.7.9 or greater for recording mode.\n'
           'For instructions on how to upgrade Python on Ubuntu 14.04, see:\n'
           'http://mbless.de/blog/2016/01/09/upgrade-to-python-2711-on-ubuntu-1404-lts.html\n')
  if options.admin_check and options.IsRootRequired():
    platformsettings.rerun_as_administrator()
  configure_logging(options.log_level, options.log_file)
  server_manager = servermanager.ServerManager(options.record)
  # Stays None in --server mode, where no archive is loaded or created.
  http_archive = None
  if options.server:
    AddDnsForward(server_manager, options.server)
  else:
    if options.record:
      httparchive.HttpArchive.AssertWritable(replay_filename)
      if options.append and os.path.exists(replay_filename):
        http_archive = httparchive.HttpArchive.Load(replay_filename)
        logging.info('Appending to %s (loaded %d existing responses)',
                     replay_filename, len(http_archive))
      else:
        http_archive = httparchive.HttpArchive()
    else:
      http_archive = httparchive.HttpArchive.Load(replay_filename)
      logging.info('Loaded %d responses from %s',
                   len(http_archive), replay_filename)
    server_manager.AppendRecordCallback(http_archive.clear)

    ipfw_dns_host = None
    if options.dns_forwarding or options.shaping_dummynet:
      # compute the ip/host used for the DNS server and traffic shaping
      ipfw_dns_host = options.host
      if not ipfw_dns_host:
        ipfw_dns_host = platformsettings.get_server_ip_address(
            options.server_mode)

    real_dns_lookup = dnsproxy.RealDnsLookup(
        name_servers=[platformsettings.get_original_primary_nameserver()],
        dns_forwarding=options.dns_forwarding,
        proxy_host=ipfw_dns_host,
        proxy_port=options.dns_port)
    server_manager.AppendRecordCallback(real_dns_lookup.ClearCache)

    if options.dns_forwarding:
      if not options.server_mode and ipfw_dns_host == '127.0.0.1':
        AddDnsForward(server_manager, ipfw_dns_host)
      AddDnsProxy(server_manager, options, ipfw_dns_host, options.dns_port,
                  real_dns_lookup, http_archive)
    if options.ssl and options.https_root_ca_cert_path is None:
      # Default to the certificate file that sits next to this script.
      options.https_root_ca_cert_path = os.path.join(os.path.dirname(__file__),
                                                     'wpr_cert.pem')
    http_proxy_address = options.host
    if not http_proxy_address:
      http_proxy_address = platformsettings.get_httpproxy_ip_address(
          options.server_mode)
    AddWebProxy(server_manager, options, http_proxy_address, real_dns_lookup,
                http_archive)
    AddTrafficShaper(server_manager, options, ipfw_dns_host)

  exit_status = 0
  try:
    server_manager.Run()
  except KeyboardInterrupt:
    logging.info('Shutting down.')
  except (dnsproxy.DnsProxyException,
          trafficshaper.TrafficShaperException,
          platformsettings.NotAdministratorError,
          platformsettings.DnsUpdateError) as e:
    logging.critical('%s: %s', e.__class__.__name__, e)
    exit_status = 1
  except Exception:
    logging.critical(traceback.format_exc())
    exit_status = 2

  # Only persist when an archive exists: with --server combined with
  # --record there is none, and using it would raise NameError.
  if options.record and http_archive is not None:
    http_archive.Persist(replay_filename)
    logging.info('Saved %d responses to %s', len(http_archive), replay_filename)
  return exit_status
382
383
def GetParser():
  """Build the command line parser for this script.

  The module docstring is used as the parser description.  Options are split
  into general options, a 'Network Simulation Options' group and a 'Replay
  Harness Options' group.

  Returns:
    An argparse.ArgumentParser with one optional positional argument
    (replay_filename) and all supported options.
  """
  arg_parser = argparse.ArgumentParser(
      usage='%(prog)s [options] replay_file',
      description=__doc__,
      formatter_class=argparse.RawDescriptionHelpFormatter,
      epilog='http://code.google.com/p/web-page-replay/')

  # Optional here because --server mode needs no archive; main() checks it.
  arg_parser.add_argument('replay_filename', type=str, help='Replay file',
                          nargs='?')

  arg_parser.add_argument('-r', '--record', default=False,
      action='store_true',
      help='Download real responses and record them to replay_file')
  arg_parser.add_argument('--append', default=False,
      action='store_true',
      help='Append responses to replay_file.')
  arg_parser.add_argument('-l', '--log_level', default='debug',
      action='store',
      type=str,
      choices=('debug', 'info', 'warning', 'error', 'critical'),
      help='Minimum verbosity level to log')
  arg_parser.add_argument('-f', '--log_file', default=None,
      action='store',
      type=str,
      help='Log file to use in addition to writing logs to stderr.')

  network_group = arg_parser.add_argument_group(
      title='Network Simulation Options',
      description=('These options configure the network simulation in '
                   'replay mode'))
  network_group.add_argument('-u', '--up', default='0',
      action='store',
      type=str,
      help='Upload Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_argument('-d', '--down', default='0',
      action='store',
      type=str,
      help='Download Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_argument('-m', '--delay_ms', default='0',
      action='store',
      type=str,
      help='Propagation delay (latency) in milliseconds. Zero means no delay.')
  network_group.add_argument('-p', '--packet_loss_rate', default='0',
      action='store',
      type=str,
      help='Packet loss rate in range [0..1]. Zero means no loss.')
  network_group.add_argument('-w', '--init_cwnd', default='0',
      action='store',
      type=str,
      help='Set initial cwnd (linux only, requires kernel patch)')
  network_group.add_argument('--net', default=None,
      action='store',
      type=str,
      choices=net_configs.NET_CONFIG_NAMES,
      help='Select a set of network options: %s.' % ', '.join(
          net_configs.NET_CONFIG_NAMES))
  network_group.add_argument('--shaping_type', default='dummynet',
      action='store',
      choices=('dummynet', 'proxy'),
      help='When shaping is configured (i.e. --up, --down, etc.) decides '
           'whether to use |dummynet| (default), or |proxy| servers.')

  harness_group = arg_parser.add_argument_group(
      title='Replay Harness Options',
      description=('These advanced options configure various aspects '
                   'of the replay harness'))
  harness_group.add_argument('-S', '--server', default=None,
      action='store',
      type=str,
      help='IP address of host running "replay.py --server_mode". '
           'This only changes the primary DNS nameserver to use the given IP.')
  harness_group.add_argument('-M', '--server_mode', default=False,
      action='store_true',
      help='Run replay DNS & http proxies, and trafficshaping on --port '
           'without changing the primary DNS nameserver. '
           'Other hosts may connect to this using "replay.py --server" '
           'or by pointing their DNS to this server.')
  harness_group.add_argument('-i', '--inject_scripts', default='deterministic.js',
      action='store',
      dest='inject_scripts',
      help='A comma separated list of JavaScript sources to inject in all '
           'pages. By default a script is injected that makes sources '
           'of entropy such as Date() and Math.random() deterministic. '
           'CAUTION: Without deterministic.js, many pages will not replay.')
  harness_group.add_argument('-D', '--no-diff_unknown_requests', default=True,
      action='store_false',
      dest='diff_unknown_requests',
      help='During replay, do not show a diff of unknown requests against '
           'their nearest match in the archive.')
  harness_group.add_argument('-C', '--use_closest_match', default=False,
      action='store_true',
      dest='use_closest_match',
      help='During replay, if a request is not found, serve the closest match '
           'in the archive instead of giving a 404.')
  harness_group.add_argument('-U', '--use_server_delay', default=False,
      action='store_true',
      dest='use_server_delay',
      help='During replay, simulate server delay by delaying response time to '
           'requests.')
  harness_group.add_argument('-I', '--screenshot_dir', default=None,
      action='store',
      type=str,
      help='Save PNG images of the loaded page in the given directory.')
  harness_group.add_argument('-P', '--no-dns_private_passthrough', default=True,
      action='store_false',
      dest='dns_private_passthrough',
      help='Don\'t forward DNS requests that resolve to private network '
           'addresses. CAUTION: With this option important services like '
           'Kerberos will resolve to the HTTP proxy address.')
  harness_group.add_argument('-x', '--no-dns_forwarding', default=True,
      action='store_false',
      dest='dns_forwarding',
      help='Don\'t forward DNS requests to the local replay server. '
           'CAUTION: With this option an external mechanism must be used to '
           'forward traffic to the replay server.')
  harness_group.add_argument('--host', default=None,
      action='store',
      type=str,
      help='The IP address to bind all servers to. Defaults to 0.0.0.0 or '
           '127.0.0.1, depending on --server_mode and platform.')
  harness_group.add_argument('-o', '--port', default=80,
      action='store',
      type=int,
      help='Port number to listen on.')
  harness_group.add_argument('--ssl_port', default=443,
      action='store',
      type=int,
      help='SSL port number to listen on.')
  harness_group.add_argument('--http_to_https_port', default=None,
      action='store',
      type=int,
      help='Port on which WPR will listen for HTTP requests that it will send '
           'along as HTTPS requests.')
  harness_group.add_argument('--dns_port', default=53,
      action='store',
      type=int,
      help='DNS port number to listen on.')
  harness_group.add_argument('-c', '--https_root_ca_cert_path', default=None,
      action='store',
      type=str,
      help='Certificate file to use with SSL (gets auto-generated if needed).')
  harness_group.add_argument('--no-ssl', default=True,
      action='store_false',
      dest='ssl',
      help='Do not setup an SSL proxy.')
  harness_group.add_argument('--should_generate_certs', default=False,
      action='store_true',
      help='Use OpenSSL to generate certificate files for requested hosts.')
  harness_group.add_argument('--no-admin-check', default=True,
      action='store_false',
      dest='admin_check',
      help='Do not check if administrator access is needed.')
  harness_group.add_argument('--scramble_images', default=False,
      action='store_true',
      dest='scramble_images',
      help='Scramble image responses.')
  harness_group.add_argument('--rules_path', default=None,
      action='store',
      help='Path of file containing Python rules.')
  harness_group.add_argument('--allowed_rule_imports', default='rules',
      action='store',
      help='A comma-separated list of allowed rule imports, or \'*\' to allow'
           ' all packages.  Defaults to %(default)s.')
  return arg_parser
548
549
def main():
  """Parse the command line, run replay() and return its exit status.

  A replay file is required unless --server is given; if it is missing the
  parser reports an error.
  """
  parser = GetParser()
  options = OptionsWrapper(parser.parse_args(), parser)

  if not options.server:
    if options.replay_filename is None:
      parser.error('Must specify a replay_file')
  else:
    # In --server mode any replay file given on the command line is ignored.
    options.replay_filename = None

  archive_name = options.replay_filename
  return replay(options, archive_name)
560
561
# Run main() only when this file is executed as a script; its return value
# becomes the process exit status.
if __name__ == '__main__':
  sys.exit(main())
564