#!/usr/bin/env python
# Copyright 2010 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Replays web pages under simulated network conditions.

Must be run as administrator (sudo).

To record web pages:
  1. Start the program in record mode.
     $ sudo ./replay.py --record archive.wpr
  2. Load the web pages you want to record in a web browser. It is important to
     clear browser caches before this so that all subresources are requested
     from the network.
  3. Kill the process to stop recording.

To replay web pages:
  1. Start the program in replay mode with a previously recorded archive.
     $ sudo ./replay.py archive.wpr
  2. Load recorded pages in a web browser. A 404 will be served for any pages or
     resources not in the recorded archive.

Network simulation examples:
  # 128KByte/s uplink bandwidth, 4Mbps/s downlink bandwidth with 100ms RTT time
  $ sudo ./replay.py --up 128KByte/s --down 4Mbit/s --delay_ms=100 archive.wpr

  # 1% packet loss rate
  $ sudo ./replay.py --packet_loss_rate=0.01 archive.wpr
"""

import argparse
import json
import logging
import os
import socket
import sys
import traceback

import customhandlers
import dnsproxy
import httparchive
import httpclient
import httpproxy
import net_configs
import platformsettings
import rules_parser
import script_injector
import servermanager
import trafficshaper

# Compare the structured version tuple; comparing the sys.version string is
# lexicographic and misorders versions such as '10.0' vs '2.6'.
if sys.version_info < (2, 6):
  print('Need Python 2.6 or greater.')
  sys.exit(1)


def configure_logging(log_level_name, log_file_name=None):
  """Configure logging level and format.

  Installs the root logger configuration, optionally adds a file handler, and
  adds the platform's system logging handler when one is available.

  Args:
    log_level_name: 'debug', 'info', 'warning', 'error', or 'critical'.
    log_file_name: optional name of a file to log to in addition to the
        handler installed by logging.basicConfig.
  """
  if logging.root.handlers:
    # basicConfig() is a no-op once the root logger has handlers, so an early
    # logging call means the configuration below will not fully apply.
    logging.critical('A logging method (e.g. "logging.warn(...)")'
                     ' was called before logging was configured.')
  log_level = getattr(logging, log_level_name.upper())
  log_format = (
      '(%(levelname)s) %(asctime)s %(module)s.%(funcName)s:%(lineno)d '
      '%(message)s')

  logging.basicConfig(level=log_level, format=log_format)
  logger = logging.getLogger()
  if log_file_name:
    fh = logging.FileHandler(log_file_name)
    fh.setLevel(log_level)
    fh.setFormatter(logging.Formatter(log_format))
    logger.addHandler(fh)
  system_handler = platformsettings.get_system_logging_handler()
  if system_handler:
    logger.addHandler(system_handler)


def AddDnsForward(server_manager, host):
  """Forward DNS traffic.

  Args:
    server_manager: the server manager to register the nameserver change with.
    host: the IP address to use as the temporary primary nameserver.
  """
  server_manager.Append(platformsettings.set_temporary_primary_nameserver, host)


def AddDnsProxy(server_manager, options, host, port, real_dns_lookup,
                http_archive):
  """Register the DNS proxy server and its optional filters.

  Args:
    server_manager: the server manager to register servers and callbacks with.
    options: an OptionsWrapper; reads dns_private_passthrough, shaping_dns and
        record.
    host: the address the DNS proxy binds to and that replayed lookups use.
    port: the port the DNS proxy listens on.
    real_dns_lookup: the real DNS lookup used by the private-IP filter.
    http_archive: the archive handed to the private-IP filter.
  """
  dns_filters = []
  if options.dns_private_passthrough:
    private_filter = dnsproxy.PrivateIpFilter(real_dns_lookup, http_archive)
    dns_filters.append(private_filter)
    server_manager.AppendRecordCallback(private_filter.InitializeArchiveHosts)
    server_manager.AppendReplayCallback(private_filter.InitializeArchiveHosts)
  if options.shaping_dns:
    delay_filter = dnsproxy.DelayFilter(options.record, **options.shaping_dns)
    dns_filters.append(delay_filter)
    server_manager.AppendRecordCallback(delay_filter.SetRecordMode)
    server_manager.AppendReplayCallback(delay_filter.SetReplayMode)
  server_manager.Append(dnsproxy.DnsProxyServer, host, port,
                        dns_lookup=dnsproxy.ReplayDnsLookup(host, dns_filters))


def AddWebProxy(server_manager, options, host, real_dns_lookup, http_archive):
  """Register the HTTP proxy and, when enabled, the HTTPS proxies.

  Args:
    server_manager: the server manager to register servers and callbacks with.
    options: an OptionsWrapper holding the proxy, SSL, rules and shaping
        settings.
    host: the address the proxy servers bind to.
    real_dns_lookup: the real DNS lookup handed to the archive fetcher.
    http_archive: the archive that responses are read from or recorded into.
  """
  if options.rules_path:
    with open(options.rules_path) as file_obj:
      allowed_imports = [
          name.strip() for name in options.allowed_rule_imports.split(',')]
      rules = rules_parser.Rules(file_obj, allowed_imports)
    logging.info('Parsed %s rules:\n%s', options.rules_path, rules)
  else:
    rules = rules_parser.Rules()
  injector = script_injector.GetScriptInjector(options.inject_scripts)
  custom_handlers = customhandlers.CustomHandlers(options, http_archive)
  custom_handlers.add_server_manager_handler(server_manager)
  archive_fetch = httpclient.ControllableHttpArchiveFetch(
      http_archive, real_dns_lookup,
      injector,
      options.diff_unknown_requests, options.record,
      use_closest_match=options.use_closest_match,
      scramble_images=options.scramble_images)
  server_manager.AppendRecordCallback(archive_fetch.SetRecordMode)
  server_manager.AppendReplayCallback(archive_fetch.SetReplayMode)
  # 304 generation is only allowed in replay mode.
  allow_generate_304 = not options.record
  server_manager.Append(
      httpproxy.HttpProxyServer,
      archive_fetch, custom_handlers, rules,
      host=host, port=options.port, use_delays=options.use_server_delay,
      allow_generate_304=allow_generate_304,
      **options.shaping_http)
  if options.ssl:
    if options.should_generate_certs:
      server_manager.Append(
          httpproxy.HttpsProxyServer, archive_fetch, custom_handlers, rules,
          options.https_root_ca_cert_path, host=host, port=options.ssl_port,
          allow_generate_304=allow_generate_304,
          use_delays=options.use_server_delay, **options.shaping_http)
    else:
      server_manager.Append(
          httpproxy.SingleCertHttpsProxyServer, archive_fetch,
          custom_handlers, rules, options.https_root_ca_cert_path, host=host,
          port=options.ssl_port, use_delays=options.use_server_delay,
          allow_generate_304=allow_generate_304,
          **options.shaping_http)
  if options.http_to_https_port:
    server_manager.Append(
        httpproxy.HttpToHttpsProxyServer,
        archive_fetch, custom_handlers, rules,
        host=host, port=options.http_to_https_port,
        use_delays=options.use_server_delay,
        allow_generate_304=allow_generate_304,
        **options.shaping_http)


def AddTrafficShaper(server_manager, options, host):
  """Register the dummynet traffic shaper when shaping options were given.

  Args:
    server_manager: the server manager to register the traffic shaper with.
    options: an OptionsWrapper; reads shaping_dummynet and server_mode.
    host: the address whose traffic is shaped.
  """
  if options.shaping_dummynet:
    server_manager.AppendTrafficShaper(
        trafficshaper.TrafficShaper, host=host,
        use_loopback=not options.server_mode and host == '127.0.0.1',
        **options.shaping_dummynet)


class OptionsWrapper(object):
  """Add checks, updates, and methods to option values.

  Attributes that are not set on the wrapper itself are looked up on the
  wrapped options object.

  Example:
    options = arg_parser.parse_args()
    options = OptionsWrapper(options, arg_parser)  # run checks and updates
    if options.admin_check and options.IsRootRequired():
      [...]
  """
  _TRAFFICSHAPING_OPTIONS = {
      'down', 'up', 'delay_ms', 'packet_loss_rate', 'init_cwnd', 'net'}
  # Pairs of (option, options that may not be combined with it).
  _CONFLICTING_OPTIONS = (
      ('record', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'spdy', 'use_server_delay')),
      ('append', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'use_server_delay')),  # same as --record
      ('net', ('down', 'up', 'delay_ms')),
      ('server', ('server_mode',)),
  )

  def __init__(self, options, parser):
    """Wrap |options| and run all checks and derived-value updates.

    Args:
      options: the namespace returned by parser.parse_args().
      parser: the argparse parser; used to find defaults and report errors.
    """
    self._options = options
    self._parser = parser
    # An option counts as "set" when its value is not the very object that
    # was registered as its default.
    self._nondefaults = set([
        action.dest for action in parser._optionals._actions
        if getattr(options, action.dest, action.default) is not action.default])
    self._CheckConflicts()
    self._CheckValidIp('host')
    self._CheckFeatureSupport()
    self._MassageValues()

  def _CheckConflicts(self):
    """Give an error if mutually exclusive options are used."""
    for option, bad_options in self._CONFLICTING_OPTIONS:
      if option in self._nondefaults:
        for bad_option in bad_options:
          if bad_option in self._nondefaults:
            self._parser.error('Option --%s cannot be used with --%s.' %
                               (bad_option, option))

  def _CheckValidIp(self, name):
    """Give an error if option |name| is not a valid IPv4 address."""
    value = getattr(self._options, name)
    if value:
      try:
        socket.inet_aton(value)
      except Exception:
        # Any failure to parse is reported as a usage error, not swallowed.
        self._parser.error('Option --%s must be a valid IPv4 address.' % name)

  def _CheckFeatureSupport(self):
    """Give an error if a requested feature is unsupported on this platform."""
    if (self._options.should_generate_certs and
        not platformsettings.HasSniSupport()):
      self._parser.error('Option --should_generate_certs requires pyOpenSSL '
                         '0.13 or greater for SNI support.')

  def _ShapingKeywordArgs(self, shaping_key):
    """Return the shaping keyword args for |shaping_key|.

    Args:
      shaping_key: one of 'dummynet', 'dns', 'http'.
    Returns:
      {}  # if shaping_key does not apply, or options have default values.
      {k: v, ...}
    """
    kwargs = {}

    def AddItemIfSet(d, kw_key, opt_key=None):
      # Copy the option into |d| only when it was given a non-default value.
      opt_key = opt_key or kw_key
      if opt_key in self._nondefaults:
        d[kw_key] = getattr(self, opt_key)

    if ((self.shaping_type == 'proxy' and shaping_key in ('dns', 'http')) or
        self.shaping_type == shaping_key):
      AddItemIfSet(kwargs, 'delay_ms')
      if shaping_key in ('dummynet', 'http'):
        AddItemIfSet(kwargs, 'down_bandwidth', opt_key='down')
        AddItemIfSet(kwargs, 'up_bandwidth', opt_key='up')
        if shaping_key == 'dummynet':
          AddItemIfSet(kwargs, 'packet_loss_rate')
          AddItemIfSet(kwargs, 'init_cwnd')
        elif self.shaping_type != 'none':
          if 'packet_loss_rate' in self._nondefaults:
            logging.warning('Shaping type, %s, ignores --packet_loss_rate=%s',
                            self.shaping_type, self.packet_loss_rate)
          if 'init_cwnd' in self._nondefaults:
            logging.warning('Shaping type, %s, ignores --init_cwnd=%s',
                            self.shaping_type, self.init_cwnd)
    return kwargs

  def _MassageValues(self):
    """Set options that depend on the values of other options."""
    if self.append and not self.record:
      self._options.record = True
    if self.net:
      (self._options.down, self._options.up,
       self._options.delay_ms) = net_configs.GetNetConfig(self.net)
      self._nondefaults.update(['down', 'up', 'delay_ms'])
    if not self.ssl:
      self._options.https_root_ca_cert_path = None
    self.shaping_dns = self._ShapingKeywordArgs('dns')
    self.shaping_http = self._ShapingKeywordArgs('http')
    self.shaping_dummynet = self._ShapingKeywordArgs('dummynet')

  def __getattr__(self, name):
    """Make the original option values available."""
    if name == '_options':
      # Only reached when _options has not been set on this instance yet;
      # without this guard the lookup below would recurse without bound.
      raise AttributeError(name)
    return getattr(self._options, name)

  def __repr__(self):
    """Return a json representation of the original options dictionary."""
    return json.dumps(self._options.__dict__)

  def IsRootRequired(self):
    """Returns True iff the options require whole program root access."""
    if self.server:
      return True

    def IsPrivilegedPort(port):
      return port and port < 1024

    if IsPrivilegedPort(self.port) or (self.ssl and
                                       IsPrivilegedPort(self.ssl_port)):
      return True

    if self.dns_forwarding:
      if IsPrivilegedPort(self.dns_port):
        return True
      if not self.server_mode and self.host == '127.0.0.1':
        return True

    return False


def replay(options, replay_filename):
  """Set up the configured servers and run them until shutdown.

  Args:
    options: an OptionsWrapper with the parsed command-line options.
    replay_filename: path of the archive to record to or replay from; unused
        when options.server is set.
  Returns:
    0 on normal shutdown (including KeyboardInterrupt), 1 on a DNS proxy,
    traffic shaper, administrator or DNS update error, 2 on any other
    exception.
  """
  if options.record and sys.version_info < (2, 7, 9):
    print('Need Python 2.7.9 or greater for recording mode.\n'
          'For instructions on how to upgrade Python on Ubuntu 14.04, see:\n'
          'http://mbless.de/blog/2016/01/09/upgrade-to-python-2711-on-ubuntu-1404-lts.html\n')
  if options.admin_check and options.IsRootRequired():
    platformsettings.rerun_as_administrator()
  configure_logging(options.log_level, options.log_file)
  server_manager = servermanager.ServerManager(options.record)
  # Stays None in --server mode, where no archive is loaded or created.
  http_archive = None
  if options.server:
    AddDnsForward(server_manager, options.server)
  else:
    if options.record:
      httparchive.HttpArchive.AssertWritable(replay_filename)
      if options.append and os.path.exists(replay_filename):
        http_archive = httparchive.HttpArchive.Load(replay_filename)
        logging.info('Appending to %s (loaded %d existing responses)',
                     replay_filename, len(http_archive))
      else:
        http_archive = httparchive.HttpArchive()
    else:
      http_archive = httparchive.HttpArchive.Load(replay_filename)
      logging.info('Loaded %d responses from %s',
                   len(http_archive), replay_filename)
    server_manager.AppendRecordCallback(http_archive.clear)

    ipfw_dns_host = None
    if options.dns_forwarding or options.shaping_dummynet:
      # compute the ip/host used for the DNS server and traffic shaping
      ipfw_dns_host = options.host
      if not ipfw_dns_host:
        ipfw_dns_host = platformsettings.get_server_ip_address(
            options.server_mode)

    real_dns_lookup = dnsproxy.RealDnsLookup(
        name_servers=[platformsettings.get_original_primary_nameserver()],
        dns_forwarding=options.dns_forwarding,
        proxy_host=ipfw_dns_host,
        proxy_port=options.dns_port)
    server_manager.AppendRecordCallback(real_dns_lookup.ClearCache)

    if options.dns_forwarding:
      if not options.server_mode and ipfw_dns_host == '127.0.0.1':
        AddDnsForward(server_manager, ipfw_dns_host)
      AddDnsProxy(server_manager, options, ipfw_dns_host, options.dns_port,
                  real_dns_lookup, http_archive)
    if options.ssl and options.https_root_ca_cert_path is None:
      options.https_root_ca_cert_path = os.path.join(os.path.dirname(__file__),
                                                     'wpr_cert.pem')
    http_proxy_address = options.host
    if not http_proxy_address:
      http_proxy_address = platformsettings.get_httpproxy_ip_address(
          options.server_mode)
    AddWebProxy(server_manager, options, http_proxy_address, real_dns_lookup,
                http_archive)
    AddTrafficShaper(server_manager, options, ipfw_dns_host)

  exit_status = 0
  try:
    server_manager.Run()
  except KeyboardInterrupt:
    logging.info('Shutting down.')
  except (dnsproxy.DnsProxyException,
          trafficshaper.TrafficShaperException,
          platformsettings.NotAdministratorError,
          platformsettings.DnsUpdateError) as e:
    logging.critical('%s: %s', e.__class__.__name__, e)
    exit_status = 1
  except Exception:
    logging.critical(traceback.format_exc())
    exit_status = 2

  # There is no archive to save in --server mode, even if --record was given.
  if options.record and http_archive is not None:
    http_archive.Persist(replay_filename)
    logging.info('Saved %d responses to %s', len(http_archive), replay_filename)
  return exit_status


def GetParser():
  """Build and return the argparse parser for the command line."""
  arg_parser = argparse.ArgumentParser(
      usage='%(prog)s [options] replay_file',
      description=__doc__,
      formatter_class=argparse.RawDescriptionHelpFormatter,
      epilog='http://code.google.com/p/web-page-replay/')

  arg_parser.add_argument('replay_filename', type=str, help='Replay file',
                          nargs='?')

  arg_parser.add_argument('-r', '--record', default=False,
      action='store_true',
      help='Download real responses and record them to replay_file')
  arg_parser.add_argument('--append', default=False,
      action='store_true',
      help='Append responses to replay_file.')
  arg_parser.add_argument('-l', '--log_level', default='debug',
      action='store',
      type=str,
      choices=('debug', 'info', 'warning', 'error', 'critical'),
      help='Minimum verbosity level to log')
  arg_parser.add_argument('-f', '--log_file', default=None,
      action='store',
      type=str,
      help='Log file to use in addition to writing logs to stderr.')

  network_group = arg_parser.add_argument_group(
      title='Network Simulation Options',
      description=('These options configure the network simulation in '
                   'replay mode'))
  network_group.add_argument('-u', '--up', default='0',
      action='store',
      type=str,
      help='Upload Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_argument('-d', '--down', default='0',
      action='store',
      type=str,
      help='Download Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_argument('-m', '--delay_ms', default='0',
      action='store',
      type=str,
      help='Propagation delay (latency) in milliseconds. Zero means no delay.')
  network_group.add_argument('-p', '--packet_loss_rate', default='0',
      action='store',
      type=str,
      help='Packet loss rate in range [0..1]. Zero means no loss.')
  network_group.add_argument('-w', '--init_cwnd', default='0',
      action='store',
      type=str,
      help='Set initial cwnd (linux only, requires kernel patch)')
  network_group.add_argument('--net', default=None,
      action='store',
      type=str,
      choices=net_configs.NET_CONFIG_NAMES,
      help='Select a set of network options: %s.' % ', '.join(
          net_configs.NET_CONFIG_NAMES))
  network_group.add_argument('--shaping_type', default='dummynet',
      action='store',
      choices=('dummynet', 'proxy'),
      help='When shaping is configured (i.e. --up, --down, etc.) decides '
           'whether to use |dummynet| (default), or |proxy| servers.')

  harness_group = arg_parser.add_argument_group(
      title='Replay Harness Options',
      description=('These advanced options configure various aspects '
                   'of the replay harness'))
  harness_group.add_argument('-S', '--server', default=None,
      action='store',
      type=str,
      help='IP address of host running "replay.py --server_mode". '
           'This only changes the primary DNS nameserver to use the given IP.')
  harness_group.add_argument('-M', '--server_mode', default=False,
      action='store_true',
      help='Run replay DNS & http proxies, and trafficshaping on --port '
           'without changing the primary DNS nameserver. '
           'Other hosts may connect to this using "replay.py --server" '
           'or by pointing their DNS to this server.')
  harness_group.add_argument('-i', '--inject_scripts',
      default='deterministic.js',
      action='store',
      dest='inject_scripts',
      help='A comma separated list of JavaScript sources to inject in all '
           'pages. By default a script is injected that eliminates sources '
           'of entropy such as Date() and Math.random() deterministic. '
           'CAUTION: Without deterministic.js, many pages will not replay.')
  harness_group.add_argument('-D', '--no-diff_unknown_requests', default=True,
      action='store_false',
      dest='diff_unknown_requests',
      help='During replay, do not show a diff of unknown requests against '
           'their nearest match in the archive.')
  # The trailing space before the literal join keeps the rendered help from
  # running the two words together.
  harness_group.add_argument('-C', '--use_closest_match', default=False,
      action='store_true',
      dest='use_closest_match',
      help='During replay, if a request is not found, serve the closest match '
           'in the archive instead of giving a 404.')
  harness_group.add_argument('-U', '--use_server_delay', default=False,
      action='store_true',
      dest='use_server_delay',
      help='During replay, simulate server delay by delaying response time to '
           'requests.')
  harness_group.add_argument('-I', '--screenshot_dir', default=None,
      action='store',
      type=str,
      help='Save PNG images of the loaded page in the given directory.')
  harness_group.add_argument('-P', '--no-dns_private_passthrough', default=True,
      action='store_false',
      dest='dns_private_passthrough',
      help='Don\'t forward DNS requests that resolve to private network '
           'addresses. CAUTION: With this option important services like '
           'Kerberos will resolve to the HTTP proxy address.')
  harness_group.add_argument('-x', '--no-dns_forwarding', default=True,
      action='store_false',
      dest='dns_forwarding',
      help='Don\'t forward DNS requests to the local replay server. '
           'CAUTION: With this option an external mechanism must be used to '
           'forward traffic to the replay server.')
  harness_group.add_argument('--host', default=None,
      action='store',
      type=str,
      help='The IP address to bind all servers to. Defaults to 0.0.0.0 or '
           '127.0.0.1, depending on --server_mode and platform.')
  harness_group.add_argument('-o', '--port', default=80,
      action='store',
      type=int,
      help='Port number to listen on.')
  harness_group.add_argument('--ssl_port', default=443,
      action='store',
      type=int,
      help='SSL port number to listen on.')
  harness_group.add_argument('--http_to_https_port', default=None,
      action='store',
      type=int,
      help='Port on which WPR will listen for HTTP requests that it will send '
           'along as HTTPS requests.')
  harness_group.add_argument('--dns_port', default=53,
      action='store',
      type=int,
      help='DNS port number to listen on.')
  harness_group.add_argument('-c', '--https_root_ca_cert_path', default=None,
      action='store',
      type=str,
      help='Certificate file to use with SSL (gets auto-generated if needed).')
  harness_group.add_argument('--no-ssl', default=True,
      action='store_false',
      dest='ssl',
      help='Do not setup an SSL proxy.')
  harness_group.add_argument('--should_generate_certs', default=False,
      action='store_true',
      help='Use OpenSSL to generate certificate files for requested hosts.')
  harness_group.add_argument('--no-admin-check', default=True,
      action='store_false',
      dest='admin_check',
      help='Do not check if administrator access is needed.')
  harness_group.add_argument('--scramble_images', default=False,
      action='store_true',
      dest='scramble_images',
      help='Scramble image responses.')
  harness_group.add_argument('--rules_path', default=None,
      action='store',
      help='Path of file containing Python rules.')
  harness_group.add_argument('--allowed_rule_imports', default='rules',
      action='store',
      help='A comma-separated list of allowed rule imports, or \'*\' to allow'
           ' all packages. Defaults to %(default)s.')
  return arg_parser


def main():
  """Parse the command line and run replay().

  Returns:
    The exit status returned by replay().
  """
  arg_parser = GetParser()
  options = arg_parser.parse_args()
  options = OptionsWrapper(options, arg_parser)

  if options.server:
    # --server only redirects DNS, so no archive file is involved.
    options.replay_filename = None
  elif options.replay_filename is None:
    arg_parser.error('Must specify a replay_file')
  return replay(options, options.replay_filename)


if __name__ == '__main__':
  sys.exit(main())