# Copyright 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Start and stop Web Page Replay."""

from telemetry.internal.util import atexit_with_log
import logging
import os
import re
import signal
import subprocess
import sys
import tempfile
import urllib

from telemetry.core import util
from telemetry.internal import forwarders

import py_utils

_REPLAY_DIR = os.path.join(
    util.GetTelemetryThirdPartyDir(), 'web-page-replay')

# Standard logging levels in ascending order, paired with the names that are
# passed to replay.py through --log_level.
_LOG_LEVEL_NAMES = (
    (logging.DEBUG, 'debug'),
    (logging.INFO, 'info'),
    (logging.WARNING, 'warning'),
    (logging.ERROR, 'error'),
    (logging.CRITICAL, 'critical'),
)

# Matches the "<PROTOCOL> server started on <host>:<port>" lines that replay
# writes to its log. Compiled once because the log is re-parsed on every
# start-up poll.
_PORT_RE = re.compile(
    r'.*?(?P<protocol>HTTP|HTTPS|DNS)'
    r' server started on '
    r'(?P<host>[^:]*):'
    r'(?P<port>\d+)')


class ReplayError(Exception):
  """Catch-all exception for the module."""
  pass


class ReplayNotFoundError(ReplayError):
  """Raised when a path required by replay does not exist."""

  def __init__(self, label, path):
    """Records which path is missing.

    Args:
      label: a short human-readable description of what the path is for.
      path: the path that was not found.
    """
    super(ReplayNotFoundError, self).__init__()
    self.args = (label, path)

  def __str__(self):
    label, path = self.args
    return 'Path does not exist for %s: %s' % (label, path)


class ReplayNotStartedError(ReplayError):
  """Raised when the replay subprocess does not come up in time."""
  pass


class ReplayServer(object):
  """Start and Stop Web Page Replay.

  Web Page Replay is a proxy that can record and "replay" web pages with
  simulated network characteristics -- without having to edit the pages
  by hand. With WPR, tests can use "real" web content, and catch
  performance issues that may result from introducing network delays and
  bandwidth throttling.

  Example:
     with ReplayServer(archive_path, replay_host, http_port, https_port,
                       dns_port, replay_options):
       self.NavigateToURL(start_url)
       self.WaitUntil(...)
  """

  def __init__(self, archive_path, replay_host, http_port, https_port, dns_port,
               replay_options):
    """Initialize ReplayServer.

    Args:
      archive_path: a path to a specific WPR archive (required).
      replay_host: the hostname to serve traffic.
      http_port: an integer port on which to serve HTTP traffic. May be zero
          to let the OS choose an available port.
      https_port: an integer port on which to serve HTTPS traffic. May be zero
          to let the OS choose an available port.
      dns_port: an integer port on which to serve DNS traffic. May be zero
          to let the OS choose an available port. If None DNS forwarding is
          disabled.
      replay_options: an iterable of options strings to forward to replay.py.

    Raises:
      ReplayNotFoundError: if the archive (or, when recording, the directory
          that will hold it) or the replay script does not exist.
    """
    # The options are both forwarded to the command line and searched for
    # '--record'; materialize them so a one-shot iterator is not exhausted
    # by the first use.
    replay_options = list(replay_options)

    self.archive_path = archive_path
    self._replay_host = replay_host
    self._use_dns_server = dns_port is not None
    self._started_ports = {}  # a dict such as {'http': 80, 'https': 443}

    # A temporary path for storing stdout & stderr of the webpagereplay
    # subprocess.
    self._temp_log_file_path = None

    replay_py = os.path.join(_REPLAY_DIR, 'replay.py')
    self._cmd_line = self._GetCommandLine(
        replay_py, self._replay_host, http_port, https_port, dns_port,
        replay_options, archive_path)

    if '--record' in replay_options:
      # When recording, the archive itself need not exist yet, only its
      # directory. A bare file name has an empty dirname, which means the
      # current directory.
      archive_dir = os.path.dirname(self.archive_path) or os.curdir
      self._CheckPath('archive directory', archive_dir)
    else:
      self._CheckPath('archive file', self.archive_path)
    self._CheckPath('replay script', replay_py)

    self.replay_process = None

  @staticmethod
  def _GetLoggingLevel(log_level=None):
    """Returns the replay.py --log_level name for a logging level.

    Args:
      log_level: a numeric logging level. If None (or 0), the root logger's
          level is used instead.

    Returns:
      One of 'debug', 'info', 'warning', 'error' or 'critical'. A level that
      is not one of the five standard ones maps to the nearest standard level
      at or below it; anything below DEBUG maps to 'debug'.
    """
    level = log_level or logging.getLogger().level
    name = _LOG_LEVEL_NAMES[0][1]
    for threshold, candidate in _LOG_LEVEL_NAMES:
      if level >= threshold:
        name = candidate
    return name

  @staticmethod
  def _GetCommandLine(replay_py, host_ip, http_port, https_port, dns_port,
                      replay_options, archive_path, log_level=None):
    """Set WPR command-line options. Can be overridden if needed.

    Args:
      replay_py: path to the replay.py script.
      host_ip: value for replay's --host option.
      http_port: value for replay's --port option.
      https_port: value for replay's --ssl_port option.
      dns_port: value for replay's --dns_port option, or None to pass
          --no-dns_forwarding instead.
      replay_options: an iterable of extra option strings, appended verbatim.
      archive_path: path to the WPR archive; always the last argument.
      log_level: optional numeric logging level, see _GetLoggingLevel.

    Returns:
      The command line as a list of strings, starting with the current
      Python interpreter.
    """
    cmd_line = [sys.executable, replay_py]
    cmd_line.extend([
        '--host=%s' % host_ip,
        '--port=%s' % http_port,
        '--ssl_port=%s' % https_port
        ])
    if dns_port is not None:
      # Note that if --host is not '127.0.0.1', Replay will override the local
      # DNS nameserver settings to point to the replay-started DNS server.
      cmd_line.append('--dns_port=%s' % dns_port)
    else:
      cmd_line.append('--no-dns_forwarding')
    cmd_line.extend([
        '--use_closest_match',
        '--log_level=%s' % ReplayServer._GetLoggingLevel(log_level)
        ])
    cmd_line.extend(replay_options)
    cmd_line.append(archive_path)
    return cmd_line

  def _CheckPath(self, label, path):
    """Raises ReplayNotFoundError if |path| does not exist."""
    if not os.path.exists(path):
      raise ReplayNotFoundError(label, path)

  def _OpenLogFile(self):
    """Opens the log file for writing."""
    log_dir = os.path.dirname(self._temp_log_file_path)
    if not os.path.exists(log_dir):
      os.makedirs(log_dir)
    return open(self._temp_log_file_path, 'w')

  def _LogLines(self):
    """Yields the log lines.

    Yields nothing if there is currently no log file.
    """
    log_path = self._temp_log_file_path
    if not log_path or not os.path.isfile(log_path):
      return
    with open(log_path) as f:
      for line in f:
        yield line

  def _IsStarted(self):
    """Returns true if the server is up and running."""
    if self.replay_process is None or self.replay_process.poll() is not None:
      # The process was never launched, or it terminated.
      return False

    def HasIncompleteStartedPorts():
      return ('http' not in self._started_ports or
              'https' not in self._started_ports or
              (self._use_dns_server and 'dns' not in self._started_ports))

    if HasIncompleteStartedPorts():
      self._started_ports = self._ParseLogFilePorts(self._LogLines())
    if HasIncompleteStartedPorts():
      return False
    try:
      # HTTPS may require SNI (which urllib does not speak), so only check
      # that HTTP responds.
      return 200 == self._UrlOpen('web-page-replay-generate-200').getcode()
    except IOError:
      return False

  @staticmethod
  def _ParseLogFilePorts(log_lines):
    """Returns the ports on which replay listens as reported in its log file.

    Only matches HTTP, HTTPS, and DNS. One call may return only some
    of the ports depending on what has been written to the log file.

    Example log lines:
        2014-09-03 17:04:27,978 WARNING HTTP server started on 127.0.0.1:51673
        2014-09-03 17:04:27,978 WARNING HTTPS server started on 127.0.0.1:35270

    Args:
      log_lines: an iterable of log line strings.

    Returns:
      a dict with ports available in log_lines. For example,
         {}  # no ports found
         {'http': 1234, 'https': 2345, 'dns': 3456}
    """
    ports = {}
    for line in log_lines:
      m = _PORT_RE.match(line.strip())
      if m:
        protocol = m.group('protocol').lower()
        ports[protocol] = int(m.group('port'))
    return ports

  def StartServer(self):
    """Start Web Page Replay and verify that it started.

    If start-up fails, the replay subprocess is stopped and its temporary
    log file is removed before the error propagates.

    Returns:
      A forwarders.PortSet(http, https, dns) tuple; with dns None if unused.
    Raises:
      ReplayNotStartedError: if Replay start-up fails.
    """
    is_posix = sys.platform.startswith('linux') or sys.platform == 'darwin'
    logging.info('Starting Web-Page-Replay: %s', self._cmd_line)
    # Forget the ports of any previous run so that _IsStarted() parses the
    # new log instead of probing stale ports.
    self._started_ports = {}
    self._CreateTempLogFilePath()
    started = False
    try:
      with open(self._temp_log_file_path, 'w') as log_fh:
        self.replay_process = subprocess.Popen(
            self._cmd_line, stdout=log_fh, stderr=subprocess.STDOUT,
            preexec_fn=(_ResetInterruptHandler if is_posix else None))
      try:
        py_utils.WaitFor(self._IsStarted, 30)
      except py_utils.TimeoutException:
        # Read the log now: the clean-up below deletes the file.
        raise ReplayNotStartedError(
            'Web Page Replay failed to start. Log output:\n%s' %
            ''.join(self._LogLines()))
      started = True
    finally:
      if not started:
        # Do not leak the subprocess or the temporary log file.
        self.StopServer()

    logging.info('WPR ports: %s', self._started_ports)
    atexit_with_log.Register(self.StopServer)
    return forwarders.PortSet(
        self._started_ports['http'],
        self._started_ports['https'],
        self._started_ports.get('dns'),  # None if unused
        )

  def StopServer(self):
    """Stop Web Page Replay.

    Safe to call when the server was never started or was already stopped;
    in those cases only a leftover temporary log file, if any, is removed.
    """
    try:
      process = self.replay_process
      if process is not None and process.poll() is None:
        self._StopReplayProcess()
    finally:
      # TODO(rnephew): Upload logs to google storage. crbug.com/525787
      if self._temp_log_file_path:
        self._CleanUpTempLogFilePath()

  def _StopReplayProcess(self):
    """Stops the replay subprocess, as gracefully as possible.

    First asks replay to exit over HTTP, then falls back to SIGINT, and
    finally to terminate() when the signal cannot be sent.
    """
    if not self.replay_process:
      return

    logging.debug('Trying to stop Web-Page-Replay gracefully')
    try:
      # The exit command goes over HTTP, so it needs the HTTP port.
      if 'http' in self._started_ports:
        self._UrlOpen('web-page-replay-command-exit').close()
    except IOError:
      # IOError is possible because the server might exit without response.
      pass

    try:
      py_utils.WaitFor(lambda: self.replay_process.poll() is not None, 10)
    except py_utils.TimeoutException:
      try:
        # Use a SIGINT so that it can do graceful cleanup.
        self.replay_process.send_signal(signal.SIGINT)
      except Exception:  # pylint: disable=broad-except
        # On Windows, we are left with no other option than terminate().
        is_primary_nameserver_changed_by_replay = (
            self._use_dns_server and self._replay_host == '127.0.0.1')
        if is_primary_nameserver_changed_by_replay:
          # Replay changes the DNS nameserver configuration so that DNS
          # requests are resolved by replay's own DNS server. It resolves
          # all DNS requests to its own IP address so it can serve the
          # HTTP and HTTPS requests.
          # If the replay host is not '127.0.0.1', then replay skips the
          # nameserver change because it assumes a different mechanism
          # will be used to route DNS requests to replay's DNS server.
          logging.warning(
              'Unable to stop Web-Page-Replay gracefully.\n'
              'Replay changed the DNS nameserver configuration to make replay '
              'the primary nameserver. That might not be restored!')
        try:
          self.replay_process.terminate()
        except Exception:  # pylint: disable=broad-except
          # Best effort only; keep going so that the process is reaped.
          logging.debug('Unable to terminate Web-Page-Replay', exc_info=True)
      self.replay_process.wait()

  def _CreateTempLogFilePath(self):
    """Creates an empty temporary file and remembers its path."""
    assert self._temp_log_file_path is None
    handle, self._temp_log_file_path = tempfile.mkstemp()
    os.close(handle)

  def _CleanUpTempLogFilePath(self):
    """Dumps the WPR log at debug level, then deletes the log file."""
    assert self._temp_log_file_path
    try:
      if logging.getLogger('').isEnabledFor(logging.DEBUG):
        with open(self._temp_log_file_path, 'r') as f:
          wpr_log_content = '\n'.join([
              '************************** WPR LOG *****************************',
              f.read(),
              '************************** END OF WPR LOG **********************'])
        logging.debug(wpr_log_content)
    finally:
      # Remove the file even if dumping it to the log failed.
      os.remove(self._temp_log_file_path)
      self._temp_log_file_path = None

  def __enter__(self):
    """Add support for with-statement."""
    self.StartServer()
    return self

  def __exit__(self, unused_exc_type, unused_exc_val, unused_exc_tb):
    """Add support for with-statement."""
    self.StopServer()

  def _UrlOpen(self, url_path, protocol='http'):
    """Open a Replay URL.

    For matching requests in the archive, Replay relies on the "Host:" header.
    For Replay command URLs, the "Host:" header is not needed.

    Args:
      url_path: WPR server request path.
      protocol: 'http' or 'https'
    Returns:
      a file-like object from urllib.urlopen
    """
    url = '%s://%s:%s/%s' % (
        protocol, self._replay_host, self._started_ports[protocol], url_path)
    return urllib.urlopen(url, proxies={})


def _ResetInterruptHandler():
  """Reset the interrupt handler back to the default.

  The replay process is stopped gracefully by making an HTTP request
  ('web-page-replay-command-exit'). The graceful exit is important for
  restoring the DNS configuration. If the HTTP request fails, the fallback
  is to send SIGINT to the process.

  On posix system, running this function before starting replay fixes a
  bug that shows up when Telemetry is run as a background command from a
  script. https://crbug.com/254572.

  Background: Signal masks on Linux are inherited from parent
  processes. If anything invoking us accidentally masks SIGINT
  (e.g. by putting a process in the background from a shell script),
  sending a SIGINT to the child will fail to terminate it.
  """
  signal.signal(signal.SIGINT, signal.SIG_DFL)