# Copyright 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Start and stop Web Page Replay."""

import atexit
import logging
import os
import re
import signal
import subprocess
import sys
import tempfile
import urllib

from telemetry.core import exceptions
from telemetry.core import util
from telemetry.internal import forwarders

_REPLAY_DIR = os.path.join(
    util.GetTelemetryThirdPartyDir(), 'webpagereplay')

# Matches the replay log lines that announce a listening server, e.g.
#   2014-09-03 17:04:27,978 WARNING HTTPS server started on 127.0.0.1:35270
_SERVER_STARTED_RE = re.compile(
    r'.*?(?P<protocol>HTTP|HTTPS|DNS)'
    r' server started on '
    r'(?P<host>[^:]*):'
    r'(?P<port>\d+)')


class ReplayError(Exception):
  """Catch-all exception for the module."""
  pass


class ReplayNotFoundError(ReplayError):
  """Raised when a path required by Web Page Replay does not exist."""

  def __init__(self, label, path):
    """Initialize the error.

    Args:
      label: a short human-readable description of what the path is.
      path: the path that was not found.
    """
    super(ReplayNotFoundError, self).__init__()
    self.args = (label, path)

  def __str__(self):
    label, path = self.args
    return 'Path does not exist for %s: %s' % (label, path)


class ReplayNotStartedError(ReplayError):
  """Raised when the Web Page Replay process fails to start."""
  pass


class ReplayServer(object):
  """Start and Stop Web Page Replay.

  Web Page Replay is a proxy that can record and "replay" web pages with
  simulated network characteristics -- without having to edit the pages
  by hand. With WPR, tests can use "real" web content, and catch
  performance issues that may result from introducing network delays and
  bandwidth throttling.

  Example:
     with ReplayServer(archive_path, replay_host, http_port, https_port,
                       dns_port, replay_options):
       self.NavigateToURL(start_url)
       self.WaitUntil(...)
  """

  def __init__(self, archive_path, replay_host, http_port, https_port, dns_port,
               replay_options):
    """Initialize ReplayServer.

    Args:
      archive_path: a path to a specific WPR archive (required).
      replay_host: the hostname to serve traffic.
      http_port: an integer port on which to serve HTTP traffic. May be zero
          to let the OS choose an available port.
      https_port: an integer port on which to serve HTTPS traffic. May be zero
          to let the OS choose an available port.
      dns_port: an integer port on which to serve DNS traffic. May be zero
          to let the OS choose an available port. If None DNS forwarding is
          disabled.
      replay_options: an iterable of options strings to forward to replay.py.

    Raises:
      ReplayNotFoundError: if the archive (or, when recording, its directory)
          or the replay script does not exist.
    """
    self.archive_path = archive_path
    self._replay_host = replay_host
    self._use_dns_server = dns_port is not None
    self._started_ports = {}  # a dict such as {'http': 80, 'https': 443}
    self.replay_process = None

    # A temporary path for storing stdout & stderr of the webpagereplay
    # subprocess.
    self._temp_log_file_path = None

    # The options are read twice below (command line, then the '--record'
    # check), so materialize them in case a one-shot iterable was passed.
    replay_options = list(replay_options)

    replay_py = os.path.join(_REPLAY_DIR, 'replay.py')
    self._cmd_line = self._GetCommandLine(
        replay_py, self._replay_host, http_port, https_port, dns_port,
        replay_options, archive_path)

    if '--record' in replay_options:
      # When recording, the archive itself need not exist yet, only its
      # directory. A bare file name has an empty dirname, which means the
      # current directory.
      archive_dir = os.path.dirname(self.archive_path) or os.curdir
      self._CheckPath('archive directory', archive_dir)
    else:
      self._CheckPath('archive file', self.archive_path)
    self._CheckPath('replay script', replay_py)

  @staticmethod
  def _GetCommandLine(replay_py, host_ip, http_port, https_port, dns_port,
                      replay_options, archive_path):
    """Set WPR command-line options. Can be overridden if needed.

    Args:
      replay_py: path to the replay.py script.
      host_ip: value for replay's --host option.
      http_port: value for replay's --port option.
      https_port: value for replay's --ssl_port option.
      dns_port: value for replay's --dns_port option, or None to pass
          --no-dns_forwarding instead.
      replay_options: extra option strings appended before the archive path.
      archive_path: path to the WPR archive; always the last argument.

    Returns:
      The command line as a list of strings, starting with sys.executable.
    """
    cmd_line = [sys.executable, replay_py]
    cmd_line.extend([
        '--host=%s' % host_ip,
        '--port=%s' % http_port,
        '--ssl_port=%s' % https_port
        ])
    if dns_port is not None:
      # Note that if --host is not '127.0.0.1', Replay will override the local
      # DNS nameserver settings to point to the replay-started DNS server.
      cmd_line.append('--dns_port=%s' % dns_port)
    else:
      cmd_line.append('--no-dns_forwarding')
    cmd_line.extend([
        '--use_closest_match',
        '--log_level=warning'
        ])
    cmd_line.extend(replay_options)
    cmd_line.append(archive_path)
    return cmd_line

  def _CheckPath(self, label, path):
    """Raises ReplayNotFoundError if |path| does not exist."""
    if not os.path.exists(path):
      raise ReplayNotFoundError(label, path)

  def _OpenLogFile(self):
    """Opens the log file for writing, creating its directory if needed."""
    log_dir = os.path.dirname(self._temp_log_file_path)
    if not os.path.exists(log_dir):
      os.makedirs(log_dir)
    return open(self._temp_log_file_path, 'w')

  def _LogLines(self):
    """Yields the log lines; yields nothing if there is no log file."""
    log_path = self._temp_log_file_path
    if not log_path or not os.path.isfile(log_path):
      return
    with open(log_path) as f:
      for line in f:
        yield line

  def _HasIncompleteStartedPorts(self):
    """Returns true if a port that replay should report is still unknown."""
    return ('http' not in self._started_ports or
            'https' not in self._started_ports or
            (self._use_dns_server and 'dns' not in self._started_ports))

  def _IsStarted(self):
    """Returns true if the server is up and running."""
    if self.replay_process is None:
      # StartServer has not been called yet.
      return False
    if self.replay_process.poll() is not None:
      # The process terminated.
      return False

    if self._HasIncompleteStartedPorts():
      self._started_ports = self._ParseLogFilePorts(self._LogLines())
    if self._HasIncompleteStartedPorts():
      return False
    try:
      # HTTPS may require SNI (which urllib does not speak), so only check
      # that HTTP responds.
      response = self._UrlOpen('web-page-replay-generate-200')
      try:
        return response.getcode() == 200
      finally:
        # This method is polled repeatedly; do not leak one socket per poll.
        response.close()
    except IOError:
      return False

  @staticmethod
  def _ParseLogFilePorts(log_lines):
    """Returns the ports on which replay listens as reported in its log file.

    Only matches HTTP, HTTPS, and DNS. One call may return only some
    of the ports depending on what has been written to the log file.

    Example log lines:
        2014-09-03 17:04:27,978 WARNING HTTP server started on 127.0.0.1:51673
        2014-09-03 17:04:27,978 WARNING HTTPS server started on 127.0.0.1:35270

    Args:
      log_lines: an iterable of log line strings.

    Returns:
      a dict with ports available in log_lines. For example,
         {}  # no ports found
         {'http': 1234, 'https': 2345, 'dns': 3456}
    """
    ports = {}
    for line in log_lines:
      m = _SERVER_STARTED_RE.match(line.strip())
      if m:
        protocol = m.group('protocol').lower()
        ports[protocol] = int(m.group('port'))
    return ports

  def StartServer(self):
    """Start Web Page Replay and verify that it started.

    Returns:
      A forwarders.PortSet(http, https, dns) tuple; with dns None if unused.
    Raises:
      ReplayNotStartedError: if Replay start-up fails.
    """
    is_posix = sys.platform.startswith('linux') or sys.platform == 'darwin'
    logging.debug('Starting Web-Page-Replay: %s', self._cmd_line)
    self._CreateTempLogFilePath()
    # Ports from a previous run must not be mistaken for this run's ports.
    self._started_ports = {}
    with self._OpenLogFile() as log_fh:
      self.replay_process = subprocess.Popen(
          self._cmd_line, stdout=log_fh, stderr=subprocess.STDOUT,
          preexec_fn=(_ResetInterruptHandler if is_posix else None))
    try:
      util.WaitFor(self._IsStarted, 30)
    except exceptions.TimeoutException:
      # Read the log before the clean-up below deletes it.
      log_output = ''.join(self._LogLines())
      try:
        # Do not leave a half-started replay process behind.
        self._StopReplayProcess()
      except Exception:  # pylint: disable=broad-except
        # The start-up failure is the error the caller needs to see.
        logging.exception(
            'Unable to stop Web-Page-Replay after it failed to start.')
      finally:
        self._CleanUpTempLogFilePath()
        self._started_ports = {}
      raise ReplayNotStartedError(
          'Web Page Replay failed to start. Log output:\n%s' % log_output)
    atexit.register(self.StopServer)
    return forwarders.PortSet(
        self._started_ports['http'],
        self._started_ports['https'],
        self._started_ports.get('dns'),  # None if unused
        )

  def StopServer(self):
    """Stop Web Page Replay.

    Safe to call more than once and on a server that was never started; the
    temporary log file and the recorded ports are released in every case.
    """
    try:
      if self._IsStarted():
        self._StopReplayProcess()
      else:
        logging.warning('Attempting to stop WPR server that is not running.')
    finally:
      # TODO(rnephew): Upload logs to google storage. crbug.com/525787
      self._CleanUpTempLogFilePath()
      self._started_ports = {}

  def _StopReplayProcess(self):
    """Stops the replay subprocess, as gracefully as the platform allows.

    First asks replay to exit through its HTTP command URL, then falls back
    to SIGINT, and finally to terminate(). Blocks until the process exits.
    """
    if not self.replay_process:
      return

    logging.debug('Trying to stop Web-Page-Replay gracefully')
    try:
      # _UrlOpen needs the HTTP port, which may be unknown if replay never
      # finished starting.
      if 'http' in self._started_ports:
        self._UrlOpen('web-page-replay-command-exit').close()
    except IOError:
      # IOError is possible because the server might exit without response.
      pass

    try:
      util.WaitFor(lambda: self.replay_process.poll() is not None, 10)
    except exceptions.TimeoutException:
      try:
        # Use a SIGINT so that it can do graceful cleanup.
        self.replay_process.send_signal(signal.SIGINT)
      except Exception:  # pylint: disable=broad-except
        # On Windows, we are left with no other option than terminate().
        is_primary_nameserver_changed_by_replay = (
            self._use_dns_server and self._replay_host == '127.0.0.1')
        if is_primary_nameserver_changed_by_replay:
          # Replay changes the DNS nameserver configuration so that DNS
          # requests are resolved by replay's own DNS server. It resolves
          # all DNS requests to its own IP address so it can serve the
          # HTTP and HTTPS requests.
          # If the replay host is not '127.0.0.1', then replay skips the
          # nameserver change because it assumes a different mechanism
          # will be used to route DNS requests to replay's DNS server.
          logging.warning(
              'Unable to stop Web-Page-Replay gracefully.\n'
              'Replay changed the DNS nameserver configuration to make replay '
              'the primary nameserver. That might not be restored!')
        try:
          self.replay_process.terminate()
        except Exception:  # pylint: disable=broad-except
          # Best effort: the process may already be gone.
          pass
      self.replay_process.wait()

  def _CreateTempLogFilePath(self):
    """Creates an empty temporary file to receive the replay output."""
    assert self._temp_log_file_path is None
    handle, self._temp_log_file_path = tempfile.mkstemp()
    os.close(handle)

  def _CleanUpTempLogFilePath(self):
    """Logs the replay output at debug level, then deletes the log file.

    Does nothing if there is no temporary log file.
    """
    log_path = self._temp_log_file_path
    if not log_path:
      return
    # Forget the path first so that a failure below cannot block a restart.
    self._temp_log_file_path = None
    try:
      # The content is emitted at debug level, so only read the file when
      # debug logging is actually enabled.
      if logging.getLogger('').isEnabledFor(logging.DEBUG):
        with open(log_path, 'r') as f:
          wpr_log_content = '\n'.join([
              '************************** WPR LOG *****************************',
              f.read(),
              '************************** END OF WPR LOG **********************'])
        logging.debug(wpr_log_content)
    finally:
      try:
        os.remove(log_path)
      except OSError as e:
        # E.g. the file is still held open by a replay process that could
        # not be stopped. Losing a temp file must not fail the shutdown.
        logging.warning('Unable to remove WPR log file %s: %s', log_path, e)

  def __enter__(self):
    """Add support for with-statement."""
    self.StartServer()
    return self

  def __exit__(self, unused_exc_type, unused_exc_val, unused_exc_tb):
    """Add support for with-statement."""
    self.StopServer()

  def _UrlOpen(self, url_path, protocol='http'):
    """Open a Replay URL.

    For matching requests in the archive, Replay relies on the "Host:" header.
    For Replay command URLs, the "Host:" header is not needed.

    Args:
      url_path: WPR server request path.
      protocol: 'http' or 'https'
    Returns:
      a file-like object from urllib.urlopen
    Raises:
      KeyError: if the port for |protocol| has not been parsed from the log.
    """
    url = '%s://%s:%s/%s' % (
        protocol, self._replay_host, self._started_ports[protocol], url_path)
    # An empty proxies dict makes urllib bypass any configured proxy.
    return urllib.urlopen(url, proxies={})


def _ResetInterruptHandler():
  """Reset the interrupt handler back to the default.

  The replay process is stopped gracefully by making an HTTP request
  ('web-page-replay-command-exit'). The graceful exit is important for
  restoring the DNS configuration. If the HTTP request fails, the fallback
  is to send SIGINT to the process.

  On posix system, running this function before starting replay fixes a
  bug that shows up when Telemetry is run as a background command from a
  script. https://crbug.com/254572.

  Background: Signal masks on Linux are inherited from parent
  processes. If anything invoking us accidentally masks SIGINT
  (e.g. by putting a process in the background from a shell script),
  sending a SIGINT to the child will fail to terminate it.
  """
  signal.signal(signal.SIGINT, signal.SIG_DFL)