1# Copyright 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Start and stop Web Page Replay."""
6
7from telemetry.internal.util import atexit_with_log
8import logging
9import os
10import re
11import signal
12import subprocess
13import sys
14import tempfile
15import urllib
16
17from telemetry.core import util
18from telemetry.internal import forwarders
19
20import py_utils
21
# Directory of the Web Page Replay checkout inside Telemetry's third-party
# tree; replay.py is looked up in this directory.
_REPLAY_DIR = os.path.join(util.GetTelemetryThirdPartyDir(),
                           'web-page-replay')
24
25
class ReplayError(Exception):
  """Base class for the errors raised by this module.

  Catching ReplayError catches every more specific error defined here.
  """
29
30
class ReplayNotFoundError(ReplayError):
  """Raised when a path required by Web Page Replay does not exist.

  The exception's args are the (label, path) pair given to the constructor,
  where label is a short human-readable description of what path refers to.
  """

  def __init__(self, label, path):
    # Handing both values to the base class stores them as self.args.
    super(ReplayNotFoundError, self).__init__(label, path)

  def __str__(self):
    what, where = self.args
    return 'Path does not exist for %s: %s' % (what, where)
39
40
class ReplayNotStartedError(ReplayError):
  """Raised when Web Page Replay fails to start up."""
43
44
class ReplayServer(object):
  """Start and Stop Web Page Replay.

  Web Page Replay is a proxy that can record and "replay" web pages with
  simulated network characteristics -- without having to edit the pages
  by hand. With WPR, tests can use "real" web content, and catch
  performance issues that may result from introducing network delays and
  bandwidth throttling.

  Example:
     with ReplayServer(archive_path, '127.0.0.1', 0, 0, None, []):
       self.NavigateToURL(start_url)
       self.WaitUntil(...)
  """

  # Names passed to replay.py through --log_level, paired with the logging
  # threshold each one stands for. Must stay sorted by ascending threshold.
  _LOG_LEVEL_NAMES = (
      (logging.DEBUG, 'debug'),
      (logging.INFO, 'info'),
      (logging.WARNING, 'warning'),
      (logging.ERROR, 'error'),
      (logging.CRITICAL, 'critical'),
  )

  # Matches the "<PROTOCOL> server started on <host>:<port>" log lines.
  # Compiled once because the log is re-parsed on every start-up poll.
  _PORT_RE = re.compile(
      r'.*?(?P<protocol>HTTP|HTTPS|DNS)'
      r' server started on '
      r'(?P<host>[^:]*):'
      r'(?P<port>\d+)')

  def __init__(self, archive_path, replay_host, http_port, https_port, dns_port,
               replay_options):
    """Initialize ReplayServer.

    Args:
      archive_path: a path to a specific WPR archive (required).
      replay_host: the hostname to serve traffic.
      http_port: an integer port on which to serve HTTP traffic. May be zero
          to let the OS choose an available port.
      https_port: an integer port on which to serve HTTPS traffic. May be zero
          to let the OS choose an available port.
      dns_port: an integer port on which to serve DNS traffic. May be zero
          to let the OS choose an available port. If None DNS forwarding is
          disabled.
      replay_options: an iterable of options strings to forward to replay.py.

    Raises:
      ReplayNotFoundError: if the archive (or, when recording, the directory
          that will hold it) or the replay script does not exist.
    """
    # The options are both forwarded to the command line and searched for
    # '--record'. Materialize them so a one-shot iterable is not already
    # exhausted by the time it is searched.
    replay_options = list(replay_options)

    self.archive_path = archive_path
    self._replay_host = replay_host
    self._use_dns_server = dns_port is not None
    self._started_ports = {}  # a dict such as {'http': 80, 'https': 443}

    # A temporary path for storing stdout & stderr of the webpagereplay
    # subprocess.
    self._temp_log_file_path = None

    self.replay_process = None

    replay_py = os.path.join(_REPLAY_DIR, 'replay.py')
    self._cmd_line = self._GetCommandLine(
        replay_py, self._replay_host, http_port, https_port, dns_port,
        replay_options, archive_path)

    if '--record' in replay_options:
      # When recording, the archive itself need not exist yet, only its
      # directory. A bare file name has an empty dirname, which means the
      # current directory.
      archive_dir = os.path.dirname(self.archive_path) or os.curdir
      self._CheckPath('archive directory', archive_dir)
    else:
      self._CheckPath('archive file', self.archive_path)
    self._CheckPath('replay script', replay_py)

  @staticmethod
  def _GetLoggingLevel(log_level=None):
    """Returns the replay.py log level name for a logging level.

    Args:
      log_level: a numeric logging level. If None (or 0), the level of the
          root logger is used.

    Returns:
      One of 'debug', 'info', 'warning', 'error' or 'critical'. A level that
      is not one of the standard constants maps to the closest standard
      level at or below it, so replay is never less verbose than requested;
      anything below DEBUG maps to 'debug'.
    """
    level = log_level or logging.getLogger().level
    name = 'debug'
    for threshold, threshold_name in ReplayServer._LOG_LEVEL_NAMES:
      if level < threshold:
        break
      name = threshold_name
    return name

  @staticmethod
  def _GetCommandLine(replay_py, host_ip, http_port, https_port, dns_port,
                      replay_options, archive_path, log_level=None):
    """Set WPR command-line options. Can be overridden if needed.

    Args:
      replay_py: path to the replay.py script.
      host_ip: value for --host.
      http_port: value for --port.
      https_port: value for --ssl_port.
      dns_port: value for --dns_port, or None to pass --no-dns_forwarding.
      replay_options: extra option strings appended before the archive path.
      archive_path: path to the WPR archive; always the last argument.
      log_level: numeric logging level, see _GetLoggingLevel.

    Returns:
      The command line as a list of strings, starting with the current
      Python interpreter.
    """
    cmd_line = [sys.executable, replay_py]
    cmd_line.extend([
        '--host=%s' % host_ip,
        '--port=%s' % http_port,
        '--ssl_port=%s' % https_port
        ])
    if dns_port is not None:
      # NOTE(review): the note that used to be here said Replay overrides the
      # local DNS nameserver settings when --host is not '127.0.0.1', whereas
      # _StopReplayProcess assumes the override happens only when the host is
      # '127.0.0.1'. Confirm against replay.py.
      cmd_line.append('--dns_port=%s' % dns_port)
    else:
      cmd_line.append('--no-dns_forwarding')
    cmd_line.extend([
        '--use_closest_match',
        '--log_level=%s' % ReplayServer._GetLoggingLevel(log_level)
        ])
    cmd_line.extend(replay_options)
    cmd_line.append(archive_path)
    return cmd_line

  def _CheckPath(self, label, path):
    """Raises ReplayNotFoundError(label, path) if path does not exist."""
    if not os.path.exists(path):
      raise ReplayNotFoundError(label, path)

  def _OpenLogFile(self):
    """Opens the log file for writing."""
    log_dir = os.path.dirname(self._temp_log_file_path)
    # An empty dirname means the current directory; there is nothing to
    # create in that case.
    if log_dir and not os.path.exists(log_dir):
      os.makedirs(log_dir)
    return open(self._temp_log_file_path, 'w')

  def _LogLines(self):
    """Yields the log lines.

    Yields nothing if no log file has been created or it no longer exists.
    """
    log_path = self._temp_log_file_path
    if not log_path or not os.path.isfile(log_path):
      return
    with open(log_path) as f:
      for line in f:
        yield line

  def _IsStarted(self):
    """Returns true if the server is up and running."""
    if self.replay_process is None or self.replay_process.poll() is not None:
      # The process was never started, or it terminated.
      return False

    def HasIncompleteStartedPorts():
      return ('http' not in self._started_ports or
              'https' not in self._started_ports or
              (self._use_dns_server and 'dns' not in self._started_ports))

    if HasIncompleteStartedPorts():
      self._started_ports = self._ParseLogFilePorts(self._LogLines())
    if HasIncompleteStartedPorts():
      return False
    try:
      # HTTPS may require SNI (which urllib does not speak), so only check
      # that HTTP responds.
      response = self._UrlOpen('web-page-replay-generate-200')
      try:
        return 200 == response.getcode()
      finally:
        # This runs on every start-up poll; close the connection instead of
        # leaving it to the garbage collector.
        response.close()
    except IOError:
      return False

  @staticmethod
  def _ParseLogFilePorts(log_lines):
    """Returns the ports on which replay listens as reported in its log file.

    Only matches HTTP, HTTPS, and DNS. One call may return only some
    of the ports depending on what has been written to the log file.

    Example log lines:
        2014-09-03 17:04:27,978 WARNING HTTP server started on 127.0.0.1:51673
        2014-09-03 17:04:27,978 WARNING HTTPS server started on 127.0.0.1:35270

    Args:
      log_lines: an iterable of log line strings.

    Returns:
      a dict with ports available in log_lines. For example,
         {}  # no ports found
         {'http': 1234, 'https': 2345, 'dns': 3456}
    """
    ports = {}
    for line in log_lines:
      m = ReplayServer._PORT_RE.match(line.strip())
      if m:
        protocol = m.group('protocol').lower()
        # A later line for the same protocol overrides an earlier one.
        ports[protocol] = int(m.group('port'))
    return ports

  def StartServer(self):
    """Start Web Page Replay and verify that it started.

    If start-up fails, the replay process is stopped and its temporary log
    file is removed before the error is raised, so that nothing is leaked
    and StartServer can be called again.

    Returns:
      A forwarders.PortSet(http, https, dns) tuple; with dns None if unused.
    Raises:
      ReplayNotStartedError: if Replay start-up fails.
    """
    is_posix = sys.platform.startswith('linux') or sys.platform == 'darwin'
    logging.info('Starting Web-Page-Replay: %s', self._cmd_line)
    self._CreateTempLogFilePath()
    try:
      with open(self._temp_log_file_path, 'w') as log_fh:
        self.replay_process = subprocess.Popen(
            self._cmd_line, stdout=log_fh, stderr=subprocess.STDOUT,
            preexec_fn=(_ResetInterruptHandler if is_posix else None))
    except Exception:
      # The process could not be launched; do not leave the log file behind.
      self._CleanUpTempLogFilePath()
      raise

    try:
      py_utils.WaitFor(self._IsStarted, 30)
    except py_utils.TimeoutException:
      # Read the log before StopServer removes it.
      log_output = ''.join(self._LogLines())
      try:
        self.StopServer()
      except Exception:  # pylint: disable=broad-except
        # Clean-up is best effort; the start-up failure is what gets reported.
        logging.exception('Failed to clean up Web-Page-Replay.')
      raise ReplayNotStartedError(
          'Web Page Replay failed to start. Log output:\n%s' % log_output)

    logging.info('WPR ports: %s', self._started_ports)
    atexit_with_log.Register(self.StopServer)
    return forwarders.PortSet(
        self._started_ports['http'],
        self._started_ports['https'],
        self._started_ports.get('dns'),  # None if unused
        )

  def StopServer(self):
    """Stop Web Page Replay.

    The process is stopped whenever it is still alive, even if it does not
    answer HTTP requests, and the temporary log file is removed whenever one
    exists. Calling this more than once, or before StartServer, is a no-op.
    """
    try:
      if (self.replay_process is not None and
          self.replay_process.poll() is None):
        self._StopReplayProcess()
    finally:
      if self._temp_log_file_path:
        # TODO(rnephew): Upload logs to google storage. crbug.com/525787
        self._CleanUpTempLogFilePath()

  def _StopReplayProcess(self):
    """Asks the replay process to exit, escalating if it does not.

    First requests a graceful exit over HTTP, then waits up to 10 seconds.
    If the process is still alive it is sent SIGINT, or terminated where
    SIGINT cannot be sent, and then waited for.
    """
    if not self.replay_process:
      return

    logging.debug('Trying to stop Web-Page-Replay gracefully')
    try:
      # The exit command goes over HTTP, so it can only be sent once the
      # HTTP port is known.
      if 'http' in self._started_ports:
        self._UrlOpen('web-page-replay-command-exit').close()
    except IOError:
      # IOError is possible because the server might exit without response.
      pass

    try:
      py_utils.WaitFor(lambda: self.replay_process.poll() is not None, 10)
    except py_utils.TimeoutException:
      try:
        # Use a SIGINT so that it can do graceful cleanup.
        self.replay_process.send_signal(signal.SIGINT)
      except Exception:  # pylint: disable=broad-except
        # On Windows, we are left with no other option than terminate().
        is_primary_nameserver_changed_by_replay = (
            self._use_dns_server and self._replay_host == '127.0.0.1')
        if is_primary_nameserver_changed_by_replay:
          # Replay changes the DNS nameserver configuration so that DNS
          # requests are resolved by replay's own DNS server. It resolves
          # all DNS requests to its own IP address so it can serve the
          # HTTP and HTTPS requests.
          # If the replay host is not '127.0.0.1', then replay skips the
          # nameserver change because it assumes a different mechanism
          # will be used to route DNS requests to replay's DNS server.
          logging.warning(
              'Unable to stop Web-Page-Replay gracefully.\n'
              'Replay changed the DNS nameserver configuration to make replay '
              'the primary nameserver. That might not be restored!')
        try:
          self.replay_process.terminate()
        except Exception:  # pylint: disable=broad-except
          # Best effort: the process may already be gone.
          logging.debug('Unable to terminate Web-Page-Replay.', exc_info=True)
      self.replay_process.wait()

  def _CreateTempLogFilePath(self):
    """Creates an empty temporary file to receive the replay output."""
    assert self._temp_log_file_path is None
    handle, self._temp_log_file_path = tempfile.mkstemp()
    # Only the path is needed; the file is reopened by whoever writes to it.
    os.close(handle)

  def _CleanUpTempLogFilePath(self):
    """Removes the temporary log file, logging its content at debug level."""
    assert self._temp_log_file_path
    log_path = self._temp_log_file_path
    try:
      if logging.getLogger('').isEnabledFor(logging.DEBUG):
        with open(log_path, 'r') as f:
          wpr_log_content = '\n'.join([
              '************************** WPR LOG *****************************',
              f.read(),
              '************************** END OF WPR LOG **********************'])
        logging.debug(wpr_log_content)
    finally:
      # Forget and remove the file even if dumping its content failed.
      self._temp_log_file_path = None
      os.remove(log_path)

  def __enter__(self):
    """Add support for with-statement."""
    self.StartServer()
    return self

  def __exit__(self, unused_exc_type, unused_exc_val, unused_exc_tb):
    """Add support for with-statement."""
    self.StopServer()

  def _UrlOpen(self, url_path, protocol='http'):
    """Open a Replay URL.

    For matching requests in the archive, Replay relies on the "Host:" header.
    For Replay command URLs, the "Host:" header is not needed.

    Args:
      url_path: WPR server request path.
      protocol: 'http' or 'https'
    Returns:
      a file-like object from urllib.urlopen
    Raises:
      KeyError: if the port for protocol has not been discovered yet.
    """
    url = '%s://%s:%s/%s' % (
        protocol, self._replay_host, self._started_ports[protocol], url_path)
    # An empty proxies dict makes urllib connect directly, bypassing any
    # proxy configured in the environment.
    return urllib.urlopen(url, proxies={})
321
def _ResetInterruptHandler():
  """Restore the default SIGINT handling in the calling process.

  Intended to run in the replay child process before replay starts (on
  posix systems it is used as the subprocess preexec hook).

  Why it matters: replay is normally asked to exit through an HTTP request
  ('web-page-replay-command-exit'), and exiting gracefully is important for
  restoring the DNS configuration. When that request fails, the fallback is
  to send SIGINT to the process, which only helps if SIGINT is acted upon.

  Signal masks on Linux are inherited from parent processes, so if whatever
  invoked us accidentally masked SIGINT (e.g. a shell script that put the
  command in the background), a SIGINT sent to the child would fail to
  terminate it. Resetting the handler here fixes that case, which shows up
  when Telemetry is run as a background command from a script.
  See https://crbug.com/254572.
  """
  signal.signal(signal.SIGINT, signal.SIG_DFL)
340