1# Copyright 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Start and stop Web Page Replay."""
6
7import atexit
8import logging
9import os
10import re
11import signal
12import subprocess
13import sys
14import tempfile
15import urllib
16
17from telemetry.core import exceptions
18from telemetry.core import util
19from telemetry.internal import forwarders
20
# Directory, under Telemetry's third-party directory, in which the
# 'replay.py' script launched by ReplayServer is looked up.
_REPLAY_DIR = os.path.join(
    util.GetTelemetryThirdPartyDir(), 'webpagereplay')
23
24
class ReplayError(Exception):
  """Base class for every exception defined by this module."""
28
29
class ReplayNotFoundError(ReplayError):
  """Error for a path that does not exist.

  The exception's args are the (label, path) pair: a short description of
  what the path was for, and the path itself.
  """

  def __init__(self, label, path):
    # Handing both values to the base class stores them in self.args.
    super(ReplayNotFoundError, self).__init__(label, path)

  def __str__(self):
    return 'Path does not exist for %s: %s' % self.args
38
39
class ReplayNotStartedError(ReplayError):
  """Error raised when Web Page Replay start-up fails."""
42
43
class ReplayServer(object):
  """Start and Stop Web Page Replay.

  Web Page Replay is a proxy that can record and "replay" web pages with
  simulated network characteristics -- without having to edit the pages
  by hand. With WPR, tests can use "real" web content, and catch
  performance issues that may result from introducing network delays and
  bandwidth throttling.

  Example:
     with ReplayServer(archive_path, replay_host, http_port, https_port,
                       dns_port, replay_options):
       self.NavigateToURL(start_url)
       self.WaitUntil(...)
  """

  def __init__(self, archive_path, replay_host, http_port, https_port, dns_port,
               replay_options):
    """Initialize ReplayServer.

    Args:
      archive_path: a path to a specific WPR archive (required).
      replay_host: the hostname to serve traffic.
      http_port: an integer port on which to serve HTTP traffic. May be zero
          to let the OS choose an available port.
      https_port: an integer port on which to serve HTTPS traffic. May be zero
          to let the OS choose an available port.
      dns_port: an integer port on which to serve DNS traffic. May be zero
          to let the OS choose an available port. If None DNS forwarding is
          disabled.
      replay_options: an iterable of options strings to forward to replay.py.

    Raises:
      ReplayNotFoundError: if the archive (or, when recording, its
          directory) or the replay script does not exist.
    """
    self.archive_path = archive_path
    self._replay_host = replay_host
    self._use_dns_server = dns_port is not None
    self._started_ports = {}  # a dict such as {'http': 80, 'https': 443}

    # A temporary path for storing stdout & stderr of the webpagereplay
    # subprocess.
    self._temp_log_file_path = None

    # Materialize the options once: they are both copied into the command
    # line and searched for '--record', which a one-shot iterable (e.g. a
    # generator) could not support.
    replay_options = list(replay_options)

    replay_py = os.path.join(_REPLAY_DIR, 'replay.py')
    self._cmd_line = self._GetCommandLine(
        replay_py, self._replay_host, http_port, https_port, dns_port,
        replay_options, archive_path)

    if '--record' in replay_options:
      # When recording, the archive itself need not exist yet, only its
      # directory. A bare file name has an empty dirname, which denotes the
      # current directory.
      self._CheckPath('archive directory',
                      os.path.dirname(self.archive_path) or os.curdir)
    else:
      self._CheckPath('archive file', self.archive_path)
    self._CheckPath('replay script', replay_py)

    # The subprocess.Popen object of the running replay, set by StartServer.
    self.replay_process = None

  @staticmethod
  def _GetCommandLine(replay_py, host_ip, http_port, https_port, dns_port,
                      replay_options, archive_path):
    """Set WPR command-line options. Can be overridden if needed.

    Args:
      replay_py: path of the replay.py script to run.
      host_ip: value for the --host option.
      http_port: value for the --port option.
      https_port: value for the --ssl_port option.
      dns_port: value for the --dns_port option, or None to pass
          --no-dns_forwarding instead.
      replay_options: an iterable of extra option strings.
      archive_path: path of the WPR archive; always the last argument.

    Returns:
      The command line as a list of strings, starting with sys.executable.
    """
    cmd_line = [sys.executable, replay_py]
    cmd_line.extend([
        '--host=%s' % host_ip,
        '--port=%s' % http_port,
        '--ssl_port=%s' % https_port
        ])
    if dns_port is not None:
      # NOTE(review): _StopReplayProcess assumes that Replay overrides the
      # local DNS nameserver settings (pointing them at the replay-started
      # DNS server) only when --host is '127.0.0.1'; confirm against
      # replay.py.
      cmd_line.append('--dns_port=%s' % dns_port)
    else:
      cmd_line.append('--no-dns_forwarding')
    cmd_line.extend([
        '--use_closest_match',
        '--log_level=warning'
        ])
    cmd_line.extend(replay_options)
    cmd_line.append(archive_path)
    return cmd_line

  def _CheckPath(self, label, path):
    """Raises ReplayNotFoundError(label, path) if path does not exist."""
    if not os.path.exists(path):
      raise ReplayNotFoundError(label, path)

  def _OpenLogFile(self):
    """Opens the log file for writing."""
    log_dir = os.path.dirname(self._temp_log_file_path)
    if not os.path.exists(log_dir):
      os.makedirs(log_dir)
    return open(self._temp_log_file_path, 'w')

  def _LogLines(self):
    """Yields the log lines; yields nothing if there is no log file."""
    if (not self._temp_log_file_path or
        not os.path.isfile(self._temp_log_file_path)):
      return
    with open(self._temp_log_file_path) as f:
      for line in f:
        yield line

  def _IsStarted(self):
    """Returns true if the server is up and running."""
    if self.replay_process is None or self.replay_process.poll() is not None:
      # The process was never launched, or it terminated.
      return False

    def HasIncompleteStartedPorts():
      return ('http' not in self._started_ports or
              'https' not in self._started_ports or
              (self._use_dns_server and 'dns' not in self._started_ports))
    if HasIncompleteStartedPorts():
      self._started_ports = self._ParseLogFilePorts(self._LogLines())
    if HasIncompleteStartedPorts():
      return False
    try:
      # HTTPS may require SNI (which urllib does not speak), so only check
      # that HTTP responds.
      return 200 == self._UrlOpen('web-page-replay-generate-200').getcode()
    except IOError:
      return False

  @staticmethod
  def _ParseLogFilePorts(log_lines):
    """Returns the ports on which replay listens as reported in its log file.

    Only matches HTTP, HTTPS, and DNS. One call may return only some
    of the ports depending on what has been written to the log file.

    Example log lines:
        2014-09-03 17:04:27,978 WARNING HTTP server started on 127.0.0.1:51673
        2014-09-03 17:04:27,978 WARNING HTTPS server started on 127.0.0.1:35270

    Args:
      log_lines: an iterable of log line strings.

    Returns:
      a dict with ports available in log_lines. For example,
         {}  # no ports found
         {'http': 1234, 'https': 2345, 'dns': 3456}
    """
    ports = {}
    port_re = re.compile(
        r'.*?(?P<protocol>HTTP|HTTPS|DNS)'
        r' server started on '
        r'(?P<host>[^:]*):'
        r'(?P<port>\d+)')
    for line in log_lines:
      m = port_re.match(line.strip())
      if m:
        protocol = m.group('protocol').lower()
        ports[protocol] = int(m.group('port'))
    return ports

  def StartServer(self):
    """Start Web Page Replay and verify that it started.

    If replay does not come up in time, the replay process is stopped and
    its temporary log file is removed before the error is raised.

    Returns:
      A forwarders.PortSet(http, https, dns) tuple; with dns None if unused.
    Raises:
      ReplayNotStartedError: if Replay start-up fails.
    """
    is_posix = sys.platform.startswith('linux') or sys.platform == 'darwin'
    logging.debug('Starting Web-Page-Replay: %s', self._cmd_line)
    # Ports parsed during an earlier run must not be mistaken for the ports
    # of the process launched below.
    self._started_ports = {}
    self._CreateTempLogFilePath()
    try:
      with self._OpenLogFile() as log_fh:
        self.replay_process = subprocess.Popen(
            self._cmd_line, stdout=log_fh, stderr=subprocess.STDOUT,
            preexec_fn=(_ResetInterruptHandler if is_posix else None))
    except Exception:
      # Nothing was launched, so do not leave the temporary log behind.
      self._CleanUpTempLogFilePath()
      raise
    try:
      util.WaitFor(self._IsStarted, 30)
      atexit.register(self.StopServer)
      return forwarders.PortSet(
          self._started_ports['http'],
          self._started_ports['https'],
          self._started_ports.get('dns'),  # None if unused
          )
    except exceptions.TimeoutException:
      # Read the log before the clean-up below deletes it.
      log_output = ''.join(self._LogLines())
      self._AbortFailedStart()
      raise ReplayNotStartedError(
          'Web Page Replay failed to start. Log output:\n%s' % log_output)

  def _AbortFailedStart(self):
    """Best-effort teardown of a replay process that failed to start."""
    try:
      try:
        if self.replay_process.poll() is None:
          self._StopReplayProcess()
      finally:
        self._CleanUpTempLogFilePath()
    except Exception:  # pylint: disable=broad-except
      # The caller is about to report the start-up failure; a teardown
      # problem must not mask that error.
      logging.exception(
          'Failed to clean up after Web-Page-Replay start-up failure.')

  def StopServer(self):
    """Stop Web Page Replay.

    Stops the replay process if it is still alive and removes the temporary
    log file if one exists. Logs a warning if no replay process is running.
    """
    process_is_alive = (self.replay_process is not None and
                        self.replay_process.poll() is None)
    if not process_is_alive:
      logging.warning('Attempting to stop WPR server that is not running.')
    try:
      if process_is_alive:
        self._StopReplayProcess()
    finally:
      # TODO(rnephew): Upload logs to google storage. crbug.com/525787
      if self._temp_log_file_path:
        self._CleanUpTempLogFilePath()

  def _StopReplayProcess(self):
    """Stops the replay process, gracefully if possible.

    First requests an exit over HTTP, then falls back to SIGINT, and
    finally to terminate() if the signal cannot be sent.
    """
    if not self.replay_process:
      return

    logging.debug('Trying to stop Web-Page-Replay gracefully')
    try:
      # _UrlOpen needs the HTTP port, which is only known once it has been
      # parsed from the log.
      if 'http' in self._started_ports:
        self._UrlOpen('web-page-replay-command-exit').close()
    except IOError:
      # IOError is possible because the server might exit without response.
      pass

    try:
      util.WaitFor(lambda: self.replay_process.poll() is not None, 10)
    except exceptions.TimeoutException:
      try:
        # Use a SIGINT so that it can do graceful cleanup.
        self.replay_process.send_signal(signal.SIGINT)
      except Exception:  # pylint: disable=broad-except
        # On Windows, we are left with no other option than terminate().
        is_primary_nameserver_changed_by_replay = (
            self._use_dns_server and self._replay_host == '127.0.0.1')
        if is_primary_nameserver_changed_by_replay:
          # Replay changes the DNS nameserver configuration so that DNS
          # requests are resolved by replay's own DNS server. It resolves
          # all DNS requests to its own IP address so it can serve the
          # HTTP and HTTPS requests.
          # If the replay host is not '127.0.0.1', then replay skips the
          # nameserver change because it assumes a different mechanism
          # will be used to route DNS requests to replay's DNS server.
          logging.warning(
              'Unable to stop Web-Page-Replay gracefully.\n'
              'Replay changed the DNS nameserver configuration to make replay '
              'the primary nameserver. That might not be restored!')
        try:
          self.replay_process.terminate()
        except Exception:  # pylint: disable=broad-except
          # Best effort: the process may already be gone.
          pass
      self.replay_process.wait()

  def _CreateTempLogFilePath(self):
    """Creates an empty temporary file and records its path for the log."""
    assert self._temp_log_file_path is None
    handle, self._temp_log_file_path = tempfile.mkstemp()
    os.close(handle)

  def _CleanUpTempLogFilePath(self):
    """Logs the WPR log at debug level, then deletes the temporary file."""
    assert self._temp_log_file_path
    # Only read the file when the debug message below will be emitted.
    if logging.getLogger('').isEnabledFor(logging.DEBUG):
      with open(self._temp_log_file_path, 'r') as f:
        wpr_log_content = '\n'.join([
            '************************** WPR LOG *****************************',
            f.read(),
            '************************** END OF WPR LOG **********************'])
      logging.debug(wpr_log_content)
    os.remove(self._temp_log_file_path)
    self._temp_log_file_path = None

  def __enter__(self):
    """Add support for with-statement."""
    self.StartServer()
    return self

  def __exit__(self, unused_exc_type, unused_exc_val, unused_exc_tb):
    """Add support for with-statement."""
    self.StopServer()

  def _UrlOpen(self, url_path, protocol='http'):
    """Open a Replay URL.

    For matching requests in the archive, Replay relies on the "Host:" header.
    For Replay command URLs, the "Host:" header is not needed.

    Args:
      url_path: WPR server request path.
      protocol: 'http' or 'https'
    Returns:
      a file-like object from urllib.urlopen
    Raises:
      KeyError: if the port for protocol has not been parsed from the log.
    """
    url = '%s://%s:%s/%s' % (
        protocol, self._replay_host, self._started_ports[protocol], url_path)
    return urllib.urlopen(url, proxies={})
310
311def _ResetInterruptHandler():
312  """Reset the interrupt handler back to the default.
313
314  The replay process is stopped gracefully by making an HTTP request
315  ('web-page-replay-command-exit'). The graceful exit is important for
316  restoring the DNS configuration. If the HTTP request fails, the fallback
317  is to send SIGINT to the process.
318
319  On posix system, running this function before starting replay fixes a
320  bug that shows up when Telemetry is run as a background command from a
321  script. https://crbug.com/254572.
322
323  Background: Signal masks on Linux are inherited from parent
324  processes. If anything invoking us accidentally masks SIGINT
325  (e.g. by putting a process in the background from a shell script),
326  sending a SIGINT to the child will fail to terminate it.
327  """
328  signal.signal(signal.SIGINT, signal.SIG_DFL)
329