1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Archives or replays webpages and creates SKPs in a Google Storage location.
7
8To archive webpages and store SKP files (archives should be rarely updated):
9
10cd skia
11python tools/skp/webpages_playback.py --data_store=gs://rmistry --record \
12--page_sets=all --skia_tools=/home/default/trunk/out/Debug/ \
13--browser_executable=/tmp/chromium/out/Release/chrome
14
15The above command uses Google Storage bucket 'rmistry' to download needed files.
16
17To replay archived webpages and re-generate SKP files (should be run whenever
18SkPicture.PICTURE_VERSION changes):
19
20cd skia
21python tools/skp/webpages_playback.py --data_store=gs://rmistry \
22--page_sets=all --skia_tools=/home/default/trunk/out/Debug/ \
23--browser_executable=/tmp/chromium/out/Release/chrome
24
25
26Specify the --page_sets flag (default value is 'all') to pick a list of which
27webpages should be archived and/or replayed. Eg:
28
29--page_sets=tools/skp/page_sets/skia_yahooanswers_desktop.py,\
30tools/skp/page_sets/skia_googlecalendar_nexus10.py
31
32The --browser_executable flag should point to the browser binary you want to use
33to capture archives and/or capture SKP files. Majority of the time it should be
34a newly built chrome binary.
35
36The --data_store flag controls where the needed artifacts, such as
37credential files, are downloaded from. It also controls where the
38generated artifacts, such as recorded webpages and resulting skp renderings,
39are uploaded to. URLs with scheme 'gs://' use Google Storage. Otherwise
40use local filesystem.
41
The --upload flag means generated artifacts will be
uploaded or copied to the location specified by --data_store. (Nothing is
uploaded if the flag is not specified.)
45
46The --non-interactive flag controls whether the script will prompt the user
47(default value is False if not specified).
48
49The --skia_tools flag if specified will allow this script to run
50debugger, render_pictures, and render_pdfs on the captured
SKP(s). The tools are run after all SKPs are successfully captured to make sure
52they can be added to the buildbots with no breakages.
53"""
54
55import glob
56import optparse
57import os
58import posixpath
59import shutil
60import subprocess
61import sys
62import tempfile
63import time
64import traceback
65
66sys.path.insert(0, os.getcwd())
67
68from common.py.utils import gs_utils
69from common.py.utils import shell_utils
70
# Name of the root directory that holds this script's artifacts; it is used
# both for local paths and for paths inside the data store.
ROOT_PLAYBACK_DIR_NAME = 'playback'
# Name of the subdirectory of the root directory that holds the SKP files.
SKPICTURES_DIR_NAME = 'skps'


# Local archive and SKP directories.
# Playback root directory inside the system temporary directory.
LOCAL_PLAYBACK_ROOT_DIR = os.path.join(
    tempfile.gettempdir(), ROOT_PLAYBACK_DIR_NAME)
# The page_sets/data directory next to this script. Freshly recorded archives
# are moved out of it and archives downloaded for replay are placed into it.
LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR = os.path.join(
    os.path.abspath(os.path.dirname(__file__)), 'page_sets', 'data')
# Scratch directory handed to run_benchmark via --skp-outdir.
# NOTE: it is created as a side effect of importing this module.
TMP_SKP_DIR = tempfile.mkdtemp()

# Local location of the credentials.json file.
CREDENTIALS_FILE_PATH = os.path.join(
    os.path.abspath(os.path.dirname(__file__)), 'page_sets', 'data',
    'credentials.json'
)

# Name of the SKP benchmark
SKP_BENCHMARK = 'skpicture_printer'

# The max base name length of Skp files. Names are truncated to this length
# before the '.skp' extension is appended.
MAX_SKP_BASE_NAME_LEN = 31

# Dictionary of device to platform prefixes for SKP files. The device is the
# last '_'-separated component of a Skia page set's file name.
DEVICE_TO_PLATFORM_PREFIX = {
    'desktop': 'desk',
    'galaxynexus': 'mobi',
    'nexus10': 'tabl'
}

# How many times the record_wpr binary should be retried.
RETRY_RECORD_WPR_COUNT = 5
# How many times the run_benchmark binary should be retried.
RETRY_RUN_MEASUREMENT_COUNT = 5

# Location of the credentials.json file in the data store (Google Storage or
# a local directory, depending on --data_store).
CREDENTIALS_GS_PATH = '/playback/credentials/credentials.json'

# X11 display exported to record_wpr and run_benchmark; falls back to ':0'
# when the DISPLAY environment variable is not set.
X11_DISPLAY = os.getenv('DISPLAY', ':0')

# ACLs passed along when uploading artifacts to the data store.
# NOTE(review): presumably "private, plus read access for the google.com
# domain" -- confirm against gs_utils.
GS_PREDEFINED_ACL = gs_utils.GSUtils.PredefinedACL.PRIVATE
GS_FINE_GRAINED_ACL_LIST = [
  (gs_utils.GSUtils.IdType.GROUP_BY_DOMAIN, 'google.com',
   gs_utils.GSUtils.Permission.READ),
]

# Path to Chromium's page sets, relative to the Chromium src directory.
CHROMIUM_PAGE_SETS_PATH = os.path.join('tools', 'perf', 'page_sets')

# Dictionary of supported Chromium page sets to their file prefixes.
CHROMIUM_PAGE_SETS_TO_PREFIX = {
    'key_mobile_sites_smooth.py': 'keymobi',
    'top_25_smooth.py': 'top25desk',
}
126
127
def remove_prefix(s, prefix):
  """Returns s without its leading prefix; s is returned as-is otherwise."""
  return s[len(prefix):] if s.startswith(prefix) else s
132
133
class SkPicturePlayback(object):
  """Class that archives or replays webpages and creates SKPs.

  For every page set the webpage archive is either recorded (--record),
  downloaded from the data store, or, for Chromium page sets, Chromium's own
  captured archives are used. run_benchmark then captures the SKPs, the
  optional Skia tools are run over them and the generated artifacts are
  optionally uploaded to the data store.
  """

  def __init__(self, parse_options):
    """Constructs a SkPicturePlayback BuildStep instance.

    Args:
      parse_options: The parsed command line options. browser_executable and
          chrome_src_path are required.

    Raises:
      AssertionError: if a required option is missing.
      ValueError: if no page set was specified.
    """
    assert parse_options.browser_executable, 'Must specify --browser_executable'
    # chrome_src_path defaults to None and is joined into paths right below;
    # fail with a readable message instead of deep inside os.path.join.
    assert parse_options.chrome_src_path is not None, (
        'Must specify --chrome_src_path')
    self._browser_executable = parse_options.browser_executable
    self._browser_args = '--disable-setuid-sandbox'
    if parse_options.browser_extra_args:
      self._browser_args = '%s %s' % (
          self._browser_args, parse_options.browser_extra_args)

    self._chrome_page_sets_path = os.path.join(parse_options.chrome_src_path,
                                               CHROMIUM_PAGE_SETS_PATH)
    self._all_page_sets_specified = parse_options.page_sets == 'all'
    self._page_sets = self._ParsePageSets(parse_options.page_sets)

    self._record = parse_options.record
    self._skia_tools = parse_options.skia_tools
    self._non_interactive = parse_options.non_interactive
    self._upload = parse_options.upload
    self._skp_prefix = parse_options.skp_prefix
    data_store_location = parse_options.data_store
    if data_store_location.startswith(gs_utils.GS_PREFIX):
      self.gs = GoogleStorageDataStore(data_store_location)
    else:
      self.gs = LocalFileSystemDataStore(data_store_location)
    self._alternate_upload_dir = parse_options.alternate_upload_dir
    self._telemetry_binaries_dir = os.path.join(parse_options.chrome_src_path,
                                                'tools', 'perf')

    # Every local artifact lives below <output_dir>/playback. The same root
    # is cleaned up, uploaded and reported, so that a non-default --output_dir
    # is honoured consistently.
    self._local_playback_root_dir = os.path.join(
        parse_options.output_dir, ROOT_PLAYBACK_DIR_NAME)
    self._local_skp_dir = os.path.join(
        self._local_playback_root_dir, SKPICTURES_DIR_NAME)
    self._local_record_webpages_archive_dir = os.path.join(
        self._local_playback_root_dir, 'webpages_archive')

    # List of SKP files generated by this script.
    self._skp_files = []

  def _ParsePageSets(self, page_sets):
    """Expands the --page_sets value into a sorted list of page set paths.

    Args:
      page_sets: 'all', a glob pattern or a comma separated list of paths.

    Returns:
      The sorted list of page set paths.

    Raises:
      ValueError: if page_sets is empty.
    """
    if not page_sets:
      raise ValueError('Must specify at least one page_set!')

    if self._all_page_sets_specified:
      # Get everything from the page_sets directory.
      page_sets_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                   'page_sets')
      ps = [os.path.join(page_sets_dir, page_set)
            for page_set in os.listdir(page_sets_dir)
            if not os.path.isdir(os.path.join(page_sets_dir, page_set)) and
               page_set.endswith('.py')]
      # Add the supported Chromium page sets as well.
      ps.extend(os.path.join(self._chrome_page_sets_path, cr_page_set)
                for cr_page_set in CHROMIUM_PAGE_SETS_TO_PREFIX)
    elif '*' in page_sets:
      # Explode and return the glob.
      ps = glob.glob(page_sets)
    else:
      ps = page_sets.split(',')
    ps.sort()
    return ps

  def _IsChromiumPageSet(self, page_set):
    """Returns true if the specified page set is a Chromium page set."""
    return page_set.startswith(self._chrome_page_sets_path)

  def Run(self):
    """Run the SkPicturePlayback BuildStep.

    Returns:
      0 on success. Failures are reported by raising an exception.
    """
    self._EnsureCredentialsFile()

    # Delete any left over data files in the data directory.
    for archive_file in glob.glob(
        os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR, 'skia_*')):
      os.remove(archive_file)

    # Delete the local root directory if it already exists.
    if os.path.exists(self._local_playback_root_dir):
      shutil.rmtree(self._local_playback_root_dir)

    # Create the required local storage directories.
    self._CreateLocalStorageDirs()

    # Start the timer.
    start_time = time.time()

    # Loop through all page_sets.
    for page_set in self._page_sets:
      page_set_basename = os.path.basename(page_set).split('.')[0]
      page_set_json_name = page_set_basename + '.json'
      wpr_data_file = page_set_basename + '_000.wpr'
      page_set_dir = os.path.dirname(page_set)

      if self._IsChromiumPageSet(page_set):
        print("Using Chromium's captured archives for Chromium's page sets.")
      elif self._record:
        # Create an archive of the specified webpages if '--record' is
        # specified.
        self._RecordWebpagesArchive(page_set, page_set_basename, page_set_dir,
                                    wpr_data_file, page_set_json_name)
      else:
        # Get the webpages archive so that it can be replayed.
        self._DownloadWebpagesArchive(wpr_data_file, page_set_json_name)

      self._CaptureSkp(page_set, page_set_basename, page_set_dir)

    print('\n\n=======Capturing SKP files took %s seconds=======\n\n' % (
        time.time() - start_time))

    if self._skia_tools:
      self._RunSkiaTools()

    print('\n\n')

    if self._upload:
      print('\n\n=======Uploading to %s=======\n\n' % self.gs.target_type())
      # Copy the directory structure in the root directory into the data
      # store.
      dest_dir_name = ROOT_PLAYBACK_DIR_NAME
      if self._alternate_upload_dir:
        dest_dir_name = self._alternate_upload_dir

      self.gs.upload_dir_contents(
          self._local_playback_root_dir, dest_dir=dest_dir_name,
          upload_if=gs_utils.GSUtils.UploadIf.IF_MODIFIED,
          predefined_acl=GS_PREDEFINED_ACL,
          fine_grained_acl_list=GS_FINE_GRAINED_ACL_LIST)

      print('\n\n=======New SKPs have been uploaded to %s =======\n\n' % (
          posixpath.join(self.gs.target_name(), dest_dir_name,
                         SKPICTURES_DIR_NAME)))
    else:
      print('\n\n=======Not Uploading to %s=======\n\n' % self.gs.target_type())
      print('Generated resources are available in %s\n\n' % (
          self._local_playback_root_dir))

    return 0

  def _EnsureCredentialsFile(self):
    """Downloads the credentials file, or asks the user to create it."""
    # Download the credentials file if it was not previously downloaded.
    if not os.path.isfile(CREDENTIALS_FILE_PATH):
      self.gs.download_file(CREDENTIALS_GS_PATH, CREDENTIALS_FILE_PATH)

    if not os.path.isfile(CREDENTIALS_FILE_PATH):
      print("""\n\nCould not locate credentials file in the storage.
      Please create a %s file that contains:
      {
        "google": {
          "username": "google_testing_account_username",
          "password": "google_testing_account_password"
        },
        "facebook": {
          "username": "facebook_testing_account_username",
          "password": "facebook_testing_account_password"
        }
      }\n\n""" % CREDENTIALS_FILE_PATH)
      # --non-interactive promises a run without prompts, so only wait for
      # the user when prompting is allowed.
      if not self._non_interactive:
        raw_input("Please press a key when you are ready to proceed...")

  def _RecordWebpagesArchive(self, page_set, page_set_basename, page_set_dir,
                             wpr_data_file, page_set_json_name):
    """Records a webpage archive with record_wpr, retrying on failure.

    On success the recorded archive and its JSON file are moved into the
    local webpages archive directory.

    Raises:
      Exception: if record_wpr failed RETRY_RECORD_WPR_COUNT times.
    """
    record_wpr_cmd = (
        'PYTHONPATH=%s:$PYTHONPATH' % page_set_dir,
        'DISPLAY=%s' % X11_DISPLAY,
        os.path.join(self._telemetry_binaries_dir, 'record_wpr'),
        '--extra-browser-args="%s"' % self._browser_args,
        '--browser=exact',
        '--browser-executable=%s' % self._browser_executable,
        '%s_page_set' % page_set_basename,
        '--page-set-base-dir=%s' % page_set_dir
    )
    for _ in range(RETRY_RECORD_WPR_COUNT):
      try:
        shell_utils.run(' '.join(record_wpr_cmd), shell=True)

        # Move over the created archive into the local webpages archive
        # directory.
        for created_file in (wpr_data_file, page_set_json_name):
          shutil.move(
              os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR, created_file),
              self._local_record_webpages_archive_dir)

        # Break out of the retry loop since there were no errors.
        break
      except Exception:
        # There was a failure continue with the loop.
        traceback.print_exc()
    else:
      # If we get here then record_wpr did not succeed and thus did not
      # break out of the loop.
      raise Exception('record_wpr failed for page_set: %s' % page_set)

  def _CaptureSkp(self, page_set, page_set_basename, page_set_dir):
    """Captures the SKP of a page set with run_benchmark, retrying on failure.

    An attempt counts as successful when the generated SKP files could be
    renamed, regardless of the exit status of run_benchmark.

    Raises:
      Exception: if all RETRY_RUN_MEASUREMENT_COUNT attempts failed.
    """
    run_benchmark_cmd = (
        'PYTHONPATH=%s:$PYTHONPATH' % page_set_dir,
        'DISPLAY=%s' % X11_DISPLAY,
        'timeout', '300',
        os.path.join(self._telemetry_binaries_dir, 'run_benchmark'),
        '--extra-browser-args="%s"' % self._browser_args,
        '--browser=exact',
        '--browser-executable=%s' % self._browser_executable,
        SKP_BENCHMARK,
        '--page-set-name=%s' % page_set_basename,
        '--page-set-base-dir=%s' % page_set_dir,
        '--skp-outdir=%s' % TMP_SKP_DIR,
        '--also-run-disabled-tests'
    )

    for _ in range(RETRY_RUN_MEASUREMENT_COUNT):
      try:
        print('\n\n=======Capturing SKP of %s=======\n\n' % page_set)
        shell_utils.run(' '.join(run_benchmark_cmd), shell=True)
      except shell_utils.CommandFailedException:
        # skpicture_printer sometimes fails with AssertionError but the
        # captured SKP is still valid. This is a known issue.
        pass

      # Rename generated SKP files into more descriptive names.
      try:
        self._RenameSkpFiles(page_set)
        # Break out of the retry loop since there were no errors.
        break
      except Exception:
        # There was a failure continue with the loop.
        traceback.print_exc()
        print('\n\n=======Retrying %s=======\n\n' % page_set)
        time.sleep(10)
    else:
      # If we get here then run_benchmark did not succeed and thus did not
      # break out of the loop.
      raise Exception('run_benchmark failed for page_set: %s' % page_set)

  def _RunSkiaTools(self):
    """Runs render_pictures, render_pdfs and the debugger on the SKPs.

    The debugger is skipped when running non-interactively.

    Raises:
      Exception: if render_pictures or render_pdfs exits with a non-zero code.
    """
    render_pictures_cmd = [
        os.path.join(self._skia_tools, 'render_pictures'),
        '-r', self._local_skp_dir
    ]
    render_pdfs_cmd = [
        os.path.join(self._skia_tools, 'render_pdfs'),
        '-r', self._local_skp_dir
    ]

    for tools_cmd in (render_pictures_cmd, render_pdfs_cmd):
      print('\n\n=======Running %s=======' % ' '.join(tools_cmd))
      proc = subprocess.Popen(tools_cmd)
      (code, _) = shell_utils.log_process_after_completion(proc, echo=False)
      if code != 0:
        raise Exception('%s failed!' % ' '.join(tools_cmd))

    if not self._non_interactive:
      print('\n\n=======Running debugger=======')
      os.system('%s %s' % (os.path.join(self._skia_tools, 'debugger'),
                           self._local_skp_dir))

  def _GetSkiaSkpFileName(self, page_set):
    """Returns the SKP file name for Skia page sets.

    Raises:
      ValueError: if the page set name is not of the form <a>_<page>_<device>.
      KeyError: if the device is not in DEVICE_TO_PLATFORM_PREFIX.
    """
    # /path/to/skia_yahooanswers_desktop.py -> skia_yahooanswers_desktop.py
    ps_filename = os.path.basename(page_set)
    # skia_yahooanswers_desktop.py -> skia_yahooanswers_desktop
    ps_basename, _ = os.path.splitext(ps_filename)
    # skia_yahooanswers_desktop -> skia, yahooanswers, desktop
    _, page_name, device = ps_basename.split('_')
    basename = '%s_%s' % (DEVICE_TO_PLATFORM_PREFIX[device], page_name)
    return basename[:MAX_SKP_BASE_NAME_LEN] + '.skp'

  def _GetChromiumSkpFileName(self, page_set, site):
    """Returns the SKP file name for Chromium page sets.

    Raises:
      KeyError: if the page set is not in CHROMIUM_PAGE_SETS_TO_PREFIX.
    """
    # /path/to/http___mobile_news_sandbox_pt0 -> http___mobile_news_sandbox_pt0
    _, webpage = os.path.split(site)
    # http___mobile_news_sandbox_pt0 -> mobile_news_sandbox_pt0
    # The prefixes are tried in order, so 'http___www_x' ends up as 'x'.
    for prefix in ('http___', 'https___', 'www_'):
      if webpage.startswith(prefix):
        webpage = webpage[len(prefix):]
    # /path/to/top_25_smooth.py -> top_25_smooth.py
    ps_filename = os.path.basename(page_set)
    # mobile_news_sandbox_pt0 -> pagesetprefix_mobile_news_sandbox_pt0
    basename = '%s_%s' % (CHROMIUM_PAGE_SETS_TO_PREFIX[ps_filename], webpage)
    return basename[:MAX_SKP_BASE_NAME_LEN] + '.skp'

  def _RenameSkpFiles(self, page_set):
    """Rename generated SKP files into more descriptive names.

    Look into the subdirectory of TMP_SKP_DIR and find the most interesting
    .skp in there to be this page_set's representative .skp.

    Raises:
      ValueError: if a subdirectory contains no .skp file; the caller treats
          this as a failed capture and retries.
    """
    subdirs = glob.glob(os.path.join(TMP_SKP_DIR, '*'))
    for site in subdirs:
      if self._IsChromiumPageSet(page_set):
        filename = self._GetChromiumSkpFileName(page_set, site)
      else:
        filename = self._GetSkiaSkpFileName(page_set)
      filename = filename.lower()

      if self._skp_prefix:
        filename = '%s%s' % (self._skp_prefix, filename)

      # We choose the largest .skp as the most likely to be interesting.
      largest_skp = max(glob.glob(os.path.join(site, '*.skp')),
                        key=lambda path: os.stat(path).st_size)
      dest = os.path.join(self._local_skp_dir, filename)
      print('Moving %s to %s' % (largest_skp, dest))
      shutil.move(largest_skp, dest)
      self._skp_files.append(filename)
      shutil.rmtree(site)

  def _CreateLocalStorageDirs(self):
    """Creates required local storage directories for this script."""
    for d in (self._local_record_webpages_archive_dir,
              self._local_skp_dir):
      if os.path.exists(d):
        shutil.rmtree(d)
      os.makedirs(d)

  def _DownloadWebpagesArchive(self, wpr_data_file, page_set_json_name):
    """Downloads the webpages archive and its required page set.

    Both files are fetched from the data store into
    LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR.

    Raises:
      Exception: if either file is missing from the data store.
    """
    wpr_source = posixpath.join(ROOT_PLAYBACK_DIR_NAME, 'webpages_archive',
                                wpr_data_file)
    page_set_source = posixpath.join(ROOT_PLAYBACK_DIR_NAME,
                                     'webpages_archive',
                                     page_set_json_name)
    gs = self.gs
    if not (gs.does_storage_object_exist(wpr_source) and
            gs.does_storage_object_exist(page_set_source)):
      # The arguments follow the order of the placeholders: the two missing
      # files first, then the storage they were looked up in.
      raise Exception('%s and %s do not exist in %s!' % (
          wpr_source, page_set_source, gs.target_type()))

    gs.download_file(wpr_source,
                     os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR,
                                  wpr_data_file))
    gs.download_file(page_set_source,
                     os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR,
                                  page_set_json_name))
456
class DataStore(object):
  """An abstract base class for uploading recordings to a data storage.

  The interface emulates the google storage api. Subclasses must override
  every method; the defaults only raise NotImplementedError.
  """

  def target_name(self):
    """Returns the location of the storage, as shown in log messages."""
    raise NotImplementedError()

  def target_type(self):
    """Returns a description of the storage, as shown in log messages."""
    raise NotImplementedError()

  def does_storage_object_exist(self, *args):
    """Returns whether the object at the given storage path exists."""
    raise NotImplementedError()

  def download_file(self, *args):
    """Copies the object at the given storage path to a local path."""
    raise NotImplementedError()

  def upload_dir_contents(self, source_dir, **kwargs):
    """Uploads the contents of the local source_dir into the storage."""
    raise NotImplementedError()
470
class GoogleStorageDataStore(DataStore):
  """DataStore that forwards every operation to gs_utils.GSUtils."""

  def __init__(self, data_store_url):
    """Remembers the URL and derives the bucket name from it.

    Args:
      data_store_url: URL of the bucket; leading whitespace and the
          gs_utils.GS_PREFIX scheme are removed to obtain the bucket name.
    """
    self._data_store_url = data_store_url
    url_without_leading_space = self._data_store_url.lstrip()
    self._bucket = remove_prefix(url_without_leading_space,
                                 gs_utils.GS_PREFIX)
    self.gs = gs_utils.GSUtils()

  def target_name(self):
    """Returns the URL this data store was created with."""
    return self._data_store_url

  def target_type(self):
    """Returns the human readable name of this kind of storage."""
    return 'Google Storage'

  def does_storage_object_exist(self, *args):
    """Forwards to GSUtils.does_storage_object_exist for the bucket."""
    exists = self.gs.does_storage_object_exist(self._bucket, *args)
    return exists

  def download_file(self, *args):
    """Forwards to GSUtils.download_file for the bucket."""
    self.gs.download_file(self._bucket, *args)

  def upload_dir_contents(self, source_dir, **kwargs):
    """Forwards to GSUtils.upload_dir_contents for the bucket."""
    self.gs.upload_dir_contents(source_dir, self._bucket, **kwargs)
487
class LocalFileSystemDataStore(DataStore):
  """DataStore that keeps its objects below a local base directory."""

  def __init__(self, data_store_location):
    """Remembers data_store_location as the base directory of the store."""
    self._base_dir = data_store_location

  def _to_local_path(self, name):
    """Maps a storage path onto a path below the base directory."""
    # os.path.join drops all previous components when a later one is
    # absolute. Storage paths such as '/playback/credentials/credentials.json'
    # start with a slash, so strip it to stay inside the base directory.
    return os.path.join(self._base_dir, name.lstrip('/'))

  def target_name(self):
    """Returns the base directory of the store."""
    return self._base_dir

  def target_type(self):
    """Returns the base directory; it doubles as the storage description."""
    return self._base_dir

  def does_storage_object_exist(self, name, *args):
    """Returns whether name is an existing file below the base directory."""
    return os.path.isfile(self._to_local_path(name))

  def download_file(self, name, local_path, *args):
    """Copies the file called name from the store to local_path."""
    shutil.copyfile(self._to_local_path(name), local_path)

  def upload_dir_contents(self, source_dir, dest_dir, **kwargs):
    """Recursively copies source_dir into dest_dir below the base directory.

    Existing destination directories are merged into and existing files are
    overwritten. Additional keyword arguments are accepted for interface
    compatibility and ignored.
    """
    def copytree(source_dir, dest_dir):
      if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
      for item in os.listdir(source_dir):
        source = os.path.join(source_dir, item)
        dest = os.path.join(dest_dir, item)
        if os.path.isdir(source):
          copytree(source, dest)
        else:
          shutil.copy2(source, dest)
    copytree(source_dir, self._to_local_path(dest_dir))
511
if __name__ == '__main__':
  # Command line options as (flag, add_option keyword arguments) pairs, in
  # the order in which they are registered and listed by --help.
  option_specs = (
      ('--page_sets', dict(
          help='Specifies the page sets to use to archive. Supports globs.',
          default='all')),
      ('--record', dict(
          action='store_true',
          help='Specifies whether a new website archive should be created.',
          default=False)),
      ('--skia_tools', dict(
          help=('Path to compiled Skia executable tools. '
                'render_pictures/render_pdfs is run on the set '
                'after all SKPs are captured. If the script is run without '
                '--non-interactive then the debugger is also run at the end. '
                'Debug '
                'builds are recommended because they seem to catch more '
                'failures '
                'than Release builds.'),
          default=None)),
      ('--upload', dict(
          action='store_true',
          help=('Uploads to Google Storage or copies to local filesystem '
                'storage '
                ' if this is True.'),
          default=False)),
      ('--data_store', dict(
          help=('The location of the file storage to use to download and '
                'upload '
                'files. Can be \'gs://<bucket>\' for Google Storage, or '
                'a directory for local filesystem storage'),
          default='gs://chromium-skia-gm')),
      ('--alternate_upload_dir', dict(
          help=('Uploads to a different directory in Google Storage or local '
                'storage if this flag is specified'),
          default=None)),
      ('--output_dir', dict(
          help=('Temporary directory where SKPs and webpage archives will be '
                'outputted to.'),
          default=tempfile.gettempdir())),
      ('--browser_executable', dict(
          help='The exact browser executable to run.',
          default=None)),
      ('--browser_extra_args', dict(
          help='Additional arguments to pass to the browser.',
          default=None)),
      ('--chrome_src_path', dict(
          help='Path to the chromium src directory.',
          default=None)),
      ('--non-interactive', dict(
          action='store_true',
          help=('Runs the script without any prompts. If this flag is '
                'specified and '
                '--skia_tools is specified then the debugger is not run.'),
          default=False)),
      ('--skp_prefix', dict(
          help='Prefix to add to the names of generated SKPs.',
          default=None)),
  )

  option_parser = optparse.OptionParser()
  for flag, option_kwargs in option_specs:
    option_parser.add_option('', flag, **option_kwargs)
  options, unused_args = option_parser.parse_args()

  # Run the playback and use its return value as the exit code.
  playback = SkPicturePlayback(options)
  sys.exit(playback.Run())
577