1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Archives or replays webpages and creates SKPs in a Google Storage location.
7
8To archive webpages and store SKP files (archives should be rarely updated):
9
10cd skia
11python tools/skp/webpages_playback.py --data_store=gs://rmistry --record \
12--page_sets=all --skia_tools=/home/default/trunk/out/Debug/ \
13--browser_executable=/tmp/chromium/out/Release/chrome
14
15The above command uses Google Storage bucket 'rmistry' to download needed files.
16
17To replay archived webpages and re-generate SKP files (should be run whenever
18SkPicture.PICTURE_VERSION changes):
19
20cd skia
21python tools/skp/webpages_playback.py --data_store=gs://rmistry \
22--page_sets=all --skia_tools=/home/default/trunk/out/Debug/ \
23--browser_executable=/tmp/chromium/out/Release/chrome
24
25
26Specify the --page_sets flag (default value is 'all') to pick a list of which
27webpages should be archived and/or replayed. Eg:
28
29--page_sets=tools/skp/page_sets/skia_yahooanswers_desktop.py,\
30tools/skp/page_sets/skia_googlecalendar_nexus10.py
31
32The --browser_executable flag should point to the browser binary you want to use
33to capture archives and/or capture SKP files. Majority of the time it should be
34a newly built chrome binary.
35
36The --data_store flag controls where the needed artifacts, such as
37credential files, are downloaded from. It also controls where the
38generated artifacts, such as recorded webpages and resulting skp renderings,
39are uploaded to. URLs with scheme 'gs://' use Google Storage. Otherwise
40use local filesystem.
41
The --upload flag means generated artifacts will be uploaded or copied to the
location specified by --data_store. It is a switch that takes no value (default
is False if not specified).
45
46The --non-interactive flag controls whether the script will prompt the user
47(default value is False if not specified).
48
49The --skia_tools flag if specified will allow this script to run
50debugger, render_pictures, and render_pdfs on the captured
SKP(s). The tools are run after all SKPs are successfully captured to make sure
52they can be added to the buildbots with no breakages.
53"""
54
55import glob
56import optparse
57import os
58import posixpath
59import shutil
60import subprocess
61import sys
62import tempfile
63import time
64import traceback
65
66
# Directory names shared by the local output tree and the data store layout.
ROOT_PLAYBACK_DIR_NAME = 'playback'
SKPICTURES_DIR_NAME = 'skps'

# A --data_store value starting with this scheme selects Google Storage.
GS_PREFIX = 'gs://'

# Bucket that receives SKPs when --upload_to_partner_bucket is given.
PARTNERS_GS_BUCKET = 'gs://chrome-partner-telemetry'

# Directory, next to this script, where webpage archives are read from and
# written to locally.
LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'page_sets', 'data')
# Scratch directory where the benchmark drops freshly captured SKPs.
TMP_SKP_DIR = tempfile.mkdtemp()

# Local location of the credentials.json file (it lives in the archive dir).
CREDENTIALS_FILE_PATH = os.path.join(
    LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR, 'credentials.json')

# Name of the benchmark that captures SKPs.
SKP_BENCHMARK = 'skpicture_printer'

# SKP base names (before the '.skp' extension) are truncated to this length.
MAX_SKP_BASE_NAME_LEN = 31

# Maps the device suffix of a Skia page set to the platform prefix of its SKP.
DEVICE_TO_PLATFORM_PREFIX = {
    'desktop': 'desk',
    'galaxynexus': 'mobi',
    'nexus10': 'tabl'
}

# Number of attempts given to the record_wpr and run_benchmark binaries,
# respectively.
RETRY_RECORD_WPR_COUNT = 5
RETRY_RUN_MEASUREMENT_COUNT = 5

# Path of the credentials.json file inside the data store.
CREDENTIALS_GS_PATH = 'playback/credentials/credentials.json'

# X11 display handed to the telemetry binaries; falls back to ':0'.
X11_DISPLAY = os.environ.get('DISPLAY', ':0')

# Location of Chromium's page sets, relative to the chromium src directory.
CHROMIUM_PAGE_SETS_PATH = os.path.join('tools', 'perf', 'page_sets')

# Supported Chromium page sets, mapped to the prefix of the SKPs they produce.
CHROMIUM_PAGE_SETS_TO_PREFIX = {
    'key_mobile_sites_smooth.py': 'keymobi',
    'top_25_smooth.py': 'top25desk',
}
117
118
def remove_prefix(s, prefix):
  """Returns |s| without a leading |prefix|; |s| is unchanged if it is absent."""
  return s[len(prefix):] if s.startswith(prefix) else s
123
124
class SkPicturePlayback(object):
  """Class that archives or replays webpages and creates SKPs.

  For every page set it either records a new webpage archive (--record) or
  downloads an existing one from the data store, runs the skpicture_printer
  benchmark to capture SKPs, and moves the largest captured SKP into the local
  SKP directory under a descriptive name. Optionally the Skia tools are run on
  the results and the results are uploaded.
  """

  def __init__(self, parse_options):
    """Constructs a SkPicturePlayback BuildStep instance.

    Args:
      parse_options: object holding the parsed command-line options. The
          attributes read are browser_executable, browser_extra_args,
          chrome_src_path, page_sets, record, skia_tools, non_interactive,
          upload, skp_prefix, data_store, upload_to_partner_bucket,
          alternate_upload_dir and output_dir.

    Raises:
      AssertionError: if browser_executable or chrome_src_path is missing.
      ValueError: if no page sets were specified.
    """
    assert parse_options.browser_executable, 'Must specify --browser_executable'
    # chrome_src_path defaults to None, which would otherwise surface as an
    # obscure failure inside os.path.join below.
    assert parse_options.chrome_src_path is not None, (
        'Must specify --chrome_src_path')
    self._browser_executable = parse_options.browser_executable
    self._browser_args = '--disable-setuid-sandbox'
    if parse_options.browser_extra_args:
      self._browser_args = '%s %s' % (
          self._browser_args, parse_options.browser_extra_args)

    self._chrome_page_sets_path = os.path.join(parse_options.chrome_src_path,
                                               CHROMIUM_PAGE_SETS_PATH)
    # _ParsePageSets reads _all_page_sets_specified, so it must be set first.
    self._all_page_sets_specified = parse_options.page_sets == 'all'
    self._page_sets = self._ParsePageSets(parse_options.page_sets)

    self._record = parse_options.record
    self._skia_tools = parse_options.skia_tools
    self._non_interactive = parse_options.non_interactive
    self._upload = parse_options.upload
    self._skp_prefix = parse_options.skp_prefix
    data_store_location = parse_options.data_store
    # The attribute is called 'gs' for both kinds of store; the two classes
    # expose the same interface.
    if data_store_location.startswith(GS_PREFIX):
      self.gs = GoogleStorageDataStore(data_store_location)
    else:
      self.gs = LocalFileSystemDataStore(data_store_location)
    self._upload_to_partner_bucket = parse_options.upload_to_partner_bucket
    self._alternate_upload_dir = parse_options.alternate_upload_dir
    self._telemetry_binaries_dir = os.path.join(parse_options.chrome_src_path,
                                                'tools', 'perf')
    self._catapult_dir = os.path.join(parse_options.chrome_src_path,
                                      'third_party', 'catapult')

    self._local_skp_dir = os.path.join(
        parse_options.output_dir, ROOT_PLAYBACK_DIR_NAME, SKPICTURES_DIR_NAME)
    self._local_record_webpages_archive_dir = os.path.join(
        parse_options.output_dir, ROOT_PLAYBACK_DIR_NAME, 'webpages_archive')

    # List of SKP files generated by this script.
    self._skp_files = []

  def _ParsePageSets(self, page_sets):
    """Expands the --page_sets value into a sorted list of page set paths.

    Args:
      page_sets: 'all', a glob pattern (anything containing '*'), or a
          comma-separated list of page set paths.

    Returns:
      A sorted list of page set paths.

    Raises:
      ValueError: if page_sets is empty.
    """
    if not page_sets:
      raise ValueError('Must specify at least one page_set!')
    elif self._all_page_sets_specified:
      # Get everything from the page_sets directory.
      page_sets_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                   'page_sets')
      ps = [os.path.join(page_sets_dir, page_set)
            for page_set in os.listdir(page_sets_dir)
            if not os.path.isdir(os.path.join(page_sets_dir, page_set)) and
               page_set.endswith('.py')]
      # 'all' also includes the supported Chromium page sets.
      chromium_ps = [
          os.path.join(self._chrome_page_sets_path, cr_page_set)
          for cr_page_set in CHROMIUM_PAGE_SETS_TO_PREFIX]
      ps.extend(chromium_ps)
    elif '*' in page_sets:
      # Explode and return the glob.
      ps = glob.glob(page_sets)
    else:
      ps = page_sets.split(',')
    ps.sort()
    return ps

  def _IsChromiumPageSet(self, page_set):
    """Returns true if the specified page set is a Chromium page set."""
    return page_set.startswith(self._chrome_page_sets_path)

  def Run(self):
    """Run the SkPicturePlayback BuildStep.

    Returns:
      0 once every page set has been captured and the requested uploads are
      done.

    Raises:
      Exception: if record_wpr or run_benchmark still fails for a page set
          after all of its retries. Errors from the data store, the Skia
          tools and _DownloadWebpagesArchive propagate unchanged.
    """

    # Download the credentials file if it was not previously downloaded.
    if not os.path.isfile(CREDENTIALS_FILE_PATH):
      # Download the credentials.json file from Google Storage.
      self.gs.download_file(CREDENTIALS_GS_PATH, CREDENTIALS_FILE_PATH)

    if not os.path.isfile(CREDENTIALS_FILE_PATH):
      print("""\n\nCould not locate credentials file in the storage.
      Please create a %s file that contains:
      {
        "google": {
          "username": "google_testing_account_username",
          "password": "google_testing_account_password"
        },
        "facebook": {
          "username": "facebook_testing_account_username",
          "password": "facebook_testing_account_password"
        }
      }\n\n""" % CREDENTIALS_FILE_PATH)
      # --non-interactive promises that the script never prompts, so only
      # wait for the user when prompting is allowed.
      if not self._non_interactive:
        raw_input("Please press a key when you are ready to proceed...")

    # Delete any left over data files in the data directory.
    for archive_file in glob.glob(
        os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR, 'skia_*')):
      os.remove(archive_file)

    # Create the required local storage directories.
    self._CreateLocalStorageDirs()

    # Start the timer.
    start_time = time.time()

    # Loop through all page_sets.
    for page_set in self._page_sets:

      page_set_basename = os.path.basename(page_set).split('.')[0]
      page_set_json_name = page_set_basename + '.json'
      wpr_data_file = page_set_basename + '_000.wpr'
      page_set_dir = os.path.dirname(page_set)

      if self._IsChromiumPageSet(page_set):
        print('Using Chromium\'s captured archives for Chromium\'s page sets.')
      elif self._record:
        # Create an archive of the specified webpages if '--record' is
        # specified. The leading VAR=value items are environment assignments
        # for the shell that runs the joined command line.
        record_wpr_cmd = (
          'PYTHONPATH=%s:%s:$PYTHONPATH' % (page_set_dir, self._catapult_dir),
          'DISPLAY=%s' % X11_DISPLAY,
          os.path.join(self._telemetry_binaries_dir, 'record_wpr'),
          '--extra-browser-args="%s"' % self._browser_args,
          '--browser=exact',
          '--browser-executable=%s' % self._browser_executable,
          '%s_page_set' % page_set_basename,
          '--page-set-base-dir=%s' % page_set_dir
        )
        for _ in range(RETRY_RECORD_WPR_COUNT):
          try:
            subprocess.check_call(' '.join(record_wpr_cmd), shell=True)

            # Move over the created archive into the local webpages archive
            # directory.
            shutil.move(
              os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR, wpr_data_file),
              self._local_record_webpages_archive_dir)
            shutil.move(
              os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR,
                           page_set_json_name),
              self._local_record_webpages_archive_dir)

            # Break out of the retry loop since there were no errors.
            break
          except Exception:
            # There was a failure continue with the loop.
            traceback.print_exc()
        else:
          # If we get here then record_wpr did not succeed and thus did not
          # break out of the loop.
          raise Exception('record_wpr failed for page_set: %s' % page_set)

      else:
        # Get the webpages archive so that it can be replayed.
        self._DownloadWebpagesArchive(wpr_data_file, page_set_json_name)

      run_benchmark_cmd = (
          'PYTHONPATH=%s:%s:$PYTHONPATH' % (page_set_dir, self._catapult_dir),
          'DISPLAY=%s' % X11_DISPLAY,
          'timeout', '1800',
          os.path.join(self._telemetry_binaries_dir, 'run_benchmark'),
          '--extra-browser-args="%s"' % self._browser_args,
          '--browser=exact',
          '--browser-executable=%s' % self._browser_executable,
          SKP_BENCHMARK,
          '--page-set-name=%s' % page_set_basename,
          '--page-set-base-dir=%s' % page_set_dir,
          '--skp-outdir=%s' % TMP_SKP_DIR,
          '--also-run-disabled-tests'
      )

      for _ in range(RETRY_RUN_MEASUREMENT_COUNT):
        try:
          print('\n\n=======Capturing SKP of %s=======\n\n' % page_set)
          subprocess.check_call(' '.join(run_benchmark_cmd), shell=True)
        except subprocess.CalledProcessError:
          # skpicture_printer sometimes fails with AssertionError but the
          # captured SKP is still valid. This is a known issue. Whether the
          # capture really worked is decided by the rename step below.
          pass

        # Rename generated SKP files into more descriptive names.
        try:
          self._RenameSkpFiles(page_set)
          # Break out of the retry loop since there were no errors.
          break
        except Exception:
          # There was a failure continue with the loop.
          traceback.print_exc()
          print('\n\n=======Retrying %s=======\n\n' % page_set)
          time.sleep(10)
      else:
        # If we get here then run_benchmark did not succeed and thus did not
        # break out of the loop.
        raise Exception('run_benchmark failed for page_set: %s' % page_set)

    print('\n\n=======Capturing SKP files took %s seconds=======\n\n' % (
        time.time() - start_time))

    if self._skia_tools:
      render_pictures_cmd = [
          os.path.join(self._skia_tools, 'render_pictures'),
          '-r', self._local_skp_dir
      ]
      render_pdfs_cmd = [
          os.path.join(self._skia_tools, 'render_pdfs'),
          '-r', self._local_skp_dir
      ]

      for tools_cmd in (render_pictures_cmd, render_pdfs_cmd):
        print('\n\n=======Running %s=======' % ' '.join(tools_cmd))
        subprocess.check_call(tools_cmd)

      if not self._non_interactive:
        print('\n\n=======Running debugger=======')
        os.system('%s %s' % (os.path.join(self._skia_tools, 'debugger'),
                             self._local_skp_dir))

    print('\n\n')

    if self._upload:
      print('\n\n=======Uploading to %s=======\n\n' % self.gs.target_type())
      # Copy the directory structure in the root directory into Google Storage.
      dest_dir_name = ROOT_PLAYBACK_DIR_NAME
      if self._alternate_upload_dir:
        dest_dir_name = self._alternate_upload_dir

      self.gs.upload_dir_contents(
          self._local_skp_dir, dest_dir=dest_dir_name)

      print('\n\n=======New SKPs have been uploaded to %s =======\n\n' % (
          posixpath.join(self.gs.target_name(), dest_dir_name,
                         SKPICTURES_DIR_NAME)))

    else:
      print('\n\n=======Not Uploading to %s=======\n\n' % self.gs.target_type())
      print('Generated resources are available in %s\n\n' % (
          self._local_skp_dir))

    if self._upload_to_partner_bucket:
      print('\n\n=======Uploading to Partner bucket %s =======\n\n' % (
          PARTNERS_GS_BUCKET))
      partner_gs = GoogleStorageDataStore(PARTNERS_GS_BUCKET)
      # The previous set of SKPs is removed before the new one is uploaded.
      partner_gs.delete_path(SKPICTURES_DIR_NAME)
      print('Uploading %s to %s' % (self._local_skp_dir, SKPICTURES_DIR_NAME))
      partner_gs.upload_dir_contents(self._local_skp_dir, SKPICTURES_DIR_NAME)
      print('\n\n=======New SKPs have been uploaded to %s =======\n\n' % (
          posixpath.join(partner_gs.target_name(), SKPICTURES_DIR_NAME)))

    return 0

  def _GetSkiaSkpFileName(self, page_set):
    """Returns the SKP file name for Skia page sets.

    The page set base name must consist of exactly three '_'-separated parts
    whose last part is a key of DEVICE_TO_PLATFORM_PREFIX; otherwise the
    unpacking raises ValueError or the lookup raises KeyError.
    """
    # /path/to/skia_yahooanswers_desktop.py -> skia_yahooanswers_desktop.py
    ps_filename = os.path.basename(page_set)
    # skia_yahooanswers_desktop.py -> skia_yahooanswers_desktop
    ps_basename, _ = os.path.splitext(ps_filename)
    # skia_yahooanswers_desktop -> skia, yahooanswers, desktop
    _, page_name, device = ps_basename.split('_')
    basename = '%s_%s' % (DEVICE_TO_PLATFORM_PREFIX[device], page_name)
    return basename[:MAX_SKP_BASE_NAME_LEN] + '.skp'

  def _GetChromiumSkpFileName(self, page_set, site):
    """Returns the SKP file name for Chromium page sets.

    Args:
      page_set: path of the Chromium page set; its file name must be a key of
          CHROMIUM_PAGE_SETS_TO_PREFIX.
      site: path of the directory the SKPs of one webpage were captured into.
    """
    # /path/to/http___mobile_news_sandbox_pt0 -> http___mobile_news_sandbox_pt0
    _, webpage = os.path.split(site)
    # http___mobile_news_sandbox_pt0 -> mobile_news_sandbox_pt0
    # The prefixes are tried in order, so 'http___www_x' ends up as 'x'.
    for prefix in ('http___', 'https___', 'www_'):
      if webpage.startswith(prefix):
        webpage = webpage[len(prefix):]
    # /path/to/top_25_smooth.py -> top_25_smooth.py
    ps_filename = os.path.basename(page_set)
    # mobile_news_sandbox_pt0 -> pagesetprefix_mobile_news_sandbox_pt0
    basename = '%s_%s' % (CHROMIUM_PAGE_SETS_TO_PREFIX[ps_filename], webpage)
    return basename[:MAX_SKP_BASE_NAME_LEN] + '.skp'

  def _RenameSkpFiles(self, page_set):
    """Rename generated SKP files into more descriptive names.

    Look into the subdirectory of TMP_SKP_DIR and find the most interesting
    .skp in there to be this page_set's representative .skp.

    Raises:
      ValueError: if an entry of TMP_SKP_DIR contains no .skp file. Run()
          treats any exception from this method as a failed capture and
          retries.
    """
    subdirs = glob.glob(os.path.join(TMP_SKP_DIR, '*'))
    for site in subdirs:
      if self._IsChromiumPageSet(page_set):
        filename = self._GetChromiumSkpFileName(page_set, site)
      else:
        filename = self._GetSkiaSkpFileName(page_set)
      filename = filename.lower()

      if self._skp_prefix:
        filename = '%s%s' % (self._skp_prefix, filename)

      skp_candidates = glob.glob(os.path.join(site, '*.skp'))
      if not skp_candidates:
        # Fail with a message that names the problem instead of the opaque
        # "max() arg is an empty sequence".
        raise ValueError(
            'No .skp files found in %s for page set %s' % (site, page_set))

      # We choose the largest .skp as the most likely to be interesting.
      largest_skp = max(skp_candidates,
                        key=lambda path: os.stat(path).st_size)
      dest = os.path.join(self._local_skp_dir, filename)
      print('Moving %s to %s' % (largest_skp, dest))
      shutil.move(largest_skp, dest)
      self._skp_files.append(filename)
      shutil.rmtree(site)

  def _CreateLocalStorageDirs(self):
    """Creates required local storage directories for this script.

    Directories that already exist are deleted first, so each run starts
    with empty output directories.
    """
    for d in (self._local_record_webpages_archive_dir,
              self._local_skp_dir):
      if os.path.exists(d):
        shutil.rmtree(d)
      os.makedirs(d)

  def _DownloadWebpagesArchive(self, wpr_data_file, page_set_json_name):
    """Downloads the webpages archive and its required page set from GS.

    Args:
      wpr_data_file: file name of the .wpr archive.
      page_set_json_name: file name of the page set's .json file.

    Raises:
      Exception: if either file is missing from the data store.
    """
    wpr_source = posixpath.join(ROOT_PLAYBACK_DIR_NAME, 'webpages_archive',
                                wpr_data_file)
    page_set_source = posixpath.join(ROOT_PLAYBACK_DIR_NAME,
                                     'webpages_archive',
                                     page_set_json_name)
    gs = self.gs
    if (gs.does_storage_object_exist(wpr_source) and
        gs.does_storage_object_exist(page_set_source)):
      gs.download_file(wpr_source,
                       os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR,
                                    wpr_data_file))
      gs.download_file(page_set_source,
                       os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR,
                                    page_set_json_name))
    else:
      # The two paths come first and the store description last, matching
      # the order of the placeholders in the message.
      raise Exception('%s and %s do not exist in %s!' % (
          wpr_source, page_set_source, gs.target_type()))
451
class DataStore(object):
  """An abstract base class for uploading recordings to a data storage.

  The interface emulates the google storage api. Every method raises
  NotImplementedError here and is meant to be overridden by a concrete store.
  """

  def target_name(self):
    """Returns the location of the store (used when reporting upload paths)."""
    raise NotImplementedError()

  def target_type(self):
    """Returns a description of the store for log and error messages."""
    raise NotImplementedError()

  def does_storage_object_exist(self, name):
    """Returns whether the object |name| (relative to the store) exists."""
    raise NotImplementedError()

  def delete_path(self, path):
    """Deletes |path| (relative to the store) and everything below it."""
    raise NotImplementedError()

  def download_file(self, name, local_path):
    """Copies the object |name| (relative to the store) to |local_path|."""
    raise NotImplementedError()

  def upload_dir_contents(self, source_dir, dest_dir):
    """Copies the local |source_dir| into |dest_dir| (relative to the store)."""
    raise NotImplementedError()
465
466
class GoogleStorageDataStore(DataStore):
  """DataStore that talks to a Google Storage location through gsutil."""

  def __init__(self, data_store_url):
    # Trailing slashes are dropped before object paths get appended.
    self._url = data_store_url.rstrip('/')

  def _object_url(self, relative_path):
    """Returns the full URL of |relative_path| inside this store."""
    return '/'.join((self._url, relative_path))

  def target_name(self):
    return self._url

  def target_type(self):
    return 'Google Storage'

  def does_storage_object_exist(self, name):
    """Returns True only if 'gsutil ls' succeeds and lists exactly one line."""
    listing_cmd = ['gsutil', 'ls', self._object_url(name)]
    try:
      listing = subprocess.check_output(listing_cmd)
    except subprocess.CalledProcessError:
      return False
    return len(listing.splitlines()) == 1

  def delete_path(self, path):
    """Recursively removes |path| from the store; raises if gsutil fails."""
    target = self._object_url(path)
    subprocess.check_call(['gsutil', 'rm', '-r', target])

  def download_file(self, name, local_path):
    """Copies the object |name| to |local_path|; raises if gsutil fails."""
    source = self._object_url(name)
    subprocess.check_call(['gsutil', 'cp', source, local_path])

  def upload_dir_contents(self, source_dir, dest_dir):
    """Recursively copies |source_dir| to |dest_dir|; raises if gsutil fails."""
    destination = self._object_url(dest_dir)
    subprocess.check_call(['gsutil', 'cp', '-r', source_dir, destination])
497
498
class LocalFileSystemDataStore(DataStore):
  """DataStore that keeps its objects below a local base directory.

  Every object name or path handed to a method is interpreted relative to
  that base directory.
  """

  def __init__(self, data_store_location):
    self._base_dir = data_store_location

  def target_name(self):
    return self._base_dir

  def target_type(self):
    # There is no separate type name for a local store; the base directory
    # itself is what shows up in log and error messages.
    return self._base_dir

  def does_storage_object_exist(self, name):
    """Returns whether |name| is an existing file below the base directory."""
    return os.path.isfile(os.path.join(self._base_dir, name))

  def delete_path(self, path):
    """Recursively deletes the directory |path| below the base directory."""
    # Resolve against the base directory like every other method does, so
    # the deletion never targets a path relative to the current working
    # directory.
    shutil.rmtree(os.path.join(self._base_dir, path))

  def download_file(self, name, local_path):
    """Copies the file |name| below the base directory to |local_path|."""
    shutil.copyfile(os.path.join(self._base_dir, name), local_path)

  def upload_dir_contents(self, source_dir, dest_dir):
    """Copies the tree under |source_dir| into |dest_dir| in the store.

    Missing destination directories are created, and files that already
    exist in the destination are overwritten.
    """
    def copytree(source_dir, dest_dir):
      if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
      for item in os.listdir(source_dir):
        source = os.path.join(source_dir, item)
        dest = os.path.join(dest_dir, item)
        if os.path.isdir(source):
          copytree(source, dest)
        else:
          shutil.copy2(source, dest)
    copytree(source_dir, os.path.join(self._base_dir, dest_dir))
524
if __name__ == '__main__':
  # One (flag, add_option keyword arguments) entry per command-line option,
  # listed in the order they appear in --help.
  flag_specs = (
      ('--page_sets', dict(
          help='Specifies the page sets to use to archive. Supports globs.',
          default='all')),
      ('--record', dict(
          action='store_true',
          help='Specifies whether a new website archive should be created.',
          default=False)),
      ('--skia_tools', dict(
          help=('Path to compiled Skia executable tools. '
                'render_pictures/render_pdfs is run on the set '
                'after all SKPs are captured. If the script is run without '
                '--non-interactive then the debugger is also run at the end. Debug '
                'builds are recommended because they seem to catch more failures '
                'than Release builds.'),
          default=None)),
      ('--upload', dict(
          action='store_true',
          help=('Uploads to Google Storage or copies to local filesystem storage '
                ' if this is True.'),
          default=False)),
      ('--upload_to_partner_bucket', dict(
          action='store_true',
          help=('Uploads SKPs to the chrome-partner-telemetry Google Storage '
                'bucket if true.'),
          default=False)),
      ('--data_store', dict(
          help=('The location of the file storage to use to download and upload '
                'files. Can be \'gs://<bucket>\' for Google Storage, or '
                'a directory for local filesystem storage'),
          default='gs://skia-skps')),
      ('--alternate_upload_dir', dict(
          help=('Uploads to a different directory in Google Storage or local '
                'storage if this flag is specified'),
          default=None)),
      ('--output_dir', dict(
          help=('Temporary directory where SKPs and webpage archives will be '
                'outputted to.'),
          default=tempfile.gettempdir())),
      ('--browser_executable', dict(
          help='The exact browser executable to run.',
          default=None)),
      ('--browser_extra_args', dict(
          help='Additional arguments to pass to the browser.',
          default=None)),
      ('--chrome_src_path', dict(
          help='Path to the chromium src directory.',
          default=None)),
      ('--non-interactive', dict(
          action='store_true',
          help='Runs the script without any prompts. If this flag is specified and '
               '--skia_tools is specified then the debugger is not run.',
          default=False)),
      ('--skp_prefix', dict(
          help='Prefix to add to the names of generated SKPs.',
          default=None)),
  )

  option_parser = optparse.OptionParser()
  for flag, option_kwargs in flag_specs:
    option_parser.add_option(flag, **option_kwargs)
  # Positional arguments are accepted but not used.
  options, _ = option_parser.parse_args()

  # The return value of Run() becomes the exit status of the script.
  sys.exit(SkPicturePlayback(options).Run())
595