#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Archives or replays webpages and creates SKPs in a Google Storage location.

To archive webpages and store SKP files (archives should be rarely updated):

cd skia
python tools/skp/webpages_playback.py --data_store=gs://rmistry --record \
--page_sets=all --skia_tools=/home/default/trunk/out/Debug/ \
--browser_executable=/tmp/chromium/out/Release/chrome

The above command uses Google Storage bucket 'rmistry' to download needed
files.

To replay archived webpages and re-generate SKP files (should be run whenever
SkPicture.PICTURE_VERSION changes):

cd skia
python tools/skp/webpages_playback.py --data_store=gs://rmistry \
--page_sets=all --skia_tools=/home/default/trunk/out/Debug/ \
--browser_executable=/tmp/chromium/out/Release/chrome


Specify the --page_sets flag (default value is 'all') to pick a list of which
webpages should be archived and/or replayed. E.g.:

--page_sets=tools/skp/page_sets/skia_yahooanswers_desktop.py,\
tools/skp/page_sets/skia_googlecalendar_nexus10.py

The --browser_executable flag should point to the browser binary you want to
use to capture archives and/or capture SKP files. The majority of the time it
should be a newly built chrome binary.

The --data_store flag controls where the needed artifacts, such as
credential files, are downloaded from. It also controls where the
generated artifacts, such as recorded webpages and resulting skp renderings,
are uploaded to. URLs with scheme 'gs://' use Google Storage. Otherwise
the local filesystem is used.

The --upload flag means generated artifacts will be uploaded or copied to the
location specified by --data_store (nothing is uploaded if the flag is not
specified). The flag takes no value.

The --non-interactive flag controls whether the script will prompt the user
(the script prompts if the flag is not specified).

The --skia_tools flag if specified will allow this script to run
debugger, render_pictures, and render_pdfs on the captured
SKP(s). The tools are run after all SKPs are successfully captured to make
sure they can be added to the buildbots with no breakages.
"""

import glob
import optparse
import os
import posixpath
import shutil
import subprocess
import sys
import tempfile
import time
import traceback


# Directory names used both locally (under --output_dir) and in the data store.
ROOT_PLAYBACK_DIR_NAME = 'playback'
SKPICTURES_DIR_NAME = 'skps'

# URL scheme that selects the Google Storage data store.
GS_PREFIX = 'gs://'

# Bucket that receives SKPs when --upload_to_partner_bucket is specified.
PARTNERS_GS_BUCKET = 'gs://chrome-partner-telemetry'

# Local archive and SKP directories.
LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR = os.path.join(
    os.path.abspath(os.path.dirname(__file__)), 'page_sets', 'data')
# NOTE: created as a side effect of importing this module.
TMP_SKP_DIR = tempfile.mkdtemp()

# Local location of the credentials.json file (next to the page sets' data).
CREDENTIALS_FILE_PATH = os.path.join(
    os.path.abspath(os.path.dirname(__file__)), 'page_sets', 'data',
    'credentials.json'
)

# Name of the SKP benchmark
SKP_BENCHMARK = 'skpicture_printer'

# The max base name length of Skp files.
MAX_SKP_BASE_NAME_LEN = 31

# Dictionary of device to platform prefixes for SKP files.
DEVICE_TO_PLATFORM_PREFIX = {
    'desktop': 'desk',
    'galaxynexus': 'mobi',
    'nexus10': 'tabl'
}

# How many times the record_wpr binary should be retried.
RETRY_RECORD_WPR_COUNT = 5
# How many times the run_benchmark binary should be retried.
RETRY_RUN_MEASUREMENT_COUNT = 5

# Location of the credentials.json file in Google Storage.
CREDENTIALS_GS_PATH = 'playback/credentials/credentials.json'

# X11 display handed to the telemetry binaries; falls back to ':0'.
X11_DISPLAY = os.getenv('DISPLAY', ':0')

# Path to Chromium's page sets.
CHROMIUM_PAGE_SETS_PATH = os.path.join('tools', 'perf', 'page_sets')

# Dictionary of supported Chromium page sets to their file prefixes.
CHROMIUM_PAGE_SETS_TO_PREFIX = {
    'key_mobile_sites_smooth.py': 'keymobi',
    'top_25_smooth.py': 'top25desk',
}


def remove_prefix(s, prefix):
  """Returns s without the leading prefix, or s unchanged if not prefixed."""
  if s.startswith(prefix):
    return s[len(prefix):]
  return s


class SkPicturePlayback(object):
  """Class that archives or replays webpages and creates SKPs."""

  def __init__(self, parse_options):
    """Constructs a SkPicturePlayback BuildStep instance.

    Args:
      parse_options: The optparse values object produced by the option parser
          at the bottom of this file.

    Raises:
      AssertionError: if --browser_executable or --chrome_src_path is missing.
    """
    assert parse_options.browser_executable, 'Must specify --browser_executable'
    # Every path below is derived from chrome_src_path; fail with a clear
    # message instead of an obscure error from os.path.join(None, ...).
    assert parse_options.chrome_src_path, 'Must specify --chrome_src_path'
    self._browser_executable = parse_options.browser_executable
    self._browser_args = '--disable-setuid-sandbox'
    if parse_options.browser_extra_args:
      self._browser_args = '%s %s' % (
          self._browser_args, parse_options.browser_extra_args)

    self._chrome_page_sets_path = os.path.join(parse_options.chrome_src_path,
                                               CHROMIUM_PAGE_SETS_PATH)
    self._all_page_sets_specified = parse_options.page_sets == 'all'
    self._page_sets = self._ParsePageSets(parse_options.page_sets)

    self._record = parse_options.record
    self._skia_tools = parse_options.skia_tools
    self._non_interactive = parse_options.non_interactive
    self._upload = parse_options.upload
    self._skp_prefix = parse_options.skp_prefix
    data_store_location = parse_options.data_store
    if data_store_location.startswith(GS_PREFIX):
      self.gs = GoogleStorageDataStore(data_store_location)
    else:
      self.gs = LocalFileSystemDataStore(data_store_location)
    self._upload_to_partner_bucket = parse_options.upload_to_partner_bucket
    self._alternate_upload_dir = parse_options.alternate_upload_dir
    self._telemetry_binaries_dir = os.path.join(parse_options.chrome_src_path,
                                                'tools', 'perf')
    self._catapult_dir = os.path.join(parse_options.chrome_src_path,
                                      'third_party', 'catapult')

    self._local_skp_dir = os.path.join(
        parse_options.output_dir, ROOT_PLAYBACK_DIR_NAME, SKPICTURES_DIR_NAME)
    self._local_record_webpages_archive_dir = os.path.join(
        parse_options.output_dir, ROOT_PLAYBACK_DIR_NAME, 'webpages_archive')

    # List of SKP files generated by this script.
    self._skp_files = []

  def _ParsePageSets(self, page_sets):
    """Expands the --page_sets value into a sorted list of page set paths.

    Accepts 'all' (every .py file in the local page_sets directory plus the
    supported Chromium page sets), a glob pattern, or a comma separated list.

    Raises:
      ValueError: if page_sets is empty.
    """
    if not page_sets:
      raise ValueError('Must specify at least one page_set!')
    elif self._all_page_sets_specified:
      # Get everything from the page_sets directory.
      page_sets_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                   'page_sets')
      ps = [os.path.join(page_sets_dir, page_set)
            for page_set in os.listdir(page_sets_dir)
            if not os.path.isdir(os.path.join(page_sets_dir, page_set)) and
            page_set.endswith('.py')]
      chromium_ps = [
          os.path.join(self._chrome_page_sets_path, cr_page_set)
          for cr_page_set in CHROMIUM_PAGE_SETS_TO_PREFIX]
      ps.extend(chromium_ps)
    elif '*' in page_sets:
      # Explode and return the glob.
      ps = glob.glob(page_sets)
    else:
      ps = page_sets.split(',')
    ps.sort()
    return ps

  def _IsChromiumPageSet(self, page_set):
    """Returns true if the specified page set is a Chromium page set."""
    return page_set.startswith(self._chrome_page_sets_path)

  def Run(self):
    """Run the SkPicturePlayback BuildStep.

    Returns:
      0 on success.

    Raises:
      Exception: if record_wpr or run_benchmark keeps failing for a page set,
          or if a needed webpages archive is missing from the data store.
      subprocess.CalledProcessError: if one of the Skia tools or a gsutil
          command fails.
    """
    self._EnsureCredentialsFile()

    # Delete any left over data files in the data directory.
    for archive_file in glob.glob(
        os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR, 'skia_*')):
      os.remove(archive_file)

    # Create the required local storage directories.
    self._CreateLocalStorageDirs()

    # Start the timer.
    start_time = time.time()

    # Loop through all page_sets.
    for page_set in self._page_sets:
      page_set_basename = os.path.basename(page_set).split('.')[0]
      page_set_json_name = page_set_basename + '.json'
      wpr_data_file = page_set_basename + '_000.wpr'
      page_set_dir = os.path.dirname(page_set)

      if self._IsChromiumPageSet(page_set):
        print('Using Chromium\'s captured archives for Chromium\'s page sets.')
      elif self._record:
        # Create an archive of the specified webpages if --record is
        # specified.
        self._RecordWebpagesArchive(page_set, page_set_basename, page_set_dir,
                                    wpr_data_file, page_set_json_name)
      else:
        # Get the webpages archive so that it can be replayed.
        self._DownloadWebpagesArchive(wpr_data_file, page_set_json_name)

      self._CaptureSkp(page_set, page_set_basename, page_set_dir)

    print('\n\n=======Capturing SKP files took %s seconds=======\n\n' % (
        time.time() - start_time))

    if self._skia_tools:
      self._RunSkiaTools()

    print('\n\n')

    self._UploadSkps()

    return 0

  def _EnsureCredentialsFile(self):
    """Fetches credentials.json if needed; explains how to create it if not.

    The user is only asked to confirm when the script runs interactively.
    """
    if os.path.isfile(CREDENTIALS_FILE_PATH):
      return

    # Only download when the object exists: download_file raises on a missing
    # source, which would otherwise hide the instructions printed below.
    if self.gs.does_storage_object_exist(CREDENTIALS_GS_PATH):
      self.gs.download_file(CREDENTIALS_GS_PATH, CREDENTIALS_FILE_PATH)

    if os.path.isfile(CREDENTIALS_FILE_PATH):
      return

    print("""\n\nCould not locate credentials file in the storage.
      Please create a %s file that contains:
      {
        "google": {
          "username": "google_testing_account_username",
          "password": "google_testing_account_password"
        },
        "facebook": {
          "username": "facebook_testing_account_username",
          "password": "facebook_testing_account_password"
        }
      }\n\n""" % CREDENTIALS_FILE_PATH)
    if not self._non_interactive:
      try:
        prompt = raw_input  # Python 2.
      except NameError:
        prompt = input
      prompt("Please press a key when you are ready to proceed...")

  def _RecordWebpagesArchive(self, page_set, page_set_basename, page_set_dir,
                             wpr_data_file, page_set_json_name):
    """Runs record_wpr (with retries) and moves its output to the archive dir.

    Raises:
      Exception: if record_wpr did not succeed in RETRY_RECORD_WPR_COUNT tries.
    """
    # The command is run through the shell so that the leading VAR=value
    # assignments and $PYTHONPATH are interpreted by it.
    record_wpr_cmd = (
        'PYTHONPATH=%s:%s:$PYTHONPATH' % (page_set_dir, self._catapult_dir),
        'DISPLAY=%s' % X11_DISPLAY,
        os.path.join(self._telemetry_binaries_dir, 'record_wpr'),
        '--extra-browser-args="%s"' % self._browser_args,
        '--browser=exact',
        '--browser-executable=%s' % self._browser_executable,
        '%s_page_set' % page_set_basename,
        '--page-set-base-dir=%s' % page_set_dir
    )
    for _ in range(RETRY_RECORD_WPR_COUNT):
      try:
        subprocess.check_call(' '.join(record_wpr_cmd), shell=True)

        # Move over the created archive into the local webpages archive
        # directory.
        shutil.move(
            os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR, wpr_data_file),
            self._local_record_webpages_archive_dir)
        shutil.move(
            os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR,
                         page_set_json_name),
            self._local_record_webpages_archive_dir)

        # Break out of the retry loop since there were no errors.
        break
      except Exception:
        # There was a failure continue with the loop.
        traceback.print_exc()
    else:
      # If we get here then record_wpr did not succeed and thus did not
      # break out of the loop.
      raise Exception('record_wpr failed for page_set: %s' % page_set)

  def _CaptureSkp(self, page_set, page_set_basename, page_set_dir):
    """Runs the SKP benchmark (with retries) and renames the captured SKPs.

    Raises:
      Exception: if no attempt produced SKP files that could be renamed.
    """
    run_benchmark_cmd = (
        'PYTHONPATH=%s:%s:$PYTHONPATH' % (page_set_dir, self._catapult_dir),
        'DISPLAY=%s' % X11_DISPLAY,
        'timeout', '1800',
        os.path.join(self._telemetry_binaries_dir, 'run_benchmark'),
        '--extra-browser-args="%s"' % self._browser_args,
        '--browser=exact',
        '--browser-executable=%s' % self._browser_executable,
        SKP_BENCHMARK,
        '--page-set-name=%s' % page_set_basename,
        '--page-set-base-dir=%s' % page_set_dir,
        '--skp-outdir=%s' % TMP_SKP_DIR,
        '--also-run-disabled-tests'
    )

    for _ in range(RETRY_RUN_MEASUREMENT_COUNT):
      try:
        print('\n\n=======Capturing SKP of %s=======\n\n' % page_set)
        subprocess.check_call(' '.join(run_benchmark_cmd), shell=True)
      except subprocess.CalledProcessError:
        # skpicture_printer sometimes fails with AssertionError but the
        # captured SKP is still valid. This is a known issue.
        pass

      # Rename generated SKP files into more descriptive names.
      try:
        self._RenameSkpFiles(page_set)
        # Break out of the retry loop since there were no errors.
        break
      except Exception:
        # There was a failure continue with the loop.
        traceback.print_exc()
        print('\n\n=======Retrying %s=======\n\n' % page_set)
        time.sleep(10)
    else:
      # If we get here then run_benchmark did not succeed and thus did not
      # break out of the loop.
      raise Exception('run_benchmark failed for page_set: %s' % page_set)

  def _RunSkiaTools(self):
    """Runs render_pictures, render_pdfs and optionally the debugger."""
    render_pictures_cmd = [
        os.path.join(self._skia_tools, 'render_pictures'),
        '-r', self._local_skp_dir
    ]
    render_pdfs_cmd = [
        os.path.join(self._skia_tools, 'render_pdfs'),
        '-r', self._local_skp_dir
    ]

    for tools_cmd in (render_pictures_cmd, render_pdfs_cmd):
      print('\n\n=======Running %s=======' % ' '.join(tools_cmd))
      subprocess.check_call(tools_cmd)

    if not self._non_interactive:
      print('\n\n=======Running debugger=======')
      # The exit status is deliberately ignored; an argument list (rather than
      # a shell string) keeps paths containing spaces intact.
      subprocess.call([os.path.join(self._skia_tools, 'debugger'),
                       self._local_skp_dir])

  def _UploadSkps(self):
    """Uploads the SKPs to the data store and/or partner bucket if requested."""
    if self._upload:
      print('\n\n=======Uploading to %s=======\n\n' % self.gs.target_type())
      # Copy the directory structure in the root directory into Google Storage.
      dest_dir_name = ROOT_PLAYBACK_DIR_NAME
      if self._alternate_upload_dir:
        dest_dir_name = self._alternate_upload_dir

      self.gs.upload_dir_contents(
          self._local_skp_dir, dest_dir=dest_dir_name)

      print('\n\n=======New SKPs have been uploaded to %s =======\n\n' % (
          posixpath.join(self.gs.target_name(), dest_dir_name,
                         SKPICTURES_DIR_NAME)))

    else:
      print('\n\n=======Not Uploading to %s=======\n\n' % (
          self.gs.target_type()))
      print('Generated resources are available in %s\n\n' % (
          self._local_skp_dir))

    if self._upload_to_partner_bucket:
      print('\n\n=======Uploading to Partner bucket %s =======\n\n' % (
          PARTNERS_GS_BUCKET))
      partner_gs = GoogleStorageDataStore(PARTNERS_GS_BUCKET)
      partner_gs.delete_path(SKPICTURES_DIR_NAME)
      print('Uploading %s to %s' % (self._local_skp_dir, SKPICTURES_DIR_NAME))
      partner_gs.upload_dir_contents(self._local_skp_dir, SKPICTURES_DIR_NAME)
      print('\n\n=======New SKPs have been uploaded to %s =======\n\n' % (
          posixpath.join(partner_gs.target_name(), SKPICTURES_DIR_NAME)))

  def _GetSkiaSkpFileName(self, page_set):
    """Returns the SKP file name for Skia page sets."""
    # /path/to/skia_yahooanswers_desktop.py -> skia_yahooanswers_desktop.py
    ps_filename = os.path.basename(page_set)
    # skia_yahooanswers_desktop.py -> skia_yahooanswers_desktop
    ps_basename, _ = os.path.splitext(ps_filename)
    # skia_yahooanswers_desktop -> skia, yahooanswers, desktop
    _, page_name, device = ps_basename.split('_')
    basename = '%s_%s' % (DEVICE_TO_PLATFORM_PREFIX[device], page_name)
    return basename[:MAX_SKP_BASE_NAME_LEN] + '.skp'

  def _GetChromiumSkpFileName(self, page_set, site):
    """Returns the SKP file name for Chromium page sets."""
    # /path/to/http___mobile_news_sandbox_pt0 -> http___mobile_news_sandbox_pt0
    _, webpage = os.path.split(site)
    # http___mobile_news_sandbox_pt0 -> mobile_news_sandbox_pt0
    for prefix in ('http___', 'https___', 'www_'):
      webpage = remove_prefix(webpage, prefix)
    # /path/to/skia_yahooanswers_desktop.py -> skia_yahooanswers_desktop.py
    ps_filename = os.path.basename(page_set)
    # http___mobile_news_sandbox -> pagesetprefix_http___mobile_news_sandbox
    basename = '%s_%s' % (CHROMIUM_PAGE_SETS_TO_PREFIX[ps_filename], webpage)
    return basename[:MAX_SKP_BASE_NAME_LEN] + '.skp'

  def _RenameSkpFiles(self, page_set):
    """Rename generated SKP files into more descriptive names.

    Look into the subdirectory of TMP_SKP_DIR and find the most interesting
    .skp in there to be this page_set's representative .skp.

    Raises:
      ValueError: if a subdirectory holds no .skp file (max() of an empty
          list); the caller treats any exception as a reason to retry.
    """
    subdirs = glob.glob(os.path.join(TMP_SKP_DIR, '*'))
    for site in subdirs:
      if self._IsChromiumPageSet(page_set):
        filename = self._GetChromiumSkpFileName(page_set, site)
      else:
        filename = self._GetSkiaSkpFileName(page_set)
      filename = filename.lower()

      if self._skp_prefix:
        filename = '%s%s' % (self._skp_prefix, filename)

      # We choose the largest .skp as the most likely to be interesting.
      largest_skp = max(glob.glob(os.path.join(site, '*.skp')),
                        key=lambda path: os.stat(path).st_size)
      dest = os.path.join(self._local_skp_dir, filename)
      print('Moving %s to %s' % (largest_skp, dest))
      shutil.move(largest_skp, dest)
      self._skp_files.append(filename)
      shutil.rmtree(site)

  def _CreateLocalStorageDirs(self):
    """Creates required local storage directories for this script.

    Any existing directory (and its contents) is removed first.
    """
    for d in (self._local_record_webpages_archive_dir,
              self._local_skp_dir):
      if os.path.exists(d):
        shutil.rmtree(d)
      os.makedirs(d)

  def _DownloadWebpagesArchive(self, wpr_data_file, page_set_json_name):
    """Downloads the webpages archive and its required page set from GS.

    Raises:
      Exception: if either file is missing from the data store.
    """
    wpr_source = posixpath.join(ROOT_PLAYBACK_DIR_NAME, 'webpages_archive',
                                wpr_data_file)
    page_set_source = posixpath.join(ROOT_PLAYBACK_DIR_NAME,
                                     'webpages_archive',
                                     page_set_json_name)
    gs = self.gs
    if (gs.does_storage_object_exist(wpr_source) and
        gs.does_storage_object_exist(page_set_source)):
      gs.download_file(wpr_source,
                       os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR,
                                    wpr_data_file))
      gs.download_file(page_set_source,
                       os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR,
                                    page_set_json_name))
    else:
      # The two sources come first and the store description last, matching
      # the order of the placeholders in the message.
      raise Exception('%s and %s do not exist in %s!' % (
          wpr_source, page_set_source, gs.target_type()))


class DataStore(object):
  """An abstract base class for uploading recordings to a data storage.

  The interface emulates the google storage api.
  """

  def target_name(self):
    """Returns the location of the store (bucket URL or base directory)."""
    raise NotImplementedError()

  def target_type(self):
    """Returns a description of the store used in log messages."""
    raise NotImplementedError()

  def does_storage_object_exist(self, name):
    """Returns True if the object called name exists in the store."""
    raise NotImplementedError()

  def delete_path(self, path):
    """Recursively deletes path from the store."""
    raise NotImplementedError()

  def download_file(self, name, local_path):
    """Copies the object called name from the store to local_path."""
    raise NotImplementedError()

  def upload_dir_contents(self, source_dir, dest_dir):
    """Copies the local source_dir into dest_dir of the store."""
    raise NotImplementedError()


class GoogleStorageDataStore(DataStore):
  """DataStore backed by a Google Storage bucket, accessed through gsutil."""

  def __init__(self, data_store_url):
    # Trailing slashes are dropped because paths are joined with '/' below.
    self._url = data_store_url.rstrip('/')

  def target_name(self):
    return self._url

  def target_type(self):
    return 'Google Storage'

  def does_storage_object_exist(self, name):
    """Returns True if 'gsutil ls' succeeds and lists exactly one entry."""
    try:
      output = subprocess.check_output([
          'gsutil', 'ls', '/'.join((self._url, name))])
    except subprocess.CalledProcessError:
      return False
    if len(output.splitlines()) != 1:
      return False
    return True

  def delete_path(self, path):
    subprocess.check_call(['gsutil', 'rm', '-r', '/'.join((self._url, path))])

  def download_file(self, name, local_path):
    subprocess.check_call([
        'gsutil', 'cp', '/'.join((self._url, name)), local_path])

  def upload_dir_contents(self, source_dir, dest_dir):
    subprocess.check_call([
        'gsutil', 'cp', '-r', source_dir, '/'.join((self._url, dest_dir))])


class LocalFileSystemDataStore(DataStore):
  """DataStore backed by a directory on the local filesystem."""

  def __init__(self, data_store_location):
    self._base_dir = data_store_location

  def target_name(self):
    return self._base_dir

  def target_type(self):
    return self._base_dir

  def does_storage_object_exist(self, name):
    return os.path.isfile(os.path.join(self._base_dir, name))

  def delete_path(self, path):
    # Like every other method, path is relative to the store's base directory.
    shutil.rmtree(os.path.join(self._base_dir, path))

  def download_file(self, name, local_path):
    shutil.copyfile(os.path.join(self._base_dir, name), local_path)

  def upload_dir_contents(self, source_dir, dest_dir):
    """Recursively copies the contents of source_dir into base_dir/dest_dir."""
    def copytree(source_dir, dest_dir):
      if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
      for item in os.listdir(source_dir):
        source = os.path.join(source_dir, item)
        dest = os.path.join(dest_dir, item)
        if os.path.isdir(source):
          copytree(source, dest)
        else:
          shutil.copy2(source, dest)
    copytree(source_dir, os.path.join(self._base_dir, dest_dir))


if '__main__' == __name__:
  option_parser = optparse.OptionParser()
  option_parser.add_option(
      '', '--page_sets',
      help='Specifies the page sets to use to archive. Supports globs.',
      default='all')
  option_parser.add_option(
      '', '--record', action='store_true',
      help='Specifies whether a new website archive should be created.',
      default=False)
  option_parser.add_option(
      '', '--skia_tools',
      help=('Path to compiled Skia executable tools. '
            'render_pictures/render_pdfs is run on the set '
            'after all SKPs are captured. If the script is run without '
            '--non-interactive then the debugger is also run at the end. Debug '
            'builds are recommended because they seem to catch more failures '
            'than Release builds.'),
      default=None)
  option_parser.add_option(
      '', '--upload', action='store_true',
      help=('Uploads to Google Storage or copies to local filesystem storage '
            ' if this is True.'),
      default=False)
  option_parser.add_option(
      '', '--upload_to_partner_bucket', action='store_true',
      help=('Uploads SKPs to the chrome-partner-telemetry Google Storage '
            'bucket if true.'),
      default=False)
  option_parser.add_option(
      '', '--data_store',
      help=('The location of the file storage to use to download and upload '
            'files. Can be \'gs://<bucket>\' for Google Storage, or '
            'a directory for local filesystem storage'),
      default='gs://skia-skps')
  option_parser.add_option(
      '', '--alternate_upload_dir',
      help=('Uploads to a different directory in Google Storage or local '
            'storage if this flag is specified'),
      default=None)
  option_parser.add_option(
      '', '--output_dir',
      help=('Temporary directory where SKPs and webpage archives will be '
            'outputted to.'),
      default=tempfile.gettempdir())
  option_parser.add_option(
      '', '--browser_executable',
      help='The exact browser executable to run.',
      default=None)
  option_parser.add_option(
      '', '--browser_extra_args',
      help='Additional arguments to pass to the browser.',
      default=None)
  option_parser.add_option(
      '', '--chrome_src_path',
      help='Path to the chromium src directory.',
      default=None)
  option_parser.add_option(
      '', '--non-interactive', action='store_true',
      help='Runs the script without any prompts. If this flag is specified and '
           '--skia_tools is specified then the debugger is not run.',
      default=False)
  option_parser.add_option(
      '', '--skp_prefix',
      help='Prefix to add to the names of generated SKPs.',
      default=None)
  options, unused_args = option_parser.parse_args()

  playback = SkPicturePlayback(options)
  sys.exit(playback.Run())