# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import os
import re
import shutil
import tempfile
import time

from py_utils import cloud_storage  # pylint: disable=import-error


_DEFAULT_PLATFORM = 'DEFAULT'
_ALL_PLATFORMS = ['mac', 'linux', 'android', 'win', _DEFAULT_PLATFORM]


def AssertValidCloudStorageBucket(bucket):
  """Raises ValueError unless |bucket| is None or a known cloud_storage bucket.

  Accepted values are None, cloud_storage.PUBLIC_BUCKET,
  cloud_storage.PARTNER_BUCKET and cloud_storage.INTERNAL_BUCKET.
  """
  is_valid = bucket in (None,
                        cloud_storage.PUBLIC_BUCKET,
                        cloud_storage.PARTNER_BUCKET,
                        cloud_storage.INTERNAL_BUCKET)
  if not is_valid:
    raise ValueError("Cloud storage privacy bucket %s is invalid" % bucket)


class WprArchiveInfo(object):
  """Maps story names to per-platform WPR archive files.

  The mapping is backed by a json file whose 'archives' field has the shape
  {story_name: {platform: wpr_file_name}}. File names are resolved relative to
  the directory that contains the json file.
  """

  def __init__(self, file_path, data, bucket):
    """Initializes the archive info.

    Args:
      file_path: path of the json metadata file; its directory is created if
          it does not exist yet.
      data: parsed metadata dict. Must contain a truthy 'platform_specific'
          entry and an 'archives' entry.
      bucket: None or one of the cloud_storage buckets accepted by
          AssertValidCloudStorageBucket.

    Raises:
      ValueError: if |bucket| is not a valid bucket.
      AssertionError: if |data| is not in the platform specific format.
    """
    AssertValidCloudStorageBucket(bucket)
    self._file_path = file_path
    self._base_dir = os.path.dirname(file_path)
    self._data = data
    self._bucket = bucket
    self.temp_target_wpr_file_path = None
    # Ensure directory exists. A bare file name has an empty dirname, which
    # refers to the current directory and must not be handed to os.makedirs
    # (it raises on '').
    if self._base_dir and not os.path.exists(self._base_dir):
      os.makedirs(self._base_dir)

    assert data.get('platform_specific', False), (
        'Detected old version of archive info json file. Please update to new '
        'version.')

    # Same dict object as self._data['archives'], so updates made through
    # either name are visible through the other.
    self._story_name_to_wpr_file = data['archives']

  @classmethod
  def FromFile(cls, file_path, bucket):
    """Generates an archive_info instance with the given json file.

    If |file_path| does not exist, an instance with no archives is returned.
    """
    if os.path.exists(file_path):
      with open(file_path, 'r') as f:
        data = json.load(f)
        return cls(file_path, data, bucket)
    return cls(file_path, {'archives': {}, 'platform_specific': True}, bucket)

  def DownloadArchivesIfNeeded(self, target_platforms=None):
    """Downloads archives iff the Archive has a bucket parameter and the user
    has permission to access the bucket.

    Raises cloud storage Permissions or Credentials error when there is no
    local copy of the archive and the user doesn't have permission to access
    the archive's bucket.

    Warns when a bucket is not specified or when the user doesn't have
    permission to access the archive's bucket but a local copy of the archive
    exists.

    Args:
      target_platforms: list of platform names to download archives for, or
          None to download for all platforms. The default platform is always
          included.
    """
    logging.info('Downloading WPR archives. This can take a long time.')
    start_time = time.time()
    # If no target platform is set, download all platforms.
    if target_platforms is None:
      target_platforms = _ALL_PLATFORMS
    else:
      assert isinstance(target_platforms, list), 'Must pass platforms as a list'
      # Builds a new list, so the caller's list is left untouched.
      target_platforms = target_platforms + [_DEFAULT_PLATFORM]
    # Download all .wpr files.
    if not self._bucket:
      logging.warning('Story set in %s has no bucket specified, and '
                      'cannot be downloaded from cloud_storage.',
                      self._file_path)
      return
    assert 'archives' in self._data, ("Invalid data format in %s. 'archives' "
                                      "field is needed" % self._file_path)

    def download_if_needed(path):
      try:
        cloud_storage.GetIfChanged(path, self._bucket)
      except (cloud_storage.CredentialsError, cloud_storage.PermissionError):
        if os.path.exists(path):
          # If the archive exists, assume the user recorded their own and warn
          # them that they do not have the proper credentials to download.
          logging.warning('Need credentials to update WPR archive: %s', path)
        else:
          logging.error("You either aren't authenticated or don't have "
                        "permission to use the archives for this page set."
                        "\nYou may need to run gsutil config."
                        "\nYou can find instructions for gsutil config at: "
                        "http://www.chromium.org/developers/telemetry/"
                        "upload_to_cloud_storage")
          raise

    try:
      story_archives = self._data['archives']
      for story in story_archives:
        for target_platform in target_platforms:
          if story_archives[story].get(target_platform):
            archive_path = self._WprFileNameToPath(
                story_archives[story][target_platform])
            download_if_needed(archive_path)
    finally:
      # Runs whether or not a download raised.
      logging.info('All WPR archives are downloaded, took %s seconds.',
                   time.time() - start_time)

  def WprFilePathForStory(self, story, target_platform=_DEFAULT_PLATFORM):
    """Returns the absolute path of the WPR archive for |story|, or None.

    A pending temporary recording, if set, takes precedence over the recorded
    archives. Otherwise the archive for |target_platform| is used when there is
    one, and the default platform's archive when there is not.
    """
    if self.temp_target_wpr_file_path:
      return self.temp_target_wpr_file_path

    wpr_file = self._story_name_to_wpr_file.get(story.display_name, None)
    if wpr_file is None and hasattr(story, 'url'):
      # Some old pages always use the URL to identify a page rather than the
      # display_name, so try to look for that.
      wpr_file = self._story_name_to_wpr_file.get(story.url, None)
    if wpr_file:
      if target_platform in wpr_file:
        return self._WprFileNameToPath(wpr_file[target_platform])
      return self._WprFileNameToPath(wpr_file[_DEFAULT_PLATFORM])
    return None

  def AddNewTemporaryRecording(self, temp_wpr_file_path=None):
    """Sets the temporary file that the next recording is written to.

    When |temp_wpr_file_path| is None, a new temporary file is created with
    tempfile.mkstemp and its path is used.
    """
    if temp_wpr_file_path is None:
      temp_wpr_file_handle, temp_wpr_file_path = tempfile.mkstemp()
      # Only the path is needed; close the descriptor returned by mkstemp.
      os.close(temp_wpr_file_handle)
    self.temp_target_wpr_file_path = temp_wpr_file_path

  def AddRecordedStories(self, stories, upload_to_cloud_storage=False,
                         target_platform=_DEFAULT_PLATFORM):
    """Turns the temporary recording into a named archive for |stories|.

    Moves the temporary recording to the next free archive name, writes its
    hash to a '.sha1' file next to it, and rewrites the json metadata file.
    If |stories| is empty, the temporary recording is deleted instead.

    Args:
      stories: the stories covered by the temporary recording.
      upload_to_cloud_storage: whether to also upload the new archive to the
          bucket. A failed upload is logged as a warning, not raised.
      target_platform: platform to register the archive under, for stories
          that are platform specific.
    """
    if not stories:
      os.remove(self.temp_target_wpr_file_path)
      return

    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
    for story in stories:
      # Check to see if the platform has been manually overridden.
      if not story.platform_specific:
        current_target_platform = _DEFAULT_PLATFORM
      else:
        current_target_platform = target_platform
      self._SetWprFileForStory(
          story.display_name, target_wpr_file, current_target_platform)
    shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)

    # Update the hash file.
    target_wpr_file_hash = cloud_storage.CalculateHash(target_wpr_file_path)
    # Text mode, because writing a str to a binary-mode file fails on
    # Python 3.
    # NOTE(review): assumes CalculateHash returns a hex digest string -
    # confirm against py_utils.cloud_storage.
    with open(target_wpr_file_path + '.sha1', 'w') as f:
      f.write(target_wpr_file_hash)
      f.flush()

    self._WriteToFile()

    # Upload to cloud storage
    if upload_to_cloud_storage:
      if not self._bucket:
        logging.warning('StorySet must have bucket specified to upload '
                        'stories to cloud storage.')
        return
      try:
        cloud_storage.Insert(self._bucket, target_wpr_file_hash,
                             target_wpr_file_path)
      except cloud_storage.CloudStorageError as e:
        # Pass the values as logging arguments; applying '%' to the message
        # with a single operand would raise for a two-placeholder format.
        logging.warning('Failed to upload wpr file %s to cloud storage. '
                        'Error:%s', target_wpr_file_path, e)

  def _WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = dict()
    metadata['description'] = (
        'Describes the Web Page Replay archives for a story set. '
        'Don\'t edit by hand! Use record_wpr for updating.')
    metadata['archives'] = self._story_name_to_wpr_file.copy()
    metadata['platform_specific'] = True

    with open(self._file_path, 'w') as f:
      json.dump(metadata, f, indent=4, sort_keys=True, separators=(',', ': '))
      f.flush()

  def _WprFileNameToPath(self, wpr_file):
    """Returns the absolute path of |wpr_file| inside the base directory."""
    return os.path.abspath(os.path.join(self._base_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file.

    Returns:
      A (file name, absolute file path) tuple for the new archive.

    Raises:
      Exception: if an existing archive name does not match
          "<base>_<number>.wpr", or if existing archives use different bases.
    """
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    highest_number = -1
    base = None
    wpr_files = []
    for story in self._data['archives']:
      for p in self._data['archives'][story]:
        wpr_files.append(self._data['archives'][story][p])

    for wpr_file in wpr_files:
      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.groupdict()['NUMBER']), highest_number)
      if base and match.groupdict()['BASE'] != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.groupdict()['BASE']
    if not base:
      # If we're creating a completely new info file, use the base name of the
      # story set file.
      base = os.path.splitext(os.path.basename(self._file_path))[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForStory(self, story_name, wpr_file, target_platform):
    """For modifying the metadata when we're going to record a new archive."""
    if story_name not in self._data['archives']:
      # If there is no other recording we want the first to be the default
      # until a new default is recorded.
      self._data['archives'][story_name] = {_DEFAULT_PLATFORM: wpr_file}
    self._data['archives'][story_name][target_platform] = wpr_file