# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import os
import re
import shutil
import tempfile

from catapult_base import cloud_storage  # pylint: disable=import-error


def AssertValidCloudStorageBucket(bucket):
  """Checks that |bucket| is None or one of the known cloud storage buckets.

  Args:
    bucket: None, or one of cloud_storage.PUBLIC_BUCKET,
        cloud_storage.PARTNER_BUCKET, cloud_storage.INTERNAL_BUCKET.

  Raises:
    ValueError: if |bucket| is any other value.
  """
  is_valid = bucket in (None,
                        cloud_storage.PUBLIC_BUCKET,
                        cloud_storage.PARTNER_BUCKET,
                        cloud_storage.INTERNAL_BUCKET)
  if not is_valid:
    raise ValueError("Cloud storage privacy bucket %s is invalid" % bucket)


class ArchiveError(Exception):
  """Exception type for archive-related failures."""
  pass


class WprArchiveInfo(object):
  """Bookkeeping for the .wpr archive files that belong to a story set.

  The information is backed by a JSON metadata file whose 'archives' entry maps
  each .wpr file name (relative to the metadata file's directory) to the list
  of story names recorded in it.
  """

  def __init__(self, file_path, data, bucket):
    """Initializes the archive info.

    Args:
      file_path: path of the JSON metadata file. Its directory is created if
          it does not exist yet.
      data: dict with an 'archives' key mapping .wpr file names to lists of
          story names. The 'archives' dict is used (and later mutated) in
          place, not copied.
      bucket: cloud storage bucket, or None. Validated with
          AssertValidCloudStorageBucket.

    Raises:
      ValueError: if |bucket| is not a valid bucket.
    """
    AssertValidCloudStorageBucket(bucket)
    self._file_path = file_path
    self._base_dir = os.path.dirname(file_path)
    self._data = data
    self._bucket = bucket

    # Ensure directory exists. A bare file name has an empty dirname (i.e. the
    # current directory); os.makedirs('') would raise, so skip that case.
    if self._base_dir and not os.path.exists(self._base_dir):
      os.makedirs(self._base_dir)

    # Map from the relative path (as it appears in the metadata file) of the
    # .wpr file to a list of story names it supports.
    self._wpr_file_to_story_names = data['archives']

    # Map from the story name to a relative path (as it appears
    # in the metadata file) of the .wpr file.
    self._story_name_to_wpr_file = dict()
    # Find out the wpr file names for each story.
    for wpr_file, story_names in data['archives'].items():
      for story_name in story_names:
        self._story_name_to_wpr_file[story_name] = wpr_file
    self.temp_target_wpr_file_path = None

  @classmethod
  def FromFile(cls, file_path, bucket):
    """Creates an instance from the metadata file at |file_path|.

    If the file does not exist, an instance with no archives is returned.
    """
    if os.path.exists(file_path):
      with open(file_path, 'r') as f:
        data = json.load(f)
        return cls(file_path, data, bucket)
    return cls(file_path, {'archives': {}}, bucket)

  def DownloadArchivesIfNeeded(self):
    """Downloads archives iff the Archive has a bucket parameter and the user
    has permission to access the bucket.

    Raises cloud storage Permissions or Credentials error when there is no
    local copy of the archive and the user doesn't have permission to access
    the archive's bucket.

    Warns when a bucket is not specified or when the user doesn't have
    permission to access the archive's bucket but a local copy of the archive
    exists.
    """
    # Download all .wpr files.
    if not self._bucket:
      logging.warning('Story set in %s has no bucket specified, and '
                      'cannot be downloaded from cloud_storage.',
                      self._file_path)
      return
    assert 'archives' in self._data, 'Invalid data format in %s. \'archives\'' \
                                     ' field is needed' % self._file_path
    for archive_path in self._data['archives']:
      archive_path = self._WprFileNameToPath(archive_path)
      try:
        cloud_storage.GetIfChanged(archive_path, self._bucket)
      except (cloud_storage.CredentialsError, cloud_storage.PermissionError):
        if os.path.exists(archive_path):
          # If the archive exists, assume the user recorded their own and
          # simply warn.
          logging.warning('Need credentials to update WPR archive: %s',
                          archive_path)
        else:
          logging.error("You either aren't authenticated or don't have "
                        "permission to use the archives for this page set."
                        "\nYou may need to run gsutil config."
                        "\nYou can find instructions for gsutil config at: "
                        "http://www.chromium.org/developers/telemetry/"
                        "upload_to_cloud_storage")
          raise

  def WprFilePathForStory(self, story):
    """Returns the absolute path of the .wpr file for |story|, or None.

    While a temporary recording target is set, that path is returned for every
    story. Otherwise the story is looked up by display_name and, failing that,
    by url (when the story has one).
    """
    if self.temp_target_wpr_file_path:
      return self.temp_target_wpr_file_path
    wpr_file = self._story_name_to_wpr_file.get(story.display_name, None)
    if wpr_file is None and hasattr(story, 'url'):
      # Some old pages always use the URL to identify a page rather than the
      # display_name, so try to look for that.
      wpr_file = self._story_name_to_wpr_file.get(story.url, None)
    if wpr_file:
      return self._WprFileNameToPath(wpr_file)
    return None

  def AddNewTemporaryRecording(self, temp_wpr_file_path=None):
    """Sets the temporary recording target.

    Args:
      temp_wpr_file_path: path to use; when None a fresh temporary file is
          created with tempfile.mkstemp() and its path is used.
    """
    if temp_wpr_file_path is None:
      temp_wpr_file_handle, temp_wpr_file_path = tempfile.mkstemp()
      # Only the path is needed; close the descriptor so it is not leaked.
      os.close(temp_wpr_file_handle)
    self.temp_target_wpr_file_path = temp_wpr_file_path

  def AddRecordedStories(self, stories, upload_to_cloud_storage=False):
    """Moves the temporary recording into place and records it for |stories|.

    With no stories, the temporary recording is simply deleted. Otherwise the
    recording is moved to the next free archive name, its .sha1 file and the
    metadata file are written, archives left without stories are deleted, and
    the new archive is optionally uploaded.

    Args:
      stories: stories (objects with a display_name) covered by the recording.
      upload_to_cloud_storage: whether to upload the new archive to the bucket.
          Upload failures are logged, not raised.
    """
    if not stories:
      os.remove(self.temp_target_wpr_file_path)
      return

    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
    for story in stories:
      self._SetWprFileForStory(story.display_name, target_wpr_file)
    shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)

    # Update the hash file.
    target_wpr_file_hash = cloud_storage.CalculateHash(target_wpr_file_path)
    # NOTE(review): binary mode presumes CalculateHash returns a byte string
    # (as a Python 2 str is) -- confirm before running under Python 3.
    with open(target_wpr_file_path + '.sha1', 'wb') as f:
      f.write(target_wpr_file_hash)

    self._WriteToFile()
    self._DeleteAbandonedWprFiles()

    # Upload to cloud storage
    if upload_to_cloud_storage:
      if not self._bucket:
        logging.warning('StorySet must have bucket specified to upload '
                        'stories to cloud storage.')
        return
      try:
        cloud_storage.Insert(self._bucket, target_wpr_file_hash,
                             target_wpr_file_path)
      except cloud_storage.CloudStorageError as e:
        # Pass the values as logging arguments: applying '%' to a single value
        # with two placeholders would itself raise TypeError.
        logging.warning('Failed to upload wpr file %s to cloud storage. '
                        'Error:%s', target_wpr_file_path, e)

  def _DeleteAbandonedWprFiles(self):
    """Drops archives with no stories from the metadata and from disk."""
    # Update the metadata so that the abandoned wpr files don't have
    # empty story name arrays.
    abandoned_wpr_files = self._AbandonedWprFiles()
    for wpr_file in abandoned_wpr_files:
      del self._wpr_file_to_story_names[wpr_file]
      # Don't fail if we're unable to delete some of the files.
      wpr_file_path = self._WprFileNameToPath(wpr_file)
      try:
        os.remove(wpr_file_path)
      except Exception:
        logging.warning('Failed to delete file: %s', wpr_file_path)

  def _AbandonedWprFiles(self):
    """Returns the list of .wpr file names whose story list is empty."""
    return [wpr_file
            for wpr_file, story_names in self._wpr_file_to_story_names.items()
            if not story_names]

  def _WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = dict()
    metadata['description'] = (
        'Describes the Web Page Replay archives for a story set. '
        'Don\'t edit by hand! Use record_wpr for updating.')
    metadata['archives'] = self._wpr_file_to_story_names.copy()
    # Don't write data for abandoned archives.
    abandoned_wpr_files = self._AbandonedWprFiles()
    for wpr_file in abandoned_wpr_files:
      del metadata['archives'][wpr_file]

    with open(self._file_path, 'w') as f:
      json.dump(metadata, f, indent=4)

  def _WprFileNameToPath(self, wpr_file):
    """Returns the absolute path of |wpr_file| inside the metadata directory."""
    return os.path.abspath(os.path.join(self._base_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file.

    Returns:
      A (file name, absolute path) tuple for the next archive.

    Raises:
      Exception: if an existing archive name does not have the expected format
          or does not share the base name of the other archives.
    """
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    highest_number = -1
    base = None
    for wpr_file in self._wpr_file_to_story_names:
      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.group('NUMBER')), highest_number)
      if base and match.group('BASE') != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.group('BASE')
    if not base:
      # If we're creating a completely new info file, use the base name of the
      # story set file.
      base = os.path.splitext(os.path.basename(self._file_path))[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForStory(self, story_name, wpr_file):
    """For modifying the metadata when we're going to record a new archive."""
    old_wpr_file = self._story_name_to_wpr_file.get(story_name, None)
    if old_wpr_file:
      # The story moves to the new archive, so detach it from the old one.
      self._wpr_file_to_story_names[old_wpr_file].remove(story_name)
    self._story_name_to_wpr_file[story_name] = wpr_file
    if wpr_file not in self._wpr_file_to_story_names:
      self._wpr_file_to_story_names[wpr_file] = []
    self._wpr_file_to_story_names[wpr_file].append(story_name)