1# Copyright 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import json
6import logging
7import os
8import re
9import shutil
10import tempfile
11import time
12
13from py_utils import cloud_storage  # pylint: disable=import-error
14
15
# Platform key used for an archive that is not tied to a specific platform; it
# is the fallback when no platform-specific archive is recorded for a story.
_DEFAULT_PLATFORM = 'DEFAULT'

# Every platform key that is downloaded when the caller does not restrict the
# set of target platforms.
_ALL_PLATFORMS = ['mac', 'linux', 'android', 'win'] + [_DEFAULT_PLATFORM]
18
19
def AssertValidCloudStorageBucket(bucket):
  """Checks that |bucket| is None or one of the known cloud storage buckets.

  Args:
    bucket: The bucket to validate. None (no bucket) is accepted.

  Raises:
    ValueError: If |bucket| is neither None nor the public, partner or
        internal cloud storage bucket.
  """
  accepted_buckets = (None,
                      cloud_storage.PUBLIC_BUCKET,
                      cloud_storage.PARTNER_BUCKET,
                      cloud_storage.INTERNAL_BUCKET)
  if bucket in accepted_buckets:
    return
  raise ValueError("Cloud storage privacy bucket %s is invalid" % bucket)
27
28
class WprArchiveInfo(object):
  """Maps stories to their Web Page Replay (WPR) archive files.

  The mapping is read from and written to a JSON metadata file. Its 'archives'
  entry maps a story name to a dict of {platform: wpr file name}. WPR file
  names are resolved relative to the directory containing the metadata file.
  """

  def __init__(self, file_path, data, bucket):
    """Initializes the archive info from already-parsed metadata.

    Args:
      file_path: Path of the JSON metadata file. Its directory is created if it
          does not exist yet.
      data: Parsed metadata dict. Must contain a truthy 'platform_specific'
          entry and an 'archives' dict.
      bucket: Cloud storage bucket for the archives, or None.

    Raises:
      ValueError: If |bucket| is not an accepted cloud storage bucket.
      AssertionError: If |data| lacks a truthy 'platform_specific' entry.
    """
    AssertValidCloudStorageBucket(bucket)
    self._file_path = file_path
    self._base_dir = os.path.dirname(file_path)
    self._data = data
    self._bucket = bucket
    self.temp_target_wpr_file_path = None
    # Ensure directory exists. |_base_dir| is '' when |file_path| has no
    # directory component; os.makedirs('') raises, so skip it in that case.
    if self._base_dir and not os.path.exists(self._base_dir):
      os.makedirs(self._base_dir)

    assert data.get('platform_specific', False), (
        'Detected old version of archive info json file. Please update to new '
        'version.')

    # Same dict object as self._data['archives']; updates are visible in both.
    self._story_name_to_wpr_file = data['archives']

  @classmethod
  def FromFile(cls, file_path, bucket):
    """Generates an archive_info instance with the given json file.

    Args:
      file_path: Path of the JSON metadata file. If it does not exist, an
          empty platform-specific archive info is created instead.
      bucket: Cloud storage bucket for the archives, or None.

    Returns:
      A new instance of |cls|.
    """
    if os.path.exists(file_path):
      with open(file_path, 'r') as f:
        data = json.load(f)
        return cls(file_path, data, bucket)
    return cls(file_path, {'archives': {}, 'platform_specific': True}, bucket)

  def DownloadArchivesIfNeeded(self, target_platforms=None):
    """Downloads archives iff the Archive has a bucket parameter and the user
    has permission to access the bucket.

    Raises cloud storage Permissions or Credentials error when there is no
    local copy of the archive and the user doesn't have permission to access
    the archive's bucket.

    Warns when a bucket is not specified or when the user doesn't have
    permission to access the archive's bucket but a local copy of the archive
    exists.

    Args:
      target_platforms: List of platform keys whose archives should be
          downloaded (the default platform is always added), or None to
          download the archives of all platforms.
    """
    logging.info('Downloading WPR archives. This can take a long time.')
    start_time = time.time()
    # If no target platform is set, download all platforms.
    if target_platforms is None:
      target_platforms = _ALL_PLATFORMS
    else:
      assert isinstance(target_platforms, list), 'Must pass platforms as a list'
      # Build a new list so the caller's list is not mutated.
      target_platforms = target_platforms + [_DEFAULT_PLATFORM]
    # Download all .wpr files.
    if not self._bucket:
      logging.warning('Story set in %s has no bucket specified, and '
                      'cannot be downloaded from cloud_storage.',
                      self._file_path)
      return
    assert 'archives' in self._data, ("Invalid data format in %s. 'archives' "
                                      "field is needed" % self._file_path)

    def download_if_needed(path):
      try:
        cloud_storage.GetIfChanged(path, self._bucket)
      except (cloud_storage.CredentialsError, cloud_storage.PermissionError):
        if os.path.exists(path):
          # If the archive exists, assume the user recorded their own and warn
          # them that they do not have the proper credentials to download.
          logging.warning('Need credentials to update WPR archive: %s', path)
        else:
          logging.error("You either aren't authenticated or don't have "
                        "permission to use the archives for this page set."
                        "\nYou may need to run gsutil config."
                        "\nYou can find instructions for gsutil config at: "
                        "http://www.chromium.org/developers/telemetry/"
                        "upload_to_cloud_storage")
          raise
    try:
      story_archives = self._data['archives']
      for story in story_archives:
        for target_platform in target_platforms:
          if story_archives[story].get(target_platform):
            archive_path = self._WprFileNameToPath(
                story_archives[story][target_platform])
            download_if_needed(archive_path)
    finally:
      logging.info('All WPR archives are downloaded, took %s seconds.',
                   time.time() - start_time)

  def WprFilePathForStory(self, story, target_platform=_DEFAULT_PLATFORM):
    """Returns the absolute path of the WPR archive to use for |story|.

    While a temporary recording is set, its path is returned for every story.
    Otherwise the archive is looked up by the story's display_name, then by
    its url (if it has one). The archive for |target_platform| is preferred;
    the default platform's archive is the fallback.

    Args:
      story: Object with a display_name attribute and optionally a url.
      target_platform: Platform key whose archive is preferred.

    Returns:
      The archive path, or None if no archive is recorded for |story|.
    """
    if self.temp_target_wpr_file_path:
      return self.temp_target_wpr_file_path

    wpr_file = self._story_name_to_wpr_file.get(story.display_name, None)
    if wpr_file is None and hasattr(story, 'url'):
      # Some old pages always use the URL to identify a page rather than the
      # display_name, so try to look for that.
      wpr_file = self._story_name_to_wpr_file.get(story.url, None)
    if wpr_file:
      if target_platform in wpr_file:
        return self._WprFileNameToPath(wpr_file[target_platform])
      return self._WprFileNameToPath(wpr_file[_DEFAULT_PLATFORM])
    return None

  def AddNewTemporaryRecording(self, temp_wpr_file_path=None):
    """Sets the temporary file that receives the next recording.

    Args:
      temp_wpr_file_path: Path to use. If None, a new temporary file is
          created with tempfile.mkstemp() and its path is used.
    """
    if temp_wpr_file_path is None:
      temp_wpr_file_handle, temp_wpr_file_path = tempfile.mkstemp()
      # Only the path is needed; close the descriptor mkstemp opened.
      os.close(temp_wpr_file_handle)
    self.temp_target_wpr_file_path = temp_wpr_file_path

  def AddRecordedStories(self, stories, upload_to_cloud_storage=False,
                         target_platform=_DEFAULT_PLATFORM):
    """Turns the temporary recording into a named archive for |stories|.

    Moves the temporary recording to the next free archive file name, points
    every story at it, writes a '.sha1' hash file next to the archive, rewrites
    the metadata file and optionally uploads the archive to cloud storage. If
    |stories| is empty, the temporary recording is deleted instead.

    Args:
      stories: Stories covered by the recording. Each needs display_name and
          platform_specific attributes.
      upload_to_cloud_storage: Whether to upload the new archive to the bucket.
          Upload failures are logged as warnings, not raised.
      target_platform: Platform key the recording is stored under for stories
          that are platform specific.
    """
    if not stories:
      os.remove(self.temp_target_wpr_file_path)
      return

    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
    for story in stories:
      # Check to see if the platform has been manually overridden.
      if not story.platform_specific:
        current_target_platform = _DEFAULT_PLATFORM
      else:
        current_target_platform = target_platform
      self._SetWprFileForStory(
          story.display_name, target_wpr_file, current_target_platform)
    shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)

    # Update the hash file.
    target_wpr_file_hash = cloud_storage.CalculateHash(target_wpr_file_path)
    with open(target_wpr_file_path + '.sha1', 'wb') as f:
      f.write(target_wpr_file_hash)
      f.flush()

    self._WriteToFile()

    # Upload to cloud storage
    if upload_to_cloud_storage:
      if not self._bucket:
        logging.warning('StorySet must have bucket specified to upload '
                        'stories to cloud storage.')
        return
      try:
        cloud_storage.Insert(self._bucket, target_wpr_file_hash,
                             target_wpr_file_path)
      except cloud_storage.CloudStorageError as e:
        # Pass the values as logging arguments so both placeholders are filled.
        logging.warning('Failed to upload wpr file %s to cloud storage. '
                        'Error:%s', target_wpr_file_path, e)

  def _WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = dict()
    metadata['description'] = (
        'Describes the Web Page Replay archives for a story set. '
        'Don\'t edit by hand! Use record_wpr for updating.')
    metadata['archives'] = self._story_name_to_wpr_file.copy()
    metadata['platform_specific'] = True

    with open(self._file_path, 'w') as f:
      json.dump(metadata, f, indent=4, sort_keys=True, separators=(',', ': '))
      f.flush()

  def _WprFileNameToPath(self, wpr_file):
    """Returns the absolute path of |wpr_file| inside the metadata directory."""
    return os.path.abspath(os.path.join(self._base_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file.

    Returns:
      A (file name, absolute file path) tuple whose number is one higher than
      the highest number used by any recorded archive.

    Raises:
      Exception: If a recorded archive name does not look like
          "some_thing_number.wpr", or the recorded names do not share one base.
    """
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    highest_number = -1
    base = None
    wpr_files = []
    for story in self._data['archives']:
      for p in self._data['archives'][story]:
        wpr_files.append(self._data['archives'][story][p])

    for wpr_file in wpr_files:
      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.groupdict()['NUMBER']), highest_number)
      if base and match.groupdict()['BASE'] != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.groupdict()['BASE']
    if not base:
      # If we're creating a completely new info file, use the base name of the
      # story set file.
      base = os.path.splitext(os.path.basename(self._file_path))[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForStory(self, story_name, wpr_file, target_platform):
    """For modifying the metadata when we're going to record a new archive.

    Args:
      story_name: Name of the story the archive belongs to.
      wpr_file: File name of the archive.
      target_platform: Platform key the archive is recorded under.
    """
    if story_name not in self._data['archives']:
      # If there is no other recording we want the first to be the default
      # until a new default is recorded.
      self._data['archives'][story_name] = {_DEFAULT_PLATFORM: wpr_file}
    self._data['archives'][story_name][target_platform] = wpr_file
220