1# Copyright 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import json
6import logging
7import os
8import re
9import shutil
10import tempfile
11
12from catapult_base import cloud_storage  # pylint: disable=import-error
13
14
def AssertValidCloudStorageBucket(bucket):
  """Raises ValueError unless |bucket| is an accepted bucket value.

  Accepted values are None and the PUBLIC_BUCKET, PARTNER_BUCKET and
  INTERNAL_BUCKET constants of the cloud_storage module.
  """
  accepted_buckets = (
      None,
      cloud_storage.PUBLIC_BUCKET,
      cloud_storage.PARTNER_BUCKET,
      cloud_storage.INTERNAL_BUCKET,
  )
  if bucket in accepted_buckets:
    return
  raise ValueError("Cloud storage privacy bucket %s is invalid" % bucket)
22
23
class ArchiveError(Exception):
  """Exception type declared by this module for archive-related errors."""
26
27
class WprArchiveInfo(object):
  """Tracks which .wpr archive file holds the recording of each story.

  The mapping is read from, and written back to, a JSON metadata file whose
  'archives' entry maps a .wpr file name (relative to the directory of the
  metadata file) to the list of story names recorded in that file.
  """

  def __init__(self, file_path, data, bucket):
    """Builds the lookup tables from already-parsed metadata.

    Args:
      file_path: Path of the JSON metadata file. Its directory is created if
          missing and is the base directory for all .wpr file names.
      data: Dict with an 'archives' key mapping .wpr file names to lists of
          story names.
      bucket: None, or a bucket accepted by AssertValidCloudStorageBucket.

    Raises:
      ValueError: if AssertValidCloudStorageBucket rejects |bucket|.
    """
    AssertValidCloudStorageBucket(bucket)
    self._file_path = file_path
    self._base_dir = os.path.dirname(file_path)
    self._data = data
    self._bucket = bucket

    # Ensure directory exists. os.path.dirname() yields '' for a bare file
    # name (i.e. the current directory); os.makedirs('') would raise, so only
    # create the directory when there is a directory component.
    if self._base_dir and not os.path.exists(self._base_dir):
      os.makedirs(self._base_dir)

    # Map from the relative path (as it appears in the metadata file) of the
    # .wpr file to a list of story names it supports.
    self._wpr_file_to_story_names = data['archives']

    # Map from the story name to a relative path (as it appears
    # in the metadata file) of the .wpr file.
    self._story_name_to_wpr_file = dict()
    # Find out the wpr file names for each story.
    for wpr_file, story_names in data['archives'].items():
      for story_name in story_names:
        self._story_name_to_wpr_file[story_name] = wpr_file
    # Path of a recording in progress; while set, it overrides every lookup
    # done through WprFilePathForStory.
    self.temp_target_wpr_file_path = None

  @classmethod
  def FromFile(cls, file_path, bucket):
    """Creates an instance from the JSON metadata file at |file_path|.

    When the file does not exist, an instance with no archives is returned.
    """
    if os.path.exists(file_path):
      with open(file_path, 'r') as f:
        data = json.load(f)
      return cls(file_path, data, bucket)
    return cls(file_path, {'archives': {}}, bucket)

  def DownloadArchivesIfNeeded(self):
    """Downloads archives iff the Archive has a bucket parameter and the user
    has permission to access the bucket.

    Raises cloud storage Permissions or Credentials error when there is no
    local copy of the archive and the user doesn't have permission to access
    the archive's bucket.

    Warns when a bucket is not specified or when the user doesn't have
    permission to access the archive's bucket but a local copy of the archive
    exists.
    """
    # Download all .wpr files.
    if not self._bucket:
      # The metadata file path fills the %s placeholder; without an argument
      # the logging module would emit the literal '%s'.
      logging.warning('Story set in %s has no bucket specified, and '
                      'cannot be downloaded from cloud_storage.',
                      self._file_path)
      return
    assert 'archives' in self._data, (
        'Invalid data format in %s. \'archives\' field is needed' %
        self._file_path)
    for archive_path in self._data['archives']:
      archive_path = self._WprFileNameToPath(archive_path)
      try:
        cloud_storage.GetIfChanged(archive_path, self._bucket)
      except (cloud_storage.CredentialsError, cloud_storage.PermissionError):
        if os.path.exists(archive_path):
          # If the archive exists, assume the user recorded their own and
          # simply warn.
          logging.warning('Need credentials to update WPR archive: %s',
                          archive_path)
        else:
          logging.error("You either aren't authenticated or don't have "
                        "permission to use the archives for this page set."
                        "\nYou may need to run gsutil config."
                        "\nYou can find instructions for gsutil config at: "
                        "http://www.chromium.org/developers/telemetry/"
                        "upload_to_cloud_storage")
          raise

  def WprFilePathForStory(self, story):
    """Returns the absolute .wpr path for |story|, or None if there is none.

    While a temporary recording is registered, its path is returned for every
    story. Otherwise the story is looked up by display_name and, failing
    that, by its url attribute when it has one.
    """
    if self.temp_target_wpr_file_path:
      return self.temp_target_wpr_file_path
    wpr_file = self._story_name_to_wpr_file.get(story.display_name, None)
    if wpr_file is None and hasattr(story, 'url'):
      # Some old pages always use the URL to identify a page rather than the
      # display_name, so try to look for that.
      wpr_file = self._story_name_to_wpr_file.get(story.url, None)
    if wpr_file:
      return self._WprFileNameToPath(wpr_file)
    return None

  def AddNewTemporaryRecording(self, temp_wpr_file_path=None):
    """Registers |temp_wpr_file_path| as the target of the next recording.

    When no path is given, an empty temporary file is created and used.
    """
    if temp_wpr_file_path is None:
      temp_wpr_file_handle, temp_wpr_file_path = tempfile.mkstemp()
      # Only the path is needed; close the descriptor mkstemp() opened.
      os.close(temp_wpr_file_handle)
    self.temp_target_wpr_file_path = temp_wpr_file_path

  def AddRecordedStories(self, stories, upload_to_cloud_storage=False):
    """Moves the temporary recording into place and assigns |stories| to it.

    With no stories the temporary recording is deleted and nothing else
    happens. Otherwise the recording gets the next archive file name, a
    '.sha1' file holding its hash is written next to it, the metadata file is
    rewritten and archives left without stories are deleted.

    Args:
      stories: Stories (objects with a display_name) in the temporary
          recording.
      upload_to_cloud_storage: Whether to also upload the new archive to the
          bucket. Upload failures are logged, not raised.
    """
    if not stories:
      os.remove(self.temp_target_wpr_file_path)
      return

    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
    for story in stories:
      self._SetWprFileForStory(story.display_name, target_wpr_file)
    shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)

    # Update the hash file.
    # NOTE(review): the binary-mode write presumably relies on CalculateHash
    # returning a byte string -- confirm before running under Python 3.
    target_wpr_file_hash = cloud_storage.CalculateHash(target_wpr_file_path)
    with open(target_wpr_file_path + '.sha1', 'wb') as f:
      f.write(target_wpr_file_hash)
      f.flush()

    self._WriteToFile()
    self._DeleteAbandonedWprFiles()

    # Upload to cloud storage
    if upload_to_cloud_storage:
      if not self._bucket:
        logging.warning('StorySet must have bucket specified to upload '
                        'stories to cloud storage.')
        return
      try:
        cloud_storage.Insert(self._bucket, target_wpr_file_hash,
                             target_wpr_file_path)
      except cloud_storage.CloudStorageError as e:
        # Both values are passed as logging arguments: applying '%' with a
        # single value to this two-placeholder string would raise TypeError.
        logging.warning('Failed to upload wpr file %s to cloud storage. '
                        'Error:%s', target_wpr_file_path, e)

  def _DeleteAbandonedWprFiles(self):
    """Drops archives with no stories from the mapping and from disk."""
    # Update the metadata so that the abandoned wpr files don't have
    # empty story name arrays.
    abandoned_wpr_files = self._AbandonedWprFiles()
    for wpr_file in abandoned_wpr_files:
      del self._wpr_file_to_story_names[wpr_file]
      # Don't fail if we're unable to delete some of the files.
      wpr_file_path = self._WprFileNameToPath(wpr_file)
      try:
        os.remove(wpr_file_path)
      except Exception:
        logging.warning('Failed to delete file: %s', wpr_file_path)

  def _AbandonedWprFiles(self):
    """Returns the .wpr file names whose story name list is empty."""
    # items() rather than iteritems() so this runs on Python 2 and 3 alike.
    return [wpr_file
            for wpr_file, story_names in self._wpr_file_to_story_names.items()
            if not story_names]

  def _WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = dict()
    metadata['description'] = (
        'Describes the Web Page Replay archives for a story set. '
        'Don\'t edit by hand! Use record_wpr for updating.')
    # Shallow copy: entries can be removed below without touching the live
    # mapping.
    metadata['archives'] = self._wpr_file_to_story_names.copy()
    # Don't write data for abandoned archives.
    for wpr_file in self._AbandonedWprFiles():
      del metadata['archives'][wpr_file]

    with open(self._file_path, 'w') as f:
      json.dump(metadata, f, indent=4)
      f.flush()

  def _WprFileNameToPath(self, wpr_file):
    """Returns the absolute path of |wpr_file| inside the base directory."""
    return os.path.abspath(os.path.join(self._base_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file.

    Returns:
      A (file name, absolute path) tuple. The number in the name is one
      higher than the highest number among the known archives, or 0 when
      there are none.

    Raises:
      Exception: if a known archive name does not follow the
          "some_thing_number.wpr" format or uses a different base name than
          the other archives.
    """
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    highest_number = -1
    base = None
    for wpr_file in self._wpr_file_to_story_names:
      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.group('NUMBER')), highest_number)
      if base and match.group('BASE') != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.group('BASE')
    if not base:
      # If we're creating a completely new info file, use the base name of the
      # story set file.
      base = os.path.splitext(os.path.basename(self._file_path))[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForStory(self, story_name, wpr_file):
    """For modifying the metadata when we're going to record a new archive."""
    old_wpr_file = self._story_name_to_wpr_file.get(story_name, None)
    if old_wpr_file:
      # A story lives in exactly one archive; detach it from the old one.
      self._wpr_file_to_story_names[old_wpr_file].remove(story_name)
    self._story_name_to_wpr_file[story_name] = wpr_file
    if wpr_file not in self._wpr_file_to_story_names:
      self._wpr_file_to_story_names[wpr_file] = []
    self._wpr_file_to_story_names[wpr_file].append(story_name)
220