# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""

import collections
import contextlib
import hashlib
import logging
import os
import shutil
import stat
import subprocess
import sys
import tempfile
import time

try:
  import fcntl
except ImportError:
  # fcntl is POSIX-only. On platforms without it (e.g. Windows), the
  # pseudo-lock below degrades to the existence-polling loop alone.
  fcntl = None

from catapult_base import util


PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'
TELEMETRY_OUTPUT = 'chrome-telemetry-output'

# Uses ordered dict to make sure that bucket's key-value items are ordered from
# the most open to the most restrictive.
BUCKET_ALIASES = collections.OrderedDict((
    ('public', PUBLIC_BUCKET),
    ('partner', PARTNER_BUCKET),
    ('internal', INTERNAL_BUCKET),
    ('output', TELEMETRY_OUTPUT),
))

BUCKET_ALIAS_NAMES = BUCKET_ALIASES.keys()


_GSUTIL_PATH = os.path.join(util.GetCatapultDir(), 'third_party', 'gsutil',
                            'gsutil')

# TODO(tbarzic): A workaround for http://crbug.com/386416 and
# http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'


# If Environment variables has DISABLE_CLOUD_STORAGE_IO set to '1', any method
# calls that invoke cloud storage network io will throw exceptions.
DISABLE_CLOUD_STORAGE_IO = 'DISABLE_CLOUD_STORAGE_IO'


class CloudStorageError(Exception):
  """Base class for all errors raised by this module."""

  @staticmethod
  def _GetConfigInstructions():
    """Returns a human-readable help string for configuring credentials."""
    command = _GSUTIL_PATH
    if util.IsRunningOnCrosDevice():
      # Mirror the HOME override used in |_RunCommand| so the printed command
      # actually works on a cros device.
      command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, _GSUTIL_PATH)
    return ('To configure your credentials:\n'
            ' 1. Run "%s config" and follow its instructions.\n'
            ' 2. If you have a @google.com account, use that account.\n'
            ' 3. For the project-id, just enter 0.' % command)


class PermissionError(CloudStorageError):
  # NOTE: keeps its historical name for backward compatibility with callers
  # that catch cloud_storage.PermissionError, even though the name shadows a
  # Python 3 builtin.

  def __init__(self):
    super(PermissionError, self).__init__(
        'Attempted to access a file from Cloud Storage but you don\'t '
        'have permission. ' + self._GetConfigInstructions())


class CredentialsError(CloudStorageError):

  def __init__(self):
    super(CredentialsError, self).__init__(
        'Attempted to access a file from Cloud Storage but you have no '
        'configured credentials. ' + self._GetConfigInstructions())


class CloudStorageIODisabled(CloudStorageError):
  """Raised when DISABLE_CLOUD_STORAGE_IO forbids a network command."""
  pass


class NotFoundError(CloudStorageError):
  """Raised when the requested bucket or object does not exist."""
  pass


class ServerError(CloudStorageError):
  """Raised on a 500 Internal Server Error from Cloud Storage."""
  pass


# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  """Returns the first executable found on |extra_search_paths| + $PATH.

  Returns None if no matching executable file is found.
  """
  search_paths = list(extra_search_paths) + os.environ['PATH'].split(os.pathsep)
  for search_path in search_paths:
    executable_path = os.path.join(search_path, relative_executable_path)
    if util.IsExecutable(executable_path):
      return executable_path
  return None


def _EnsureExecutable(gsutil):
  """chmod +x if gsutil is not executable."""
  st = os.stat(gsutil)
  if not st.st_mode & stat.S_IEXEC:
    os.chmod(gsutil, st.st_mode | stat.S_IEXEC)


def _RunCommand(args):
  """Runs a gsutil sub-command, returning its stdout.

  Args:
    args: The gsutil sub-command and its arguments, e.g. ['ls', 'gs://...'].

  Raises:
    CloudStorageIODisabled: if DISABLE_CLOUD_STORAGE_IO=1 forbids the command.
    CredentialsError, PermissionError, NotFoundError, ServerError,
    CloudStorageError: depending on gsutil's stderr on failure.
  """
  # On cros device, as telemetry is running as root, home will be set to
  # /root/, which is not writable. gsutil will attempt to create a download
  # tracker dir in home dir and fail. To avoid this, override HOME dir to
  # something writable when running on cros device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  # http://crbug.com/386416, http://crbug.com/359293.
  gsutil_env = None
  if util.IsRunningOnCrosDevice():
    gsutil_env = os.environ.copy()
    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR

  # BUG FIX: this guard must inspect the raw gsutil sub-command, so it has to
  # run *before* the interpreter/gsutil executable is prepended to |args|
  # below. Previously args[0] was sys.executable or _GSUTIL_PATH, so the
  # ('help', 'hash', 'version') whitelist never matched and those offline
  # commands were wrongly blocked too.
  if (os.getenv(DISABLE_CLOUD_STORAGE_IO) == '1' and
      args[0] not in ('help', 'hash', 'version')):
    raise CloudStorageIODisabled(
        "Environment variable DISABLE_CLOUD_STORAGE_IO is set to 1. "
        'Command %s is not allowed to run' % args)

  if os.name == 'nt':
    # If Windows, prepend python. Python scripts aren't directly executable.
    args = [sys.executable, _GSUTIL_PATH] + args
  else:
    # Don't do it on POSIX, in case someone is using a shell script to
    # redirect.
    args = [_GSUTIL_PATH] + args
    _EnsureExecutable(_GSUTIL_PATH)

  gsutil = subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, env=gsutil_env)
  stdout, stderr = gsutil.communicate()

  if gsutil.returncode:
    # Map gsutil's stderr text onto this module's exception hierarchy.
    if stderr.startswith((
        'You are attempting to access protected data with no configured',
        'Failure: No handler was ready to authenticate.')):
      raise CredentialsError()
    if ('status=403' in stderr or 'status 403' in stderr or
        '403 Forbidden' in stderr):
      raise PermissionError()
    if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
        'No URLs matched' in stderr or 'One or more URLs matched no' in stderr):
      raise NotFoundError(stderr)
    if '500 Internal Server Error' in stderr:
      raise ServerError(stderr)
    raise CloudStorageError(stderr)

  return stdout


def List(bucket):
  """Returns the object paths in |bucket|, relative to the bucket root."""
  query = 'gs://%s/' % bucket
  stdout = _RunCommand(['ls', query])
  return [url[len(query):] for url in stdout.splitlines()]


def Exists(bucket, remote_path):
  """Returns True iff gs://|bucket|/|remote_path| exists."""
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
    return True
  except NotFoundError:
    return False


def Move(bucket1, bucket2, remote_path):
  """Moves |remote_path| from |bucket1| to |bucket2|."""
  url1 = 'gs://%s/%s' % (bucket1, remote_path)
  url2 = 'gs://%s/%s' % (bucket2, remote_path)
  logging.info('Moving %s to %s', url1, url2)
  _RunCommand(['mv', url1, url2])


def Copy(bucket_from, bucket_to, remote_path_from, remote_path_to):
  """Copy a file from one location in CloudStorage to another.

  Args:
    bucket_from: The cloud storage bucket where the file is currently located.
    bucket_to: The cloud storage bucket it is being copied to.
    remote_path_from: The file path where the file is located in bucket_from.
    remote_path_to: The file path it is being copied to in bucket_to.

  It should: cause no changes locally or to the starting file, and will
  overwrite any existing files in the destination location.
  """
  url1 = 'gs://%s/%s' % (bucket_from, remote_path_from)
  url2 = 'gs://%s/%s' % (bucket_to, remote_path_to)
  logging.info('Copying %s to %s', url1, url2)
  _RunCommand(['cp', url1, url2])


def Delete(bucket, remote_path):
  """Deletes gs://|bucket|/|remote_path|."""
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s', url)
  _RunCommand(['rm', url])


def Get(bucket, remote_path, local_path):
  """Downloads gs://|bucket|/|remote_path| to |local_path| under a lock."""
  with _PseudoFileLock(local_path):
    _GetLocked(bucket, remote_path, local_path)


@contextlib.contextmanager
def _PseudoFileLock(base_path):
  """Best-effort mutual exclusion around operations on |base_path|.

  Creates |base_path|.pseudo_lock and, where fcntl is available, also takes
  an advisory flock on it.
  """
  pseudo_lock_path = '%s.pseudo_lock' % base_path
  _CreateDirectoryIfNecessary(os.path.dirname(pseudo_lock_path))
  # This is somewhat of a racy hack because we don't have a good
  # cross-platform file lock. If we get one, this should be refactored
  # to use it.
  while os.path.exists(pseudo_lock_path):
    time.sleep(0.1)
  fd = os.open(pseudo_lock_path, os.O_RDONLY | os.O_CREAT)
  if fcntl:
    fcntl.flock(fd, fcntl.LOCK_EX)
  try:
    yield
  finally:
    if fcntl:
      fcntl.flock(fd, fcntl.LOCK_UN)
    try:
      os.close(fd)
      os.remove(pseudo_lock_path)
    except OSError:
      # We don't care if the pseudo-lock gets removed elsewhere before we have
      # a chance to do so.
      pass


def _CreateDirectoryIfNecessary(directory):
  """Creates |directory| (and parents) if it does not already exist."""
  if not os.path.exists(directory):
    os.makedirs(directory)


def _GetLocked(bucket, remote_path, local_path):
  """Downloads gs://|bucket|/|remote_path| to |local_path|.

  Downloads into a temporary file in the destination directory first, then
  moves it into place, so a partially-downloaded file never appears at
  |local_path|. Caller is expected to hold the pseudo-lock for |local_path|.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s', url, local_path)
  _CreateDirectoryIfNecessary(os.path.dirname(local_path))
  with tempfile.NamedTemporaryFile(
      dir=os.path.dirname(local_path),
      delete=False) as partial_download_path:
    try:
      # Windows won't download to an open file.
      partial_download_path.close()
      try:
        _RunCommand(['cp', url, partial_download_path.name])
      except ServerError:
        # Retry exactly once on a transient 500 from the server.
        logging.info('Cloud Storage server error, retrying download')
        _RunCommand(['cp', url, partial_download_path.name])
      shutil.move(partial_download_path.name, local_path)
    finally:
      if os.path.exists(partial_download_path.name):
        os.remove(partial_download_path.name)


def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """ Upload file in |local_path| to cloud storage.
  Args:
    bucket: the google cloud storage bucket name.
    remote_path: the remote file path in |bucket|.
    local_path: path of the local file to be uploaded.
    publicly_readable: whether the uploaded file has publicly readable
    permission.

  Returns:
    The url where the file is uploaded to.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  command_and_args = ['cp']
  extra_info = ''
  if publicly_readable:
    # gsutil's canned ACL flag; makes the uploaded object world-readable.
    command_and_args += ['-a', 'public-read']
    extra_info = ' (publicly readable)'
  command_and_args += [local_path, url]
  logging.info('Uploading %s to %s%s', local_path, url, extra_info)
  _RunCommand(command_and_args)
  return 'https://console.developers.google.com/m/cloudstorage/b/%s/o/%s' % (
      bucket, remote_path)


def GetIfHashChanged(cs_path, download_path, bucket, file_hash):
  """Downloads |cs_path| in |bucket| to |download_path| if |download_path|
  doesn't exist or its hash doesn't match |file_hash|.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(download_path):
    if (os.path.exists(download_path) and
        CalculateHash(download_path) == file_hash):
      return False
    _GetLocked(bucket, cs_path, download_path)
    return True


def GetIfChanged(file_path, bucket):
  """Gets the file at file_path if it has a hash file that doesn't match or
  if there is no local copy of file_path, but there is a hash file for it.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(file_path):
    hash_path = file_path + '.sha1'
    if not os.path.exists(hash_path):
      logging.warning('Hash file not found: %s', hash_path)
      return False

    expected_hash = ReadHash(hash_path)
    if os.path.exists(file_path) and CalculateHash(file_path) == expected_hash:
      return False
    # Objects in the bucket are stored keyed by their SHA1 hash, so the hash
    # read from the .sha1 file doubles as the remote path.
    _GetLocked(bucket, expected_hash, file_path)
    return True


def GetFilesInDirectoryIfChanged(directory, bucket):
  """ Scan the directory for .sha1 files, and download them from the given
  bucket in cloud storage if the local and remote hash don't match or
  there is no local copy.
  """
  if not os.path.isdir(directory):
    raise ValueError(
        '%s does not exist. Must provide a valid directory path.' % directory)
  # Don't allow the root directory to be a serving_dir.
  if directory == os.path.abspath(os.sep):
    raise ValueError('Trying to serve root directory from HTTP server.')
  for dirpath, _, filenames in os.walk(directory):
    for filename in filenames:
      path_name, extension = os.path.splitext(
          os.path.join(dirpath, filename))
      if extension != '.sha1':
        continue
      GetIfChanged(path_name, bucket)


def CalculateHash(file_path):
  """Calculates and returns the hash of the file at file_path."""
  sha1 = hashlib.sha1()
  with open(file_path, 'rb') as f:
    while True:
      # Read in 1mb chunks, so it doesn't all have to be loaded into memory.
      chunk = f.read(1024 * 1024)
      if not chunk:
        break
      sha1.update(chunk)
  return sha1.hexdigest()


def ReadHash(hash_path):
  """Returns the hash stored in |hash_path|, with trailing whitespace removed."""
  with open(hash_path, 'rb') as f:
    return f.read(1024).rstrip()