1# -*- coding: utf-8 -*- 2# Copyright 2010 Google Inc. All Rights Reserved. 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15"""Static data and helper functions.""" 16 17from __future__ import absolute_import 18 19import collections 20import errno 21import logging 22import math 23import multiprocessing 24import os 25import pkgutil 26import re 27import struct 28import sys 29import tempfile 30import textwrap 31import threading 32import traceback 33import xml.etree.ElementTree as ElementTree 34 35import boto 36from boto import config 37import boto.auth 38from boto.exception import NoAuthHandlerFound 39from boto.gs.connection import GSConnection 40from boto.provider import Provider 41from boto.pyami.config import BotoConfigLocations 42import httplib2 43from oauth2client.client import HAS_CRYPTO 44from retry_decorator import retry_decorator 45 46import gslib 47from gslib.exception import CommandException 48from gslib.storage_url import StorageUrlFromString 49from gslib.translation_helper import AclTranslation 50from gslib.translation_helper import GenerationFromUrlAndString 51from gslib.translation_helper import S3_ACL_MARKER_GUID 52from gslib.translation_helper import S3_DELETE_MARKER_GUID 53from gslib.translation_helper import S3_MARKER_GUIDS 54 55# Detect platform types. 
PLATFORM = str(sys.platform).lower()
IS_WINDOWS = 'win32' in PLATFORM
IS_CYGWIN = 'cygwin' in PLATFORM
IS_LINUX = 'linux' in PLATFORM
IS_OSX = 'darwin' in PLATFORM

# pylint: disable=g-import-not-at-top
if IS_WINDOWS:
  from ctypes import c_int
  from ctypes import c_uint64
  from ctypes import c_char_p
  from ctypes import c_wchar_p
  from ctypes import windll
  from ctypes import POINTER
  from ctypes import WINFUNCTYPE
  from ctypes import WinError

# pylint: disable=g-import-not-at-top
try:
  # This module doesn't necessarily exist on Windows.
  import resource
  HAS_RESOURCE_MODULE = True
except ImportError, e:
  HAS_RESOURCE_MODULE = False

# Commonly used binary byte-size constants.
ONE_KIB = 1024
ONE_MIB = 1024 * 1024
TWO_MIB = 2 * ONE_MIB
EIGHT_MIB = 8 * ONE_MIB
TEN_MIB = 10 * ONE_MIB
DEFAULT_FILE_BUFFER_SIZE = 8 * ONE_KIB
_DEFAULT_LINES = 25

# By default, the timeout for SSL read errors is infinite. This could
# cause gsutil to hang on network disconnect, so pick a more reasonable
# timeout.
SSL_TIMEOUT = 60

# Start with a progress callback every 64 KiB during uploads/downloads (JSON
# API). Callback implementation should back off until it hits the maximum size
# so that callbacks do not create huge amounts of log output.
START_CALLBACK_PER_BYTES = 1024*64
MAX_CALLBACK_PER_BYTES = 1024*1024*100

# Upload/download files in 8 KiB chunks over the HTTP connection.
TRANSFER_BUFFER_SIZE = 1024*8

# Default number of progress callbacks during transfer (XML API).
XML_PROGRESS_CALLBACKS = 10

# For files >= this size, output a message indicating that we're running an
# operation on the file (like hashing or gzipping) so it does not appear to the
# user that the command is hanging.
MIN_SIZE_COMPUTE_LOGGING = 100*1024*1024  # 100 MiB

NO_MAX = sys.maxint

UTF8 = 'utf-8'

# Matches version strings such as "4.6" or "4.6pre1" into major/minor/suffix.
VERSION_MATCHER = re.compile(r'^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?')

RELEASE_NOTES_URL = 'https://pub.storage.googleapis.com/gsutil_ReleaseNotes.txt'

# Binary exponentiation strings: (power of two, byte suffix, bit suffix,
# optional short human-input suffix).
_EXP_STRINGS = [
    (0, 'B', 'bit'),
    (10, 'KiB', 'Kibit', 'K'),
    (20, 'MiB', 'Mibit', 'M'),
    (30, 'GiB', 'Gibit', 'G'),
    (40, 'TiB', 'Tibit', 'T'),
    (50, 'PiB', 'Pibit', 'P'),
    (60, 'EiB', 'Eibit', 'E'),
]


global manager  # pylint: disable=global-at-module-level
# Serializes lazy creation of the on-disk certs file in GetCertsFile().
certs_file_lock = threading.Lock()
# Temp certs files created by GetCertsFile(); cleaned up at program exit.
configured_certs_files = []


def _GenerateSuffixRegex():
  """Creates a suffix regex for human-readable byte counts.

  Returns:
    (suffix_to_si, matcher): dict mapping lowercase suffix (e.g. 'kib') to its
    index in _EXP_STRINGS, and a compiled regex matching "<num> <suffix>".
  """
  human_bytes_re = r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?'
  suffixes = []
  suffix_to_si = {}
  for i, si in enumerate(_EXP_STRINGS):
    si_suffixes = [s.lower() for s in list(si)[1:]]
    for suffix in si_suffixes:
      suffix_to_si[suffix] = i
    suffixes.extend(si_suffixes)
  human_bytes_re %= '|'.join(suffixes)
  matcher = re.compile(human_bytes_re)
  return suffix_to_si, matcher

SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex()

SECONDS_PER_DAY = 3600 * 24

# On Unix-like systems, we will set the maximum number of open files to avoid
# hitting the limit imposed by the OS. This number was obtained experimentally.
MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000

GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz'

Retry = retry_decorator.retry  # pylint: disable=invalid-name

# Cache the values from this check such that they're available to all callers
# without needing to run all the checks again (some of these, such as calling
# multiprocessing.Manager(), are expensive operations).
165cached_multiprocessing_is_available = None 166cached_multiprocessing_is_available_stack_trace = None 167cached_multiprocessing_is_available_message = None 168 169 170# Enum class for specifying listing style. 171class ListingStyle(object): 172 SHORT = 'SHORT' 173 LONG = 'LONG' 174 LONG_LONG = 'LONG_LONG' 175 176 177def UsingCrcmodExtension(crcmod): 178 return (boto.config.get('GSUtil', 'test_assume_fast_crcmod', None) or 179 (getattr(crcmod, 'crcmod', None) and 180 getattr(crcmod.crcmod, '_usingExtension', None))) 181 182 183def CheckFreeSpace(path): 184 """Return path/drive free space (in bytes).""" 185 if IS_WINDOWS: 186 try: 187 # pylint: disable=invalid-name 188 get_disk_free_space_ex = WINFUNCTYPE(c_int, c_wchar_p, 189 POINTER(c_uint64), 190 POINTER(c_uint64), 191 POINTER(c_uint64)) 192 get_disk_free_space_ex = get_disk_free_space_ex( 193 ('GetDiskFreeSpaceExW', windll.kernel32), ( 194 (1, 'lpszPathName'), 195 (2, 'lpFreeUserSpace'), 196 (2, 'lpTotalSpace'), 197 (2, 'lpFreeSpace'),)) 198 except AttributeError: 199 get_disk_free_space_ex = WINFUNCTYPE(c_int, c_char_p, 200 POINTER(c_uint64), 201 POINTER(c_uint64), 202 POINTER(c_uint64)) 203 get_disk_free_space_ex = get_disk_free_space_ex( 204 ('GetDiskFreeSpaceExA', windll.kernel32), ( 205 (1, 'lpszPathName'), 206 (2, 'lpFreeUserSpace'), 207 (2, 'lpTotalSpace'), 208 (2, 'lpFreeSpace'),)) 209 210 def GetDiskFreeSpaceExErrCheck(result, unused_func, args): 211 if not result: 212 raise WinError() 213 return args[1].value 214 get_disk_free_space_ex.errcheck = GetDiskFreeSpaceExErrCheck 215 216 return get_disk_free_space_ex(os.getenv('SystemDrive')) 217 else: 218 (_, f_frsize, _, _, f_bavail, _, _, _, _, _) = os.statvfs(path) 219 return f_frsize * f_bavail 220 221 222def CreateDirIfNeeded(dir_path, mode=0777): 223 """Creates a directory, suppressing already-exists errors.""" 224 if not os.path.exists(dir_path): 225 try: 226 # Unfortunately, even though we catch and ignore EEXIST, this call will 227 # output a 
(needless) error message (no way to avoid that in Python). 228 os.makedirs(dir_path, mode) 229 # Ignore 'already exists' in case user tried to start up several 230 # resumable uploads concurrently from a machine where no tracker dir had 231 # yet been created. 232 except OSError as e: 233 if e.errno != errno.EEXIST: 234 raise 235 236 237def DivideAndCeil(dividend, divisor): 238 """Returns ceil(dividend / divisor). 239 240 Takes care to avoid the pitfalls of floating point arithmetic that could 241 otherwise yield the wrong result for large numbers. 242 243 Args: 244 dividend: Dividend for the operation. 245 divisor: Divisor for the operation. 246 247 Returns: 248 Quotient. 249 """ 250 quotient = dividend // divisor 251 if (dividend % divisor) != 0: 252 quotient += 1 253 return quotient 254 255 256def GetGsutilStateDir(): 257 """Returns the location of the directory for gsutil state files. 258 259 Certain operations, such as cross-process credential sharing and 260 resumable transfer tracking, need a known location for state files which 261 are created by gsutil as-needed. 262 263 This location should only be used for storing data that is required to be in 264 a static location. 265 266 Returns: 267 Path to directory for gsutil static state files. 268 """ 269 config_file_dir = config.get( 270 'GSUtil', 'state_dir', 271 os.path.expanduser(os.path.join('~', '.gsutil'))) 272 CreateDirIfNeeded(config_file_dir) 273 return config_file_dir 274 275 276def GetCredentialStoreFilename(): 277 return os.path.join(GetGsutilStateDir(), 'credstore') 278 279 280def GetGceCredentialCacheFilename(): 281 return os.path.join(GetGsutilStateDir(), 'gcecredcache') 282 283 284def GetTabCompletionLogFilename(): 285 return os.path.join(GetGsutilStateDir(), 'tab-completion-logs') 286 287 288def GetTabCompletionCacheFilename(): 289 tab_completion_dir = os.path.join(GetGsutilStateDir(), 'tab-completion') 290 # Limit read permissions on the directory to owner for privacy. 
291 CreateDirIfNeeded(tab_completion_dir, mode=0700) 292 return os.path.join(tab_completion_dir, 'cache') 293 294 295def PrintTrackerDirDeprecationWarningIfNeeded(): 296 # TODO: Remove this along with the tracker_dir config value 1 year after 297 # 4.6 release date. Use state_dir instead. 298 if config.has_option('GSUtil', 'resumable_tracker_dir'): 299 sys.stderr.write('Warning: you have set resumable_tracker_dir in your ' 300 '.boto configuration file. This configuration option is ' 301 'deprecated; please use the state_dir configuration ' 302 'option instead.\n') 303 304 305# Name of file where we keep the timestamp for the last time we checked whether 306# a new version of gsutil is available. 307PrintTrackerDirDeprecationWarningIfNeeded() 308CreateDirIfNeeded(GetGsutilStateDir()) 309LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE = ( 310 os.path.join(GetGsutilStateDir(), '.last_software_update_check')) 311 312 313def HasConfiguredCredentials(): 314 """Determines if boto credential/config file exists.""" 315 has_goog_creds = (config.has_option('Credentials', 'gs_access_key_id') and 316 config.has_option('Credentials', 'gs_secret_access_key')) 317 has_amzn_creds = (config.has_option('Credentials', 'aws_access_key_id') and 318 config.has_option('Credentials', 'aws_secret_access_key')) 319 has_oauth_creds = ( 320 config.has_option('Credentials', 'gs_oauth2_refresh_token')) 321 has_service_account_creds = ( 322 HAS_CRYPTO and 323 config.has_option('Credentials', 'gs_service_client_id') and 324 config.has_option('Credentials', 'gs_service_key_file')) 325 326 if (has_goog_creds or has_amzn_creds or has_oauth_creds or 327 has_service_account_creds): 328 return True 329 330 valid_auth_handler = None 331 try: 332 valid_auth_handler = boto.auth.get_auth_handler( 333 GSConnection.DefaultHost, config, Provider('google'), 334 requested_capability=['s3']) 335 # Exclude the no-op auth handler as indicating credentials are configured. 
336 # Note we can't use isinstance() here because the no-op module may not be 337 # imported so we can't get a reference to the class type. 338 if getattr(getattr(valid_auth_handler, '__class__', None), 339 '__name__', None) == 'NoOpAuth': 340 valid_auth_handler = None 341 except NoAuthHandlerFound: 342 pass 343 344 return valid_auth_handler 345 346 347def ConfigureNoOpAuthIfNeeded(): 348 """Sets up no-op auth handler if no boto credentials are configured.""" 349 if not HasConfiguredCredentials(): 350 if (config.has_option('Credentials', 'gs_service_client_id') 351 and not HAS_CRYPTO): 352 if os.environ.get('CLOUDSDK_WRAPPER') == '1': 353 raise CommandException('\n'.join(textwrap.wrap( 354 'Your gsutil is configured with an OAuth2 service account, but ' 355 'you do not have PyOpenSSL or PyCrypto 2.6 or later installed. ' 356 'Service account authentication requires one of these libraries; ' 357 'please reactivate your service account via the gcloud auth ' 358 'command and ensure any gcloud packages necessary for ' 359 'service accounts are present.'))) 360 else: 361 raise CommandException('\n'.join(textwrap.wrap( 362 'Your gsutil is configured with an OAuth2 service account, but ' 363 'you do not have PyOpenSSL or PyCrypto 2.6 or later installed. ' 364 'Service account authentication requires one of these libraries; ' 365 'please install either of them to proceed, or configure a ' 366 'different type of credentials with "gsutil config".'))) 367 else: 368 # With no boto config file the user can still access publicly readable 369 # buckets and objects. 
370 from gslib import no_op_auth_plugin # pylint: disable=unused-variable 371 372 373def GetConfigFilePath(): 374 config_path = 'no config found' 375 for path in BotoConfigLocations: 376 try: 377 with open(path, 'r'): 378 config_path = path 379 break 380 except IOError: 381 pass 382 return config_path 383 384 385def GetBotoConfigFileList(): 386 """Returns list of boto config files that exist.""" 387 config_paths = boto.pyami.config.BotoConfigLocations 388 if 'AWS_CREDENTIAL_FILE' in os.environ: 389 config_paths.append(os.environ['AWS_CREDENTIAL_FILE']) 390 config_files = {} 391 for config_path in config_paths: 392 if os.path.exists(config_path): 393 config_files[config_path] = 1 394 cf_list = [] 395 for config_file in config_files: 396 cf_list.append(config_file) 397 return cf_list 398 399 400def GetCertsFile(): 401 """Configures and returns the CA Certificates file. 402 403 If one is already configured, use it. Otherwise, amend the configuration 404 (in boto.config) to use the cert roots distributed with gsutil. 405 406 Returns: 407 string filename of the certs file to use. 408 """ 409 certs_file = boto.config.get('Boto', 'ca_certificates_file', None) 410 if not certs_file: 411 with certs_file_lock: 412 if configured_certs_files: 413 disk_certs_file = configured_certs_files[0] 414 else: 415 disk_certs_file = os.path.abspath( 416 os.path.join(gslib.GSLIB_DIR, 'data', 'cacerts.txt')) 417 if not os.path.exists(disk_certs_file): 418 # If the file is not present on disk, this means the gslib module 419 # doesn't actually exist on disk anywhere. This can happen if it's 420 # being imported from a zip file. Unfortunately, we have to copy the 421 # certs file to a local temp file on disk because the underlying SSL 422 # socket requires it to be a filesystem path. 423 certs_data = pkgutil.get_data('gslib', 'data/cacerts.txt') 424 if not certs_data: 425 raise CommandException('Certificates file not found. 
Please ' 426 'reinstall gsutil from scratch') 427 fd, fname = tempfile.mkstemp(suffix='.txt', prefix='gsutil-cacerts') 428 f = os.fdopen(fd, 'w') 429 f.write(certs_data) 430 f.close() 431 configured_certs_files.append(fname) 432 disk_certs_file = fname 433 certs_file = disk_certs_file 434 return certs_file 435 436 437def GetCleanupFiles(): 438 """Returns a list of temp files to delete (if possible) when program exits.""" 439 cleanup_files = [] 440 if configured_certs_files: 441 cleanup_files += configured_certs_files 442 return cleanup_files 443 444 445def ProxyInfoFromEnvironmentVar(proxy_env_var): 446 """Reads proxy info from the environment and converts to httplib2.ProxyInfo. 447 448 Args: 449 proxy_env_var: Environment variable string to read, such as http_proxy or 450 https_proxy. 451 452 Returns: 453 httplib2.ProxyInfo constructed from the environment string. 454 """ 455 proxy_url = os.environ.get(proxy_env_var) 456 if not proxy_url or not proxy_env_var.lower().startswith('http'): 457 return httplib2.ProxyInfo(httplib2.socks.PROXY_TYPE_HTTP, None, 0) 458 proxy_protocol = proxy_env_var.lower().split('_')[0] 459 if not proxy_url.lower().startswith('http'): 460 # proxy_info_from_url requires a protocol, which is always http or https. 461 proxy_url = proxy_protocol + '://' + proxy_url 462 return httplib2.proxy_info_from_url(proxy_url, method=proxy_protocol) 463 464 465def GetNewHttp(http_class=httplib2.Http, **kwargs): 466 """Creates and returns a new httplib2.Http instance. 467 468 Args: 469 http_class: Optional custom Http class to use. 470 **kwargs: Arguments to pass to http_class constructor. 471 472 Returns: 473 An initialized httplib2.Http instance. 
474 """ 475 proxy_info = httplib2.ProxyInfo( 476 proxy_type=3, 477 proxy_host=boto.config.get('Boto', 'proxy', None), 478 proxy_port=boto.config.getint('Boto', 'proxy_port', 0), 479 proxy_user=boto.config.get('Boto', 'proxy_user', None), 480 proxy_pass=boto.config.get('Boto', 'proxy_pass', None), 481 proxy_rdns=boto.config.get('Boto', 'proxy_rdns', False)) 482 483 if not (proxy_info.proxy_host and proxy_info.proxy_port): 484 # Fall back to using the environment variable. 485 for proxy_env_var in ['http_proxy', 'https_proxy', 'HTTPS_PROXY']: 486 if proxy_env_var in os.environ and os.environ[proxy_env_var]: 487 proxy_info = ProxyInfoFromEnvironmentVar(proxy_env_var) 488 # Assume proxy_rnds is True if a proxy environment variable exists. 489 proxy_info.proxy_rdns = boto.config.get('Boto', 'proxy_rdns', True) 490 break 491 492 # Some installers don't package a certs file with httplib2, so use the 493 # one included with gsutil. 494 kwargs['ca_certs'] = GetCertsFile() 495 # Use a non-infinite SSL timeout to avoid hangs during network flakiness. 496 kwargs['timeout'] = SSL_TIMEOUT 497 http = http_class(proxy_info=proxy_info, **kwargs) 498 http.disable_ssl_certificate_validation = (not config.getbool( 499 'Boto', 'https_validate_certificates')) 500 return http 501 502 503# Retry for 10 minutes with exponential backoff, which corresponds to 504# the maximum Downtime Period specified in the GCS SLA 505# (https://cloud.google.com/storage/sla) 506def GetNumRetries(): 507 return config.getint('Boto', 'num_retries', 23) 508 509 510def GetMaxRetryDelay(): 511 return config.getint('Boto', 'max_retry_delay', 32) 512 513 514# Resumable downloads and uploads make one HTTP call per chunk (and must be 515# in multiples of 256KiB). Overridable for testing. 
516def GetJsonResumableChunkSize(): 517 chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size', 518 1024*1024*100L) 519 if chunk_size == 0: 520 chunk_size = 1024*256L 521 elif chunk_size % 1024*256L != 0: 522 chunk_size += (1024*256L - (chunk_size % (1024*256L))) 523 return chunk_size 524 525 526def _RoundToNearestExponent(num): 527 i = 0 528 while i+1 < len(_EXP_STRINGS) and num >= (2 ** _EXP_STRINGS[i+1][0]): 529 i += 1 530 return i, round(float(num) / 2 ** _EXP_STRINGS[i][0], 2) 531 532 533def MakeHumanReadable(num): 534 """Generates human readable string for a number of bytes. 535 536 Args: 537 num: The number, in bytes. 538 539 Returns: 540 A string form of the number using size abbreviations (KiB, MiB, etc.). 541 """ 542 i, rounded_val = _RoundToNearestExponent(num) 543 return '%g %s' % (rounded_val, _EXP_STRINGS[i][1]) 544 545 546def MakeBitsHumanReadable(num): 547 """Generates human readable string for a number of bits. 548 549 Args: 550 num: The number, in bits. 551 552 Returns: 553 A string form of the number using bit size abbreviations (kbit, Mbit, etc.) 554 """ 555 i, rounded_val = _RoundToNearestExponent(num) 556 return '%g %s' % (rounded_val, _EXP_STRINGS[i][2]) 557 558 559def HumanReadableToBytes(human_string): 560 """Tries to convert a human-readable string to a number of bytes. 561 562 Args: 563 human_string: A string supplied by user, e.g. '1M', '3 GiB'. 564 Returns: 565 An integer containing the number of bytes. 566 Raises: 567 ValueError: on an invalid string. 568 """ 569 human_string = human_string.lower() 570 m = MATCH_HUMAN_BYTES.match(human_string) 571 if m: 572 num = float(m.group('num')) 573 if m.group('suffix'): 574 power = _EXP_STRINGS[SUFFIX_TO_SI[m.group('suffix')]][0] 575 num *= (2.0 ** power) 576 num = int(round(num)) 577 return num 578 raise ValueError('Invalid byte string specified: %s' % human_string) 579 580 581def Percentile(values, percent, key=lambda x: x): 582 """Find the percentile of a list of values. 
583 584 Taken from: http://code.activestate.com/recipes/511478/ 585 586 Args: 587 values: a list of numeric values. Note that the values MUST BE already 588 sorted. 589 percent: a float value from 0.0 to 1.0. 590 key: optional key function to compute value from each element of the list 591 of values. 592 593 Returns: 594 The percentile of the values. 595 """ 596 if not values: 597 return None 598 k = (len(values) - 1) * percent 599 f = math.floor(k) 600 c = math.ceil(k) 601 if f == c: 602 return key(values[int(k)]) 603 d0 = key(values[int(f)]) * (c-k) 604 d1 = key(values[int(c)]) * (k-f) 605 return d0 + d1 606 607 608def RemoveCRLFFromString(input_str): 609 """Returns the input string with all \\n and \\r removed.""" 610 return re.sub(r'[\r\n]', '', input_str) 611 612 613def UnaryDictToXml(message): 614 """Generates XML representation of a nested dict. 615 616 This dict contains exactly one top-level entry and an arbitrary number of 617 2nd-level entries, e.g. capturing a WebsiteConfiguration message. 618 619 Args: 620 message: The dict encoding the message. 621 622 Returns: 623 XML string representation of the input dict. 624 625 Raises: 626 Exception: if dict contains more than one top-level entry. 627 """ 628 if len(message) != 1: 629 raise Exception('Expected dict of size 1, got size %d' % len(message)) 630 631 name, content = message.items()[0] 632 element_type = ElementTree.Element(name) 633 for element_property, value in sorted(content.items()): 634 node = ElementTree.SubElement(element_type, element_property) 635 node.text = value 636 return ElementTree.tostring(element_type) 637 638 639def LookUpGsutilVersion(gsutil_api, url_str): 640 """Looks up the gsutil version of the specified gsutil tarball URL. 641 642 Version is specified in the metadata field set on that object. 643 644 Args: 645 gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball. 646 url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz'). 

  Returns:
    Version string if URL is a cloud URL containing x-goog-meta-gsutil-version
    metadata, else None.
  """
  url = StorageUrlFromString(url_str)
  if url.IsCloudUrl():
    obj = gsutil_api.GetObjectMetadata(url.bucket_name, url.object_name,
                                       provider=url.scheme,
                                       fields=['metadata'])
    if obj.metadata and obj.metadata.additionalProperties:
      for prop in obj.metadata.additionalProperties:
        if prop.key == 'gsutil_version':
          return prop.value


def GetGsutilVersionModifiedTime():
  """Returns unix timestamp of when the VERSION file was last modified."""
  if not gslib.VERSION_FILE:
    return 0
  return int(os.path.getmtime(gslib.VERSION_FILE))


def IsRunningInteractively():
  """Returns True if currently running interactively on a TTY."""
  return sys.stdout.isatty() and sys.stderr.isatty() and sys.stdin.isatty()


# NOTE(review): "Certifcates" is a typo in this private helper's name; it is
# only referenced on the line immediately below.
def _HttpsValidateCertifcatesEnabled():
  return config.get('Boto', 'https_validate_certificates', True)

CERTIFICATE_VALIDATION_ENABLED = _HttpsValidateCertifcatesEnabled()


def _BotoIsSecure():
  return config.get('Boto', 'is_secure', True)

BOTO_IS_SECURE = _BotoIsSecure()


def ResumableThreshold():
  # Minimum object size (bytes) at which resumable transfer semantics are used.
  return config.getint('GSUtil', 'resumable_threshold', EIGHT_MIB)


def AddAcceptEncoding(headers):
  """Adds accept-encoding:gzip to the dictionary of headers."""
  # If Accept-Encoding is not already set, set it to enable gzip.
  if 'accept-encoding' not in headers:
    headers['accept-encoding'] = 'gzip'


# pylint: disable=too-many-statements
def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
  """Print full info for given object (like what displays for gsutil ls -L).

  Args:
    bucket_listing_ref: BucketListingRef being listed.
                        Must have ref_type OBJECT and a populated root_object
                        with the desired fields.
    incl_acl: True if ACL info should be output.

  Returns:
    Tuple (number of objects, object_length)

  Raises:
    Exception: if calling bug encountered.
  """
  url_str = bucket_listing_ref.url_string
  storage_url = StorageUrlFromString(url_str)
  obj = bucket_listing_ref.root_object

  # S3 delete markers are listed but contribute no objects/bytes to the total.
  if (obj.metadata and S3_DELETE_MARKER_GUID in
      obj.metadata.additionalProperties):
    num_bytes = 0
    num_objs = 0
    url_str += '<DeleteMarker>'
  else:
    num_bytes = obj.size
    num_objs = 1

  print '%s:' % url_str.encode(UTF8)
  if obj.updated:
    # NOTE(review): prints the object's "updated" timestamp under the
    # "Creation time" label — confirm this is intended.
    print '\tCreation time:\t\t%s' % obj.updated.strftime(
        '%a, %d %b %Y %H:%M:%S GMT')
  if obj.cacheControl:
    print '\tCache-Control:\t\t%s' % obj.cacheControl
  if obj.contentDisposition:
    print '\tContent-Disposition:\t\t%s' % obj.contentDisposition
  if obj.contentEncoding:
    print '\tContent-Encoding:\t\t%s' % obj.contentEncoding
  if obj.contentLanguage:
    print '\tContent-Language:\t%s' % obj.contentLanguage
  print '\tContent-Length:\t\t%s' % obj.size
  print '\tContent-Type:\t\t%s' % obj.contentType
  if obj.componentCount:
    print '\tComponent-Count:\t%d' % obj.componentCount
  marker_props = {}
  if obj.metadata and obj.metadata.additionalProperties:
    # Split user metadata from internal S3-marker metadata entries.
    non_marker_props = []
    for add_prop in obj.metadata.additionalProperties:
      if add_prop.key not in S3_MARKER_GUIDS:
        non_marker_props.append(add_prop)
      else:
        marker_props[add_prop.key] = add_prop.value
    if non_marker_props:
      print '\tMetadata:'
      for ap in non_marker_props:
        meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value)
        print meta_string.encode(UTF8)
  if obj.crc32c: print '\tHash (crc32c):\t\t%s' % obj.crc32c
  if obj.md5Hash: print '\tHash (md5):\t\t%s' % obj.md5Hash
  print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
  if obj.generation:
    generation_str = GenerationFromUrlAndString(storage_url, obj.generation)
    print '\tGeneration:\t\t%s' % generation_str
  if obj.metageneration:
    print '\tMetageneration:\t\t%s' % obj.metageneration
  if incl_acl:
    # JSON API won't return acls as part of the response unless we have
    # full control scope
    if obj.acl:
      print '\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl)
    elif S3_ACL_MARKER_GUID in marker_props:
      print '\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID]
    else:
      print ('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
             'permission\n\t\t\t\ton the object to read its ACL.')

  return (num_objs, num_bytes)


def CompareVersions(first, second):
  """Compares the first and second gsutil version strings.

  For example, 3.33 > 3.7, and 4.1 is a greater major version than 3.33.
  Does not handle multiple periods (e.g. 3.3.4) or complicated suffixes
  (e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as
  less than its non-suffix counterpart (e.g. 3.32 > 3.32pre).

  Args:
    first: First gsutil version string.
    second: Second gsutil version string.

  Returns:
    (g, m):
       g is True if first known to be greater than second, else False.
       m is True if first known to be greater by at least 1 major version,
         else False.
  """
  m1 = VERSION_MATCHER.match(str(first))
  m2 = VERSION_MATCHER.match(str(second))

  # If passed strings we don't know how to handle, be conservative.
  if not m1 or not m2:
    return (False, False)

  major_ver1 = int(m1.group('maj'))
  minor_ver1 = int(m1.group('min')) if m1.group('min') else 0
  suffix_ver1 = m1.group('suffix')
  major_ver2 = int(m2.group('maj'))
  minor_ver2 = int(m2.group('min')) if m2.group('min') else 0
  suffix_ver2 = m2.group('suffix')

  if major_ver1 > major_ver2:
    return (True, True)
  elif major_ver1 == major_ver2:
    if minor_ver1 > minor_ver2:
      return (True, False)
    elif minor_ver1 == minor_ver2:
      # Equal major.minor: a suffixed version is less than its plain
      # counterpart (e.g. 3.32 > 3.32pre).
      return (bool(suffix_ver2) and not suffix_ver1, False)
  return (False, False)


def _IncreaseSoftLimitForResource(resource_name, fallback_value):
  """Sets a new soft limit for the maximum number of open files.

  The soft limit is used for this process (and its children), but the
  hard limit is set by the system and cannot be exceeded.

  We will first try to set the soft limit to the hard limit's value; if that
  fails, we will try to set the soft limit to the fallback_value iff this would
  increase the soft limit.

  Args:
    resource_name: Name of the resource to increase the soft limit for.
    fallback_value: Fallback value to be used if we couldn't set the
                    soft value to the hard value (e.g., if the hard value
                    is "unlimited").

  Returns:
    Current soft limit for the resource (after any changes we were able to
    make), or -1 if the resource doesn't exist.
  """

  # Get the value of the resource.
  try:
    (soft_limit, hard_limit) = resource.getrlimit(resource_name)
  except (resource.error, ValueError):
    # The resource wasn't present, so we can't do anything here.
    return -1

  # Try to set the value of the soft limit to the value of the hard limit.
  if hard_limit > soft_limit:  # Some OS's report 0 for "unlimited".
    try:
      resource.setrlimit(resource_name, (hard_limit, hard_limit))
      return hard_limit
    except (resource.error, ValueError):
      # We'll ignore this and try the fallback value.
      pass

  # Try to set the value of the soft limit to the fallback value.
  if soft_limit < fallback_value:
    try:
      resource.setrlimit(resource_name, (fallback_value, hard_limit))
      return fallback_value
    except (resource.error, ValueError):
      # We couldn't change the soft limit, so just report the current
      # value of the soft limit.
      return soft_limit
  else:
    return soft_limit


def GetCloudApiInstance(cls, thread_state=None):
  """Gets a gsutil Cloud API instance.

  Since Cloud API implementations are not guaranteed to be thread-safe, each
  thread needs its own instance. These instances are passed to each thread
  via the thread pool logic in command.

  Args:
    cls: Command class to be used for single-threaded case.
    thread_state: Per thread state from this thread containing a gsutil
                  Cloud API instance.

  Returns:
    gsutil Cloud API instance.
  """
  return thread_state or cls.gsutil_api


def GetFileSize(fp, position_to_eof=False):
  """Returns size of file, optionally leaving fp positioned at EOF."""
  if not position_to_eof:
    cur_pos = fp.tell()
  fp.seek(0, os.SEEK_END)
  cur_file_size = fp.tell()
  if not position_to_eof:
    fp.seek(cur_pos)
  return cur_file_size


def GetStreamFromFileUrl(storage_url):
  # Stream URLs read from stdin; regular file URLs are opened in binary mode.
  if storage_url.IsStream():
    return sys.stdin
  else:
    return open(storage_url.object_name, 'rb')


def UrlsAreForSingleProvider(url_args):
  """Tests whether the URLs are all for a single provider.

  Args:
    url_args: Strings to check.

  Returns:
    True if URLs are for single provider, False otherwise.
  """
  provider = None
  url = None
  for url_str in url_args:
    url = StorageUrlFromString(url_str)
    if not provider:
      provider = url.scheme
    elif url.scheme != provider:
      return False
  return provider is not None


def HaveFileUrls(args_to_check):
  """Checks whether args_to_check contain any file URLs.

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any file URLs.
  """
  for url_str in args_to_check:
    storage_url = StorageUrlFromString(url_str)
    if storage_url.IsFileUrl():
      return True
  return False


def HaveProviderUrls(args_to_check):
  """Checks whether args_to_check contains any provider URLs (like 'gs://').

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any provider URLs.
  """
  for url_str in args_to_check:
    storage_url = StorageUrlFromString(url_str)
    if storage_url.IsCloudUrl() and storage_url.IsProvider():
      return True
  return False

# This must be defined at the module level for pickling across processes.
MultiprocessingIsAvailableResult = collections.namedtuple(
    'MultiprocessingIsAvailableResult', ['is_available', 'stack_trace'])


def CheckMultiprocessingAvailableAndInit(logger=None):
  """Checks if multiprocessing is available.

  There are some environments in which there is no way to use multiprocessing
  logic that's built into Python (e.g., if /dev/shm is not available, then
  we can't create semaphores). This simply tries out a few things that will be
  needed to make sure the environment can support the pieces of the
  multiprocessing module that we need.

  If multiprocessing is available, this performs necessary initialization for
  multiprocessing. See gslib.command.InitializeMultiprocessingVariables for
  an explanation of why this is necessary.
974 975 Args: 976 logger: logging.logger to use for debug output. 977 978 Returns: 979 (multiprocessing_is_available, stack_trace): 980 multiprocessing_is_available: True iff the multiprocessing module is 981 available for use. 982 stack_trace: The stack trace generated by the call we tried that failed. 983 """ 984 # pylint: disable=global-variable-undefined 985 global cached_multiprocessing_is_available 986 global cached_multiprocessing_check_stack_trace 987 global cached_multiprocessing_is_available_message 988 if cached_multiprocessing_is_available is not None: 989 if logger: 990 logger.debug(cached_multiprocessing_check_stack_trace) 991 logger.warn(cached_multiprocessing_is_available_message) 992 return MultiprocessingIsAvailableResult( 993 is_available=cached_multiprocessing_is_available, 994 stack_trace=cached_multiprocessing_check_stack_trace) 995 996 if IS_WINDOWS: 997 message = """ 998Multiple processes are not supported on Windows. Operations requesting 999parallelism will be executed with multiple threads in a single process only. 1000""" 1001 if logger: 1002 logger.warn(message) 1003 return MultiprocessingIsAvailableResult(is_available=False, 1004 stack_trace=None) 1005 1006 stack_trace = None 1007 multiprocessing_is_available = True 1008 message = """ 1009You have requested multiple processes for an operation, but the 1010required functionality of Python\'s multiprocessing module is not available. 1011Operations requesting parallelism will be executed with multiple threads in a 1012single process only. 1013""" 1014 try: 1015 # Fails if /dev/shm (or some equivalent thereof) is not available for use 1016 # (e.g., there's no implementation, or we can't write to it, etc.). 1017 try: 1018 multiprocessing.Value('i', 0) 1019 except: 1020 message += """ 1021Please ensure that you have write access to both /dev/shm and /run/shm. 1022""" 1023 raise # We'll handle this in one place below. 
1024 1025 # Manager objects and Windows are generally a pain to work with, so try it 1026 # out as a sanity check. This definitely works on some versions of Windows, 1027 # but it's certainly possible that there is some unknown configuration for 1028 # which it won't. 1029 global manager # pylint: disable=global-variable-undefined 1030 1031 manager = multiprocessing.Manager() 1032 1033 # Check that the max number of open files is reasonable. Always check this 1034 # after we're sure that the basic multiprocessing functionality is 1035 # available, since this won't matter unless that's true. 1036 limit = -1 1037 if HAS_RESOURCE_MODULE: 1038 # Try to set this with both resource names - RLIMIT_NOFILE for most Unix 1039 # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the 1040 # "resource" module is not guaranteed to know about these names. 1041 try: 1042 limit = max(limit, 1043 _IncreaseSoftLimitForResource( 1044 resource.RLIMIT_NOFILE, 1045 MIN_ACCEPTABLE_OPEN_FILES_LIMIT)) 1046 except AttributeError: 1047 pass 1048 try: 1049 limit = max(limit, 1050 _IncreaseSoftLimitForResource( 1051 resource.RLIMIT_OFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT)) 1052 except AttributeError: 1053 pass 1054 1055 if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT: 1056 message += (""" 1057Your max number of open files, %s, is too low to allow safe multiprocessing. 1058On Linux you can fix this by adding something like "ulimit -n 10000" to your 1059~/.bashrc or equivalent file and opening a new terminal. 1060 1061On MacOS, you may also need to run a command like this once (in addition to the 1062above instructions), which might require a restart of your system to take 1063effect: 1064 launchctl limit maxfiles 10000 1065 1066Alternatively, edit /etc/launchd.conf with something like: 1067 limit maxfiles 10000 10000 1068 1069""" % limit) 1070 raise Exception('Max number of open files, %s, is too low.' 
% limit) 1071 except: # pylint: disable=bare-except 1072 stack_trace = traceback.format_exc() 1073 multiprocessing_is_available = False 1074 if logger is not None: 1075 logger.debug(stack_trace) 1076 logger.warn(message) 1077 1078 # Set the cached values so that we never need to do this check again. 1079 cached_multiprocessing_is_available = multiprocessing_is_available 1080 cached_multiprocessing_check_stack_trace = stack_trace 1081 cached_multiprocessing_is_available_message = message 1082 return MultiprocessingIsAvailableResult( 1083 is_available=cached_multiprocessing_is_available, 1084 stack_trace=cached_multiprocessing_check_stack_trace) 1085 1086 1087def CreateLock(): 1088 """Returns either a multiprocessing lock or a threading lock. 1089 1090 Use Multiprocessing lock iff we have access to the parts of the 1091 multiprocessing module that are necessary to enable parallelism in operations. 1092 1093 Returns: 1094 Multiprocessing or threading lock. 1095 """ 1096 if CheckMultiprocessingAvailableAndInit().is_available: 1097 return manager.Lock() 1098 else: 1099 return threading.Lock() 1100 1101 1102def IsCloudSubdirPlaceholder(url, blr=None): 1103 """Determines if URL is a cloud subdir placeholder. 1104 1105 This function is needed because GUI tools (like the GCS cloud console) allow 1106 users to create empty "folders" by creating a placeholder object; and parts 1107 of gsutil need to treat those placeholder objects specially. For example, 1108 gsutil rsync needs to avoid downloading those objects because they can cause 1109 conflicts (see comments in rsync command for details). 1110 1111 We currently detect two cases: 1112 - Cloud objects whose name ends with '_$folder$' 1113 - Cloud objects whose name ends with '/' 1114 1115 Args: 1116 url: The URL to be checked. 1117 blr: BucketListingRef to check, or None if not available. 1118 If None, size won't be checked. 1119 1120 Returns: 1121 True/False. 
1122 """ 1123 if not url.IsCloudUrl(): 1124 return False 1125 url_str = url.url_string 1126 if url_str.endswith('_$folder$'): 1127 return True 1128 if blr and blr.IsObject(): 1129 size = blr.root_object.size 1130 else: 1131 size = 0 1132 return size == 0 and url_str.endswith('/') 1133 1134 1135def GetTermLines(): 1136 """Returns number of terminal lines.""" 1137 # fcntl isn't supported in Windows. 1138 try: 1139 import fcntl # pylint: disable=g-import-not-at-top 1140 import termios # pylint: disable=g-import-not-at-top 1141 except ImportError: 1142 return _DEFAULT_LINES 1143 def ioctl_GWINSZ(fd): # pylint: disable=invalid-name 1144 try: 1145 return struct.unpack( 1146 'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))[0] 1147 except: # pylint: disable=bare-except 1148 return 0 # Failure (so will retry on different file descriptor below). 1149 # Try to find a valid number of lines from termio for stdin, stdout, 1150 # or stderr, in that order. 1151 ioc = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2) 1152 if not ioc: 1153 try: 1154 fd = os.open(os.ctermid(), os.O_RDONLY) 1155 ioc = ioctl_GWINSZ(fd) 1156 os.close(fd) 1157 except: # pylint: disable=bare-except 1158 pass 1159 if not ioc: 1160 ioc = os.environ.get('LINES', _DEFAULT_LINES) 1161 return int(ioc) 1162 1163 1164class GsutilStreamHandler(logging.StreamHandler): 1165 """A subclass of StreamHandler for use in gsutil.""" 1166 1167 def flush(self): 1168 # Note: we override the flush method here due to a python 2.6 bug. The 1169 # python logging module will try to flush all stream handlers at exit. 1170 # If the StreamHandler is pointing to a file that is already closed, the 1171 # method throws an exception. Our unit tests temporarily redirect stderr, 1172 # which causes the default StreamHandler to open its stream against a 1173 # temporary file. By the time the process shuts down, the underlying file 1174 # is closed, causing an exception. 
This was fixed in Python 2.7, but to 1175 # remove the flake from Python 2.6, we maintain this here. 1176 try: 1177 logging.StreamHandler.flush(self) 1178 except ValueError: 1179 pass 1180 1181 1182def StdinIterator(): 1183 """A generator function that returns lines from stdin.""" 1184 for line in sys.stdin: 1185 # Strip CRLF. 1186 yield line.rstrip() 1187