1# Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/ 2# Copyright (c) 2011, Nexenta Systems Inc. 3# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved 4# 5# Permission is hereby granted, free of charge, to any person obtaining a 6# copy of this software and associated documentation files (the 7# "Software"), to deal in the Software without restriction, including 8# without limitation the rights to use, copy, modify, merge, publish, dis- 9# tribute, sublicense, and/or sell copies of the Software, and to permit 10# persons to whom the Software is furnished to do so, subject to the fol- 11# lowing conditions: 12# 13# The above copyright notice and this permission notice shall be included 14# in all copies or substantial portions of the Software. 15# 16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- 18# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22# IN THE SOFTWARE. 23import email.utils 24import errno 25import hashlib 26import mimetypes 27import os 28import re 29import base64 30import binascii 31import math 32from hashlib import md5 33import boto.utils 34from boto.compat import BytesIO, six, urllib, encodebytes 35 36from boto.exception import BotoClientError 37from boto.exception import StorageDataError 38from boto.exception import PleaseRetryException 39from boto.provider import Provider 40from boto.s3.keyfile import KeyFile 41from boto.s3.user import User 42from boto import UserAgent 43from boto.utils import compute_md5, compute_hash 44from boto.utils import find_matching_headers 45from boto.utils import merge_headers_by_name 46 47 48class Key(object): 49 """ 50 Represents a key (object) in an S3 bucket. 51 52 :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`. 53 :ivar name: The name of this Key object. 54 :ivar metadata: A dictionary containing user metadata that you 55 wish to store with the object or that has been retrieved from 56 an existing object. 57 :ivar cache_control: The value of the `Cache-Control` HTTP header. 58 :ivar content_type: The value of the `Content-Type` HTTP header. 59 :ivar content_encoding: The value of the `Content-Encoding` HTTP header. 60 :ivar content_disposition: The value of the `Content-Disposition` HTTP 61 header. 62 :ivar content_language: The value of the `Content-Language` HTTP header. 63 :ivar etag: The `etag` associated with this object. 64 :ivar last_modified: The string timestamp representing the last 65 time this object was modified in S3. 66 :ivar owner: The ID of the owner of this object. 67 :ivar storage_class: The storage class of the object. Currently, one of: 68 STANDARD | REDUCED_REDUNDANCY | GLACIER 69 :ivar md5: The MD5 hash of the contents of the object. 70 :ivar size: The size, in bytes, of the object. 71 :ivar version_id: The version ID of this object, if it is a versioned 72 object. 73 :ivar encrypted: Whether the object is encrypted while at rest on 74 the server. 75 """ 76 77 DefaultContentType = 'application/octet-stream' 78 79 RestoreBody = """<?xml version="1.0" encoding="UTF-8"?> 80 <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01"> 81 <Days>%s</Days> 82 </RestoreRequest>""" 83 84 85 BufferSize = boto.config.getint('Boto', 'key_buffer_size', 8192) 86 87 # The object metadata fields a user can set, other than custom metadata 88 # fields (i.e., those beginning with a provider-specific prefix like 89 # x-amz-meta). 90 base_user_settable_fields = set(["cache-control", "content-disposition", 91 "content-encoding", "content-language", 92 "content-md5", "content-type", 93 "x-robots-tag", "expires"]) 94 _underscore_base_user_settable_fields = set() 95 for f in base_user_settable_fields: 96 _underscore_base_user_settable_fields.add(f.replace('-', '_')) 97 # Metadata fields, whether user-settable or not, other than custom 98 # metadata fields (i.e., those beginning with a provider specific prefix 99 # like x-amz-meta). 100 base_fields = (base_user_settable_fields | 101 set(["last-modified", "content-length", "date", "etag"])) 102 103 104 105 def __init__(self, bucket=None, name=None): 106 self.bucket = bucket 107 self.name = name 108 self.metadata = {} 109 self.cache_control = None 110 self.content_type = self.DefaultContentType 111 self.content_encoding = None 112 self.content_disposition = None 113 self.content_language = None 114 self.filename = None 115 self.etag = None 116 self.is_latest = False 117 self.last_modified = None 118 self.owner = None 119 self._storage_class = None 120 self.path = None 121 self.resp = None 122 self.mode = None 123 self.size = None 124 self.version_id = None 125 self.source_version_id = None 126 self.delete_marker = False 127 self.encrypted = None 128 # If the object is being restored, this attribute will be set to True. 129 # If the object is restored, it will be set to False. Otherwise this 130 # value will be None. If the restore is completed (ongoing_restore = 131 # False), the expiry_date will be populated with the expiry date of the 132 # restored object. 133 self.ongoing_restore = None 134 self.expiry_date = None 135 self.local_hashes = {} 136 137 def __repr__(self): 138 if self.bucket: 139 name = u'<Key: %s,%s>' % (self.bucket.name, self.name) 140 else: 141 name = u'<Key: None,%s>' % self.name 142 143 # Encode to bytes for Python 2 to prevent display decoding issues 144 if not isinstance(name, str): 145 name = name.encode('utf-8') 146 147 return name 148 149 def __iter__(self): 150 return self 151 152 @property 153 def provider(self): 154 provider = None 155 if self.bucket and self.bucket.connection: 156 provider = self.bucket.connection.provider 157 return provider 158 159 def _get_key(self): 160 return self.name 161 162 def _set_key(self, value): 163 self.name = value 164 165 key = property(_get_key, _set_key); 166 167 def _get_md5(self): 168 if 'md5' in self.local_hashes and self.local_hashes['md5']: 169 return binascii.b2a_hex(self.local_hashes['md5']) 170 171 def _set_md5(self, value): 172 if value: 173 self.local_hashes['md5'] = binascii.a2b_hex(value) 174 elif 'md5' in self.local_hashes: 175 self.local_hashes.pop('md5', None) 176 177 md5 = property(_get_md5, _set_md5); 178 179 def _get_base64md5(self): 180 if 'md5' in self.local_hashes and self.local_hashes['md5']: 181 md5 = self.local_hashes['md5'] 182 if not isinstance(md5, bytes): 183 md5 = md5.encode('utf-8') 184 return binascii.b2a_base64(md5).decode('utf-8').rstrip('\n') 185 186 def _set_base64md5(self, value): 187 if value: 188 if not isinstance(value, six.string_types): 189 value = value.decode('utf-8') 190 self.local_hashes['md5'] = binascii.a2b_base64(value) 191 elif 'md5' in self.local_hashes: 192 del self.local_hashes['md5'] 193 194 base64md5 = property(_get_base64md5, _set_base64md5); 195 196 def _get_storage_class(self): 197 if self._storage_class is None and self.bucket: 198 # Attempt to fetch storage class 199 list_items = list(self.bucket.list(self.name.encode('utf-8'))) 200 if len(list_items) and getattr(list_items[0], '_storage_class', 201 None): 202 self._storage_class = list_items[0]._storage_class 203 else: 204 # Key is not yet saved? Just use default... 205 self._storage_class = 'STANDARD' 206 207 return self._storage_class 208 209 def _set_storage_class(self, value): 210 self._storage_class = value 211 212 storage_class = property(_get_storage_class, _set_storage_class) 213 214 def get_md5_from_hexdigest(self, md5_hexdigest): 215 """ 216 A utility function to create the 2-tuple (md5hexdigest, base64md5) 217 from just having a precalculated md5_hexdigest. 218 """ 219 digest = binascii.unhexlify(md5_hexdigest) 220 base64md5 = encodebytes(digest) 221 if base64md5[-1] == '\n': 222 base64md5 = base64md5[0:-1] 223 return (md5_hexdigest, base64md5) 224 225 def handle_encryption_headers(self, resp): 226 provider = self.bucket.connection.provider 227 if provider.server_side_encryption_header: 228 self.encrypted = resp.getheader( 229 provider.server_side_encryption_header, None) 230 else: 231 self.encrypted = None 232 233 def handle_version_headers(self, resp, force=False): 234 provider = self.bucket.connection.provider 235 # If the Key object already has a version_id attribute value, it 236 # means that it represents an explicit version and the user is 237 # doing a get_contents_*(version_id=<foo>) to retrieve another 238 # version of the Key. In that case, we don't really want to 239 # overwrite the version_id in this Key object. Comprende? 240 if self.version_id is None or force: 241 self.version_id = resp.getheader(provider.version_id, None) 242 self.source_version_id = resp.getheader(provider.copy_source_version_id, 243 None) 244 if resp.getheader(provider.delete_marker, 'false') == 'true': 245 self.delete_marker = True 246 else: 247 self.delete_marker = False 248 249 def handle_restore_headers(self, response): 250 provider = self.bucket.connection.provider 251 header = response.getheader(provider.restore_header) 252 if header is None: 253 return 254 parts = header.split(',', 1) 255 for part in parts: 256 key, val = [i.strip() for i in part.split('=')] 257 val = val.replace('"', '') 258 if key == 'ongoing-request': 259 self.ongoing_restore = True if val.lower() == 'true' else False 260 elif key == 'expiry-date': 261 self.expiry_date = val 262 263 def handle_addl_headers(self, headers): 264 """ 265 Used by Key subclasses to do additional, provider-specific 266 processing of response headers. No-op for this base class. 267 """ 268 pass 269 270 def open_read(self, headers=None, query_args='', 271 override_num_retries=None, response_headers=None): 272 """ 273 Open this key for reading 274 275 :type headers: dict 276 :param headers: Headers to pass in the web request 277 278 :type query_args: string 279 :param query_args: Arguments to pass in the query string 280 (ie, 'torrent') 281 282 :type override_num_retries: int 283 :param override_num_retries: If not None will override configured 284 num_retries parameter for underlying GET. 285 286 :type response_headers: dict 287 :param response_headers: A dictionary containing HTTP 288 headers/values that will override any headers associated 289 with the stored object in the response. See 290 http://goo.gl/EWOPb for details. 291 """ 292 if self.resp is None: 293 self.mode = 'r' 294 295 provider = self.bucket.connection.provider 296 self.resp = self.bucket.connection.make_request( 297 'GET', self.bucket.name, self.name, headers, 298 query_args=query_args, 299 override_num_retries=override_num_retries) 300 if self.resp.status < 199 or self.resp.status > 299: 301 body = self.resp.read() 302 raise provider.storage_response_error(self.resp.status, 303 self.resp.reason, body) 304 response_headers = self.resp.msg 305 self.metadata = boto.utils.get_aws_metadata(response_headers, 306 provider) 307 for name, value in response_headers.items(): 308 # To get correct size for Range GETs, use Content-Range 309 # header if one was returned. If not, use Content-Length 310 # header. 311 if (name.lower() == 'content-length' and 312 'Content-Range' not in response_headers): 313 self.size = int(value) 314 elif name.lower() == 'content-range': 315 end_range = re.sub('.*/(.*)', '\\1', value) 316 self.size = int(end_range) 317 elif name.lower() in Key.base_fields: 318 self.__dict__[name.lower().replace('-', '_')] = value 319 self.handle_version_headers(self.resp) 320 self.handle_encryption_headers(self.resp) 321 self.handle_restore_headers(self.resp) 322 self.handle_addl_headers(self.resp.getheaders()) 323 324 def open_write(self, headers=None, override_num_retries=None): 325 """ 326 Open this key for writing. 327 Not yet implemented 328 329 :type headers: dict 330 :param headers: Headers to pass in the write request 331 332 :type override_num_retries: int 333 :param override_num_retries: If not None will override configured 334 num_retries parameter for underlying PUT. 335 """ 336 raise BotoClientError('Not Implemented') 337 338 def open(self, mode='r', headers=None, query_args=None, 339 override_num_retries=None): 340 if mode == 'r': 341 self.mode = 'r' 342 self.open_read(headers=headers, query_args=query_args, 343 override_num_retries=override_num_retries) 344 elif mode == 'w': 345 self.mode = 'w' 346 self.open_write(headers=headers, 347 override_num_retries=override_num_retries) 348 else: 349 raise BotoClientError('Invalid mode: %s' % mode) 350 351 closed = False 352 353 def close(self, fast=False): 354 """ 355 Close this key. 356 357 :type fast: bool 358 :param fast: True if you want the connection to be closed without first 359 reading the content. This should only be used in cases where subsequent 360 calls don't need to return the content from the open HTTP connection. 361 Note: As explained at 362 http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse, 363 callers must read the whole response before sending a new request to the 364 server. Calling Key.close(fast=True) and making a subsequent request to 365 the server will work because boto will get an httplib exception and 366 close/reopen the connection. 367 368 """ 369 if self.resp and not fast: 370 self.resp.read() 371 self.resp = None 372 self.mode = None 373 self.closed = True 374 375 def next(self): 376 """ 377 By providing a next method, the key object supports use as an iterator. 378 For example, you can now say: 379 380 for bytes in key: 381 write bytes to a file or whatever 382 383 All of the HTTP connection stuff is handled for you. 384 """ 385 self.open_read() 386 data = self.resp.read(self.BufferSize) 387 if not data: 388 self.close() 389 raise StopIteration 390 return data 391 392 # Python 3 iterator support 393 __next__ = next 394 395 def read(self, size=0): 396 self.open_read() 397 if size == 0: 398 data = self.resp.read() 399 else: 400 data = self.resp.read(size) 401 if not data: 402 self.close() 403 return data 404 405 def change_storage_class(self, new_storage_class, dst_bucket=None, 406 validate_dst_bucket=True): 407 """ 408 Change the storage class of an existing key. 409 Depending on whether a different destination bucket is supplied 410 or not, this will either move the item within the bucket, preserving 411 all metadata and ACL info bucket changing the storage class or it 412 will copy the item to the provided destination bucket, also 413 preserving metadata and ACL info. 414 415 :type new_storage_class: string 416 :param new_storage_class: The new storage class for the Key. 417 Possible values are: 418 * STANDARD 419 * REDUCED_REDUNDANCY 420 421 :type dst_bucket: string 422 :param dst_bucket: The name of a destination bucket. If not 423 provided the current bucket of the key will be used. 424 425 :type validate_dst_bucket: bool 426 :param validate_dst_bucket: If True, will validate the dst_bucket 427 by using an extra list request. 428 """ 429 bucket_name = dst_bucket or self.bucket.name 430 if new_storage_class == 'STANDARD': 431 return self.copy(bucket_name, self.name, 432 reduced_redundancy=False, preserve_acl=True, 433 validate_dst_bucket=validate_dst_bucket) 434 elif new_storage_class == 'REDUCED_REDUNDANCY': 435 return self.copy(bucket_name, self.name, 436 reduced_redundancy=True, preserve_acl=True, 437 validate_dst_bucket=validate_dst_bucket) 438 else: 439 raise BotoClientError('Invalid storage class: %s' % 440 new_storage_class) 441 442 def copy(self, dst_bucket, dst_key, metadata=None, 443 reduced_redundancy=False, preserve_acl=False, 444 encrypt_key=False, validate_dst_bucket=True): 445 """ 446 Copy this Key to another bucket. 447 448 :type dst_bucket: string 449 :param dst_bucket: The name of the destination bucket 450 451 :type dst_key: string 452 :param dst_key: The name of the destination key 453 454 :type metadata: dict 455 :param metadata: Metadata to be associated with new key. If 456 metadata is supplied, it will replace the metadata of the 457 source key being copied. If no metadata is supplied, the 458 source key's metadata will be copied to the new key. 459 460 :type reduced_redundancy: bool 461 :param reduced_redundancy: If True, this will force the 462 storage class of the new Key to be REDUCED_REDUNDANCY 463 regardless of the storage class of the key being copied. 464 The Reduced Redundancy Storage (RRS) feature of S3, 465 provides lower redundancy at lower storage cost. 466 467 :type preserve_acl: bool 468 :param preserve_acl: If True, the ACL from the source key will 469 be copied to the destination key. If False, the 470 destination key will have the default ACL. Note that 471 preserving the ACL in the new key object will require two 472 additional API calls to S3, one to retrieve the current 473 ACL and one to set that ACL on the new object. If you 474 don't care about the ACL, a value of False will be 475 significantly more efficient. 476 477 :type encrypt_key: bool 478 :param encrypt_key: If True, the new copy of the object will 479 be encrypted on the server-side by S3 and will be stored 480 in an encrypted form while at rest in S3. 481 482 :type validate_dst_bucket: bool 483 :param validate_dst_bucket: If True, will validate the dst_bucket 484 by using an extra list request. 485 486 :rtype: :class:`boto.s3.key.Key` or subclass 487 :returns: An instance of the newly created key object 488 """ 489 dst_bucket = self.bucket.connection.lookup(dst_bucket, 490 validate_dst_bucket) 491 if reduced_redundancy: 492 storage_class = 'REDUCED_REDUNDANCY' 493 else: 494 storage_class = self.storage_class 495 return dst_bucket.copy_key(dst_key, self.bucket.name, 496 self.name, metadata, 497 storage_class=storage_class, 498 preserve_acl=preserve_acl, 499 encrypt_key=encrypt_key, 500 src_version_id=self.version_id) 501 502 def startElement(self, name, attrs, connection): 503 if name == 'Owner': 504 self.owner = User(self) 505 return self.owner 506 else: 507 return None 508 509 def endElement(self, name, value, connection): 510 if name == 'Key': 511 self.name = value 512 elif name == 'ETag': 513 self.etag = value 514 elif name == 'IsLatest': 515 if value == 'true': 516 self.is_latest = True 517 else: 518 self.is_latest = False 519 elif name == 'LastModified': 520 self.last_modified = value 521 elif name == 'Size': 522 self.size = int(value) 523 elif name == 'StorageClass': 524 self.storage_class = value 525 elif name == 'Owner': 526 pass 527 elif name == 'VersionId': 528 self.version_id = value 529 else: 530 setattr(self, name, value) 531 532 def exists(self, headers=None): 533 """ 534 Returns True if the key exists 535 536 :rtype: bool 537 :return: Whether the key exists on S3 538 """ 539 return bool(self.bucket.lookup(self.name, headers=headers)) 540 541 def delete(self, headers=None): 542 """ 543 Delete this key from S3 544 """ 545 return self.bucket.delete_key(self.name, version_id=self.version_id, 546 headers=headers) 547 548 def get_metadata(self, name): 549 return self.metadata.get(name) 550 551 def set_metadata(self, name, value): 552 # Ensure that metadata that is vital to signing is in the correct 553 # case. Applies to ``Content-Type`` & ``Content-MD5``. 554 if name.lower() == 'content-type': 555 self.metadata['Content-Type'] = value 556 elif name.lower() == 'content-md5': 557 self.metadata['Content-MD5'] = value 558 else: 559 self.metadata[name] = value 560 if name.lower() in Key.base_user_settable_fields: 561 self.__dict__[name.lower().replace('-', '_')] = value 562 563 def update_metadata(self, d): 564 self.metadata.update(d) 565 566 # convenience methods for setting/getting ACL 567 def set_acl(self, acl_str, headers=None): 568 if self.bucket is not None: 569 self.bucket.set_acl(acl_str, self.name, headers=headers) 570 571 def get_acl(self, headers=None): 572 if self.bucket is not None: 573 return self.bucket.get_acl(self.name, headers=headers) 574 575 def get_xml_acl(self, headers=None): 576 if self.bucket is not None: 577 return self.bucket.get_xml_acl(self.name, headers=headers) 578 579 def set_xml_acl(self, acl_str, headers=None): 580 if self.bucket is not None: 581 return self.bucket.set_xml_acl(acl_str, self.name, headers=headers) 582 583 def set_canned_acl(self, acl_str, headers=None): 584 return self.bucket.set_canned_acl(acl_str, self.name, headers) 585 586 def get_redirect(self): 587 """Return the redirect location configured for this key. 588 589 If no redirect is configured (via set_redirect), then None 590 will be returned. 591 592 """ 593 response = self.bucket.connection.make_request( 594 'HEAD', self.bucket.name, self.name) 595 if response.status == 200: 596 return response.getheader('x-amz-website-redirect-location') 597 else: 598 raise self.provider.storage_response_error( 599 response.status, response.reason, response.read()) 600 601 def set_redirect(self, redirect_location, headers=None): 602 """Configure this key to redirect to another location. 603 604 When the bucket associated with this key is accessed from the website 605 endpoint, a 301 redirect will be issued to the specified 606 `redirect_location`. 607 608 :type redirect_location: string 609 :param redirect_location: The location to redirect. 610 611 """ 612 if headers is None: 613 headers = {} 614 else: 615 headers = headers.copy() 616 617 headers['x-amz-website-redirect-location'] = redirect_location 618 response = self.bucket.connection.make_request('PUT', self.bucket.name, 619 self.name, headers) 620 if response.status == 200: 621 return True 622 else: 623 raise self.provider.storage_response_error( 624 response.status, response.reason, response.read()) 625 626 def make_public(self, headers=None): 627 return self.bucket.set_canned_acl('public-read', self.name, headers) 628 629 def generate_url(self, expires_in, method='GET', headers=None, 630 query_auth=True, force_http=False, response_headers=None, 631 expires_in_absolute=False, version_id=None, 632 policy=None, reduced_redundancy=False, encrypt_key=False): 633 """ 634 Generate a URL to access this key. 635 636 :type expires_in: int 637 :param expires_in: How long the url is valid for, in seconds 638 639 :type method: string 640 :param method: The method to use for retrieving the file 641 (default is GET) 642 643 :type headers: dict 644 :param headers: Any headers to pass along in the request 645 646 :type query_auth: bool 647 :param query_auth: 648 649 :type force_http: bool 650 :param force_http: If True, http will be used instead of https. 651 652 :type response_headers: dict 653 :param response_headers: A dictionary containing HTTP 654 headers/values that will override any headers associated 655 with the stored object in the response. See 656 http://goo.gl/EWOPb for details. 657 658 :type expires_in_absolute: bool 659 :param expires_in_absolute: 660 661 :type version_id: string 662 :param version_id: The version_id of the object to GET. If specified 663 this overrides any value in the key. 664 665 :type policy: :class:`boto.s3.acl.CannedACLStrings` 666 :param policy: A canned ACL policy that will be applied to the 667 new key in S3. 668 669 :type reduced_redundancy: bool 670 :param reduced_redundancy: If True, this will set the storage 671 class of the new Key to be REDUCED_REDUNDANCY. The Reduced 672 Redundancy Storage (RRS) feature of S3, provides lower 673 redundancy at lower storage cost. 674 675 :type encrypt_key: bool 676 :param encrypt_key: If True, the new copy of the object will 677 be encrypted on the server-side by S3 and will be stored 678 in an encrypted form while at rest in S3. 679 680 :rtype: string 681 :return: The URL to access the key 682 """ 683 provider = self.bucket.connection.provider 684 version_id = version_id or self.version_id 685 if headers is None: 686 headers = {} 687 else: 688 headers = headers.copy() 689 690 # add headers accordingly (usually PUT case) 691 if policy: 692 headers[provider.acl_header] = policy 693 if reduced_redundancy: 694 self.storage_class = 'REDUCED_REDUNDANCY' 695 if provider.storage_class_header: 696 headers[provider.storage_class_header] = self.storage_class 697 if encrypt_key: 698 headers[provider.server_side_encryption_header] = 'AES256' 699 headers = boto.utils.merge_meta(headers, self.metadata, provider) 700 701 return self.bucket.connection.generate_url(expires_in, method, 702 self.bucket.name, self.name, 703 headers, query_auth, 704 force_http, 705 response_headers, 706 expires_in_absolute, 707 version_id) 708 709 def send_file(self, fp, headers=None, cb=None, num_cb=10, 710 query_args=None, chunked_transfer=False, size=None): 711 """ 712 Upload a file to a key into a bucket on S3. 713 714 :type fp: file 715 :param fp: The file pointer to upload. The file pointer must 716 point point at the offset from which you wish to upload. 717 ie. if uploading the full file, it should point at the 718 start of the file. Normally when a file is opened for 719 reading, the fp will point at the first byte. See the 720 bytes parameter below for more info. 721 722 :type headers: dict 723 :param headers: The headers to pass along with the PUT request 724 725 :type num_cb: int 726 :param num_cb: (optional) If a callback is specified with the 727 cb parameter this parameter determines the granularity of 728 the callback by defining the maximum number of times the 729 callback will be called during the file 730 transfer. Providing a negative integer will cause your 731 callback to be called with each buffer read. 732 733 :type query_args: string 734 :param query_args: (optional) Arguments to pass in the query string. 735 736 :type chunked_transfer: boolean 737 :param chunked_transfer: (optional) If true, we use chunked 738 Transfer-Encoding. 739 740 :type size: int 741 :param size: (optional) The Maximum number of bytes to read 742 from the file pointer (fp). This is useful when uploading 743 a file in multiple parts where you are splitting the file 744 up into different ranges to be uploaded. If not specified, 745 the default behaviour is to read all bytes from the file 746 pointer. Less bytes may be available. 747 """ 748 self._send_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb, 749 query_args=query_args, 750 chunked_transfer=chunked_transfer, size=size) 751 752 def _send_file_internal(self, fp, headers=None, cb=None, num_cb=10, 753 query_args=None, chunked_transfer=False, size=None, 754 hash_algs=None): 755 provider = self.bucket.connection.provider 756 try: 757 spos = fp.tell() 758 except IOError: 759 spos = None 760 self.read_from_stream = False 761 762 # If hash_algs is unset and the MD5 hasn't already been computed, 763 # default to an MD5 hash_alg to hash the data on-the-fly. 764 if hash_algs is None and not self.md5: 765 hash_algs = {'md5': md5} 766 digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {}) 767 768 def sender(http_conn, method, path, data, headers): 769 # This function is called repeatedly for temporary retries 770 # so we must be sure the file pointer is pointing at the 771 # start of the data. 772 if spos is not None and spos != fp.tell(): 773 fp.seek(spos) 774 elif spos is None and self.read_from_stream: 775 # if seek is not supported, and we've read from this 776 # stream already, then we need to abort retries to 777 # avoid setting bad data. 778 raise provider.storage_data_error( 779 'Cannot retry failed request. fp does not support seeking.') 780 781 # If the caller explicitly specified host header, tell putrequest 782 # not to add a second host header. Similarly for accept-encoding. 783 skips = {} 784 if boto.utils.find_matching_headers('host', headers): 785 skips['skip_host'] = 1 786 if boto.utils.find_matching_headers('accept-encoding', headers): 787 skips['skip_accept_encoding'] = 1 788 http_conn.putrequest(method, path, **skips) 789 for key in headers: 790 http_conn.putheader(key, headers[key]) 791 http_conn.endheaders() 792 793 save_debug = self.bucket.connection.debug 794 self.bucket.connection.debug = 0 795 # If the debuglevel < 4 we don't want to show connection 796 # payload, so turn off HTTP connection-level debug output (to 797 # be restored below). 798 # Use the getattr approach to allow this to work in AppEngine. 799 if getattr(http_conn, 'debuglevel', 0) < 4: 800 http_conn.set_debuglevel(0) 801 802 data_len = 0 803 if cb: 804 if size: 805 cb_size = size 806 elif self.size: 807 cb_size = self.size 808 else: 809 cb_size = 0 810 if chunked_transfer and cb_size == 0: 811 # For chunked Transfer, we call the cb for every 1MB 812 # of data transferred, except when we know size. 813 cb_count = (1024 * 1024) / self.BufferSize 814 elif num_cb > 1: 815 cb_count = int( 816 math.ceil(cb_size / self.BufferSize / (num_cb - 1.0))) 817 elif num_cb < 0: 818 cb_count = -1 819 else: 820 cb_count = 0 821 i = 0 822 cb(data_len, cb_size) 823 824 bytes_togo = size 825 if bytes_togo and bytes_togo < self.BufferSize: 826 chunk = fp.read(bytes_togo) 827 else: 828 chunk = fp.read(self.BufferSize) 829 830 if not isinstance(chunk, bytes): 831 chunk = chunk.encode('utf-8') 832 833 if spos is None: 834 # read at least something from a non-seekable fp. 835 self.read_from_stream = True 836 while chunk: 837 chunk_len = len(chunk) 838 data_len += chunk_len 839 if chunked_transfer: 840 http_conn.send('%x;\r\n' % chunk_len) 841 http_conn.send(chunk) 842 http_conn.send('\r\n') 843 else: 844 http_conn.send(chunk) 845 for alg in digesters: 846 digesters[alg].update(chunk) 847 if bytes_togo: 848 bytes_togo -= chunk_len 849 if bytes_togo <= 0: 850 break 851 if cb: 852 i += 1 853 if i == cb_count or cb_count == -1: 854 cb(data_len, cb_size) 855 i = 0 856 if bytes_togo and bytes_togo < self.BufferSize: 857 chunk = fp.read(bytes_togo) 858 else: 859 chunk = fp.read(self.BufferSize) 860 861 if not isinstance(chunk, bytes): 862 chunk = chunk.encode('utf-8') 863 864 self.size = data_len 865 866 for alg in digesters: 867 self.local_hashes[alg] = digesters[alg].digest() 868 869 if chunked_transfer: 870 http_conn.send('0\r\n') 871 # http_conn.send("Content-MD5: %s\r\n" % self.base64md5) 872 http_conn.send('\r\n') 873 874 if cb and (cb_count <= 1 or i > 0) and data_len > 0: 875 cb(data_len, cb_size) 876 877 http_conn.set_debuglevel(save_debug) 878 self.bucket.connection.debug = save_debug 879 response = http_conn.getresponse() 880 body = response.read() 881 882 if not self.should_retry(response, chunked_transfer): 883 raise provider.storage_response_error( 884 response.status, response.reason, body) 885 886 return response 887 888 if not headers: 889 headers = {} 890 else: 891 headers = headers.copy() 892 # Overwrite user-supplied user-agent. 893 for header in find_matching_headers('User-Agent', headers): 894 del headers[header] 895 headers['User-Agent'] = UserAgent 896 # If storage_class is None, then a user has not explicitly requested 897 # a storage class, so we can assume STANDARD here 898 if self._storage_class not in [None, 'STANDARD']: 899 headers[provider.storage_class_header] = self.storage_class 900 if find_matching_headers('Content-Encoding', headers): 901 self.content_encoding = merge_headers_by_name( 902 'Content-Encoding', headers) 903 if find_matching_headers('Content-Language', headers): 904 self.content_language = merge_headers_by_name( 905 'Content-Language', headers) 906 content_type_headers = find_matching_headers('Content-Type', headers) 907 if content_type_headers: 908 # Some use cases need to suppress sending of the Content-Type 909 # header and depend on the receiving server to set the content 910 # type. This can be achieved by setting headers['Content-Type'] 911 # to None when calling this method. 912 if (len(content_type_headers) == 1 and 913 headers[content_type_headers[0]] is None): 914 # Delete null Content-Type value to skip sending that header. 915 del headers[content_type_headers[0]] 916 else: 917 self.content_type = merge_headers_by_name( 918 'Content-Type', headers) 919 elif self.path: 920 self.content_type = mimetypes.guess_type(self.path)[0] 921 if self.content_type is None: 922 self.content_type = self.DefaultContentType 923 headers['Content-Type'] = self.content_type 924 else: 925 headers['Content-Type'] = self.content_type 926 if self.base64md5: 927 headers['Content-MD5'] = self.base64md5 928 if chunked_transfer: 929 headers['Transfer-Encoding'] = 'chunked' 930 #if not self.base64md5: 931 # headers['Trailer'] = "Content-MD5" 932 else: 933 headers['Content-Length'] = str(self.size) 934 # This is terrible. We need a SHA256 of the body for SigV4, but to do 935 # the chunked ``sender`` behavior above, the ``fp`` isn't available to 936 # the auth mechanism (because closures). Detect if it's SigV4 & embelish 937 # while we can before the auth calculations occur. 938 if 'hmac-v4-s3' in self.bucket.connection._required_auth_capability(): 939 kwargs = {'fp': fp, 'hash_algorithm': hashlib.sha256} 940 if size is not None: 941 kwargs['size'] = size 942 headers['_sha256'] = compute_hash(**kwargs)[0] 943 headers['Expect'] = '100-Continue' 944 headers = boto.utils.merge_meta(headers, self.metadata, provider) 945 resp = self.bucket.connection.make_request( 946 'PUT', 947 self.bucket.name, 948 self.name, 949 headers, 950 sender=sender, 951 query_args=query_args 952 ) 953 self.handle_version_headers(resp, force=True) 954 self.handle_addl_headers(resp.getheaders()) 955 956 def should_retry(self, response, chunked_transfer=False): 957 provider = self.bucket.connection.provider 958 959 if not chunked_transfer: 960 if response.status in [500, 503]: 961 # 500 & 503 can be plain retries. 962 return True 963 964 if response.getheader('location'): 965 # If there's a redirect, plain retry. 966 return True 967 968 if 200 <= response.status <= 299: 969 self.etag = response.getheader('etag') 970 md5 = self.md5 971 if isinstance(md5, bytes): 972 md5 = md5.decode('utf-8') 973 974 # If you use customer-provided encryption keys, the ETag value that 975 # Amazon S3 returns in the response will not be the MD5 of the 976 # object. 977 server_side_encryption_customer_algorithm = response.getheader( 978 'x-amz-server-side-encryption-customer-algorithm', None) 979 if server_side_encryption_customer_algorithm is None: 980 if self.etag != '"%s"' % md5: 981 raise provider.storage_data_error( 982 'ETag from S3 did not match computed MD5. ' 983 '%s vs. %s' % (self.etag, self.md5)) 984 985 return True 986 987 if response.status == 400: 988 # The 400 must be trapped so the retry handler can check to 989 # see if it was a timeout. 990 # If ``RequestTimeout`` is present, we'll retry. Otherwise, bomb 991 # out. 992 body = response.read() 993 err = provider.storage_response_error( 994 response.status, 995 response.reason, 996 body 997 ) 998 999 if err.error_code in ['RequestTimeout']: 1000 raise PleaseRetryException( 1001 "Saw %s, retrying" % err.error_code, 1002 response=response 1003 ) 1004 1005 return False 1006 1007 def compute_md5(self, fp, size=None): 1008 """ 1009 :type fp: file 1010 :param fp: File pointer to the file to MD5 hash. The file 1011 pointer will be reset to the same position before the 1012 method returns. 1013 1014 :type size: int 1015 :param size: (optional) The Maximum number of bytes to read 1016 from the file pointer (fp). This is useful when uploading 1017 a file in multiple parts where the file is being split 1018 in place into different parts. Less bytes may be available. 1019 """ 1020 hex_digest, b64_digest, data_size = compute_md5(fp, size=size) 1021 # Returned values are MD5 hash, base64 encoded MD5 hash, and data size. 1022 # The internal implementation of compute_md5() needs to return the 1023 # data size but we don't want to return that value to the external 1024 # caller because it changes the class interface (i.e. it might 1025 # break some code) so we consume the third tuple value here and 1026 # return the remainder of the tuple to the caller, thereby preserving 1027 # the existing interface. 1028 self.size = data_size 1029 return (hex_digest, b64_digest) 1030 1031 def set_contents_from_stream(self, fp, headers=None, replace=True, 1032 cb=None, num_cb=10, policy=None, 1033 reduced_redundancy=False, query_args=None, 1034 size=None): 1035 """ 1036 Store an object using the name of the Key object as the key in 1037 cloud and the contents of the data stream pointed to by 'fp' as 1038 the contents. 1039 1040 The stream object is not seekable and total size is not known. 1041 This has the implication that we can't specify the 1042 Content-Size and Content-MD5 in the header. So for huge 1043 uploads, the delay in calculating MD5 is avoided but with a 1044 penalty of inability to verify the integrity of the uploaded 1045 data. 1046 1047 :type fp: file 1048 :param fp: the file whose contents are to be uploaded 1049 1050 :type headers: dict 1051 :param headers: additional HTTP headers to be sent with the 1052 PUT request. 1053 1054 :type replace: bool 1055 :param replace: If this parameter is False, the method will first check 1056 to see if an object exists in the bucket with the same key. If it 1057 does, it won't overwrite it. The default value is True which will 1058 overwrite the object. 1059 1060 :type cb: function 1061 :param cb: a callback function that will be called to report 1062 progress on the upload. The callback should accept two integer 1063 parameters, the first representing the number of bytes that have 1064 been successfully transmitted to GS and the second representing the 1065 total number of bytes that need to be transmitted. 1066 1067 :type num_cb: int 1068 :param num_cb: (optional) If a callback is specified with the 1069 cb parameter, this parameter determines the granularity of 1070 the callback by defining the maximum number of times the 1071 callback will be called during the file transfer. 1072 1073 :type policy: :class:`boto.gs.acl.CannedACLStrings` 1074 :param policy: A canned ACL policy that will be applied to the new key 1075 in GS. 1076 1077 :type reduced_redundancy: bool 1078 :param reduced_redundancy: If True, this will set the storage 1079 class of the new Key to be REDUCED_REDUNDANCY. The Reduced 1080 Redundancy Storage (RRS) feature of S3, provides lower 1081 redundancy at lower storage cost. 1082 1083 :type size: int 1084 :param size: (optional) The Maximum number of bytes to read from 1085 the file pointer (fp). This is useful when uploading a 1086 file in multiple parts where you are splitting the file up 1087 into different ranges to be uploaded. If not specified, 1088 the default behaviour is to read all bytes from the file 1089 pointer. Less bytes may be available. 1090 """ 1091 1092 provider = self.bucket.connection.provider 1093 if not provider.supports_chunked_transfer(): 1094 raise BotoClientError('%s does not support chunked transfer' 1095 % provider.get_provider_name()) 1096 1097 # Name of the Object should be specified explicitly for Streams. 1098 if not self.name or self.name == '': 1099 raise BotoClientError('Cannot determine the destination ' 1100 'object name for the given stream') 1101 1102 if headers is None: 1103 headers = {} 1104 if policy: 1105 headers[provider.acl_header] = policy 1106 1107 if reduced_redundancy: 1108 self.storage_class = 'REDUCED_REDUNDANCY' 1109 if provider.storage_class_header: 1110 headers[provider.storage_class_header] = self.storage_class 1111 1112 if self.bucket is not None: 1113 if not replace: 1114 if self.bucket.lookup(self.name): 1115 return 1116 self.send_file(fp, headers, cb, num_cb, query_args, 1117 chunked_transfer=True, size=size) 1118 1119 def set_contents_from_file(self, fp, headers=None, replace=True, 1120 cb=None, num_cb=10, policy=None, md5=None, 1121 reduced_redundancy=False, query_args=None, 1122 encrypt_key=False, size=None, rewind=False): 1123 """ 1124 Store an object in S3 using the name of the Key object as the 1125 key in S3 and the contents of the file pointed to by 'fp' as the 1126 contents. The data is read from 'fp' from its current position until 1127 'size' bytes have been read or EOF. 1128 1129 :type fp: file 1130 :param fp: the file whose contents to upload 1131 1132 :type headers: dict 1133 :param headers: Additional HTTP headers that will be sent with 1134 the PUT request. 1135 1136 :type replace: bool 1137 :param replace: If this parameter is False, the method will 1138 first check to see if an object exists in the bucket with 1139 the same key. If it does, it won't overwrite it. The 1140 default value is True which will overwrite the object. 1141 1142 :type cb: function 1143 :param cb: a callback function that will be called to report 1144 progress on the upload. The callback should accept two 1145 integer parameters, the first representing the number of 1146 bytes that have been successfully transmitted to S3 and 1147 the second representing the size of the to be transmitted 1148 object. 1149 1150 :type num_cb: int 1151 :param num_cb: (optional) If a callback is specified with the 1152 cb parameter this parameter determines the granularity of 1153 the callback by defining the maximum number of times the 1154 callback will be called during the file transfer. 1155 1156 :type policy: :class:`boto.s3.acl.CannedACLStrings` 1157 :param policy: A canned ACL policy that will be applied to the 1158 new key in S3. 1159 1160 :type md5: A tuple containing the hexdigest version of the MD5 1161 checksum of the file as the first element and the 1162 Base64-encoded version of the plain checksum as the second 1163 element. This is the same format returned by the 1164 compute_md5 method. 1165 :param md5: If you need to compute the MD5 for any reason 1166 prior to upload, it's silly to have to do it twice so this 1167 param, if present, will be used as the MD5 values of the 1168 file. Otherwise, the checksum will be computed. 1169 1170 :type reduced_redundancy: bool 1171 :param reduced_redundancy: If True, this will set the storage 1172 class of the new Key to be REDUCED_REDUNDANCY. The Reduced 1173 Redundancy Storage (RRS) feature of S3, provides lower 1174 redundancy at lower storage cost. 1175 1176 :type encrypt_key: bool 1177 :param encrypt_key: If True, the new copy of the object will 1178 be encrypted on the server-side by S3 and will be stored 1179 in an encrypted form while at rest in S3. 1180 1181 :type size: int 1182 :param size: (optional) The Maximum number of bytes to read 1183 from the file pointer (fp). This is useful when uploading 1184 a file in multiple parts where you are splitting the file 1185 up into different ranges to be uploaded. If not specified, 1186 the default behaviour is to read all bytes from the file 1187 pointer. Less bytes may be available. 1188 1189 :type rewind: bool 1190 :param rewind: (optional) If True, the file pointer (fp) will 1191 be rewound to the start before any bytes are read from 1192 it. The default behaviour is False which reads from the 1193 current position of the file pointer (fp). 1194 1195 :rtype: int 1196 :return: The number of bytes written to the key. 1197 """ 1198 provider = self.bucket.connection.provider 1199 headers = headers or {} 1200 if policy: 1201 headers[provider.acl_header] = policy 1202 if encrypt_key: 1203 headers[provider.server_side_encryption_header] = 'AES256' 1204 1205 if rewind: 1206 # caller requests reading from beginning of fp. 1207 fp.seek(0, os.SEEK_SET) 1208 else: 1209 # The following seek/tell/seek logic is intended 1210 # to detect applications using the older interface to 1211 # set_contents_from_file(), which automatically rewound the 1212 # file each time the Key was reused. This changed with commit 1213 # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads 1214 # split into multiple parts and uploaded in parallel, and at 1215 # the time of that commit this check was added because otherwise 1216 # older programs would get a success status and upload an empty 1217 # object. Unfortuantely, it's very inefficient for fp's implemented 1218 # by KeyFile (used, for example, by gsutil when copying between 1219 # providers). So, we skip the check for the KeyFile case. 1220 # TODO: At some point consider removing this seek/tell/seek 1221 # logic, after enough time has passed that it's unlikely any 1222 # programs remain that assume the older auto-rewind interface. 1223 if not isinstance(fp, KeyFile): 1224 spos = fp.tell() 1225 fp.seek(0, os.SEEK_END) 1226 if fp.tell() == spos: 1227 fp.seek(0, os.SEEK_SET) 1228 if fp.tell() != spos: 1229 # Raise an exception as this is likely a programming 1230 # error whereby there is data before the fp but nothing 1231 # after it. 1232 fp.seek(spos) 1233 raise AttributeError('fp is at EOF. Use rewind option ' 1234 'or seek() to data start.') 1235 # seek back to the correct position. 1236 fp.seek(spos) 1237 1238 if reduced_redundancy: 1239 self.storage_class = 'REDUCED_REDUNDANCY' 1240 if provider.storage_class_header: 1241 headers[provider.storage_class_header] = self.storage_class 1242 # TODO - What if provider doesn't support reduced reduncancy? 1243 # What if different providers provide different classes? 1244 if hasattr(fp, 'name'): 1245 self.path = fp.name 1246 if self.bucket is not None: 1247 if not md5 and provider.supports_chunked_transfer(): 1248 # defer md5 calculation to on the fly and 1249 # we don't know anything about size yet. 1250 chunked_transfer = True 1251 self.size = None 1252 else: 1253 chunked_transfer = False 1254 if isinstance(fp, KeyFile): 1255 # Avoid EOF seek for KeyFile case as it's very inefficient. 1256 key = fp.getkey() 1257 size = key.size - fp.tell() 1258 self.size = size 1259 # At present both GCS and S3 use MD5 for the etag for 1260 # non-multipart-uploaded objects. If the etag is 32 hex 1261 # chars use it as an MD5, to avoid having to read the file 1262 # twice while transferring. 1263 if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)): 1264 etag = key.etag.strip('"') 1265 md5 = (etag, base64.b64encode(binascii.unhexlify(etag))) 1266 if not md5: 1267 # compute_md5() and also set self.size to actual 1268 # size of the bytes read computing the md5. 1269 md5 = self.compute_md5(fp, size) 1270 # adjust size if required 1271 size = self.size 1272 elif size: 1273 self.size = size 1274 else: 1275 # If md5 is provided, still need to size so 1276 # calculate based on bytes to end of content 1277 spos = fp.tell() 1278 fp.seek(0, os.SEEK_END) 1279 self.size = fp.tell() - spos 1280 fp.seek(spos) 1281 size = self.size 1282 self.md5 = md5[0] 1283 self.base64md5 = md5[1] 1284 1285 if self.name is None: 1286 self.name = self.md5 1287 if not replace: 1288 if self.bucket.lookup(self.name): 1289 return 1290 1291 self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb, 1292 query_args=query_args, 1293 chunked_transfer=chunked_transfer, size=size) 1294 # return number of bytes written. 1295 return self.size 1296 1297 def set_contents_from_filename(self, filename, headers=None, replace=True, 1298 cb=None, num_cb=10, policy=None, md5=None, 1299 reduced_redundancy=False, 1300 encrypt_key=False): 1301 """ 1302 Store an object in S3 using the name of the Key object as the 1303 key in S3 and the contents of the file named by 'filename'. 1304 See set_contents_from_file method for details about the 1305 parameters. 1306 1307 :type filename: string 1308 :param filename: The name of the file that you want to put onto S3 1309 1310 :type headers: dict 1311 :param headers: Additional headers to pass along with the 1312 request to AWS. 1313 1314 :type replace: bool 1315 :param replace: If True, replaces the contents of the file 1316 if it already exists. 1317 1318 :type cb: function 1319 :param cb: a callback function that will be called to report 1320 progress on the upload. The callback should accept two 1321 integer parameters, the first representing the number of 1322 bytes that have been successfully transmitted to S3 and 1323 the second representing the size of the to be transmitted 1324 object. 1325 1326 :type cb: int 1327 :param num_cb: (optional) If a callback is specified with the 1328 cb parameter this parameter determines the granularity of 1329 the callback by defining the maximum number of times the 1330 callback will be called during the file transfer. 1331 1332 :type policy: :class:`boto.s3.acl.CannedACLStrings` 1333 :param policy: A canned ACL policy that will be applied to the 1334 new key in S3. 1335 1336 :type md5: A tuple containing the hexdigest version of the MD5 1337 checksum of the file as the first element and the 1338 Base64-encoded version of the plain checksum as the second 1339 element. This is the same format returned by the 1340 compute_md5 method. 1341 :param md5: If you need to compute the MD5 for any reason 1342 prior to upload, it's silly to have to do it twice so this 1343 param, if present, will be used as the MD5 values of the 1344 file. Otherwise, the checksum will be computed. 1345 1346 :type reduced_redundancy: bool 1347 :param reduced_redundancy: If True, this will set the storage 1348 class of the new Key to be REDUCED_REDUNDANCY. The Reduced 1349 Redundancy Storage (RRS) feature of S3, provides lower 1350 redundancy at lower storage cost. :type encrypt_key: bool 1351 :param encrypt_key: If True, the new copy of the object 1352 will be encrypted on the server-side by S3 and will be 1353 stored in an encrypted form while at rest in S3. 1354 1355 :rtype: int 1356 :return: The number of bytes written to the key. 1357 """ 1358 with open(filename, 'rb') as fp: 1359 return self.set_contents_from_file(fp, headers, replace, cb, 1360 num_cb, policy, md5, 1361 reduced_redundancy, 1362 encrypt_key=encrypt_key) 1363 1364 def set_contents_from_string(self, string_data, headers=None, replace=True, 1365 cb=None, num_cb=10, policy=None, md5=None, 1366 reduced_redundancy=False, 1367 encrypt_key=False): 1368 """ 1369 Store an object in S3 using the name of the Key object as the 1370 key in S3 and the string 's' as the contents. 1371 See set_contents_from_file method for details about the 1372 parameters. 1373 1374 :type headers: dict 1375 :param headers: Additional headers to pass along with the 1376 request to AWS. 1377 1378 :type replace: bool 1379 :param replace: If True, replaces the contents of the file if 1380 it already exists. 1381 1382 :type cb: function 1383 :param cb: a callback function that will be called to report 1384 progress on the upload. The callback should accept two 1385 integer parameters, the first representing the number of 1386 bytes that have been successfully transmitted to S3 and 1387 the second representing the size of the to be transmitted 1388 object. 1389 1390 :type cb: int 1391 :param num_cb: (optional) If a callback is specified with the 1392 cb parameter this parameter determines the granularity of 1393 the callback by defining the maximum number of times the 1394 callback will be called during the file transfer. 1395 1396 :type policy: :class:`boto.s3.acl.CannedACLStrings` 1397 :param policy: A canned ACL policy that will be applied to the 1398 new key in S3. 1399 1400 :type md5: A tuple containing the hexdigest version of the MD5 1401 checksum of the file as the first element and the 1402 Base64-encoded version of the plain checksum as the second 1403 element. This is the same format returned by the 1404 compute_md5 method. 1405 :param md5: If you need to compute the MD5 for any reason 1406 prior to upload, it's silly to have to do it twice so this 1407 param, if present, will be used as the MD5 values of the 1408 file. Otherwise, the checksum will be computed. 1409 1410 :type reduced_redundancy: bool 1411 :param reduced_redundancy: If True, this will set the storage 1412 class of the new Key to be REDUCED_REDUNDANCY. The Reduced 1413 Redundancy Storage (RRS) feature of S3, provides lower 1414 redundancy at lower storage cost. 1415 1416 :type encrypt_key: bool 1417 :param encrypt_key: If True, the new copy of the object will 1418 be encrypted on the server-side by S3 and will be stored 1419 in an encrypted form while at rest in S3. 1420 """ 1421 if not isinstance(string_data, bytes): 1422 string_data = string_data.encode("utf-8") 1423 fp = BytesIO(string_data) 1424 r = self.set_contents_from_file(fp, headers, replace, cb, num_cb, 1425 policy, md5, reduced_redundancy, 1426 encrypt_key=encrypt_key) 1427 fp.close() 1428 return r 1429 1430 def get_file(self, fp, headers=None, cb=None, num_cb=10, 1431 torrent=False, version_id=None, override_num_retries=None, 1432 response_headers=None): 1433 """ 1434 Retrieves a file from an S3 Key 1435 1436 :type fp: file 1437 :param fp: File pointer to put the data into 1438 1439 :type headers: string 1440 :param: headers to send when retrieving the files 1441 1442 :type cb: function 1443 :param cb: a callback function that will be called to report 1444 progress on the upload. The callback should accept two 1445 integer parameters, the first representing the number of 1446 bytes that have been successfully transmitted to S3 and 1447 the second representing the size of the to be transmitted 1448 object. 1449 1450 :type cb: int 1451 :param num_cb: (optional) If a callback is specified with the 1452 cb parameter this parameter determines the granularity of 1453 the callback by defining the maximum number of times the 1454 callback will be called during the file transfer. 1455 1456 :type torrent: bool 1457 :param torrent: Flag for whether to get a torrent for the file 1458 1459 :type override_num_retries: int 1460 :param override_num_retries: If not None will override configured 1461 num_retries parameter for underlying GET. 1462 1463 :type response_headers: dict 1464 :param response_headers: A dictionary containing HTTP 1465 headers/values that will override any headers associated 1466 with the stored object in the response. See 1467 http://goo.gl/EWOPb for details. 1468 1469 :type version_id: str 1470 :param version_id: The ID of a particular version of the object. 1471 If this parameter is not supplied but the Key object has 1472 a ``version_id`` attribute, that value will be used when 1473 retrieving the object. You can set the Key object's 1474 ``version_id`` attribute to None to always grab the latest 1475 version from a version-enabled bucket. 1476 """ 1477 self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb, 1478 torrent=torrent, version_id=version_id, 1479 override_num_retries=override_num_retries, 1480 response_headers=response_headers, 1481 hash_algs=None, 1482 query_args=None) 1483 1484 def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10, 1485 torrent=False, version_id=None, override_num_retries=None, 1486 response_headers=None, hash_algs=None, query_args=None): 1487 if headers is None: 1488 headers = {} 1489 save_debug = self.bucket.connection.debug 1490 if self.bucket.connection.debug == 1: 1491 self.bucket.connection.debug = 0 1492 1493 query_args = query_args or [] 1494 if torrent: 1495 query_args.append('torrent') 1496 1497 if hash_algs is None and not torrent: 1498 hash_algs = {'md5': md5} 1499 digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {}) 1500 1501 # If a version_id is passed in, use that. If not, check to see 1502 # if the Key object has an explicit version_id and, if so, use that. 1503 # Otherwise, don't pass a version_id query param. 1504 if version_id is None: 1505 version_id = self.version_id 1506 if version_id: 1507 query_args.append('versionId=%s' % version_id) 1508 if response_headers: 1509 for key in response_headers: 1510 query_args.append('%s=%s' % ( 1511 key, urllib.parse.quote(response_headers[key]))) 1512 query_args = '&'.join(query_args) 1513 self.open('r', headers, query_args=query_args, 1514 override_num_retries=override_num_retries) 1515 1516 data_len = 0 1517 if cb: 1518 if self.size is None: 1519 cb_size = 0 1520 else: 1521 cb_size = self.size 1522 if self.size is None and num_cb != -1: 1523 # If size is not available due to chunked transfer for example, 1524 # we'll call the cb for every 1MB of data transferred. 1525 cb_count = (1024 * 1024) / self.BufferSize 1526 elif num_cb > 1: 1527 cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0))) 1528 elif num_cb < 0: 1529 cb_count = -1 1530 else: 1531 cb_count = 0 1532 i = 0 1533 cb(data_len, cb_size) 1534 try: 1535 for bytes in self: 1536 fp.write(bytes) 1537 data_len += len(bytes) 1538 for alg in digesters: 1539 digesters[alg].update(bytes) 1540 if cb: 1541 if cb_size > 0 and data_len >= cb_size: 1542 break 1543 i += 1 1544 if i == cb_count or cb_count == -1: 1545 cb(data_len, cb_size) 1546 i = 0 1547 except IOError as e: 1548 if e.errno == errno.ENOSPC: 1549 raise StorageDataError('Out of space for destination file ' 1550 '%s' % fp.name) 1551 raise 1552 if cb and (cb_count <= 1 or i > 0) and data_len > 0: 1553 cb(data_len, cb_size) 1554 for alg in digesters: 1555 self.local_hashes[alg] = digesters[alg].digest() 1556 if self.size is None and not torrent and "Range" not in headers: 1557 self.size = data_len 1558 self.close() 1559 self.bucket.connection.debug = save_debug 1560 1561 def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10): 1562 """ 1563 Get a torrent file (see to get_file) 1564 1565 :type fp: file 1566 :param fp: The file pointer of where to put the torrent 1567 1568 :type headers: dict 1569 :param headers: Headers to be passed 1570 1571 :type cb: function 1572 :param cb: a callback function that will be called to report 1573 progress on the upload. The callback should accept two 1574 integer parameters, the first representing the number of 1575 bytes that have been successfully transmitted to S3 and 1576 the second representing the size of the to be transmitted 1577 object. 1578 1579 :type cb: int 1580 :param num_cb: (optional) If a callback is specified with the 1581 cb parameter this parameter determines the granularity of 1582 the callback by defining the maximum number of times the 1583 callback will be called during the file transfer. 1584 1585 """ 1586 return self.get_file(fp, headers, cb, num_cb, torrent=True) 1587 1588 def get_contents_to_file(self, fp, headers=None, 1589 cb=None, num_cb=10, 1590 torrent=False, 1591 version_id=None, 1592 res_download_handler=None, 1593 response_headers=None): 1594 """ 1595 Retrieve an object from S3 using the name of the Key object as the 1596 key in S3. Write the contents of the object to the file pointed 1597 to by 'fp'. 1598 1599 :type fp: File -like object 1600 :param fp: 1601 1602 :type headers: dict 1603 :param headers: additional HTTP headers that will be sent with 1604 the GET request. 1605 1606 :type cb: function 1607 :param cb: a callback function that will be called to report 1608 progress on the upload. The callback should accept two 1609 integer parameters, the first representing the number of 1610 bytes that have been successfully transmitted to S3 and 1611 the second representing the size of the to be transmitted 1612 object. 1613 1614 :type cb: int 1615 :param num_cb: (optional) If a callback is specified with the 1616 cb parameter this parameter determines the granularity of 1617 the callback by defining the maximum number of times the 1618 callback will be called during the file transfer. 1619 1620 :type torrent: bool 1621 :param torrent: If True, returns the contents of a torrent 1622 file as a string. 1623 1624 :type res_upload_handler: ResumableDownloadHandler 1625 :param res_download_handler: If provided, this handler will 1626 perform the download. 1627 1628 :type response_headers: dict 1629 :param response_headers: A dictionary containing HTTP 1630 headers/values that will override any headers associated 1631 with the stored object in the response. See 1632 http://goo.gl/EWOPb for details. 1633 1634 :type version_id: str 1635 :param version_id: The ID of a particular version of the object. 1636 If this parameter is not supplied but the Key object has 1637 a ``version_id`` attribute, that value will be used when 1638 retrieving the object. You can set the Key object's 1639 ``version_id`` attribute to None to always grab the latest 1640 version from a version-enabled bucket. 1641 """ 1642 if self.bucket is not None: 1643 if res_download_handler: 1644 res_download_handler.get_file(self, fp, headers, cb, num_cb, 1645 torrent=torrent, 1646 version_id=version_id) 1647 else: 1648 self.get_file(fp, headers, cb, num_cb, torrent=torrent, 1649 version_id=version_id, 1650 response_headers=response_headers) 1651 1652 def get_contents_to_filename(self, filename, headers=None, 1653 cb=None, num_cb=10, 1654 torrent=False, 1655 version_id=None, 1656 res_download_handler=None, 1657 response_headers=None): 1658 """ 1659 Retrieve an object from S3 using the name of the Key object as the 1660 key in S3. Store contents of the object to a file named by 'filename'. 1661 See get_contents_to_file method for details about the 1662 parameters. 1663 1664 :type filename: string 1665 :param filename: The filename of where to put the file contents 1666 1667 :type headers: dict 1668 :param headers: Any additional headers to send in the request 1669 1670 :type cb: function 1671 :param cb: a callback function that will be called to report 1672 progress on the upload. The callback should accept two 1673 integer parameters, the first representing the number of 1674 bytes that have been successfully transmitted to S3 and 1675 the second representing the size of the to be transmitted 1676 object. 1677 1678 :type num_cb: int 1679 :param num_cb: (optional) If a callback is specified with the 1680 cb parameter this parameter determines the granularity of 1681 the callback by defining the maximum number of times the 1682 callback will be called during the file transfer. 1683 1684 :type torrent: bool 1685 :param torrent: If True, returns the contents of a torrent file 1686 as a string. 1687 1688 :type res_upload_handler: ResumableDownloadHandler 1689 :param res_download_handler: If provided, this handler will 1690 perform the download. 1691 1692 :type response_headers: dict 1693 :param response_headers: A dictionary containing HTTP 1694 headers/values that will override any headers associated 1695 with the stored object in the response. See 1696 http://goo.gl/EWOPb for details. 1697 1698 :type version_id: str 1699 :param version_id: The ID of a particular version of the object. 1700 If this parameter is not supplied but the Key object has 1701 a ``version_id`` attribute, that value will be used when 1702 retrieving the object. You can set the Key object's 1703 ``version_id`` attribute to None to always grab the latest 1704 version from a version-enabled bucket. 1705 """ 1706 try: 1707 with open(filename, 'wb') as fp: 1708 self.get_contents_to_file(fp, headers, cb, num_cb, 1709 torrent=torrent, 1710 version_id=version_id, 1711 res_download_handler=res_download_handler, 1712 response_headers=response_headers) 1713 except Exception: 1714 os.remove(filename) 1715 raise 1716 # if last_modified date was sent from s3, try to set file's timestamp 1717 if self.last_modified is not None: 1718 try: 1719 modified_tuple = email.utils.parsedate_tz(self.last_modified) 1720 modified_stamp = int(email.utils.mktime_tz(modified_tuple)) 1721 os.utime(fp.name, (modified_stamp, modified_stamp)) 1722 except Exception: 1723 pass 1724 1725 def get_contents_as_string(self, headers=None, 1726 cb=None, num_cb=10, 1727 torrent=False, 1728 version_id=None, 1729 response_headers=None, encoding=None): 1730 """ 1731 Retrieve an object from S3 using the name of the Key object as the 1732 key in S3. Return the contents of the object as a string. 1733 See get_contents_to_file method for details about the 1734 parameters. 1735 1736 :type headers: dict 1737 :param headers: Any additional headers to send in the request 1738 1739 :type cb: function 1740 :param cb: a callback function that will be called to report 1741 progress on the upload. The callback should accept two 1742 integer parameters, the first representing the number of 1743 bytes that have been successfully transmitted to S3 and 1744 the second representing the size of the to be transmitted 1745 object. 1746 1747 :type cb: int 1748 :param num_cb: (optional) If a callback is specified with the 1749 cb parameter this parameter determines the granularity of 1750 the callback by defining the maximum number of times the 1751 callback will be called during the file transfer. 1752 1753 :type torrent: bool 1754 :param torrent: If True, returns the contents of a torrent file 1755 as a string. 1756 1757 :type response_headers: dict 1758 :param response_headers: A dictionary containing HTTP 1759 headers/values that will override any headers associated 1760 with the stored object in the response. See 1761 http://goo.gl/EWOPb for details. 1762 1763 :type version_id: str 1764 :param version_id: The ID of a particular version of the object. 1765 If this parameter is not supplied but the Key object has 1766 a ``version_id`` attribute, that value will be used when 1767 retrieving the object. You can set the Key object's 1768 ``version_id`` attribute to None to always grab the latest 1769 version from a version-enabled bucket. 1770 1771 :type encoding: str 1772 :param encoding: The text encoding to use, such as ``utf-8`` 1773 or ``iso-8859-1``. If set, then a string will be returned. 1774 Defaults to ``None`` and returns bytes. 1775 1776 :rtype: bytes or str 1777 :returns: The contents of the file as bytes or a string 1778 """ 1779 fp = BytesIO() 1780 self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent, 1781 version_id=version_id, 1782 response_headers=response_headers) 1783 value = fp.getvalue() 1784 1785 if encoding is not None: 1786 value = value.decode(encoding) 1787 1788 return value 1789 1790 def add_email_grant(self, permission, email_address, headers=None): 1791 """ 1792 Convenience method that provides a quick way to add an email grant 1793 to a key. This method retrieves the current ACL, creates a new 1794 grant based on the parameters passed in, adds that grant to the ACL 1795 and then PUT's the new ACL back to S3. 1796 1797 :type permission: string 1798 :param permission: The permission being granted. Should be one of: 1799 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL). 1800 1801 :type email_address: string 1802 :param email_address: The email address associated with the AWS 1803 account your are granting the permission to. 1804 1805 :type recursive: boolean 1806 :param recursive: A boolean value to controls whether the 1807 command will apply the grant to all keys within the bucket 1808 or not. The default value is False. By passing a True 1809 value, the call will iterate through all keys in the 1810 bucket and apply the same grant to each key. CAUTION: If 1811 you have a lot of keys, this could take a long time! 1812 """ 1813 policy = self.get_acl(headers=headers) 1814 policy.acl.add_email_grant(permission, email_address) 1815 self.set_acl(policy, headers=headers) 1816 1817 def add_user_grant(self, permission, user_id, headers=None, 1818 display_name=None): 1819 """ 1820 Convenience method that provides a quick way to add a canonical 1821 user grant to a key. This method retrieves the current ACL, 1822 creates a new grant based on the parameters passed in, adds that 1823 grant to the ACL and then PUT's the new ACL back to S3. 1824 1825 :type permission: string 1826 :param permission: The permission being granted. Should be one of: 1827 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL). 1828 1829 :type user_id: string 1830 :param user_id: The canonical user id associated with the AWS 1831 account your are granting the permission to. 1832 1833 :type display_name: string 1834 :param display_name: An option string containing the user's 1835 Display Name. Only required on Walrus. 1836 """ 1837 policy = self.get_acl(headers=headers) 1838 policy.acl.add_user_grant(permission, user_id, 1839 display_name=display_name) 1840 self.set_acl(policy, headers=headers) 1841 1842 def _normalize_metadata(self, metadata): 1843 if type(metadata) == set: 1844 norm_metadata = set() 1845 for k in metadata: 1846 norm_metadata.add(k.lower()) 1847 else: 1848 norm_metadata = {} 1849 for k in metadata: 1850 norm_metadata[k.lower()] = metadata[k] 1851 return norm_metadata 1852 1853 def _get_remote_metadata(self, headers=None): 1854 """ 1855 Extracts metadata from existing URI into a dict, so we can 1856 overwrite/delete from it to form the new set of metadata to apply to a 1857 key. 1858 """ 1859 metadata = {} 1860 for underscore_name in self._underscore_base_user_settable_fields: 1861 if hasattr(self, underscore_name): 1862 value = getattr(self, underscore_name) 1863 if value: 1864 # Generate HTTP field name corresponding to "_" named field. 1865 field_name = underscore_name.replace('_', '-') 1866 metadata[field_name.lower()] = value 1867 # self.metadata contains custom metadata, which are all user-settable. 1868 prefix = self.provider.metadata_prefix 1869 for underscore_name in self.metadata: 1870 field_name = underscore_name.replace('_', '-') 1871 metadata['%s%s' % (prefix, field_name.lower())] = ( 1872 self.metadata[underscore_name]) 1873 return metadata 1874 1875 def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl, 1876 headers=None): 1877 metadata_plus = self._normalize_metadata(metadata_plus) 1878 metadata_minus = self._normalize_metadata(metadata_minus) 1879 metadata = self._get_remote_metadata() 1880 metadata.update(metadata_plus) 1881 for h in metadata_minus: 1882 if h in metadata: 1883 del metadata[h] 1884 src_bucket = self.bucket 1885 # Boto prepends the meta prefix when adding headers, so strip prefix in 1886 # metadata before sending back in to copy_key() call. 1887 rewritten_metadata = {} 1888 for h in metadata: 1889 if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')): 1890 rewritten_h = (h.replace('x-goog-meta-', '') 1891 .replace('x-amz-meta-', '')) 1892 else: 1893 rewritten_h = h 1894 rewritten_metadata[rewritten_h] = metadata[h] 1895 metadata = rewritten_metadata 1896 src_bucket.copy_key(self.name, self.bucket.name, self.name, 1897 metadata=metadata, preserve_acl=preserve_acl, 1898 headers=headers) 1899 1900 def restore(self, days, headers=None): 1901 """Restore an object from an archive. 1902 1903 :type days: int 1904 :param days: The lifetime of the restored object (must 1905 be at least 1 day). If the object is already restored 1906 then this parameter can be used to readjust the lifetime 1907 of the restored object. In this case, the days 1908 param is with respect to the initial time of the request. 1909 If the object has not been restored, this param is with 1910 respect to the completion time of the request. 1911 1912 """ 1913 response = self.bucket.connection.make_request( 1914 'POST', self.bucket.name, self.name, 1915 data=self.RestoreBody % days, 1916 headers=headers, query_args='restore') 1917 if response.status not in (200, 202): 1918 provider = self.bucket.connection.provider 1919 raise provider.storage_response_error(response.status, 1920 response.reason, 1921 response.read()) 1922