1# Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/
2# Copyright (c) 2011, Nexenta Systems Inc.
3# Copyright (c) 2012 Amazon.com, Inc. or its affiliates.  All Rights Reserved
4#
5# Permission is hereby granted, free of charge, to any person obtaining a
6# copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish, dis-
9# tribute, sublicense, and/or sell copies of the Software, and to permit
10# persons to whom the Software is furnished to do so, subject to the fol-
11# lowing conditions:
12#
13# The above copyright notice and this permission notice shall be included
14# in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
18# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22# IN THE SOFTWARE.
23import email.utils
24import errno
25import hashlib
26import mimetypes
27import os
28import re
29import base64
30import binascii
31import math
32from hashlib import md5
33import boto.utils
34from boto.compat import BytesIO, six, urllib, encodebytes
35
36from boto.exception import BotoClientError
37from boto.exception import StorageDataError
38from boto.exception import PleaseRetryException
39from boto.provider import Provider
40from boto.s3.keyfile import KeyFile
41from boto.s3.user import User
42from boto import UserAgent
43from boto.utils import compute_md5, compute_hash
44from boto.utils import find_matching_headers
45from boto.utils import merge_headers_by_name
46
47
48class Key(object):
49    """
50    Represents a key (object) in an S3 bucket.
51
52    :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`.
53    :ivar name: The name of this Key object.
54    :ivar metadata: A dictionary containing user metadata that you
55        wish to store with the object or that has been retrieved from
56        an existing object.
57    :ivar cache_control: The value of the `Cache-Control` HTTP header.
58    :ivar content_type: The value of the `Content-Type` HTTP header.
59    :ivar content_encoding: The value of the `Content-Encoding` HTTP header.
60    :ivar content_disposition: The value of the `Content-Disposition` HTTP
61        header.
62    :ivar content_language: The value of the `Content-Language` HTTP header.
63    :ivar etag: The `etag` associated with this object.
64    :ivar last_modified: The string timestamp representing the last
65        time this object was modified in S3.
66    :ivar owner: The ID of the owner of this object.
67    :ivar storage_class: The storage class of the object.  Currently, one of:
68        STANDARD | REDUCED_REDUNDANCY | GLACIER
69    :ivar md5: The MD5 hash of the contents of the object.
70    :ivar size: The size, in bytes, of the object.
71    :ivar version_id: The version ID of this object, if it is a versioned
72        object.
73    :ivar encrypted: Whether the object is encrypted while at rest on
74        the server.
75    """
76
77    DefaultContentType = 'application/octet-stream'
78
79    RestoreBody = """<?xml version="1.0" encoding="UTF-8"?>
80      <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01">
81        <Days>%s</Days>
82      </RestoreRequest>"""
83
84
85    BufferSize = boto.config.getint('Boto', 'key_buffer_size', 8192)
86
87    # The object metadata fields a user can set, other than custom metadata
88    # fields (i.e., those beginning with a provider-specific prefix like
89    # x-amz-meta).
90    base_user_settable_fields = set(["cache-control", "content-disposition",
91                                    "content-encoding", "content-language",
92                                    "content-md5", "content-type",
93                                     "x-robots-tag", "expires"])
94    _underscore_base_user_settable_fields = set()
95    for f in base_user_settable_fields:
96      _underscore_base_user_settable_fields.add(f.replace('-', '_'))
97    # Metadata fields, whether user-settable or not, other than custom
98    # metadata fields (i.e., those beginning with a provider specific prefix
99    # like x-amz-meta).
100    base_fields = (base_user_settable_fields |
101                   set(["last-modified", "content-length", "date", "etag"]))
102
103
104
105    def __init__(self, bucket=None, name=None):
106        self.bucket = bucket
107        self.name = name
108        self.metadata = {}
109        self.cache_control = None
110        self.content_type = self.DefaultContentType
111        self.content_encoding = None
112        self.content_disposition = None
113        self.content_language = None
114        self.filename = None
115        self.etag = None
116        self.is_latest = False
117        self.last_modified = None
118        self.owner = None
119        self._storage_class = None
120        self.path = None
121        self.resp = None
122        self.mode = None
123        self.size = None
124        self.version_id = None
125        self.source_version_id = None
126        self.delete_marker = False
127        self.encrypted = None
128        # If the object is being restored, this attribute will be set to True.
129        # If the object is restored, it will be set to False.  Otherwise this
130        # value will be None. If the restore is completed (ongoing_restore =
131        # False), the expiry_date will be populated with the expiry date of the
132        # restored object.
133        self.ongoing_restore = None
134        self.expiry_date = None
135        self.local_hashes = {}
136
137    def __repr__(self):
138        if self.bucket:
139            name = u'<Key: %s,%s>' % (self.bucket.name, self.name)
140        else:
141            name = u'<Key: None,%s>' % self.name
142
143        # Encode to bytes for Python 2 to prevent display decoding issues
144        if not isinstance(name, str):
145            name = name.encode('utf-8')
146
147        return name
148
149    def __iter__(self):
150        return self
151
152    @property
153    def provider(self):
154        provider = None
155        if self.bucket and self.bucket.connection:
156            provider = self.bucket.connection.provider
157        return provider
158
159    def _get_key(self):
160        return self.name
161
162    def _set_key(self, value):
163        self.name = value
164
165    key = property(_get_key, _set_key);
166
167    def _get_md5(self):
168        if 'md5' in self.local_hashes and self.local_hashes['md5']:
169            return binascii.b2a_hex(self.local_hashes['md5'])
170
171    def _set_md5(self, value):
172        if value:
173            self.local_hashes['md5'] = binascii.a2b_hex(value)
174        elif 'md5' in self.local_hashes:
175            self.local_hashes.pop('md5', None)
176
177    md5 = property(_get_md5, _set_md5);
178
179    def _get_base64md5(self):
180        if 'md5' in self.local_hashes and self.local_hashes['md5']:
181            md5 = self.local_hashes['md5']
182            if not isinstance(md5, bytes):
183                md5 = md5.encode('utf-8')
184            return binascii.b2a_base64(md5).decode('utf-8').rstrip('\n')
185
186    def _set_base64md5(self, value):
187        if value:
188            if not isinstance(value, six.string_types):
189                value = value.decode('utf-8')
190            self.local_hashes['md5'] = binascii.a2b_base64(value)
191        elif 'md5' in self.local_hashes:
192            del self.local_hashes['md5']
193
194    base64md5 = property(_get_base64md5, _set_base64md5);
195
196    def _get_storage_class(self):
197        if self._storage_class is None and self.bucket:
198            # Attempt to fetch storage class
199            list_items = list(self.bucket.list(self.name.encode('utf-8')))
200            if len(list_items) and getattr(list_items[0], '_storage_class',
201                                           None):
202                self._storage_class = list_items[0]._storage_class
203            else:
204                # Key is not yet saved? Just use default...
205                self._storage_class = 'STANDARD'
206
207        return self._storage_class
208
209    def _set_storage_class(self, value):
210        self._storage_class = value
211
212    storage_class = property(_get_storage_class, _set_storage_class)
213
214    def get_md5_from_hexdigest(self, md5_hexdigest):
215        """
216        A utility function to create the 2-tuple (md5hexdigest, base64md5)
217        from just having a precalculated md5_hexdigest.
218        """
219        digest = binascii.unhexlify(md5_hexdigest)
220        base64md5 = encodebytes(digest)
221        if base64md5[-1] == '\n':
222            base64md5 = base64md5[0:-1]
223        return (md5_hexdigest, base64md5)
224
225    def handle_encryption_headers(self, resp):
226        provider = self.bucket.connection.provider
227        if provider.server_side_encryption_header:
228            self.encrypted = resp.getheader(
229                provider.server_side_encryption_header, None)
230        else:
231            self.encrypted = None
232
233    def handle_version_headers(self, resp, force=False):
234        provider = self.bucket.connection.provider
235        # If the Key object already has a version_id attribute value, it
236        # means that it represents an explicit version and the user is
237        # doing a get_contents_*(version_id=<foo>) to retrieve another
238        # version of the Key.  In that case, we don't really want to
239        # overwrite the version_id in this Key object.  Comprende?
240        if self.version_id is None or force:
241            self.version_id = resp.getheader(provider.version_id, None)
242        self.source_version_id = resp.getheader(provider.copy_source_version_id,
243                                                None)
244        if resp.getheader(provider.delete_marker, 'false') == 'true':
245            self.delete_marker = True
246        else:
247            self.delete_marker = False
248
249    def handle_restore_headers(self, response):
250        provider = self.bucket.connection.provider
251        header = response.getheader(provider.restore_header)
252        if header is None:
253            return
254        parts = header.split(',', 1)
255        for part in parts:
256            key, val = [i.strip() for i in part.split('=')]
257            val = val.replace('"', '')
258            if key == 'ongoing-request':
259                self.ongoing_restore = True if val.lower() == 'true' else False
260            elif key == 'expiry-date':
261                self.expiry_date = val
262
263    def handle_addl_headers(self, headers):
264        """
265        Used by Key subclasses to do additional, provider-specific
266        processing of response headers. No-op for this base class.
267        """
268        pass
269
270    def open_read(self, headers=None, query_args='',
271                  override_num_retries=None, response_headers=None):
272        """
273        Open this key for reading
274
275        :type headers: dict
276        :param headers: Headers to pass in the web request
277
278        :type query_args: string
279        :param query_args: Arguments to pass in the query string
280            (ie, 'torrent')
281
282        :type override_num_retries: int
283        :param override_num_retries: If not None will override configured
284            num_retries parameter for underlying GET.
285
286        :type response_headers: dict
287        :param response_headers: A dictionary containing HTTP
288            headers/values that will override any headers associated
289            with the stored object in the response.  See
290            http://goo.gl/EWOPb for details.
291        """
292        if self.resp is None:
293            self.mode = 'r'
294
295            provider = self.bucket.connection.provider
296            self.resp = self.bucket.connection.make_request(
297                'GET', self.bucket.name, self.name, headers,
298                query_args=query_args,
299                override_num_retries=override_num_retries)
300            if self.resp.status < 199 or self.resp.status > 299:
301                body = self.resp.read()
302                raise provider.storage_response_error(self.resp.status,
303                                                      self.resp.reason, body)
304            response_headers = self.resp.msg
305            self.metadata = boto.utils.get_aws_metadata(response_headers,
306                                                        provider)
307            for name, value in response_headers.items():
308                # To get correct size for Range GETs, use Content-Range
309                # header if one was returned. If not, use Content-Length
310                # header.
311                if (name.lower() == 'content-length' and
312                    'Content-Range' not in response_headers):
313                    self.size = int(value)
314                elif name.lower() == 'content-range':
315                    end_range = re.sub('.*/(.*)', '\\1', value)
316                    self.size = int(end_range)
317                elif name.lower() in Key.base_fields:
318                    self.__dict__[name.lower().replace('-', '_')] = value
319            self.handle_version_headers(self.resp)
320            self.handle_encryption_headers(self.resp)
321            self.handle_restore_headers(self.resp)
322            self.handle_addl_headers(self.resp.getheaders())
323
324    def open_write(self, headers=None, override_num_retries=None):
325        """
326        Open this key for writing.
327        Not yet implemented
328
329        :type headers: dict
330        :param headers: Headers to pass in the write request
331
332        :type override_num_retries: int
333        :param override_num_retries: If not None will override configured
334            num_retries parameter for underlying PUT.
335        """
336        raise BotoClientError('Not Implemented')
337
338    def open(self, mode='r', headers=None, query_args=None,
339             override_num_retries=None):
340        if mode == 'r':
341            self.mode = 'r'
342            self.open_read(headers=headers, query_args=query_args,
343                           override_num_retries=override_num_retries)
344        elif mode == 'w':
345            self.mode = 'w'
346            self.open_write(headers=headers,
347                            override_num_retries=override_num_retries)
348        else:
349            raise BotoClientError('Invalid mode: %s' % mode)
350
351    closed = False
352
353    def close(self, fast=False):
354        """
355        Close this key.
356
357        :type fast: bool
358        :param fast: True if you want the connection to be closed without first
359        reading the content. This should only be used in cases where subsequent
360        calls don't need to return the content from the open HTTP connection.
361        Note: As explained at
362        http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse,
363        callers must read the whole response before sending a new request to the
364        server. Calling Key.close(fast=True) and making a subsequent request to
365        the server will work because boto will get an httplib exception and
366        close/reopen the connection.
367
368        """
369        if self.resp and not fast:
370            self.resp.read()
371        self.resp = None
372        self.mode = None
373        self.closed = True
374
375    def next(self):
376        """
377        By providing a next method, the key object supports use as an iterator.
378        For example, you can now say:
379
380        for bytes in key:
381            write bytes to a file or whatever
382
383        All of the HTTP connection stuff is handled for you.
384        """
385        self.open_read()
386        data = self.resp.read(self.BufferSize)
387        if not data:
388            self.close()
389            raise StopIteration
390        return data
391
392    # Python 3 iterator support
393    __next__ = next
394
395    def read(self, size=0):
396        self.open_read()
397        if size == 0:
398            data = self.resp.read()
399        else:
400            data = self.resp.read(size)
401        if not data:
402            self.close()
403        return data
404
405    def change_storage_class(self, new_storage_class, dst_bucket=None,
406                             validate_dst_bucket=True):
407        """
408        Change the storage class of an existing key.
409        Depending on whether a different destination bucket is supplied
410        or not, this will either move the item within the bucket, preserving
411        all metadata and ACL info bucket changing the storage class or it
412        will copy the item to the provided destination bucket, also
413        preserving metadata and ACL info.
414
415        :type new_storage_class: string
416        :param new_storage_class: The new storage class for the Key.
417            Possible values are:
418            * STANDARD
419            * REDUCED_REDUNDANCY
420
421        :type dst_bucket: string
422        :param dst_bucket: The name of a destination bucket.  If not
423            provided the current bucket of the key will be used.
424
425        :type validate_dst_bucket: bool
426        :param validate_dst_bucket: If True, will validate the dst_bucket
427            by using an extra list request.
428        """
429        bucket_name = dst_bucket or self.bucket.name
430        if new_storage_class == 'STANDARD':
431            return self.copy(bucket_name, self.name,
432                             reduced_redundancy=False, preserve_acl=True,
433                             validate_dst_bucket=validate_dst_bucket)
434        elif new_storage_class == 'REDUCED_REDUNDANCY':
435            return self.copy(bucket_name, self.name,
436                             reduced_redundancy=True, preserve_acl=True,
437                             validate_dst_bucket=validate_dst_bucket)
438        else:
439            raise BotoClientError('Invalid storage class: %s' %
440                                  new_storage_class)
441
442    def copy(self, dst_bucket, dst_key, metadata=None,
443             reduced_redundancy=False, preserve_acl=False,
444             encrypt_key=False, validate_dst_bucket=True):
445        """
446        Copy this Key to another bucket.
447
448        :type dst_bucket: string
449        :param dst_bucket: The name of the destination bucket
450
451        :type dst_key: string
452        :param dst_key: The name of the destination key
453
454        :type metadata: dict
455        :param metadata: Metadata to be associated with new key.  If
456            metadata is supplied, it will replace the metadata of the
457            source key being copied.  If no metadata is supplied, the
458            source key's metadata will be copied to the new key.
459
460        :type reduced_redundancy: bool
461        :param reduced_redundancy: If True, this will force the
462            storage class of the new Key to be REDUCED_REDUNDANCY
463            regardless of the storage class of the key being copied.
464            The Reduced Redundancy Storage (RRS) feature of S3,
465            provides lower redundancy at lower storage cost.
466
467        :type preserve_acl: bool
468        :param preserve_acl: If True, the ACL from the source key will
469            be copied to the destination key.  If False, the
470            destination key will have the default ACL.  Note that
471            preserving the ACL in the new key object will require two
472            additional API calls to S3, one to retrieve the current
473            ACL and one to set that ACL on the new object.  If you
474            don't care about the ACL, a value of False will be
475            significantly more efficient.
476
477        :type encrypt_key: bool
478        :param encrypt_key: If True, the new copy of the object will
479            be encrypted on the server-side by S3 and will be stored
480            in an encrypted form while at rest in S3.
481
482        :type validate_dst_bucket: bool
483        :param validate_dst_bucket: If True, will validate the dst_bucket
484            by using an extra list request.
485
486        :rtype: :class:`boto.s3.key.Key` or subclass
487        :returns: An instance of the newly created key object
488        """
489        dst_bucket = self.bucket.connection.lookup(dst_bucket,
490                                                   validate_dst_bucket)
491        if reduced_redundancy:
492            storage_class = 'REDUCED_REDUNDANCY'
493        else:
494            storage_class = self.storage_class
495        return dst_bucket.copy_key(dst_key, self.bucket.name,
496                                   self.name, metadata,
497                                   storage_class=storage_class,
498                                   preserve_acl=preserve_acl,
499                                   encrypt_key=encrypt_key,
500                                   src_version_id=self.version_id)
501
502    def startElement(self, name, attrs, connection):
503        if name == 'Owner':
504            self.owner = User(self)
505            return self.owner
506        else:
507            return None
508
509    def endElement(self, name, value, connection):
510        if name == 'Key':
511            self.name = value
512        elif name == 'ETag':
513            self.etag = value
514        elif name == 'IsLatest':
515            if value == 'true':
516                self.is_latest = True
517            else:
518                self.is_latest = False
519        elif name == 'LastModified':
520            self.last_modified = value
521        elif name == 'Size':
522            self.size = int(value)
523        elif name == 'StorageClass':
524            self.storage_class = value
525        elif name == 'Owner':
526            pass
527        elif name == 'VersionId':
528            self.version_id = value
529        else:
530            setattr(self, name, value)
531
532    def exists(self, headers=None):
533        """
534        Returns True if the key exists
535
536        :rtype: bool
537        :return: Whether the key exists on S3
538        """
539        return bool(self.bucket.lookup(self.name, headers=headers))
540
541    def delete(self, headers=None):
542        """
543        Delete this key from S3
544        """
545        return self.bucket.delete_key(self.name, version_id=self.version_id,
546                                      headers=headers)
547
548    def get_metadata(self, name):
549        return self.metadata.get(name)
550
551    def set_metadata(self, name, value):
552        # Ensure that metadata that is vital to signing is in the correct
553        # case. Applies to ``Content-Type`` & ``Content-MD5``.
554        if name.lower() == 'content-type':
555            self.metadata['Content-Type'] = value
556        elif name.lower() == 'content-md5':
557            self.metadata['Content-MD5'] = value
558        else:
559            self.metadata[name] = value
560        if name.lower() in Key.base_user_settable_fields:
561            self.__dict__[name.lower().replace('-', '_')] = value
562
563    def update_metadata(self, d):
564        self.metadata.update(d)
565
566    # convenience methods for setting/getting ACL
567    def set_acl(self, acl_str, headers=None):
568        if self.bucket is not None:
569            self.bucket.set_acl(acl_str, self.name, headers=headers)
570
571    def get_acl(self, headers=None):
572        if self.bucket is not None:
573            return self.bucket.get_acl(self.name, headers=headers)
574
575    def get_xml_acl(self, headers=None):
576        if self.bucket is not None:
577            return self.bucket.get_xml_acl(self.name, headers=headers)
578
579    def set_xml_acl(self, acl_str, headers=None):
580        if self.bucket is not None:
581            return self.bucket.set_xml_acl(acl_str, self.name, headers=headers)
582
583    def set_canned_acl(self, acl_str, headers=None):
584        return self.bucket.set_canned_acl(acl_str, self.name, headers)
585
586    def get_redirect(self):
587        """Return the redirect location configured for this key.
588
589        If no redirect is configured (via set_redirect), then None
590        will be returned.
591
592        """
593        response = self.bucket.connection.make_request(
594            'HEAD', self.bucket.name, self.name)
595        if response.status == 200:
596            return response.getheader('x-amz-website-redirect-location')
597        else:
598            raise self.provider.storage_response_error(
599                response.status, response.reason, response.read())
600
601    def set_redirect(self, redirect_location, headers=None):
602        """Configure this key to redirect to another location.
603
604        When the bucket associated with this key is accessed from the website
605        endpoint, a 301 redirect will be issued to the specified
606        `redirect_location`.
607
608        :type redirect_location: string
609        :param redirect_location: The location to redirect.
610
611        """
612        if headers is None:
613            headers = {}
614        else:
615            headers = headers.copy()
616
617        headers['x-amz-website-redirect-location'] = redirect_location
618        response = self.bucket.connection.make_request('PUT', self.bucket.name,
619                                                       self.name, headers)
620        if response.status == 200:
621            return True
622        else:
623            raise self.provider.storage_response_error(
624                response.status, response.reason, response.read())
625
626    def make_public(self, headers=None):
627        return self.bucket.set_canned_acl('public-read', self.name, headers)
628
629    def generate_url(self, expires_in, method='GET', headers=None,
630                     query_auth=True, force_http=False, response_headers=None,
631                     expires_in_absolute=False, version_id=None,
632                     policy=None, reduced_redundancy=False, encrypt_key=False):
633        """
634        Generate a URL to access this key.
635
636        :type expires_in: int
637        :param expires_in: How long the url is valid for, in seconds
638
639        :type method: string
640        :param method: The method to use for retrieving the file
641            (default is GET)
642
643        :type headers: dict
644        :param headers: Any headers to pass along in the request
645
646        :type query_auth: bool
647        :param query_auth:
648
649        :type force_http: bool
650        :param force_http: If True, http will be used instead of https.
651
652        :type response_headers: dict
653        :param response_headers: A dictionary containing HTTP
654            headers/values that will override any headers associated
655            with the stored object in the response.  See
656            http://goo.gl/EWOPb for details.
657
658        :type expires_in_absolute: bool
659        :param expires_in_absolute:
660
661        :type version_id: string
662        :param version_id: The version_id of the object to GET. If specified
663            this overrides any value in the key.
664
665        :type policy: :class:`boto.s3.acl.CannedACLStrings`
666        :param policy: A canned ACL policy that will be applied to the
667            new key in S3.
668
669        :type reduced_redundancy: bool
670        :param reduced_redundancy: If True, this will set the storage
671            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
672            Redundancy Storage (RRS) feature of S3, provides lower
673            redundancy at lower storage cost.
674
675        :type encrypt_key: bool
676        :param encrypt_key: If True, the new copy of the object will
677            be encrypted on the server-side by S3 and will be stored
678            in an encrypted form while at rest in S3.
679
680        :rtype: string
681        :return: The URL to access the key
682        """
683        provider = self.bucket.connection.provider
684        version_id = version_id or self.version_id
685        if headers is None:
686            headers = {}
687        else:
688            headers = headers.copy()
689
690        # add headers accordingly (usually PUT case)
691        if policy:
692            headers[provider.acl_header] = policy
693        if reduced_redundancy:
694            self.storage_class = 'REDUCED_REDUNDANCY'
695            if provider.storage_class_header:
696                headers[provider.storage_class_header] = self.storage_class
697        if encrypt_key:
698            headers[provider.server_side_encryption_header] = 'AES256'
699        headers = boto.utils.merge_meta(headers, self.metadata, provider)
700
701        return self.bucket.connection.generate_url(expires_in, method,
702                                                   self.bucket.name, self.name,
703                                                   headers, query_auth,
704                                                   force_http,
705                                                   response_headers,
706                                                   expires_in_absolute,
707                                                   version_id)
708
709    def send_file(self, fp, headers=None, cb=None, num_cb=10,
710                  query_args=None, chunked_transfer=False, size=None):
711        """
712        Upload a file to a key into a bucket on S3.
713
714        :type fp: file
715        :param fp: The file pointer to upload. The file pointer must
716            point point at the offset from which you wish to upload.
717            ie. if uploading the full file, it should point at the
718            start of the file. Normally when a file is opened for
719            reading, the fp will point at the first byte.  See the
720            bytes parameter below for more info.
721
722        :type headers: dict
723        :param headers: The headers to pass along with the PUT request
724
725        :type num_cb: int
726        :param num_cb: (optional) If a callback is specified with the
727            cb parameter this parameter determines the granularity of
728            the callback by defining the maximum number of times the
729            callback will be called during the file
730            transfer. Providing a negative integer will cause your
731            callback to be called with each buffer read.
732
733        :type query_args: string
734        :param query_args: (optional) Arguments to pass in the query string.
735
736        :type chunked_transfer: boolean
737        :param chunked_transfer: (optional) If true, we use chunked
738            Transfer-Encoding.
739
740        :type size: int
741        :param size: (optional) The Maximum number of bytes to read
742            from the file pointer (fp). This is useful when uploading
743            a file in multiple parts where you are splitting the file
744            up into different ranges to be uploaded. If not specified,
745            the default behaviour is to read all bytes from the file
746            pointer. Less bytes may be available.
747        """
748        self._send_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
749                                 query_args=query_args,
750                                 chunked_transfer=chunked_transfer, size=size)
751
752    def _send_file_internal(self, fp, headers=None, cb=None, num_cb=10,
753                            query_args=None, chunked_transfer=False, size=None,
754                            hash_algs=None):
755        provider = self.bucket.connection.provider
756        try:
757            spos = fp.tell()
758        except IOError:
759            spos = None
760            self.read_from_stream = False
761
762        # If hash_algs is unset and the MD5 hasn't already been computed,
763        # default to an MD5 hash_alg to hash the data on-the-fly.
764        if hash_algs is None and not self.md5:
765            hash_algs = {'md5': md5}
766        digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
767
768        def sender(http_conn, method, path, data, headers):
769            # This function is called repeatedly for temporary retries
770            # so we must be sure the file pointer is pointing at the
771            # start of the data.
772            if spos is not None and spos != fp.tell():
773                fp.seek(spos)
774            elif spos is None and self.read_from_stream:
775                # if seek is not supported, and we've read from this
776                # stream already, then we need to abort retries to
777                # avoid setting bad data.
778                raise provider.storage_data_error(
779                    'Cannot retry failed request. fp does not support seeking.')
780
781            # If the caller explicitly specified host header, tell putrequest
782            # not to add a second host header. Similarly for accept-encoding.
783            skips = {}
784            if boto.utils.find_matching_headers('host', headers):
785              skips['skip_host'] = 1
786            if boto.utils.find_matching_headers('accept-encoding', headers):
787              skips['skip_accept_encoding'] = 1
788            http_conn.putrequest(method, path, **skips)
789            for key in headers:
790                http_conn.putheader(key, headers[key])
791            http_conn.endheaders()
792
793            save_debug = self.bucket.connection.debug
794            self.bucket.connection.debug = 0
795            # If the debuglevel < 4 we don't want to show connection
796            # payload, so turn off HTTP connection-level debug output (to
797            # be restored below).
798            # Use the getattr approach to allow this to work in AppEngine.
799            if getattr(http_conn, 'debuglevel', 0) < 4:
800                http_conn.set_debuglevel(0)
801
802            data_len = 0
803            if cb:
804                if size:
805                    cb_size = size
806                elif self.size:
807                    cb_size = self.size
808                else:
809                    cb_size = 0
810                if chunked_transfer and cb_size == 0:
811                    # For chunked Transfer, we call the cb for every 1MB
812                    # of data transferred, except when we know size.
813                    cb_count = (1024 * 1024) / self.BufferSize
814                elif num_cb > 1:
815                    cb_count = int(
816                        math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
817                elif num_cb < 0:
818                    cb_count = -1
819                else:
820                    cb_count = 0
821                i = 0
822                cb(data_len, cb_size)
823
824            bytes_togo = size
825            if bytes_togo and bytes_togo < self.BufferSize:
826                chunk = fp.read(bytes_togo)
827            else:
828                chunk = fp.read(self.BufferSize)
829
830            if not isinstance(chunk, bytes):
831                chunk = chunk.encode('utf-8')
832
833            if spos is None:
834                # read at least something from a non-seekable fp.
835                self.read_from_stream = True
836            while chunk:
837                chunk_len = len(chunk)
838                data_len += chunk_len
839                if chunked_transfer:
840                    http_conn.send('%x;\r\n' % chunk_len)
841                    http_conn.send(chunk)
842                    http_conn.send('\r\n')
843                else:
844                    http_conn.send(chunk)
845                for alg in digesters:
846                    digesters[alg].update(chunk)
847                if bytes_togo:
848                    bytes_togo -= chunk_len
849                    if bytes_togo <= 0:
850                        break
851                if cb:
852                    i += 1
853                    if i == cb_count or cb_count == -1:
854                        cb(data_len, cb_size)
855                        i = 0
856                if bytes_togo and bytes_togo < self.BufferSize:
857                    chunk = fp.read(bytes_togo)
858                else:
859                    chunk = fp.read(self.BufferSize)
860
861                if not isinstance(chunk, bytes):
862                    chunk = chunk.encode('utf-8')
863
864            self.size = data_len
865
866            for alg in digesters:
867                self.local_hashes[alg] = digesters[alg].digest()
868
869            if chunked_transfer:
870                http_conn.send('0\r\n')
871                    # http_conn.send("Content-MD5: %s\r\n" % self.base64md5)
872                http_conn.send('\r\n')
873
874            if cb and (cb_count <= 1 or i > 0) and data_len > 0:
875                cb(data_len, cb_size)
876
877            http_conn.set_debuglevel(save_debug)
878            self.bucket.connection.debug = save_debug
879            response = http_conn.getresponse()
880            body = response.read()
881
882            if not self.should_retry(response, chunked_transfer):
883                raise provider.storage_response_error(
884                    response.status, response.reason, body)
885
886            return response
887
888        if not headers:
889            headers = {}
890        else:
891            headers = headers.copy()
892        # Overwrite user-supplied user-agent.
893        for header in find_matching_headers('User-Agent', headers):
894            del headers[header]
895        headers['User-Agent'] = UserAgent
896        # If storage_class is None, then a user has not explicitly requested
897        # a storage class, so we can assume STANDARD here
898        if self._storage_class not in [None, 'STANDARD']:
899            headers[provider.storage_class_header] = self.storage_class
900        if find_matching_headers('Content-Encoding', headers):
901            self.content_encoding = merge_headers_by_name(
902                'Content-Encoding', headers)
903        if find_matching_headers('Content-Language', headers):
904            self.content_language = merge_headers_by_name(
905                'Content-Language', headers)
906        content_type_headers = find_matching_headers('Content-Type', headers)
907        if content_type_headers:
908            # Some use cases need to suppress sending of the Content-Type
909            # header and depend on the receiving server to set the content
910            # type. This can be achieved by setting headers['Content-Type']
911            # to None when calling this method.
912            if (len(content_type_headers) == 1 and
913                headers[content_type_headers[0]] is None):
914                # Delete null Content-Type value to skip sending that header.
915                del headers[content_type_headers[0]]
916            else:
917                self.content_type = merge_headers_by_name(
918                    'Content-Type', headers)
919        elif self.path:
920            self.content_type = mimetypes.guess_type(self.path)[0]
921            if self.content_type is None:
922                self.content_type = self.DefaultContentType
923            headers['Content-Type'] = self.content_type
924        else:
925            headers['Content-Type'] = self.content_type
926        if self.base64md5:
927            headers['Content-MD5'] = self.base64md5
928        if chunked_transfer:
929            headers['Transfer-Encoding'] = 'chunked'
930            #if not self.base64md5:
931            #    headers['Trailer'] = "Content-MD5"
932        else:
933            headers['Content-Length'] = str(self.size)
934        # This is terrible. We need a SHA256 of the body for SigV4, but to do
935        # the chunked ``sender`` behavior above, the ``fp`` isn't available to
936        # the auth mechanism (because closures). Detect if it's SigV4 & embelish
937        # while we can before the auth calculations occur.
938        if 'hmac-v4-s3' in self.bucket.connection._required_auth_capability():
939            kwargs = {'fp': fp, 'hash_algorithm': hashlib.sha256}
940            if size is not None:
941                kwargs['size'] = size
942            headers['_sha256'] = compute_hash(**kwargs)[0]
943        headers['Expect'] = '100-Continue'
944        headers = boto.utils.merge_meta(headers, self.metadata, provider)
945        resp = self.bucket.connection.make_request(
946            'PUT',
947            self.bucket.name,
948            self.name,
949            headers,
950            sender=sender,
951            query_args=query_args
952        )
953        self.handle_version_headers(resp, force=True)
954        self.handle_addl_headers(resp.getheaders())
955
956    def should_retry(self, response, chunked_transfer=False):
957        provider = self.bucket.connection.provider
958
959        if not chunked_transfer:
960            if response.status in [500, 503]:
961                # 500 & 503 can be plain retries.
962                return True
963
964            if response.getheader('location'):
965                # If there's a redirect, plain retry.
966                return True
967
968        if 200 <= response.status <= 299:
969            self.etag = response.getheader('etag')
970            md5 = self.md5
971            if isinstance(md5, bytes):
972                md5 = md5.decode('utf-8')
973
974            # If you use customer-provided encryption keys, the ETag value that
975            # Amazon S3 returns in the response will not be the MD5 of the
976            # object.
977            server_side_encryption_customer_algorithm = response.getheader(
978                'x-amz-server-side-encryption-customer-algorithm', None)
979            if server_side_encryption_customer_algorithm is None:
980                if self.etag != '"%s"' % md5:
981                    raise provider.storage_data_error(
982                        'ETag from S3 did not match computed MD5. '
983                        '%s vs. %s' % (self.etag, self.md5))
984
985            return True
986
987        if response.status == 400:
988            # The 400 must be trapped so the retry handler can check to
989            # see if it was a timeout.
990            # If ``RequestTimeout`` is present, we'll retry. Otherwise, bomb
991            # out.
992            body = response.read()
993            err = provider.storage_response_error(
994                response.status,
995                response.reason,
996                body
997            )
998
999            if err.error_code in ['RequestTimeout']:
1000                raise PleaseRetryException(
1001                    "Saw %s, retrying" % err.error_code,
1002                    response=response
1003                )
1004
1005        return False
1006
1007    def compute_md5(self, fp, size=None):
1008        """
1009        :type fp: file
1010        :param fp: File pointer to the file to MD5 hash.  The file
1011            pointer will be reset to the same position before the
1012            method returns.
1013
1014        :type size: int
1015        :param size: (optional) The Maximum number of bytes to read
1016            from the file pointer (fp). This is useful when uploading
1017            a file in multiple parts where the file is being split
1018            in place into different parts. Less bytes may be available.
1019        """
1020        hex_digest, b64_digest, data_size = compute_md5(fp, size=size)
1021        # Returned values are MD5 hash, base64 encoded MD5 hash, and data size.
1022        # The internal implementation of compute_md5() needs to return the
1023        # data size but we don't want to return that value to the external
1024        # caller because it changes the class interface (i.e. it might
1025        # break some code) so we consume the third tuple value here and
1026        # return the remainder of the tuple to the caller, thereby preserving
1027        # the existing interface.
1028        self.size = data_size
1029        return (hex_digest, b64_digest)
1030
1031    def set_contents_from_stream(self, fp, headers=None, replace=True,
1032                                 cb=None, num_cb=10, policy=None,
1033                                 reduced_redundancy=False, query_args=None,
1034                                 size=None):
1035        """
1036        Store an object using the name of the Key object as the key in
1037        cloud and the contents of the data stream pointed to by 'fp' as
1038        the contents.
1039
1040        The stream object is not seekable and total size is not known.
1041        This has the implication that we can't specify the
1042        Content-Size and Content-MD5 in the header. So for huge
1043        uploads, the delay in calculating MD5 is avoided but with a
1044        penalty of inability to verify the integrity of the uploaded
1045        data.
1046
1047        :type fp: file
1048        :param fp: the file whose contents are to be uploaded
1049
1050        :type headers: dict
1051        :param headers: additional HTTP headers to be sent with the
1052            PUT request.
1053
1054        :type replace: bool
1055        :param replace: If this parameter is False, the method will first check
1056            to see if an object exists in the bucket with the same key. If it
1057            does, it won't overwrite it. The default value is True which will
1058            overwrite the object.
1059
1060        :type cb: function
1061        :param cb: a callback function that will be called to report
1062            progress on the upload. The callback should accept two integer
1063            parameters, the first representing the number of bytes that have
1064            been successfully transmitted to GS and the second representing the
1065            total number of bytes that need to be transmitted.
1066
1067        :type num_cb: int
1068        :param num_cb: (optional) If a callback is specified with the
1069            cb parameter, this parameter determines the granularity of
1070            the callback by defining the maximum number of times the
1071            callback will be called during the file transfer.
1072
1073        :type policy: :class:`boto.gs.acl.CannedACLStrings`
1074        :param policy: A canned ACL policy that will be applied to the new key
1075            in GS.
1076
1077        :type reduced_redundancy: bool
1078        :param reduced_redundancy: If True, this will set the storage
1079            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
1080            Redundancy Storage (RRS) feature of S3, provides lower
1081            redundancy at lower storage cost.
1082
1083        :type size: int
1084        :param size: (optional) The Maximum number of bytes to read from
1085            the file pointer (fp). This is useful when uploading a
1086            file in multiple parts where you are splitting the file up
1087            into different ranges to be uploaded. If not specified,
1088            the default behaviour is to read all bytes from the file
1089            pointer. Less bytes may be available.
1090        """
1091
1092        provider = self.bucket.connection.provider
1093        if not provider.supports_chunked_transfer():
1094            raise BotoClientError('%s does not support chunked transfer'
1095                % provider.get_provider_name())
1096
1097        # Name of the Object should be specified explicitly for Streams.
1098        if not self.name or self.name == '':
1099            raise BotoClientError('Cannot determine the destination '
1100                                'object name for the given stream')
1101
1102        if headers is None:
1103            headers = {}
1104        if policy:
1105            headers[provider.acl_header] = policy
1106
1107        if reduced_redundancy:
1108            self.storage_class = 'REDUCED_REDUNDANCY'
1109            if provider.storage_class_header:
1110                headers[provider.storage_class_header] = self.storage_class
1111
1112        if self.bucket is not None:
1113            if not replace:
1114                if self.bucket.lookup(self.name):
1115                    return
1116            self.send_file(fp, headers, cb, num_cb, query_args,
1117                           chunked_transfer=True, size=size)
1118
1119    def set_contents_from_file(self, fp, headers=None, replace=True,
1120                               cb=None, num_cb=10, policy=None, md5=None,
1121                               reduced_redundancy=False, query_args=None,
1122                               encrypt_key=False, size=None, rewind=False):
1123        """
1124        Store an object in S3 using the name of the Key object as the
1125        key in S3 and the contents of the file pointed to by 'fp' as the
1126        contents. The data is read from 'fp' from its current position until
1127        'size' bytes have been read or EOF.
1128
1129        :type fp: file
1130        :param fp: the file whose contents to upload
1131
1132        :type headers: dict
1133        :param headers: Additional HTTP headers that will be sent with
1134            the PUT request.
1135
1136        :type replace: bool
1137        :param replace: If this parameter is False, the method will
1138            first check to see if an object exists in the bucket with
1139            the same key.  If it does, it won't overwrite it.  The
1140            default value is True which will overwrite the object.
1141
1142        :type cb: function
1143        :param cb: a callback function that will be called to report
1144            progress on the upload.  The callback should accept two
1145            integer parameters, the first representing the number of
1146            bytes that have been successfully transmitted to S3 and
1147            the second representing the size of the to be transmitted
1148            object.
1149
1150        :type num_cb: int
1151        :param num_cb: (optional) If a callback is specified with the
1152            cb parameter this parameter determines the granularity of
1153            the callback by defining the maximum number of times the
1154            callback will be called during the file transfer.
1155
1156        :type policy: :class:`boto.s3.acl.CannedACLStrings`
1157        :param policy: A canned ACL policy that will be applied to the
1158            new key in S3.
1159
1160        :type md5: A tuple containing the hexdigest version of the MD5
1161            checksum of the file as the first element and the
1162            Base64-encoded version of the plain checksum as the second
1163            element.  This is the same format returned by the
1164            compute_md5 method.
1165        :param md5: If you need to compute the MD5 for any reason
1166            prior to upload, it's silly to have to do it twice so this
1167            param, if present, will be used as the MD5 values of the
1168            file.  Otherwise, the checksum will be computed.
1169
1170        :type reduced_redundancy: bool
1171        :param reduced_redundancy: If True, this will set the storage
1172            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
1173            Redundancy Storage (RRS) feature of S3, provides lower
1174            redundancy at lower storage cost.
1175
1176        :type encrypt_key: bool
1177        :param encrypt_key: If True, the new copy of the object will
1178            be encrypted on the server-side by S3 and will be stored
1179            in an encrypted form while at rest in S3.
1180
1181        :type size: int
1182        :param size: (optional) The Maximum number of bytes to read
1183            from the file pointer (fp). This is useful when uploading
1184            a file in multiple parts where you are splitting the file
1185            up into different ranges to be uploaded. If not specified,
1186            the default behaviour is to read all bytes from the file
1187            pointer. Less bytes may be available.
1188
1189        :type rewind: bool
1190        :param rewind: (optional) If True, the file pointer (fp) will
1191            be rewound to the start before any bytes are read from
1192            it. The default behaviour is False which reads from the
1193            current position of the file pointer (fp).
1194
1195        :rtype: int
1196        :return: The number of bytes written to the key.
1197        """
1198        provider = self.bucket.connection.provider
1199        headers = headers or {}
1200        if policy:
1201            headers[provider.acl_header] = policy
1202        if encrypt_key:
1203            headers[provider.server_side_encryption_header] = 'AES256'
1204
1205        if rewind:
1206            # caller requests reading from beginning of fp.
1207            fp.seek(0, os.SEEK_SET)
1208        else:
1209            # The following seek/tell/seek logic is intended
1210            # to detect applications using the older interface to
1211            # set_contents_from_file(), which automatically rewound the
1212            # file each time the Key was reused. This changed with commit
1213            # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads
1214            # split into multiple parts and uploaded in parallel, and at
1215            # the time of that commit this check was added because otherwise
1216            # older programs would get a success status and upload an empty
1217            # object. Unfortuantely, it's very inefficient for fp's implemented
1218            # by KeyFile (used, for example, by gsutil when copying between
1219            # providers). So, we skip the check for the KeyFile case.
1220            # TODO: At some point consider removing this seek/tell/seek
1221            # logic, after enough time has passed that it's unlikely any
1222            # programs remain that assume the older auto-rewind interface.
1223            if not isinstance(fp, KeyFile):
1224                spos = fp.tell()
1225                fp.seek(0, os.SEEK_END)
1226                if fp.tell() == spos:
1227                    fp.seek(0, os.SEEK_SET)
1228                    if fp.tell() != spos:
1229                        # Raise an exception as this is likely a programming
1230                        # error whereby there is data before the fp but nothing
1231                        # after it.
1232                        fp.seek(spos)
1233                        raise AttributeError('fp is at EOF. Use rewind option '
1234                                             'or seek() to data start.')
1235                # seek back to the correct position.
1236                fp.seek(spos)
1237
1238        if reduced_redundancy:
1239            self.storage_class = 'REDUCED_REDUNDANCY'
1240            if provider.storage_class_header:
1241                headers[provider.storage_class_header] = self.storage_class
1242                # TODO - What if provider doesn't support reduced reduncancy?
1243                # What if different providers provide different classes?
1244        if hasattr(fp, 'name'):
1245            self.path = fp.name
1246        if self.bucket is not None:
1247            if not md5 and provider.supports_chunked_transfer():
1248                # defer md5 calculation to on the fly and
1249                # we don't know anything about size yet.
1250                chunked_transfer = True
1251                self.size = None
1252            else:
1253                chunked_transfer = False
1254                if isinstance(fp, KeyFile):
1255                    # Avoid EOF seek for KeyFile case as it's very inefficient.
1256                    key = fp.getkey()
1257                    size = key.size - fp.tell()
1258                    self.size = size
1259                    # At present both GCS and S3 use MD5 for the etag for
1260                    # non-multipart-uploaded objects. If the etag is 32 hex
1261                    # chars use it as an MD5, to avoid having to read the file
1262                    # twice while transferring.
1263                    if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)):
1264                        etag = key.etag.strip('"')
1265                        md5 = (etag, base64.b64encode(binascii.unhexlify(etag)))
1266                if not md5:
1267                    # compute_md5() and also set self.size to actual
1268                    # size of the bytes read computing the md5.
1269                    md5 = self.compute_md5(fp, size)
1270                    # adjust size if required
1271                    size = self.size
1272                elif size:
1273                    self.size = size
1274                else:
1275                    # If md5 is provided, still need to size so
1276                    # calculate based on bytes to end of content
1277                    spos = fp.tell()
1278                    fp.seek(0, os.SEEK_END)
1279                    self.size = fp.tell() - spos
1280                    fp.seek(spos)
1281                    size = self.size
1282                self.md5 = md5[0]
1283                self.base64md5 = md5[1]
1284
1285            if self.name is None:
1286                self.name = self.md5
1287            if not replace:
1288                if self.bucket.lookup(self.name):
1289                    return
1290
1291            self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb,
1292                           query_args=query_args,
1293                           chunked_transfer=chunked_transfer, size=size)
1294            # return number of bytes written.
1295            return self.size
1296
1297    def set_contents_from_filename(self, filename, headers=None, replace=True,
1298                                   cb=None, num_cb=10, policy=None, md5=None,
1299                                   reduced_redundancy=False,
1300                                   encrypt_key=False):
1301        """
1302        Store an object in S3 using the name of the Key object as the
1303        key in S3 and the contents of the file named by 'filename'.
1304        See set_contents_from_file method for details about the
1305        parameters.
1306
1307        :type filename: string
1308        :param filename: The name of the file that you want to put onto S3
1309
1310        :type headers: dict
1311        :param headers: Additional headers to pass along with the
1312            request to AWS.
1313
1314        :type replace: bool
1315        :param replace: If True, replaces the contents of the file
1316            if it already exists.
1317
1318        :type cb: function
1319        :param cb: a callback function that will be called to report
1320            progress on the upload.  The callback should accept two
1321            integer parameters, the first representing the number of
1322            bytes that have been successfully transmitted to S3 and
1323            the second representing the size of the to be transmitted
1324            object.
1325
1326        :type cb: int
1327        :param num_cb: (optional) If a callback is specified with the
1328            cb parameter this parameter determines the granularity of
1329            the callback by defining the maximum number of times the
1330            callback will be called during the file transfer.
1331
1332        :type policy: :class:`boto.s3.acl.CannedACLStrings`
1333        :param policy: A canned ACL policy that will be applied to the
1334            new key in S3.
1335
1336        :type md5: A tuple containing the hexdigest version of the MD5
1337            checksum of the file as the first element and the
1338            Base64-encoded version of the plain checksum as the second
1339            element.  This is the same format returned by the
1340            compute_md5 method.
1341        :param md5: If you need to compute the MD5 for any reason
1342            prior to upload, it's silly to have to do it twice so this
1343            param, if present, will be used as the MD5 values of the
1344            file.  Otherwise, the checksum will be computed.
1345
1346        :type reduced_redundancy: bool
1347        :param reduced_redundancy: If True, this will set the storage
1348            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
1349            Redundancy Storage (RRS) feature of S3, provides lower
1350            redundancy at lower storage cost.  :type encrypt_key: bool
1351            :param encrypt_key: If True, the new copy of the object
1352            will be encrypted on the server-side by S3 and will be
1353            stored in an encrypted form while at rest in S3.
1354
1355        :rtype: int
1356        :return: The number of bytes written to the key.
1357        """
1358        with open(filename, 'rb') as fp:
1359            return self.set_contents_from_file(fp, headers, replace, cb,
1360                                               num_cb, policy, md5,
1361                                               reduced_redundancy,
1362                                               encrypt_key=encrypt_key)
1363
1364    def set_contents_from_string(self, string_data, headers=None, replace=True,
1365                                 cb=None, num_cb=10, policy=None, md5=None,
1366                                 reduced_redundancy=False,
1367                                 encrypt_key=False):
1368        """
1369        Store an object in S3 using the name of the Key object as the
1370        key in S3 and the string 's' as the contents.
1371        See set_contents_from_file method for details about the
1372        parameters.
1373
1374        :type headers: dict
1375        :param headers: Additional headers to pass along with the
1376            request to AWS.
1377
1378        :type replace: bool
1379        :param replace: If True, replaces the contents of the file if
1380            it already exists.
1381
1382        :type cb: function
1383        :param cb: a callback function that will be called to report
1384            progress on the upload.  The callback should accept two
1385            integer parameters, the first representing the number of
1386            bytes that have been successfully transmitted to S3 and
1387            the second representing the size of the to be transmitted
1388            object.
1389
1390        :type cb: int
1391        :param num_cb: (optional) If a callback is specified with the
1392            cb parameter this parameter determines the granularity of
1393            the callback by defining the maximum number of times the
1394            callback will be called during the file transfer.
1395
1396        :type policy: :class:`boto.s3.acl.CannedACLStrings`
1397        :param policy: A canned ACL policy that will be applied to the
1398            new key in S3.
1399
1400        :type md5: A tuple containing the hexdigest version of the MD5
1401            checksum of the file as the first element and the
1402            Base64-encoded version of the plain checksum as the second
1403            element.  This is the same format returned by the
1404            compute_md5 method.
1405        :param md5: If you need to compute the MD5 for any reason
1406            prior to upload, it's silly to have to do it twice so this
1407            param, if present, will be used as the MD5 values of the
1408            file.  Otherwise, the checksum will be computed.
1409
1410        :type reduced_redundancy: bool
1411        :param reduced_redundancy: If True, this will set the storage
1412            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
1413            Redundancy Storage (RRS) feature of S3, provides lower
1414            redundancy at lower storage cost.
1415
1416        :type encrypt_key: bool
1417        :param encrypt_key: If True, the new copy of the object will
1418            be encrypted on the server-side by S3 and will be stored
1419            in an encrypted form while at rest in S3.
1420        """
1421        if not isinstance(string_data, bytes):
1422            string_data = string_data.encode("utf-8")
1423        fp = BytesIO(string_data)
1424        r = self.set_contents_from_file(fp, headers, replace, cb, num_cb,
1425                                        policy, md5, reduced_redundancy,
1426                                        encrypt_key=encrypt_key)
1427        fp.close()
1428        return r
1429
1430    def get_file(self, fp, headers=None, cb=None, num_cb=10,
1431                 torrent=False, version_id=None, override_num_retries=None,
1432                 response_headers=None):
1433        """
1434        Retrieves a file from an S3 Key
1435
1436        :type fp: file
1437        :param fp: File pointer to put the data into
1438
1439        :type headers: string
1440        :param: headers to send when retrieving the files
1441
1442        :type cb: function
1443        :param cb: a callback function that will be called to report
1444            progress on the upload.  The callback should accept two
1445            integer parameters, the first representing the number of
1446            bytes that have been successfully transmitted to S3 and
1447            the second representing the size of the to be transmitted
1448            object.
1449
1450        :type cb: int
1451        :param num_cb: (optional) If a callback is specified with the
1452            cb parameter this parameter determines the granularity of
1453            the callback by defining the maximum number of times the
1454            callback will be called during the file transfer.
1455
1456        :type torrent: bool
1457        :param torrent: Flag for whether to get a torrent for the file
1458
1459        :type override_num_retries: int
1460        :param override_num_retries: If not None will override configured
1461            num_retries parameter for underlying GET.
1462
1463        :type response_headers: dict
1464        :param response_headers: A dictionary containing HTTP
1465            headers/values that will override any headers associated
1466            with the stored object in the response.  See
1467            http://goo.gl/EWOPb for details.
1468
1469        :type version_id: str
1470        :param version_id: The ID of a particular version of the object.
1471            If this parameter is not supplied but the Key object has
1472            a ``version_id`` attribute, that value will be used when
1473            retrieving the object.  You can set the Key object's
1474            ``version_id`` attribute to None to always grab the latest
1475            version from a version-enabled bucket.
1476        """
1477        self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
1478                                torrent=torrent, version_id=version_id,
1479                                override_num_retries=override_num_retries,
1480                                response_headers=response_headers,
1481                                hash_algs=None,
1482                                query_args=None)
1483
1484    def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10,
1485                 torrent=False, version_id=None, override_num_retries=None,
1486                 response_headers=None, hash_algs=None, query_args=None):
1487        if headers is None:
1488            headers = {}
1489        save_debug = self.bucket.connection.debug
1490        if self.bucket.connection.debug == 1:
1491            self.bucket.connection.debug = 0
1492
1493        query_args = query_args or []
1494        if torrent:
1495            query_args.append('torrent')
1496
1497        if hash_algs is None and not torrent:
1498            hash_algs = {'md5': md5}
1499        digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
1500
1501        # If a version_id is passed in, use that.  If not, check to see
1502        # if the Key object has an explicit version_id and, if so, use that.
1503        # Otherwise, don't pass a version_id query param.
1504        if version_id is None:
1505            version_id = self.version_id
1506        if version_id:
1507            query_args.append('versionId=%s' % version_id)
1508        if response_headers:
1509            for key in response_headers:
1510                query_args.append('%s=%s' % (
1511                    key, urllib.parse.quote(response_headers[key])))
1512        query_args = '&'.join(query_args)
1513        self.open('r', headers, query_args=query_args,
1514                  override_num_retries=override_num_retries)
1515
1516        data_len = 0
1517        if cb:
1518            if self.size is None:
1519                cb_size = 0
1520            else:
1521                cb_size = self.size
1522            if self.size is None and num_cb != -1:
1523                # If size is not available due to chunked transfer for example,
1524                # we'll call the cb for every 1MB of data transferred.
1525                cb_count = (1024 * 1024) / self.BufferSize
1526            elif num_cb > 1:
1527                cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0)))
1528            elif num_cb < 0:
1529                cb_count = -1
1530            else:
1531                cb_count = 0
1532            i = 0
1533            cb(data_len, cb_size)
1534        try:
1535            for bytes in self:
1536                fp.write(bytes)
1537                data_len += len(bytes)
1538                for alg in digesters:
1539                    digesters[alg].update(bytes)
1540                if cb:
1541                    if cb_size > 0 and data_len >= cb_size:
1542                        break
1543                    i += 1
1544                    if i == cb_count or cb_count == -1:
1545                        cb(data_len, cb_size)
1546                        i = 0
1547        except IOError as e:
1548            if e.errno == errno.ENOSPC:
1549                raise StorageDataError('Out of space for destination file '
1550                                       '%s' % fp.name)
1551            raise
1552        if cb and (cb_count <= 1 or i > 0) and data_len > 0:
1553            cb(data_len, cb_size)
1554        for alg in digesters:
1555          self.local_hashes[alg] = digesters[alg].digest()
1556        if self.size is None and not torrent and "Range" not in headers:
1557            self.size = data_len
1558        self.close()
1559        self.bucket.connection.debug = save_debug
1560
1561    def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10):
1562        """
1563        Get a torrent file (see to get_file)
1564
1565        :type fp: file
1566        :param fp: The file pointer of where to put the torrent
1567
1568        :type headers: dict
1569        :param headers: Headers to be passed
1570
1571        :type cb: function
1572        :param cb: a callback function that will be called to report
1573            progress on the upload.  The callback should accept two
1574            integer parameters, the first representing the number of
1575            bytes that have been successfully transmitted to S3 and
1576            the second representing the size of the to be transmitted
1577            object.
1578
1579        :type cb: int
1580        :param num_cb: (optional) If a callback is specified with the
1581            cb parameter this parameter determines the granularity of
1582            the callback by defining the maximum number of times the
1583            callback will be called during the file transfer.
1584
1585        """
1586        return self.get_file(fp, headers, cb, num_cb, torrent=True)
1587
1588    def get_contents_to_file(self, fp, headers=None,
1589                             cb=None, num_cb=10,
1590                             torrent=False,
1591                             version_id=None,
1592                             res_download_handler=None,
1593                             response_headers=None):
1594        """
1595        Retrieve an object from S3 using the name of the Key object as the
1596        key in S3.  Write the contents of the object to the file pointed
1597        to by 'fp'.
1598
1599        :type fp: File -like object
1600        :param fp:
1601
1602        :type headers: dict
1603        :param headers: additional HTTP headers that will be sent with
1604            the GET request.
1605
1606        :type cb: function
1607        :param cb: a callback function that will be called to report
1608            progress on the upload.  The callback should accept two
1609            integer parameters, the first representing the number of
1610            bytes that have been successfully transmitted to S3 and
1611            the second representing the size of the to be transmitted
1612            object.
1613
1614        :type cb: int
1615        :param num_cb: (optional) If a callback is specified with the
1616            cb parameter this parameter determines the granularity of
1617            the callback by defining the maximum number of times the
1618            callback will be called during the file transfer.
1619
1620        :type torrent: bool
1621        :param torrent: If True, returns the contents of a torrent
1622            file as a string.
1623
1624        :type res_upload_handler: ResumableDownloadHandler
1625        :param res_download_handler: If provided, this handler will
1626            perform the download.
1627
1628        :type response_headers: dict
1629        :param response_headers: A dictionary containing HTTP
1630            headers/values that will override any headers associated
1631            with the stored object in the response.  See
1632            http://goo.gl/EWOPb for details.
1633
1634        :type version_id: str
1635        :param version_id: The ID of a particular version of the object.
1636            If this parameter is not supplied but the Key object has
1637            a ``version_id`` attribute, that value will be used when
1638            retrieving the object.  You can set the Key object's
1639            ``version_id`` attribute to None to always grab the latest
1640            version from a version-enabled bucket.
1641        """
1642        if self.bucket is not None:
1643            if res_download_handler:
1644                res_download_handler.get_file(self, fp, headers, cb, num_cb,
1645                                              torrent=torrent,
1646                                              version_id=version_id)
1647            else:
1648                self.get_file(fp, headers, cb, num_cb, torrent=torrent,
1649                              version_id=version_id,
1650                              response_headers=response_headers)
1651
1652    def get_contents_to_filename(self, filename, headers=None,
1653                                 cb=None, num_cb=10,
1654                                 torrent=False,
1655                                 version_id=None,
1656                                 res_download_handler=None,
1657                                 response_headers=None):
1658        """
1659        Retrieve an object from S3 using the name of the Key object as the
1660        key in S3.  Store contents of the object to a file named by 'filename'.
1661        See get_contents_to_file method for details about the
1662        parameters.
1663
1664        :type filename: string
1665        :param filename: The filename of where to put the file contents
1666
1667        :type headers: dict
1668        :param headers: Any additional headers to send in the request
1669
1670        :type cb: function
1671        :param cb: a callback function that will be called to report
1672            progress on the upload.  The callback should accept two
1673            integer parameters, the first representing the number of
1674            bytes that have been successfully transmitted to S3 and
1675            the second representing the size of the to be transmitted
1676            object.
1677
1678        :type num_cb: int
1679        :param num_cb: (optional) If a callback is specified with the
1680            cb parameter this parameter determines the granularity of
1681            the callback by defining the maximum number of times the
1682            callback will be called during the file transfer.
1683
1684        :type torrent: bool
1685        :param torrent: If True, returns the contents of a torrent file
1686            as a string.
1687
1688        :type res_upload_handler: ResumableDownloadHandler
1689        :param res_download_handler: If provided, this handler will
1690            perform the download.
1691
1692        :type response_headers: dict
1693        :param response_headers: A dictionary containing HTTP
1694            headers/values that will override any headers associated
1695            with the stored object in the response.  See
1696            http://goo.gl/EWOPb for details.
1697
1698        :type version_id: str
1699        :param version_id: The ID of a particular version of the object.
1700            If this parameter is not supplied but the Key object has
1701            a ``version_id`` attribute, that value will be used when
1702            retrieving the object.  You can set the Key object's
1703            ``version_id`` attribute to None to always grab the latest
1704            version from a version-enabled bucket.
1705        """
1706        try:
1707            with open(filename, 'wb') as fp:
1708                self.get_contents_to_file(fp, headers, cb, num_cb,
1709                                          torrent=torrent,
1710                                          version_id=version_id,
1711                                          res_download_handler=res_download_handler,
1712                                          response_headers=response_headers)
1713        except Exception:
1714            os.remove(filename)
1715            raise
1716        # if last_modified date was sent from s3, try to set file's timestamp
1717        if self.last_modified is not None:
1718            try:
1719                modified_tuple = email.utils.parsedate_tz(self.last_modified)
1720                modified_stamp = int(email.utils.mktime_tz(modified_tuple))
1721                os.utime(fp.name, (modified_stamp, modified_stamp))
1722            except Exception:
1723                pass
1724
1725    def get_contents_as_string(self, headers=None,
1726                               cb=None, num_cb=10,
1727                               torrent=False,
1728                               version_id=None,
1729                               response_headers=None, encoding=None):
1730        """
1731        Retrieve an object from S3 using the name of the Key object as the
1732        key in S3.  Return the contents of the object as a string.
1733        See get_contents_to_file method for details about the
1734        parameters.
1735
1736        :type headers: dict
1737        :param headers: Any additional headers to send in the request
1738
1739        :type cb: function
1740        :param cb: a callback function that will be called to report
1741            progress on the upload.  The callback should accept two
1742            integer parameters, the first representing the number of
1743            bytes that have been successfully transmitted to S3 and
1744            the second representing the size of the to be transmitted
1745            object.
1746
1747        :type cb: int
1748        :param num_cb: (optional) If a callback is specified with the
1749            cb parameter this parameter determines the granularity of
1750            the callback by defining the maximum number of times the
1751            callback will be called during the file transfer.
1752
1753        :type torrent: bool
1754        :param torrent: If True, returns the contents of a torrent file
1755            as a string.
1756
1757        :type response_headers: dict
1758        :param response_headers: A dictionary containing HTTP
1759            headers/values that will override any headers associated
1760            with the stored object in the response.  See
1761            http://goo.gl/EWOPb for details.
1762
1763        :type version_id: str
1764        :param version_id: The ID of a particular version of the object.
1765            If this parameter is not supplied but the Key object has
1766            a ``version_id`` attribute, that value will be used when
1767            retrieving the object.  You can set the Key object's
1768            ``version_id`` attribute to None to always grab the latest
1769            version from a version-enabled bucket.
1770
1771        :type encoding: str
1772        :param encoding: The text encoding to use, such as ``utf-8``
1773            or ``iso-8859-1``. If set, then a string will be returned.
1774            Defaults to ``None`` and returns bytes.
1775
1776        :rtype: bytes or str
1777        :returns: The contents of the file as bytes or a string
1778        """
1779        fp = BytesIO()
1780        self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent,
1781                                  version_id=version_id,
1782                                  response_headers=response_headers)
1783        value = fp.getvalue()
1784
1785        if encoding is not None:
1786            value = value.decode(encoding)
1787
1788        return value
1789
1790    def add_email_grant(self, permission, email_address, headers=None):
1791        """
1792        Convenience method that provides a quick way to add an email grant
1793        to a key. This method retrieves the current ACL, creates a new
1794        grant based on the parameters passed in, adds that grant to the ACL
1795        and then PUT's the new ACL back to S3.
1796
1797        :type permission: string
1798        :param permission: The permission being granted. Should be one of:
1799            (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).
1800
1801        :type email_address: string
1802        :param email_address: The email address associated with the AWS
1803            account your are granting the permission to.
1804
1805        :type recursive: boolean
1806        :param recursive: A boolean value to controls whether the
1807            command will apply the grant to all keys within the bucket
1808            or not.  The default value is False.  By passing a True
1809            value, the call will iterate through all keys in the
1810            bucket and apply the same grant to each key.  CAUTION: If
1811            you have a lot of keys, this could take a long time!
1812        """
1813        policy = self.get_acl(headers=headers)
1814        policy.acl.add_email_grant(permission, email_address)
1815        self.set_acl(policy, headers=headers)
1816
1817    def add_user_grant(self, permission, user_id, headers=None,
1818                       display_name=None):
1819        """
1820        Convenience method that provides a quick way to add a canonical
1821        user grant to a key.  This method retrieves the current ACL,
1822        creates a new grant based on the parameters passed in, adds that
1823        grant to the ACL and then PUT's the new ACL back to S3.
1824
1825        :type permission: string
1826        :param permission: The permission being granted. Should be one of:
1827            (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).
1828
1829        :type user_id: string
1830        :param user_id: The canonical user id associated with the AWS
1831            account your are granting the permission to.
1832
1833        :type display_name: string
1834        :param display_name: An option string containing the user's
1835            Display Name.  Only required on Walrus.
1836        """
1837        policy = self.get_acl(headers=headers)
1838        policy.acl.add_user_grant(permission, user_id,
1839                                  display_name=display_name)
1840        self.set_acl(policy, headers=headers)
1841
1842    def _normalize_metadata(self, metadata):
1843        if type(metadata) == set:
1844            norm_metadata = set()
1845            for k in metadata:
1846                norm_metadata.add(k.lower())
1847        else:
1848            norm_metadata = {}
1849            for k in metadata:
1850                norm_metadata[k.lower()] = metadata[k]
1851        return norm_metadata
1852
1853    def _get_remote_metadata(self, headers=None):
1854        """
1855        Extracts metadata from existing URI into a dict, so we can
1856        overwrite/delete from it to form the new set of metadata to apply to a
1857        key.
1858        """
1859        metadata = {}
1860        for underscore_name in self._underscore_base_user_settable_fields:
1861            if hasattr(self, underscore_name):
1862                value = getattr(self, underscore_name)
1863                if value:
1864                    # Generate HTTP field name corresponding to "_" named field.
1865                    field_name = underscore_name.replace('_', '-')
1866                    metadata[field_name.lower()] = value
1867        # self.metadata contains custom metadata, which are all user-settable.
1868        prefix = self.provider.metadata_prefix
1869        for underscore_name in self.metadata:
1870            field_name = underscore_name.replace('_', '-')
1871            metadata['%s%s' % (prefix, field_name.lower())] = (
1872                self.metadata[underscore_name])
1873        return metadata
1874
1875    def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl,
1876                            headers=None):
1877        metadata_plus = self._normalize_metadata(metadata_plus)
1878        metadata_minus = self._normalize_metadata(metadata_minus)
1879        metadata = self._get_remote_metadata()
1880        metadata.update(metadata_plus)
1881        for h in metadata_minus:
1882            if h in metadata:
1883                del metadata[h]
1884        src_bucket = self.bucket
1885        # Boto prepends the meta prefix when adding headers, so strip prefix in
1886        # metadata before sending back in to copy_key() call.
1887        rewritten_metadata = {}
1888        for h in metadata:
1889            if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')):
1890                rewritten_h = (h.replace('x-goog-meta-', '')
1891                               .replace('x-amz-meta-', ''))
1892            else:
1893                rewritten_h = h
1894            rewritten_metadata[rewritten_h] = metadata[h]
1895        metadata = rewritten_metadata
1896        src_bucket.copy_key(self.name, self.bucket.name, self.name,
1897                            metadata=metadata, preserve_acl=preserve_acl,
1898                            headers=headers)
1899
1900    def restore(self, days, headers=None):
1901        """Restore an object from an archive.
1902
1903        :type days: int
1904        :param days: The lifetime of the restored object (must
1905            be at least 1 day).  If the object is already restored
1906            then this parameter can be used to readjust the lifetime
1907            of the restored object.  In this case, the days
1908            param is with respect to the initial time of the request.
1909            If the object has not been restored, this param is with
1910            respect to the completion time of the request.
1911
1912        """
1913        response = self.bucket.connection.make_request(
1914            'POST', self.bucket.name, self.name,
1915            data=self.RestoreBody % days,
1916            headers=headers, query_args='restore')
1917        if response.status not in (200, 202):
1918            provider = self.bucket.connection.provider
1919            raise provider.storage_response_error(response.status,
1920                                                  response.reason,
1921                                                  response.read())
1922