1# Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/
2# Copyright (c) 2012 Amazon.com, Inc. or its affiliates.
3# Copyright (c) 2010, Eucalyptus Systems, Inc.
4# All rights reserved.
5#
6# Permission is hereby granted, free of charge, to any person obtaining a
7# copy of this software and associated documentation files (the
8# "Software"), to deal in the Software without restriction, including
9# without limitation the rights to use, copy, modify, merge, publish, dis-
10# tribute, sublicense, and/or sell copies of the Software, and to permit
11# persons to whom the Software is furnished to do so, subject to the fol-
12# lowing conditions:
13#
14# The above copyright notice and this permission notice shall be included
15# in all copies or substantial portions of the Software.
16#
17# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
19# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
20# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23# IN THE SOFTWARE.
24
25import xml.sax
26import base64
27from boto.compat import six, urllib
28import time
29
30from boto.auth import detect_potential_s3sigv4
31import boto.utils
32from boto.connection import AWSAuthConnection
33from boto import handler
34from boto.s3.bucket import Bucket
35from boto.s3.key import Key
36from boto.resultset import ResultSet
37from boto.exception import BotoClientError, S3ResponseError
38
39
def check_lowercase_bucketname(n):
    """
    Validate that a bucket name contains no upper-case characters.

    A lower-case letter is appended to the name before ``islower()`` is
    called, so names without any cased character at all (for example
    digits and dashes only) are accepted as well.

    >>> check_lowercase_bucketname("Aaaa")
    Traceback (most recent call last):
    ...
    BotoClientError: S3Error: Bucket names cannot contain upper-case
    characters when using either the sub-domain or virtual hosting calling
    format.

    >>> check_lowercase_bucketname("1234-5678-9123")
    True
    >>> check_lowercase_bucketname("abcdefg1234")
    True

    :type n: string
    :param n: The bucket name to check.

    :rtype: bool
    :return: ``True`` when the name contains no upper-case characters.

    :raises BotoClientError: If the name contains an upper-case character.
    """
    padded_name = n + 'a'
    if padded_name.islower():
        return True
    raise BotoClientError("Bucket names cannot contain upper-case "
                          "characters when using either the sub-domain or "
                          "virtual hosting calling format.")
63
64
def assert_case_insensitive(f):
    """
    Decorator that validates the bucket name before calling ``f``.

    When the wrapped callable is invoked with exactly three positional
    arguments, the third one is treated as the bucket name and passed to
    ``check_lowercase_bucketname``, which raises ``BotoClientError`` for
    names containing upper-case characters.  Calls with any other number
    of positional arguments are forwarded without validation.

    :param f: The callable to wrap.
    :return: A wrapper with the same name and docstring as ``f``.
    """
    # Imported locally so the decorator does not rely on a module-level
    # import being present.
    import functools

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        if len(args) == 3:
            # Raises on an invalid name; the True return value is unused.
            check_lowercase_bucketname(args[2])
        return f(*args, **kwargs)
    return wrapper
71
72
class _CallingFormat(object):
    """
    Base class describing how a bucket and key are mapped onto the host
    and path of a request URL.  Subclasses override ``get_bucket_server``
    and, where needed, ``build_path_base``.
    """

    def get_bucket_server(self, server, bucket):
        """Return the host for ``bucket``; the base class returns ``''``."""
        return ''

    def build_url_base(self, connection, protocol, server, bucket, key=''):
        """
        Assemble ``protocol://host/path`` for the given bucket and key.

        The path is produced by ``build_path_base`` and then passed
        through ``connection.get_path``.
        """
        scheme = '%s://' % protocol
        host = self.build_host(server, bucket)
        path = connection.get_path(self.build_path_base(bucket, key))
        return scheme + host + path

    def build_host(self, server, bucket):
        """
        Return ``server`` when ``bucket`` is the empty string, otherwise
        whatever ``get_bucket_server`` yields for the pair.
        """
        if bucket == '':
            return server
        return self.get_bucket_server(server, bucket)

    def build_auth_path(self, bucket, key=''):
        """
        Return the path used for signing: ``/bucket/quoted-key``, or just
        ``/quoted-key`` when ``bucket`` is the empty string.
        """
        utf8_key = boto.utils.get_utf8_value(key)
        prefix = '/' + bucket if bucket != '' else ''
        return prefix + '/%s' % urllib.parse.quote(utf8_key)

    def build_path_base(self, bucket, key=''):
        """
        Return the request path ``/quoted-key``; ``bucket`` is not part
        of the path in the base implementation.
        """
        utf8_key = boto.utils.get_utf8_value(key)
        quoted_key = urllib.parse.quote(utf8_key)
        return '/%s' % quoted_key
100
101
class SubdomainCallingFormat(_CallingFormat):
    """
    Calling format that addresses a bucket as a sub-domain of the
    service endpoint.
    """

    @assert_case_insensitive
    def get_bucket_server(self, server, bucket):
        """
        Return ``bucket`` and ``server`` joined by a dot.

        The decorator rejects bucket names with upper-case characters
        when the method is called with positional arguments.
        """
        bucket_host = '%s.%s' % (bucket, server)
        return bucket_host
107
108
class VHostCallingFormat(_CallingFormat):
    """
    Calling format in which the bucket name itself is used as the host.
    """

    @assert_case_insensitive
    def get_bucket_server(self, server, bucket):
        """
        Return ``bucket`` unchanged as the host; ``server`` is ignored.

        The decorator rejects bucket names with upper-case characters
        when the method is called with positional arguments.
        """
        return bucket
114
115
class OrdinaryCallingFormat(_CallingFormat):
    """
    Calling format that keeps the endpoint as the host and places the
    bucket name at the start of the request path.
    """

    def get_bucket_server(self, server, bucket):
        """Return ``server`` unchanged; ``bucket`` does not affect the host."""
        return server

    def build_path_base(self, bucket, key=''):
        """
        Return ``/bucket/quoted-key``, or ``/quoted-key`` when ``bucket``
        is empty.
        """
        utf8_key = boto.utils.get_utf8_value(key)
        prefix = '/' + "%s/" % bucket if bucket else '/'
        return prefix + urllib.parse.quote(utf8_key)
127
128
class ProtocolIndependentOrdinaryCallingFormat(OrdinaryCallingFormat):
    """
    Variant of ``OrdinaryCallingFormat`` whose URLs start with ``//``
    instead of an explicit protocol.
    """

    def build_url_base(self, connection, protocol, server, bucket, key=''):
        """
        Assemble ``//host/path``; the ``protocol`` argument is accepted
        for interface compatibility but not used.
        """
        host = self.build_host(server, bucket)
        path = connection.get_path(self.build_path_base(bucket, key))
        return '//' + host + path
136
137
class Location(object):
    """
    String constants accepted as the ``location`` argument of
    ``S3Connection.create_bucket``.
    """

    # ``DEFAULT`` makes create_bucket send an empty request body; every
    # other value is sent verbatim inside a ``LocationConstraint`` element.
    DEFAULT = ''  # US Classic Region
    EU = 'EU'
    USWest = 'us-west-1'
    USWest2 = 'us-west-2'
    SAEast = 'sa-east-1'
    APNortheast = 'ap-northeast-1'
    APSoutheast = 'ap-southeast-1'
    APSoutheast2 = 'ap-southeast-2'
    CNNorth1 = 'cn-north-1'
149
150
class NoHostProvided(object):
    """
    Sentinel used as the default for the ``host`` argument so that the
    connection can tell whether the user supplied a host or not.

    The class object itself is the marker; it is never instantiated.
    """
155
156
class HostRequiredError(BotoClientError):
    """
    Raised by ``S3Connection`` when SigV4 authentication is in use and
    no ``host`` argument was supplied.
    """
159
160
class S3Connection(AWSAuthConnection):
    """
    Connection to an S3-style storage service.

    Offers bucket-level operations (list, get, look up, create, delete)
    as well as helpers that build pre-signed URLs and browser POST form
    arguments.
    """

    # Host used when the caller does not pass ``host``; looked up in the
    # boto config under ('s3', 'host') with 's3.amazonaws.com' as fallback.
    DefaultHost = boto.config.get('s3', 'host', 's3.amazonaws.com')
    # Dotted path of the calling-format class used by default; looked up
    # in the boto config under ('s3', 'calling_format').
    DefaultCallingFormat = boto.config.get('s3', 'calling_format', 'boto.s3.connection.SubdomainCallingFormat')
    # Template for the query-string authentication parameters added by
    # ``generate_url``: (signature, expiry timestamp, access key id).
    QueryString = 'Signature=%s&Expires=%d&AWSAccessKeyId=%s'
166
167    def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
168                 is_secure=True, port=None, proxy=None, proxy_port=None,
169                 proxy_user=None, proxy_pass=None,
170                 host=NoHostProvided, debug=0, https_connection_factory=None,
171                 calling_format=DefaultCallingFormat, path='/',
172                 provider='aws', bucket_class=Bucket, security_token=None,
173                 suppress_consec_slashes=True, anon=False,
174                 validate_certs=None, profile_name=None):
175        no_host_provided = False
176        if host is NoHostProvided:
177            no_host_provided = True
178            host = self.DefaultHost
179        if isinstance(calling_format, six.string_types):
180            calling_format=boto.utils.find_class(calling_format)()
181        self.calling_format = calling_format
182        self.bucket_class = bucket_class
183        self.anon = anon
184        super(S3Connection, self).__init__(host,
185                aws_access_key_id, aws_secret_access_key,
186                is_secure, port, proxy, proxy_port, proxy_user, proxy_pass,
187                debug=debug, https_connection_factory=https_connection_factory,
188                path=path, provider=provider, security_token=security_token,
189                suppress_consec_slashes=suppress_consec_slashes,
190                validate_certs=validate_certs, profile_name=profile_name)
191        # We need to delay until after the call to ``super`` before checking
192        # to see if SigV4 is in use.
193        if no_host_provided:
194            if 'hmac-v4-s3' in self._required_auth_capability():
195                raise HostRequiredError(
196                    "When using SigV4, you must specify a 'host' parameter."
197                )
198
199    @detect_potential_s3sigv4
200    def _required_auth_capability(self):
201        if self.anon:
202            return ['anon']
203        else:
204            return ['s3']
205
206    def __iter__(self):
207        for bucket in self.get_all_buckets():
208            yield bucket
209
210    def __contains__(self, bucket_name):
211        return not (self.lookup(bucket_name) is None)
212
    def set_bucket_class(self, bucket_class):
        """
        Set the Bucket class associated with this connection.  By default,
        this would be the boto.s3.bucket.Bucket class but if you want to
        subclass that for some reason this allows you to associate your new
        class.

        :type bucket_class: class
        :param bucket_class: A subclass of Bucket that can be more specific
        """
        self.bucket_class = bucket_class
223
224    def build_post_policy(self, expiration_time, conditions):
225        """
226        Taken from the AWS book Python examples and modified for use with boto
227        """
228        assert isinstance(expiration_time, time.struct_time), \
229            'Policy document must include a valid expiration Time object'
230
231        # Convert conditions object mappings to condition statements
232
233        return '{"expiration": "%s",\n"conditions": [%s]}' % \
234            (time.strftime(boto.utils.ISO8601, expiration_time), ",".join(conditions))
235
236    def build_post_form_args(self, bucket_name, key, expires_in=6000,
237                             acl=None, success_action_redirect=None,
238                             max_content_length=None,
239                             http_method='http', fields=None,
240                             conditions=None, storage_class='STANDARD',
241                             server_side_encryption=None):
242        """
243        Taken from the AWS book Python examples and modified for use with boto
244        This only returns the arguments required for the post form, not the
245        actual form.  This does not return the file input field which also
246        needs to be added
247
248        :type bucket_name: string
249        :param bucket_name: Bucket to submit to
250
251        :type key: string
252        :param key:  Key name, optionally add ${filename} to the end to
253            attach the submitted filename
254
255        :type expires_in: integer
256        :param expires_in: Time (in seconds) before this expires, defaults
257            to 6000
258
259        :type acl: string
260        :param acl: A canned ACL.  One of:
261            * private
262            * public-read
263            * public-read-write
264            * authenticated-read
265            * bucket-owner-read
266            * bucket-owner-full-control
267
268        :type success_action_redirect: string
269        :param success_action_redirect: URL to redirect to on success
270
271        :type max_content_length: integer
272        :param max_content_length: Maximum size for this file
273
274        :type http_method: string
275        :param http_method:  HTTP Method to use, "http" or "https"
276
277        :type storage_class: string
278        :param storage_class: Storage class to use for storing the object.
279            Valid values: STANDARD | REDUCED_REDUNDANCY
280
281        :type server_side_encryption: string
282        :param server_side_encryption: Specifies server-side encryption
283            algorithm to use when Amazon S3 creates an object.
284            Valid values: None | AES256
285
286        :rtype: dict
287        :return: A dictionary containing field names/values as well as
288            a url to POST to
289
290            .. code-block:: python
291
292
293        """
294        if fields is None:
295            fields = []
296        if conditions is None:
297            conditions = []
298        expiration = time.gmtime(int(time.time() + expires_in))
299
300        # Generate policy document
301        conditions.append('{"bucket": "%s"}' % bucket_name)
302        if key.endswith("${filename}"):
303            conditions.append('["starts-with", "$key", "%s"]' % key[:-len("${filename}")])
304        else:
305            conditions.append('{"key": "%s"}' % key)
306        if acl:
307            conditions.append('{"acl": "%s"}' % acl)
308            fields.append({"name": "acl", "value": acl})
309        if success_action_redirect:
310            conditions.append('{"success_action_redirect": "%s"}' % success_action_redirect)
311            fields.append({"name": "success_action_redirect", "value": success_action_redirect})
312        if max_content_length:
313            conditions.append('["content-length-range", 0, %i]' % max_content_length)
314
315        if self.provider.security_token:
316            fields.append({'name': 'x-amz-security-token',
317                           'value': self.provider.security_token})
318            conditions.append('{"x-amz-security-token": "%s"}' % self.provider.security_token)
319
320        if storage_class:
321            fields.append({'name': 'x-amz-storage-class',
322                           'value': storage_class})
323            conditions.append('{"x-amz-storage-class": "%s"}' % storage_class)
324
325        if server_side_encryption:
326            fields.append({'name': 'x-amz-server-side-encryption',
327                           'value': server_side_encryption})
328            conditions.append('{"x-amz-server-side-encryption": "%s"}' % server_side_encryption)
329
330        policy = self.build_post_policy(expiration, conditions)
331
332        # Add the base64-encoded policy document as the 'policy' field
333        policy_b64 = base64.b64encode(policy)
334        fields.append({"name": "policy", "value": policy_b64})
335
336        # Add the AWS access key as the 'AWSAccessKeyId' field
337        fields.append({"name": "AWSAccessKeyId",
338                       "value": self.aws_access_key_id})
339
340        # Add signature for encoded policy document as the
341        # 'signature' field
342        signature = self._auth_handler.sign_string(policy_b64)
343        fields.append({"name": "signature", "value": signature})
344        fields.append({"name": "key", "value": key})
345
346        # HTTPS protocol will be used if the secure HTTP option is enabled.
347        url = '%s://%s/' % (http_method,
348                            self.calling_format.build_host(self.server_name(),
349                                                           bucket_name))
350
351        return {"action": url, "fields": fields}
352
353    def generate_url_sigv4(self, expires_in, method, bucket='', key='',
354                            headers=None, force_http=False,
355                            response_headers=None, version_id=None,
356                            iso_date=None):
357        path = self.calling_format.build_path_base(bucket, key)
358        auth_path = self.calling_format.build_auth_path(bucket, key)
359        host = self.calling_format.build_host(self.server_name(), bucket)
360
361        # For presigned URLs we should ignore the port if it's HTTPS
362        if host.endswith(':443'):
363            host = host[:-4]
364
365        params = {}
366        if version_id is not None:
367            params['VersionId'] = version_id
368
369        http_request = self.build_base_http_request(method, path, auth_path,
370                                                    headers=headers, host=host,
371                                                    params=params)
372
373        return self._auth_handler.presign(http_request, expires_in,
374                                          iso_date=iso_date)
375
376    def generate_url(self, expires_in, method, bucket='', key='', headers=None,
377                     query_auth=True, force_http=False, response_headers=None,
378                     expires_in_absolute=False, version_id=None):
379        if self._auth_handler.capability[0] == 'hmac-v4-s3':
380            # Handle the special sigv4 case
381            return self.generate_url_sigv4(expires_in, method, bucket=bucket,
382                key=key, headers=headers, force_http=force_http,
383                response_headers=response_headers, version_id=version_id)
384
385        headers = headers or {}
386        if expires_in_absolute:
387            expires = int(expires_in)
388        else:
389            expires = int(time.time() + expires_in)
390        auth_path = self.calling_format.build_auth_path(bucket, key)
391        auth_path = self.get_path(auth_path)
392        # optional version_id and response_headers need to be added to
393        # the query param list.
394        extra_qp = []
395        if version_id is not None:
396            extra_qp.append("versionId=%s" % version_id)
397        if response_headers:
398            for k, v in response_headers.items():
399                extra_qp.append("%s=%s" % (k, urllib.parse.quote(v)))
400        if self.provider.security_token:
401            headers['x-amz-security-token'] = self.provider.security_token
402        if extra_qp:
403            delimiter = '?' if '?' not in auth_path else '&'
404            auth_path += delimiter + '&'.join(extra_qp)
405        c_string = boto.utils.canonical_string(method, auth_path, headers,
406                                               expires, self.provider)
407        b64_hmac = self._auth_handler.sign_string(c_string)
408        encoded_canonical = urllib.parse.quote(b64_hmac, safe='')
409        self.calling_format.build_path_base(bucket, key)
410        if query_auth:
411            query_part = '?' + self.QueryString % (encoded_canonical, expires,
412                                                   self.aws_access_key_id)
413        else:
414            query_part = ''
415        if headers:
416            hdr_prefix = self.provider.header_prefix
417            for k, v in headers.items():
418                if k.startswith(hdr_prefix):
419                    # headers used for sig generation must be
420                    # included in the url also.
421                    extra_qp.append("%s=%s" % (k, urllib.parse.quote(v)))
422        if extra_qp:
423            delimiter = '?' if not query_part else '&'
424            query_part += delimiter + '&'.join(extra_qp)
425        if force_http:
426            protocol = 'http'
427            port = 80
428        else:
429            protocol = self.protocol
430            port = self.port
431        return self.calling_format.build_url_base(self, protocol,
432                                                  self.server_name(port),
433                                                  bucket, key) + query_part
434
435    def get_all_buckets(self, headers=None):
436        response = self.make_request('GET', headers=headers)
437        body = response.read()
438        if response.status > 300:
439            raise self.provider.storage_response_error(
440                response.status, response.reason, body)
441        rs = ResultSet([('Bucket', self.bucket_class)])
442        h = handler.XmlHandler(rs, self)
443        if not isinstance(body, bytes):
444            body = body.encode('utf-8')
445        xml.sax.parseString(body, h)
446        return rs
447
448    def get_canonical_user_id(self, headers=None):
449        """
450        Convenience method that returns the "CanonicalUserID" of the
451        user who's credentials are associated with the connection.
452        The only way to get this value is to do a GET request on the
453        service which returns all buckets associated with the account.
454        As part of that response, the canonical userid is returned.
455        This method simply does all of that and then returns just the
456        user id.
457
458        :rtype: string
459        :return: A string containing the canonical user id.
460        """
461        rs = self.get_all_buckets(headers=headers)
462        return rs.owner.id
463
464    def get_bucket(self, bucket_name, validate=True, headers=None):
465        """
466        Retrieves a bucket by name.
467
468        If the bucket does not exist, an ``S3ResponseError`` will be raised. If
469        you are unsure if the bucket exists or not, you can use the
470        ``S3Connection.lookup`` method, which will either return a valid bucket
471        or ``None``.
472
473        If ``validate=False`` is passed, no request is made to the service (no
474        charge/communication delay). This is only safe to do if you are **sure**
475        the bucket exists.
476
477        If the default ``validate=True`` is passed, a request is made to the
478        service to ensure the bucket exists. Prior to Boto v2.25.0, this fetched
479        a list of keys (but with a max limit set to ``0``, always returning an empty
480        list) in the bucket (& included better error messages), at an
481        increased expense. As of Boto v2.25.0, this now performs a HEAD request
482        (less expensive but worse error messages).
483
484        If you were relying on parsing the error message before, you should call
485        something like::
486
487            bucket = conn.get_bucket('<bucket_name>', validate=False)
488            bucket.get_all_keys(maxkeys=0)
489
490        :type bucket_name: string
491        :param bucket_name: The name of the bucket
492
493        :type headers: dict
494        :param headers: Additional headers to pass along with the request to
495            AWS.
496
497        :type validate: boolean
498        :param validate: If ``True``, it will try to verify the bucket exists
499            on the service-side. (Default: ``True``)
500        """
501        if validate:
502            return self.head_bucket(bucket_name, headers=headers)
503        else:
504            return self.bucket_class(self, bucket_name)
505
506    def head_bucket(self, bucket_name, headers=None):
507        """
508        Determines if a bucket exists by name.
509
510        If the bucket does not exist, an ``S3ResponseError`` will be raised.
511
512        :type bucket_name: string
513        :param bucket_name: The name of the bucket
514
515        :type headers: dict
516        :param headers: Additional headers to pass along with the request to
517            AWS.
518
519        :returns: A <Bucket> object
520        """
521        response = self.make_request('HEAD', bucket_name, headers=headers)
522        body = response.read()
523        if response.status == 200:
524            return self.bucket_class(self, bucket_name)
525        elif response.status == 403:
526            # For backward-compatibility, we'll populate part of the exception
527            # with the most-common default.
528            err = self.provider.storage_response_error(
529                response.status,
530                response.reason,
531                body
532            )
533            err.error_code = 'AccessDenied'
534            err.error_message = 'Access Denied'
535            raise err
536        elif response.status == 404:
537            # For backward-compatibility, we'll populate part of the exception
538            # with the most-common default.
539            err = self.provider.storage_response_error(
540                response.status,
541                response.reason,
542                body
543            )
544            err.error_code = 'NoSuchBucket'
545            err.error_message = 'The specified bucket does not exist'
546            raise err
547        else:
548            raise self.provider.storage_response_error(
549                response.status, response.reason, body)
550
551    def lookup(self, bucket_name, validate=True, headers=None):
552        """
553        Attempts to get a bucket from S3.
554
555        Works identically to ``S3Connection.get_bucket``, save for that it
556        will return ``None`` if the bucket does not exist instead of throwing
557        an exception.
558
559        :type bucket_name: string
560        :param bucket_name: The name of the bucket
561
562        :type headers: dict
563        :param headers: Additional headers to pass along with the request to
564            AWS.
565
566        :type validate: boolean
567        :param validate: If ``True``, it will try to fetch all keys within the
568            given bucket. (Default: ``True``)
569        """
570        try:
571            bucket = self.get_bucket(bucket_name, validate, headers=headers)
572        except:
573            bucket = None
574        return bucket
575
576    def create_bucket(self, bucket_name, headers=None,
577                      location=Location.DEFAULT, policy=None):
578        """
579        Creates a new located bucket. By default it's in the USA. You can pass
580        Location.EU to create a European bucket (S3) or European Union bucket
581        (GCS).
582
583        :type bucket_name: string
584        :param bucket_name: The name of the new bucket
585
586        :type headers: dict
587        :param headers: Additional headers to pass along with the request to AWS.
588
589        :type location: str
590        :param location: The location of the new bucket.  You can use one of the
591            constants in :class:`boto.s3.connection.Location` (e.g. Location.EU,
592            Location.USWest, etc.).
593
594        :type policy: :class:`boto.s3.acl.CannedACLStrings`
595        :param policy: A canned ACL policy that will be applied to the
596            new key in S3.
597
598        """
599        check_lowercase_bucketname(bucket_name)
600
601        if policy:
602            if headers:
603                headers[self.provider.acl_header] = policy
604            else:
605                headers = {self.provider.acl_header: policy}
606        if location == Location.DEFAULT:
607            data = ''
608        else:
609            data = '<CreateBucketConfiguration><LocationConstraint>' + \
610                    location + '</LocationConstraint></CreateBucketConfiguration>'
611        response = self.make_request('PUT', bucket_name, headers=headers,
612                data=data)
613        body = response.read()
614        if response.status == 409:
615            raise self.provider.storage_create_error(
616                response.status, response.reason, body)
617        if response.status == 200:
618            return self.bucket_class(self, bucket_name)
619        else:
620            raise self.provider.storage_response_error(
621                response.status, response.reason, body)
622
623    def delete_bucket(self, bucket, headers=None):
624        """
625        Removes an S3 bucket.
626
627        In order to remove the bucket, it must first be empty. If the bucket is
628        not empty, an ``S3ResponseError`` will be raised.
629
630        :type bucket_name: string
631        :param bucket_name: The name of the bucket
632
633        :type headers: dict
634        :param headers: Additional headers to pass along with the request to
635            AWS.
636        """
637        response = self.make_request('DELETE', bucket, headers=headers)
638        body = response.read()
639        if response.status != 204:
640            raise self.provider.storage_response_error(
641                response.status, response.reason, body)
642
643    def make_request(self, method, bucket='', key='', headers=None, data='',
644                     query_args=None, sender=None, override_num_retries=None,
645                     retry_handler=None):
646        if isinstance(bucket, self.bucket_class):
647            bucket = bucket.name
648        if isinstance(key, Key):
649            key = key.name
650        path = self.calling_format.build_path_base(bucket, key)
651        boto.log.debug('path=%s' % path)
652        auth_path = self.calling_format.build_auth_path(bucket, key)
653        boto.log.debug('auth_path=%s' % auth_path)
654        host = self.calling_format.build_host(self.server_name(), bucket)
655        if query_args:
656            path += '?' + query_args
657            boto.log.debug('path=%s' % path)
658            auth_path += '?' + query_args
659            boto.log.debug('auth_path=%s' % auth_path)
660        return super(S3Connection, self).make_request(
661            method, path, headers,
662            data, host, auth_path, sender,
663            override_num_retries=override_num_retries,
664            retry_handler=retry_handler
665        )
666