1# Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/
2# Copyright (c) 2012 Amazon.com, Inc. or its affiliates.
3# Copyright (c) 2010 Google
4# Copyright (c) 2008 rPath, Inc.
5# Copyright (c) 2009 The Echo Nest Corporation
6# Copyright (c) 2010, Eucalyptus Systems, Inc.
7# Copyright (c) 2011, Nexenta Systems Inc.
8# All rights reserved.
9#
10# Permission is hereby granted, free of charge, to any person obtaining a
11# copy of this software and associated documentation files (the
12# "Software"), to deal in the Software without restriction, including
13# without limitation the rights to use, copy, modify, merge, publish, dis-
14# tribute, sublicense, and/or sell copies of the Software, and to permit
15# persons to whom the Software is furnished to do so, subject to the fol-
16# lowing conditions:
17#
18# The above copyright notice and this permission notice shall be included
19# in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
23# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
24# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27# IN THE SOFTWARE.
28
29#
30# Parts of this code were copied or derived from sample code supplied by AWS.
31# The following notice applies to that code.
32#
33#  This software code is made available "AS IS" without warranties of any
34#  kind.  You may copy, display, modify and redistribute the software
35#  code either by itself or as incorporated into your code; provided that
36#  you do not remove any proprietary notices.  Your use of this software
37#  code is at your own risk and you waive any claim against Amazon
38#  Digital Services, Inc. or its affiliates with respect to your use of
39#  this software code. (c) 2006 Amazon Digital Services, Inc. or its
40#  affiliates.
41
42"""
43Handles basic connections to AWS
44"""
45from datetime import datetime
46import errno
47import os
48import random
49import re
50import socket
51import sys
52import time
53import xml.sax
54import copy
55
56from boto import auth
57from boto import auth_handler
58import boto
59import boto.utils
60import boto.handler
61import boto.cacerts
62
63from boto import config, UserAgent
64from boto.compat import six, http_client, urlparse, quote, encodebytes
65from boto.exception import AWSConnectionError
66from boto.exception import BotoClientError
67from boto.exception import BotoServerError
68from boto.exception import PleaseRetryException
69from boto.provider import Provider
70from boto.resultset import ResultSet
71
# Certificate-validating HTTPS support is optional.  The flag is only set
# when both ``ssl`` and boto's ``https_connection`` module can be imported
# and ``ssl`` exposes ``SSLError``.
HAVE_HTTPS_CONNECTION = False
try:
    import ssl
    from boto import https_connection
    # Google App Engine runs on Python 2.5 so doesn't have ssl.SSLError.
    if hasattr(ssl, 'SSLError'):
        HAVE_HTTPS_CONNECTION = True
except ImportError:
    pass

# Use ``dummy_threading`` under the ``threading`` name when the real
# module cannot be imported.
try:
    import threading
except ImportError:
    import dummy_threading as threading

# True only when all three of these environment variables are present.
ON_APP_ENGINE = all(key in os.environ for key in (
    'USER_IS_ADMIN', 'CURRENT_VERSION_ID', 'APPLICATION_ID'))

# Default port for each value of ``is_secure``.
PORTS_BY_SECURITY = {True: 443,
                     False: 80}

# Path of the "cacerts.txt" file in the directory of the boto.cacerts module.
DEFAULT_CA_CERTS_FILE = os.path.join(os.path.dirname(os.path.abspath(boto.cacerts.__file__)), "cacerts.txt")
94
95
class HostConnectionPool(object):

    """
    Pool of reusable connections for one remote (host, port, is_secure).

    The _mexe method hands connections back to the pool before the
    response body has been read, so a pooled connection is not
    necessarily able to carry another request yet.  ``get()`` only
    returns connections that are ready; the others are moved to the
    back of the queue until they become ready.

    ``queue`` is an ordered list of (connection, time) pairs, where the
    time is when the connection was put into the pool.  Once a pair at
    the front of the queue is older than
    ``ConnectionPool.STALE_DURATION`` it is discarded rather than
    reused, which avoids waiting on a connection that AWS may already
    have closed on its side because of inactivity.

    Thread Safety:

        This class does no locking of its own.  It is used only from
        ConnectionPool while its mutex is held.
    """

    def __init__(self):
        # Ordered list of (connection, time-added) pairs, oldest first.
        self.queue = []

    def size(self):
        """
        Return how many connections this host's pool currently holds.
        Some of them may still be in use and therefore not yet
        eligible to be returned by get().
        """
        return len(self.queue)

    def put(self, conn):
        """
        Append a connection to the pool, stamped with the current time.
        """
        entry = (conn, time.time())
        self.queue.append(entry)

    def get(self):
        """
        Return the next pooled connection that is ready for reuse, or
        None when no such connection exists.
        """
        # Drop ready connections that have become too old first.
        self.clean()

        # Look at every queued connection at most once.  A connection
        # that is not ready goes back to the end of the queue with a
        # fresh timestamp, on the assumption that somebody is actively
        # reading its response.
        remaining = len(self.queue)
        while remaining:
            remaining -= 1
            candidate, _added_at = self.queue.pop(0)
            if self._conn_ready(candidate):
                return candidate
            self.put(candidate)
        return None

    def _conn_ready(self, conn):
        """
        Tell whether ``conn`` can be used for a new request.

        The state diagram at the top of http_client.py says that a
        response stays attached to the connection from the moment the
        headers have been read (which _mexe does before pooling the
        connection) until reading is finished.  In practice the
        response may already be closed while still being attached, so
        both situations are checked.

        Reading a private instance variable is ugly, but no public
        method exposes the state needed here.
        """
        if ON_APP_ENGINE:
            # Google AppEngine implementation of HTTPConnection doesn't
            # contain the _HTTPConnection__response attribute, and there
            # is no way to tell whether a connection is ready.  Reusing
            # connections makes no sense with the urlfetch service.
            return False
        pending = getattr(conn, '_HTTPConnection__response', None)
        if pending is None:
            return True
        return pending.isclosed()

    def clean(self):
        """
        Drop stale connections from the front of the queue.
        """
        # The connections are deliberately not closed here -- somebody
        # may still be reading from them.
        while self.queue and self._pair_stale(self.queue[0]):
            del self.queue[0]

    def _pair_stale(self, pair):
        """
        Return True if the (connection, time) pair is too old to be
        used.
        """
        _conn, added_at = pair
        current = time.time()
        expires_at = added_at + ConnectionPool.STALE_DURATION
        return expires_at < current
200
201
class ConnectionPool(object):

    """
    Connection pool whose entries expire after a fixed period of time,
    which saves waiting on connections that AWS has already timed out
    on its side.

    Every access to ``host_to_pool`` made by the public methods happens
    while ``mutex`` is held, so this class is thread-safe.
    """

    #
    # Minimum number of seconds between two full clean passes.
    #

    CLEAN_INTERVAL = 5.0

    #
    # How long before a connection becomes "stale" and won't be reused
    # again.  The intention is that this time is less than the timeout
    # period that AWS uses, so we'll never try to reuse a connection
    # and find that AWS is timing it out.
    #
    # Experimentation in July 2011 shows that AWS starts timing things
    # out after three minutes.  The 60 seconds here is conservative so
    # we should never hit that 3-minute timeout.
    #

    STALE_DURATION = 60.0

    def __init__(self):
        # Maps (host, port, is_secure) to a HostConnectionPool.  Pools
        # that become empty are removed by clean().
        self.host_to_pool = {}
        # Time of the last full clean pass.
        self.last_clean_time = 0.0
        self.mutex = threading.Lock()
        # The stale duration lives on the class (HostConnectionPool
        # reads it from there), with the current value as the default
        # for the config lookup.
        configured = config.getfloat('Boto', 'connection_stale_duration',
                                     ConnectionPool.STALE_DURATION)
        ConnectionPool.STALE_DURATION = configured

    def __getstate__(self):
        # Pickle without the lock and without any pooled connections.
        state = self.__dict__.copy()
        state['host_to_pool'] = {}
        state.pop('mutex')
        return state

    def __setstate__(self, dct):
        # The pickled state is ignored; start again from an empty pool.
        self.__init__()

    def size(self):
        """
        Return the total number of connections held across all hosts.
        """
        total = 0
        for host_pool in self.host_to_pool.values():
            total += host_pool.size()
        return total

    def get_http_connection(self, host, port, is_secure):
        """
        Fetch a reusable connection for the given host from the pool.
        Returns None when there is nothing to reuse.  The caller is
        responsible for calling close() on the connection once it is
        no longer needed.
        """
        self.clean()
        key = (host, port, is_secure)
        with self.mutex:
            host_pool = self.host_to_pool.get(key)
            if host_pool is None:
                return None
            return host_pool.get()

    def put_http_connection(self, host, port, is_secure, conn):
        """
        Store a connection so that it can be reused for the given
        host later on.
        """
        key = (host, port, is_secure)
        with self.mutex:
            try:
                host_pool = self.host_to_pool[key]
            except KeyError:
                host_pool = self.host_to_pool[key] = HostConnectionPool()
            host_pool.put(conn)

    def clean(self):
        """
        Remove stale connections from every per-host pool and then
        drop the pools that ended up empty.  Per-host pools already
        clean themselves whenever a connection is fetched from them;
        this pass covers pools nobody fetches from any more.  It does
        nothing if the previous pass happened less than CLEAN_INTERVAL
        seconds ago.
        """
        with self.mutex:
            now = time.time()
            if now <= self.last_clean_time + self.CLEAN_INTERVAL:
                return
            emptied = []
            for key, host_pool in self.host_to_pool.items():
                host_pool.clean()
                if host_pool.size() == 0:
                    emptied.append(key)
            for key in emptied:
                del self.host_to_pool[key]
            self.last_clean_time = now
301
302
class HTTPRequest(object):
    """Holds the pieces of one HTTP request so that it can be signed."""

    def __init__(self, method, protocol, host, port, path, auth_path,
                 params, headers, body):
        """Represents an HTTP request.

        :type method: string
        :param method: The HTTP method name, 'GET', 'POST', 'PUT' etc.

        :type protocol: string
        :param protocol: The http protocol used, 'http' or 'https'.

        :type host: string
        :param host: Host to which the request is addressed. eg. abc.com

        :type port: int
        :param port: port on which the request is being sent. Zero means unset,
            in which case default port will be chosen.

        :type path: string
        :param path: URL path that is being accessed.

        :type auth_path: string
        :param auth_path: The part of the URL path used when creating the
            authentication string.  ``None`` means "same as ``path``".

        :type params: dict
        :param params: HTTP url query parameters, with key as name of
            the param, and value as value of param.

        :type headers: dict
        :param headers: HTTP headers, with key as name of the header and value
            as value of header.

        :type body: string
        :param body: Body of the HTTP request. If not present, will be None or
            empty string ('').
        """
        self.method = method
        self.protocol = protocol
        self.host = host
        self.port = port
        self.path = path
        if auth_path is None:
            auth_path = path
        self.auth_path = auth_path
        self.params = params
        # chunked Transfer-Encoding should act only on PUT request.  The
        # header is removed from a copy so the caller's dict is untouched.
        if headers and 'Transfer-Encoding' in headers and \
                headers['Transfer-Encoding'] == 'chunked' and \
                self.method != 'PUT':
            self.headers = headers.copy()
            del self.headers['Transfer-Encoding']
        else:
            self.headers = headers
        self.body = body

    def __str__(self):
        return (('method:(%s) protocol:(%s) host(%s) port(%s) path(%s) '
                 'params(%s) headers(%s) body(%s)') % (self.method,
                 self.protocol, self.host, self.port, self.path, self.params,
                 self.headers, self.body))

    def authorize(self, connection, **kwargs):
        """Quote text headers, set User-Agent and let the connection sign.

        :param connection: object whose ``_auth_handler.add_auth`` is called
            with this request and ``kwargs``.
        :param kwargs: passed through to ``add_auth`` unchanged.
        """
        # Text header values are percent-quoted exactly once, even if the
        # request is authorized several times.
        if not getattr(self, '_headers_quoted', False):
            for key in self.headers:
                val = self.headers[key]
                if isinstance(val, six.text_type):
                    safe = '!"#$%&\'()*+,/:;<=>?@[\\]^`{|}~'
                    self.headers[key] = quote(val.encode('utf-8'), safe)
            self._headers_quoted = True

        self.headers['User-Agent'] = UserAgent

        connection._auth_handler.add_auth(self, **kwargs)

        # I'm not sure if this is still needed, now that add_auth is
        # setting the content-length for POST requests.
        if 'Content-Length' not in self.headers:
            if self.headers.get('Transfer-Encoding') != 'chunked':
                # A missing body (None) is documented as valid and counts
                # as zero bytes instead of raising TypeError in len().
                if self.body is None:
                    body_length = 0
                else:
                    body_length = len(self.body)
                self.headers['Content-Length'] = str(body_length)
385
386
class HTTPResponse(http_client.HTTPResponse):
    """``http_client.HTTPResponse`` whose argument-less read() is cached."""

    def __init__(self, *args, **kwargs):
        http_client.HTTPResponse.__init__(self, *args, **kwargs)
        # Result of the first non-empty full read().
        self._cached_response = ''

    def read(self, amt=None):
        """Read the response.

        Unlike http_client.HTTPResponse.read, calling this method without
        ``amt`` caches the response body, and later calls to ``read()``
        without arguments **return that cached body** again.  Calls that
        pass ``amt`` go straight to the base class and are not cached.

        """
        if amt is not None:
            return http_client.HTTPResponse.read(self, amt)
        # Many places in boto call response.read() and expect to get the
        # response body that they can then process.  Caching the body
        # makes sure that every such call sees the full body, not only
        # the first one.
        if not self._cached_response:
            self._cached_response = http_client.HTTPResponse.read(self)
        return self._cached_response
414
415
416class AWSAuthConnection(object):
    def __init__(self, host, aws_access_key_id=None,
                 aws_secret_access_key=None,
                 is_secure=True, port=None, proxy=None, proxy_port=None,
                 proxy_user=None, proxy_pass=None, debug=0,
                 https_connection_factory=None, path='/',
                 provider='aws', security_token=None,
                 suppress_consec_slashes=True,
                 validate_certs=True, profile_name=None):
        """
        Store the connection settings, resolve proxy and provider
        configuration, and create the connection pool and auth handler.

        :type host: str
        :param host: The host to make the connection to

        :keyword str aws_access_key_id: Your AWS Access Key ID (provided by
            Amazon). If none is specified, the value in your
            ``AWS_ACCESS_KEY_ID`` environmental variable is used.
        :keyword str aws_secret_access_key: Your AWS Secret Access Key
            (provided by Amazon). If none is specified, the value in your
            ``AWS_SECRET_ACCESS_KEY`` environmental variable is used.
        :keyword str security_token: The security token associated with
            temporary credentials issued by STS.  Optional unless using
            temporary credentials.  If none is specified, the environment
            variable ``AWS_SECURITY_TOKEN`` is used if defined.

        :type is_secure: boolean
        :param is_secure: Whether the connection is over SSL.  An
            ``is_secure`` option in the ``Boto`` config section replaces
            the value passed in.

        :type https_connection_factory: list or tuple
        :param https_connection_factory: A pair of an HTTP connection
            factory and the exceptions to catch.  The factory should have
            a similar interface to L{http_client.HTTPSConnection}.

        :param str proxy: Address/hostname for a proxy server

        :type proxy_port: int
        :param proxy_port: The port to use when connecting over a proxy

        :type proxy_user: str
        :param proxy_user: The username to connect with on the proxy

        :type proxy_pass: str
        :param proxy_pass: The password to use when connection over a proxy.

        :type port: int
        :param port: The port to use to connect.  When not given, 443 is
            used for secure connections and 80 otherwise.

        :type debug: int
        :param debug: Debug level.  A non-integer value is replaced by 0;
            the result is used as the default for the ``debug`` option of
            the ``Boto`` config section.

        :type path: str
        :param path: Stored as ``self.path``; ``get_path`` puts it in
            front of request paths.

        :type provider: str or boto.provider.Provider
        :param provider: Either a ``Provider`` instance, which is used
            as-is, or a provider type name that is passed to ``Provider``
            together with the credentials and ``profile_name``.

        :type suppress_consec_slashes: bool
        :param suppress_consec_slashes: If provided, controls whether
            consecutive slashes will be suppressed in key paths.

        :type validate_certs: bool
        :param validate_certs: Controls whether SSL certificates
            will be validated or not.  Defaults to True.

        :type profile_name: str
        :param profile_name: Override usual Credentials section in config
            file to use a named set of keys instead.

        :raises BotoClientError: if certificate validation is enabled but
            ``HAVE_HTTPS_CONNECTION`` is False.
        """
        self.suppress_consec_slashes = suppress_consec_slashes
        self.num_retries = 6
        # Override passed-in is_secure setting if value was defined in config.
        if config.has_option('Boto', 'is_secure'):
            is_secure = config.getboolean('Boto', 'is_secure')
        self.is_secure = is_secure
        # Whether or not to validate server certificates.
        # The default is now to validate certificates.  This can be
        # overridden in the boto config file or by passing an
        # explicit validate_certs parameter to the class constructor.
        self.https_validate_certificates = config.getbool(
            'Boto', 'https_validate_certificates',
            validate_certs)
        if self.https_validate_certificates and not HAVE_HTTPS_CONNECTION:
            raise BotoClientError(
                "SSL server certificate validation is enabled in boto "
                "configuration, but Python dependencies required to "
                "support this feature are not available. Certificate "
                "validation is only supported when running under Python "
                "2.6 or later.")
        # The special value 'system' is stored as None instead of a path.
        certs_file = config.get_value(
            'Boto', 'ca_certificates_file', DEFAULT_CA_CERTS_FILE)
        if certs_file == 'system':
            certs_file = None
        self.ca_certificates_file = certs_file
        # Fall back to the default port for the chosen security mode.
        if port:
            self.port = port
        else:
            self.port = PORTS_BY_SECURITY[is_secure]

        # handle_proxy reads self.port, so it has to run after the port
        # has been settled above.
        self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass)
        # define exceptions from http_client that we want to catch and retry
        self.http_exceptions = (http_client.HTTPException, socket.error,
                                socket.gaierror, http_client.BadStatusLine)
        # define subclasses of the above that are not retryable.
        self.http_unretryable_exceptions = []
        if HAVE_HTTPS_CONNECTION:
            self.http_unretryable_exceptions.append(
                https_connection.InvalidCertificateException)

        # define values in socket exceptions we don't want to catch
        self.socket_exception_values = (errno.EINTR,)
        # A custom factory comes as a (factory, exceptions) pair; its
        # exceptions are added to the ones caught above.
        if https_connection_factory is not None:
            self.https_connection_factory = https_connection_factory[0]
            self.http_exceptions += https_connection_factory[1]
        else:
            self.https_connection_factory = None
        if (is_secure):
            self.protocol = 'https'
        else:
            self.protocol = 'http'
        self.host = host
        self.path = path
        # If the value passed in for debug is not an integer, use 0 instead.
        if not isinstance(debug, six.integer_types):
            debug = 0
        self.debug = config.getint('Boto', 'debug', debug)
        self.host_header = None

        # Timeout used to tell http_client how long to wait for socket
        # timeouts.  On Python 2.6 or greater the value comes from
        # http_socket_timeout in the Boto config; on older versions no
        # timeout is put into the connection kwargs at all.
        self.http_connection_kwargs = {}
        if (sys.version_info[0], sys.version_info[1]) >= (2, 6):
            # If timeout isn't defined in boto config file, use 70 second
            # default as recommended by
            # http://docs.aws.amazon.com/amazonswf/latest/apireference/API_PollForActivityTask.html
            self.http_connection_kwargs['timeout'] = config.getint(
                'Boto', 'http_socket_timeout', 70)

        if isinstance(provider, Provider):
            # Allow overriding Provider
            self.provider = provider
        else:
            self._provider_type = provider
            self.provider = Provider(self._provider_type,
                                     aws_access_key_id,
                                     aws_secret_access_key,
                                     security_token,
                                     profile_name)

        # Allow config file to override default host, port, and host header.
        if self.provider.host:
            self.host = self.provider.host
        if self.provider.port:
            self.port = self.provider.port
        if self.provider.host_header:
            self.host_header = self.provider.host_header

        self._pool = ConnectionPool()
        # (host, port, is_secure) triple used by the ``connection`` property.
        self._connection = (self.host, self.port, self.is_secure)
        self._last_rs = None
        # NOTE(review): the handler is looked up with the ``host`` argument,
        # not with self.host, which the provider may have replaced above --
        # confirm that this is intended.
        self._auth_handler = auth.get_auth_handler(
            host, config, self.provider, self._required_auth_capability())
        # A class-level AuthServiceName is copied onto the auth handler
        # through the auth_service_name property.
        if getattr(self, 'AuthServiceName', None) is not None:
            self.auth_service_name = self.AuthServiceName
        self.request_hook = None
573
574    def __repr__(self):
575        return '%s:%s' % (self.__class__.__name__, self.host)
576
577    def _required_auth_capability(self):
578        return []
579
580    def _get_auth_service_name(self):
581        return getattr(self._auth_handler, 'service_name')
582
583    # For Sigv4, the auth_service_name/auth_region_name properties allow
584    # the service_name/region_name to be explicitly set instead of being
585    # derived from the endpoint url.
586    def _set_auth_service_name(self, value):
587        self._auth_handler.service_name = value
588    auth_service_name = property(_get_auth_service_name, _set_auth_service_name)
589
590    def _get_auth_region_name(self):
591        return getattr(self._auth_handler, 'region_name')
592
593    def _set_auth_region_name(self, value):
594        self._auth_handler.region_name = value
595    auth_region_name = property(_get_auth_region_name, _set_auth_region_name)
596
597    def connection(self):
598        return self.get_http_connection(*self._connection)
599    connection = property(connection)
600
601    def aws_access_key_id(self):
602        return self.provider.access_key
603    aws_access_key_id = property(aws_access_key_id)
604    gs_access_key_id = aws_access_key_id
605    access_key = aws_access_key_id
606
607    def aws_secret_access_key(self):
608        return self.provider.secret_key
609    aws_secret_access_key = property(aws_secret_access_key)
610    gs_secret_access_key = aws_secret_access_key
611    secret_key = aws_secret_access_key
612
613    def profile_name(self):
614        return self.provider.profile_name
615    profile_name = property(profile_name)
616
617    def get_path(self, path='/'):
618        # The default behavior is to suppress consecutive slashes for reasons
619        # discussed at
620        # https://groups.google.com/forum/#!topic/boto-dev/-ft0XPUy0y8
621        # You can override that behavior with the suppress_consec_slashes param.
622        if not self.suppress_consec_slashes:
623            return self.path + re.sub('^(/*)/', "\\1", path)
624        pos = path.find('?')
625        if pos >= 0:
626            params = path[pos:]
627            path = path[:pos]
628        else:
629            params = None
630        if path[-1] == '/':
631            need_trailing = True
632        else:
633            need_trailing = False
634        path_elements = self.path.split('/')
635        path_elements.extend(path.split('/'))
636        path_elements = [p for p in path_elements if p]
637        path = '/' + '/'.join(path_elements)
638        if path[-1] != '/' and need_trailing:
639            path += '/'
640        if params:
641            path = path + params
642        return path
643
644    def server_name(self, port=None):
645        if not port:
646            port = self.port
647        if port == 80:
648            signature_host = self.host
649        else:
650            # This unfortunate little hack can be attributed to
651            # a difference in the 2.6 version of http_client.  In old
652            # versions, it would append ":443" to the hostname sent
653            # in the Host header and so we needed to make sure we
654            # did the same when calculating the V2 signature.  In 2.6
655            # (and higher!)
656            # it no longer does that.  Hence, this kludge.
657            if ((ON_APP_ENGINE and sys.version[:3] == '2.5') or
658                    sys.version[:3] in ('2.6', '2.7')) and port == 443:
659                signature_host = self.host
660            else:
661                signature_host = '%s:%d' % (self.host, port)
662        return signature_host
663
664    def handle_proxy(self, proxy, proxy_port, proxy_user, proxy_pass):
665        self.proxy = proxy
666        self.proxy_port = proxy_port
667        self.proxy_user = proxy_user
668        self.proxy_pass = proxy_pass
669        if 'http_proxy' in os.environ and not self.proxy:
670            pattern = re.compile(
671                '(?:http://)?'
672                '(?:(?P<user>[\w\-\.]+):(?P<pass>.*)@)?'
673                '(?P<host>[\w\-\.]+)'
674                '(?::(?P<port>\d+))?'
675            )
676            match = pattern.match(os.environ['http_proxy'])
677            if match:
678                self.proxy = match.group('host')
679                self.proxy_port = match.group('port')
680                self.proxy_user = match.group('user')
681                self.proxy_pass = match.group('pass')
682        else:
683            if not self.proxy:
684                self.proxy = config.get_value('Boto', 'proxy', None)
685            if not self.proxy_port:
686                self.proxy_port = config.get_value('Boto', 'proxy_port', None)
687            if not self.proxy_user:
688                self.proxy_user = config.get_value('Boto', 'proxy_user', None)
689            if not self.proxy_pass:
690                self.proxy_pass = config.get_value('Boto', 'proxy_pass', None)
691
692        if not self.proxy_port and self.proxy:
693            print("http_proxy environment variable does not specify "
694                  "a port, using default")
695            self.proxy_port = self.port
696
697        self.no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
698        self.use_proxy = (self.proxy is not None)
699
700    def get_http_connection(self, host, port, is_secure):
701        conn = self._pool.get_http_connection(host, port, is_secure)
702        if conn is not None:
703            return conn
704        else:
705            return self.new_http_connection(host, port, is_secure)
706
707    def skip_proxy(self, host):
708        if not self.no_proxy:
709            return False
710
711        if self.no_proxy == "*":
712            return True
713
714        hostonly = host
715        hostonly = host.split(':')[0]
716
717        for name in self.no_proxy.split(','):
718            if name and (hostonly.endswith(name) or host.endswith(name)):
719                return True
720
721        return False
722
    def new_http_connection(self, host, port, is_secure):
        """
        Build a new connection object without consulting the pool.

        :param host: host name, optionally with a ``:port`` suffix, which
            is cut off.  ``None`` means ``self.server_name()``.
        :param port: handed to the connection factories as the ``port``
            keyword argument (replaced by the proxy port for plain HTTP
            through a proxy).
        :param is_secure: True for HTTPS, False for plain HTTP.
        :return: the connection object, with ``response_class`` set to
            this module's ``HTTPResponse``.
        """
        if host is None:
            host = self.server_name()

        # Make sure the host is really just the host, not including
        # the port number
        host = host.split(':', 1)[0]

        # Work on a copy so the per-call port does not leak into the
        # shared kwargs dict.
        http_connection_kwargs = self.http_connection_kwargs.copy()

        # Connection factories below expect a port keyword argument
        http_connection_kwargs['port'] = port

        # Override host with proxy settings if needed
        if self.use_proxy and not is_secure and \
                not self.skip_proxy(host):
            host = self.proxy
            http_connection_kwargs['port'] = int(self.proxy_port)

        if is_secure:
            boto.log.debug(
                'establishing HTTPS connection: host=%s, kwargs=%s',
                host, http_connection_kwargs)
            if self.use_proxy and not self.skip_proxy(host):
                # NOTE(review): is_secure is always true in this branch, so
                # the second argument is always 443 and ``port`` is not
                # used -- confirm that non-default HTTPS ports through a
                # proxy are really meant to be ignored.
                connection = self.proxy_ssl(host, is_secure and 443 or 80)
            elif self.https_connection_factory:
                # The custom factory gets the host only, no kwargs.
                connection = self.https_connection_factory(host)
            elif self.https_validate_certificates and HAVE_HTTPS_CONNECTION:
                connection = https_connection.CertValidatingHTTPSConnection(
                    host, ca_certs=self.ca_certificates_file,
                    **http_connection_kwargs)
            else:
                connection = http_client.HTTPSConnection(
                    host, **http_connection_kwargs)
        else:
            boto.log.debug('establishing HTTP connection: kwargs=%s' %
                           http_connection_kwargs)
            if self.https_connection_factory:
                # even though the factory says https, this is too handy
                # to not be able to allow overriding for http also.
                connection = self.https_connection_factory(
                    host, **http_connection_kwargs)
            else:
                connection = http_client.HTTPConnection(
                    host, **http_connection_kwargs)
        if self.debug > 1:
            connection.set_debuglevel(self.debug)
        # self.connection must be maintained for backwards-compatibility
        # however, it must be dynamically pulled from the connection pool
        # set a private variable which will enable that
        if host.split(':')[0] == self.host and is_secure == self.is_secure:
            self._connection = (host, port, is_secure)
        # Set the response class of the http connection to use our custom
        # class.
        connection.response_class = HTTPResponse
        return connection
779
780    def put_http_connection(self, host, port, is_secure, connection):
781        self._pool.put_http_connection(host, port, is_secure, connection)
782
783    def proxy_ssl(self, host=None, port=None):
784        if host and port:
785            host = '%s:%d' % (host, port)
786        else:
787            host = '%s:%d' % (self.host, self.port)
788        # Seems properly to use timeout for connect too
789        timeout = self.http_connection_kwargs.get("timeout")
790        if timeout is not None:
791            sock = socket.create_connection((self.proxy,
792                                             int(self.proxy_port)), timeout)
793        else:
794            sock = socket.create_connection((self.proxy, int(self.proxy_port)))
795        boto.log.debug("Proxy connection: CONNECT %s HTTP/1.0\r\n", host)
796        sock.sendall("CONNECT %s HTTP/1.0\r\n" % host)
797        sock.sendall("User-Agent: %s\r\n" % UserAgent)
798        if self.proxy_user and self.proxy_pass:
799            for k, v in self.get_proxy_auth_header().items():
800                sock.sendall("%s: %s\r\n" % (k, v))
801            # See discussion about this config option at
802            # https://groups.google.com/forum/?fromgroups#!topic/boto-dev/teenFvOq2Cc
803            if config.getbool('Boto', 'send_crlf_after_proxy_auth_headers', False):
804                sock.sendall("\r\n")
805        else:
806            sock.sendall("\r\n")
807        resp = http_client.HTTPResponse(sock, strict=True, debuglevel=self.debug)
808        resp.begin()
809
810        if resp.status != 200:
811            # Fake a socket error, use a code that make it obvious it hasn't
812            # been generated by the socket library
813            raise socket.error(-71,
814                               "Error talking to HTTP proxy %s:%s: %s (%s)" %
815                               (self.proxy, self.proxy_port,
816                                resp.status, resp.reason))
817
818        # We can safely close the response, it duped the original socket
819        resp.close()
820
821        h = http_client.HTTPConnection(host)
822
823        if self.https_validate_certificates and HAVE_HTTPS_CONNECTION:
824            msg = "wrapping ssl socket for proxied connection; "
825            if self.ca_certificates_file:
826                msg += "CA certificate file=%s" % self.ca_certificates_file
827            else:
828                msg += "using system provided SSL certs"
829            boto.log.debug(msg)
830            key_file = self.http_connection_kwargs.get('key_file', None)
831            cert_file = self.http_connection_kwargs.get('cert_file', None)
832            sslSock = ssl.wrap_socket(sock, keyfile=key_file,
833                                      certfile=cert_file,
834                                      cert_reqs=ssl.CERT_REQUIRED,
835                                      ca_certs=self.ca_certificates_file)
836            cert = sslSock.getpeercert()
837            hostname = self.host.split(':', 0)[0]
838            if not https_connection.ValidateCertificateHostname(cert, hostname):
839                raise https_connection.InvalidCertificateException(
840                    hostname, cert, 'hostname mismatch')
841        else:
842            # Fallback for old Python without ssl.wrap_socket
843            if hasattr(http_client, 'ssl'):
844                sslSock = http_client.ssl.SSLSocket(sock)
845            else:
846                sslSock = socket.ssl(sock, None, None)
847                sslSock = http_client.FakeSocket(sock, sslSock)
848
849        # This is a bit unclean
850        h.sock = sslSock
851        return h
852
853    def prefix_proxy_to_path(self, path, host=None):
854        path = self.protocol + '://' + (host or self.server_name()) + path
855        return path
856
857    def get_proxy_auth_header(self):
858        auth = encodebytes(self.proxy_user + ':' + self.proxy_pass)
859        return {'Proxy-Authorization': 'Basic %s' % auth}
860
861    # For passing proxy information to other connection libraries, e.g. cloudsearch2
862    def get_proxy_url_with_auth(self):
863        if not self.use_proxy:
864            return None
865
866        if self.proxy_user or self.proxy_pass:
867            if self.proxy_pass:
868                login_info = '%s:%s@' % (self.proxy_user, self.proxy_pass)
869            else:
870                login_info = '%s@' % self.proxy_user
871        else:
872            login_info = ''
873
874        return 'http://%s%s:%s' % (login_info, self.proxy, str(self.proxy_port or self.port))
875
876    def set_host_header(self, request):
877        try:
878            request.headers['Host'] = \
879                self._auth_handler.host_header(self.host, request)
880        except AttributeError:
881            request.headers['Host'] = self.host.split(':', 1)[0]
882
883    def set_request_hook(self, hook):
884        self.request_hook = hook
885
886    def _mexe(self, request, sender=None, override_num_retries=None,
887              retry_handler=None):
888        """
889        mexe - Multi-execute inside a loop, retrying multiple times to handle
890               transient Internet errors by simply trying again.
891               Also handles redirects.
892
893        This code was inspired by the S3Utils classes posted to the boto-users
894        Google group by Larry Bates.  Thanks!
895
896        """
897        boto.log.debug('Method: %s' % request.method)
898        boto.log.debug('Path: %s' % request.path)
899        boto.log.debug('Data: %s' % request.body)
900        boto.log.debug('Headers: %s' % request.headers)
901        boto.log.debug('Host: %s' % request.host)
902        boto.log.debug('Port: %s' % request.port)
903        boto.log.debug('Params: %s' % request.params)
904        response = None
905        body = None
906        ex = None
907        if override_num_retries is None:
908            num_retries = config.getint('Boto', 'num_retries', self.num_retries)
909        else:
910            num_retries = override_num_retries
911        i = 0
912        connection = self.get_http_connection(request.host, request.port,
913                                              self.is_secure)
914
915        # Convert body to bytes if needed
916        if not isinstance(request.body, bytes) and hasattr(request.body,
917                                                           'encode'):
918            request.body = request.body.encode('utf-8')
919
920        while i <= num_retries:
921            # Use binary exponential backoff to desynchronize client requests.
922            next_sleep = min(random.random() * (2 ** i),
923                             boto.config.get('Boto', 'max_retry_delay', 60))
924            try:
925                # we now re-sign each request before it is retried
926                boto.log.debug('Token: %s' % self.provider.security_token)
927                request.authorize(connection=self)
928                # Only force header for non-s3 connections, because s3 uses
929                # an older signing method + bucket resource URLs that include
930                # the port info. All others should be now be up to date and
931                # not include the port.
932                if 's3' not in self._required_auth_capability():
933                    if not getattr(self, 'anon', False):
934                        if not request.headers.get('Host'):
935                            self.set_host_header(request)
936                boto.log.debug('Final headers: %s' % request.headers)
937                request.start_time = datetime.now()
938                if callable(sender):
939                    response = sender(connection, request.method, request.path,
940                                      request.body, request.headers)
941                else:
942                    connection.request(request.method, request.path,
943                                       request.body, request.headers)
944                    response = connection.getresponse()
945                boto.log.debug('Response headers: %s' % response.getheaders())
946                location = response.getheader('location')
947                # -- gross hack --
948                # http_client gets confused with chunked responses to HEAD requests
949                # so I have to fake it out
950                if request.method == 'HEAD' and getattr(response,
951                                                        'chunked', False):
952                    response.chunked = 0
953                if callable(retry_handler):
954                    status = retry_handler(response, i, next_sleep)
955                    if status:
956                        msg, i, next_sleep = status
957                        if msg:
958                            boto.log.debug(msg)
959                        time.sleep(next_sleep)
960                        continue
961                if response.status in [500, 502, 503, 504]:
962                    msg = 'Received %d response.  ' % response.status
963                    msg += 'Retrying in %3.1f seconds' % next_sleep
964                    boto.log.debug(msg)
965                    body = response.read()
966                    if isinstance(body, bytes):
967                        body = body.decode('utf-8')
968                elif response.status < 300 or response.status >= 400 or \
969                        not location:
970                    # don't return connection to the pool if response contains
971                    # Connection:close header, because the connection has been
972                    # closed and default reconnect behavior may do something
973                    # different than new_http_connection. Also, it's probably
974                    # less efficient to try to reuse a closed connection.
975                    conn_header_value = response.getheader('connection')
976                    if conn_header_value == 'close':
977                        connection.close()
978                    else:
979                        self.put_http_connection(request.host, request.port,
980                                                 self.is_secure, connection)
981                    if self.request_hook is not None:
982                        self.request_hook.handle_request_data(request, response)
983                    return response
984                else:
985                    scheme, request.host, request.path, \
986                        params, query, fragment = urlparse(location)
987                    if query:
988                        request.path += '?' + query
989                    # urlparse can return both host and port in netloc, so if
990                    # that's the case we need to split them up properly
991                    if ':' in request.host:
992                        request.host, request.port = request.host.split(':', 1)
993                    msg = 'Redirecting: %s' % scheme + '://'
994                    msg += request.host + request.path
995                    boto.log.debug(msg)
996                    connection = self.get_http_connection(request.host,
997                                                          request.port,
998                                                          scheme == 'https')
999                    response = None
1000                    continue
1001            except PleaseRetryException as e:
1002                boto.log.debug('encountered a retry exception: %s' % e)
1003                connection = self.new_http_connection(request.host, request.port,
1004                                                      self.is_secure)
1005                response = e.response
1006                ex = e
1007            except self.http_exceptions as e:
1008                for unretryable in self.http_unretryable_exceptions:
1009                    if isinstance(e, unretryable):
1010                        boto.log.debug(
1011                            'encountered unretryable %s exception, re-raising' %
1012                            e.__class__.__name__)
1013                        raise
1014                boto.log.debug('encountered %s exception, reconnecting' %
1015                               e.__class__.__name__)
1016                connection = self.new_http_connection(request.host, request.port,
1017                                                      self.is_secure)
1018                ex = e
1019            time.sleep(next_sleep)
1020            i += 1
1021        # If we made it here, it's because we have exhausted our retries
1022        # and stil haven't succeeded.  So, if we have a response object,
1023        # use it to raise an exception.
1024        # Otherwise, raise the exception that must have already happened.
1025        if self.request_hook is not None:
1026            self.request_hook.handle_request_data(request, response, error=True)
1027        if response:
1028            raise BotoServerError(response.status, response.reason, body)
1029        elif ex:
1030            raise ex
1031        else:
1032            msg = 'Please report this exception as a Boto Issue!'
1033            raise BotoClientError(msg)
1034
1035    def build_base_http_request(self, method, path, auth_path,
1036                                params=None, headers=None, data='', host=None):
1037        path = self.get_path(path)
1038        if auth_path is not None:
1039            auth_path = self.get_path(auth_path)
1040        if params is None:
1041            params = {}
1042        else:
1043            params = params.copy()
1044        if headers is None:
1045            headers = {}
1046        else:
1047            headers = headers.copy()
1048        if self.host_header and not boto.utils.find_matching_headers('host', headers):
1049            headers['host'] = self.host_header
1050        host = host or self.host
1051        if self.use_proxy:
1052            if not auth_path:
1053                auth_path = path
1054            path = self.prefix_proxy_to_path(path, host)
1055            if self.proxy_user and self.proxy_pass and not self.is_secure:
1056                # If is_secure, we don't have to set the proxy authentication
1057                # header here, we did that in the CONNECT to the proxy.
1058                headers.update(self.get_proxy_auth_header())
1059        return HTTPRequest(method, self.protocol, host, self.port,
1060                           path, auth_path, params, headers, data)
1061
1062    def make_request(self, method, path, headers=None, data='', host=None,
1063                     auth_path=None, sender=None, override_num_retries=None,
1064                     params=None, retry_handler=None):
1065        """Makes a request to the server, with stock multiple-retry logic."""
1066        if params is None:
1067            params = {}
1068        http_request = self.build_base_http_request(method, path, auth_path,
1069                                                    params, headers, data, host)
1070        return self._mexe(http_request, sender, override_num_retries,
1071                          retry_handler=retry_handler)
1072
1073    def close(self):
1074        """(Optional) Close any open HTTP connections.  This is non-destructive,
1075        and making a new request will open a connection again."""
1076
1077        boto.log.debug('closing all HTTP connections')
1078        self._connection = None  # compat field
1079
1080
1081class AWSQueryConnection(AWSAuthConnection):
1082
1083    APIVersion = ''
1084    ResponseError = BotoServerError
1085
1086    def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
1087                 is_secure=True, port=None, proxy=None, proxy_port=None,
1088                 proxy_user=None, proxy_pass=None, host=None, debug=0,
1089                 https_connection_factory=None, path='/', security_token=None,
1090                 validate_certs=True, profile_name=None, provider='aws'):
1091        super(AWSQueryConnection, self).__init__(
1092            host, aws_access_key_id,
1093            aws_secret_access_key,
1094            is_secure, port, proxy,
1095            proxy_port, proxy_user, proxy_pass,
1096            debug, https_connection_factory, path,
1097            security_token=security_token,
1098            validate_certs=validate_certs,
1099            profile_name=profile_name,
1100            provider=provider)
1101
1102    def _required_auth_capability(self):
1103        return []
1104
1105    def get_utf8_value(self, value):
1106        return boto.utils.get_utf8_value(value)
1107
1108    def make_request(self, action, params=None, path='/', verb='GET'):
1109        http_request = self.build_base_http_request(verb, path, None,
1110                                                    params, {}, '',
1111                                                    self.host)
1112        if action:
1113            http_request.params['Action'] = action
1114        if self.APIVersion:
1115            http_request.params['Version'] = self.APIVersion
1116        return self._mexe(http_request)
1117
1118    def build_list_params(self, params, items, label):
1119        if isinstance(items, six.string_types):
1120            items = [items]
1121        for i in range(1, len(items) + 1):
1122            params['%s.%d' % (label, i)] = items[i - 1]
1123
1124    def build_complex_list_params(self, params, items, label, names):
1125        """Serialize a list of structures.
1126
1127        For example::
1128
1129            items = [('foo', 'bar', 'baz'), ('foo2', 'bar2', 'baz2')]
1130            label = 'ParamName.member'
1131            names = ('One', 'Two', 'Three')
1132            self.build_complex_list_params(params, items, label, names)
1133
1134        would result in the params dict being updated with these params::
1135
1136            ParamName.member.1.One = foo
1137            ParamName.member.1.Two = bar
1138            ParamName.member.1.Three = baz
1139
1140            ParamName.member.2.One = foo2
1141            ParamName.member.2.Two = bar2
1142            ParamName.member.2.Three = baz2
1143
1144        :type params: dict
1145        :param params: The params dict.  The complex list params
1146            will be added to this dict.
1147
1148        :type items: list of tuples
1149        :param items: The list to serialize.
1150
1151        :type label: string
1152        :param label: The prefix to apply to the parameter.
1153
1154        :type names: tuple of strings
1155        :param names: The names associated with each tuple element.
1156
1157        """
1158        for i, item in enumerate(items, 1):
1159            current_prefix = '%s.%s' % (label, i)
1160            for key, value in zip(names, item):
1161                full_key = '%s.%s' % (current_prefix, key)
1162                params[full_key] = value
1163
1164    # generics
1165
1166    def get_list(self, action, params, markers, path='/',
1167                 parent=None, verb='GET'):
1168        if not parent:
1169            parent = self
1170        response = self.make_request(action, params, path, verb)
1171        body = response.read()
1172        boto.log.debug(body)
1173        if not body:
1174            boto.log.error('Null body %s' % body)
1175            raise self.ResponseError(response.status, response.reason, body)
1176        elif response.status == 200:
1177            rs = ResultSet(markers)
1178            h = boto.handler.XmlHandler(rs, parent)
1179            if isinstance(body, six.text_type):
1180                body = body.encode('utf-8')
1181            xml.sax.parseString(body, h)
1182            return rs
1183        else:
1184            boto.log.error('%s %s' % (response.status, response.reason))
1185            boto.log.error('%s' % body)
1186            raise self.ResponseError(response.status, response.reason, body)
1187
1188    def get_object(self, action, params, cls, path='/',
1189                   parent=None, verb='GET'):
1190        if not parent:
1191            parent = self
1192        response = self.make_request(action, params, path, verb)
1193        body = response.read()
1194        boto.log.debug(body)
1195        if not body:
1196            boto.log.error('Null body %s' % body)
1197            raise self.ResponseError(response.status, response.reason, body)
1198        elif response.status == 200:
1199            obj = cls(parent)
1200            h = boto.handler.XmlHandler(obj, parent)
1201            if isinstance(body, six.text_type):
1202                body = body.encode('utf-8')
1203            xml.sax.parseString(body, h)
1204            return obj
1205        else:
1206            boto.log.error('%s %s' % (response.status, response.reason))
1207            boto.log.error('%s' % body)
1208            raise self.ResponseError(response.status, response.reason, body)
1209
1210    def get_status(self, action, params, path='/', parent=None, verb='GET'):
1211        if not parent:
1212            parent = self
1213        response = self.make_request(action, params, path, verb)
1214        body = response.read()
1215        boto.log.debug(body)
1216        if not body:
1217            boto.log.error('Null body %s' % body)
1218            raise self.ResponseError(response.status, response.reason, body)
1219        elif response.status == 200:
1220            rs = ResultSet()
1221            h = boto.handler.XmlHandler(rs, parent)
1222            xml.sax.parseString(body, h)
1223            return rs.status
1224        else:
1225            boto.log.error('%s %s' % (response.status, response.reason))
1226            boto.log.error('%s' % body)
1227            raise self.ResponseError(response.status, response.reason, body)
1228