1# Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/ 2# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. 3# Copyright (c) 2010 Google 4# Copyright (c) 2008 rPath, Inc. 5# Copyright (c) 2009 The Echo Nest Corporation 6# Copyright (c) 2010, Eucalyptus Systems, Inc. 7# Copyright (c) 2011, Nexenta Systems Inc. 8# All rights reserved. 9# 10# Permission is hereby granted, free of charge, to any person obtaining a 11# copy of this software and associated documentation files (the 12# "Software"), to deal in the Software without restriction, including 13# without limitation the rights to use, copy, modify, merge, publish, dis- 14# tribute, sublicense, and/or sell copies of the Software, and to permit 15# persons to whom the Software is furnished to do so, subject to the fol- 16# lowing conditions: 17# 18# The above copyright notice and this permission notice shall be included 19# in all copies or substantial portions of the Software. 20# 21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 22# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- 23# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 24# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 27# IN THE SOFTWARE. 28 29# 30# Parts of this code were copied or derived from sample code supplied by AWS. 31# The following notice applies to that code. 32# 33# This software code is made available "AS IS" without warranties of any 34# kind. You may copy, display, modify and redistribute the software 35# code either by itself or as incorporated into your code; provided that 36# you do not remove any proprietary notices. Your use of this software 37# code is at your own risk and you waive any claim against Amazon 38# Digital Services, Inc. or its affiliates with respect to your use of 39# this software code. (c) 2006 Amazon Digital Services, Inc. or its 40# affiliates. 41 42""" 43Handles basic connections to AWS 44""" 45from datetime import datetime 46import errno 47import os 48import random 49import re 50import socket 51import sys 52import time 53import xml.sax 54import copy 55 56from boto import auth 57from boto import auth_handler 58import boto 59import boto.utils 60import boto.handler 61import boto.cacerts 62 63from boto import config, UserAgent 64from boto.compat import six, http_client, urlparse, quote, encodebytes 65from boto.exception import AWSConnectionError 66from boto.exception import BotoClientError 67from boto.exception import BotoServerError 68from boto.exception import PleaseRetryException 69from boto.provider import Provider 70from boto.resultset import ResultSet 71 72HAVE_HTTPS_CONNECTION = False 73try: 74 import ssl 75 from boto import https_connection 76 # Google App Engine runs on Python 2.5 so doesn't have ssl.SSLError. 77 if hasattr(ssl, 'SSLError'): 78 HAVE_HTTPS_CONNECTION = True 79except ImportError: 80 pass 81 82try: 83 import threading 84except ImportError: 85 import dummy_threading as threading 86 87ON_APP_ENGINE = all(key in os.environ for key in ( 88 'USER_IS_ADMIN', 'CURRENT_VERSION_ID', 'APPLICATION_ID')) 89 90PORTS_BY_SECURITY = {True: 443, 91 False: 80} 92 93DEFAULT_CA_CERTS_FILE = os.path.join(os.path.dirname(os.path.abspath(boto.cacerts.__file__)), "cacerts.txt") 94 95 96class HostConnectionPool(object): 97 98 """ 99 A pool of connections for one remote (host,port,is_secure). 100 101 When connections are added to the pool, they are put into a 102 pending queue. The _mexe method returns connections to the pool 103 before the response body has been read, so they connections aren't 104 ready to send another request yet. They stay in the pending queue 105 until they are ready for another request, at which point they are 106 returned to the pool of ready connections. 107 108 The pool of ready connections is an ordered list of 109 (connection,time) pairs, where the time is the time the connection 110 was returned from _mexe. After a certain period of time, 111 connections are considered stale, and discarded rather than being 112 reused. This saves having to wait for the connection to time out 113 if AWS has decided to close it on the other end because of 114 inactivity. 115 116 Thread Safety: 117 118 This class is used only from ConnectionPool while it's mutex 119 is held. 120 """ 121 122 def __init__(self): 123 self.queue = [] 124 125 def size(self): 126 """ 127 Returns the number of connections in the pool for this host. 128 Some of the connections may still be in use, and may not be 129 ready to be returned by get(). 130 """ 131 return len(self.queue) 132 133 def put(self, conn): 134 """ 135 Adds a connection to the pool, along with the time it was 136 added. 137 """ 138 self.queue.append((conn, time.time())) 139 140 def get(self): 141 """ 142 Returns the next connection in this pool that is ready to be 143 reused. Returns None if there aren't any. 144 """ 145 # Discard ready connections that are too old. 146 self.clean() 147 148 # Return the first connection that is ready, and remove it 149 # from the queue. Connections that aren't ready are returned 150 # to the end of the queue with an updated time, on the 151 # assumption that somebody is actively reading the response. 152 for _ in range(len(self.queue)): 153 (conn, _) = self.queue.pop(0) 154 if self._conn_ready(conn): 155 return conn 156 else: 157 self.put(conn) 158 return None 159 160 def _conn_ready(self, conn): 161 """ 162 There is a nice state diagram at the top of http_client.py. It 163 indicates that once the response headers have been read (which 164 _mexe does before adding the connection to the pool), a 165 response is attached to the connection, and it stays there 166 until it's done reading. This isn't entirely true: even after 167 the client is done reading, the response may be closed, but 168 not removed from the connection yet. 169 170 This is ugly, reading a private instance variable, but the 171 state we care about isn't available in any public methods. 172 """ 173 if ON_APP_ENGINE: 174 # Google AppEngine implementation of HTTPConnection doesn't contain 175 # _HTTPConnection__response attribute. Moreover, it's not possible 176 # to determine if given connection is ready. Reusing connections 177 # simply doesn't make sense with App Engine urlfetch service. 178 return False 179 else: 180 response = getattr(conn, '_HTTPConnection__response', None) 181 return (response is None) or response.isclosed() 182 183 def clean(self): 184 """ 185 Get rid of stale connections. 186 """ 187 # Note that we do not close the connection here -- somebody 188 # may still be reading from it. 189 while len(self.queue) > 0 and self._pair_stale(self.queue[0]): 190 self.queue.pop(0) 191 192 def _pair_stale(self, pair): 193 """ 194 Returns true of the (connection,time) pair is too old to be 195 used. 196 """ 197 (_conn, return_time) = pair 198 now = time.time() 199 return return_time + ConnectionPool.STALE_DURATION < now 200 201 202class ConnectionPool(object): 203 204 """ 205 A connection pool that expires connections after a fixed period of 206 time. This saves time spent waiting for a connection that AWS has 207 timed out on the other end. 208 209 This class is thread-safe. 210 """ 211 212 # 213 # The amout of time between calls to clean. 214 # 215 216 CLEAN_INTERVAL = 5.0 217 218 # 219 # How long before a connection becomes "stale" and won't be reused 220 # again. The intention is that this time is less that the timeout 221 # period that AWS uses, so we'll never try to reuse a connection 222 # and find that AWS is timing it out. 223 # 224 # Experimentation in July 2011 shows that AWS starts timing things 225 # out after three minutes. The 60 seconds here is conservative so 226 # we should never hit that 3-minute timout. 227 # 228 229 STALE_DURATION = 60.0 230 231 def __init__(self): 232 # Mapping from (host,port,is_secure) to HostConnectionPool. 233 # If a pool becomes empty, it is removed. 234 self.host_to_pool = {} 235 # The last time the pool was cleaned. 236 self.last_clean_time = 0.0 237 self.mutex = threading.Lock() 238 ConnectionPool.STALE_DURATION = \ 239 config.getfloat('Boto', 'connection_stale_duration', 240 ConnectionPool.STALE_DURATION) 241 242 def __getstate__(self): 243 pickled_dict = copy.copy(self.__dict__) 244 pickled_dict['host_to_pool'] = {} 245 del pickled_dict['mutex'] 246 return pickled_dict 247 248 def __setstate__(self, dct): 249 self.__init__() 250 251 def size(self): 252 """ 253 Returns the number of connections in the pool. 254 """ 255 return sum(pool.size() for pool in self.host_to_pool.values()) 256 257 def get_http_connection(self, host, port, is_secure): 258 """ 259 Gets a connection from the pool for the named host. Returns 260 None if there is no connection that can be reused. It's the caller's 261 responsibility to call close() on the connection when it's no longer 262 needed. 263 """ 264 self.clean() 265 with self.mutex: 266 key = (host, port, is_secure) 267 if key not in self.host_to_pool: 268 return None 269 return self.host_to_pool[key].get() 270 271 def put_http_connection(self, host, port, is_secure, conn): 272 """ 273 Adds a connection to the pool of connections that can be 274 reused for the named host. 275 """ 276 with self.mutex: 277 key = (host, port, is_secure) 278 if key not in self.host_to_pool: 279 self.host_to_pool[key] = HostConnectionPool() 280 self.host_to_pool[key].put(conn) 281 282 def clean(self): 283 """ 284 Clean up the stale connections in all of the pools, and then 285 get rid of empty pools. Pools clean themselves every time a 286 connection is fetched; this cleaning takes care of pools that 287 aren't being used any more, so nothing is being gotten from 288 them. 289 """ 290 with self.mutex: 291 now = time.time() 292 if self.last_clean_time + self.CLEAN_INTERVAL < now: 293 to_remove = [] 294 for (host, pool) in self.host_to_pool.items(): 295 pool.clean() 296 if pool.size() == 0: 297 to_remove.append(host) 298 for host in to_remove: 299 del self.host_to_pool[host] 300 self.last_clean_time = now 301 302 303class HTTPRequest(object): 304 305 def __init__(self, method, protocol, host, port, path, auth_path, 306 params, headers, body): 307 """Represents an HTTP request. 308 309 :type method: string 310 :param method: The HTTP method name, 'GET', 'POST', 'PUT' etc. 311 312 :type protocol: string 313 :param protocol: The http protocol used, 'http' or 'https'. 314 315 :type host: string 316 :param host: Host to which the request is addressed. eg. abc.com 317 318 :type port: int 319 :param port: port on which the request is being sent. Zero means unset, 320 in which case default port will be chosen. 321 322 :type path: string 323 :param path: URL path that is being accessed. 324 325 :type auth_path: string 326 :param path: The part of the URL path used when creating the 327 authentication string. 328 329 :type params: dict 330 :param params: HTTP url query parameters, with key as name of 331 the param, and value as value of param. 332 333 :type headers: dict 334 :param headers: HTTP headers, with key as name of the header and value 335 as value of header. 336 337 :type body: string 338 :param body: Body of the HTTP request. If not present, will be None or 339 empty string (''). 340 """ 341 self.method = method 342 self.protocol = protocol 343 self.host = host 344 self.port = port 345 self.path = path 346 if auth_path is None: 347 auth_path = path 348 self.auth_path = auth_path 349 self.params = params 350 # chunked Transfer-Encoding should act only on PUT request. 351 if headers and 'Transfer-Encoding' in headers and \ 352 headers['Transfer-Encoding'] == 'chunked' and \ 353 self.method != 'PUT': 354 self.headers = headers.copy() 355 del self.headers['Transfer-Encoding'] 356 else: 357 self.headers = headers 358 self.body = body 359 360 def __str__(self): 361 return (('method:(%s) protocol:(%s) host(%s) port(%s) path(%s) ' 362 'params(%s) headers(%s) body(%s)') % (self.method, 363 self.protocol, self.host, self.port, self.path, self.params, 364 self.headers, self.body)) 365 366 def authorize(self, connection, **kwargs): 367 if not getattr(self, '_headers_quoted', False): 368 for key in self.headers: 369 val = self.headers[key] 370 if isinstance(val, six.text_type): 371 safe = '!"#$%&\'()*+,/:;<=>?@[\\]^`{|}~' 372 self.headers[key] = quote(val.encode('utf-8'), safe) 373 setattr(self, '_headers_quoted', True) 374 375 self.headers['User-Agent'] = UserAgent 376 377 connection._auth_handler.add_auth(self, **kwargs) 378 379 # I'm not sure if this is still needed, now that add_auth is 380 # setting the content-length for POST requests. 381 if 'Content-Length' not in self.headers: 382 if 'Transfer-Encoding' not in self.headers or \ 383 self.headers['Transfer-Encoding'] != 'chunked': 384 self.headers['Content-Length'] = str(len(self.body)) 385 386 387class HTTPResponse(http_client.HTTPResponse): 388 389 def __init__(self, *args, **kwargs): 390 http_client.HTTPResponse.__init__(self, *args, **kwargs) 391 self._cached_response = '' 392 393 def read(self, amt=None): 394 """Read the response. 395 396 This method does not have the same behavior as 397 http_client.HTTPResponse.read. Instead, if this method is called with 398 no ``amt`` arg, then the response body will be cached. Subsequent 399 calls to ``read()`` with no args **will return the cached response**. 400 401 """ 402 if amt is None: 403 # The reason for doing this is that many places in boto call 404 # response.read() and except to get the response body that they 405 # can then process. To make sure this always works as they expect 406 # we're caching the response so that multiple calls to read() 407 # will return the full body. Note that this behavior only 408 # happens if the amt arg is not specified. 409 if not self._cached_response: 410 self._cached_response = http_client.HTTPResponse.read(self) 411 return self._cached_response 412 else: 413 return http_client.HTTPResponse.read(self, amt) 414 415 416class AWSAuthConnection(object): 417 def __init__(self, host, aws_access_key_id=None, 418 aws_secret_access_key=None, 419 is_secure=True, port=None, proxy=None, proxy_port=None, 420 proxy_user=None, proxy_pass=None, debug=0, 421 https_connection_factory=None, path='/', 422 provider='aws', security_token=None, 423 suppress_consec_slashes=True, 424 validate_certs=True, profile_name=None): 425 """ 426 :type host: str 427 :param host: The host to make the connection to 428 429 :keyword str aws_access_key_id: Your AWS Access Key ID (provided by 430 Amazon). If none is specified, the value in your 431 ``AWS_ACCESS_KEY_ID`` environmental variable is used. 432 :keyword str aws_secret_access_key: Your AWS Secret Access Key 433 (provided by Amazon). If none is specified, the value in your 434 ``AWS_SECRET_ACCESS_KEY`` environmental variable is used. 435 :keyword str security_token: The security token associated with 436 temporary credentials issued by STS. Optional unless using 437 temporary credentials. If none is specified, the environment 438 variable ``AWS_SECURITY_TOKEN`` is used if defined. 439 440 :type is_secure: boolean 441 :param is_secure: Whether the connection is over SSL 442 443 :type https_connection_factory: list or tuple 444 :param https_connection_factory: A pair of an HTTP connection 445 factory and the exceptions to catch. The factory should have 446 a similar interface to L{http_client.HTTPSConnection}. 447 448 :param str proxy: Address/hostname for a proxy server 449 450 :type proxy_port: int 451 :param proxy_port: The port to use when connecting over a proxy 452 453 :type proxy_user: str 454 :param proxy_user: The username to connect with on the proxy 455 456 :type proxy_pass: str 457 :param proxy_pass: The password to use when connection over a proxy. 458 459 :type port: int 460 :param port: The port to use to connect 461 462 :type suppress_consec_slashes: bool 463 :param suppress_consec_slashes: If provided, controls whether 464 consecutive slashes will be suppressed in key paths. 465 466 :type validate_certs: bool 467 :param validate_certs: Controls whether SSL certificates 468 will be validated or not. Defaults to True. 469 470 :type profile_name: str 471 :param profile_name: Override usual Credentials section in config 472 file to use a named set of keys instead. 473 """ 474 self.suppress_consec_slashes = suppress_consec_slashes 475 self.num_retries = 6 476 # Override passed-in is_secure setting if value was defined in config. 477 if config.has_option('Boto', 'is_secure'): 478 is_secure = config.getboolean('Boto', 'is_secure') 479 self.is_secure = is_secure 480 # Whether or not to validate server certificates. 481 # The default is now to validate certificates. This can be 482 # overridden in the boto config file are by passing an 483 # explicit validate_certs parameter to the class constructor. 484 self.https_validate_certificates = config.getbool( 485 'Boto', 'https_validate_certificates', 486 validate_certs) 487 if self.https_validate_certificates and not HAVE_HTTPS_CONNECTION: 488 raise BotoClientError( 489 "SSL server certificate validation is enabled in boto " 490 "configuration, but Python dependencies required to " 491 "support this feature are not available. Certificate " 492 "validation is only supported when running under Python " 493 "2.6 or later.") 494 certs_file = config.get_value( 495 'Boto', 'ca_certificates_file', DEFAULT_CA_CERTS_FILE) 496 if certs_file == 'system': 497 certs_file = None 498 self.ca_certificates_file = certs_file 499 if port: 500 self.port = port 501 else: 502 self.port = PORTS_BY_SECURITY[is_secure] 503 504 self.handle_proxy(proxy, proxy_port, proxy_user, proxy_pass) 505 # define exceptions from http_client that we want to catch and retry 506 self.http_exceptions = (http_client.HTTPException, socket.error, 507 socket.gaierror, http_client.BadStatusLine) 508 # define subclasses of the above that are not retryable. 509 self.http_unretryable_exceptions = [] 510 if HAVE_HTTPS_CONNECTION: 511 self.http_unretryable_exceptions.append( 512 https_connection.InvalidCertificateException) 513 514 # define values in socket exceptions we don't want to catch 515 self.socket_exception_values = (errno.EINTR,) 516 if https_connection_factory is not None: 517 self.https_connection_factory = https_connection_factory[0] 518 self.http_exceptions += https_connection_factory[1] 519 else: 520 self.https_connection_factory = None 521 if (is_secure): 522 self.protocol = 'https' 523 else: 524 self.protocol = 'http' 525 self.host = host 526 self.path = path 527 # if the value passed in for debug 528 if not isinstance(debug, six.integer_types): 529 debug = 0 530 self.debug = config.getint('Boto', 'debug', debug) 531 self.host_header = None 532 533 # Timeout used to tell http_client how long to wait for socket timeouts. 534 # Default is to leave timeout unchanged, which will in turn result in 535 # the socket's default global timeout being used. To specify a 536 # timeout, set http_socket_timeout in Boto config. Regardless, 537 # timeouts will only be applied if Python is 2.6 or greater. 538 self.http_connection_kwargs = {} 539 if (sys.version_info[0], sys.version_info[1]) >= (2, 6): 540 # If timeout isn't defined in boto config file, use 70 second 541 # default as recommended by 542 # http://docs.aws.amazon.com/amazonswf/latest/apireference/API_PollForActivityTask.html 543 self.http_connection_kwargs['timeout'] = config.getint( 544 'Boto', 'http_socket_timeout', 70) 545 546 if isinstance(provider, Provider): 547 # Allow overriding Provider 548 self.provider = provider 549 else: 550 self._provider_type = provider 551 self.provider = Provider(self._provider_type, 552 aws_access_key_id, 553 aws_secret_access_key, 554 security_token, 555 profile_name) 556 557 # Allow config file to override default host, port, and host header. 558 if self.provider.host: 559 self.host = self.provider.host 560 if self.provider.port: 561 self.port = self.provider.port 562 if self.provider.host_header: 563 self.host_header = self.provider.host_header 564 565 self._pool = ConnectionPool() 566 self._connection = (self.host, self.port, self.is_secure) 567 self._last_rs = None 568 self._auth_handler = auth.get_auth_handler( 569 host, config, self.provider, self._required_auth_capability()) 570 if getattr(self, 'AuthServiceName', None) is not None: 571 self.auth_service_name = self.AuthServiceName 572 self.request_hook = None 573 574 def __repr__(self): 575 return '%s:%s' % (self.__class__.__name__, self.host) 576 577 def _required_auth_capability(self): 578 return [] 579 580 def _get_auth_service_name(self): 581 return getattr(self._auth_handler, 'service_name') 582 583 # For Sigv4, the auth_service_name/auth_region_name properties allow 584 # the service_name/region_name to be explicitly set instead of being 585 # derived from the endpoint url. 586 def _set_auth_service_name(self, value): 587 self._auth_handler.service_name = value 588 auth_service_name = property(_get_auth_service_name, _set_auth_service_name) 589 590 def _get_auth_region_name(self): 591 return getattr(self._auth_handler, 'region_name') 592 593 def _set_auth_region_name(self, value): 594 self._auth_handler.region_name = value 595 auth_region_name = property(_get_auth_region_name, _set_auth_region_name) 596 597 def connection(self): 598 return self.get_http_connection(*self._connection) 599 connection = property(connection) 600 601 def aws_access_key_id(self): 602 return self.provider.access_key 603 aws_access_key_id = property(aws_access_key_id) 604 gs_access_key_id = aws_access_key_id 605 access_key = aws_access_key_id 606 607 def aws_secret_access_key(self): 608 return self.provider.secret_key 609 aws_secret_access_key = property(aws_secret_access_key) 610 gs_secret_access_key = aws_secret_access_key 611 secret_key = aws_secret_access_key 612 613 def profile_name(self): 614 return self.provider.profile_name 615 profile_name = property(profile_name) 616 617 def get_path(self, path='/'): 618 # The default behavior is to suppress consecutive slashes for reasons 619 # discussed at 620 # https://groups.google.com/forum/#!topic/boto-dev/-ft0XPUy0y8 621 # You can override that behavior with the suppress_consec_slashes param. 622 if not self.suppress_consec_slashes: 623 return self.path + re.sub('^(/*)/', "\\1", path) 624 pos = path.find('?') 625 if pos >= 0: 626 params = path[pos:] 627 path = path[:pos] 628 else: 629 params = None 630 if path[-1] == '/': 631 need_trailing = True 632 else: 633 need_trailing = False 634 path_elements = self.path.split('/') 635 path_elements.extend(path.split('/')) 636 path_elements = [p for p in path_elements if p] 637 path = '/' + '/'.join(path_elements) 638 if path[-1] != '/' and need_trailing: 639 path += '/' 640 if params: 641 path = path + params 642 return path 643 644 def server_name(self, port=None): 645 if not port: 646 port = self.port 647 if port == 80: 648 signature_host = self.host 649 else: 650 # This unfortunate little hack can be attributed to 651 # a difference in the 2.6 version of http_client. In old 652 # versions, it would append ":443" to the hostname sent 653 # in the Host header and so we needed to make sure we 654 # did the same when calculating the V2 signature. In 2.6 655 # (and higher!) 656 # it no longer does that. Hence, this kludge. 657 if ((ON_APP_ENGINE and sys.version[:3] == '2.5') or 658 sys.version[:3] in ('2.6', '2.7')) and port == 443: 659 signature_host = self.host 660 else: 661 signature_host = '%s:%d' % (self.host, port) 662 return signature_host 663 664 def handle_proxy(self, proxy, proxy_port, proxy_user, proxy_pass): 665 self.proxy = proxy 666 self.proxy_port = proxy_port 667 self.proxy_user = proxy_user 668 self.proxy_pass = proxy_pass 669 if 'http_proxy' in os.environ and not self.proxy: 670 pattern = re.compile( 671 '(?:http://)?' 672 '(?:(?P<user>[\w\-\.]+):(?P<pass>.*)@)?' 673 '(?P<host>[\w\-\.]+)' 674 '(?::(?P<port>\d+))?' 675 ) 676 match = pattern.match(os.environ['http_proxy']) 677 if match: 678 self.proxy = match.group('host') 679 self.proxy_port = match.group('port') 680 self.proxy_user = match.group('user') 681 self.proxy_pass = match.group('pass') 682 else: 683 if not self.proxy: 684 self.proxy = config.get_value('Boto', 'proxy', None) 685 if not self.proxy_port: 686 self.proxy_port = config.get_value('Boto', 'proxy_port', None) 687 if not self.proxy_user: 688 self.proxy_user = config.get_value('Boto', 'proxy_user', None) 689 if not self.proxy_pass: 690 self.proxy_pass = config.get_value('Boto', 'proxy_pass', None) 691 692 if not self.proxy_port and self.proxy: 693 print("http_proxy environment variable does not specify " 694 "a port, using default") 695 self.proxy_port = self.port 696 697 self.no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') 698 self.use_proxy = (self.proxy is not None) 699 700 def get_http_connection(self, host, port, is_secure): 701 conn = self._pool.get_http_connection(host, port, is_secure) 702 if conn is not None: 703 return conn 704 else: 705 return self.new_http_connection(host, port, is_secure) 706 707 def skip_proxy(self, host): 708 if not self.no_proxy: 709 return False 710 711 if self.no_proxy == "*": 712 return True 713 714 hostonly = host 715 hostonly = host.split(':')[0] 716 717 for name in self.no_proxy.split(','): 718 if name and (hostonly.endswith(name) or host.endswith(name)): 719 return True 720 721 return False 722 723 def new_http_connection(self, host, port, is_secure): 724 if host is None: 725 host = self.server_name() 726 727 # Make sure the host is really just the host, not including 728 # the port number 729 host = host.split(':', 1)[0] 730 731 http_connection_kwargs = self.http_connection_kwargs.copy() 732 733 # Connection factories below expect a port keyword argument 734 http_connection_kwargs['port'] = port 735 736 # Override host with proxy settings if needed 737 if self.use_proxy and not is_secure and \ 738 not self.skip_proxy(host): 739 host = self.proxy 740 http_connection_kwargs['port'] = int(self.proxy_port) 741 742 if is_secure: 743 boto.log.debug( 744 'establishing HTTPS connection: host=%s, kwargs=%s', 745 host, http_connection_kwargs) 746 if self.use_proxy and not self.skip_proxy(host): 747 connection = self.proxy_ssl(host, is_secure and 443 or 80) 748 elif self.https_connection_factory: 749 connection = self.https_connection_factory(host) 750 elif self.https_validate_certificates and HAVE_HTTPS_CONNECTION: 751 connection = https_connection.CertValidatingHTTPSConnection( 752 host, ca_certs=self.ca_certificates_file, 753 **http_connection_kwargs) 754 else: 755 connection = http_client.HTTPSConnection( 756 host, **http_connection_kwargs) 757 else: 758 boto.log.debug('establishing HTTP connection: kwargs=%s' % 759 http_connection_kwargs) 760 if self.https_connection_factory: 761 # even though the factory says https, this is too handy 762 # to not be able to allow overriding for http also. 763 connection = self.https_connection_factory( 764 host, **http_connection_kwargs) 765 else: 766 connection = http_client.HTTPConnection( 767 host, **http_connection_kwargs) 768 if self.debug > 1: 769 connection.set_debuglevel(self.debug) 770 # self.connection must be maintained for backwards-compatibility 771 # however, it must be dynamically pulled from the connection pool 772 # set a private variable which will enable that 773 if host.split(':')[0] == self.host and is_secure == self.is_secure: 774 self._connection = (host, port, is_secure) 775 # Set the response class of the http connection to use our custom 776 # class. 777 connection.response_class = HTTPResponse 778 return connection 779 780 def put_http_connection(self, host, port, is_secure, connection): 781 self._pool.put_http_connection(host, port, is_secure, connection) 782 783 def proxy_ssl(self, host=None, port=None): 784 if host and port: 785 host = '%s:%d' % (host, port) 786 else: 787 host = '%s:%d' % (self.host, self.port) 788 # Seems properly to use timeout for connect too 789 timeout = self.http_connection_kwargs.get("timeout") 790 if timeout is not None: 791 sock = socket.create_connection((self.proxy, 792 int(self.proxy_port)), timeout) 793 else: 794 sock = socket.create_connection((self.proxy, int(self.proxy_port))) 795 boto.log.debug("Proxy connection: CONNECT %s HTTP/1.0\r\n", host) 796 sock.sendall("CONNECT %s HTTP/1.0\r\n" % host) 797 sock.sendall("User-Agent: %s\r\n" % UserAgent) 798 if self.proxy_user and self.proxy_pass: 799 for k, v in self.get_proxy_auth_header().items(): 800 sock.sendall("%s: %s\r\n" % (k, v)) 801 # See discussion about this config option at 802 # https://groups.google.com/forum/?fromgroups#!topic/boto-dev/teenFvOq2Cc 803 if config.getbool('Boto', 'send_crlf_after_proxy_auth_headers', False): 804 sock.sendall("\r\n") 805 else: 806 sock.sendall("\r\n") 807 resp = http_client.HTTPResponse(sock, strict=True, debuglevel=self.debug) 808 resp.begin() 809 810 if resp.status != 200: 811 # Fake a socket error, use a code that make it obvious it hasn't 812 # been generated by the socket library 813 raise socket.error(-71, 814 "Error talking to HTTP proxy %s:%s: %s (%s)" % 815 (self.proxy, self.proxy_port, 816 resp.status, resp.reason)) 817 818 # We can safely close the response, it duped the original socket 819 resp.close() 820 821 h = http_client.HTTPConnection(host) 822 823 if self.https_validate_certificates and HAVE_HTTPS_CONNECTION: 824 msg = "wrapping ssl socket for proxied connection; " 825 if self.ca_certificates_file: 826 msg += "CA certificate file=%s" % self.ca_certificates_file 827 else: 828 msg += "using system provided SSL certs" 829 boto.log.debug(msg) 830 key_file = self.http_connection_kwargs.get('key_file', None) 831 cert_file = self.http_connection_kwargs.get('cert_file', None) 832 sslSock = ssl.wrap_socket(sock, keyfile=key_file, 833 certfile=cert_file, 834 cert_reqs=ssl.CERT_REQUIRED, 835 ca_certs=self.ca_certificates_file) 836 cert = sslSock.getpeercert() 837 hostname = self.host.split(':', 0)[0] 838 if not https_connection.ValidateCertificateHostname(cert, hostname): 839 raise https_connection.InvalidCertificateException( 840 hostname, cert, 'hostname mismatch') 841 else: 842 # Fallback for old Python without ssl.wrap_socket 843 if hasattr(http_client, 'ssl'): 844 sslSock = http_client.ssl.SSLSocket(sock) 845 else: 846 sslSock = socket.ssl(sock, None, None) 847 sslSock = http_client.FakeSocket(sock, sslSock) 848 849 # This is a bit unclean 850 h.sock = sslSock 851 return h 852 853 def prefix_proxy_to_path(self, path, host=None): 854 path = self.protocol + '://' + (host or self.server_name()) + path 855 return path 856 857 def get_proxy_auth_header(self): 858 auth = encodebytes(self.proxy_user + ':' + self.proxy_pass) 859 return {'Proxy-Authorization': 'Basic %s' % auth} 860 861 # For passing proxy information to other connection libraries, e.g. cloudsearch2 862 def get_proxy_url_with_auth(self): 863 if not self.use_proxy: 864 return None 865 866 if self.proxy_user or self.proxy_pass: 867 if self.proxy_pass: 868 login_info = '%s:%s@' % (self.proxy_user, self.proxy_pass) 869 else: 870 login_info = '%s@' % self.proxy_user 871 else: 872 login_info = '' 873 874 return 'http://%s%s:%s' % (login_info, self.proxy, str(self.proxy_port or self.port)) 875 876 def set_host_header(self, request): 877 try: 878 request.headers['Host'] = \ 879 self._auth_handler.host_header(self.host, request) 880 except AttributeError: 881 request.headers['Host'] = self.host.split(':', 1)[0] 882 883 def set_request_hook(self, hook): 884 self.request_hook = hook 885 886 def _mexe(self, request, sender=None, override_num_retries=None, 887 retry_handler=None): 888 """ 889 mexe - Multi-execute inside a loop, retrying multiple times to handle 890 transient Internet errors by simply trying again. 891 Also handles redirects. 892 893 This code was inspired by the S3Utils classes posted to the boto-users 894 Google group by Larry Bates. Thanks! 895 896 """ 897 boto.log.debug('Method: %s' % request.method) 898 boto.log.debug('Path: %s' % request.path) 899 boto.log.debug('Data: %s' % request.body) 900 boto.log.debug('Headers: %s' % request.headers) 901 boto.log.debug('Host: %s' % request.host) 902 boto.log.debug('Port: %s' % request.port) 903 boto.log.debug('Params: %s' % request.params) 904 response = None 905 body = None 906 ex = None 907 if override_num_retries is None: 908 num_retries = config.getint('Boto', 'num_retries', self.num_retries) 909 else: 910 num_retries = override_num_retries 911 i = 0 912 connection = self.get_http_connection(request.host, request.port, 913 self.is_secure) 914 915 # Convert body to bytes if needed 916 if not isinstance(request.body, bytes) and hasattr(request.body, 917 'encode'): 918 request.body = request.body.encode('utf-8') 919 920 while i <= num_retries: 921 # Use binary exponential backoff to desynchronize client requests. 922 next_sleep = min(random.random() * (2 ** i), 923 boto.config.get('Boto', 'max_retry_delay', 60)) 924 try: 925 # we now re-sign each request before it is retried 926 boto.log.debug('Token: %s' % self.provider.security_token) 927 request.authorize(connection=self) 928 # Only force header for non-s3 connections, because s3 uses 929 # an older signing method + bucket resource URLs that include 930 # the port info. All others should be now be up to date and 931 # not include the port. 932 if 's3' not in self._required_auth_capability(): 933 if not getattr(self, 'anon', False): 934 if not request.headers.get('Host'): 935 self.set_host_header(request) 936 boto.log.debug('Final headers: %s' % request.headers) 937 request.start_time = datetime.now() 938 if callable(sender): 939 response = sender(connection, request.method, request.path, 940 request.body, request.headers) 941 else: 942 connection.request(request.method, request.path, 943 request.body, request.headers) 944 response = connection.getresponse() 945 boto.log.debug('Response headers: %s' % response.getheaders()) 946 location = response.getheader('location') 947 # -- gross hack -- 948 # http_client gets confused with chunked responses to HEAD requests 949 # so I have to fake it out 950 if request.method == 'HEAD' and getattr(response, 951 'chunked', False): 952 response.chunked = 0 953 if callable(retry_handler): 954 status = retry_handler(response, i, next_sleep) 955 if status: 956 msg, i, next_sleep = status 957 if msg: 958 boto.log.debug(msg) 959 time.sleep(next_sleep) 960 continue 961 if response.status in [500, 502, 503, 504]: 962 msg = 'Received %d response. ' % response.status 963 msg += 'Retrying in %3.1f seconds' % next_sleep 964 boto.log.debug(msg) 965 body = response.read() 966 if isinstance(body, bytes): 967 body = body.decode('utf-8') 968 elif response.status < 300 or response.status >= 400 or \ 969 not location: 970 # don't return connection to the pool if response contains 971 # Connection:close header, because the connection has been 972 # closed and default reconnect behavior may do something 973 # different than new_http_connection. Also, it's probably 974 # less efficient to try to reuse a closed connection. 975 conn_header_value = response.getheader('connection') 976 if conn_header_value == 'close': 977 connection.close() 978 else: 979 self.put_http_connection(request.host, request.port, 980 self.is_secure, connection) 981 if self.request_hook is not None: 982 self.request_hook.handle_request_data(request, response) 983 return response 984 else: 985 scheme, request.host, request.path, \ 986 params, query, fragment = urlparse(location) 987 if query: 988 request.path += '?' + query 989 # urlparse can return both host and port in netloc, so if 990 # that's the case we need to split them up properly 991 if ':' in request.host: 992 request.host, request.port = request.host.split(':', 1) 993 msg = 'Redirecting: %s' % scheme + '://' 994 msg += request.host + request.path 995 boto.log.debug(msg) 996 connection = self.get_http_connection(request.host, 997 request.port, 998 scheme == 'https') 999 response = None 1000 continue 1001 except PleaseRetryException as e: 1002 boto.log.debug('encountered a retry exception: %s' % e) 1003 connection = self.new_http_connection(request.host, request.port, 1004 self.is_secure) 1005 response = e.response 1006 ex = e 1007 except self.http_exceptions as e: 1008 for unretryable in self.http_unretryable_exceptions: 1009 if isinstance(e, unretryable): 1010 boto.log.debug( 1011 'encountered unretryable %s exception, re-raising' % 1012 e.__class__.__name__) 1013 raise 1014 boto.log.debug('encountered %s exception, reconnecting' % 1015 e.__class__.__name__) 1016 connection = self.new_http_connection(request.host, request.port, 1017 self.is_secure) 1018 ex = e 1019 time.sleep(next_sleep) 1020 i += 1 1021 # If we made it here, it's because we have exhausted our retries 1022 # and stil haven't succeeded. So, if we have a response object, 1023 # use it to raise an exception. 1024 # Otherwise, raise the exception that must have already happened. 1025 if self.request_hook is not None: 1026 self.request_hook.handle_request_data(request, response, error=True) 1027 if response: 1028 raise BotoServerError(response.status, response.reason, body) 1029 elif ex: 1030 raise ex 1031 else: 1032 msg = 'Please report this exception as a Boto Issue!' 1033 raise BotoClientError(msg) 1034 1035 def build_base_http_request(self, method, path, auth_path, 1036 params=None, headers=None, data='', host=None): 1037 path = self.get_path(path) 1038 if auth_path is not None: 1039 auth_path = self.get_path(auth_path) 1040 if params is None: 1041 params = {} 1042 else: 1043 params = params.copy() 1044 if headers is None: 1045 headers = {} 1046 else: 1047 headers = headers.copy() 1048 if self.host_header and not boto.utils.find_matching_headers('host', headers): 1049 headers['host'] = self.host_header 1050 host = host or self.host 1051 if self.use_proxy: 1052 if not auth_path: 1053 auth_path = path 1054 path = self.prefix_proxy_to_path(path, host) 1055 if self.proxy_user and self.proxy_pass and not self.is_secure: 1056 # If is_secure, we don't have to set the proxy authentication 1057 # header here, we did that in the CONNECT to the proxy. 1058 headers.update(self.get_proxy_auth_header()) 1059 return HTTPRequest(method, self.protocol, host, self.port, 1060 path, auth_path, params, headers, data) 1061 1062 def make_request(self, method, path, headers=None, data='', host=None, 1063 auth_path=None, sender=None, override_num_retries=None, 1064 params=None, retry_handler=None): 1065 """Makes a request to the server, with stock multiple-retry logic.""" 1066 if params is None: 1067 params = {} 1068 http_request = self.build_base_http_request(method, path, auth_path, 1069 params, headers, data, host) 1070 return self._mexe(http_request, sender, override_num_retries, 1071 retry_handler=retry_handler) 1072 1073 def close(self): 1074 """(Optional) Close any open HTTP connections. This is non-destructive, 1075 and making a new request will open a connection again.""" 1076 1077 boto.log.debug('closing all HTTP connections') 1078 self._connection = None # compat field 1079 1080 1081class AWSQueryConnection(AWSAuthConnection): 1082 1083 APIVersion = '' 1084 ResponseError = BotoServerError 1085 1086 def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, 1087 is_secure=True, port=None, proxy=None, proxy_port=None, 1088 proxy_user=None, proxy_pass=None, host=None, debug=0, 1089 https_connection_factory=None, path='/', security_token=None, 1090 validate_certs=True, profile_name=None, provider='aws'): 1091 super(AWSQueryConnection, self).__init__( 1092 host, aws_access_key_id, 1093 aws_secret_access_key, 1094 is_secure, port, proxy, 1095 proxy_port, proxy_user, proxy_pass, 1096 debug, https_connection_factory, path, 1097 security_token=security_token, 1098 validate_certs=validate_certs, 1099 profile_name=profile_name, 1100 provider=provider) 1101 1102 def _required_auth_capability(self): 1103 return [] 1104 1105 def get_utf8_value(self, value): 1106 return boto.utils.get_utf8_value(value) 1107 1108 def make_request(self, action, params=None, path='/', verb='GET'): 1109 http_request = self.build_base_http_request(verb, path, None, 1110 params, {}, '', 1111 self.host) 1112 if action: 1113 http_request.params['Action'] = action 1114 if self.APIVersion: 1115 http_request.params['Version'] = self.APIVersion 1116 return self._mexe(http_request) 1117 1118 def build_list_params(self, params, items, label): 1119 if isinstance(items, six.string_types): 1120 items = [items] 1121 for i in range(1, len(items) + 1): 1122 params['%s.%d' % (label, i)] = items[i - 1] 1123 1124 def build_complex_list_params(self, params, items, label, names): 1125 """Serialize a list of structures. 1126 1127 For example:: 1128 1129 items = [('foo', 'bar', 'baz'), ('foo2', 'bar2', 'baz2')] 1130 label = 'ParamName.member' 1131 names = ('One', 'Two', 'Three') 1132 self.build_complex_list_params(params, items, label, names) 1133 1134 would result in the params dict being updated with these params:: 1135 1136 ParamName.member.1.One = foo 1137 ParamName.member.1.Two = bar 1138 ParamName.member.1.Three = baz 1139 1140 ParamName.member.2.One = foo2 1141 ParamName.member.2.Two = bar2 1142 ParamName.member.2.Three = baz2 1143 1144 :type params: dict 1145 :param params: The params dict. The complex list params 1146 will be added to this dict. 1147 1148 :type items: list of tuples 1149 :param items: The list to serialize. 1150 1151 :type label: string 1152 :param label: The prefix to apply to the parameter. 1153 1154 :type names: tuple of strings 1155 :param names: The names associated with each tuple element. 1156 1157 """ 1158 for i, item in enumerate(items, 1): 1159 current_prefix = '%s.%s' % (label, i) 1160 for key, value in zip(names, item): 1161 full_key = '%s.%s' % (current_prefix, key) 1162 params[full_key] = value 1163 1164 # generics 1165 1166 def get_list(self, action, params, markers, path='/', 1167 parent=None, verb='GET'): 1168 if not parent: 1169 parent = self 1170 response = self.make_request(action, params, path, verb) 1171 body = response.read() 1172 boto.log.debug(body) 1173 if not body: 1174 boto.log.error('Null body %s' % body) 1175 raise self.ResponseError(response.status, response.reason, body) 1176 elif response.status == 200: 1177 rs = ResultSet(markers) 1178 h = boto.handler.XmlHandler(rs, parent) 1179 if isinstance(body, six.text_type): 1180 body = body.encode('utf-8') 1181 xml.sax.parseString(body, h) 1182 return rs 1183 else: 1184 boto.log.error('%s %s' % (response.status, response.reason)) 1185 boto.log.error('%s' % body) 1186 raise self.ResponseError(response.status, response.reason, body) 1187 1188 def get_object(self, action, params, cls, path='/', 1189 parent=None, verb='GET'): 1190 if not parent: 1191 parent = self 1192 response = self.make_request(action, params, path, verb) 1193 body = response.read() 1194 boto.log.debug(body) 1195 if not body: 1196 boto.log.error('Null body %s' % body) 1197 raise self.ResponseError(response.status, response.reason, body) 1198 elif response.status == 200: 1199 obj = cls(parent) 1200 h = boto.handler.XmlHandler(obj, parent) 1201 if isinstance(body, six.text_type): 1202 body = body.encode('utf-8') 1203 xml.sax.parseString(body, h) 1204 return obj 1205 else: 1206 boto.log.error('%s %s' % (response.status, response.reason)) 1207 boto.log.error('%s' % body) 1208 raise self.ResponseError(response.status, response.reason, body) 1209 1210 def get_status(self, action, params, path='/', parent=None, verb='GET'): 1211 if not parent: 1212 parent = self 1213 response = self.make_request(action, params, path, verb) 1214 body = response.read() 1215 boto.log.debug(body) 1216 if not body: 1217 boto.log.error('Null body %s' % body) 1218 raise self.ResponseError(response.status, response.reason, body) 1219 elif response.status == 200: 1220 rs = ResultSet() 1221 h = boto.handler.XmlHandler(rs, parent) 1222 xml.sax.parseString(body, h) 1223 return rs.status 1224 else: 1225 boto.log.error('%s %s' % (response.status, response.reason)) 1226 boto.log.error('%s' % body) 1227 raise self.ResponseError(response.status, response.reason, body) 1228