1#!/usr/bin/env python
2#
3# Copyright 2015 Google Inc.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""HTTP wrapper for apitools.
18
19This library wraps the underlying http library we use, which is
20currently httplib2.
21"""
22
23import collections
24import contextlib
25import logging
26import socket
27import time
28
29import httplib2
30import six
31from six.moves import http_client
32from six.moves.urllib import parse
33
34from apitools.base.py import exceptions
35from apitools.base.py import util
36
37# pylint: disable=ungrouped-imports
38try:
39    from oauth2client.client import HttpAccessTokenRefreshError as TokenRefreshError  # noqa
40except ImportError:
41    from oauth2client.client import AccessTokenRefreshError as TokenRefreshError  # noqa
42
# Public names exported by `from <module> import *`.
__all__ = [
    'CheckResponse',
    'GetHttp',
    'HandleExceptionsAndRebuildHttpConnections',
    'MakeRequest',
    'RebuildHttpConnections',
    'Request',
    'Response',
    'RethrowExceptionHandler',
]


# 308 and 429 don't have names in httplib.
# TOO_MANY_REQUESTS is treated as retryable alongside 5xx statuses by
# CheckResponse and by the token-refresh check in
# HandleExceptionsAndRebuildHttpConnections.
RESUME_INCOMPLETE = 308
TOO_MANY_REQUESTS = 429
# Status codes for which Response.is_redirect can be true (it additionally
# requires a 'location' header to be present).
_REDIRECT_STATUS_CODES = (
    http_client.MOVED_PERMANENTLY,
    http_client.FOUND,
    http_client.SEE_OTHER,
    http_client.TEMPORARY_REDIRECT,
    RESUME_INCOMPLETE,
)
65
# Arguments handed to a retry handler (the `retry_func` of MakeRequest)
# each time a request attempt raises.
#
# http: An httplib2.Http instance.
# http_request: A http_wrapper.Request.
# exc: Exception being raised.
# num_retries: Number of retries consumed; used for exponential backoff.
# max_retry_wait: Maximum number of seconds to wait before a retry.
# total_wait_sec: Seconds elapsed since the first attempt was started.
ExceptionRetryArgs = collections.namedtuple(
    'ExceptionRetryArgs',
    [
        'http',
        'http_request',
        'exc',
        'num_retries',
        'max_retry_wait',
        'total_wait_sec',
    ])
73
74
@contextlib.contextmanager
def _Httplib2Debuglevel(http_request, level, http=None):
    """Temporarily change the value of httplib2.debuglevel, if necessary.

    If http_request has a `loggable_body` distinct from `body`, then we
    need to prevent httplib2 from logging the full body. This sets
    httplib2.debuglevel for the duration of the `with` block; however,
    that alone won't change the value of existing HTTP connections. If
    an httplib2.Http object is provided, we'll also change the level on
    any cached connections attached to it.

    The previous levels are restored when the `with` block exits, whether
    it exits normally or by raising.

    Args:
      http_request: a Request we're logging.
      level: (int) the debuglevel for logging.
      http: (optional) an httplib2.Http whose connections we should
        set the debuglevel on. Objects without a `connections` attribute
        are accepted; only the module-level debuglevel is changed then.

    Yields:
      None.
    """
    if http_request.loggable_body is None:
        # Nothing to hide, so leave all debug levels untouched.
        yield
        return
    old_level = httplib2.debuglevel
    http_levels = {}
    httplib2.debuglevel = level
    try:
        # Tolerate `http` being None or a wrapper without `connections`.
        connections = getattr(http, 'connections', None) or {}
        for connection_key, connection in connections.items():
            # httplib2 stores two kinds of values in this dict, connection
            # classes and instances. Since the connection types are all
            # old-style classes, we can't easily distinguish by connection
            # type -- so instead we use the key pattern.
            if ':' not in connection_key:
                continue
            http_levels[connection_key] = connection.debuglevel
            connection.set_debuglevel(level)
        yield
    finally:
        # Always undo the changes, even if the body of the `with` raised.
        httplib2.debuglevel = old_level
        for connection_key, saved_level in http_levels.items():
            # A connection may have been dropped while the block ran.
            if connection_key in http.connections:
                http.connections[connection_key].set_debuglevel(
                    saved_level)
117
118
class Request(object):

    """Class encapsulating the data for an HTTP request.

    Attributes:
      url: The URL to request.
      http_method: The HTTP method name, e.g. 'GET'.
      headers: Dict of request headers. The 'content-length' entry is
        maintained automatically whenever `body` is assigned.
      body: The request body, or None. See the `body` setter.
      loggable_body: Stand-in for `body` to use when logging, or None if
        the body itself may be logged.
    """

    def __init__(self, url='', http_method='GET', headers=None, body=''):
        self.url = url
        self.http_method = http_method
        self.headers = headers or {}
        self.__body = None
        self.__loggable_body = None
        # Assign through the property so content-length is populated.
        self.body = body

    @property
    def loggable_body(self):
        return self.__loggable_body

    @loggable_body.setter
    def loggable_body(self, value):
        if self.body is None:
            raise exceptions.RequestError(
                'Cannot set loggable body on request with no body')
        self.__loggable_body = value

    @property
    def body(self):
        return self.__body

    @body.setter
    def body(self, value):
        """Sets the request body; handles logging and length measurement."""
        self.__body = value
        if value is not None:
            # Avoid calling len() which cannot exceed 4GiB in 32-bit python.
            # Only fall back to len() when no `length` attribute is
            # available; a `length` of 0 is a valid answer and must not
            # trigger len() on an object that may not support it.
            body_length = getattr(value, 'length', None)
            if body_length is None:
                body_length = len(value)
            self.headers['content-length'] = str(body_length)
        else:
            self.headers.pop('content-length', None)
        # This line ensures we don't try to print large requests.
        if not isinstance(value, (type(None), six.string_types)):
            self.loggable_body = '<media body>'
160
161
# Note: the field order matters. The first two fields line up with the
# (info, content) pair returned by httplib2's request(), so that result
# can be passed straight through.
class Response(collections.namedtuple(
        'HttpResponse', ['info', 'content', 'request_url'])):

    """Immutable record of an HTTP response: headers, body and URL."""
    __slots__ = ()

    def __len__(self):
        return self.length

    @property
    def length(self):
        """Number of bytes this response represents.

        Offered as a property because len() can fail for responses
        larger than sys.maxint.

        Returns:
          Response length (as int or long)
        """
        def _SpanOfContentRange(header_value):
            # Header looks like '<unit> <start>-<end>/<total>'; the span
            # is inclusive of both ends.
            range_spec = header_value.partition(' ')[2]
            byte_range = range_spec.partition('/')[0]
            first, _, last = byte_range.partition('-')
            return int(last) - int(first) + 1

        headers = self.info
        if '-content-encoding' in headers and 'content-range' in headers:
            # For compressed transfers httplib2 rewrites content-length,
            # so it cannot be trusted; content-range still can be.
            return _SpanOfContentRange(headers['content-range'])
        if 'content-length' in headers:
            return int(headers.get('content-length'))
        if 'content-range' in headers:
            return _SpanOfContentRange(headers['content-range'])
        # No usable headers: measure the body we actually hold.
        return len(self.content)

    @property
    def status_code(self):
        """The 'status' entry of the response info, as an int."""
        status = self.info['status']
        return int(status)

    @property
    def retry_after(self):
        """The 'retry-after' header as an int, or None when absent."""
        if 'retry-after' not in self.info:
            return None
        return int(self.info['retry-after'])

    @property
    def is_redirect(self):
        """True for a redirect status that also carries a 'location'."""
        has_redirect_status = self.status_code in _REDIRECT_STATUS_CODES
        return has_redirect_status and 'location' in self.info
213
214
def CheckResponse(response):
    """Raise if the response indicates the request should be retried.

    Args:
      response: A Response, or None.

    Raises:
      exceptions.RequestError: if response is None.
      exceptions.BadStatusCodeError: if the status code is 5xx or higher,
        or is TOO_MANY_REQUESTS (429).
      exceptions.RetryAfterError: if the response carries a non-zero
        retry-after value.
    """
    if response is None:
        # Caller shouldn't call us if the response is None, but handle anyway.
        # There is no response object to read a URL from here, so the
        # message cannot include one.
        raise exceptions.RequestError(
            'Request did not return a response.')
    elif (response.status_code >= 500 or
          response.status_code == TOO_MANY_REQUESTS):
        raise exceptions.BadStatusCodeError.FromResponse(response)
    elif response.retry_after:
        raise exceptions.RetryAfterError.FromResponse(response)
226
227
def RebuildHttpConnections(http):
    """Drop every live connection cached on an httplib2.Http instance.

    The http.connections map in httplib2 holds two kinds of entries:
    { scheme string:  connection class } and
    { scheme + authority string : actual http connection }
    Only the second kind (keys containing ':') is removed, so httplib2
    creates fresh connections from the remaining connection classes on
    the next request.

    Args:
      http: An httplib2.Http instance. Objects with a missing or empty
        `connections` attribute are left untouched.
    """
    cached = getattr(http, 'connections', None)
    if not cached:
        return
    # Collect the keys first; the dict must not change size mid-iteration.
    live_connection_keys = [key for key in cached if ':' in key]
    for key in live_connection_keys:
        del cached[key]
245
246
def RethrowExceptionHandler(*unused_args):
    """Retry handler that never retries.

    Uses a bare `raise`, so it must be called while an exception is being
    handled (as MakeRequest does from its `except` block); that exception
    is then re-raised unchanged.

    Args:
      *unused_args: Ignored.
    """
    # pylint: disable=misplaced-bare-raise
    raise
250
251
def HandleExceptionsAndRebuildHttpConnections(retry_args):
    """Exception handler for http failures.

    This catches known failures and rebuilds the underlying HTTP connections.
    For a recognized failure it logs at debug level, drops the cached
    connections on retry_args.http, and sleeps before returning so that the
    caller can retry. The sleep is the server-provided retry-after value
    when the exception carries one, otherwise a computed backoff based on
    retry_args.num_retries and capped by retry_args.max_retry_wait.

    Args:
      retry_args: An ExceptionRetryArgs tuple.

    Raises:
      The exception in retry_args.exc, if it is not one of the recognized
      retryable failures.
    """
    exc = retry_args.exc
    # If the server indicates how long to wait, use that value.  Otherwise,
    # calculate the wait time on our own.
    retry_after = None

    # Transport failures
    if isinstance(exc, (http_client.BadStatusLine,
                        http_client.IncompleteRead,
                        http_client.ResponseNotReady)):
        logging.debug('Caught HTTP error %s, retrying: %s',
                      type(exc).__name__, exc)
    # socket.gaierror and socket.timeout are subclasses of socket.error,
    # so they have to be tested before it or their branches never run.
    elif isinstance(exc, socket.gaierror):
        logging.debug(
            'Caught socket address error, retrying: %s', exc)
    elif isinstance(exc, socket.timeout):
        logging.debug(
            'Caught socket timeout error, retrying: %s', exc)
    elif isinstance(exc, socket.error):
        logging.debug('Caught socket error, retrying: %s', exc)
    elif isinstance(exc, httplib2.ServerNotFoundError):
        logging.debug(
            'Caught server not found error, retrying: %s', exc)
    elif isinstance(exc, ValueError):
        # oauth2client tries to JSON-decode the response, which can result
        # in a ValueError if the response was invalid. Until that is fixed in
        # oauth2client, need to handle it here.
        logging.debug('Response content was invalid (%s), retrying',
                      exc)
    elif (isinstance(exc, TokenRefreshError) and
          hasattr(exc, 'status') and
          (exc.status == TOO_MANY_REQUESTS or
           exc.status >= 500)):
        logging.debug(
            'Caught transient credential refresh error (%s), retrying',
            exc)
    elif isinstance(exc, exceptions.RequestError):
        logging.debug('Request returned no response, retrying')
    # API-level failures
    elif isinstance(exc, exceptions.BadStatusCodeError):
        logging.debug('Response returned status %s, retrying',
                      exc.status_code)
    elif isinstance(exc, exceptions.RetryAfterError):
        logging.debug('Response returned a retry-after header, retrying')
        retry_after = exc.retry_after
    else:
        # Not a failure we know how to retry; hand it back to the caller.
        raise exc
    RebuildHttpConnections(retry_args.http)
    logging.debug('Retrying request to url %s after exception %s',
                  retry_args.http_request.url, exc)
    time.sleep(
        retry_after or util.CalculateWaitForRetry(
            retry_args.num_retries, max_wait=retry_args.max_retry_wait))
311
312
def MakeRequest(http, http_request, retries=7, max_retry_wait=60,
                redirections=5,
                retry_func=HandleExceptionsAndRebuildHttpConnections,
                check_response_func=CheckResponse):
    """Send http_request through http, retrying on failure.

    Each attempt is made with _MakeRequestNoRetry. When an attempt raises,
    retry_func is given the details and decides whether another attempt
    happens: if it returns, the request is sent again; if it raises, that
    exception propagates.

    Args:
      http: An httplib2.Http instance, or a http multiplexer that delegates to
          an underlying http, for example, HTTPMultiplexer.
      http_request: A Request to send.
      retries: (int, default 7) Limit on failed attempts. Once the number
          of failed attempts reaches this value, the latest exception is
          re-raised instead of being passed to retry_func.
      max_retry_wait: (int, default 60) Maximum number of seconds to wait
          when retrying; forwarded to retry_func.
      redirections: (int, default 5) Number of redirects to follow.
      retry_func: Function to handle retries on exceptions. Argument is an
          ExceptionRetryArgs tuple.
      check_response_func: Function to validate the HTTP response. It is
          called with the Response object.

    Raises:
      The exception from the final failed attempt, or anything raised by
      retry_func.

    Returns:
      A Response object.

    """
    started_at = time.time()
    # Provide compatibility for breaking change in httplib2 0.16.0+:
    # https://github.com/googleapis/google-api-python-client/issues/803
    if hasattr(http, 'redirect_codes'):
        http.redirect_codes = set(http.redirect_codes) - {308}
    failed_attempts = 0
    while True:
        try:
            return _MakeRequestNoRetry(
                http, http_request, redirections=redirections,
                check_response_func=check_response_func)
        # Everything is caught here on purpose: retry_func returns for
        # the exception types it handles and raises for the rest.
        except Exception as error:  # pylint: disable=broad-except
            failed_attempts += 1
            if failed_attempts >= retries:
                raise
            elapsed_sec = time.time() - started_at
            retry_func(ExceptionRetryArgs(
                http=http,
                http_request=http_request,
                exc=error,
                num_retries=failed_attempts,
                max_retry_wait=max_retry_wait,
                total_wait_sec=elapsed_sec))
361
362
def _MakeRequestNoRetry(http, http_request, redirections=5,
                        check_response_func=CheckResponse):
    """Send http_request through http exactly once.

    Translates between the plain httplib2 request/response types and the
    Request and Response types defined in this module.

    Args:
      http: An httplib2.Http instance, or a http multiplexer that delegates to
          an underlying http, for example, HTTPMultiplexer.
      http_request: A Request to send.
      redirections: (int, default 5) Number of redirects to follow.
      check_response_func: Function to validate the HTTP response. It is
          called with the Response object.

    Returns:
      A Response object.

    Raises:
      RequestError if no response could be parsed.

    """
    # A caller may register its own connection class for a URL scheme on
    # http.connections (e.g. to manage callbacks or hash digestion); if
    # one exists for this request's scheme, hand it to httplib2.
    connection_override = None
    known_connections = getattr(http, 'connections', None)
    if known_connections:
        scheme = parse.urlsplit(http_request.url).scheme
        if scheme and scheme in known_connections:
            connection_override = known_connections[scheme]

    # Custom printing only at debuglevel 4
    if httplib2.debuglevel == 4:
        request_debuglevel = 4
    else:
        request_debuglevel = 0
    with _Httplib2Debuglevel(http_request, request_debuglevel, http=http):
        info, content = http.request(
            str(http_request.url),
            method=str(http_request.http_method),
            body=http_request.body,
            headers=http_request.headers,
            redirections=redirections,
            connection_type=connection_override)

    if info is None:
        raise exceptions.RequestError()

    result = Response(info, content, http_request.url)
    check_response_func(result)
    return result
408
409
# Factories consulted by GetHttp, in registration order.
_HTTP_FACTORIES = []


def _RegisterHttpFactory(factory):
    """Add a factory for GetHttp to consult.

    Args:
      factory: Callable taking the keyword arguments given to GetHttp and
        returning an http object, or None to decline.
    """
    _HTTP_FACTORIES.append(factory)


def GetHttp(**kwds):
    """Build an http object from the given keyword arguments.

    Registered factories are tried in registration order and the first
    non-None result wins. If none produces one, an httplib2.Http built
    from the same keyword arguments is returned.

    Args:
      **kwds: Passed unchanged to each factory and to httplib2.Http.

    Returns:
      The first non-None factory result, or a new httplib2.Http.
    """
    for make_http in _HTTP_FACTORIES:
        candidate = make_http(**kwds)
        if candidate is None:
            continue
        return candidate
    return httplib2.Http(**kwds)
423