1"""HTTP/1.1 client library
2
3<intro stuff goes here>
4<other stuff, too>
5
6HTTPConnection goes through a number of "states", which define when a client
7may legally make another request or fetch the response for a particular
8request. This diagram details these state transitions:
9
10    (null)
11      |
12      | HTTPConnection()
13      v
14    Idle
15      |
16      | putrequest()
17      v
18    Request-started
19      |
20      | ( putheader() )*  endheaders()
21      v
22    Request-sent
23      |
24      | response = getresponse()
25      v
26    Unread-response   [Response-headers-read]
27      |\____________________
28      |                     |
29      | response.read()     | putrequest()
30      v                     v
31    Idle                  Req-started-unread-response
32                     ______/|
33                   /        |
34   response.read() |        | ( putheader() )*  endheaders()
35                   v        v
36       Request-started    Req-sent-unread-response
37                            |
38                            | response.read()
39                            v
40                          Request-sent
41
42This diagram presents the following rules:
43  -- a second request may not be started until {response-headers-read}
44  -- a response [object] cannot be retrieved until {request-sent}
45  -- there is no differentiation between an unread response body and a
46     partially read response body
47
48Note: this enforcement is applied by the HTTPConnection class. The
49      HTTPResponse class does not enforce this state machine, which
50      implies sophisticated clients may accelerate the request/response
51      pipeline. Caution should be taken, though: accelerating the states
52      beyond the above pattern may imply knowledge of the server's
53      connection-close behavior for certain requests. For example, it
54      is impossible to tell whether the server will close the connection
55      UNTIL the response headers have been read; this means that further
56      requests cannot be placed into the pipeline until it is known that
57      the server will NOT be closing the connection.
58
59Logical State                  __state            __response
60-------------                  -------            ----------
61Idle                           _CS_IDLE           None
62Request-started                _CS_REQ_STARTED    None
63Request-sent                   _CS_REQ_SENT       None
64Unread-response                _CS_IDLE           <response_class>
65Req-started-unread-response    _CS_REQ_STARTED    <response_class>
66Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67"""
68
69from array import array
70import os
71import socket
72from sys import py3kwarning
73from urlparse import urlsplit
74import warnings
75with warnings.catch_warnings():
76    if py3kwarning:
77        warnings.filterwarnings("ignore", ".*mimetools has been removed",
78                                DeprecationWarning)
79    import mimetools
80
81try:
82    from cStringIO import StringIO
83except ImportError:
84    from StringIO import StringIO
85
# Names exported by "from httplib import *".
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]

# Default port numbers.  HTTP_PORT is used as HTTPConnection.default_port.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder held by HTTPResponse attributes (version, status, reason,
# chunked, chunk_left, length, will_close) until the response is parsed.
_UNKNOWN = 'UNKNOWN'

# connection states
# (values of HTTPConnection.__state; see the state table in the module
# docstring for how they combine with HTTPConnection.__response)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# (symbolic names for numeric HTTP status codes, grouped by class)
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
162
# Mapping status codes to official W3C names
#
# NOTE: this table does not have an entry for every status constant
# defined in this module (102, 207, 226, 422, 423, 424, 426, 507 and 510
# are absent), so callers should look codes up with responses.get(code)
# rather than indexing directly.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
}

# maximal amount of data to read at one time in _safe_read
# (each fp.read() call there asks for at most this many bytes)
MAXAMOUNT = 1048576

# maximal line length when calling readline().
# Callers pass _MAXLINE + 1 as the readline() limit and raise LineTooLong
# when the returned line is longer than _MAXLINE.
_MAXLINE = 65536
217
class HTTPMessage(mimetools.Message):
    """Header container for an HTTP response.

    A mimetools.Message subclass whose header parsing merges repeated
    header fields into one comma-separated value (see readheaders()).
    """

    def addheader(self, key, value):
        """Add header for field key handling repeats."""
        prev = self.dict.get(key)
        if prev is None:
            # First occurrence of this field.
            self.dict[key] = value
        else:
            # Repeated field: append to the existing value, comma-separated.
            combined = ", ".join((prev, value))
            self.dict[key] = combined

    def addcontinue(self, key, more):
        """Add more field data from a continuation line."""
        # key must already be present in self.dict; readheaders() only calls
        # this after a header line for that key has been stored.
        prev = self.dict[key]
        self.dict[key] = prev + "\n " + more

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).

        If multiple header fields with the same name occur, they are combined
        according to the rules in RFC 2616 sec 4.2:

        Appending each subsequent field-value to the first, each separated
        by a comma. The order in which header fields with the same field-name
        are received is significant to the interpretation of the combined
        field value.

        Raises LineTooLong if a single line exceeds _MAXLINE characters.
        """
        # XXX The implementation overrides the readheaders() method of
        # rfc822.Message.  The base class design isn't amenable to
        # customized behavior here so the method here is a copy of the
        # base class code with a few small changes.

        self.dict = {}
        self.unixfrom = ''
        self.headers = hlist = []
        self.status = ''
        headerseen = ""
        firstline = 1
        # Choose how a wrongly consumed non-header line can be pushed back:
        # prefer fp.unread() when the file object offers it, otherwise
        # remember the offset of each line via fp.tell() and seek back.
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    # tell() failed: stop treating the stream as seekable.
                    startofline = tell = None
                    self.seekable = 0
            # Ask for one character more than the limit so that an over-long
            # line can be told apart from one of exactly _MAXLINE characters.
            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                # It's a continuation line.
                hlist.append(line)
                self.addcontinue(headerseen, line.strip())
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            # isheader() returns the field name for a header line, or a
            # false value otherwise; the name is kept for continuation lines.
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                hlist.append(line)
                # The value is everything after "<name>:" with surrounding
                # whitespace removed.
                self.addheader(headerseen, line[len(headerseen)+1:].strip())
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break
324
325class HTTPResponse:
326
327    # strict: If true, raise BadStatusLine if the status line can't be
328    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
329    # false because it prevents clients from talking to HTTP/0.9
330    # servers.  Note that a response with a sufficiently corrupted
331    # status line will look like an HTTP/0.9 response.
332
333    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
334
335    def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
336        if buffering:
337            # The caller won't be using any sock.recv() calls, so buffering
338            # is fine and recommended for performance.
339            self.fp = sock.makefile('rb')
340        else:
341            # The buffer size is specified as zero, because the headers of
342            # the response are read with readline().  If the reads were
343            # buffered the readline() calls could consume some of the
344            # response, which make be read via a recv() on the underlying
345            # socket.
346            self.fp = sock.makefile('rb', 0)
347        self.debuglevel = debuglevel
348        self.strict = strict
349        self._method = method
350
351        self.msg = None
352
353        # from the Status-Line of the response
354        self.version = _UNKNOWN # HTTP-Version
355        self.status = _UNKNOWN  # Status-Code
356        self.reason = _UNKNOWN  # Reason-Phrase
357
358        self.chunked = _UNKNOWN         # is "chunked" being used?
359        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
360        self.length = _UNKNOWN          # number of bytes left in response
361        self.will_close = _UNKNOWN      # conn will close at end of response
362
363    def _read_status(self):
364        # Initialize with Simple-Response defaults
365        line = self.fp.readline()
366        if self.debuglevel > 0:
367            print "reply:", repr(line)
368        if not line:
369            # Presumably, the server closed the connection before
370            # sending a valid response.
371            raise BadStatusLine(line)
372        try:
373            [version, status, reason] = line.split(None, 2)
374        except ValueError:
375            try:
376                [version, status] = line.split(None, 1)
377                reason = ""
378            except ValueError:
379                # empty version will cause next test to fail and status
380                # will be treated as 0.9 response.
381                version = ""
382        if not version.startswith('HTTP/'):
383            if self.strict:
384                self.close()
385                raise BadStatusLine(line)
386            else:
387                # assume it's a Simple-Response from an 0.9 server
388                self.fp = LineAndFileWrapper(line, self.fp)
389                return "HTTP/0.9", 200, ""
390
391        # The status code is a three-digit number
392        try:
393            status = int(status)
394            if status < 100 or status > 999:
395                raise BadStatusLine(line)
396        except ValueError:
397            raise BadStatusLine(line)
398        return version, status, reason
399
400    def begin(self):
401        if self.msg is not None:
402            # we've already started reading the response
403            return
404
405        # read until we get a non-100 response
406        while True:
407            version, status, reason = self._read_status()
408            if status != CONTINUE:
409                break
410            # skip the header from the 100 response
411            while True:
412                skip = self.fp.readline(_MAXLINE + 1)
413                if len(skip) > _MAXLINE:
414                    raise LineTooLong("header line")
415                skip = skip.strip()
416                if not skip:
417                    break
418                if self.debuglevel > 0:
419                    print "header:", skip
420
421        self.status = status
422        self.reason = reason.strip()
423        if version == 'HTTP/1.0':
424            self.version = 10
425        elif version.startswith('HTTP/1.'):
426            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
427        elif version == 'HTTP/0.9':
428            self.version = 9
429        else:
430            raise UnknownProtocol(version)
431
432        if self.version == 9:
433            self.length = None
434            self.chunked = 0
435            self.will_close = 1
436            self.msg = HTTPMessage(StringIO())
437            return
438
439        self.msg = HTTPMessage(self.fp, 0)
440        if self.debuglevel > 0:
441            for hdr in self.msg.headers:
442                print "header:", hdr,
443
444        # don't let the msg keep an fp
445        self.msg.fp = None
446
447        # are we using the chunked-style of transfer encoding?
448        tr_enc = self.msg.getheader('transfer-encoding')
449        if tr_enc and tr_enc.lower() == "chunked":
450            self.chunked = 1
451            self.chunk_left = None
452        else:
453            self.chunked = 0
454
455        # will the connection close at the end of the response?
456        self.will_close = self._check_close()
457
458        # do we have a Content-Length?
459        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
460        length = self.msg.getheader('content-length')
461        if length and not self.chunked:
462            try:
463                self.length = int(length)
464            except ValueError:
465                self.length = None
466            else:
467                if self.length < 0:  # ignore nonsensical negative lengths
468                    self.length = None
469        else:
470            self.length = None
471
472        # does the body have a fixed length? (of zero)
473        if (status == NO_CONTENT or status == NOT_MODIFIED or
474            100 <= status < 200 or      # 1xx codes
475            self._method == 'HEAD'):
476            self.length = 0
477
478        # if the connection remains open, and we aren't using chunked, and
479        # a content-length was not provided, then assume that the connection
480        # WILL close.
481        if not self.will_close and \
482           not self.chunked and \
483           self.length is None:
484            self.will_close = 1
485
486    def _check_close(self):
487        conn = self.msg.getheader('connection')
488        if self.version == 11:
489            # An HTTP/1.1 proxy is assumed to stay open unless
490            # explicitly closed.
491            conn = self.msg.getheader('connection')
492            if conn and "close" in conn.lower():
493                return True
494            return False
495
496        # Some HTTP/1.0 implementations have support for persistent
497        # connections, using rules different than HTTP/1.1.
498
499        # For older HTTP, Keep-Alive indicates persistent connection.
500        if self.msg.getheader('keep-alive'):
501            return False
502
503        # At least Akamai returns a "Connection: Keep-Alive" header,
504        # which was supposed to be sent by the client.
505        if conn and "keep-alive" in conn.lower():
506            return False
507
508        # Proxy-Connection is a netscape hack.
509        pconn = self.msg.getheader('proxy-connection')
510        if pconn and "keep-alive" in pconn.lower():
511            return False
512
513        # otherwise, assume it will close
514        return True
515
516    def close(self):
517        if self.fp:
518            self.fp.close()
519            self.fp = None
520
521    def isclosed(self):
522        # NOTE: it is possible that we will not ever call self.close(). This
523        #       case occurs when will_close is TRUE, length is None, and we
524        #       read up to the last byte, but NOT past it.
525        #
526        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
527        #          called, meaning self.isclosed() is meaningful.
528        return self.fp is None
529
530    # XXX It would be nice to have readline and __iter__ for this, too.
531
532    def read(self, amt=None):
533        if self.fp is None:
534            return ''
535
536        if self._method == 'HEAD':
537            self.close()
538            return ''
539
540        if self.chunked:
541            return self._read_chunked(amt)
542
543        if amt is None:
544            # unbounded read
545            if self.length is None:
546                s = self.fp.read()
547            else:
548                s = self._safe_read(self.length)
549                self.length = 0
550            self.close()        # we read everything
551            return s
552
553        if self.length is not None:
554            if amt > self.length:
555                # clip the read to the "end of response"
556                amt = self.length
557
558        # we do not use _safe_read() here because this may be a .will_close
559        # connection, and the user is reading more bytes than will be provided
560        # (for example, reading in 1k chunks)
561        s = self.fp.read(amt)
562        if self.length is not None:
563            self.length -= len(s)
564            if not self.length:
565                self.close()
566        return s
567
568    def _read_chunked(self, amt):
569        assert self.chunked != _UNKNOWN
570        chunk_left = self.chunk_left
571        value = []
572        while True:
573            if chunk_left is None:
574                line = self.fp.readline(_MAXLINE + 1)
575                if len(line) > _MAXLINE:
576                    raise LineTooLong("chunk size")
577                i = line.find(';')
578                if i >= 0:
579                    line = line[:i] # strip chunk-extensions
580                try:
581                    chunk_left = int(line, 16)
582                except ValueError:
583                    # close the connection as protocol synchronisation is
584                    # probably lost
585                    self.close()
586                    raise IncompleteRead(''.join(value))
587                if chunk_left == 0:
588                    break
589            if amt is None:
590                value.append(self._safe_read(chunk_left))
591            elif amt < chunk_left:
592                value.append(self._safe_read(amt))
593                self.chunk_left = chunk_left - amt
594                return ''.join(value)
595            elif amt == chunk_left:
596                value.append(self._safe_read(amt))
597                self._safe_read(2)  # toss the CRLF at the end of the chunk
598                self.chunk_left = None
599                return ''.join(value)
600            else:
601                value.append(self._safe_read(chunk_left))
602                amt -= chunk_left
603
604            # we read the whole chunk, get another
605            self._safe_read(2)      # toss the CRLF at the end of the chunk
606            chunk_left = None
607
608        # read and discard trailer up to the CRLF terminator
609        ### note: we shouldn't have any trailers!
610        while True:
611            line = self.fp.readline(_MAXLINE + 1)
612            if len(line) > _MAXLINE:
613                raise LineTooLong("trailer line")
614            if not line:
615                # a vanishingly small number of sites EOF without
616                # sending the trailer
617                break
618            if line == '\r\n':
619                break
620
621        # we read everything; close the "file"
622        self.close()
623
624        return ''.join(value)
625
626    def _safe_read(self, amt):
627        """Read the number of bytes requested, compensating for partial reads.
628
629        Normally, we have a blocking socket, but a read() can be interrupted
630        by a signal (resulting in a partial read).
631
632        Note that we cannot distinguish between EOF and an interrupt when zero
633        bytes have been read. IncompleteRead() will be raised in this
634        situation.
635
636        This function should be used when <amt> bytes "should" be present for
637        reading. If the bytes are truly not available (due to EOF), then the
638        IncompleteRead exception can be used to detect the problem.
639        """
640        # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
641        # return less than x bytes unless EOF is encountered.  It now handles
642        # signal interruptions (socket.error EINTR) internally.  This code
643        # never caught that exception anyways.  It seems largely pointless.
644        # self.fp.read(amt) will work fine.
645        s = []
646        while amt > 0:
647            chunk = self.fp.read(min(amt, MAXAMOUNT))
648            if not chunk:
649                raise IncompleteRead(''.join(s), amt)
650            s.append(chunk)
651            amt -= len(chunk)
652        return ''.join(s)
653
654    def fileno(self):
655        return self.fp.fileno()
656
657    def getheader(self, name, default=None):
658        if self.msg is None:
659            raise ResponseNotReady()
660        return self.msg.getheader(name, default)
661
662    def getheaders(self):
663        """Return list of (header, value) tuples."""
664        if self.msg is None:
665            raise ResponseNotReady()
666        return self.msg.items()
667
668
669class HTTPConnection:
670
671    _http_vsn = 11
672    _http_vsn_str = 'HTTP/1.1'
673
674    response_class = HTTPResponse
675    default_port = HTTP_PORT
676    auto_open = 1
677    debuglevel = 0
678    strict = 0
679
680    def __init__(self, host, port=None, strict=None,
681                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
682        self.timeout = timeout
683        self.source_address = source_address
684        self.sock = None
685        self._buffer = []
686        self.__response = None
687        self.__state = _CS_IDLE
688        self._method = None
689        self._tunnel_host = None
690        self._tunnel_port = None
691        self._tunnel_headers = {}
692
693        self._set_hostport(host, port)
694        if strict is not None:
695            self.strict = strict
696
697    def set_tunnel(self, host, port=None, headers=None):
698        """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
699
700        The headers argument should be a mapping of extra HTTP headers
701        to send with the CONNECT request.
702        """
703        self._tunnel_host = host
704        self._tunnel_port = port
705        if headers:
706            self._tunnel_headers = headers
707        else:
708            self._tunnel_headers.clear()
709
710    def _set_hostport(self, host, port):
711        if port is None:
712            i = host.rfind(':')
713            j = host.rfind(']')         # ipv6 addresses have [...]
714            if i > j:
715                try:
716                    port = int(host[i+1:])
717                except ValueError:
718                    raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
719                host = host[:i]
720            else:
721                port = self.default_port
722            if host and host[0] == '[' and host[-1] == ']':
723                host = host[1:-1]
724        self.host = host
725        self.port = port
726
727    def set_debuglevel(self, level):
728        self.debuglevel = level
729
730    def _tunnel(self):
731        self._set_hostport(self._tunnel_host, self._tunnel_port)
732        self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
733        for header, value in self._tunnel_headers.iteritems():
734            self.send("%s: %s\r\n" % (header, value))
735        self.send("\r\n")
736        response = self.response_class(self.sock, strict = self.strict,
737                                       method = self._method)
738        (version, code, message) = response._read_status()
739
740        if code != 200:
741            self.close()
742            raise socket.error("Tunnel connection failed: %d %s" % (code,
743                                                                    message.strip()))
744        while True:
745            line = response.fp.readline(_MAXLINE + 1)
746            if len(line) > _MAXLINE:
747                raise LineTooLong("header line")
748            if line == '\r\n': break
749
750
751    def connect(self):
752        """Connect to the host and port specified in __init__."""
753        self.sock = socket.create_connection((self.host,self.port),
754                                             self.timeout, self.source_address)
755
756        if self._tunnel_host:
757            self._tunnel()
758
759    def close(self):
760        """Close the connection to the HTTP server."""
761        if self.sock:
762            self.sock.close()   # close it manually... there may be other refs
763            self.sock = None
764        if self.__response:
765            self.__response.close()
766            self.__response = None
767        self.__state = _CS_IDLE
768
769    def send(self, data):
770        """Send `data' to the server."""
771        if self.sock is None:
772            if self.auto_open:
773                self.connect()
774            else:
775                raise NotConnected()
776
777        if self.debuglevel > 0:
778            print "send:", repr(data)
779        blocksize = 8192
780        if hasattr(data,'read') and not isinstance(data, array):
781            if self.debuglevel > 0: print "sendIng a read()able"
782            datablock = data.read(blocksize)
783            while datablock:
784                self.sock.sendall(datablock)
785                datablock = data.read(blocksize)
786        else:
787            self.sock.sendall(data)
788
789    def _output(self, s):
790        """Add a line of output to the current request buffer.
791
792        Assumes that the line does *not* end with \\r\\n.
793        """
794        self._buffer.append(s)
795
796    def _send_output(self, message_body=None):
797        """Send the currently buffered request and clear the buffer.
798
799        Appends an extra \\r\\n to the buffer.
800        A message_body may be specified, to be appended to the request.
801        """
802        self._buffer.extend(("", ""))
803        msg = "\r\n".join(self._buffer)
804        del self._buffer[:]
805        # If msg and message_body are sent in a single send() call,
806        # it will avoid performance problems caused by the interaction
807        # between delayed ack and the Nagle algorithm.
808        if isinstance(message_body, str):
809            msg += message_body
810            message_body = None
811        self.send(msg)
812        if message_body is not None:
813            #message_body was not a string (i.e. it is a file) and
814            #we must run the risk of Nagle
815            self.send(message_body)
816
817    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
818        """Send a request to the server.
819
820        `method' specifies an HTTP request method, e.g. 'GET'.
821        `url' specifies the object being requested, e.g. '/index.html'.
822        `skip_host' if True does not add automatically a 'Host:' header
823        `skip_accept_encoding' if True does not add automatically an
824           'Accept-Encoding:' header
825        """
826
827        # if a prior response has been completed, then forget about it.
828        if self.__response and self.__response.isclosed():
829            self.__response = None
830
831
832        # in certain cases, we cannot issue another request on this connection.
833        # this occurs when:
834        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
835        #   2) a response to a previous request has signalled that it is going
836        #      to close the connection upon completion.
837        #   3) the headers for the previous response have not been read, thus
838        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
839        #
840        # if there is no prior response, then we can request at will.
841        #
842        # if point (2) is true, then we will have passed the socket to the
843        # response (effectively meaning, "there is no prior response"), and
844        # will open a new one when a new request is made.
845        #
846        # Note: if a prior response exists, then we *can* start a new request.
847        #       We are not allowed to begin fetching the response to this new
848        #       request, however, until that prior response is complete.
849        #
850        if self.__state == _CS_IDLE:
851            self.__state = _CS_REQ_STARTED
852        else:
853            raise CannotSendRequest()
854
855        # Save the method we use, we need it later in the response phase
856        self._method = method
857        if not url:
858            url = '/'
859        hdr = '%s %s %s' % (method, url, self._http_vsn_str)
860
861        self._output(hdr)
862
863        if self._http_vsn == 11:
864            # Issue some standard headers for better HTTP/1.1 compliance
865
866            if not skip_host:
867                # this header is issued *only* for HTTP/1.1
868                # connections. more specifically, this means it is
869                # only issued when the client uses the new
870                # HTTPConnection() class. backwards-compat clients
871                # will be using HTTP/1.0 and those clients may be
872                # issuing this header themselves. we should NOT issue
873                # it twice; some web servers (such as Apache) barf
874                # when they see two Host: headers
875
876                # If we need a non-standard port,include it in the
877                # header.  If the request is going through a proxy,
878                # but the host of the actual URL, not the host of the
879                # proxy.
880
881                netloc = ''
882                if url.startswith('http'):
883                    nil, netloc, nil, nil, nil = urlsplit(url)
884
885                if netloc:
886                    try:
887                        netloc_enc = netloc.encode("ascii")
888                    except UnicodeEncodeError:
889                        netloc_enc = netloc.encode("idna")
890                    self.putheader('Host', netloc_enc)
891                else:
892                    try:
893                        host_enc = self.host.encode("ascii")
894                    except UnicodeEncodeError:
895                        host_enc = self.host.encode("idna")
896                    # Wrap the IPv6 Host Header with [] (RFC 2732)
897                    if host_enc.find(':') >= 0:
898                        host_enc = "[" + host_enc + "]"
899                    if self.port == self.default_port:
900                        self.putheader('Host', host_enc)
901                    else:
902                        self.putheader('Host', "%s:%s" % (host_enc, self.port))
903
904            # note: we are assuming that clients will not attempt to set these
905            #       headers since *this* library must deal with the
906            #       consequences. this also means that when the supporting
907            #       libraries are updated to recognize other forms, then this
908            #       code should be changed (removed or updated).
909
910            # we only want a Content-Encoding of "identity" since we don't
911            # support encodings such as x-gzip or x-deflate.
912            if not skip_accept_encoding:
913                self.putheader('Accept-Encoding', 'identity')
914
915            # we can accept "chunked" Transfer-Encodings, but no others
916            # NOTE: no TE header implies *only* "chunked"
917            #self.putheader('TE', 'chunked')
918
919            # if TE is supplied in the header, then it must appear in a
920            # Connection header.
921            #self.putheader('Connection', 'TE')
922
923        else:
924            # For HTTP/1.0, the server will assume "not chunked"
925            pass
926
927    def putheader(self, header, *values):
928        """Send a request header line to the server.
929
930        For example: h.putheader('Accept', 'text/html')
931        """
932        if self.__state != _CS_REQ_STARTED:
933            raise CannotSendHeader()
934
935        hdr = '%s: %s' % (header, '\r\n\t'.join([str(v) for v in values]))
936        self._output(hdr)
937
938    def endheaders(self, message_body=None):
939        """Indicate that the last header line has been sent to the server.
940
941        This method sends the request to the server.  The optional
942        message_body argument can be used to pass message body
943        associated with the request.  The message body will be sent in
944        the same packet as the message headers if possible.  The
945        message_body should be a string.
946        """
947        if self.__state == _CS_REQ_STARTED:
948            self.__state = _CS_REQ_SENT
949        else:
950            raise CannotSendHeader()
951        self._send_output(message_body)
952
    def request(self, method, url, body=None, headers={}):
        """Send a complete request to the server.

        `method', `url', `body' and `headers' are passed unchanged to
        _send_request(), which does all of the work.
        """
        # NOTE(review): the default `headers' dict is shared between calls;
        # that is only safe as long as nothing downstream mutates it.
        self._send_request(method, url, body, headers)
956
957    def _set_content_length(self, body):
958        # Set the content-length based on the body.
959        thelen = None
960        try:
961            thelen = str(len(body))
962        except TypeError, te:
963            # If this is a file-like object, try to
964            # fstat its file descriptor
965            try:
966                thelen = str(os.fstat(body.fileno()).st_size)
967            except (AttributeError, OSError):
968                # Don't send a length if this failed
969                if self.debuglevel > 0: print "Cannot stat!!"
970
971        if thelen is not None:
972            self.putheader('Content-Length', thelen)
973
974    def _send_request(self, method, url, body, headers):
975        # Honor explicitly requested Host: and Accept-Encoding: headers.
976        header_names = dict.fromkeys([k.lower() for k in headers])
977        skips = {}
978        if 'host' in header_names:
979            skips['skip_host'] = 1
980        if 'accept-encoding' in header_names:
981            skips['skip_accept_encoding'] = 1
982
983        self.putrequest(method, url, **skips)
984
985        if body and ('content-length' not in header_names):
986            self._set_content_length(body)
987        for hdr, value in headers.iteritems():
988            self.putheader(hdr, value)
989        self.endheaders(body)
990
991    def getresponse(self, buffering=False):
992        "Get the response from the server."
993
994        # if a prior response has been completed, then forget about it.
995        if self.__response and self.__response.isclosed():
996            self.__response = None
997
998        #
999        # if a prior response exists, then it must be completed (otherwise, we
1000        # cannot read this response's header to determine the connection-close
1001        # behavior)
1002        #
1003        # note: if a prior response existed, but was connection-close, then the
1004        # socket and response were made independent of this HTTPConnection
1005        # object since a new request requires that we open a whole new
1006        # connection
1007        #
1008        # this means the prior response had one of two states:
1009        #   1) will_close: this connection was reset and the prior socket and
1010        #                  response operate independently
1011        #   2) persistent: the response was retained and we await its
1012        #                  isclosed() status to become true.
1013        #
1014        if self.__state != _CS_REQ_SENT or self.__response:
1015            raise ResponseNotReady()
1016
1017        args = (self.sock,)
1018        kwds = {"strict":self.strict, "method":self._method}
1019        if self.debuglevel > 0:
1020            args += (self.debuglevel,)
1021        if buffering:
1022            #only add this keyword if non-default, for compatibility with
1023            #other response_classes.
1024            kwds["buffering"] = True;
1025        response = self.response_class(*args, **kwds)
1026
1027        response.begin()
1028        assert response.will_close != _UNKNOWN
1029        self.__state = _CS_IDLE
1030
1031        if response.will_close:
1032            # this effectively passes the connection to the response
1033            self.close()
1034        else:
1035            # remember this, so we can tell when it is complete
1036            self.__response = response
1037
1038        return response
1039
1040
class HTTP:
    """Compatibility class with httplib.py from 1.5.

    Wraps an instance of `_connection_class' and exposes the old
    interface: send/putrequest/putheader/endheaders/set_debuglevel are
    delegated to the wrapped connection, and getreply()/getfile() stand in
    for the response object.  The wrapped connection is switched to the
    HTTP version declared on this class (HTTP/1.0 by default).
    """

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        self._setup(self._connection_class(host, port, strict))

    def _setup(self, conn):
        "Adopt `conn' as the wrapped connection and wire up delegation."
        self._conn = conn

        # set up delegation to flesh out interface
        for name in ('send', 'putrequest', 'putheader', 'endheaders',
                     'set_debuglevel'):
            setattr(self, name, getattr(conn, name))

        # make the connection speak this class's HTTP version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def getreply(self, buffering=False):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        If the status line cannot be parsed (BadStatusLine), the tuple is
        (-1, <offending line>, None), the connection is closed, and
        getfile() returns a file object made from the socket.
        """
        try:
            if buffering:
                #only add this keyword if non-default for compatibility
                #with other connection classes
                response = self._conn.getresponse(buffering)
            else:
                response = self._conn.getresponse()
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            raw_file = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            self.close()

            # close() resets self.file, so the file has to be stored only
            # afterwards; storing it first would always lose it.
            # NOTE(review): relies on the file object keeping the
            # underlying socket usable after the connection is closed.
            self.file = raw_file

            self.headers = None
            return -1, e.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        "Close the wrapped connection and drop the reference to the file."
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
1131
1132try:
1133    import ssl
1134except ImportError:
1135    pass
1136else:
1137    class HTTPSConnection(HTTPConnection):
1138        "This class allows communication via SSL."
1139
1140        default_port = HTTPS_PORT
1141
1142        def __init__(self, host, port=None, key_file=None, cert_file=None,
1143                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
1144                     source_address=None):
1145            HTTPConnection.__init__(self, host, port, strict, timeout,
1146                                    source_address)
1147            self.key_file = key_file
1148            self.cert_file = cert_file
1149
1150        def connect(self):
1151            "Connect to a host on a given (SSL) port."
1152
1153            sock = socket.create_connection((self.host, self.port),
1154                                            self.timeout, self.source_address)
1155            if self._tunnel_host:
1156                self.sock = sock
1157                self._tunnel()
1158            self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
1159
1160    __all__.append("HTTPSConnection")
1161
1162    class HTTPS(HTTP):
1163        """Compatibility with 1.5 httplib interface
1164
1165        Python 1.5.2 did not have an HTTPS class, but it defined an
1166        interface for sending http requests that is also useful for
1167        https.
1168        """
1169
1170        _connection_class = HTTPSConnection
1171
1172        def __init__(self, host='', port=None, key_file=None, cert_file=None,
1173                     strict=None):
1174            # provide a default host, pass the X509 cert info
1175
1176            # urf. compensate for bad input.
1177            if port == 0:
1178                port = None
1179            self._setup(self._connection_class(host, port, key_file,
1180                                               cert_file, strict))
1181
1182            # we never actually use these for anything, but we keep them
1183            # here for compatibility with post-1.5.2 CVS.
1184            self.key_file = key_file
1185            self.cert_file = cert_file
1186
1187
1188    def FakeSocket (sock, sslobj):
1189        warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  " +
1190                      "Use the result of ssl.wrap_socket() directly instead.",
1191                      DeprecationWarning, stacklevel=2)
1192        return sslobj
1193
1194
class HTTPException(Exception):
    """Base class of the exceptions defined in this module.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
1199
class NotConnected(HTTPException):
    """HTTPException subclass without extra state.

    Not raised in this part of the file; judging by the name it signals
    use of a connection that has not been established.
    """
1202
class InvalidURL(HTTPException):
    """HTTPException subclass without extra state.

    Not raised in this part of the file; judging by the name it signals
    a URL or host specification that could not be accepted.
    """
1205
class UnknownProtocol(HTTPException):
    """HTTPException carrying the protocol version that caused it.

    The version is available as `version' and as the only element of
    `args'.
    """
    def __init__(self, version):
        self.version = version
        self.args = (version,)
1210
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass without extra state.

    Not raised in this part of the file; judging by the name it signals
    a transfer encoding that is not understood.
    """
1213
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass without extra state.

    Not raised in this part of the file; judging by the name it signals
    a request for a file mode that is not implemented.
    """
1216
class IncompleteRead(HTTPException):
    """HTTPException describing a read that ended early.

    `partial' holds the data that was read (it is also the only element
    of `args'); `expected' is the number of additional bytes that were
    expected, or None.
    """
    def __init__(self, partial, expected=None):
        self.partial = partial
        self.expected = expected
        self.args = (partial,)

    def __repr__(self):
        suffix = ''
        if self.expected is not None:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

    def __str__(self):
        # str() and repr() intentionally give the same text.
        return repr(self)
1230
class ImproperConnectionState(HTTPException):
    """Base class for the connection-state errors that follow it:
    CannotSendRequest, CannotSendHeader and ResponseNotReady."""
1233
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
1236
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when no request has been
    started."""
1239
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no request has been sent or a prior
    response is still pending."""
1242
class BadStatusLine(HTTPException):
    """HTTPException carrying an unusable status line.

    The line is available as `line' and as the only element of `args';
    an empty line is stored as its repr() so that the message is never
    blank.
    """
    def __init__(self, line):
        shown = line if line else repr(line)
        self.args = (shown,)
        self.line = shown
1249
class LineTooLong(HTTPException):
    """HTTPException for a line longer than _MAXLINE bytes.

    `line_type' names the kind of line that was being read and becomes
    part of the message.
    """
    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
1254
# for backwards compatibility: `error' stays available as an alias of
# the module's base exception class
error = HTTPException
1257
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    `line' is data that was already read from `file'; the wrapper hands
    it out first and then continues with the data of `file' itself.
    Attributes not defined here are looked up on `file'.
    """

    # The status-line parsing code calls readline(), which normally
    # get the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)

    def __getattr__(self, attr):
        # Only called for names not found on the wrapper itself.
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to `amt' bytes; None or a negative value reads all."""
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        # A negative amount means "everything", as for regular file
        # objects; without this check it would fall into the partial-read
        # branch below and corrupt the offset bookkeeping.
        read_all = amt is None or amt < 0
        if read_all or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if read_all:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            assert amt <= self._line_left
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the rest of the saved line, then lines of the file."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the saved line followed by the file's lines.

        `size' is passed on to the file's readlines() when given.
        """
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
1325
1326def test():
1327    """Test this module.
1328
1329    A hodge podge of tests collected here, because they have too many
1330    external dependencies for the regular test suite.
1331    """
1332
1333    import sys
1334    import getopt
1335    opts, args = getopt.getopt(sys.argv[1:], 'd')
1336    dl = 0
1337    for o, a in opts:
1338        if o == '-d': dl = dl + 1
1339    host = 'www.python.org'
1340    selector = '/'
1341    if args[0:]: host = args[0]
1342    if args[1:]: selector = args[1]
1343    h = HTTP()
1344    h.set_debuglevel(dl)
1345    h.connect(host)
1346    h.putrequest('GET', selector)
1347    h.endheaders()
1348    status, reason, headers = h.getreply()
1349    print 'status =', status
1350    print 'reason =', reason
1351    print "read", len(h.getfile().read())
1352    print
1353    if headers:
1354        for header in headers.headers: print header.strip()
1355    print
1356
1357    # minimal test that code to extract host from url works
1358    class HTTP11(HTTP):
1359        _http_vsn = 11
1360        _http_vsn_str = 'HTTP/1.1'
1361
1362    h = HTTP11('www.python.org')
1363    h.putrequest('GET', 'http://www.python.org/~jeremy/')
1364    h.endheaders()
1365    h.getreply()
1366    h.close()
1367
1368    try:
1369        import ssl
1370    except ImportError:
1371        pass
1372    else:
1373
1374        for host, selector in (('sourceforge.net', '/projects/python'),
1375                               ):
1376            print "https://%s%s" % (host, selector)
1377            hs = HTTPS()
1378            hs.set_debuglevel(dl)
1379            hs.connect(host)
1380            hs.putrequest('GET', selector)
1381            hs.endheaders()
1382            status, reason, headers = hs.getreply()
1383            print 'status =', status
1384            print 'reason =', reason
1385            print "read", len(hs.getfile().read())
1386            print
1387            if headers:
1388                for header in headers.headers: print header.strip()
1389            print
1390
# run the ad-hoc tests above when this file is executed as a script
if __name__ == '__main__':
    test()
1393