r"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |\_____________________________
      |                              | getresponse() raises
      | response = getresponse()     | ConnectionError
      v                              v
    Unread-response                Idle
    [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""

import email.parser
import email.message
import http
import io
import os
import re
import socket
import collections
from urllib.parse import urlsplit

# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
           "responses"]

HTTP_PORT = 80
HTTPS_PORT = 443

_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'


# hack to maintain backwards compatibility: expose every HTTPStatus member
# (OK, NOT_FOUND, CONTINUE, ...) as a module-level name
globals().update(http.HTTPStatus.__members__)

# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576

# maximal line length when calling readline().
_MAXLINE = 65536
_MAXHEADERS = 100

# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
#
# VCHAR          = %x21-7E
# obs-text       = %x80-FF
# header-field   = field-name ":" OWS field-value OWS
# field-name     = token
# field-value    = *( field-content / obs-fold )
# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar    = VCHAR / obs-text
#
# obs-fold       = CRLF 1*( SP / HTAB )
#                ; obsolete line folding
#                ; see Section 3.2.4

# token          = 1*tchar
#
# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
#                / DIGIT / ALPHA
#                ; any VCHAR, except delimiters
#
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1

# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}


def _encode(data, name='data'):
    """Call data.encode("latin-1") but show a better error message."""
    try:
        return data.encode("latin-1")
    except UnicodeEncodeError as err:
        raise UnicodeEncodeError(
            err.encoding,
            err.object,
            err.start,
            err.end,
            "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
            "if you want to send it encoded in UTF-8." %
            (name.title(), data[err.start:err.end], name)) from None


class HTTPMessage(email.message.Message):
    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Return one "Name: value" string per occurrence of the header, in
        message order.  A folded value keeps whatever continuation text the
        email parser stored in it.  If the header does not occur, an empty
        list is returned.  Case is not important in the header name.

        """
        # The previous implementation compared the bare header names from
        # self.keys() against name + ':', which can never match, so it
        # always returned [].  Rebuild the lines from the parsed items.
        wanted = name.lower()
        return ["%s: %s" % (key, value)
                for key, value in self.items()
                if key.lower() == wanted]


def parse_headers(fp, _class=HTTPMessage):
    """Parses only RFC2822 headers from a file pointer.

    email Parser wants to see strings rather than bytes.
    But a TextIOWrapper around self.rfile would buffer too many bytes
    from the stream, bytes which we later need to read as bytes.
    So we read the correct bytes here, as bytes, for email Parser
    to parse.

    Raises LineTooLong if a line exceeds _MAXLINE bytes and HTTPException
    if more than _MAXHEADERS lines are read.

    """
    headers = []
    while True:
        line = fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("header line")
        headers.append(line)
        if len(headers) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        # a blank line (or EOF) terminates the header section
        if line in (b'\r\n', b'\n', b''):
            break
    hstring = b''.join(headers).decode('iso-8859-1')
    return email.parser.Parser(_class=_class).parsestr(hstring)


class HTTPResponse(io.BufferedIOBase):

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    # The bytes from the socket object are iso-8859-1 strings.
    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
    # text following RFC 2047.  The basic status line parsing only
    # accepts iso-8859-1.

    def __init__(self, sock, debuglevel=0, method=None, url=None):
        # If the response includes a content-length header, we need to
        # make sure that the client doesn't read more than the
        # specified number of bytes.  If it does, it will block until
        # the server times out and closes the connection.  This will
        # happen if a self.fp.read() is done (without a size) whether
        # self.fp is buffered or not.  So, no self.fp.read() by
        # clients unless they know what they are doing.
        self.fp = sock.makefile("rb")
        self.debuglevel = debuglevel
        self._method = method

        # Remember the URL so geturl() works even when no caller assigns
        # the attribute afterwards (previously the argument was dropped).
        self.url = url

        # The HTTPResponse object is returned via urllib.  The clients
        # of http and urllib expect different attributes for the
        # headers.  headers is used here and supports urllib.  msg is
        # provided as a backwards compatibility layer for http
        # clients.

        self.headers = self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN  # HTTP-Version
        self.status = _UNKNOWN   # Status-Code
        self.reason = _UNKNOWN   # Reason-Phrase

        self.chunked = _UNKNOWN     # is "chunked" being used?
        self.chunk_left = _UNKNOWN  # bytes left to read in current chunk
        self.length = _UNKNOWN      # number of bytes left in response
        self.will_close = _UNKNOWN  # conn will close at end of response

    def _read_status(self):
        """Read and parse the status line; return (version, status, reason).

        Raises LineTooLong, RemoteDisconnected (EOF before any data) or
        BadStatusLine (malformed line or status outside 100..999).
        """
        line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
        if len(line) > _MAXLINE:
            raise LineTooLong("status line")
        if self.debuglevel > 0:
            print("reply:", repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise RemoteDisconnected("Remote end closed connection without"
                                     " response")
        try:
            version, status, reason = line.split(None, 2)
        except ValueError:
            try:
                version, status = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail.
                version = ""
        if not version.startswith("HTTP/"):
            self._close_conn()
            raise BadStatusLine(line)

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Read the status line and headers and derive the framing state.

        Sets status/reason/version/headers and decides chunked, length and
        will_close.  Does nothing if the headers were already read.
        """
        if self.headers is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline(_MAXLINE + 1)
                if len(skip) > _MAXLINE:
                    raise LineTooLong("header line")
                skip = skip.strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print("header:", skip)

        self.code = self.status = status
        self.reason = reason.strip()
        if version in ("HTTP/1.0", "HTTP/0.9"):
            # Some servers might still return "0.9", treat it as 1.0 anyway
            self.version = 10
        elif version.startswith("HTTP/1."):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        else:
            raise UnknownProtocol(version)

        self.headers = self.msg = parse_headers(self.fp)

        if self.debuglevel > 0:
            # print name and value (the old loop printed names only)
            for hdr, val in self.headers.items():
                print("header:", hdr + ":", val)

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.headers.get("transfer-encoding")
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = True
            self.chunk_left = None
        else:
            self.chunked = False

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        self.length = None
        length = self.headers.get("content-length")
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
                100 <= status < 200 or      # 1xx codes
                self._method == "HEAD"):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not self.will_close and
                not self.chunked and
                self.length is None):
            self.will_close = True

    def _check_close(self):
        """Return True if the connection will close after this response."""
        conn = self.headers.get("connection")
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.headers.get("keep-alive"):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.headers.get("proxy-connection")
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def _close_conn(self):
        """Drop and close the underlying file object."""
        fp = self.fp
        self.fp = None
        fp.close()

    def close(self):
        """Mark the stream closed and release the underlying file object."""
        try:
            super().close()  # set "closed" flag
        finally:
            if self.fp:
                self._close_conn()

    # These implementations are for the benefit of io.BufferedReader.

    # XXX This class should probably be revised to act more like
    # the "raw stream" that BufferedReader expects.

    def flush(self):
        """Flush this object and, if still open, the underlying file."""
        super().flush()
        if self.fp:
            self.fp.flush()

    def readable(self):
        """Always returns True"""
        return True

    # End of "raw stream" methods

    def isclosed(self):
        """True if the connection is closed."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    def read(self, amt=None):
        """Read and return up to amt bytes, or the whole body if amt is None.

        Returns b"" once the response is closed or for HEAD requests.
        Raises IncompleteRead if the body ends before the declared length.
        """
        if self.fp is None:
            return b""

        if self._method == "HEAD":
            self._close_conn()
            return b""

        if amt is not None:
            # Amount is given, implement using readinto
            b = bytearray(amt)
            n = self.readinto(b)
            return memoryview(b)[:n].tobytes()
        else:
            # Amount is not given (unbounded read) so we must check self.length
            # and self.chunked

            if self.chunked:
                return self._readall_chunked()

            if self.length is None:
                s = self.fp.read()
            else:
                try:
                    s = self._safe_read(self.length)
                except IncompleteRead:
                    self._close_conn()
                    raise
                self.length = 0
            self._close_conn()        # we read everything
            return s

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b and return the number
        of bytes read.
        """

        if self.fp is None:
            return 0

        if self._method == "HEAD":
            self._close_conn()
            return 0

        if self.chunked:
            return self._readinto_chunked(b)

        if self.length is not None:
            if len(b) > self.length:
                # clip the read to the "end of response"
                b = memoryview(b)[0:self.length]

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        n = self.fp.readinto(b)
        if not n and b:
            # Ideally, we would raise IncompleteRead if the content-length
            # wasn't satisfied, but it might break compatibility.
            self._close_conn()
        elif self.length is not None:
            self.length -= n
            if not self.length:
                self._close_conn()
        return n

    def _read_next_chunk_size(self):
        """Read a chunk-size line and return the size as an int.

        Raises LineTooLong, or ValueError (after closing the connection)
        when the size is not valid hexadecimal.
        """
        # Read the next chunk size from the file
        line = self.fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("chunk size")
        i = line.find(b";")
        if i >= 0:
            line = line[:i]  # strip chunk-extensions
        try:
            return int(line, 16)
        except ValueError:
            # close the connection as protocol synchronisation is
            # probably lost
            self._close_conn()
            raise

    def _read_and_discard_trailer(self):
        """Consume trailer lines up to and including the blank line."""
        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("trailer line")
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line in (b'\r\n', b'\n', b''):
                break

    def _get_chunk_left(self):
        """Return the bytes left in the current chunk, or None at the end."""
        # return self.chunk_left, reading a new chunk if necessary.
        # chunk_left == 0: at the end of the current chunk, need to close it
        # chunk_left == None: No current chunk, should read next.
        # This function returns non-zero or None if the last chunk has
        # been read.
        chunk_left = self.chunk_left
        if not chunk_left:  # Can be 0 or None
            if chunk_left is not None:
                # We are at the end of chunk, discard chunk end
                self._safe_read(2)  # toss the CRLF at the end of the chunk
            try:
                chunk_left = self._read_next_chunk_size()
            except ValueError:
                raise IncompleteRead(b'')
            if chunk_left == 0:
                # last chunk: 1*("0") [ chunk-extension ] CRLF
                self._read_and_discard_trailer()
                # we read everything; close the "file"
                self._close_conn()
                chunk_left = None
            self.chunk_left = chunk_left
        return chunk_left

    def _readall_chunked(self):
        """Read and return the entire remaining chunked body."""
        assert self.chunked != _UNKNOWN
        value = []
        try:
            while True:
                chunk_left = self._get_chunk_left()
                if chunk_left is None:
                    break
                value.append(self._safe_read(chunk_left))
                self.chunk_left = 0
            return b''.join(value)
        except IncompleteRead:
            # report everything that was decoded before the failure
            raise IncompleteRead(b''.join(value))

    def _readinto_chunked(self, b):
        """Fill b from the chunked body; return the number of bytes stored."""
        assert self.chunked != _UNKNOWN
        total_bytes = 0
        mvb = memoryview(b)
        try:
            while True:
                chunk_left = self._get_chunk_left()
                if chunk_left is None:
                    return total_bytes

                if len(mvb) <= chunk_left:
                    # the rest of the buffer fits in the current chunk
                    n = self._safe_readinto(mvb)
                    self.chunk_left = chunk_left - n
                    return total_bytes + n

                temp_mvb = mvb[:chunk_left]
                n = self._safe_readinto(temp_mvb)
                mvb = mvb[n:]
                total_bytes += n
                self.chunk_left = 0

        except IncompleteRead:
            raise IncompleteRead(bytes(b[0:total_bytes]))

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        s = []
        while amt > 0:
            chunk = self.fp.read(min(amt, MAXAMOUNT))
            if not chunk:
                raise IncompleteRead(b''.join(s), amt)
            s.append(chunk)
            amt -= len(chunk)
        return b"".join(s)

    def _safe_readinto(self, b):
        """Same as _safe_read, but for reading into a buffer."""
        total_bytes = 0
        whole = memoryview(b)
        remaining = whole
        while total_bytes < len(b):
            if MAXAMOUNT < len(remaining):
                n = self.fp.readinto(remaining[0:MAXAMOUNT])
            else:
                n = self.fp.readinto(remaining)
            if not n:
                # Report the bytes actually received (taken from the start
                # of the buffer, not from the advanced view) and how many
                # are still missing, mirroring _safe_read().
                raise IncompleteRead(bytes(whole[0:total_bytes]),
                                     len(b) - total_bytes)
            remaining = remaining[n:]
            total_bytes += n
        return total_bytes

    def read1(self, n=-1):
        """Read with at most one underlying system call.  If at least one
        byte is buffered, return that instead.
        """
        if self.fp is None or self._method == "HEAD":
            return b""
        if self.chunked:
            return self._read1_chunked(n)
        if self.length is not None and (n < 0 or n > self.length):
            n = self.length
        try:
            result = self.fp.read1(n)
        except ValueError:
            if n >= 0:
                raise
            # some implementations, like BufferedReader, don't support -1
            # Read an arbitrarily selected largeish chunk.
            result = self.fp.read1(16*1024)
        if not result and n:
            self._close_conn()
        elif self.length is not None:
            self.length -= len(result)
        return result

    def peek(self, n=-1):
        """Return buffered bytes without consuming them."""
        # Having this enables IOBase.readline() to read more than one
        # byte at a time
        if self.fp is None or self._method == "HEAD":
            return b""
        if self.chunked:
            return self._peek_chunked(n)
        return self.fp.peek(n)

    def readline(self, limit=-1):
        """Read one line, never reading past the declared body length."""
        if self.fp is None or self._method == "HEAD":
            return b""
        if self.chunked:
            # Fallback to IOBase readline which uses peek() and read()
            return super().readline(limit)
        if self.length is not None and (limit < 0 or limit > self.length):
            limit = self.length
        result = self.fp.readline(limit)
        if not result and limit:
            self._close_conn()
        elif self.length is not None:
            self.length -= len(result)
        return result

    def _read1_chunked(self, n):
        """read1() for chunked bodies; never crosses a chunk boundary."""
        # Strictly speaking, _get_chunk_left() may cause more than one read,
        # but that is ok, since that is to satisfy the chunked protocol.
        chunk_left = self._get_chunk_left()
        if chunk_left is None or n == 0:
            return b''
        if not (0 <= n <= chunk_left):
            n = chunk_left  # if n is negative or larger than chunk_left
        read = self.fp.read1(n)
        self.chunk_left -= len(read)
        if not read:
            raise IncompleteRead(b"")
        return read

    def _peek_chunked(self, n):
        """peek() for chunked bodies; limited to the current chunk."""
        # Strictly speaking, _get_chunk_left() may cause more than one read,
        # but that is ok, since that is to satisfy the chunked protocol.
        try:
            chunk_left = self._get_chunk_left()
        except IncompleteRead:
            return b''  # peek doesn't worry about protocol
        if chunk_left is None:
            return b''  # eof
        # peek is allowed to return more than requested.  Just request the
        # entire chunk, and truncate what we get.
        return self.fp.peek(chunk_left)[:chunk_left]

    def fileno(self):
        """Return the file descriptor of the underlying file object."""
        return self.fp.fileno()

    def getheader(self, name, default=None):
        '''Returns the value of the header matching *name*.

        If there are multiple matching headers, the values are
        combined into a single string separated by commas and spaces.

        If no matching header is found, returns *default* or None if
        the *default* is not specified.

        If the headers are unknown, raises http.client.ResponseNotReady.

        '''
        if self.headers is None:
            raise ResponseNotReady()
        headers = self.headers.get_all(name) or default
        if isinstance(headers, str) or not hasattr(headers, '__iter__'):
            return headers
        else:
            return ', '.join(headers)

    def getheaders(self):
        """Return list of (header, value) tuples."""
        if self.headers is None:
            raise ResponseNotReady()
        return list(self.headers.items())

    # We override IOBase.__iter__ so that it doesn't check for closed-ness

    def __iter__(self):
        return self

    # For compatibility with old-style urllib responses.

    def info(self):
        '''Return the response headers (the object stored in self.headers).

        This is the message built by parse_headers() in begin(), or None
        if the headers have not been read yet.  Kept for compatibility
        with old-style urllib responses.

        '''
        return self.headers

    def geturl(self):
        '''Return the real URL of the page.

        In some cases, the HTTP server redirects a client to another
        URL. The urlopen() function handles this transparently, but in
        some cases the caller needs to know which URL the client was
        redirected to. The geturl() method can be used to get at this
        redirected URL.

        Returns the *url* passed to the constructor (None by default)
        unless a caller has since assigned the ``url`` attribute.

        '''
        return self.url

    def getcode(self):
        '''Return the HTTP status code that was sent with the response,
        or None if the URL is not an HTTP URL.

        '''
        return self.status


class HTTPConnection:

    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    response_class = HTTPResponse
    default_port = HTTP_PORT
    auto_open = 1
    debuglevel = 0

    @staticmethod
    def _is_textIO(stream):
        """Test whether a file-like object is a text or a binary stream.
        """
        return isinstance(stream, io.TextIOBase)

    @staticmethod
    def _get_content_length(body, method):
        """Get the content-length based on the body.

        If the body is None, we set Content-Length: 0 for methods that expect
        a body (RFC 7230, Section 3.3.2). We also set the Content-Length for
        any method if the body is a str or bytes-like object and not a file.
        """
        if body is None:
            # do an explicit check for not None here to distinguish
            # between unset and set but empty
            if method.upper() in _METHODS_EXPECTING_BODY:
                return 0
            else:
                return None

        if hasattr(body, 'read'):
            # file-like object.
            return None

        try:
            # does it implement the buffer protocol (bytes, bytearray, array)?
            mv = memoryview(body)
            return mv.nbytes
        except TypeError:
            pass

        if isinstance(body, str):
            return len(body)

        return None

    def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                 source_address=None):
        self.timeout = timeout
        self.source_address = source_address
        self.sock = None
        self._buffer = []
        self.__response = None
        self.__state = _CS_IDLE
        self._method = None
        self._tunnel_host = None
        self._tunnel_port = None
        self._tunnel_headers = {}

        (self.host, self.port) = self._get_hostport(host, port)

        # This is stored as an instance variable to allow unit
        # tests to replace it with a suitable mockup
        self._create_connection = socket.create_connection

    def set_tunnel(self, host, port=None, headers=None):
        """Set up host and port for HTTP CONNECT tunnelling.

        In a connection that uses HTTP CONNECT tunneling, the host passed to
        the constructor is used as a proxy server that relays all
        communication to the endpoint passed to `set_tunnel`. This is done
        by sending an HTTP CONNECT request to the proxy server when the
        connection is established.

        This method must be called before the HTTP connection has been
        established.

        The headers argument should be a mapping of extra HTTP headers to send
        with the CONNECT request.
        """

        if self.sock:
            raise RuntimeError("Can't set up tunnel for established connection")

        self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
        if headers:
            self._tunnel_headers = headers
        else:
            self._tunnel_headers.clear()

    def _get_hostport(self, host, port):
        """Split an optional ":port" suffix off host; return (host, port).

        Brackets around an IPv6 literal are removed.  Raises InvalidURL for
        a non-numeric, non-empty port.
        """
        if port is None:
            i = host.rfind(':')
            j = host.rfind(']')         # ipv6 addresses have [...]
            if i > j:
                try:
                    port = int(host[i+1:])
                except ValueError:
                    if host[i+1:] == "":  # http://foo.com:/ == http://foo.com/
                        port = self.default_port
                    else:
                        raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
                host = host[:i]
            else:
                port = self.default_port
            if host and host[0] == '[' and host[-1] == ']':
                host = host[1:-1]

        return (host, port)

    def set_debuglevel(self, level):
        """Set the debug output level (anything above 0 enables printing)."""
        self.debuglevel = level

    def _tunnel(self):
        """Send the CONNECT request and consume the proxy's response headers.

        Raises OSError (after closing the connection) unless the proxy
        answers with status 200.
        """
        connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
                                                       self._tunnel_port)
        connect_bytes = connect_str.encode("ascii")
        self.send(connect_bytes)
        for header, value in self._tunnel_headers.items():
            header_str = "%s: %s\r\n" % (header, value)
            header_bytes = header_str.encode("latin-1")
            self.send(header_bytes)
        self.send(b'\r\n')

        response = self.response_class(self.sock, method=self._method)
        (version, code, message) = response._read_status()

        if code != http.HTTPStatus.OK:
            self.close()
            raise OSError("Tunnel connection failed: %d %s" % (code,
                                                               message.strip()))
        while True:
            line = response.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            if not line:
                # for sites which EOF without sending a trailer
                break
            if line in (b'\r\n', b'\n', b''):
                break

            if self.debuglevel > 0:
                print('header:', line.decode())

    def connect(self):
        """Connect to the host and port specified in __init__."""
        self.sock = self._create_connection(
            (self.host, self.port), self.timeout, self.source_address)
        self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

        if self._tunnel_host:
            self._tunnel()

    def close(self):
        """Close the connection to the HTTP server."""
        self.__state = _CS_IDLE
        try:
            sock = self.sock
            if sock:
                self.sock = None
                sock.close()   # close it manually... there may be other refs
        finally:
            response = self.__response
            if response:
                self.__response = None
                response.close()

    def send(self, data):
        """Send `data' to the server.
        ``data`` can be a string object, a bytes object, an array object, a
        file-like object that supports a .read() method, or an iterable object.
        """
        # collections.Iterable was removed in Python 3.10; the ABC lives in
        # collections.abc.  Imported here so this method is self-contained.
        from collections.abc import Iterable

        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        if self.debuglevel > 0:
            print("send:", repr(data))
        if hasattr(data, "read"):
            # _read_readable() yields the same blocks (and prints the same
            # debug lines) as the loop that used to be duplicated here.
            for datablock in self._read_readable(data):
                self.sock.sendall(datablock)
            return
        try:
            self.sock.sendall(data)
        except TypeError:
            if isinstance(data, Iterable):
                for d in data:
                    self.sock.sendall(d)
            else:
                raise TypeError("data should be a bytes-like object "
                                "or an iterable, got %r" % type(data))

    def _output(self, s):
        """Add a line of output to the current request buffer.

        Assumes that the line does *not* end with \\r\\n.
        """
        self._buffer.append(s)

    def _read_readable(self, readable):
        """Yield the contents of a file-like object in 8192-unit blocks.

        Text streams are encoded as iso-8859-1 before being yielded.
        """
        blocksize = 8192
        if self.debuglevel > 0:
            print("sendIng a read()able")
        encode = self._is_textIO(readable)
        if encode and self.debuglevel > 0:
            print("encoding file using iso-8859-1")
        while True:
            datablock = readable.read(blocksize)
            if not datablock:
                break
            if encode:
                datablock = datablock.encode("iso-8859-1")
            yield datablock

    def _send_output(self, message_body=None, encode_chunked=False):
        """Send the currently buffered request and clear the buffer.

        Appends an extra \\r\\n to the buffer.
        A message_body may be specified, to be appended to the request.
        """
        self._buffer.extend((b"", b""))
        msg = b"\r\n".join(self._buffer)
        del self._buffer[:]
        self.send(msg)

        if message_body is not None:

            # create a consistent interface to message_body
            if hasattr(message_body, 'read'):
                # Let file-like take precedence over byte-like.  This
                # is needed to allow the current position of mmap'ed
                # files to be taken into account.
                chunks = self._read_readable(message_body)
            else:
                try:
                    # this is solely to check to see if message_body
                    # implements the buffer API.  it /would/ be easier
                    # to capture if PyObject_CheckBuffer was exposed
                    # to Python.
                    memoryview(message_body)
                except TypeError:
                    try:
                        chunks = iter(message_body)
                    except TypeError:
                        raise TypeError("message_body should be a bytes-like "
                                        "object or an iterable, got %r"
                                        % type(message_body))
                else:
                    # the object implements the buffer interface and
                    # can be passed directly into socket methods
                    chunks = (message_body,)

            for chunk in chunks:
                if not chunk:
                    if self.debuglevel > 0:
                        print('Zero length chunk ignored')
                    continue

                if encode_chunked and self._http_vsn == 11:
                    # chunked encoding
                    chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
                        + b'\r\n'
                self.send(chunk)

            if encode_chunked and self._http_vsn == 11:
                # end chunked transfer
                self.send(b'0\r\n\r\n')

    def putrequest(self, method, url, skip_host=False,
                   skip_accept_encoding=False):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.
        `skip_host' if True does not add automatically a 'Host:' header
        `skip_accept_encoding' if True does not add automatically an
           'Accept-Encoding:' header
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None


        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state == _CS_IDLE:
            self.__state = _CS_REQ_STARTED
        else:
            raise CannotSendRequest(self.__state)

        # Save the method we use, we need it later in the response phase
        self._method = method
        if not url:
            url = '/'
        request = '%s %s %s' % (method, url, self._http_vsn_str)

        # Non-ASCII characters should have been eliminated earlier
        self._output(request.encode('ascii'))

        if self._http_vsn == 11:
            # Issue some standard headers for better HTTP/1.1 compliance

            if not skip_host:
                # this header is issued *only* for HTTP/1.1
                # connections. more specifically, this means it is
                # only issued when the client uses the new
                # HTTPConnection() class. backwards-compat clients
                # will be using HTTP/1.0 and those clients may be
                # issuing this header themselves. we should NOT issue
                # it twice; some web servers (such as Apache) barf
                # when they see two Host: headers

                # If we need a non-standard port,include it in the
                # header.  If the request is going through a proxy,
                # but the host of the actual URL, not the host of the
                # proxy.

                netloc = ''
                if url.startswith('http'):
                    nil, netloc, nil, nil, nil = urlsplit(url)

                if netloc:
                    try:
                        netloc_enc = netloc.encode("ascii")
                    except UnicodeEncodeError:
                        netloc_enc = netloc.encode("idna")
                    self.putheader('Host', netloc_enc)
                else:
                    if self._tunnel_host:
                        host = self._tunnel_host
                        port = self._tunnel_port
                    else:
                        host = self.host
                        port = self.port

                    try:
                        host_enc = host.encode("ascii")
                    except UnicodeEncodeError:
                        host_enc = host.encode("idna")

                    # As per RFC 2732, IPv6 address should be wrapped with []
                    # when used as Host header

                    if host.find(':') >= 0:
                        host_enc = b'[' + host_enc + b']'

                    if port == self.default_port:
                        self.putheader('Host', host_enc)
                    else:
                        host_enc = host_enc.decode("ascii")
                        self.putheader('Host', "%s:%s" % (host_enc, port))

            # note: we are assuming that clients will not attempt to set these
            #       headers since *this* library must deal with the
            #       consequences. this also means that when the supporting
            #       libraries are updated to recognize other forms, then this
            #       code should be changed (removed or updated).

            # we only want a Content-Encoding of "identity" since we don't
            # support encodings such as x-gzip or x-deflate.
1180 if not skip_accept_encoding: 1181 self.putheader('Accept-Encoding', 'identity') 1182 1183 # we can accept "chunked" Transfer-Encodings, but no others 1184 # NOTE: no TE header implies *only* "chunked" 1185 #self.putheader('TE', 'chunked') 1186 1187 # if TE is supplied in the header, then it must appear in a 1188 # Connection header. 1189 #self.putheader('Connection', 'TE') 1190 1191 else: 1192 # For HTTP/1.0, the server will assume "not chunked" 1193 pass 1194 1195 def putheader(self, header, *values): 1196 """Send a request header line to the server. 1197 1198 For example: h.putheader('Accept', 'text/html') 1199 """ 1200 if self.__state != _CS_REQ_STARTED: 1201 raise CannotSendHeader() 1202 1203 if hasattr(header, 'encode'): 1204 header = header.encode('ascii') 1205 1206 if not _is_legal_header_name(header): 1207 raise ValueError('Invalid header name %r' % (header,)) 1208 1209 values = list(values) 1210 for i, one_value in enumerate(values): 1211 if hasattr(one_value, 'encode'): 1212 values[i] = one_value.encode('latin-1') 1213 elif isinstance(one_value, int): 1214 values[i] = str(one_value).encode('ascii') 1215 1216 if _is_illegal_header_value(values[i]): 1217 raise ValueError('Invalid header value %r' % (values[i],)) 1218 1219 value = b'\r\n\t'.join(values) 1220 header = header + b': ' + value 1221 self._output(header) 1222 1223 def endheaders(self, message_body=None, *, encode_chunked=False): 1224 """Indicate that the last header line has been sent to the server. 1225 1226 This method sends the request to the server. The optional message_body 1227 argument can be used to pass a message body associated with the 1228 request. 
1229 """ 1230 if self.__state == _CS_REQ_STARTED: 1231 self.__state = _CS_REQ_SENT 1232 else: 1233 raise CannotSendHeader() 1234 self._send_output(message_body, encode_chunked=encode_chunked) 1235 1236 def request(self, method, url, body=None, headers={}, *, 1237 encode_chunked=False): 1238 """Send a complete request to the server.""" 1239 self._send_request(method, url, body, headers, encode_chunked) 1240 1241 def _send_request(self, method, url, body, headers, encode_chunked): 1242 # Honor explicitly requested Host: and Accept-Encoding: headers. 1243 header_names = frozenset(k.lower() for k in headers) 1244 skips = {} 1245 if 'host' in header_names: 1246 skips['skip_host'] = 1 1247 if 'accept-encoding' in header_names: 1248 skips['skip_accept_encoding'] = 1 1249 1250 self.putrequest(method, url, **skips) 1251 1252 # chunked encoding will happen if HTTP/1.1 is used and either 1253 # the caller passes encode_chunked=True or the following 1254 # conditions hold: 1255 # 1. content-length has not been explicitly set 1256 # 2. the body is a file or iterable, but not a str or bytes-like 1257 # 3. 
Transfer-Encoding has NOT been explicitly set by the caller 1258 1259 if 'content-length' not in header_names: 1260 # only chunk body if not explicitly set for backwards 1261 # compatibility, assuming the client code is already handling the 1262 # chunking 1263 if 'transfer-encoding' not in header_names: 1264 # if content-length cannot be automatically determined, fall 1265 # back to chunked encoding 1266 encode_chunked = False 1267 content_length = self._get_content_length(body, method) 1268 if content_length is None: 1269 if body is not None: 1270 if self.debuglevel > 0: 1271 print('Unable to determine size of %r' % body) 1272 encode_chunked = True 1273 self.putheader('Transfer-Encoding', 'chunked') 1274 else: 1275 self.putheader('Content-Length', str(content_length)) 1276 else: 1277 encode_chunked = False 1278 1279 for hdr, value in headers.items(): 1280 self.putheader(hdr, value) 1281 if isinstance(body, str): 1282 # RFC 2616 Section 3.7.1 says that text default has a 1283 # default charset of iso-8859-1. 1284 body = _encode(body, 'body') 1285 self.endheaders(body, encode_chunked=encode_chunked) 1286 1287 def getresponse(self): 1288 """Get the response from the server. 1289 1290 If the HTTPConnection is in the correct state, returns an 1291 instance of HTTPResponse or of whatever object is returned by 1292 the response_class variable. 1293 1294 If a request has not been sent or if a previous response has 1295 not be handled, ResponseNotReady is raised. If the HTTP 1296 response indicates that the connection should be closed, then 1297 it will be closed before the response is returned. When the 1298 connection is closed, the underlying socket is closed. 1299 """ 1300 1301 # if a prior response has been completed, then forget about it. 
1302 if self.__response and self.__response.isclosed(): 1303 self.__response = None 1304 1305 # if a prior response exists, then it must be completed (otherwise, we 1306 # cannot read this response's header to determine the connection-close 1307 # behavior) 1308 # 1309 # note: if a prior response existed, but was connection-close, then the 1310 # socket and response were made independent of this HTTPConnection 1311 # object since a new request requires that we open a whole new 1312 # connection 1313 # 1314 # this means the prior response had one of two states: 1315 # 1) will_close: this connection was reset and the prior socket and 1316 # response operate independently 1317 # 2) persistent: the response was retained and we await its 1318 # isclosed() status to become true. 1319 # 1320 if self.__state != _CS_REQ_SENT or self.__response: 1321 raise ResponseNotReady(self.__state) 1322 1323 if self.debuglevel > 0: 1324 response = self.response_class(self.sock, self.debuglevel, 1325 method=self._method) 1326 else: 1327 response = self.response_class(self.sock, method=self._method) 1328 1329 try: 1330 try: 1331 response.begin() 1332 except ConnectionError: 1333 self.close() 1334 raise 1335 assert response.will_close != _UNKNOWN 1336 self.__state = _CS_IDLE 1337 1338 if response.will_close: 1339 # this effectively passes the connection to the response 1340 self.close() 1341 else: 1342 # remember this, so we can tell when it is complete 1343 self.__response = response 1344 1345 return response 1346 except: 1347 response.close() 1348 raise 1349 1350try: 1351 import ssl 1352except ImportError: 1353 pass 1354else: 1355 class HTTPSConnection(HTTPConnection): 1356 "This class allows communication via SSL." 1357 1358 default_port = HTTPS_PORT 1359 1360 # XXX Should key_file and cert_file be deprecated in favour of context? 
1361 1362 def __init__(self, host, port=None, key_file=None, cert_file=None, 1363 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 1364 source_address=None, *, context=None, 1365 check_hostname=None): 1366 super(HTTPSConnection, self).__init__(host, port, timeout, 1367 source_address) 1368 if (key_file is not None or cert_file is not None or 1369 check_hostname is not None): 1370 import warnings 1371 warnings.warn("key_file, cert_file and check_hostname are " 1372 "deprecated, use a custom context instead.", 1373 DeprecationWarning, 2) 1374 self.key_file = key_file 1375 self.cert_file = cert_file 1376 if context is None: 1377 context = ssl._create_default_https_context() 1378 will_verify = context.verify_mode != ssl.CERT_NONE 1379 if check_hostname is None: 1380 check_hostname = context.check_hostname 1381 if check_hostname and not will_verify: 1382 raise ValueError("check_hostname needs a SSL context with " 1383 "either CERT_OPTIONAL or CERT_REQUIRED") 1384 if key_file or cert_file: 1385 context.load_cert_chain(cert_file, key_file) 1386 self._context = context 1387 self._check_hostname = check_hostname 1388 1389 def connect(self): 1390 "Connect to a host on a given (SSL) port." 1391 1392 super().connect() 1393 1394 if self._tunnel_host: 1395 server_hostname = self._tunnel_host 1396 else: 1397 server_hostname = self.host 1398 1399 self.sock = self._context.wrap_socket(self.sock, 1400 server_hostname=server_hostname) 1401 if not self._context.check_hostname and self._check_hostname: 1402 try: 1403 ssl.match_hostname(self.sock.getpeercert(), server_hostname) 1404 except Exception: 1405 self.sock.shutdown(socket.SHUT_RDWR) 1406 self.sock.close() 1407 raise 1408 1409 __all__.append("HTTPSConnection") 1410 1411class HTTPException(Exception): 1412 # Subclasses that define an __init__ must call Exception.__init__ 1413 # or define self.args. Otherwise, str() will fail. 
1414 pass 1415 1416class NotConnected(HTTPException): 1417 pass 1418 1419class InvalidURL(HTTPException): 1420 pass 1421 1422class UnknownProtocol(HTTPException): 1423 def __init__(self, version): 1424 self.args = version, 1425 self.version = version 1426 1427class UnknownTransferEncoding(HTTPException): 1428 pass 1429 1430class UnimplementedFileMode(HTTPException): 1431 pass 1432 1433class IncompleteRead(HTTPException): 1434 def __init__(self, partial, expected=None): 1435 self.args = partial, 1436 self.partial = partial 1437 self.expected = expected 1438 def __repr__(self): 1439 if self.expected is not None: 1440 e = ', %i more expected' % self.expected 1441 else: 1442 e = '' 1443 return '%s(%i bytes read%s)' % (self.__class__.__name__, 1444 len(self.partial), e) 1445 def __str__(self): 1446 return repr(self) 1447 1448class ImproperConnectionState(HTTPException): 1449 pass 1450 1451class CannotSendRequest(ImproperConnectionState): 1452 pass 1453 1454class CannotSendHeader(ImproperConnectionState): 1455 pass 1456 1457class ResponseNotReady(ImproperConnectionState): 1458 pass 1459 1460class BadStatusLine(HTTPException): 1461 def __init__(self, line): 1462 if not line: 1463 line = repr(line) 1464 self.args = line, 1465 self.line = line 1466 1467class LineTooLong(HTTPException): 1468 def __init__(self, line_type): 1469 HTTPException.__init__(self, "got more than %d bytes when reading %s" 1470 % (_MAXLINE, line_type)) 1471 1472class RemoteDisconnected(ConnectionResetError, BadStatusLine): 1473 def __init__(self, *pos, **kw): 1474 BadStatusLine.__init__(self, "") 1475 ConnectionResetError.__init__(self, *pos, **kw) 1476 1477# for backwards compatibility 1478error = HTTPException 1479