1"""HTTP server base class.
2
3Note: the class in this module doesn't implement any HTTP request; see
4SimpleHTTPServer for simple implementations of GET, HEAD and POST
5(including CGI scripts).  It does, however, optionally implement HTTP/1.1
6persistent connections, as of version 0.3.
7
8Contents:
9
- HTTPServer: TCP server that records the host name and port it is bound to
- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function
12
13XXX To do:
14
15- log requests even later (to capture byte count)
16- log user-agent header and other interesting goodies
17- send error log to separate file
18"""
19
20
21# See also:
22#
23# HTTP Working Group                                        T. Berners-Lee
24# INTERNET-DRAFT                                            R. T. Fielding
25# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
26# Expires September 8, 1995                                  March 8, 1995
27#
28# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
29#
30# and
31#
32# Network Working Group                                      R. Fielding
33# Request for Comments: 2616                                       et al
34# Obsoletes: 2068                                              June 1999
35# Category: Standards Track
36#
37# URL: http://www.faqs.org/rfcs/rfc2616.html
38
39# Log files
40# ---------
41#
42# Here's a quote from the NCSA httpd docs about log file format.
43#
44# | The logfile format is as follows. Each line consists of:
45# |
46# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
47# |
48# |        host: Either the DNS name or the IP number of the remote client
49# |        rfc931: Any information returned by identd for this person,
50# |                - otherwise.
51# |        authuser: If user sent a userid for authentication, the user name,
52# |                  - otherwise.
53# |        DD: Day
54# |        Mon: Month (calendar name)
55# |        YYYY: Year
56# |        hh: hour (24-hour format, the machine's timezone)
57# |        mm: minutes
58# |        ss: seconds
59# |        request: The first line of the HTTP request as sent by the client.
60# |        ddd: the status code returned by the server, - if not available.
61# |        bbbb: the total number of bytes sent,
62# |              *not including the HTTP/1.0 header*, - if not available
63# |
64# | You can determine the name of the file accessed through request.
65#
66# (Actually, the latter is only true if you know the server configuration
67# at the time the request was made!)
68
# Version of this module.  It is embedded in the default value of
# BaseHTTPRequestHandler.server_version ("BaseHTTP/" + __version__).
__version__ = "0.3"

# Names exported by a star-import of this module (test() is not listed).
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
72
73import sys
74import time
75import socket # For gethostbyaddr()
76from warnings import filterwarnings, catch_warnings
77with catch_warnings():
78    if sys.py3kwarning:
79        filterwarnings("ignore", ".*mimetools has been removed",
80                        DeprecationWarning)
81    import mimetools
82import SocketServer
83
# Default error message template.  BaseHTTPRequestHandler.send_error()
# fills it in with a mapping that provides the keys 'code' (the numeric
# status code), 'message' (the HTML-escaped detail message) and 'explain'
# (the long description taken from the handler's responses table).
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Value of the Content-Type header sent with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html"
98
def _quote_html(html):
    """Return *html* with "&", "<" and ">" replaced by HTML entities.

    Quote characters are left untouched, so the result is meant for
    element content rather than for attribute values.
    """
    # "&" has to be handled first; otherwise the ampersands introduced by
    # the "<" and ">" substitutions would themselves be escaped again.
    substitutions = (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"))
    escaped = html
    for raw, entity in substitutions:
        escaped = escaped.replace(raw, entity)
    return escaped
101
class HTTPServer(SocketServer.TCPServer):
    """TCP server that records the host name and port it is bound to."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then remember the server's name and port.

        Once the base class has bound the socket, the bound address is
        read back from it.  The host part is passed through
        socket.getfqdn() and stored as self.server_name; the port is
        stored as self.server_port.
        """
        SocketServer.TCPServer.server_bind(self)
        bound_address = self.socket.getsockname()
        # Only the first two items (host, port) are of interest; some
        # address families report longer tuples.
        bound_host, bound_port = bound_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
112
113
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.  As a side effect self.requestline (the request
        line without its line terminator) and self.close_connection are
        set as well.

        Return True for success, False for failure; on failure, an
        error is sent back, except for a request line that contains no
        words at all, which is rejected without a reply.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = 1
        requestline = self.raw_requestline
        # Strip exactly one line terminator: CRLF if present, else bare LF.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            command, path, version = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            # Persistent connections need both sides to speak HTTP/1.1+.
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                          "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version on the request line: treat it as HTTP/0.9, which
            # only knows GET and never keeps the connection open.
            command, path = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive
        self.headers = self.MessageClass(self.rfile, 0)

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        The request line is read from self.rfile, parsed with
        parse_request() and dispatched to the matching do_<command>()
        method.  A request line longer than 65536 bytes is answered with
        414, an unknown command with 501.  A socket timeout is logged
        and marks the connection for closing.

        """
        try:
            # Read one byte more than the limit so an over-long line can be
            # told apart from one that is exactly at the limit.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                # send_error() and the logging code read these attributes,
                # so give them values before reporting the failure.
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                # Nothing was read: stop serving this connection.
                self.close_connection = 1
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(501, "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            self.wfile.flush() #actually send the response if not already done.
        except socket.timeout as e:
            #a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = 1
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        No HTML body (and no Content-Type header) is produced for
        response codes that must not carry one: 1xx, 204, 205 and 304.
        For HEAD requests the headers are sent but the body is not.
        The reply always carries a "Connection: close" header.

        """

        try:
            short_msg, long_msg = self.responses[code]
        except KeyError:
            short_msg, long_msg = '???', '???'
        if message is None:
            message = short_msg
        explain = long_msg
        self.log_error("code %d, message %s", code, message)
        # Message body is omitted for cases described in:
        #  - RFC 7230 section 3.3: 1xx, 204 (No Content), 304 (Not Modified)
        #  - RFC 7231 section 6.3.6: 205 (Reset Content)
        content = None
        if code >= 200 and code not in (204, 205, 304):
            # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
            content = (self.error_message_format %
                       {'code': code, 'message': _quote_html(message),
                        'explain': explain})
        self.send_response(code, message)
        if content is not None:
            self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        if self.command != 'HEAD' and content is not None:
            self.wfile.write(content)

    # Template and content type used by send_error(); override these in a
    # subclass to customize the error page.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        If message is None, the short message for code from the
        responses table is used (or an empty string for unknown codes).
        Nothing is written for HTTP/0.9 requests, which have neither a
        status line nor headers; the request is logged in any case.

        """
        self.log_request(code)
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %d %s\r\n" %
                             (self.protocol_version, code, message))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header.

        The header is only written for requests other than HTTP/0.9.
        A Connection header with the value "close" or "keep-alive"
        additionally updates self.close_connection, whatever the
        request version.

        """
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        The log entry consists of the quoted request line followed by
        the response code and the size, each defaulting to '-'.

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header.

        If timestamp (seconds since the epoch) is given, it is formatted
        instead of the current time.  The result is expressed in GMT.

        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging.

        Unlike date_time_string(), this uses the local time zone.

        """
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday field of a time tuple (Monday is 0).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month field of a time tuple (January is 1), hence
    # the None placeholder at index 0.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the host part of self.client_address to
        socket.getfqdn() and returns the result; the port is ignored.

        """

        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
580
581
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=HTTPServer, protocol="HTTP/1.0"):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the first command line
    argument).

    Arguments:

    HandlerClass -- request handler class handed to the server; its
        protocol_version class attribute is overwritten with protocol.
    ServerClass -- server class, instantiated with the arguments
        (server_address, HandlerClass).
    protocol -- HTTP protocol version string stored on HandlerClass.

    The server listens on all interfaces and runs until serve_forever()
    returns or raises; its listening socket is closed on the way out.

    """

    if sys.argv[1:]:
        port = int(sys.argv[1])
    else:
        port = 8000
    server_address = ('', port)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(server_address, HandlerClass)

    try:
        sa = httpd.socket.getsockname()
        # A single pre-formatted argument keeps the output identical whether
        # print is parsed as a statement or called as a function.
        print("Serving HTTP on %s port %s ..." % (sa[0], sa[1]))
        httpd.serve_forever()
    finally:
        # Release the listening socket even when the loop is interrupted
        # (for instance by KeyboardInterrupt).
        httpd.server_close()
603
604
# Start the demo server (see test() above) when this file is run as a
# script rather than imported.
if __name__ == '__main__':
    test()
607