1"""HTTP server classes.
2
Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (GET and POST).
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
16subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group                                        T. Berners-Lee
38# INTERNET-DRAFT                                            R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
40# Expires September 8, 1995                                  March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group                                      R. Fielding
47# Request for Comments: 2616                                       et al
48# Obsoletes: 2068                                              June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# |        host: Either the DNS name or the IP number of the remote client
63# |        rfc931: Any information returned by identd for this person,
64# |                - otherwise.
65# |        authuser: If user sent a userid for authentication, the user name,
66# |                  - otherwise.
67# |        DD: Day
68# |        Mon: Month (calendar name)
69# |        YYYY: Year
70# |        hh: hour (24-hour format, the machine's timezone)
71# |        mm: minutes
72# |        ss: seconds
73# |        request: The first line of the HTTP request as sent by the client.
74# |        ddd: the status code returned by the server, - if not available.
75# |        bbbb: the total number of bytes sent,
76# |              *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
# Version of this module.  It is embedded in the server_version strings of
# the request handler classes below ("BaseHTTP/<version>", ...).
__version__ = "0.6"

# Names exported by "from <this module> import *".
__all__ = [
    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]
89
90import copy
91import datetime
92import email.utils
93import html
94import http.client
95import io
96import mimetypes
97import os
98import posixpath
99import select
100import shutil
101import socket # For gethostbyaddr()
102import socketserver
103import sys
104import time
105import urllib.parse
106import contextlib
107from functools import partial
108
109from http import HTTPStatus
110
111
# Default error message template.
# BaseHTTPRequestHandler.send_error() fills it with a mapping that has the
# keys 'code' (numeric status), 'message' (short reason, HTML-escaped) and
# 'explain' (longer explanation, HTML-escaped).
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

# Content-Type header value sent together with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
131
class HTTPServer(socketserver.TCPServer):
    """TCP server that additionally records its own host name and port.

    Once bound, ``server_name`` holds what socket.getfqdn() reports for
    the bound host and ``server_port`` holds the bound port number.
    """

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store the server name and port."""
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
142
143
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    # HTTPServer combined with socketserver.ThreadingMixIn.
    # daemon_threads is an option consumed by ThreadingMixIn; see the
    # socketserver documentation for its exact effect.
    daemon_threads = True
146
147
148class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
149
150    """HTTP request handler base class.
151
152    The following explanation of HTTP serves to guide you through the
153    code as well as to expose any misunderstandings I may have about
154    HTTP (so you don't need to read the code to figure out I'm wrong
155    :-).
156
157    HTTP (HyperText Transfer Protocol) is an extensible protocol on
158    top of a reliable stream transport (e.g. TCP/IP).  The protocol
159    recognizes three parts to a request:
160
161    1. One line identifying the request type and path
162    2. An optional set of RFC-822-style headers
163    3. An optional data part
164
165    The headers and data are separated by a blank line.
166
167    The first line of the request has the form
168
169    <command> <path> <version>
170
171    where <command> is a (case-sensitive) keyword such as GET or POST,
172    <path> is a string containing path information for the request,
173    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
174    <path> is encoded using the URL encoding scheme (using %xx to signify
175    the ASCII character with hex code xx).
176
177    The specification specifies that lines are separated by CRLF but
178    for compatibility with the widest range of clients recommends
179    servers also handle LF.  Similarly, whitespace in the request line
180    is treated sensibly (allowing multiple spaces between components
181    and allowing trailing whitespace).
182
183    Similarly, for output, lines ought to be separated by CRLF pairs
184    but most clients grok LF characters just fine.
185
186    If the first line of the request has the form
187
188    <command> <path>
189
190    (i.e. <version> is left out) then this is assumed to be an HTTP
191    0.9 request; this form has no optional headers and data part and
192    the reply consists of just the data.
193
194    The reply form of the HTTP 1.x protocol again has three parts:
195
196    1. One line giving the response code
197    2. An optional set of RFC-822-style headers
198    3. The data
199
200    Again, the headers and data are separated by a blank line.
201
202    The response code line has the form
203
204    <version> <responsecode> <responsestring>
205
206    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
207    <responsecode> is a 3-digit response code indicating success or
208    failure of the request, and <responsestring> is an optional
209    human-readable string explaining what the response code means.
210
211    This server parses the request and the headers, and then calls a
212    function specific to the request type (<command>).  Specifically,
213    a request SPAM will be handled by a method do_SPAM().  If no
214    such method exists the server sends an error response to the
215    client.  If it exists, it is called with no arguments:
216
217    do_SPAM()
218
219    Note that the request name is case sensitive (i.e. SPAM and spam
220    are different requests).
221
222    The various request details are stored in instance variables:
223
224    - client_address is the client IP address in the form (host,
225    port);
226
227    - command, path and version are the broken-down request line;
228
229    - headers is an instance of email.message.Message (or a derived
230    class) containing the header information;
231
232    - rfile is a file object open for reading positioned at the
233    start of the optional input data part;
234
235    - wfile is a file object open for writing.
236
237    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
238
239    The first thing to be written must be the response line.  Then
240    follow 0 or more header lines, then a blank line, and then the
241    actual data (if any).  The meaning of the header lines depends on
242    the command executed by the server; in most cases, when data is
243    returned, there should be at least one header line of the form
244
245    Content-type: <type>/<subtype>
246
247    where <type> and <subtype> should be registered MIME types,
248    e.g. "text/html" or "text/plain".
249
250    """
251
    # The Python system version, truncated to its first component
    # (the first whitespace-separated token of sys.version).
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    # version_string() joins this with sys_version for the Server header.
    server_version = "BaseHTTP/" + __version__

    # Template and Content-Type used by send_error() for the error page.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"
268
269    def parse_request(self):
270        """Parse a request (internal).
271
272        The request should be stored in self.raw_requestline; the results
273        are in self.command, self.path, self.request_version and
274        self.headers.
275
276        Return True for success, False for failure; on failure, any relevant
277        error response has already been sent back.
278
279        """
280        self.command = None  # set in case of error on the first line
281        self.request_version = version = self.default_request_version
282        self.close_connection = True
283        requestline = str(self.raw_requestline, 'iso-8859-1')
284        requestline = requestline.rstrip('\r\n')
285        self.requestline = requestline
286        words = requestline.split()
287        if len(words) == 0:
288            return False
289
290        if len(words) >= 3:  # Enough to determine protocol version
291            version = words[-1]
292            try:
293                if not version.startswith('HTTP/'):
294                    raise ValueError
295                base_version_number = version.split('/', 1)[1]
296                version_number = base_version_number.split(".")
297                # RFC 2145 section 3.1 says there can be only one "." and
298                #   - major and minor numbers MUST be treated as
299                #      separate integers;
300                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
301                #      turn is lower than HTTP/12.3;
302                #   - Leading zeros MUST be ignored by recipients.
303                if len(version_number) != 2:
304                    raise ValueError
305                version_number = int(version_number[0]), int(version_number[1])
306            except (ValueError, IndexError):
307                self.send_error(
308                    HTTPStatus.BAD_REQUEST,
309                    "Bad request version (%r)" % version)
310                return False
311            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
312                self.close_connection = False
313            if version_number >= (2, 0):
314                self.send_error(
315                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
316                    "Invalid HTTP version (%s)" % base_version_number)
317                return False
318            self.request_version = version
319
320        if not 2 <= len(words) <= 3:
321            self.send_error(
322                HTTPStatus.BAD_REQUEST,
323                "Bad request syntax (%r)" % requestline)
324            return False
325        command, path = words[:2]
326        if len(words) == 2:
327            self.close_connection = True
328            if command != 'GET':
329                self.send_error(
330                    HTTPStatus.BAD_REQUEST,
331                    "Bad HTTP/0.9 request type (%r)" % command)
332                return False
333        self.command, self.path = command, path
334
335        # Examine the headers and look for a Connection directive.
336        try:
337            self.headers = http.client.parse_headers(self.rfile,
338                                                     _class=self.MessageClass)
339        except http.client.LineTooLong as err:
340            self.send_error(
341                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
342                "Line too long",
343                str(err))
344            return False
345        except http.client.HTTPException as err:
346            self.send_error(
347                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
348                "Too many headers",
349                str(err)
350            )
351            return False
352
353        conntype = self.headers.get('Connection', "")
354        if conntype.lower() == 'close':
355            self.close_connection = True
356        elif (conntype.lower() == 'keep-alive' and
357              self.protocol_version >= "HTTP/1.1"):
358            self.close_connection = False
359        # Examine the headers and look for an Expect directive
360        expect = self.headers.get('Expect', "")
361        if (expect.lower() == "100-continue" and
362                self.protocol_version >= "HTTP/1.1" and
363                self.request_version >= "HTTP/1.1"):
364            if not self.handle_expect_100():
365                return False
366        return True
367
368    def handle_expect_100(self):
369        """Decide what to do with an "Expect: 100-continue" header.
370
371        If the client is expecting a 100 Continue response, we must
372        respond with either a 100 Continue or a final response before
373        waiting for the request body. The default is to always respond
374        with a 100 Continue. You can behave differently (for example,
375        reject unauthorized requests) by overriding this method.
376
377        This method should either return True (possibly after sending
378        a 100 Continue response) or send an error response and return
379        False.
380
381        """
382        self.send_response_only(HTTPStatus.CONTINUE)
383        self.end_headers()
384        return True
385
386    def handle_one_request(self):
387        """Handle a single HTTP request.
388
389        You normally don't need to override this method; see the class
390        __doc__ string for information on how to handle specific HTTP
391        commands such as GET and POST.
392
393        """
394        try:
395            self.raw_requestline = self.rfile.readline(65537)
396            if len(self.raw_requestline) > 65536:
397                self.requestline = ''
398                self.request_version = ''
399                self.command = ''
400                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
401                return
402            if not self.raw_requestline:
403                self.close_connection = True
404                return
405            if not self.parse_request():
406                # An error code has been sent, just exit
407                return
408            mname = 'do_' + self.command
409            if not hasattr(self, mname):
410                self.send_error(
411                    HTTPStatus.NOT_IMPLEMENTED,
412                    "Unsupported method (%r)" % self.command)
413                return
414            method = getattr(self, mname)
415            method()
416            self.wfile.flush() #actually send the response if not already done.
417        except socket.timeout as e:
418            #a read or a write timed out.  Discard this connection
419            self.log_error("Request timed out: %r", e)
420            self.close_connection = True
421            return
422
423    def handle(self):
424        """Handle multiple requests if necessary."""
425        self.close_connection = True
426
427        self.handle_one_request()
428        while not self.close_connection:
429            self.handle_one_request()
430
431    def send_error(self, code, message=None, explain=None):
432        """Send and log an error reply.
433
434        Arguments are
435        * code:    an HTTP error code
436                   3 digits
437        * message: a simple optional 1 line reason phrase.
438                   *( HTAB / SP / VCHAR / %x80-FF )
439                   defaults to short entry matching the response code
440        * explain: a detailed message defaults to the long entry
441                   matching the response code.
442
443        This sends an error response (so it must be called before any
444        output has been generated), logs the error, and finally sends
445        a piece of HTML explaining the error to the user.
446
447        """
448
449        try:
450            shortmsg, longmsg = self.responses[code]
451        except KeyError:
452            shortmsg, longmsg = '???', '???'
453        if message is None:
454            message = shortmsg
455        if explain is None:
456            explain = longmsg
457        self.log_error("code %d, message %s", code, message)
458        self.send_response(code, message)
459        self.send_header('Connection', 'close')
460
461        # Message body is omitted for cases described in:
462        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
463        #  - RFC7231: 6.3.6. 205(Reset Content)
464        body = None
465        if (code >= 200 and
466            code not in (HTTPStatus.NO_CONTENT,
467                         HTTPStatus.RESET_CONTENT,
468                         HTTPStatus.NOT_MODIFIED)):
469            # HTML encode to prevent Cross Site Scripting attacks
470            # (see bug #1100201)
471            content = (self.error_message_format % {
472                'code': code,
473                'message': html.escape(message, quote=False),
474                'explain': html.escape(explain, quote=False)
475            })
476            body = content.encode('UTF-8', 'replace')
477            self.send_header("Content-Type", self.error_content_type)
478            self.send_header('Content-Length', str(len(body)))
479        self.end_headers()
480
481        if self.command != 'HEAD' and body:
482            self.wfile.write(body)
483
484    def send_response(self, code, message=None):
485        """Add the response header to the headers buffer and log the
486        response code.
487
488        Also send two standard headers with the server software
489        version and the current date.
490
491        """
492        self.log_request(code)
493        self.send_response_only(code, message)
494        self.send_header('Server', self.version_string())
495        self.send_header('Date', self.date_time_string())
496
497    def send_response_only(self, code, message=None):
498        """Send the response header only."""
499        if self.request_version != 'HTTP/0.9':
500            if message is None:
501                if code in self.responses:
502                    message = self.responses[code][0]
503                else:
504                    message = ''
505            if not hasattr(self, '_headers_buffer'):
506                self._headers_buffer = []
507            self._headers_buffer.append(("%s %d %s\r\n" %
508                    (self.protocol_version, code, message)).encode(
509                        'latin-1', 'strict'))
510
511    def send_header(self, keyword, value):
512        """Send a MIME header to the headers buffer."""
513        if self.request_version != 'HTTP/0.9':
514            if not hasattr(self, '_headers_buffer'):
515                self._headers_buffer = []
516            self._headers_buffer.append(
517                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
518
519        if keyword.lower() == 'connection':
520            if value.lower() == 'close':
521                self.close_connection = True
522            elif value.lower() == 'keep-alive':
523                self.close_connection = False
524
525    def end_headers(self):
526        """Send the blank line ending the MIME headers."""
527        if self.request_version != 'HTTP/0.9':
528            self._headers_buffer.append(b"\r\n")
529            self.flush_headers()
530
531    def flush_headers(self):
532        if hasattr(self, '_headers_buffer'):
533            self.wfile.write(b"".join(self._headers_buffer))
534            self._headers_buffer = []
535
536    def log_request(self, code='-', size='-'):
537        """Log an accepted request.
538
539        This is called by send_response().
540
541        """
542        if isinstance(code, HTTPStatus):
543            code = code.value
544        self.log_message('"%s" %s %s',
545                         self.requestline, str(code), str(size))
546
547    def log_error(self, format, *args):
548        """Log an error.
549
550        This is called when a request cannot be fulfilled.  By
551        default it passes the message on to log_message().
552
553        Arguments are the same as for log_message().
554
555        XXX This should go to the separate error log.
556
557        """
558
559        self.log_message(format, *args)
560
561    def log_message(self, format, *args):
562        """Log an arbitrary message.
563
564        This is used by all other logging functions.  Override
565        it if you have specific logging wishes.
566
567        The first argument, FORMAT, is a format string for the
568        message to be logged.  If the format string contains
569        any % escapes requiring parameters, they should be
570        specified as subsequent arguments (it's just like
571        printf!).
572
573        The client ip and current date/time are prefixed to
574        every message.
575
576        """
577
578        sys.stderr.write("%s - - [%s] %s\n" %
579                         (self.address_string(),
580                          self.log_date_time_string(),
581                          format%args))
582
583    def version_string(self):
584        """Return the server software version string."""
585        return self.server_version + ' ' + self.sys_version
586
587    def date_time_string(self, timestamp=None):
588        """Return the current date and time formatted for a message header."""
589        if timestamp is None:
590            timestamp = time.time()
591        return email.utils.formatdate(timestamp, usegmt=True)
592
593    def log_date_time_string(self):
594        """Return the current time formatted for logging."""
595        now = time.time()
596        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
597        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
598                day, self.monthname[month], year, hh, mm, ss)
599        return s
600
    # Fixed English day abbreviations, Monday first.
    # NOTE(review): not referenced by the code visible in this part of the
    # file; presumably kept for subclasses or older callers -- confirm.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Fixed English month abbreviations, indexed by the 1-based month number
    # (index 0 is a None placeholder).  Used by log_date_time_string().
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
606
607    def address_string(self):
608        """Return the client address."""
609
610        return self.client_address[0]
611
    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    # (parse_request() only keeps a connection open when this value
    # compares >= "HTTP/1.1").  It is also the version written in the
    # status line by send_response_only().
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    # (passed as _class to http.client.parse_headers() in parse_request()).
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    # Maps every HTTPStatus member to a (phrase, description) tuple;
    # send_error() and send_response_only() look status codes up here.
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
626
627
628class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
629
630    """Simple HTTP request handler with GET and HEAD commands.
631
632    This serves files from the current directory and any of its
633    subdirectories.  The MIME type for files is determined by
634    calling the .guess_type() method.
635
636    The GET and HEAD requests are identical except that the HEAD
637    request omits the actual contents of the file.
638
639    """
640
    # Server software token; joined with sys_version by version_string().
    server_version = "SimpleHTTP/" + __version__
    # Both names are bound to the same dict object, which maps a file name
    # suffix to a MIME type.
    # NOTE(review): presumably consulted by guess_type(), which is outside
    # the part of the file visible here -- confirm.
    extensions_map = _encodings_map_default = {
        '.gz': 'application/gzip',
        '.Z': 'application/octet-stream',
        '.bz2': 'application/x-bzip2',
        '.xz': 'application/x-xz',
    }
648
649    def __init__(self, *args, directory=None, **kwargs):
650        if directory is None:
651            directory = os.getcwd()
652        self.directory = os.fspath(directory)
653        super().__init__(*args, **kwargs)
654
655    def do_GET(self):
656        """Serve a GET request."""
657        f = self.send_head()
658        if f:
659            try:
660                self.copyfile(f, self.wfile)
661            finally:
662                f.close()
663
664    def do_HEAD(self):
665        """Serve a HEAD request."""
666        f = self.send_head()
667        if f:
668            f.close()
669
670    def send_head(self):
671        """Common code for GET and HEAD commands.
672
673        This sends the response code and MIME headers.
674
675        Return value is either a file object (which has to be copied
676        to the outputfile by the caller unless the command was HEAD,
677        and must be closed by the caller under all circumstances), or
678        None, in which case the caller has nothing further to do.
679
680        """
681        path = self.translate_path(self.path)
682        f = None
683        if os.path.isdir(path):
684            parts = urllib.parse.urlsplit(self.path)
685            if not parts.path.endswith('/'):
686                # redirect browser - doing basically what apache does
687                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
688                new_parts = (parts[0], parts[1], parts[2] + '/',
689                             parts[3], parts[4])
690                new_url = urllib.parse.urlunsplit(new_parts)
691                self.send_header("Location", new_url)
692                self.end_headers()
693                return None
694            for index in "index.html", "index.htm":
695                index = os.path.join(path, index)
696                if os.path.exists(index):
697                    path = index
698                    break
699            else:
700                return self.list_directory(path)
701        ctype = self.guess_type(path)
702        # check for trailing "/" which should return 404. See Issue17324
703        # The test for this was added in test_httpserver.py
704        # However, some OS platforms accept a trailingSlash as a filename
705        # See discussion on python-dev and Issue34711 regarding
706        # parseing and rejection of filenames with a trailing slash
707        if path.endswith("/"):
708            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
709            return None
710        try:
711            f = open(path, 'rb')
712        except OSError:
713            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
714            return None
715
716        try:
717            fs = os.fstat(f.fileno())
718            # Use browser cache if possible
719            if ("If-Modified-Since" in self.headers
720                    and "If-None-Match" not in self.headers):
721                # compare If-Modified-Since and time of last file modification
722                try:
723                    ims = email.utils.parsedate_to_datetime(
724                        self.headers["If-Modified-Since"])
725                except (TypeError, IndexError, OverflowError, ValueError):
726                    # ignore ill-formed values
727                    pass
728                else:
729                    if ims.tzinfo is None:
730                        # obsolete format with no timezone, cf.
731                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
732                        ims = ims.replace(tzinfo=datetime.timezone.utc)
733                    if ims.tzinfo is datetime.timezone.utc:
734                        # compare to UTC datetime of last modification
735                        last_modif = datetime.datetime.fromtimestamp(
736                            fs.st_mtime, datetime.timezone.utc)
737                        # remove microseconds, like in If-Modified-Since
738                        last_modif = last_modif.replace(microsecond=0)
739
740                        if last_modif <= ims:
741                            self.send_response(HTTPStatus.NOT_MODIFIED)
742                            self.end_headers()
743                            f.close()
744                            return None
745
746            self.send_response(HTTPStatus.OK)
747            self.send_header("Content-type", ctype)
748            self.send_header("Content-Length", str(fs[6]))
749            self.send_header("Last-Modified",
750                self.date_time_string(fs.st_mtime))
751            self.end_headers()
752            return f
753        except:
754            f.close()
755            raise
756
757    def list_directory(self, path):
758        """Helper to produce a directory listing (absent index.html).
759
760        Return value is either a file object, or None (indicating an
761        error).  In either case, the headers are sent, making the
762        interface the same as for send_head().
763
764        """
765        try:
766            list = os.listdir(path)
767        except OSError:
768            self.send_error(
769                HTTPStatus.NOT_FOUND,
770                "No permission to list directory")
771            return None
772        list.sort(key=lambda a: a.lower())
773        r = []
774        try:
775            displaypath = urllib.parse.unquote(self.path,
776                                               errors='surrogatepass')
777        except UnicodeDecodeError:
778            displaypath = urllib.parse.unquote(path)
779        displaypath = html.escape(displaypath, quote=False)
780        enc = sys.getfilesystemencoding()
781        title = 'Directory listing for %s' % displaypath
782        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
783                 '"http://www.w3.org/TR/html4/strict.dtd">')
784        r.append('<html>\n<head>')
785        r.append('<meta http-equiv="Content-Type" '
786                 'content="text/html; charset=%s">' % enc)
787        r.append('<title>%s</title>\n</head>' % title)
788        r.append('<body>\n<h1>%s</h1>' % title)
789        r.append('<hr>\n<ul>')
790        for name in list:
791            fullname = os.path.join(path, name)
792            displayname = linkname = name
793            # Append / for directories or @ for symbolic links
794            if os.path.isdir(fullname):
795                displayname = name + "/"
796                linkname = name + "/"
797            if os.path.islink(fullname):
798                displayname = name + "@"
799                # Note: a link to a directory displays with @ and links with /
800            r.append('<li><a href="%s">%s</a></li>'
801                    % (urllib.parse.quote(linkname,
802                                          errors='surrogatepass'),
803                       html.escape(displayname, quote=False)))
804        r.append('</ul>\n<hr>\n</body>\n</html>\n')
805        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
806        f = io.BytesIO()
807        f.write(encoded)
808        f.seek(0)
809        self.send_response(HTTPStatus.OK)
810        self.send_header("Content-type", "text/html; charset=%s" % enc)
811        self.send_header("Content-Length", str(len(encoded)))
812        self.end_headers()
813        return f
814
815    def translate_path(self, path):
816        """Translate a /-separated PATH to the local filename syntax.
817
818        Components that mean special things to the local file system
819        (e.g. drive or directory names) are ignored.  (XXX They should
820        probably be diagnosed.)
821
822        """
823        # abandon query parameters
824        path = path.split('?',1)[0]
825        path = path.split('#',1)[0]
826        # Don't forget explicit trailing slash when normalizing. Issue17324
827        trailing_slash = path.rstrip().endswith('/')
828        try:
829            path = urllib.parse.unquote(path, errors='surrogatepass')
830        except UnicodeDecodeError:
831            path = urllib.parse.unquote(path)
832        path = posixpath.normpath(path)
833        words = path.split('/')
834        words = filter(None, words)
835        path = self.directory
836        for word in words:
837            if os.path.dirname(word) or word in (os.curdir, os.pardir):
838                # Ignore components that are not a simple file/directory name
839                continue
840            path = os.path.join(path, word)
841        if trailing_slash:
842            path += '/'
843        return path
844
845    def copyfile(self, source, outputfile):
846        """Copy all data between two file objects.
847
848        The SOURCE argument is a file object open for reading
849        (or anything with a read() method) and the DESTINATION
850        argument is a file object open for writing (or
851        anything with a write() method).
852
853        The only reason for overriding this would be to change
854        the block size or perhaps to replace newlines by CRLF
855        -- note however that this the default server uses this
856        to copy binary data as well.
857
858        """
859        shutil.copyfileobj(source, outputfile)
860
861    def guess_type(self, path):
862        """Guess the type of a file.
863
864        Argument is a PATH (a filename).
865
866        Return value is a string of the form type/subtype,
867        usable for a MIME Content-type header.
868
869        The default implementation looks the file's extension
870        up in the table self.extensions_map, using application/octet-stream
871        as a default; however it would be permissible (if
872        slow) to look inside the data to make a better guess.
873
874        """
875        base, ext = posixpath.splitext(path)
876        if ext in self.extensions_map:
877            return self.extensions_map[ext]
878        ext = ext.lower()
879        if ext in self.extensions_map:
880            return self.extensions_map[ext]
881        guess, _ = mimetypes.guess_type(path)
882        if guess:
883            return guess
884        return 'application/octet-stream'
885
886
887# Utilities for CGIHTTPRequestHandler
888
889def _url_collapse_path(path):
890    """
891    Given a URL path, remove extra '/'s and '.' path elements and collapse
892    any '..' references and returns a collapsed path.
893
894    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
895    The utility of this function is limited to is_cgi method and helps
896    preventing some security attacks.
897
898    Returns: The reconstituted URL, which will always start with a '/'.
899
900    Raises: IndexError if too many '..' occur within the path.
901
902    """
903    # Query component should not be involved.
904    path, _, query = path.partition('?')
905    path = urllib.parse.unquote(path)
906
907    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
908    # path semantics rather than local operating system semantics.
909    path_parts = path.split('/')
910    head_parts = []
911    for part in path_parts[:-1]:
912        if part == '..':
913            head_parts.pop() # IndexError if more '..' than prior parts
914        elif part and part != '.':
915            head_parts.append( part )
916    if path_parts:
917        tail_part = path_parts.pop()
918        if tail_part:
919            if tail_part == '..':
920                head_parts.pop()
921                tail_part = ''
922            elif tail_part == '.':
923                tail_part = ''
924    else:
925        tail_part = ''
926
927    if query:
928        tail_part = '?'.join((tail_part, query))
929
930    splitpath = ('/' + '/'.join(head_parts), tail_part)
931    collapsed_path = "/".join(splitpath)
932
933    return collapsed_path
934
935
936
# Cached uid of the 'nobody' user; filled in by nobody_uid() on first use.
nobody = None

def nobody_uid():
    """Internal routine: return the uid used for the 'nobody' user.

    The value is looked up once and cached in the module-level
    ``nobody``.  When the password database has no 'nobody' entry, one
    more than the highest uid listed there is used instead.  Returns -1
    (without caching) when the pwd module cannot be imported.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
953
954
def executable(path):
    """Return whether os.access() reports execute permission for path."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
958
959
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 (Not Implemented) error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are handed to run_cgi(); everything else is
        delegated to SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Try each successively longer '/'-terminated prefix until one
        # of them is a configured CGI directory.
        dir_sep = collapsed_path.find('/', 1)
        while dir_sep > 0 and collapsed_path[:dir_sep] not in self.cgi_directories:
            dir_sep = collapsed_path.find('/', dir_sep+1)
        if dir_sep > 0:
            head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
            self.cgi_info = head, tail
            return True
        return False

    # URL path prefixes that is_cgi() accepts as CGI script directories.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info (set by is_cgi()) to locate the script.  A
        404 error is sent if the script does not exist and a 403 error
        if it is not a plain file or (where checked) not executable.
        Otherwise the CGI environment is built from the request, a 200
        status line is sent, and the script is run: via fork/execve
        when os.fork is available, via subprocess.Popen otherwise.

        Always returns None.
        """
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        # Extend `dir` over every leading component that is an existing
        # directory, so scripts in sub-directories of a CGI directory
        # can be found.
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are run through the interpreter
        # (see below), so only then is the executable check skipped.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        authorization = base64.decodebytes(
                            authorization).decode('ascii')
                    except (binascii.Error, UnicodeError):
                        # Malformed credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        # Split on the first ':' only.  The user-id cannot
                        # contain a colon but the password may (RFC 7617),
                        # and such a password must not hide the user name.
                        authorization = authorization.split(':', 1)
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs, so the
        # script itself cannot choose a different status code.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed to the script as an argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                exitcode = os.waitstatus_to_exitcode(sts)
                if exitcode:
                    self.log_error(f"CGI script exit code {exitcode}")
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    # Best effort: keep the current uid if we are not
                    # allowed to change it.
                    pass
                # Wire the request socket to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: whatever went wrong, the
                # child must report it and exit, not fall back into the
                # server's request loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: no body to forward.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1221
1222
1223def _get_best_family(*address):
1224    infos = socket.getaddrinfo(
1225        *address,
1226        type=socket.SOCK_STREAM,
1227        flags=socket.AI_PASSIVE,
1228    )
1229    family, type, proto, canonname, sockaddr = next(iter(infos))
1230    return family, sockaddr
1231
1232
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Try out an HTTP request handler class.

    Starts an HTTP server on port 8000 (or the port argument), bound
    to the address that _get_best_family() resolves for *bind*, and
    serves requests until interrupted from the keyboard.

    Note that ServerClass.address_family and
    HandlerClass.protocol_version are assigned as a side effect.

    """
    family, server_address = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol

    with ServerClass(server_address, HandlerClass) as httpd:
        host, port = httpd.socket.getsockname()[:2]
        # An IPv6 literal must be bracketed when it appears in a URL.
        if ':' in host:
            url_host = f'[{host}]'
        else:
            url_host = host
        banner = (
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        print(banner)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1256
if __name__ == '__main__':
    # Command-line entry point: serve files (or CGI scripts with --cgi)
    # from --directory on the given port and bind address.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                       help='Run as CGI Server')
    parser.add_argument('--bind', '-b', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('--directory', '-d', default=os.getcwd(),
                        help='Specify alternative directory '
                        '[default:current directory]')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    # Hand the real handler class to test() rather than a
    # functools.partial wrapper: test() assigns protocol_version on
    # whatever it is given, and on a partial that assignment never
    # reaches the handler class.
    if args.cgi:
        handler_class = CGIHTTPRequestHandler
    else:
        handler_class = SimpleHTTPRequestHandler

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):

        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

        def finish_request(self, request, client_address):
            # Supply --directory when each handler is instantiated, so
            # the option is honoured by the CGI handler as well as by
            # the plain file handler.
            self.RequestHandlerClass(request, client_address, self,
                                     directory=args.directory)

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=args.port,
        bind=args.bind,
    )
1295