1"""Base classes for server/gateway implementations"""
2
3from .util import FileWrapper, guess_scheme, is_hop_by_hop
4from .headers import Headers
5
6import sys, os, time
7
# Public names exported by a star-import of this module.
__all__ = [
    'BaseHandler',
    'SimpleHandler',
    'BaseCGIHandler',
    'CGIHandler',
    'IISCGIHandler',
    'read_environ',
]
12
# English abbreviations used when formatting HTTP dates.  These are spelled
# out here (rather than taken from the locale) so the output never changes
# with the process locale.
_weekdayname = "Mon Tue Wed Thu Fri Sat Sun".split()

# Index 0 is a placeholder so that month numbers 1..12 can be used directly.
_monthname = [None] + "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split()
18
def format_date_time(timestamp):
    """Format 'timestamp' (seconds since the epoch) as an HTTP date in GMT

    The result looks like 'Thu, 01 Jan 1970 00:00:00 GMT'; weekday and
    month names come from the module's English tables.
    """
    utc = time.gmtime(timestamp)
    weekday = _weekdayname[utc.tm_wday]
    month = _monthname[utc.tm_mon]
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        weekday, utc.tm_mday, month, utc.tm_year,
        utc.tm_hour, utc.tm_min, utc.tm_sec,
    )
24
# Predicate: True for the environ keys listed here, which read_environ()
# treats as coming from the HTTP request (via _needs_transcode()).
_is_request = frozenset((
    'SCRIPT_NAME',
    'PATH_INFO',
    'QUERY_STRING',
    'REQUEST_METHOD',
    'AUTH_TYPE',
    'CONTENT_TYPE',
    'CONTENT_LENGTH',
    'HTTPS',
    'REMOTE_USER',
    'REMOTE_IDENT',
)).__contains__
29
def _needs_transcode(k):
    """True if environ key 'k' holds a value that read_environ() re-decodes

    That is the case for the request variables known to '_is_request',
    for anything prefixed 'HTTP_' or 'SSL_', and for any such name with one
    or more 'REDIRECT_' prefixes in front of it.
    """
    if _is_request(k):
        return True
    if k.startswith('HTTP_'):
        return True
    if k.startswith('SSL_'):
        return True
    if not k.startswith('REDIRECT_'):
        return False
    # Strip one 'REDIRECT_' (9 characters) and classify what remains.
    return _needs_transcode(k[9:])
33
def read_environ():
    """Read environment, fixing HTTP variables

    Returns a new dict built from 'os.environ'.  Values whose key satisfies
    '_needs_transcode()' are turned back into bytes and then decoded as
    ISO-8859-1, to compensate for the bytes->unicode decoding that has
    already happened by the time they appear in the native-unicode
    'os.environ'.  All other values are copied unchanged.
    """
    fs_encoding = sys.getfilesystemencoding()

    # Use surrogate escapes where the codec machinery offers them
    # (Python 3.1+); otherwise fall back to 'replace'.
    error_handler = 'surrogateescape'
    try:
        ''.encode('utf-8', error_handler)
    except LookupError:
        error_handler = 'replace'

    on_windows = sys.platform == 'win32'
    environ = {}

    for k, v in os.environ.items():
        if not _needs_transcode(k):
            environ[k] = v
            continue

        if not on_windows:
            # Recover the original bytes from the unicode environ.
            environ[k] = v.encode(fs_encoding, error_handler).decode('iso-8859-1')
            continue

        # On win32, os.environ is natively Unicode, and different servers
        # decode the request bytes using different encodings.
        software = os.environ.get('SERVER_SOFTWARE', '').lower()

        if software.startswith('apache/'):
            # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1)
            # direct to the Unicode environ.  No modification needed.
            pass
        elif software.startswith('microsoft-iis/'):
            # IIS decodes the request as UTF-8 as long as the input is a
            # valid UTF-8 sequence; otherwise it uses the system code page
            # (mbcs), with no way to detect that this has happened.  UTF-8
            # is the more likely encoding, and mbcs is inherently unreliable
            # (an mbcs string that happens to be valid UTF-8 will not be
            # decoded as mbcs), so always recreate the bytes as UTF-8.
            v = v.encode('utf-8').decode('iso-8859-1')
        elif software.startswith('simplehttp/') and 'python/3' in software:
            # Python 3's http.server.CGIHTTPRequestHandler decodes using
            # the urllib.unquote default of UTF-8, amongst other issues.
            v = v.encode('utf-8').decode('iso-8859-1')
        else:
            # For other servers, guess that they have written bytes to the
            # environ using stdio byte-oriented interfaces, ending up with
            # the system code page.
            v = v.encode(fs_encoding, 'replace').decode('iso-8859-1')

        environ[k] = v

    return environ
92
93
class BaseHandler:
    """Manage the invocation of a WSGI application

    'run()' drives one request: it builds 'self.environ', calls the
    application, and sends whatever the application returns through
    'write()'.  Actual I/O is delegated to the methods at the bottom of the
    class ('_write()', '_flush()', 'get_stdin()', 'get_stderr()' and
    'add_cgi_vars()'), which raise NotImplementedError here and must be
    provided by subclasses.
    """

    # Configuration parameters; can override per-subclass or per-instance
    wsgi_version = (1,0)
    wsgi_multithread = True
    wsgi_multiprocess = True
    wsgi_run_once = False

    origin_server = True    # We are transmitting direct to client
    http_version  = "1.0"   # Version that should be used for response
    server_software = None  # String name of server software, if any

    # os_environ is used to supply configuration from the OS environment:
    # by default it's a copy of 'os.environ' as of import time, but you can
    # override this in e.g. your __init__ method.
    os_environ= read_environ()

    # Collaborator classes
    wsgi_file_wrapper = FileWrapper     # set to None to disable
    headers_class = Headers             # must be a Headers-like class

    # Error handling (also per-subclass or per-instance)
    traceback_limit = None  # Print entire traceback to self.get_stderr()
    error_status = "500 Internal Server Error"
    error_headers = [('Content-Type','text/plain')]
    error_body = b"A server error occurred.  Please contact the administrator."

    # State variables (don't mess with these)
    status = result = None
    headers_sent = False
    headers = None
    bytes_sent = 0

    def run(self, application):
        """Invoke the application

        Sets up the environ, calls 'application(environ, start_response)'
        and sends its result with 'finish_response()'.  Any exception raised
        along the way is handed to 'handle_error()'; if that fails too, the
        handler is closed and the second exception propagates to the caller.
        """
        # Note to self: don't move the close()!  Asynchronous servers shouldn't
        # call close() from finish_response(), so if you close() anywhere but
        # the double-error branch here, you'll break asynchronous servers by
        # prematurely closing.  Async servers must return from 'run()' without
        # closing if there might still be output to iterate over.
        try:
            self.setup_environ()
            self.result = application(self.environ, self.start_response)
            self.finish_response()
        except:
            try:
                self.handle_error()
            except:
                # If we get an error handling an error, just give up already!
                self.close()
                raise   # ...and let the actual server figure it out.


    def setup_environ(self):
        """Set up the environment for one request

        'self.environ' starts as a copy of 'self.os_environ', is extended by
        'add_cgi_vars()', and then receives the 'wsgi.*' keys.
        """

        env = self.environ = self.os_environ.copy()
        self.add_cgi_vars()

        env['wsgi.input']        = self.get_stdin()
        env['wsgi.errors']       = self.get_stderr()
        env['wsgi.version']      = self.wsgi_version
        env['wsgi.run_once']     = self.wsgi_run_once
        env['wsgi.url_scheme']   = self.get_scheme()
        env['wsgi.multithread']  = self.wsgi_multithread
        env['wsgi.multiprocess'] = self.wsgi_multiprocess

        if self.wsgi_file_wrapper is not None:
            env['wsgi.file_wrapper'] = self.wsgi_file_wrapper

        # Only fills in SERVER_SOFTWARE when the environ doesn't have it yet.
        if self.origin_server and self.server_software:
            env.setdefault('SERVER_SOFTWARE',self.server_software)


    def finish_response(self):
        """Send any iterable data, then close self and the iterable

        Subclasses intended for use in asynchronous servers will
        want to redefine this method, such that it sets up callbacks
        in the event loop to iterate over the data, and to call
        'self.close()' once the response is finished.

        If sending fails, only the application's iterable is closed and the
        exception is re-raised; the handler's own state is left untouched so
        that error handling can still consult it.
        """
        try:
            if not self.result_is_file() or not self.sendfile():
                for data in self.result:
                    self.write(data)
                self.finish_content()
        except:
            # Release the application's iterable, but do NOT call
            # self.close() here: it would reset 'headers_sent' and set
            # 'environ' to None, and handle_error() needs both to decide
            # whether (and how) an error page can still be sent.
            if hasattr(self.result, 'close'):
                self.result.close()
            raise
        else:
            # Full reset only once the response went out successfully.
            self.close()


    def get_scheme(self):
        """Return the URL scheme being used"""
        return guess_scheme(self.environ)


    def set_content_length(self):
        """Compute Content-Length or switch to chunked encoding if possible

        A Content-Length header is only added when 'self.result' reports a
        length of exactly one block, in which case the bytes counted so far
        are the whole body.
        """
        try:
            blocks = len(self.result)
        except (TypeError,AttributeError,NotImplementedError):
            # Result has no usable length (e.g. a generator); leave unset.
            pass
        else:
            if blocks==1:
                self.headers['Content-Length'] = str(self.bytes_sent)
                return
        # XXX Try for chunked encoding if origin server and client is 1.1


    def cleanup_headers(self):
        """Make any necessary header changes or defaults

        Subclasses can extend this to add other defaults.
        """
        if 'Content-Length' not in self.headers:
            self.set_content_length()

    def start_response(self, status, headers,exc_info=None):
        """'start_response()' callable as specified by PEP 3333

        'status' is a string such as '200 OK'; 'headers' is a list of
        (name, value) string pairs.  'exc_info', when given, is a
        'sys.exc_info()' tuple: if headers have already been sent that
        exception is re-raised, otherwise the previously set status and
        headers are replaced.  Without 'exc_info', calling this a second
        time raises AssertionError.  Returns the 'write()' callable.
        """

        if exc_info:
            try:
                if self.headers_sent:
                    # Re-raise the original exception object if headers were
                    # already sent.  Building a new instance from the old one
                    # fails for exception types whose constructor needs more
                    # than one argument.
                    raise exc_info[1].with_traceback(exc_info[2])
            finally:
                exc_info = None        # avoid dangling circular ref
        elif self.headers is not None:
            raise AssertionError("Headers already set!")

        self.status = status
        self.headers = self.headers_class(headers)
        status = self._convert_string_type(status, "Status")
        assert len(status)>=4,"Status must be at least 4 characters"
        assert status[:3].isdigit(), "Status message must begin w/3-digit code"
        assert status[3]==" ", "Status message must have a space after code"

        if __debug__:
            for name, val in headers:
                name = self._convert_string_type(name, "Header name")
                val = self._convert_string_type(val, "Header value")
                assert not is_hop_by_hop(name),"Hop-by-hop headers not allowed"

        return self.write

    def _convert_string_type(self, value, title):
        """Convert/check value type.

        Returns 'value' unchanged if it is exactly a 'str'; otherwise raises
        AssertionError, using 'title' to name the offending item.
        """
        if type(value) is str:
            return value
        raise AssertionError(
            "{0} must be of type str (got {1})".format(title, repr(value))
        )

    def send_preamble(self):
        """Transmit version/status/date/server, via self._write()

        An origin server writes the HTTP status line, plus 'Date' and
        'Server' headers when the application did not supply them; a
        non-origin handler writes a 'Status:' header instead.
        """
        if self.origin_server:
            if self.client_is_modern():
                self._write(('HTTP/%s %s\r\n' % (self.http_version,self.status)).encode('iso-8859-1'))
                if 'Date' not in self.headers:
                    self._write(
                        ('Date: %s\r\n' % format_date_time(time.time())).encode('iso-8859-1')
                    )
                if self.server_software and 'Server' not in self.headers:
                    self._write(('Server: %s\r\n' % self.server_software).encode('iso-8859-1'))
        else:
            self._write(('Status: %s\r\n' % self.status).encode('iso-8859-1'))

    def write(self, data):
        """'write()' callable as specified by PEP 3333

        'data' must be a 'bytes' instance.  The first call sends the stored
        headers before the data; every call flushes after writing.  Raises
        AssertionError if called before 'start_response()'.
        """

        assert type(data) is bytes, \
            "write() argument must be a bytes instance"

        if not self.status:
            raise AssertionError("write() before start_response()")

        elif not self.headers_sent:
            # Before the first output, send the stored headers
            self.bytes_sent = len(data)    # make sure we know content-length
            self.send_headers()
        else:
            self.bytes_sent += len(data)

        # XXX check Content-Length and truncate if too many bytes written?
        self._write(data)
        self._flush()


    def sendfile(self):
        """Platform-specific file transmission

        Override this method in subclasses to support platform-specific
        file transmission.  It is only called if the application's
        return iterable ('self.result') is an instance of
        'self.wsgi_file_wrapper'.

        This method should return a true value if it was able to actually
        transmit the wrapped file-like object using a platform-specific
        approach.  It should return a false value if normal iteration
        should be used instead.  An exception can be raised to indicate
        that transmission was attempted, but failed.

        NOTE: this method should call 'self.send_headers()' if
        'self.headers_sent' is false and it is going to attempt direct
        transmission of the file.
        """
        return False   # No platform-specific transmission by default


    def finish_content(self):
        """Ensure headers and content have both been sent"""
        if not self.headers_sent:
            # Only zero Content-Length if not set by the application (so
            # that HEAD requests can be satisfied properly, see #3839)
            self.headers.setdefault('Content-Length', "0")
            self.send_headers()
        else:
            pass # XXX check if content-length was too short?

    def close(self):
        """Close the iterable (if needed) and reset all instance vars

        Subclasses may want to also drop the client connection.
        """
        try:
            if hasattr(self.result,'close'):
                self.result.close()
        finally:
            # Reset state even if the iterable's close() raised.
            self.result = self.headers = self.status = self.environ = None
            self.bytes_sent = 0; self.headers_sent = False


    def send_headers(self):
        """Transmit headers to the client, via self._write()

        'headers_sent' is set even when nothing is written, which happens
        for an origin server whose client is not "modern" (see
        'client_is_modern()').
        """
        self.cleanup_headers()
        self.headers_sent = True
        if not self.origin_server or self.client_is_modern():
            self.send_preamble()
            self._write(bytes(self.headers))


    def result_is_file(self):
        """True if 'self.result' is an instance of 'self.wsgi_file_wrapper'"""
        wrapper = self.wsgi_file_wrapper
        return wrapper is not None and isinstance(self.result,wrapper)


    def client_is_modern(self):
        """True if client can accept status and headers

        That is, the environ's SERVER_PROTOCOL is anything but 'HTTP/0.9'.
        """
        return self.environ['SERVER_PROTOCOL'].upper() != 'HTTP/0.9'


    def log_exception(self,exc_info):
        """Log the 'exc_info' tuple in the server log

        The traceback is printed to 'self.get_stderr()', limited to
        'self.traceback_limit' entries, and the stream is flushed.

        Subclasses may override to retarget the output or change its format.
        """
        try:
            from traceback import print_exception
            stderr = self.get_stderr()
            print_exception(
                exc_info[0], exc_info[1], exc_info[2],
                self.traceback_limit, stderr
            )
            stderr.flush()
        finally:
            # Drop the local reference to the traceback.
            exc_info = None

    def handle_error(self):
        """Log current error, and send error output to client if possible

        Error output is only attempted while no headers have been sent yet.
        """
        self.log_exception(sys.exc_info())
        if not self.headers_sent:
            self.result = self.error_output(self.environ, self.start_response)
            self.finish_response()
        # XXX else: attempt advanced recovery techniques for HTML or text?

    def error_output(self, environ, start_response):
        """WSGI mini-app to create error output

        By default, this just uses the 'error_status', 'error_headers',
        and 'error_body' attributes to generate an output page.  It can
        be overridden in a subclass to dynamically generate diagnostics,
        choose an appropriate message for the user's preferred language, etc.

        Note, however, that it's not recommended from a security perspective to
        spit out diagnostics to any old user; ideally, you should have to do
        something special to enable diagnostic output, which is why we don't
        include any here!
        """
        # Pass a copy of the header list so the class-level default is not
        # shared with the Headers object built from it.
        start_response(self.error_status,self.error_headers[:],sys.exc_info())
        return [self.error_body]


    # Pure abstract methods; *must* be overridden in subclasses

    def _write(self,data):
        """Override in subclass to buffer data for send to client

        It's okay if this method actually transmits the data; BaseHandler
        just separates write and flush operations for greater efficiency
        when the underlying system actually has such a distinction.
        """
        raise NotImplementedError

    def _flush(self):
        """Override in subclass to force sending of recent '_write()' calls

        It's okay if this method is a no-op (i.e., if '_write()' actually
        sends the data).
        """
        raise NotImplementedError

    def get_stdin(self):
        """Override in subclass to return suitable 'wsgi.input'"""
        raise NotImplementedError

    def get_stderr(self):
        """Override in subclass to return suitable 'wsgi.errors'"""
        raise NotImplementedError

    def add_cgi_vars(self):
        """Override in subclass to insert CGI variables in 'self.environ'"""
        raise NotImplementedError
418
419
class SimpleHandler(BaseHandler):
    """Handler that's just initialized with streams, environment, etc.

    Intended for synchronous HTTP/1.0 origin servers: given the input,
    output and error streams plus an environ mapping, it takes care of
    sending the entire response output.

    Usage::

        handler = SimpleHandler(
            inp,out,err,env, multithread=False, multiprocess=True
        )
        handler.run(app)"""

    def __init__(self, stdin, stdout, stderr, environ,
                 multithread=True, multiprocess=False):
        self.stdin, self.stdout, self.stderr = stdin, stdout, stderr
        self.base_env = environ
        self.wsgi_multithread = multithread
        self.wsgi_multiprocess = multiprocess

    def get_stdin(self):
        """Return the input stream given to the constructor"""
        return self.stdin

    def get_stderr(self):
        """Return the error stream given to the constructor"""
        return self.stderr

    def add_cgi_vars(self):
        """Merge the environ given to the constructor into 'self.environ'"""
        self.environ.update(self.base_env)

    def _write(self, data):
        """Write 'data' to the output stream, retrying after partial writes

        A 'write()' that returns None or the full length is taken as
        complete.  Anything else triggers a DeprecationWarning and the
        unwritten remainder is written again until nothing is left.
        """
        written = self.stdout.write(data)
        if written is None or written == len(data):
            return
        from warnings import warn
        warn("SimpleHandler.stdout.write() should not do partial writes",
            DeprecationWarning)
        remaining = data[written:]
        while remaining:
            written = self.stdout.write(remaining)
            remaining = remaining[written:]

    def _flush(self):
        """Flush the output stream

        After the first successful call the stream's own 'flush' is bound
        on the instance, so later calls go straight to the stream.
        """
        self.stdout.flush()
        self._flush = self.stdout.flush
468
469
class BaseCGIHandler(SimpleHandler):

    """CGI-like systems using input/output/error streams and environ mapping

    Usage::

        handler = BaseCGIHandler(inp,out,err,env)
        handler.run(app)

    Useful for gateway protocols such as ReadyExec and FastCGI, which
    provide usable input/output/error streams together with an environment
    mapping.  CGIHandler builds on this class, using sys.stdin, os.environ,
    and so on.

    The constructor is inherited unchanged and therefore also accepts the
    keyword arguments 'multithread' and 'multiprocess' (defaulting to 'True'
    and 'False' respectively), which control the configuration sent to the
    application.  'origin_server' is set to False (to enable CGI-like
    output), and 'wsgi.run_once' is assumed to be False.
    """

    # Not an origin server: the response preamble is a 'Status:' header
    # rather than an HTTP status line.
    origin_server = False
492
493
class CGIHandler(BaseCGIHandler):

    """CGI-based invocation via sys.stdin/stdout/stderr and os.environ

    Usage::

        CGIHandler().run(app)

    Unlike BaseCGIHandler, this class takes no initialization parameters:
    it always uses 'sys.stdin', 'os.environ', and friends, with
    'wsgi.run_once' of 'True', 'wsgi.multithread' of 'False', and
    'wsgi.multiprocess' of 'True'.

    If you need to override any of these parameters, use BaseCGIHandler
    instead.
    """

    wsgi_run_once = True
    # Do not allow os.environ to leak between requests in Google App Engine
    # and other multi-run CGI use cases.  This is not easily testable.
    # See http://bugs.python.org/issue7250
    os_environ = {}

    def __init__(self):
        # Binary buffers for input/output, the text stream for errors, and a
        # freshly read environment for every instance.
        stdin = sys.stdin.buffer
        stdout = sys.stdout.buffer
        stderr = sys.stderr
        environ = read_environ()
        BaseCGIHandler.__init__(
            self, stdin, stdout, stderr, environ,
            multithread=False, multiprocess=True,
        )
522
523
class IISCGIHandler(BaseCGIHandler):
    """CGI-based invocation with workaround for IIS path bug

    Use this handler in preference to CGIHandler when deploying on
    Microsoft IIS without having set the config allowPathInfo option (IIS>=7)
    or metabase allowPathInfoForScriptMappings (IIS<7).
    """
    wsgi_run_once = True
    os_environ = {}

    # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
    # the front, causing problems for WSGI applications that wish to implement
    # routing. This handler strips any such duplicated path.

    # IIS can be configured to pass the correct PATH_INFO, but this causes
    # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
    # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
    # setting can only be made on a vhost level, affecting all other script
    # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
    # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
    # rarely uses it because there is still no UI for it.)

    # There is no way for CGI code to tell whether the option was set, so a
    # separate handler class is provided.
    def __init__(self):
        environ = read_environ()
        path_info = environ.get('PATH_INFO', '')
        script_name = environ.get('SCRIPT_NAME', '')

        # Compare with a trailing slash on both sides so that only a whole
        # leading path segment match counts ('/app' vs. '/application').
        duplicated = (path_info + '/').startswith(script_name + '/')
        if duplicated:
            environ['PATH_INFO'] = path_info[len(script_name):]

        BaseCGIHandler.__init__(
            self,
            sys.stdin.buffer,
            sys.stdout.buffer,
            sys.stderr,
            environ,
            multithread=False,
            multiprocess=True,
        )
558