"""Base classes for server/gateway implementations"""

from .util import FileWrapper, guess_scheme, is_hop_by_hop
from .headers import Headers

import sys
import os
import time

__all__ = [
    'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler',
    'IISCGIHandler', 'read_environ'
]

# Weekday and month names for HTTP date/time formatting; always English!
_weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
_monthname = [None, # Dummy so we can use 1-based month numbers
              "Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

def format_date_time(timestamp):
    """Format 'timestamp' (seconds since the epoch) as an HTTP date string

    The time is always expressed in GMT with English day/month names, e.g.
    'format_date_time(0)' gives 'Thu, 01 Jan 1970 00:00:00 GMT'.
    """
    year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        _weekdayname[wd], day, _monthname[month], year, hh, mm, ss
    )

# Membership test for the CGI variable names whose values come from the
# request and therefore need their bytes recovered in 'read_environ()'.
_is_request = {
    'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE',
    'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT',
}.__contains__

def _needs_transcode(k):
    """True if environment key 'k' holds request-derived data

    That is: one of the names accepted by '_is_request', any 'HTTP_*' or
    'SSL_*' key, or any such key behind one or more 'REDIRECT_' prefixes.
    """
    return _is_request(k) or k.startswith('HTTP_') or k.startswith('SSL_') \
        or (k.startswith('REDIRECT_') and _needs_transcode(k[9:]))

def read_environ():
    """Read environment, fixing HTTP variables

    Returns a new dict built from 'os.environ'.  Values of request-derived
    keys (see '_needs_transcode') are re-encoded so that each character of
    the resulting str stands for one original byte (ISO-8859-1 style); all
    other values are copied unchanged.
    """
    enc = sys.getfilesystemencoding()
    esc = 'surrogateescape'
    try:
        ''.encode('utf-8', esc)
    except LookupError:
        # Error handler not registered: fall back to lossy replacement.
        esc = 'replace'
    environ = {}

    # Take the basic environment from native-unicode os.environ. Attempt to
    # fix up the variables that come from the HTTP request to compensate for
    # the bytes->unicode decoding step that will already have taken place.
    for k, v in os.environ.items():
        if _needs_transcode(k):

            # On win32, the os.environ is natively Unicode. Different servers
            # decode the request bytes using different encodings.
            if sys.platform == 'win32':
                software = os.environ.get('SERVER_SOFTWARE', '').lower()

                # On IIS, the HTTP request will be decoded as UTF-8 as long
                # as the input is a valid UTF-8 sequence. Otherwise it is
                # decoded using the system code page (mbcs), with no way to
                # detect this has happened. Because UTF-8 is the more likely
                # encoding, and mbcs is inherently unreliable (an mbcs string
                # that happens to be valid UTF-8 will not be decoded as mbcs)
                # always recreate the original bytes as UTF-8.
                if software.startswith('microsoft-iis/'):
                    v = v.encode('utf-8').decode('iso-8859-1')

                # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1) direct
                # to the Unicode environ. No modification needed.
                elif software.startswith('apache/'):
                    pass

                # Python 3's http.server.CGIHTTPRequestHandler decodes
                # using the urllib.unquote default of UTF-8, amongst other
                # issues.
                elif (
                    software.startswith('simplehttp/')
                    and 'python/3' in software
                ):
                    v = v.encode('utf-8').decode('iso-8859-1')

                # For other servers, guess that they have written bytes to
                # the environ using stdio byte-oriented interfaces, ending up
                # with the system code page.
                else:
                    v = v.encode(enc, 'replace').decode('iso-8859-1')

            # Recover bytes from unicode environ, using surrogate escapes
            # where available (Python 3.1+).
            else:
                v = v.encode(enc, esc).decode('iso-8859-1')

        environ[k] = v
    return environ


class BaseHandler:
    """Manage the invocation of a WSGI application"""

    # Configuration parameters; can override per-subclass or per-instance
    wsgi_version = (1,0)
    wsgi_multithread = True
    wsgi_multiprocess = True
    wsgi_run_once = False

    origin_server = True    # We are transmitting direct to client
    http_version  = "1.0"   # Version that should be used for response
    server_software = None  # String name of server software, if any

    # os_environ is used to supply configuration from the OS environment:
    # by default it's a copy of 'os.environ' as of import time, but you can
    # override this in e.g. your __init__ method.
    os_environ = read_environ()

    # Collaborator classes
    wsgi_file_wrapper = FileWrapper     # set to None to disable
    headers_class = Headers             # must be a Headers-like class

    # Error handling (also per-subclass or per-instance)
    traceback_limit = None  # Print entire traceback to self.get_stderr()
    error_status = "500 Internal Server Error"
    error_headers = [('Content-Type','text/plain')]
    error_body = b"A server error occurred.  Please contact the administrator."

    # State variables (don't mess with these)
    status = result = None
    headers_sent = False
    headers = None
    bytes_sent = 0

    def run(self, application):
        """Invoke the application"""
        # Note to self: don't move the close()!  Asynchronous servers shouldn't
        # call close() from finish_response(), so if you close() anywhere but
        # the double-error branch here, you'll break asynchronous servers by
        # prematurely closing.  Async servers must return from 'run()' without
        # closing if there might still be output to iterate over.
        try:
            self.setup_environ()
            self.result = application(self.environ, self.start_response)
            self.finish_response()
        except:
            try:
                self.handle_error()
            except:
                # If we get an error handling an error, just give up already!
                self.close()
                raise   # ...and let the actual server figure it out.


    def setup_environ(self):
        """Set up the environment for one request"""

        env = self.environ = self.os_environ.copy()
        self.add_cgi_vars()

        env['wsgi.input']        = self.get_stdin()
        env['wsgi.errors']       = self.get_stderr()
        env['wsgi.version']      = self.wsgi_version
        env['wsgi.run_once']     = self.wsgi_run_once
        env['wsgi.url_scheme']   = self.get_scheme()
        env['wsgi.multithread']  = self.wsgi_multithread
        env['wsgi.multiprocess'] = self.wsgi_multiprocess

        if self.wsgi_file_wrapper is not None:
            env['wsgi.file_wrapper'] = self.wsgi_file_wrapper

        if self.origin_server and self.server_software:
            env.setdefault('SERVER_SOFTWARE',self.server_software)


    def finish_response(self):
        """Send any iterable data, then close self and the iterable

        Subclasses intended for use in asynchronous servers will
        want to redefine this method, such that it sets up callbacks
        in the event loop to iterate over the data, and to call
        'self.close()' once the response is finished.

        If sending fails, only the application's iterable is closed and the
        exception propagates; the handler state is left untouched so that
        'handle_error()' can still inspect it.
        """
        try:
            if not self.result_is_file() or not self.sendfile():
                for data in self.result:
                    self.write(data)
                self.finish_content()
        except:
            # Close just the iterable returned by the application.  We must
            # NOT call self.close() here: it resets 'environ', 'headers',
            # 'status' and 'headers_sent', which 'handle_error()' (invoked by
            # 'run()' right after this) still needs in order to decide
            # whether, and how, to send an error response.
            if hasattr(self.result, 'close'):
                self.result.close()
            raise
        else:
            # Full reset only once the response was sent successfully.
            self.close()


    def get_scheme(self):
        """Return the URL scheme being used"""
        return guess_scheme(self.environ)


    def set_content_length(self):
        """Compute Content-Length or switch to chunked encoding if possible"""
        try:
            blocks = len(self.result)
        except (TypeError,AttributeError,NotImplementedError):
            pass
        else:
            if blocks==1:
                # A single block: what has been written so far is the whole
                # body, so its size is the content length.
                self.headers['Content-Length'] = str(self.bytes_sent)
                return
        # XXX Try for chunked encoding if origin server and client is 1.1


    def cleanup_headers(self):
        """Make any necessary header changes or defaults

        Subclasses can extend this to add other defaults.
        """
        if 'Content-Length' not in self.headers:
            self.set_content_length()

    def start_response(self, status, headers,exc_info=None):
        """'start_response()' callable as specified by PEP 3333

        Stores 'status' and 'headers' for later transmission and returns the
        'write()' callable.  If 'exc_info' is supplied and the headers have
        already been sent, the original exception is re-raised; if it is not
        supplied and headers were already set, AssertionError is raised.
        """

        if exc_info:
            try:
                if self.headers_sent:
                    # Re-raise the original exception object if headers were
                    # already sent.  Raising the instance itself (rather than
                    # constructing a new one from it) keeps its arguments and
                    # message intact and works for any exception class.
                    raise exc_info[1].with_traceback(exc_info[2])
            finally:
                exc_info = None        # avoid dangling circular ref
        elif self.headers is not None:
            raise AssertionError("Headers already set!")

        self.status = status
        self.headers = self.headers_class(headers)
        status = self._convert_string_type(status, "Status")
        assert len(status)>=4,"Status must be at least 4 characters"
        assert status[:3].isdigit(), "Status message must begin w/3-digit code"
        assert status[3]==" ", "Status message must have a space after code"

        if __debug__:
            for name, val in headers:
                name = self._convert_string_type(name, "Header name")
                val = self._convert_string_type(val, "Header value")
                assert not is_hop_by_hop(name),"Hop-by-hop headers not allowed"

        return self.write

    def _convert_string_type(self, value, title):
        """Convert/check value type.

        Returns 'value' unchanged when it is exactly a 'str'; otherwise
        raises AssertionError naming 'title' in the message.
        """
        if type(value) is str:
            return value
        raise AssertionError(
            "{0} must be of type str (got {1})".format(title, repr(value))
        )

    def send_preamble(self):
        """Transmit version/status/date/server, via self._write()"""
        if self.origin_server:
            if self.client_is_modern():
                self._write(('HTTP/%s %s\r\n' % (self.http_version,self.status)).encode('iso-8859-1'))
                if 'Date' not in self.headers:
                    self._write(
                        ('Date: %s\r\n' % format_date_time(time.time())).encode('iso-8859-1')
                    )
                if self.server_software and 'Server' not in self.headers:
                    self._write(('Server: %s\r\n' % self.server_software).encode('iso-8859-1'))
        else:
            # Not an origin server: emit a CGI-style 'Status:' line instead.
            self._write(('Status: %s\r\n' % self.status).encode('iso-8859-1'))

    def write(self, data):
        """'write()' callable as specified by PEP 3333"""

        assert type(data) is bytes, \
            "write() argument must be a bytes instance"

        if not self.status:
            raise AssertionError("write() before start_response()")

        elif not self.headers_sent:
            # Before the first output, send the stored headers
            self.bytes_sent = len(data)    # make sure we know content-length
            self.send_headers()
        else:
            self.bytes_sent += len(data)

        # XXX check Content-Length and truncate if too many bytes written?
        self._write(data)
        self._flush()


    def sendfile(self):
        """Platform-specific file transmission

        Override this method in subclasses to support platform-specific
        file transmission.  It is only called if the application's
        return iterable ('self.result') is an instance of
        'self.wsgi_file_wrapper'.

        This method should return a true value if it was able to actually
        transmit the wrapped file-like object using a platform-specific
        approach.  It should return a false value if normal iteration
        should be used instead.  An exception can be raised to indicate
        that transmission was attempted, but failed.

        NOTE: this method should call 'self.send_headers()' if
        'self.headers_sent' is false and it is going to attempt direct
        transmission of the file.
        """
        return False   # No platform-specific transmission by default


    def finish_content(self):
        """Ensure headers and content have both been sent"""
        if not self.headers_sent:
            # Only zero Content-Length if not set by the application (so
            # that HEAD requests can be satisfied properly, see #3839)
            self.headers.setdefault('Content-Length', "0")
            self.send_headers()
        else:
            pass # XXX check if content-length was too short?

    def close(self):
        """Close the iterable (if needed) and reset all instance vars

        Subclasses may want to also drop the client connection.
        """
        try:
            if hasattr(self.result,'close'):
                self.result.close()
        finally:
            self.result = self.headers = self.status = self.environ = None
            self.bytes_sent = 0; self.headers_sent = False


    def send_headers(self):
        """Transmit headers to the client, via self._write()"""
        self.cleanup_headers()
        self.headers_sent = True
        if not self.origin_server or self.client_is_modern():
            self.send_preamble()
            self._write(bytes(self.headers))


    def result_is_file(self):
        """True if 'self.result' is an instance of 'self.wsgi_file_wrapper'"""
        wrapper = self.wsgi_file_wrapper
        return wrapper is not None and isinstance(self.result,wrapper)


    def client_is_modern(self):
        """True if client can accept status and headers"""
        return self.environ['SERVER_PROTOCOL'].upper() != 'HTTP/0.9'


    def log_exception(self,exc_info):
        """Log the 'exc_info' tuple in the server log

        Subclasses may override to retarget the output or change its format.
        """
        try:
            from traceback import print_exception
            stderr = self.get_stderr()
            print_exception(
                exc_info[0], exc_info[1], exc_info[2],
                self.traceback_limit, stderr
            )
            stderr.flush()
        finally:
            exc_info = None

    def handle_error(self):
        """Log current error, and send error output to client if possible"""
        self.log_exception(sys.exc_info())
        if not self.headers_sent:
            self.result = self.error_output(self.environ, self.start_response)
            self.finish_response()
        # XXX else: attempt advanced recovery techniques for HTML or text?

    def error_output(self, environ, start_response):
        """WSGI mini-app to create error output

        By default, this just uses the 'error_status', 'error_headers',
        and 'error_body' attributes to generate an output page.  It can
        be overridden in a subclass to dynamically generate diagnostics,
        choose an appropriate message for the user's preferred language, etc.

        Note, however, that it's not recommended from a security perspective to
        spit out diagnostics to any old user; ideally, you should have to do
        something special to enable diagnostic output, which is why we don't
        include any here!
        """
        start_response(self.error_status,self.error_headers[:],sys.exc_info())
        return [self.error_body]


    # Pure abstract methods; *must* be overridden in subclasses

    def _write(self,data):
        """Override in subclass to buffer data for send to client

        It's okay if this method actually transmits the data; BaseHandler
        just separates write and flush operations for greater efficiency
        when the underlying system actually has such a distinction.
        """
        raise NotImplementedError

    def _flush(self):
        """Override in subclass to force sending of recent '_write()' calls

        It's okay if this method is a no-op (i.e., if '_write()' actually
        sends the data).
        """
        raise NotImplementedError

    def get_stdin(self):
        """Override in subclass to return suitable 'wsgi.input'"""
        raise NotImplementedError

    def get_stderr(self):
        """Override in subclass to return suitable 'wsgi.errors'"""
        raise NotImplementedError

    def add_cgi_vars(self):
        """Override in subclass to insert CGI variables in 'self.environ'"""
        raise NotImplementedError


class SimpleHandler(BaseHandler):
    """Handler that's just initialized with streams, environment, etc.

    This handler subclass is intended for synchronous HTTP/1.0 origin servers,
    and handles sending the entire response output, given the correct inputs.

    Usage::

        handler = SimpleHandler(
            inp,out,err,env, multithread=False, multiprocess=True
        )
        handler.run(app)"""

    def __init__(self,stdin,stdout,stderr,environ,
        multithread=True, multiprocess=False
    ):
        self.stdin = stdin
        self.stdout = stdout
        self.stderr = stderr
        self.base_env = environ
        self.wsgi_multithread = multithread
        self.wsgi_multiprocess = multiprocess

    def get_stdin(self):
        """Return the input stream given to the constructor"""
        return self.stdin

    def get_stderr(self):
        """Return the error stream given to the constructor"""
        return self.stderr

    def add_cgi_vars(self):
        """Merge the constructor-supplied environ into 'self.environ'"""
        self.environ.update(self.base_env)

    def _write(self,data):
        """Write 'data' to 'self.stdout', retrying after partial writes"""
        result = self.stdout.write(data)
        if result is None or result == len(data):
            return
        from warnings import warn
        warn("SimpleHandler.stdout.write() should not do partial writes",
            DeprecationWarning)
        # Keep writing the unwritten tail until nothing is left.
        while True:
            data = data[result:]
            if not data:
                break
            result = self.stdout.write(data)

    def _flush(self):
        """Flush 'self.stdout'"""
        self.stdout.flush()
        # Rebind on the instance so later calls go straight to the stream.
        self._flush = self.stdout.flush


class BaseCGIHandler(SimpleHandler):

    """CGI-like systems using input/output/error streams and environ mapping

    Usage::

        handler = BaseCGIHandler(inp,out,err,env)
        handler.run(app)

    This handler class is useful for gateway protocols like ReadyExec and
    FastCGI, that have usable input/output/error streams and an environment
    mapping.  It's also the base class for CGIHandler, which just uses
    sys.stdin, os.environ, and so on.

    The constructor also takes keyword arguments 'multithread' and
    'multiprocess' (defaulting to 'True' and 'False' respectively) to control
    the configuration sent to the application.  It sets 'origin_server' to
    False (to enable CGI-like output), and assumes that 'wsgi.run_once' is
    False.
    """

    origin_server = False


class CGIHandler(BaseCGIHandler):

    """CGI-based invocation via sys.stdin/stdout/stderr and os.environ

    Usage::

        CGIHandler().run(app)

    The difference between this class and BaseCGIHandler is that it always
    uses 'wsgi.run_once' of 'True', 'wsgi.multithread' of 'False', and
    'wsgi.multiprocess' of 'True'.  It does not take any initialization
    parameters, but always uses 'sys.stdin', 'os.environ', and friends.

    If you need to override any of these parameters, use BaseCGIHandler
    instead.
    """

    wsgi_run_once = True
    # Do not allow os.environ to leak between requests in Google App Engine
    # and other multi-run CGI use cases.  This is not easily testable.
    # See http://bugs.python.org/issue7250
    os_environ = {}

    def __init__(self):
        BaseCGIHandler.__init__(
            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
            read_environ(), multithread=False, multiprocess=True
        )


class IISCGIHandler(BaseCGIHandler):
    """CGI-based invocation with workaround for IIS path bug

    This handler should be used in preference to CGIHandler when deploying on
    Microsoft IIS without having set the config allowPathInfo option (IIS>=7)
    or metabase allowPathInfoForScriptMappings (IIS<7).
    """
    wsgi_run_once = True
    os_environ = {}

    # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
    # the front, causing problems for WSGI applications that wish to implement
    # routing. This handler strips any such duplicated path.

    # IIS can be configured to pass the correct PATH_INFO, but this causes
    # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
    # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
    # setting can only be made on a vhost level, affecting all other script
    # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
    # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
    # rarely uses it because there is still no UI for it.)

    # There is no way for CGI code to tell whether the option was set, so a
    # separate handler class is provided.
    def __init__(self):
        environ = read_environ()
        path = environ.get('PATH_INFO', '')
        script = environ.get('SCRIPT_NAME', '')
        # Compare with a trailing '/' so that only a whole-segment match of
        # SCRIPT_NAME at the front of PATH_INFO is stripped.
        if (path+'/').startswith(script+'/'):
            environ['PATH_INFO'] = path[len(script):]
        BaseCGIHandler.__init__(
            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
            environ, multithread=False, multiprocess=True
        )