1# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
2# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
3# (c) 2005 Ian Bicking, Clark C. Evans and contributors
4# This module is part of the Python Paste Project and is released under
5# the MIT License: http://www.opensource.org/licenses/mit-license.php
6"""
7This module handles sending static content such as in-memory data or
8files.  At this time it has cache helpers and understands the
9if-modified-since request header.
10"""
11
12import os, time, mimetypes, zipfile, tarfile
13from paste.httpexceptions import *
14from paste.httpheaders import *
15
# Size threshold in bytes: FileApp.update() reads files smaller than this
# fully into memory; files at or above it are opened on each request instead.
CACHE_SIZE = 4096
# Block size in bytes handed to ``wsgi.file_wrapper`` by FileApp.get() and
# used as the default read size of _FileIter.
BLOCK_SIZE = 4096 * 16

# Names exported by a star-import of this module.
__all__ = ['DataApp', 'FileApp', 'DirectoryApp', 'ArchiveStore']
20
class DataApp(object):
    """
    Returns an application that will send content in a single chunk,
    this application has support for setting cache-control and for
    responding to conditional (or HEAD) requests.

    Constructor Arguments:

        ``content``     the content being sent to the client

        ``headers``     the headers to send /w the response

        The remaining ``kwargs`` correspond to headers, where the
        underscore is replaced with a dash.  These values are only
        added to the headers if they are not already provided; thus,
        they can be used for default values.  Examples include, but
        are not limited to:

            ``content_type``
            ``content_encoding``
            ``content_location``

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.

    ``set_content()``

        This method provides a mechanism to set the content after the
        application has been constructed.  This method does things
        like changing ``Last-Modified`` and ``Content-Length`` headers.

    """

    allowed_methods = ('GET', 'HEAD')

    def __init__(self, content, headers=None, allowed_methods=None,
                 **kwargs):
        assert isinstance(headers, (type(None), list))
        self.expires = None
        self.content = None
        self.content_length = None
        self.last_modified = 0
        if allowed_methods is not None:
            self.allowed_methods = allowed_methods
        self.headers = headers or []
        # Every extra keyword names a header (``content_type`` etc.).
        for (k, v) in kwargs.items():
            header = get_header(k)
            header.update(self.headers, v)
        ACCEPT_RANGES.update(self.headers, bytes=True)
        if not CONTENT_TYPE(self.headers):
            CONTENT_TYPE.update(self.headers)
        if content is not None:
            self.set_content(content)

    def cache_control(self, **kwargs):
        """
        Apply ``Cache-Control`` settings to the stored headers and remember
        the resulting expiry so ``get()`` can emit ``Expires``.  Returns
        ``self`` so calls can be chained.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def set_content(self, content, last_modified=None):
        """
        Replace the served content.  ``last_modified`` defaults to the
        current time; the ``Last-Modified`` header and the cached content
        length are refreshed.  Returns ``self``.
        """
        assert content is not None
        if last_modified is None:
            self.last_modified = time.time()
        else:
            self.last_modified = last_modified
        self.content = content
        self.content_length = len(content)
        LAST_MODIFIED.update(self.headers, time=self.last_modified)
        return self

    def content_disposition(self, **kwargs):
        """
        Apply ``Content-Disposition`` settings to the stored headers.
        Returns ``self``.
        """
        CONTENT_DISPOSITION.apply(self.headers, **kwargs)
        return self

    def __call__(self, environ, start_response):
        """
        WSGI entry point: reject methods outside ``allowed_methods`` with a
        405 response, otherwise delegate to ``get()``.
        """
        method = environ['REQUEST_METHOD'].upper()
        if method not in self.allowed_methods:
            exc = HTTPMethodNotAllowed(
                'You cannot %s a file' % method,
                headers=[('Allow', ','.join(self.allowed_methods))])
            return exc(environ, start_response)
        return self.get(environ, start_response)

    def calculate_etag(self):
        """Return the quoted entity tag built from mtime and length."""
        return '"%s-%s"' % (self.last_modified, self.content_length)

    def _not_modified(self, headers, start_response):
        """Strip entity headers and send an empty 304 response."""
        # horribly inefficient, n^2 performance, yuck!
        for head in list_headers(entity=True):
            head.delete(headers)
        start_response('304 Not Modified', headers)
        return [b'']  # empty body

    def _resolve_range(self, byte_range):
        """
        Turn a parsed ``Range`` value into inclusive ``(lower, upper)`` byte
        offsets.

        ``byte_range`` is ``None`` or a ``(units, [(begin, end), ...])``
        tuple where ``end`` is ``None`` when the client left it out.  Only a
        single ``bytes`` range is honoured; anything else selects the whole
        content.  Returns ``None`` when the requested range cannot be
        satisfied.
        """
        (lower, upper) = (0, self.content_length - 1)
        if byte_range and 'bytes' == byte_range[0] and 1 == len(byte_range[1]):
            (lower, upper) = byte_range[1][0]
            # Test against None explicitly: an explicit last byte of 0
            # ("bytes=0-0") is a valid one-byte request, not "open ended".
            if upper is None:
                upper = self.content_length - 1
            if upper >= self.content_length or lower > upper:
                return None
        return (lower, upper)

    def get(self, environ, start_response):
        """
        Answer a GET/HEAD request, honouring ``If-None-Match``,
        ``If-Modified-Since`` and single-range ``Range`` headers.

        Returns a one-element list holding the (possibly sliced) content
        when content is held in memory, or a ``(lower, content_length)``
        tuple when ``self.content`` is ``None`` so a subclass can stream
        the bytes itself.  Error and 304 responses are returned as WSGI
        iterables.
        """
        headers = self.headers[:]
        current_etag = self.calculate_etag()
        ETAG.update(headers, current_etag)
        if self.expires is not None:
            EXPIRES.update(headers, delta=self.expires)

        try:
            client_etags = IF_NONE_MATCH.parse(environ)
            if client_etags:
                for etag in client_etags:
                    if etag == current_etag or etag == '*':
                        return self._not_modified(headers, start_response)
        except HTTPBadRequest as exce:
            return exce.wsgi_application(environ, start_response)

        # If we get If-None-Match and If-Modified-Since, and
        # If-None-Match doesn't match, then we should not try to
        # figure out If-Modified-Since (which has 1-second granularity
        # and just isn't as accurate)
        if not client_etags:
            try:
                client_clock = IF_MODIFIED_SINCE.parse(environ)
                if (client_clock is not None
                    and client_clock >= int(self.last_modified)):
                    return self._not_modified(headers, start_response)
            except HTTPBadRequest as exce:
                return exce.wsgi_application(environ, start_response)

        byte_range = RANGE.parse(environ)
        bounds = self._resolve_range(byte_range)
        if bounds is None:
            return HTTPRequestRangeNotSatisfiable((
              "Range request was made beyond the end of the content,\r\n"
              "which is %s long.\r\n  Range: %s\r\n") % (
                 self.content_length, RANGE(environ))
            ).wsgi_application(environ, start_response)
        (lower, upper) = bounds

        content_length = upper - lower + 1
        CONTENT_RANGE.update(headers, first_byte=lower, last_byte=upper,
                            total_length = self.content_length)
        CONTENT_LENGTH.update(headers, content_length)
        if byte_range or content_length != self.content_length:
            start_response('206 Partial Content', headers)
        else:
            start_response('200 OK', headers)
        if self.content is not None:
            return [self.content[lower:upper+1]]
        return (lower, content_length)
169
class FileApp(DataApp):
    """
    Returns an application that will send the file at the given
    filename.  Adds a mime type based on ``mimetypes.guess_type()``.
    See DataApp for the arguments beyond ``filename``.
    """

    def __init__(self, filename, headers=None, **kwargs):
        self.filename = filename
        content_type, content_encoding = self.guess_type()
        # Guessed values are only defaults; explicit keywords win.
        if content_type and 'content_type' not in kwargs:
            kwargs['content_type'] = content_type
        if content_encoding and 'content_encoding' not in kwargs:
            kwargs['content_encoding'] = content_encoding
        DataApp.__init__(self, None, headers, **kwargs)

    def guess_type(self):
        """Return ``(content_type, content_encoding)`` guessed from the filename."""
        return mimetypes.guess_type(self.filename)

    def update(self, force=False):
        """
        Re-stat the file and refresh the cached metadata (and, for files
        smaller than ``CACHE_SIZE``, the cached content).  Nothing is done
        when the modification time is unchanged unless ``force`` is true.
        Raises ``OSError``/``IOError`` if the file cannot be examined or read.
        """
        stat = os.stat(self.filename)
        if not force and stat.st_mtime == self.last_modified:
            return
        self.last_modified = stat.st_mtime
        if stat.st_size < CACHE_SIZE:
            # ``with`` guarantees the handle is released even if read() fails.
            with open(self.filename, "rb") as fh:
                self.set_content(fh.read(), stat.st_mtime)
        else:
            self.content = None
            self.content_length = stat.st_size
            # This is updated automatically if self.set_content() is
            # called
            LAST_MODIFIED.update(self.headers, time=self.last_modified)

    def _not_found(self, environ, start_response):
        """Send the 404 response used when the file does not exist."""
        exc = HTTPNotFound(
            'The resource does not exist',
            comment="No file at %r" % self.filename)
        return exc(environ, start_response)

    def _forbidden(self, error, environ, start_response):
        """Send the 403 response used when the file cannot be read."""
        exc = HTTPForbidden(
            'You are not permitted to view this file (%s)' % error)
        return exc.wsgi_application(
            environ, start_response)

    def get(self, environ, start_response):
        """
        Serve the file, from the in-memory copy when one is cached and
        otherwise by streaming from disk.

        A missing file yields 404 and an unreadable one 403.  Any file
        handle opened here is closed again unless it is handed to the
        returned iterable, which then owns it.
        """
        is_head = environ['REQUEST_METHOD'].upper() == 'HEAD'
        try:
            if 'max-age=0' in CACHE_CONTROL(environ).lower():
                self.update(force=True) # RFC 2616 13.2.6
            else:
                self.update()
        except (IOError, OSError) as e:
            # os.stat()/open() failed: report it as an HTTP error instead of
            # letting the exception escape the application.
            if not os.path.exists(self.filename):
                return self._not_found(environ, start_response)
            return self._forbidden(e, environ, start_response)

        file = None
        handed_off = False
        try:
            if not self.content:
                if not os.path.exists(self.filename):
                    return self._not_found(environ, start_response)
                try:
                    file = open(self.filename, 'rb')
                except (IOError, OSError) as e:
                    return self._forbidden(e, environ, start_response)
            retval = DataApp.get(self, environ, start_response)
            if isinstance(retval, list):
                # cached content, exception, or not-modified
                if is_head:
                    return [b'']
                return retval
            (lower, content_length) = retval
            if is_head:
                return [b'']
            file.seek(lower)
            file_wrapper = environ.get('wsgi.file_wrapper', None)
            # The server's file wrapper streams until end-of-file, so it is
            # only correct when the requested range runs to the end.
            reaches_eof = (lower + content_length == self.content_length)
            if file_wrapper and reaches_eof:
                result = file_wrapper(file, BLOCK_SIZE)
            else:
                result = _FileIter(file, size=content_length)
            handed_off = True
            return result
        finally:
            # Covers 304/416 responses, HEAD requests and empty files, where
            # the handle was opened but never given to an iterable.
            if file is not None and not handed_off:
                file.close()
239
class _FileIter(object):
    """
    Iterator over the data of an open file, read in blocks.

    ``block_size`` is the maximum number of bytes per read (falling back to
    ``BLOCK_SIZE`` when not given) and ``size``, when not ``None``, caps the
    total number of bytes requested from the file.
    """

    def __init__(self, file, block_size=None, size=None):
        self.file = file
        self.size = size
        if not block_size:
            block_size = BLOCK_SIZE
        self.block_size = block_size

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next block; stop once a read comes back empty."""
        remaining = self.size
        if remaining is None:
            wanted = self.block_size
        else:
            # Never ask for more than what is left of the byte budget.
            wanted = min(self.block_size, remaining)
            self.size = remaining - wanted
        block = self.file.read(wanted)
        if block:
            return block
        raise StopIteration

    # Python 2 spelling of the iterator protocol.
    next = __next__

    def close(self):
        """Close the underlying file."""
        self.file.close()
264
265
class DirectoryApp(object):
    """
    WSGI application that maps ``PATH_INFO`` onto files below a directory
    and serves each through its own ``FileApp``.

    Created file applications are cached per ``PATH_INFO``.  Requests whose
    normalised path falls outside the directory are refused.  Override
    ``DirectoryApp.make_fileapp`` to customise how file applications are
    built.
    """

    def __init__(self, path):
        root = os.path.abspath(path)
        # Keep a trailing separator so the prefix test in __call__ compares
        # whole path components.
        if not root.endswith(os.path.sep):
            root = root + os.path.sep
        self.path = root
        assert os.path.isdir(self.path)
        self.cached_apps = {}

    make_fileapp = FileApp

    def __call__(self, environ, start_response):
        path_info = environ['PATH_INFO']
        handler = self.cached_apps.get(path_info)
        if handler is not None:
            return handler(environ, start_response)

        target = os.path.join(self.path, path_info.lstrip('/'))
        inside_root = os.path.normpath(target).startswith(self.path)
        if not inside_root:
            handler = HTTPForbidden()
        elif os.path.isfile(target):
            # Only real files are cached; error responses are rebuilt
            # on every request.
            handler = self.make_fileapp(target)
            self.cached_apps[path_info] = handler
        else:
            handler = HTTPNotFound(comment=target)
        return handler(environ, start_response)
295
296
class _TarMemberInfo(object):
    """
    Describes one tar member using the two ``zipfile.ZipInfo`` attribute
    names that ArchiveStore reads: ``filename`` and ``date_time``.
    """

    def __init__(self, tarinfo):
        name = tarinfo.name
        # Mark directories with a trailing slash, the convention
        # ArchiveStore uses to recognise non-file entries.
        if tarinfo.isdir() and not name.endswith("/"):
            name += "/"
        self.filename = name
        self.date_time = tuple(time.localtime(tarinfo.mtime)[:6])


class _TarArchive(object):
    """
    Read-only adapter giving a tar archive the ``getinfo()``/``read()``
    interface of ``zipfile.ZipFile``.  It stands in for
    ``tarfile.TarFileCompat``, which no longer exists in Python 3.
    """

    def __init__(self, filepath):
        self.tarfile = tarfile.open(filepath, "r")

    def getinfo(self, name):
        """Return member information; raises ``KeyError`` if absent."""
        return _TarMemberInfo(self.tarfile.getmember(name))

    def read(self, name):
        """Return the bytes of the named member."""
        member = self.tarfile.extractfile(self.tarfile.getmember(name))
        try:
            return member.read()
        finally:
            member.close()


class ArchiveStore(object):
    """
    Returns an application that serves up a DataApp for items requested
    in a given zip or tar archive.

    Constructor Arguments:

        ``filepath``    the path to the archive being served

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.  The settings apply to
        items first requested after the call; applications already cached
        keep the values they were created with.
    """

    def __init__(self, filepath):
        if zipfile.is_zipfile(filepath):
            self.archive = zipfile.ZipFile(filepath,"r")
        elif tarfile.is_tarfile(filepath):
            self.archive = _TarArchive(filepath)
        else:
            raise AssertionError("filepath '%s' is not a zip or tar " % filepath)
        # Headers shared by every DataApp created from this store;
        # cache_control() writes into this list.
        self.headers = []
        self.expires = None
        self.last_modified = time.time()
        self.cache = {}

    def cache_control(self, **kwargs):
        """
        Apply ``Cache-Control`` settings to the store's shared headers and
        remember the resulting expiry.  Returns ``self``.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def __call__(self, environ, start_response):
        """
        WSGI entry point: serve the archive member named by ``PATH_INFO``,
        answering 404 for unknown members and for directory entries.
        """
        path = environ.get("PATH_INFO","")
        if path.startswith("/"):
            path = path[1:]
        application = self.cache.get(path)
        if application:
            return application(environ, start_response)
        try:
            info = self.archive.getinfo(path)
        except KeyError:
            exc = HTTPNotFound("The file requested, '%s', was not found." % path)
            return exc.wsgi_application(environ, start_response)
        if info.filename.endswith("/"):
            exc = HTTPNotFound("Path requested, '%s', is not a file." % path)
            return exc.wsgi_application(environ, start_response)
        content_type, content_encoding = mimetypes.guess_type(info.filename)
        header_kwargs = {'content_type': content_type}
        # 'None' is not a valid content-encoding, so don't set the header if
        # mimetypes.guess_type returns None
        if content_encoding is not None:
            header_kwargs['content_encoding'] = content_encoding
        # Each app gets its own copy of the shared headers.
        app = DataApp(None, headers=list(self.headers), **header_kwargs)
        # tm_isdst=-1 lets mktime() decide whether DST applied to the
        # member's local timestamp instead of assuming it did not.
        app.set_content(self.archive.read(path),
                time.mktime(tuple(info.date_time) + (0, 0, -1)))
        self.cache[path] = app
        app.expires = self.expires
        return app(environ, start_response)
356
357