1# -*- coding: utf-8 -*-
2"""
3    jinja2.utils
4    ~~~~~~~~~~~~
5
6    Utility functions.
7
8    :copyright: (c) 2017 by the Jinja Team.
9    :license: BSD, see LICENSE for more details.
10"""
11import re
12import json
13import errno
14from collections import deque
15from threading import Lock
16from jinja2._compat import text_type, string_types, implements_iterator, \
17     url_quote
18
19
# Splits on runs of whitespace; the capturing group makes ``split`` keep the
# whitespace runs in the result so the text can be re-joined unchanged.
_word_split_re = re.compile(r'(\s+)')
# Breaks a single word into optional leading punctuation (``lead``), the
# payload (``middle``, matched non-greedily) and optional trailing
# punctuation (``trail``).  The alternatives include the HTML entities
# ``&lt;`` / ``&gt;`` next to the raw ``<`` / ``>`` characters.
_punctuation_re = re.compile(
    '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
        '|'.join(map(re.escape, ('(', '<', '&lt;'))),
        '|'.join(map(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
    )
)
# Deliberately loose e-mail shape: non-whitespace, ``@``, then a host part
# that contains at least one dot.
_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
# Matches an HTML comment or any ``<...>`` tag.
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
# Matches ``&name;`` and captures the text between ``&`` and ``;``.
_entity_re = re.compile(r'&([^;]+);')
# ASCII letters and digits as plain strings.
_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
_digits = '0123456789'

# special singleton representing missing values for the runtime;
# it is the sole instance of an ad-hoc class whose repr is ``missing``
missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})()

# internal code: set of code objects, filled by the ``internalcode`` decorator
internal_code = set()

# joins an iterable of unicode strings without a separator
concat = u''.join

# True when ``json.dumps`` leaves a forward slash unescaped (i.e. it emits
# ``"/"`` rather than ``"\/"``)
_slash_escape = '\\/' not in json.dumps('/')
42
43
def contextfunction(f):
    """Flag a function or method as a context callable.

    A callable flagged this way receives the active :class:`Context` as
    its first argument when it is invoked from a template, which gives it
    access to the context and to everything provided on it.  A function
    returning the sorted names of the variables exported by the current
    template could be written like this::

        @contextfunction
        def get_exported_names(context):
            return sorted(context.exported_vars)

    :param f: the callable to flag.
    :return: `f` itself, with its ``contextfunction`` attribute set to
             `True`.
    """
    setattr(f, 'contextfunction', True)
    return f
58
59
def evalcontextfunction(f):
    """Flag a function or method as an eval context callable.

    This works like :func:`contextfunction`, except that an evaluation
    context object is passed instead of the context.  See
    :ref:`eval-context` for details about the eval context.

    :param f: the callable to flag.
    :return: `f` itself, with its ``evalcontextfunction`` attribute set
             to `True`.

    .. versionadded:: 2.4
    """
    setattr(f, 'evalcontextfunction', True)
    return f
71
72
def environmentfunction(f):
    """Flag a function or method as an environment callable.

    This behaves exactly like :func:`contextfunction`, except that the
    first argument is the active :class:`Environment` rather than the
    context.

    :param f: the callable to flag.
    :return: `f` itself, with its ``environmentfunction`` attribute set
             to `True`.
    """
    setattr(f, 'environmentfunction', True)
    return f
81
82
def internalcode(f):
    """Mark `f` as internally used.

    The code object of `f` is added to the module-level ``internal_code``
    set; `f` itself is returned unchanged so this works as a decorator.
    """
    code = f.__code__
    internal_code.add(code)
    return f
87
88
def is_undefined(obj):
    """Tell whether `obj` is an undefined value.

    This is merely a nicer spelling of an instance check against
    :class:`Undefined`.  It is handy in custom filters or tests that want
    to react to undefined variables; a custom default filter could look
    like this::

        def default(var, default=''):
            if is_undefined(var):
                return default
            return var

    :param obj: the object to inspect.
    :return: `True` if `obj` is an :class:`Undefined` instance, `False`
             otherwise.
    """
    # imported on every call rather than at module import time
    from jinja2.runtime import Undefined
    undefined = isinstance(obj, Undefined)
    return undefined
103
104
def consume(iterable):
    """Exhaust `iterable`, discarding every item it yields."""
    # a deque that can hold nothing pulls each item and drops it right away
    deque(iterable, maxlen=0)
109
110
def clear_caches():
    """Empty the internal caches Jinja2 keeps for environments and lexers.

    These caches exist so that environments and lexers do not have to be
    recreated all the time.  Normally there is no need to touch them, but
    when measuring memory consumption it can be useful to clear them.
    """
    from jinja2.environment import _spontaneous_environments
    from jinja2.lexer import _lexer_cache
    for cache in (_spontaneous_environments, _lexer_cache):
        cache.clear()
121
122
def import_string(import_name, silent=False):
    """Import an object described by a string.

    This is useful when import paths serve as endpoints or similar.  The
    path is either in dotted notation (``xml.sax.saxutils.escape``) or
    uses a colon to separate module and object
    (``xml.sax.saxutils:escape``).  A name containing neither a colon nor
    a dot is imported as a module.

    :param import_name: the import path.
    :param silent: if true, `None` is returned when the import raises
                   :exc:`ImportError` or :exc:`AttributeError` instead of
                   letting the exception propagate.
    :return: imported object
    """
    try:
        if ':' in import_name:
            module_name, attr = import_name.split(':', 1)
        else:
            # split at the last dot: everything before it is the module
            module_name, dot, attr = import_name.rpartition('.')
            if not dot:
                return __import__(import_name)
        # a non-empty fromlist makes __import__ return the innermost module
        module = __import__(module_name, None, None, [attr])
        return getattr(module, attr)
    except (ImportError, AttributeError):
        if silent:
            return None
        raise
147
148
def open_if_exists(filename, mode='rb'):
    """Open `filename` and return the file object, or `None` if it cannot
    be opened because it does not exist.

    An :exc:`IOError` whose errno is ``ENOENT``, ``EISDIR`` or ``EINVAL``
    results in `None`; any other :exc:`IOError` is re-raised.

    :param filename: path handed to :func:`open`.
    :param mode: mode handed to :func:`open`, binary read by default.
    """
    ignorable = (errno.ENOENT, errno.EISDIR, errno.EINVAL)
    try:
        handle = open(filename, mode)
    except IOError as exc:
        if exc.errno in ignorable:
            return None
        raise
    return handle
158
159
def object_type_repr(obj):
    """Return a human readable name for the type of `obj`.

    For the singletons `None` and `Ellipsis` the name of the object
    itself is returned.  For everything else the result is the class name
    followed by ``' object'``; the class name is prefixed with its module
    unless that module is the builtin one.
    """
    for singleton, label in ((None, 'None'), (Ellipsis, 'Ellipsis')):
        if obj is singleton:
            return label
    cls = obj.__class__
    module = cls.__module__
    # __builtin__ in 2.x, builtins in 3.x
    if module in ('__builtin__', 'builtins'):
        name = cls.__name__
    else:
        name = module + '.' + cls.__name__
    return '%s object' % name
175
176
def pformat(obj, verbose=False):
    """Pretty-print `obj` into a string.

    The `pretty` library is used when it can be imported; otherwise the
    builtin `pprint` module does the work.

    :param obj: the object to format.
    :param verbose: forwarded to `pretty`; not used by the `pprint`
                    fallback.
    """
    try:
        from pretty import pretty
        formatted = pretty(obj, verbose=verbose)
    except ImportError:
        import pprint
        formatted = pprint.pformat(obj)
    return formatted
187
188
def urlize(text, trim_url_limit=None, rel=None, target=None):
    """Converts any URLs in text into clickable links. Works on http://,
    https:// and www. links. Links can have trailing punctuation (periods,
    commas, close-parens) and leading punctuation (opening parens) and
    it'll still do the right thing.

    The text is HTML-escaped before it is processed, and the result is
    returned as a unicode string.

    If trim_url_limit is not None, the URLs in link text will be limited
    to trim_url_limit characters (followed by ``...``).  The ``href``
    always keeps the full URL.

    If rel is given, its escaped value is added as the ``rel`` attribute
    of the URL links (for example ``rel="nofollow"``).

    If target is not None, a target attribute will be added to the link.

    ``mailto:`` links generated for e-mail addresses get neither a
    ``rel`` nor a ``target`` attribute.
    """
    # Shortens the visible link text: with a limit, the first `limit`
    # characters are kept and '...' is appended whenever the text is at
    # least `limit` characters long; without a limit the text is returned
    # unchanged.
    trim_url = lambda x, limit=trim_url_limit: limit is not None \
                         and (x[:limit] + (len(x) >=limit and '...'
                         or '')) or x
    # Escape first, then split on whitespace.  The pattern has a capturing
    # group, so the whitespace runs stay in the list and the final join
    # reproduces the original spacing.
    words = _word_split_re.split(text_type(escape(text)))
    rel_attr = rel and ' rel="%s"' % text_type(escape(rel)) or ''
    target_attr = target and ' target="%s"' % escape(target) or ''

    for i, word in enumerate(words):
        match = _punctuation_re.match(word)
        if match:
            lead, middle, trail = match.groups()
            # The three checks below run in sequence on the same `middle`
            # and each may replace it with an anchor tag.
            #
            # 1. "www." prefixed words, or scheme-less words starting with a
            #    letter/digit and ending in .org/.net/.com: link them with
            #    an added "http://" prefix.
            if middle.startswith('www.') or (
                '@' not in middle and
                not middle.startswith('http://') and
                not middle.startswith('https://') and
                len(middle) > 0 and
                middle[0] in _letters + _digits and (
                    middle.endswith('.org') or
                    middle.endswith('.net') or
                    middle.endswith('.com')
                )):
                middle = '<a href="http://%s"%s%s>%s</a>' % (middle,
                    rel_attr, target_attr, trim_url(middle))
            # 2. words that already carry an http(s) scheme
            if middle.startswith('http://') or \
               middle.startswith('https://'):
                middle = '<a href="%s"%s%s>%s</a>' % (middle,
                    rel_attr, target_attr, trim_url(middle))
            # 3. plain e-mail addresses; anything containing a colon is
            #    skipped here
            if '@' in middle and not middle.startswith('www.') and \
               not ':' in middle and _simple_email_re.match(middle):
                middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
            # only write back words that were actually changed
            if lead + middle + trail != word:
                words[i] = lead + middle + trail
    return u''.join(words)
236
237
def generate_lorem_ipsum(n=5, html=True, min=20, max=100):
    """Generate some lorem ipsum for the template.

    :param n: number of paragraphs to generate.
    :param html: if true, every paragraph is escaped and wrapped in
                 ``<p>...</p>``, the paragraphs are joined with a newline
                 and the result is returned as `Markup`.  Otherwise the
                 paragraphs are joined with a blank line and returned as
                 a plain unicode string.
    :param min: lower bound passed to ``randrange`` for the number of
                words per paragraph (inclusive).
    :param max: upper bound passed to ``randrange`` for the number of
                words per paragraph (exclusive).
    """
    from jinja2.constants import LOREM_IPSUM_WORDS
    from random import choice, randrange
    words = LOREM_IPSUM_WORDS.split()
    result = []

    for _ in range(n):
        next_capitalized = True
        # indexes of the last word that received a comma / a full stop
        last_comma = last_fullstop = 0
        word = None
        last = None
        p = []

        # each paragraph contains randrange(min, max) words, i.e. 20 to 99
        # with the default arguments.
        for idx, _ in enumerate(range(randrange(min, max))):
            # draw again until the word differs from the previous one
            while True:
                word = choice(words)
                if word != last:
                    last = word
                    break
            if next_capitalized:
                word = word.capitalize()
                next_capitalized = False
            # add commas
            if idx - randrange(3, 8) > last_comma:
                last_comma = idx
                # a comma pushes the full stop threshold back by two words
                last_fullstop += 2
                word += ','
            # add end of sentences
            if idx - randrange(10, 20) > last_fullstop:
                last_comma = last_fullstop = idx
                word += '.'
                next_capitalized = True
            p.append(word)

        # ensure that the paragraph ends with a dot.
        p = u' '.join(p)
        if p.endswith(','):
            p = p[:-1] + '.'
        elif not p.endswith('.'):
            p += '.'
        result.append(p)

    if not html:
        return u'\n\n'.join(result)
    return Markup(u'\n'.join(u'<p>%s</p>' % escape(x) for x in result))
285
286
def unicode_urlencode(obj, charset='utf-8', for_qs=False):
    """URL-quote a single bytestring or unicode string.

    Unicode strings are encoded with `charset` before quoting.  Objects
    that are not strings are converted to their unicode representation
    first.

    :param obj: the value to quote.
    :param charset: charset used to encode unicode input.
    :param for_qs: if true the slash is quoted as well and ``%20`` is
                   replaced by ``+`` in the result; otherwise the slash
                   is left untouched.
    :return: the quoted value as unicode string.
    """
    if not isinstance(obj, string_types):
        obj = text_type(obj)
    if isinstance(obj, text_type):
        obj = obj.encode(charset)
    if for_qs:
        # nothing is considered safe in a query string
        quoted = text_type(url_quote(obj, b''))
        return quoted.replace('%20', '+')
    return text_type(url_quote(obj, b'/'))
304
305
class LRUCache(object):
    """A simple LRU Cache implementation.

    The values live in a plain dict (``_mapping``); the usage order of the
    keys is tracked in a deque (``_queue``) whose right end holds the most
    recently used key.  When a new key is stored while the cache already
    holds `capacity` items, the key at the left end of the queue is
    evicted.

    :param capacity: the maximum number of items the cache holds.
    """

    # this is fast for small capacities (something below 1000) but doesn't
    # scale.  But as long as it's only used as storage for templates this
    # won't do any harm.

    def __init__(self, capacity):
        self.capacity = capacity
        self._mapping = {}
        self._queue = deque()
        self._postinit()

    def _postinit(self):
        """Create the lock and the bound-method aliases of the queue.

        The aliases are bound to the deque that ``self._queue`` refers to
        at the time of the call, so the deque must never be replaced
        afterwards, only mutated in place.
        """
        # alias all queue methods for faster lookup
        self._popleft = self._queue.popleft
        self._pop = self._queue.pop
        self._remove = self._queue.remove
        self._wlock = Lock()
        self._append = self._queue.append

    def __getstate__(self):
        # the lock and the bound-method aliases are left out on purpose;
        # `__setstate__` recreates them through `_postinit`
        return {
            'capacity':     self.capacity,
            '_mapping':     self._mapping,
            '_queue':       self._queue
        }

    def __setstate__(self, d):
        self.__dict__.update(d)
        self._postinit()

    def __getnewargs__(self):
        return (self.capacity,)

    def copy(self):
        """Return a shallow copy of the instance."""
        rv = self.__class__(self.capacity)
        rv._mapping.update(self._mapping)
        # Fill the copy's own deque in place.  Rebinding ``rv._queue`` to a
        # new deque would leave ``rv._append`` & co. pointing at the old,
        # empty deque, so later writes would never reach ``rv._queue``.
        rv._queue.extend(self._queue)
        return rv

    def get(self, key, default=None):
        """Return an item from the cache dict or `default`"""
        try:
            return self[key]
        except KeyError:
            return default

    def setdefault(self, key, default=None):
        """Set `default` if the key is not in the cache otherwise
        leave unchanged. Return the value of this key.
        """
        # The lock must not be held here: `__getitem__` and `__setitem__`
        # acquire the non-reentrant lock themselves, so taking it first
        # would block forever.
        try:
            return self[key]
        except KeyError:
            self[key] = default
            return default

    def clear(self):
        """Clear the cache."""
        with self._wlock:
            self._mapping.clear()
            self._queue.clear()

    def __contains__(self, key):
        """Check if a key exists in this cache."""
        return key in self._mapping

    def __len__(self):
        """Return the current size of the cache."""
        return len(self._mapping)

    def __repr__(self):
        return '<%s %r>' % (
            self.__class__.__name__,
            self._mapping
        )

    def __getitem__(self, key):
        """Get an item from the cache. Moves the item up so that it has the
        highest priority then.

        Raise a `KeyError` if it does not exist.
        """
        with self._wlock:
            rv = self._mapping[key]
            if self._queue[-1] != key:
                try:
                    self._remove(key)
                except ValueError:
                    # if something removed the key from the container
                    # when we read, ignore the ValueError that we would
                    # get otherwise.
                    pass
                self._append(key)
            return rv

    def __setitem__(self, key, value):
        """Sets the value for an item. Moves the item up so that it
        has the highest priority then.
        """
        with self._wlock:
            if key in self._mapping:
                self._remove(key)
            elif len(self._mapping) == self.capacity:
                # cache is full: evict the least recently used key
                del self._mapping[self._popleft()]
            self._append(key)
            self._mapping[key] = value

    def __delitem__(self, key):
        """Remove an item from the cache dict.
        Raise a `KeyError` if it does not exist.
        """
        with self._wlock:
            del self._mapping[key]
            try:
                self._remove(key)
            except ValueError:
                # tolerate a key that was in the mapping but is no longer
                # in the queue
                pass

    def items(self):
        """Return a list of ``(key, value)`` pairs, most recently used
        first.
        """
        result = [(key, self._mapping[key]) for key in list(self._queue)]
        result.reverse()
        return result

    def iteritems(self):
        """Iterate over all items."""
        return iter(self.items())

    def values(self):
        """Return a list of all values."""
        return [x[1] for x in self.items()]

    def itervalues(self):
        """Iterate over all values."""
        return iter(self.values())

    # historical misspelling, kept so existing callers continue to work
    itervalue = itervalues

    def keys(self):
        """Return a list of all keys ordered by most recent usage."""
        return list(self)

    def iterkeys(self):
        """Iterate over all keys in the cache dict, ordered by
        the most recent usage.
        """
        return reversed(tuple(self._queue))

    __iter__ = iterkeys

    def __reversed__(self):
        """Iterate over the keys in the cache dict, oldest items
        coming first.
        """
        return iter(tuple(self._queue))

    __copy__ = copy
481
482
# register the LRU cache as mutable mapping if possible.  The ABCs live in
# ``collections.abc`` on Python 3 (the aliases in ``collections`` were
# removed in Python 3.10), so try that location first and fall back to
# ``collections`` for Python 2.
try:
    try:
        from collections.abc import MutableMapping
    except ImportError:
        from collections import MutableMapping
    MutableMapping.register(LRUCache)
except ImportError:
    pass
489
490
def select_autoescape(enabled_extensions=('html', 'htm', 'xml'),
                      disabled_extensions=(),
                      default_for_string=True,
                      default=False):
    """Build a function that decides about the initial autoescape value
    from the name of a template.  This is the recommended way to configure
    autoescaping unless you want to write such a function yourself.

    To turn autoescaping on for all templates created from strings and
    for all templates with an `.html` or `.xml` extension::

        from jinja2 import Environment, select_autoescape
        env = Environment(autoescape=select_autoescape(
            enabled_extensions=('html', 'xml'),
            default_for_string=True,
        ))

    To turn it on at all times except for templates that end with
    `.txt`::

        from jinja2 import Environment, select_autoescape
        env = Environment(autoescape=select_autoescape(
            disabled_extensions=('txt',),
            default_for_string=True,
            default=True,
        ))

    `enabled_extensions` is an iterable of the extensions autoescaping
    should be enabled for and `disabled_extensions` one of the extensions
    it should be disabled for; a leading dot on an extension is optional
    and the enabled extensions are checked first.  For a template loaded
    from a string (template name `None`) the value of `default_for_string`
    is used.  If nothing matches, `default` is used.

    For security reasons this function operates case insensitive.

    .. versionadded:: 2.9
    """
    def _as_suffixes(extensions):
        # normalise 'HTML' and '.html' alike to the suffix '.html'
        return tuple('.' + ext.lstrip('.').lower() for ext in extensions)

    enabled_patterns = _as_suffixes(enabled_extensions)
    disabled_patterns = _as_suffixes(disabled_extensions)

    def autoescape(template_name):
        if template_name is None:
            return default_for_string
        lowered = template_name.lower()
        if lowered.endswith(enabled_patterns):
            return True
        return False if lowered.endswith(disabled_patterns) else default
    return autoescape
543
544
def htmlsafe_json_dumps(obj, dumper=None, **kwargs):
    """Works exactly like :func:`dumps` but produces output that is safe
    for use in ``<script>`` tags.  It takes the same arguments and returns
    a JSON string wrapped in `Markup`.  Templates have access to this
    through the ``|tojson`` filter, which also marks the result as safe.
    Because of the way certain characters are escaped, the result is safe
    even outside of ``<script>`` tags.

    The following characters are escaped in strings:

    -   ``<``
    -   ``>``
    -   ``&``
    -   ``'``

    This makes it safe to embed such strings in any place in HTML with the
    notable exception of double quoted attributes.  In that case single
    quote your attributes or HTML escape it in addition.

    :param obj: the object to serialize.
    :param dumper: the function that does the serialization;
                   ``json.dumps`` if not given.
    :param kwargs: forwarded to the dumper.
    """
    dump = json.dumps if dumper is None else dumper
    rv = dump(obj, **kwargs)
    # swap each risky character for its JSON unicode escape
    for char, replacement in (
        (u'<', u'\\u003c'),
        (u'>', u'\\u003e'),
        (u'&', u'\\u0026'),
        (u"'", u'\\u0027'),
    ):
        rv = rv.replace(char, replacement)
    return Markup(rv)
571
572
@implements_iterator
class Cycler(object):
    """Template helper that walks through a fixed sequence of items over
    and over again.

    :param items: the items to cycle through; at least one is required,
                  otherwise a :exc:`RuntimeError` is raised.
    """

    def __init__(self, *items):
        if not items:
            raise RuntimeError('at least one item has to be provided')
        self.items = items
        self.reset()

    def reset(self):
        """Start over at the first item."""
        self.pos = 0

    @property
    def current(self):
        """The item at the current position."""
        return self.items[self.pos]

    def next(self):
        """Return the current item and move on to the following one,
        wrapping around after the last item.
        """
        item = self.current
        following = self.pos + 1
        self.pos = following % len(self.items)
        return item

    __next__ = next
599
600
class Joiner(object):
    """Template helper for joining: calling it returns an empty string the
    first time and the separator on every later call.

    :param sep: the separator returned from the second call onwards.
    """

    def __init__(self, sep=u', '):
        self.sep = sep
        self.used = False

    def __call__(self):
        if self.used:
            return self.sep
        # first call: remember it and emit nothing
        self.used = True
        return u''
613
614
class Namespace(object):
    """An object holding arbitrary attributes.  It can be initialized
    from a dictionary or with keyword arguments.

    The values are kept in one private dict.  Attribute reads are served
    from that dict and item assignment writes into it.
    """

    def __init__(*args, **kwargs):
        # `self` is taken from *args so that it stays usable as a keyword
        self = args[0]
        self.__attrs = dict(*args[1:], **kwargs)

    def __getattribute__(self, name):
        if name != '_Namespace__attrs':
            try:
                return self.__attrs[name]
            except KeyError:
                raise AttributeError(name)
        # the private dict itself is fetched the regular way
        return object.__getattribute__(self, name)

    def __setitem__(self, name, value):
        self.__attrs[name] = value

    def __repr__(self):
        return '<Namespace %r>' % (self.__attrs,)
636
637
# does this python version support async for in and async generators?
# Compiling a tiny async generator answers that: a SyntaxError means no.
try:
    exec('async def _():\n async for _ in ():\n  yield _')
except SyntaxError:
    have_async_gen = False
else:
    have_async_gen = True
644
645
646# Imported here because that's where it was in the past
647from markupsafe import Markup, escape, soft_unicode
648