1# -*- coding: utf-8 -*-
2"""
3    markupsafe
4    ~~~~~~~~~~
5
6    Implements a Markup string.
7
8    :copyright: (c) 2010 by Armin Ronacher.
9    :license: BSD, see LICENSE for more details.
10"""
11import re
12from ._compat import text_type, string_types, int_types, \
13     unichr, PY2
14
15
16__all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']
17
18
# Matches an HTML comment or any single tag.  DOTALL lets the comment
# alternative span several lines, so a ``>`` inside a multi-line comment
# does not terminate the match early.
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)', re.DOTALL)

# Matches an entity reference such as ``&amp;``, ``&#65;`` or ``&#x41;`` and
# captures the text between ``&`` and ``;``.  The name may not contain
# another ``&``, a space or a ``;`` so that a lone ampersand in running text
# ("AT&T and &lt;") does not swallow everything up to the next semicolon.
_entity_re = re.compile(r'&([^& ;]+);')
21
22
class Markup(text_type):
    r"""Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.  This implements the `__html__` interface a couple
    of frameworks and web applications use.  :class:`Markup` is a direct
    subclass of `unicode` and provides all the methods of `unicode` just that
    it escapes arguments passed and always returns `Markup`.

    The `escape` function returns markup objects so that double escaping can't
    happen.

    The constructor of the :class:`Markup` class can be used for three
    different things:  When passed a unicode object it's assumed to be safe,
    when passed an object with an HTML representation (has an `__html__`
    method) that representation is used, otherwise the object passed is
    converted into a unicode string and then assumed to be safe:

    >>> Markup("Hello <em>World</em>!")
    Markup(u'Hello <em>World</em>!')
    >>> class Foo(object):
    ...  def __html__(self):
    ...   return '<a href="#">foo</a>'
    ...
    >>> Markup(Foo())
    Markup(u'<a href="#">foo</a>')

    If you want the object passed to always be treated as unsafe you can use
    the :meth:`escape` classmethod to create a :class:`Markup` object:

    >>> Markup.escape("Hello <em>World</em>!")
    Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')

    Operations on a markup string are markup aware which means that all
    arguments are passed through the :func:`escape` function:

    >>> em = Markup("<em>%s</em>")
    >>> em % "foo & bar"
    Markup(u'<em>foo &amp; bar</em>')
    >>> strong = Markup("<strong>%(text)s</strong>")
    >>> strong % {'text': '<blink>hacker here</blink>'}
    Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
    >>> Markup("<em>Hello</em> ") + "<foo>"
    Markup(u'<em>Hello</em> &lt;foo&gt;')
    """
    __slots__ = ()

    def __new__(cls, base=u'', encoding=None, errors='strict'):
        """Create a new markup string.

        :param base: the value to wrap.  If it has an ``__html__`` method the
                     result of calling it is used instead.
        :param encoding: if given, `base` is decoded with this encoding.
        :param errors: decoding error policy, only used with `encoding`.
        """
        if hasattr(base, '__html__'):
            base = base.__html__()
        if encoding is None:
            return text_type.__new__(cls, base)
        return text_type.__new__(cls, base, encoding, errors)

    def __html__(self):
        """Return the HTML representation, which is the string itself."""
        return self

    def __add__(self, other):
        """Concatenate with `other`, escaping it first."""
        if isinstance(other, string_types) or hasattr(other, '__html__'):
            return self.__class__(super(Markup, self).__add__(self.escape(other)))
        return NotImplemented

    def __radd__(self, other):
        """Concatenate `other` (escaped) in front of this string."""
        if hasattr(other, '__html__') or isinstance(other, string_types):
            return self.escape(other).__add__(self)
        return NotImplemented

    def __mul__(self, num):
        """Repeat the string `num` times, keeping the markup type."""
        if isinstance(num, int_types):
            return self.__class__(text_type.__mul__(self, num))
        return NotImplemented
    __rmul__ = __mul__

    def __mod__(self, arg):
        """Apply ``%`` formatting with every argument escaped on rendering."""
        # Wrap the arguments in helpers that escape lazily, so that numeric
        # format codes (``%d``, ``%f``) still see the underlying object.
        if isinstance(arg, tuple):
            arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
        else:
            arg = _MarkupEscapeHelper(arg, self.escape)
        return self.__class__(text_type.__mod__(self, arg))

    def __repr__(self):
        return '%s(%s)' % (
            self.__class__.__name__,
            text_type.__repr__(self)
        )

    def join(self, seq):
        return self.__class__(text_type.join(self, map(self.escape, seq)))
    join.__doc__ = text_type.join.__doc__

    def split(self, *args, **kwargs):
        return list(map(self.__class__, text_type.split(self, *args, **kwargs)))
    split.__doc__ = text_type.split.__doc__

    def rsplit(self, *args, **kwargs):
        return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))
    rsplit.__doc__ = text_type.rsplit.__doc__

    def splitlines(self, *args, **kwargs):
        return list(map(self.__class__, text_type.splitlines(self, *args, **kwargs)))
    splitlines.__doc__ = text_type.splitlines.__doc__

    def unescape(self):
        r"""Unescape markup again into a text_type string.  This also resolves
        known HTML4 and XHTML entities:

        >>> Markup("Main &raquo; <em>About</em>").unescape()
        u'Main \xbb <em>About</em>'

        Text that looks like an entity but is neither a known name nor a
        valid numeric character reference is left untouched.
        """
        # Explicit relative import: the implicit form (``from _constants``)
        # does not resolve inside a package on Python 3.
        from ._constants import HTML_ENTITIES

        def handle_match(m):
            name = m.group(1)
            if name in HTML_ENTITIES:
                return unichr(HTML_ENTITIES[name])
            try:
                if name[:2] in ('#x', '#X'):
                    return unichr(int(name[2:], 16))
                elif name.startswith('#'):
                    return unichr(int(name[1:]))
            except (ValueError, OverflowError):
                # Not a number, or a code point outside the valid range.
                pass
            # Don't modify unexpected input; dropping it would silently
            # delete text that merely happened to contain ``&`` and ``;``.
            return m.group()
        return _entity_re.sub(handle_match, text_type(self))

    def striptags(self):
        r"""Unescape markup into a text_type string and strip all tags.  This
        also resolves known HTML4 and XHTML entities.  Whitespace is
        normalized to one:

        >>> Markup("Main &raquo;  <em>About</em>").striptags()
        u'Main \xbb About'
        """
        # Tags are removed and whitespace collapsed *before* unescaping, so
        # whitespace produced by entities is preserved as written.
        stripped = u' '.join(_striptags_re.sub('', self).split())
        return Markup(stripped).unescape()

    @classmethod
    def escape(cls, s):
        """Escape the string.  Works like :func:`escape` with the difference
        that for subclasses of :class:`Markup` this function would return the
        correct subclass.
        """
        rv = escape(s)
        if rv.__class__ is not cls:
            return cls(rv)
        return rv

    def make_wrapper(name):
        """Build a method that wraps ``text_type.<name>``.

        The wrapper escapes string-like positional and keyword arguments and
        re-wraps the result in the calling class.  This helper only exists
        while the class body executes; it is deleted at the end of it.
        """
        orig = getattr(text_type, name)

        def func(self, *args, **kwargs):
            args = _escape_argspec(list(args), enumerate(args), self.escape)
            # Keyword arguments need escaping too, otherwise
            # ``Markup("{x}").format(x="<b>")`` would inject raw markup.
            # Iterate over a snapshot because the dict is updated in place.
            _escape_argspec(kwargs, list(kwargs.items()), self.escape)
            return self.__class__(orig(self, *args, **kwargs))
        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func

    for method in ('__getitem__', 'capitalize',
                   'title', 'lower', 'upper', 'replace', 'ljust',
                   'rjust', 'lstrip', 'rstrip', 'center', 'strip',
                   'translate', 'expandtabs', 'swapcase', 'zfill'):
        locals()[method] = make_wrapper(method)

    # new in python 2.5
    if hasattr(text_type, 'partition'):
        def partition(self, sep):
            return tuple(map(self.__class__,
                             text_type.partition(self, self.escape(sep))))

        def rpartition(self, sep):
            return tuple(map(self.__class__,
                             text_type.rpartition(self, self.escape(sep))))

    # new in python 2.6
    if hasattr(text_type, 'format'):
        # NOTE(review): only arguments that are strings or provide
        # ``__html__`` are escaped; other objects are rendered by
        # ``text_type.format`` unescaped.
        format = make_wrapper('format')

    # not in python 3
    if hasattr(text_type, '__getslice__'):
        __getslice__ = make_wrapper('__getslice__')

    # Keep the class namespace free of the construction helpers.
    del method, make_wrapper
201
202
def _escape_argspec(obj, iterable, escape):
    """Escape string-like entries of a container in place.

    :param obj: mutable container (list or dict) indexed by the keys that
                `iterable` yields.
    :param iterable: ``(key, value)`` pairs to inspect.
    :param escape: callable applied to every value that is a string or
                   provides an ``__html__`` method.
    :return: `obj`, after the qualifying entries have been replaced.
    """
    for key, value in iterable:
        needs_escaping = (hasattr(value, '__html__')
                          or isinstance(value, string_types))
        if not needs_escaping:
            continue
        obj[key] = escape(value)
    return obj
209
210
class _MarkupEscapeHelper(object):
    """Helper for Markup.__mod__

    Wraps a formatting argument together with an escape callable.  The
    string conversions return the escaped value, the numeric conversions
    delegate to the wrapped object, and item access yields another helper
    around the selected item.
    """

    def __init__(self, obj, escape):
        self.obj = obj
        self.escape = escape

    def __getitem__(self, item):
        # Wrap the looked-up item so that it is escaped when rendered.
        return _MarkupEscapeHelper(self.obj[item], self.escape)

    def __str__(self):
        return text_type(self.escape(self.obj))

    # The same conversion serves ``unicode()`` on Python 2.
    __unicode__ = __str__

    def __repr__(self):
        return str(self.escape(repr(self.obj)))

    def __int__(self):
        return int(self.obj)

    def __float__(self):
        return float(self.obj)
223
224
# We have to import these down here because the speedups and native
# modules import the Markup type, which is defined above.
try:
    # Explicit relative import: on Python 3 the implicit form
    # (``from _speedups import ...``) does not look inside this package, so
    # it would always raise ImportError and the C speedups would never be
    # used.
    from ._speedups import escape, escape_silent, soft_unicode
except ImportError:
    from ._native import escape, escape_silent, soft_unicode

if not PY2:
    # Outside Python 2, also expose ``soft_unicode`` as ``soft_str``.
    soft_str = soft_unicode
    __all__.append('soft_str')
235