# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
"""Helpers for quoting and unquoting text for HTML, HTML comments and URLs."""

import re

import six
from six.moves import html_entities
from six.moves.urllib.parse import quote, unquote

try:
    # ``cgi.escape`` was removed in Python 3.8 (and the ``cgi`` module itself
    # in Python 3.13); ``html.escape`` is the supported replacement.
    from html import escape as _escape
except ImportError:  # Python 2: there is no ``html`` module
    from cgi import escape as _escape


__all__ = ['html_quote', 'html_unquote', 'url_quote', 'url_unquote',
           'strip_html']

default_encoding = 'UTF-8'


def html_quote(v, encoding=None):
    r"""
    Quote the value (turned to a string) as HTML.  This quotes <, >,
    & and quotes:

        >>> html_quote(1)
        '1'
        >>> html_quote(None)
        ''
        >>> html_quote('<hey!>')
        '&lt;hey!&gt;'

    ``None`` becomes the empty string.  On Python 3, text (or any other
    object, via its text representation) gives text, and bytes give
    bytes (decoded and re-encoded with ``encoding``).  On Python 2 the
    result is always a byte string encoded with ``encoding``.

    ``encoding`` defaults to the module-level ``default_encoding``.
    """
    encoding = encoding or default_encoding
    if v is None:
        return ''
    if isinstance(v, six.binary_type):
        if six.PY3:
            # The escape function only accepts text on Python 3, so
            # round-trip through ``encoding`` and hand bytes back.
            return _escape(v.decode(encoding), True).encode(encoding)
        return _escape(v, True)
    text = v if isinstance(v, six.text_type) else six.text_type(v)
    if six.PY3:
        return _escape(text, True)
    # Python 2: encode first, then escape the resulting byte string.
    return _escape(text.encode(encoding), True)


_unquote_re = re.compile(r'&([a-zA-Z]+);')


def _entity_subber(match, name2c=html_entities.name2codepoint):
    """
    ``re.sub`` callback: return the character for a matched named
    entity, or the matched text unchanged when the name is unknown.
    """
    code = name2c.get(match.group(1))
    if code:
        return six.unichr(code)
    return match.group(0)


def html_unquote(s, encoding=None):
    r"""
    Decode the value, replacing named entities such as ``&lt;`` with
    the characters they stand for:

        >>> html_unquote('&lt;hey!&gt;') == u'<hey!>'
        True
        >>> html_unquote('&blahblah;') == u'&blahblah;'
        True

    Byte strings are first decoded with ``encoding`` (defaulting to
    ``default_encoding``).  Only named entities are handled; numeric
    references such as ``&#39;`` are left as they are.
    """
    if isinstance(s, six.binary_type):
        s = s.decode(encoding or default_encoding)
    return _unquote_re.sub(_entity_subber, s)


def strip_html(s):
    """
    Remove anything that looks like a tag (``<...>`` on a single line)
    from ``s`` and then decode named entities with `html_unquote`.
    """
    s = re.sub(r'<.*?>', '', s)
    return html_unquote(s)


def no_quote(s):
    """
    Quoting that doesn't do anything
    """
    return s


_comment_quote_re = re.compile(r'\-\s*\>')
# Everything but \r, \n, \t:
_bad_chars_re = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')


def comment_quote(s):
    """
    Quote that makes sure text can't escape a comment

    Every ``-`` followed (optionally after whitespace) by ``>`` has the
    ``>`` replaced by ``&gt;`` so the text can never contain the
    ``-->`` comment terminator.
    """
    comment = str(s)
    # NOTE: control characters matched by ``_bad_chars_re`` are currently
    # left in place; only the comment terminator is neutralised.
    return _comment_quote_re.sub('-&gt;', comment)


url_quote = quote
url_unquote = unquote

if __name__ == '__main__':
    import doctest
    doctest.testmod()