1import markdown
2import re
3
def isString(s):
    """
    Return True if `s` is a text string.

    On Python 2 both `unicode` and `str` instances (and their subclasses)
    are accepted.  On interpreters that have no `unicode` builtin, only
    `str` is checked instead of failing with a NameError.
    """
    try:
        string_types = (unicode, str)
    except NameError:
        # No separate `unicode` type on this interpreter: `str` is the
        # only text type.
        string_types = (str,)
    return isinstance(s, string_types)
7
class Processor:
    """ Common base class that may hold a reference to a Markdown instance. """

    def __init__(self, markdown_instance=None):
        """
        Remember `markdown_instance` as `self.markdown`.

        If the argument is omitted (or falsy), the `markdown` attribute
        is not set at all.
        """
        if not markdown_instance:
            return
        self.markdown = markdown_instance
12
class Treeprocessor(Processor):
    """
    Base class for processors that work on the ElementTree object
    before it is serialized.

    A Treeprocessor provides a "run" method which receives an
    ElementTree, changes it as needed and hands back an ElementTree
    object.

    Every Treeprocessor must extend markdown.Treeprocessor.

    """
    def run(self, root):
        """
        Hook to be overridden by subclasses.

        `root` is the root ElementTree.  An implementation may return a
        different ElementTree object, which then replaces the existing
        root, or it may modify the given tree in place and return None.
        This default implementation does nothing and returns None.
        """
        return None
32
33
class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.

    While patterns are applied, every node produced by a pattern is kept
    in `self.stashed_nodes` and the matched text is replaced by a
    placeholder string that carries the id of the stashed node.  The
    placeholders are later expanded back into ElementTree elements.
    """

    def __init__(self, md):
        """
        Keyword arguments:

        * md: the Markdown instance whose `inlinePatterns` are applied.
        """
        self.__placeholder_prefix = markdown.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = markdown.ETX
        # 4 is the number of digits of a placeholder id (see the "%04d"
        # in __makePlaceholder and the regular expression below).
        self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
                                      + len(self.__placeholder_suffix)
        self.__placeholder_re = re.compile(markdown.INLINE_PLACEHOLDER % r'([0-9]{4})')
        self.markdown = md

    def __makePlaceholder(self, type):
        """
        Generate a placeholder.

        The id is the current number of stashed nodes, zero padded to
        four digits.  `type` is accepted but not used.

        Returns: tuple (placeholder string, id).
        """
        # NOTE(review): ids above 9999 would have five digits and no
        # longer match the 4-digit placeholder regular expression.
        id = "%04d" % len(self.stashed_nodes)
        hash = markdown.INLINE_PLACEHOLDER % id
        return hash, id

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found
        placeholder.  If no placeholder is found, returns None and
        index + 1.
        """

        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        else:
            return None, index + 1

    def __stashNode(self, node, type):
        """
        Add node to stash.

        Returns: the placeholder string that now stands for `node`.
        """
        placeholder, id = self.__makePlaceholder(type)
        self.stashed_nodes[id] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders.  AtomicString data is
        returned untouched.

        """
        if not isinstance(data, markdown.AtomicString):
            startIndex = 0
            while patternIndex < len(self.markdown.inlinePatterns):
                data, matched, startIndex = self.__applyPattern(
                    self.markdown.inlinePatterns.value_for_index(patternIndex),
                    data, patternIndex, startIndex)
                # The same pattern is retried until it stops matching;
                # only then do we move on to the next pattern.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keywords arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None

        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode)

        if not isText and node is not subnode:
            # The elements created from a tail take the position of
            # subnode, which is removed from its parent here.
            pos = list(node).index(subnode)
            node.remove(subnode)
        else:
            pos = 0

        # Inserting in reverse order at a fixed position keeps the
        # original order of the new children.
        childResult.reverse()
        for newChild in childResult:
            node.insert(pos, newChild)

    def __processPlaceholders(self, data, parent):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data

        Returns: list with ElementTree elements with applied inline patterns.
        """
        def linkText(text):
            # Plain text goes to the tail of the last created element or,
            # if there is no element yet, to the text of the parent.
            if text:
                if result:
                    if result[-1].tail:
                        result[-1].tail += text
                    else:
                        result[-1].tail = text
                else:
                    if parent.text:
                        parent.text += text
                    else:
                        parent.text = text

        result = []
        startIndex = 0
        prefix = self.__placeholder_prefix
        while data:
            index = data.find(prefix, startIndex)
            if index != -1:
                nodeId, phEndIndex = self.__findPlaceholder(data, index)

                if nodeId in self.stashed_nodes:
                    node = self.stashed_nodes.get(nodeId)

                    if index > 0:
                        text = data[startIndex:index]
                        linkText(text)

                    if not isString(node): # it's Element
                        for child in [node] + list(node):
                            if child.tail:
                                if child.tail.strip():
                                    self.__processElementText(node, child, False)
                            if child.text:
                                if child.text.strip():
                                    self.__processElementText(child, child)
                    else: # it's just a string
                        linkText(node)
                        startIndex = phEndIndex
                        continue

                    startIndex = phEndIndex
                    result.append(node)

                else: # wrong placeholder
                    # Keep the prefix as literal text and continue the
                    # search right after it.
                    end = index + len(prefix)
                    linkText(data[startIndex:end])
                    startIndex = end
            else:
                text = data[startIndex:]
                linkText(text)
                data = ""

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we starting search

        Returns: tuple (data, matched, startIndex), where data is the
        string with placeholders instead of ElementTree elements,
        matched tells whether the pattern matched, and startIndex is
        the index from which the next search should start.

        """
        match = pattern.getCompiledRegExp().match(data[startIndex:])
        leftData = data[:startIndex]

        if not match:
            return data, False, 0

        node = pattern.handleMatch(match)

        if node is None:
            # The pattern matched but produced nothing: leave data alone
            # and resume at the start of the last group of the match.
            return data, True, len(leftData) + match.span(len(match.groups()))[0]

        if not isString(node):
            if not isinstance(node.text, markdown.AtomicString):
                # We need to process current node too
                for child in [node] + list(node):
                    if child.text:
                        child.text = self.__handleInline(child.text,
                                                        patternIndex + 1)
                    if child.tail:
                        child.tail = self.__handleInline(child.tail,
                                                        patternIndex)

        placeholder = self.__stashNode(node, pattern.type())

        return "%s%s%s%s" % (leftData,
                             match.group(1),
                             placeholder, match.groups()[-1]), True, 0

    def run(self, tree):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want to process your data with inline patterns, instead of a normal
        string, use subclass AtomicString:

            node.text = markdown.AtomicString("data won't be processed with inline patterns")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.

        Returns: ElementTree object with applied inline patterns.

        """
        self.stashed_nodes = {}

        stack = [tree]

        while stack:
            currElement = stack.pop()
            insertQueue = []
            for child in list(currElement):
                if child.text and not isinstance(child.text, markdown.AtomicString):
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(self.__handleInline(
                                                    text), child)
                    # The new elements have to be traversed as well.
                    stack += lst
                    insertQueue.append((child, lst))

                if len(child):
                    stack.append(child)

            for element, lst in insertQueue:
                if element.text:
                    element.text = \
                        markdown.inlinepatterns.handleAttributes(element.text,
                                                                 element)
                i = 0
                for newChild in lst:
                    # Processing attributes
                    if newChild.tail:
                        newChild.tail = \
                            markdown.inlinepatterns.handleAttributes(newChild.tail,
                                                                     element)
                    if newChild.text:
                        newChild.text = \
                            markdown.inlinepatterns.handleAttributes(newChild.text,
                                                                     newChild)
                    element.insert(i, newChild)
                    i += 1
        return tree
297
298
class PrettifyTreeprocessor(Treeprocessor):
    """ Insert line breaks into the html document to make it readable. """

    def _prettifyETree(self, elem):
        """ Add line breaks to `elem` and, recursively, its block level children. """

        newline = "\n"
        formattable = markdown.isBlockLevel(elem.tag) \
                        and elem.tag not in ['code', 'pre']
        if formattable:
            blankText = not elem.text or not elem.text.strip()
            if blankText and len(elem) and markdown.isBlockLevel(elem[0].tag):
                # Only whitespace before a leading block level child.
                elem.text = newline
            for child in elem:
                if markdown.isBlockLevel(child.tag):
                    self._prettifyETree(child)
        # An empty or whitespace-only tail becomes a single line break.
        if not (elem.tail and elem.tail.strip()):
            elem.tail = newline

    def run(self, root):
        """ Add line breaks to the ElementTree `root` object. """

        self._prettifyETree(root)
        # <br /> elements are handled separately: they usually sit in the
        # middle of inline content, which _prettifyETree does not visit.
        for br in root.getiterator('br'):
            if br.tail and br.tail.strip():
                br.tail = '\n%s' % br.tail
            else:
                br.tail = '\n'
330