1#!/usr/bin/env python
2"""Doxygen XML to SWIG docstring converter.
3
4Usage:
5
6  doxy2swig.py [options] input.xml output.i
7
8Converts Doxygen generated XML files into a file containing docstrings
9that can be used by SWIG-1.3.x.  Note that you need to get SWIG
10version > 1.3.23 or use Robin Dunn's docstring patch to be able to use
11the resulting output.
12
13input.xml is your doxygen generated XML file and output.i is where the
14output will be written (the file will be clobbered).
15
16"""
17#
18#
19# This code is implemented using Mark Pilgrim's code as a guideline:
20#   http://www.faqs.org/docs/diveintopython/kgp_divein.html
21#
22# Author: Prabhu Ramachandran
23# License: BSD style
24#
25# Thanks:
26#   Johan Hake:  the include_function_definition feature
27#   Bill Spotz:  bug reports and testing.
28#   Sebastian Henschel:   Misc. enhancements.
29#
30#
31
32from xml.dom import minidom
33import re
34import textwrap
35import sys
36import os.path
37import optparse
38
39
def my_open_read(source):
    """Return a readable file object for `source`.

    `source` is handed back unchanged when it already exposes a
    `read` attribute; otherwise it is treated as a filename and opened
    for reading with the default mode.

    """
    if not hasattr(source, "read"):
        return open(source)
    return source
45
46
def my_open_write(dest):
    """Return a writable file object for `dest`.

    An object that already exposes a `write` attribute is returned as
    is; anything else is treated as a filename and opened in 'w' mode
    (truncating an existing file).

    """
    return dest if hasattr(dest, "write") else open(dest, 'w')
52
53
class Doxy2SWIG:

    """Converts Doxygen generated XML files into a file containing
    docstrings that can be used by SWIG-1.3.x that have support for
    feature("docstring").  Once the data is parsed it is stored in
    self.pieces.

    """

    def __init__(self, src, include_function_definition=True, quiet=False):
        """Initialize the instance given a source object.  `src` can
        be a file or filename.  If you do not want to include function
        definitions from doxygen then set
        `include_function_definition` to `False`.  This is handy since
        this allows you to use the swig generated function definition
        using %feature("autodoc", [0,1]).

        If `quiet` is true, no progress message is printed while the
        files referenced by an index file are parsed.

        """
        f = my_open_read(src)
        # File-like sources (e.g. StringIO) need not carry a `name`;
        # fall back to an empty name instead of failing.
        src_name = getattr(f, 'name', '')
        try:
            self.xmldoc = minidom.parse(f).documentElement
        finally:
            # Release the handle even when the XML is malformed.
            f.close()
        self.my_dir = os.path.dirname(src_name)

        self.pieces = ['\n// File: %s\n' % os.path.basename(src_name)]

        self.space_re = re.compile(r'\s+')
        self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
        # Set to 1 by `do_doxygenindex` when the source is an index
        # file that pulls in other XML files.
        self.multi = 0
        # Tag names that `parse_Element` skips entirely.
        self.ignores = ['inheritancegraph', 'param', 'listofallmembers',
                        'innerclass', 'name', 'declname', 'incdepgraph',
                        'invincdepgraph', 'programlisting', 'type',
                        'references', 'referencedby', 'location',
                        'collaborationgraph', 'reimplements',
                        'reimplementedby', 'derivedcompoundref',
                        'basecompoundref']
        self.include_function_definition = include_function_definition
        if not include_function_definition:
            self.ignores.append('argsstring')

        self.quiet = quiet

    def generate(self):
        """Parses the file set in the initialization.  The resulting
        data is stored in `self.pieces`.

        """
        self.parse(self.xmldoc)

    def parse(self, node):
        """Parse a given node.  This function in turn calls the
        `parse_<nodeType>` functions which handle the respective
        nodes.

        The handler is looked up from the node's class name, so a node
        class without a matching `parse_*` method raises
        `AttributeError`.

        """
        pm = getattr(self, "parse_%s" % node.__class__.__name__)
        pm(node)

    def parse_Document(self, node):
        """Parse a `DOCUMENT_NODE` by parsing its document element."""
        self.parse(node.documentElement)

    def parse_Text(self, node):
        """Parse a `TEXT_NODE`: escape it, wrap it and add it to
        `self.pieces`.  Text consisting only of whitespace is dropped.

        """
        txt = node.data
        # Each backslash becomes four backslashes and each double
        # quote gets a backslash in front, since the text ends up
        # inside a double-quoted string in the generated file.
        txt = txt.replace('\\', r'\\\\')
        txt = txt.replace('"', r'\"')
        # ignore pure whitespace
        m = self.space_re.match(txt)
        if m and len(m.group()) == len(txt):
            return
        self.add_text(textwrap.fill(txt, break_long_words=False))

    def parse_Element(self, node):
        """Parse an `ELEMENT_NODE`.  This calls specific
        `do_<tagName>` handlers for different elements.  If no handler
        is available the `generic_parse` method is called.  All
        tagNames specified in `self.ignores` are simply ignored.

        """
        name = node.tagName
        if name in self.ignores:
            return
        handler = getattr(self, "do_%s" % name, None)
        if handler is not None:
            handler(node)
        else:
            self.generic_parse(node)

    def parse_Comment(self, node):
        """Parse a `COMMENT_NODE`.  This does nothing for now."""
        return

    def add_text(self, value):
        """Adds text corresponding to `value` into `self.pieces`.

        A list or tuple is added item by item; anything else is added
        as a single piece.

        """
        if isinstance(value, (list, tuple)):
            self.pieces.extend(value)
        else:
            self.pieces.append(value)

    def get_specific_nodes(self, node, names):
        """Given a node and a sequence of strings in `names`, return a
        dictionary containing the names as keys and child
        `ELEMENT_NODEs`, that have a `tagName` equal to the name.

        When several children share a tag name the last one wins.

        """
        return {x.tagName: x for x in node.childNodes
                if x.nodeType == x.ELEMENT_NODE and x.tagName in names}

    def generic_parse(self, node, pad=0):
        """A Generic parser for arbitrary tags in a node.

        Parameters:

         - node:  A node in the DOM.
         - pad: `int` (default: 0)

           If 0 the node data is not padded with newlines.  If 1 it
           appends a newline after parsing the childNodes.  If 2 it
           pads before and after the nodes are processed.  Defaults to
           0.

        """
        npiece = 0
        if pad:
            npiece = len(self.pieces)
            if pad == 2:
                self.add_text('\n')
        for n in node.childNodes:
            self.parse(n)
        # Only append the trailing newline when something was added
        # since `npiece` was recorded.
        if pad and len(self.pieces) > npiece:
            self.add_text('\n')

    def space_parse(self, node):
        """Add a single space and then parse the children of `node`."""
        self.add_text(' ')
        self.generic_parse(node)

    # Inline markup elements are all emitted as a space followed by
    # their content.
    do_ref = space_parse
    do_emphasis = space_parse
    do_bold = space_parse
    do_computeroutput = space_parse
    do_formula = space_parse

    def do_compoundname(self, node):
        """Open a `%feature("docstring")` entry named after the text
        of the `compoundname` element.

        """
        self.add_text('\n\n')
        data = node.firstChild.data
        self.add_text('%%feature("docstring") %s "\n' % data)

    def do_compounddef(self, node):
        """Handle a `compounddef` element according to its `kind`.

        For a public class or struct the name, brief and detailed
        descriptions and includes are emitted first (in that order)
        and the docstring is closed before the remaining children are
        parsed.  For a file or namespace every `sectiondef` descendant
        is parsed.  Other kinds and non-public classes produce nothing.

        """
        kind = node.attributes['kind'].value
        if kind in ('class', 'struct'):
            prot = node.attributes['prot'].value
            if prot != 'public':
                return
            names = ('compoundname', 'briefdescription',
                     'detaileddescription', 'includes')
            first = self.get_specific_nodes(node, names)
            for n in names:
                # `dict.has_key` no longer exists in Python 3.
                if n in first:
                    self.parse(first[n])
            self.add_text(['";', '\n'])
            handled = list(first.values())
            for n in node.childNodes:
                if n not in handled:
                    self.parse(n)
        elif kind in ('file', 'namespace'):
            for n in node.getElementsByTagName('sectiondef'):
                self.parse(n)

    def do_includes(self, node):
        """Emit a 'C++ includes: ' line for an `includes` element."""
        self.add_text('C++ includes: ')
        self.generic_parse(node, pad=1)

    def do_parameterlist(self, node):
        """Emit a heading for a `parameterlist` and parse its
        children.  The heading is derived from the `kind` attribute;
        unrecognised kinds are used verbatim and a missing attribute
        gives 'unknown'.

        """
        headings = {'param': 'Parameters',
                    'exception': 'Exceptions',
                    'retval': 'Returns'}
        text = 'unknown'
        if node.hasAttribute('kind'):
            kind = node.getAttribute('kind')
            text = headings.get(kind, kind)
        self.add_text(['\n', '\n', text, ':', '\n'])
        self.generic_parse(node, pad=1)

    def do_para(self, node):
        """Emit a paragraph: a newline, the content, then a newline."""
        self.add_text('\n')
        self.generic_parse(node, pad=1)

    def do_parametername(self, node):
        """Emit the name of a parameter on a new line, followed by
        ': ' unless the name contains 'Exception'.

        """
        self.add_text('\n')
        try:
            data = node.firstChild.data
        except AttributeError:  # perhaps a <ref> tag in it
            data = node.firstChild.firstChild.data
        if 'Exception' in data:
            self.add_text(data)
        else:
            self.add_text("%s: " % data)

    def do_parameterdefinition(self, node):
        """Parse the children and append a newline."""
        self.generic_parse(node, pad=1)

    def do_detaileddescription(self, node):
        """Parse the children and append a newline."""
        self.generic_parse(node, pad=1)

    def do_briefdescription(self, node):
        """Parse the children and append a newline."""
        self.generic_parse(node, pad=1)

    def do_memberdef(self, node):
        """Emit a `%feature("docstring")` entry for a public member.

        Operators, variables, typedefs and members without a
        `definition` child are skipped.  The member name is qualified
        with the namespace or class name found on the enclosing
        `compounddef`, depending on that compound's kind.

        """
        prot = node.attributes['prot'].value
        kind = node.attributes['kind'].value
        # Three levels up is the node that contains the enclosing
        # compounddef; its first compounddef decides how the name is
        # qualified.
        tmp = node.parentNode.parentNode.parentNode
        compdef = tmp.getElementsByTagName('compounddef')[0]
        cdef_kind = compdef.attributes['kind'].value

        if prot != 'public':
            return

        first = self.get_specific_nodes(node, ('definition', 'name'))
        name = first['name'].firstChild.data
        if name[:8] == 'operator':  # Don't handle operators yet.
            return

        if 'definition' not in first or kind in ['variable', 'typedef']:
            return

        if self.include_function_definition:
            defn = first['definition'].firstChild.data
        else:
            defn = ""
        self.add_text('\n')
        self.add_text('%feature("docstring") ')

        anc = node.parentNode.parentNode
        if cdef_kind in ('file', 'namespace'):
            ns_node = anc.getElementsByTagName('innernamespace')
            if not ns_node and cdef_kind == 'namespace':
                ns_node = anc.getElementsByTagName('compoundname')
            if ns_node:
                ns = ns_node[0].firstChild.data
                self.add_text(' %s::%s "\n%s' % (ns, name, defn))
            else:
                self.add_text(' %s "\n%s' % (name, defn))
        elif cdef_kind in ('class', 'struct'):
            # Get the full function name.
            anc_node = anc.getElementsByTagName('compoundname')
            cname = anc_node[0].firstChild.data
            self.add_text(' %s::%s "\n%s' % (cname, name, defn))

        handled = list(first.values())
        for n in node.childNodes:
            if n not in handled:
                self.parse(n)
        self.add_text(['";', '\n'])

    def do_definition(self, node):
        """Emit the definition text twice: once as the entry name and
        once as the first line inside the opened string.

        """
        data = node.firstChild.data
        self.add_text('%s "\n%s' % (data, data))

    def do_sectiondef(self, node):
        """Parse a `sectiondef` only when its kind is one of
        'public-func', 'func', 'user-defined' or empty.

        """
        kind = node.attributes['kind'].value
        if kind in ('public-func', 'func', 'user-defined', ''):
            self.generic_parse(node)

    def do_header(self, node):
        """For a user defined section def a header field is present
        which should not be printed as such, so we comment it in the
        output."""
        data = node.firstChild.data
        self.add_text('\n/*\n %s \n*/\n' % data)
        # If the node two positions after the header (i.e. past the
        # text node that normally separates siblings) is a
        # 'description' node then we should comment that out also and
        # remove it from the parent node's children.
        parent = node.parentNode
        idx = parent.childNodes.index(node)
        # Strictly greater: index idx + 2 must exist before it is read.
        if len(parent.childNodes) > idx + 2:
            nd = parent.childNodes[idx + 2]
            if nd.nodeName == 'description':
                nd = parent.removeChild(nd)
                self.add_text('\n/*')
                self.generic_parse(nd)
                self.add_text('\n*/\n')

    def do_simplesect(self, node):
        """Handle a `simplesect` according to its `kind`: 'date',
        'rcs' and 'version' sections are dropped, 'warning' and 'see'
        sections get a prefix, everything else is parsed as is.

        """
        kind = node.attributes['kind'].value
        if kind in ('date', 'rcs', 'version'):
            return
        if kind == 'warning':
            self.add_text(['\n', 'WARNING: '])
        elif kind == 'see':
            self.add_text('\n')
            self.add_text('See: ')
        self.generic_parse(node)

    def do_argsstring(self, node):
        """Parse the children and append a newline."""
        self.generic_parse(node, pad=1)

    def do_member(self, node):
        """Parse a `member` only when it is a function whose `refid`
        starts with 'namespace'.

        """
        kind = node.attributes['kind'].value
        refid = node.attributes['refid'].value
        if kind == 'function' and refid[:9] == 'namespace':
            self.generic_parse(node)

    def do_doxygenindex(self, node):
        """Handle an index file: parse the XML file of every
        `compound` it lists and collect the cleaned pieces.

        Each file is named `<refid>.xml` and is looked up relative to
        the current directory first, then in the directory of the
        index file.

        """
        self.multi = 1
        comps = node.getElementsByTagName('compound')
        for c in comps:
            refid = c.attributes['refid'].value
            fname = refid + '.xml'
            if not os.path.exists(fname):
                fname = os.path.join(self.my_dir,  fname)
            if not self.quiet:
                print("parsing file: %s" % fname)
            p = Doxy2SWIG(fname, self.include_function_definition, self.quiet)
            p.generate()
            self.pieces.extend(self.clean_pieces(p.pieces))

    def write(self, fname):
        """Write the collected pieces to `fname`, which can be a file
        or filename.  The destination is closed afterwards.

        Pieces gathered from an index file were already cleaned in
        `do_doxygenindex`; otherwise they are cleaned here.

        """
        o = my_open_write(fname)
        try:
            if self.multi:
                pieces = self.pieces
            else:
                pieces = self.clean_pieces(self.pieces)
            data = "".join(pieces)
            # On Python 2 the joined DOM text is `unicode`, which must
            # be encoded before it is written to a byte file.  On
            # Python 3 it is already `str` and is written as text.
            if not isinstance(data, str):
                data = data.encode('utf-8')
            o.write(data)
        finally:
            o.close()

    def clean_pieces(self, pieces):
        """Cleans the list of strings given as `pieces`.  It replaces
        multiple newlines by a maximum of 2 and returns a new list.
        It also wraps the paragraphs nicely.

        """
        # First pass: collapse runs of '\n' pieces.  Newlines at the
        # very end of `pieces` are dropped.
        collapsed = []
        newlines = 0
        for piece in pieces:
            if piece == '\n':
                newlines += 1
                continue
            if piece == '";':
                # At most one newline before the closing quote.
                if newlines:
                    collapsed.append('\n')
            elif newlines > 2:
                collapsed.append('\n\n')
            elif newlines:
                collapsed.append('\n' * newlines)
            newlines = 0
            collapsed.append(piece)

        # Second pass: re-wrap every paragraph.
        ret = []
        for para in "".join(collapsed).split('\n\n'):
            if para in ('Parameters:', 'Exceptions:', 'Returns:'):
                # Underline the section headings.
                ret.extend([para, '\n' + '-' * len(para), '\n\n'])
            elif '// File:' in para:  # leave comments alone.
                ret.extend([para, '\n'])
            else:
                wrapped = textwrap.fill(para.strip(), break_long_words=False)
                # Drop the whitespace between the opening quote of a
                # %feature line and the first word of its text.
                wrapped = self.lead_spc.sub(r'\1"\2', wrapped)
                ret.extend([wrapped, '\n\n'])
        return ret
427
428
def convert(input, output, include_function_definition=True, quiet=False):
    """Convert the Doxygen XML in `input` to SWIG docstrings in `output`.

    Both arguments are passed on to `Doxy2SWIG`, which is built from
    `input`, asked to generate its pieces and then to write them to
    `output`.  `include_function_definition` and `quiet` are forwarded
    to the `Doxy2SWIG` constructor unchanged.

    """
    converter = Doxy2SWIG(input,
                          include_function_definition=include_function_definition,
                          quiet=quiet)
    converter.generate()
    converter.write(output)
433
434
def main():
    """Command line entry point.

    Parses the options and the two positional arguments (input XML
    file and output interface file) from `sys.argv` and runs
    `convert`.  When the number of positional arguments is not exactly
    two, the usage text and an error are printed and the process exits
    through `optparse`'s error handling.

    """
    parser = optparse.OptionParser(__doc__)
    parser.add_option("-n", '--no-function-definition',
                      action='store_true',
                      default=False,
                      dest='no_func_def',
                      help='do not include doxygen function definitions')
    parser.add_option("-q", '--quiet',
                      action='store_true',
                      default=False,
                      dest='quiet',
                      help='be quiet and minimize output')

    options, args = parser.parse_args()
    if len(args) != 2:
        # parser.error() already prefixes the message with "error: ".
        parser.error("expected exactly two arguments: input.xml output.i")

    convert(args[0], args[1], not options.no_func_def, options.quiet)
454
455
# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
458