#!/usr/bin/env python
"""Doxygen XML to SWIG docstring converter.

Usage:

  doxy2swig.py [options] input.xml output.i

Converts Doxygen generated XML files into a file containing docstrings
that can be used by SWIG-1.3.x.  Note that you need to get SWIG
version > 1.3.23 or use Robin Dunn's docstring patch to be able to use
the resulting output.

input.xml is your doxygen generated XML file and output.i is where the
output will be written (the file will be clobbered).

"""
#
#
# This code is implemented using Mark Pilgrim's code as a guideline:
#   http://www.faqs.org/docs/diveintopython/kgp_divein.html
#
# Author: Prabhu Ramachandran
# License: BSD style
#
# Thanks:
#   Johan Hake:  the include_function_definition feature
#   Bill Spotz:  bug reports and testing.
#   Sebastian Henschel:   Misc. enhancements.
#
#

from xml.dom import minidom
import io
import re
import textwrap
import sys
import os.path
import optparse


def my_open_read(source):
    """Return a readable stream for `source`.

    `source` is returned unchanged when it already has a `read`
    attribute; otherwise it is treated as a filename.  The file is
    opened in binary mode so that the XML parser decodes it according
    to the document's own encoding declaration rather than the locale.

    """
    if hasattr(source, "read"):
        return source
    return open(source, 'rb')


def my_open_write(dest):
    """Return a writable stream for `dest`.

    `dest` is returned unchanged when it already has a `write`
    attribute; otherwise it is treated as a filename, which is
    truncated and opened as UTF-8 text.

    """
    if hasattr(dest, "write"):
        return dest
    return io.open(dest, 'w', encoding='utf-8')


class Doxy2SWIG:

    """Converts Doxygen generated XML files into a file containing
    docstrings that can be used by SWIG-1.3.x that have support for
    feature("docstring").  Once the data is parsed it is stored in
    self.pieces.

    """

    # Section titles emitted by `do_parameterlist`, keyed by the
    # `kind` attribute of the <parameterlist> element.
    _PARAMETERLIST_TITLES = {
        'param': 'Parameters',
        'exception': 'Exceptions',
        'retval': 'Returns',
    }

    def __init__(self, src, include_function_definition=True, quiet=False):
        """Initialize the instance given a source object.  `src` can
        be a file or filename.  If you do not want to include function
        definitions from doxygen then set
        `include_function_definition` to `False`.  This is handy since
        this allows you to use the swig generated function definition
        using %feature("autodoc", [0,1]).

        If `quiet` is true, no progress messages are printed while
        the compounds referenced by an index file are parsed.

        """
        f = my_open_read(src)
        # File-like sources (e.g. in-memory buffers) may have no name;
        # fall back to an empty one instead of failing.
        src_name = getattr(f, 'name', '')
        try:
            self.xmldoc = minidom.parse(f).documentElement
        finally:
            f.close()
        self.my_dir = os.path.dirname(src_name)

        self.pieces = []
        self.pieces.append('\n// File: %s\n' %
                           os.path.basename(src_name))

        self.space_re = re.compile(r'\s+')
        self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
        self.multi = 0
        self.ignores = ['inheritancegraph', 'param', 'listofallmembers',
                        'innerclass', 'name', 'declname', 'incdepgraph',
                        'invincdepgraph', 'programlisting', 'type',
                        'references', 'referencedby', 'location',
                        'collaborationgraph', 'reimplements',
                        'reimplementedby', 'derivedcompoundref',
                        'basecompoundref']
        self.include_function_definition = include_function_definition
        if not include_function_definition:
            self.ignores.append('argsstring')

        self.quiet = quiet

    def generate(self):
        """Parses the file set in the initialization.  The resulting
        data is stored in `self.pieces`.

        """
        self.parse(self.xmldoc)

    def parse(self, node):
        """Parse a given node.  This function in turn calls the
        `parse_<nodeType>` functions which handle the respective
        nodes.

        """
        pm = getattr(self, "parse_%s" % node.__class__.__name__)
        pm(node)

    def parse_Document(self, node):
        """Parse a `DOCUMENT_NODE` by parsing its document element."""
        self.parse(node.documentElement)

    def parse_Text(self, node):
        """Parse a `TEXT_NODE`.  Backslashes and double quotes are
        escaped, and the wrapped text is added to `self.pieces`.
        Text consisting only of whitespace is dropped.

        """
        txt = node.data
        txt = txt.replace('\\', r'\\\\')
        txt = txt.replace('"', r'\"')
        # ignore pure whitespace
        m = self.space_re.match(txt)
        if m and m.end() == len(txt):
            return
        self.add_text(textwrap.fill(txt, break_long_words=False))

    def parse_Element(self, node):
        """Parse an `ELEMENT_NODE`.  This calls specific
        `do_<tagName>` handlers for different elements.  If no handler
        is available the `generic_parse` method is called.  All
        tagNames specified in `self.ignores` are simply ignored.

        """
        name = node.tagName
        if name in self.ignores:
            return
        handler = getattr(self, "do_%s" % name, None)
        if handler is not None:
            handler(node)
        else:
            self.generic_parse(node)

    def parse_Comment(self, node):
        """Parse a `COMMENT_NODE`.  This does nothing for now."""
        return

    def add_text(self, value):
        """Adds text corresponding to `value` into `self.pieces`.
        A list or tuple is added item by item.

        """
        if isinstance(value, (list, tuple)):
            self.pieces.extend(value)
        else:
            self.pieces.append(value)

    def get_specific_nodes(self, node, names):
        """Given a node and a sequence of strings in `names`, return a
        dictionary containing the names as keys and child
        `ELEMENT_NODEs`, that have a `tagName` equal to the name.
        If several children share a tag name the last one wins.

        """
        nodes = [(x.tagName, x) for x in node.childNodes
                 if x.nodeType == x.ELEMENT_NODE and
                 x.tagName in names]
        return dict(nodes)

    def generic_parse(self, node, pad=0):
        """A Generic parser for arbitrary tags in a node.

        Parameters:

         - node:  A node in the DOM.
         - pad: `int` (default: 0)

           If 0 the node data is not padded with newlines.  If 1 it
           appends a newline after parsing the childNodes.  If 2 it
           pads before and after the nodes are processed.  Defaults to
           0.

        """
        npiece = 0
        if pad:
            npiece = len(self.pieces)
            if pad == 2:
                self.add_text('\n')
        for n in node.childNodes:
            self.parse(n)
        # Only add the trailing newline if something was emitted.
        if pad and len(self.pieces) > npiece:
            self.add_text('\n')

    def space_parse(self, node):
        """Add a single space and then parse the children of `node`."""
        self.add_text(' ')
        self.generic_parse(node)

    # Inline markup elements are all emitted as space-separated text.
    do_ref = space_parse
    do_emphasis = space_parse
    do_bold = space_parse
    do_computeroutput = space_parse
    do_formula = space_parse

    def do_compoundname(self, node):
        """Open the docstring feature for the named compound."""
        self.add_text('\n\n')
        data = node.firstChild.data
        self.add_text('%%feature("docstring") %s "\n' % data)

    def do_compounddef(self, node):
        """Handle a <compounddef>.  For public classes and structs the
        name, descriptions and includes are emitted first, followed by
        the remaining children.  For files and namespaces only the
        <sectiondef> descendants are parsed.  Other kinds are ignored.

        """
        kind = node.attributes['kind'].value
        if kind in ('class', 'struct'):
            prot = node.attributes['prot'].value
            if prot != 'public':
                return
            names = ('compoundname', 'briefdescription',
                     'detaileddescription', 'includes')
            first = self.get_specific_nodes(node, names)
            for n in names:
                if n in first:
                    self.parse(first[n])
            self.add_text(['";', '\n'])
            handled = list(first.values())
            for n in node.childNodes:
                if n not in handled:
                    self.parse(n)
        elif kind in ('file', 'namespace'):
            nodes = node.getElementsByTagName('sectiondef')
            for n in nodes:
                self.parse(n)

    def do_includes(self, node):
        """Emit the header named by an <includes> element."""
        self.add_text('C++ includes: ')
        self.generic_parse(node, pad=1)

    def do_parameterlist(self, node):
        """Emit a section title chosen from the `kind` attribute of
        the <parameterlist> and then parse its children.  Unrecognised
        kinds are used verbatim; a missing `kind` gives 'unknown'.

        """
        text = 'unknown'
        if node.hasAttribute('kind'):
            kind = node.getAttribute('kind')
            text = self._PARAMETERLIST_TITLES.get(kind, kind)
        self.add_text(['\n', '\n', text, ':', '\n'])
        self.generic_parse(node, pad=1)

    def do_para(self, node):
        """Emit a paragraph on its own line."""
        self.add_text('\n')
        self.generic_parse(node, pad=1)

    def do_parametername(self, node):
        """Emit a parameter name followed by ': '.  Names containing
        'Exception' are emitted without the trailing colon.

        """
        self.add_text('\n')
        try:
            data = node.firstChild.data
        except AttributeError:  # perhaps a <ref> tag in it
            data = node.firstChild.firstChild.data
        if 'Exception' in data:
            self.add_text(data)
        else:
            self.add_text("%s: " % data)

    def do_parameterdefinition(self, node):
        """Parse the children of `node`, followed by a newline."""
        self.generic_parse(node, pad=1)

    def do_detaileddescription(self, node):
        """Parse the children of `node`, followed by a newline."""
        self.generic_parse(node, pad=1)

    def do_briefdescription(self, node):
        """Parse the children of `node`, followed by a newline."""
        self.generic_parse(node, pad=1)

    def do_memberdef(self, node):
        """Emit a docstring feature for a public member.  Operators,
        variables, typedefs and members without a <definition> are
        skipped.  The member name is qualified with the namespace or
        class name found in the enclosing compound.

        """
        prot = node.attributes['prot'].value
        kind = node.attributes['kind'].value
        tmp = node.parentNode.parentNode.parentNode
        compdef = tmp.getElementsByTagName('compounddef')[0]
        cdef_kind = compdef.attributes['kind'].value

        if prot != 'public':
            return

        first = self.get_specific_nodes(node, ('definition', 'name'))
        name = first['name'].firstChild.data
        if name.startswith('operator'):  # Don't handle operators yet.
            return

        if 'definition' not in first or kind in ('variable', 'typedef'):
            return

        if self.include_function_definition:
            defn = first['definition'].firstChild.data
        else:
            defn = ""
        self.add_text('\n')
        self.add_text('%feature("docstring") ')

        anc = node.parentNode.parentNode
        if cdef_kind in ('file', 'namespace'):
            ns_node = anc.getElementsByTagName('innernamespace')
            if not ns_node and cdef_kind == 'namespace':
                ns_node = anc.getElementsByTagName('compoundname')
            if ns_node:
                ns = ns_node[0].firstChild.data
                self.add_text(' %s::%s "\n%s' % (ns, name, defn))
            else:
                self.add_text(' %s "\n%s' % (name, defn))
        elif cdef_kind in ('class', 'struct'):
            # Get the full function name.
            anc_node = anc.getElementsByTagName('compoundname')
            cname = anc_node[0].firstChild.data
            self.add_text(' %s::%s "\n%s' % (cname, name, defn))

        handled = list(first.values())
        for n in node.childNodes:
            if n not in handled:
                self.parse(n)
        self.add_text(['";', '\n'])

    def do_definition(self, node):
        """Emit the definition text twice: once as the feature target
        and once as the first line of the docstring.

        """
        data = node.firstChild.data
        self.add_text('%s "\n%s' % (data, data))

    def do_sectiondef(self, node):
        """Parse only sections that hold public or free functions and
        user-defined groups; all other section kinds are skipped.

        """
        kind = node.attributes['kind'].value
        if kind in ('public-func', 'func', 'user-defined', ''):
            self.generic_parse(node)

    def do_header(self, node):
        """For a user defined section def a header field is present
        which should not be printed as such, so we comment it in the
        output."""
        data = node.firstChild.data
        self.add_text('\n/*\n %s \n*/\n' % data)
        # If our immediate sibling is a 'description' node then we
        # should comment that out also and remove it from the parent
        # node's children.  The sibling is looked up two positions
        # further on, which skips the whitespace text node that
        # normally separates the two elements.
        parent = node.parentNode
        idx = parent.childNodes.index(node)
        if len(parent.childNodes) > idx + 2:
            nd = parent.childNodes[idx + 2]
            if nd.nodeName == 'description':
                nd = parent.removeChild(nd)
                self.add_text('\n/*')
                self.generic_parse(nd)
                self.add_text('\n*/\n')

    def do_simplesect(self, node):
        """Handle a <simplesect>.  Date, rcs and version sections are
        dropped, warnings and cross references get a prefix, and every
        other kind is parsed as is.

        """
        kind = node.attributes['kind'].value
        if kind in ('date', 'rcs', 'version'):
            return
        if kind == 'warning':
            self.add_text(['\n', 'WARNING: '])
        elif kind == 'see':
            self.add_text('\n')
            self.add_text('See: ')
        self.generic_parse(node)

    def do_argsstring(self, node):
        """Parse the children of `node`, followed by a newline."""
        self.generic_parse(node, pad=1)

    def do_member(self, node):
        """Parse a <member> only if it is a function whose `refid`
        starts with 'namespace'.

        """
        kind = node.attributes['kind'].value
        refid = node.attributes['refid'].value
        if kind == 'function' and refid.startswith('namespace'):
            self.generic_parse(node)

    def do_doxygenindex(self, node):
        """Handle the root of an index file: every <compound> it lists
        is parsed from its own `<refid>.xml` file and the cleaned
        result is appended to `self.pieces`.

        """
        self.multi = 1
        comps = node.getElementsByTagName('compound')
        for c in comps:
            refid = c.attributes['refid'].value
            fname = refid + '.xml'
            # Fall back to the directory of the index file.
            if not os.path.exists(fname):
                fname = os.path.join(self.my_dir, fname)
            if not self.quiet:
                print("parsing file: %s" % fname)
            p = Doxy2SWIG(fname, self.include_function_definition,
                          self.quiet)
            p.generate()
            self.pieces.extend(self.clean_pieces(p.pieces))

    def write(self, fname):
        """Write `self.pieces` to `fname`, which can be a filename or
        an object with a `write` method.  The stream is closed after
        writing.

        """
        o = my_open_write(fname)
        try:
            if self.multi:
                # The pieces were already cleaned, one compound at a
                # time, by `do_doxygenindex`.
                o.write("".join(self.pieces))
            else:
                o.write("".join(self.clean_pieces(self.pieces)))
        finally:
            o.close()

    def clean_pieces(self, pieces):
        """Cleans the list of strings given as `pieces`.  It replaces
        multiple newlines by a maximum of 2 and returns a new list.
        It also wraps the paragraphs nicely.

        """
        ret = []
        count = 0
        for i in pieces:
            if i == '\n':
                count = count + 1
                continue
            if i == '";':
                # A closing quote gets at most one newline before it.
                if count:
                    ret.append('\n')
            elif count > 2:
                ret.append('\n\n')
            elif count:
                ret.append('\n' * count)
            count = 0
            ret.append(i)

        _data = "".join(ret)
        ret = []
        for i in _data.split('\n\n'):
            if i in ('Parameters:', 'Exceptions:', 'Returns:'):
                ret.extend([i, '\n' + '-' * len(i), '\n\n'])
            elif i.find('// File:') > -1:  # leave comments alone.
                ret.extend([i, '\n'])
            else:
                _tmp = textwrap.fill(i.strip(), break_long_words=False)
                _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
                ret.extend([_tmp, '\n\n'])
        return ret


def convert(input, output, include_function_definition=True, quiet=False):
    """Convert the Doxygen XML in `input` and write the SWIG
    docstrings to `output`.  Both can be a filename or a file object.

    """
    p = Doxy2SWIG(input, include_function_definition, quiet)
    p.generate()
    p.write(output)


def main():
    """Parse the command line and run the conversion."""
    usage = __doc__
    parser = optparse.OptionParser(usage)
    parser.add_option("-n", '--no-function-definition',
                      action='store_true',
                      default=False,
                      dest='func_def',
                      help='do not include doxygen function definitions')
    parser.add_option("-q", '--quiet',
                      action='store_true',
                      default=False,
                      dest='quiet',
                      help='be quiet and minimize output')

    options, args = parser.parse_args()
    if len(args) != 2:
        # parser.error() already prefixes the message with "error: ".
        parser.error("expected exactly two arguments: input.xml output.i")

    convert(args[0], args[1], not options.func_def, options.quiet)


if __name__ == '__main__':
    main()