1"""Simple code to extract class & function docstrings from a module. 2 3This code is used as an example in the library reference manual in the 4section on using the parser module. Refer to the manual for a thorough 5discussion of the operation of this code. 6""" 7 8import os 9import parser 10import symbol 11import token 12import types 13 14from types import ListType, TupleType 15 16 17def get_docs(fileName): 18 """Retrieve information from the parse tree of a source file. 19 20 fileName 21 Name of the file to read Python source code from. 22 """ 23 source = open(fileName).read() 24 basename = os.path.basename(os.path.splitext(fileName)[0]) 25 ast = parser.suite(source) 26 return ModuleInfo(ast.totuple(), basename) 27 28 29class SuiteInfoBase: 30 _docstring = '' 31 _name = '' 32 33 def __init__(self, tree = None): 34 self._class_info = {} 35 self._function_info = {} 36 if tree: 37 self._extract_info(tree) 38 39 def _extract_info(self, tree): 40 # extract docstring 41 if len(tree) == 2: 42 found, vars = match(DOCSTRING_STMT_PATTERN[1], tree[1]) 43 else: 44 found, vars = match(DOCSTRING_STMT_PATTERN, tree[3]) 45 if found: 46 self._docstring = eval(vars['docstring']) 47 # discover inner definitions 48 for node in tree[1:]: 49 found, vars = match(COMPOUND_STMT_PATTERN, node) 50 if found: 51 cstmt = vars['compound'] 52 if cstmt[0] == symbol.funcdef: 53 name = cstmt[2][1] 54 self._function_info[name] = FunctionInfo(cstmt) 55 elif cstmt[0] == symbol.classdef: 56 name = cstmt[2][1] 57 self._class_info[name] = ClassInfo(cstmt) 58 59 def get_docstring(self): 60 return self._docstring 61 62 def get_name(self): 63 return self._name 64 65 def get_class_names(self): 66 return self._class_info.keys() 67 68 def get_class_info(self, name): 69 return self._class_info[name] 70 71 def __getitem__(self, name): 72 try: 73 return self._class_info[name] 74 except KeyError: 75 return self._function_info[name] 76 77 78class SuiteFuncInfo: 79 # Mixin class providing access to function names and info. 80 81 def get_function_names(self): 82 return self._function_info.keys() 83 84 def get_function_info(self, name): 85 return self._function_info[name] 86 87 88class FunctionInfo(SuiteInfoBase, SuiteFuncInfo): 89 def __init__(self, tree = None): 90 self._name = tree[2][1] 91 SuiteInfoBase.__init__(self, tree and tree[-1] or None) 92 93 94class ClassInfo(SuiteInfoBase): 95 def __init__(self, tree = None): 96 self._name = tree[2][1] 97 SuiteInfoBase.__init__(self, tree and tree[-1] or None) 98 99 def get_method_names(self): 100 return self._function_info.keys() 101 102 def get_method_info(self, name): 103 return self._function_info[name] 104 105 106class ModuleInfo(SuiteInfoBase, SuiteFuncInfo): 107 def __init__(self, tree = None, name = "<string>"): 108 self._name = name 109 SuiteInfoBase.__init__(self, tree) 110 if tree: 111 found, vars = match(DOCSTRING_STMT_PATTERN, tree[1]) 112 if found: 113 self._docstring = vars["docstring"] 114 115 116def match(pattern, data, vars=None): 117 """Match `data' to `pattern', with variable extraction. 118 119 pattern 120 Pattern to match against, possibly containing variables. 121 122 data 123 Data to be checked and against which variables are extracted. 124 125 vars 126 Dictionary of variables which have already been found. If not 127 provided, an empty dictionary is created. 128 129 The `pattern' value may contain variables of the form ['varname'] which 130 are allowed to match anything. The value that is matched is returned as 131 part of a dictionary which maps 'varname' to the matched value. 'varname' 132 is not required to be a string object, but using strings makes patterns 133 and the code which uses them more readable. 134 135 This function returns two values: a boolean indicating whether a match 136 was found and a dictionary mapping variable names to their associated 137 values. 138 """ 139 if vars is None: 140 vars = {} 141 if type(pattern) is ListType: # 'variables' are ['varname'] 142 vars[pattern[0]] = data 143 return 1, vars 144 if type(pattern) is not TupleType: 145 return (pattern == data), vars 146 if len(data) != len(pattern): 147 return 0, vars 148 for pattern, data in map(None, pattern, data): 149 same, vars = match(pattern, data, vars) 150 if not same: 151 break 152 return same, vars 153 154 155# This pattern identifies compound statements, allowing them to be readily 156# differentiated from simple statements. 157# 158COMPOUND_STMT_PATTERN = ( 159 symbol.stmt, 160 (symbol.compound_stmt, ['compound']) 161 ) 162 163 164# This pattern will match a 'stmt' node which *might* represent a docstring; 165# docstrings require that the statement which provides the docstring be the 166# first statement in the class or function, which this pattern does not check. 167# 168DOCSTRING_STMT_PATTERN = ( 169 symbol.stmt, 170 (symbol.simple_stmt, 171 (symbol.small_stmt, 172 (symbol.expr_stmt, 173 (symbol.testlist, 174 (symbol.test, 175 (symbol.and_test, 176 (symbol.not_test, 177 (symbol.comparison, 178 (symbol.expr, 179 (symbol.xor_expr, 180 (symbol.and_expr, 181 (symbol.shift_expr, 182 (symbol.arith_expr, 183 (symbol.term, 184 (symbol.factor, 185 (symbol.power, 186 (symbol.atom, 187 (token.STRING, ['docstring']) 188 )))))))))))))))), 189 (token.NEWLINE, '') 190 )) 191