import re
import shlex
import subprocess

from ..common.info import UNKNOWN

from . import source


# NOTE(review): the first alternative matches only a single letter, so an
# identifier that does not start with "_" is matched by its first
# character only.  That is harmless for the prefix matching done below.
IDENTIFIER = r'(?:[a-zA-Z]|_+[a-zA-Z0-9]\w*)'

TYPE_QUAL = r'(?:const|volatile)'

# NOTE(review): the nested group below ends with an empty alternative, so
# it always matches (possibly the empty string).  The alternatives that
# follow it (float, double, identifier, struct/union) are therefore never
# reached, and every line "matches" the *_START_RE patterns.  The parsing
# helpers rely on that fallback, so it is intentionally left alone here.
VAR_TYPE_SPEC = rf'''(?:
        void |
        (?:
         (?:(?:un)?signed\s+)?
         (?:
          char |
          short |
          int |
          long |
          long\s+int |
          long\s+long
          ) |
         ) |
        float |
        double |
        {IDENTIFIER} |
        (?:struct|union)\s+{IDENTIFIER}
        )'''

POINTER = rf'''(?:
        (?:\s+const)?\s*[*]
        )'''

#STRUCT = r'''(?:
#        (?:struct|(struct\s+%s))\s*[{]
#            [^}]*
#        [}]
#        )''' % (IDENTIFIER)
#UNION = r'''(?:
#        (?:union|(union\s+%s))\s*[{]
#            [^}]*
#        [}]
#        )''' % (IDENTIFIER)
#DECL_SPEC = rf'''(?:
#        ({VAR_TYPE_SPEC}) |
#        ({STRUCT}) |
#        ({UNION})
#        )'''

# The "#(?:const\s+)?" line is a comment *inside* the verbose regex; it
# must stay on its own line or it would swallow the rest of the pattern.
FUNC_START = rf'''(?:
        (?:
          (?:
            extern |
            static |
            static\s+inline
           )\s+
         )?
        #(?:const\s+)?
        {VAR_TYPE_SPEC}
        )'''
#GLOBAL_VAR_START = rf'''(?:
#        (?:
#          (?:
#            extern |
#            static
#           )\s+
#         )?
#        (?:
#           {TYPE_QUAL}
#           (?:\s+{TYPE_QUAL})?
#         )?\s+
#        {VAR_TYPE_SPEC}
#        )'''
GLOBAL_DECL_START_RE = re.compile(rf'''
        ^
        (?:
            ({FUNC_START})
         )
        ''', re.VERBOSE)

LOCAL_VAR_START = rf'''(?:
        (?:
          (?:
            register |
            static
           )\s+
         )?
        (?:
          (?:
            {TYPE_QUAL}
            (?:\s+{TYPE_QUAL})?
           )\s+
         )?
        {VAR_TYPE_SPEC}
        {POINTER}?
        )'''
LOCAL_STMT_START_RE = re.compile(rf'''
        ^
        (?:
            ({LOCAL_VAR_START})
         )
        ''', re.VERBOSE)


def iter_global_declarations(lines):
    """Yield (decl, body) for each global declaration in the given lines.

    For function definitions the header is reduced to one line and
    the body is provided as-is.  For other compound declarations (e.g.
    struct) the entire declaration is reduced to one line and "body"
    is None.  Likewise for simple declarations (e.g. variables).

    Declarations inside function bodies are ignored, though their text
    is provided in the function body.

    Note that, for now, only function definitions are actually yielded.
    For those, "decl" is the full text of the definition: the one-line
    header, the body and the closing line, joined by newlines.
    """
    # XXX Bail out upon bogus syntax.
    lines = source.iter_clean_lines(lines)
    for line in lines:
        if not GLOBAL_DECL_START_RE.match(line):
            continue
        # We only need functions here, since we only need locals for now.
        if line.endswith(';'):
            continue
        if line.endswith('{') and '(' not in line:
            continue

        # Capture the function.
        # (assume no func is a one-liner)
        decl = line
        while '{' not in line:  # assume no inline structs, etc.
            try:
                line = next(lines)
            except StopIteration:
                # The input ended before the body started.
                return
            decl += ' ' + line

        body, end = _extract_block(lines)
        if end is None:
            # The input ended before the block was closed.
            return
        assert end == '}'
        yield (f'{decl}\n{body}\n{end}', body)


def iter_local_statements(lines):
    """Yield (lines, blocks) for each statement in the given lines.

    For simple statements, "blocks" is None and the statement is reduced
    to a single line.  For compound statements, "blocks" is a pair of
    (header, body) for each block in the statement.  The headers are
    reduced to a single line each, but the bodies are provided as-is.

    Note that, for now, only single-line statements ending in ";" are
    yielded; everything else is skipped.
    """
    # XXX Bail out upon bogus syntax.
    lines = source.iter_clean_lines(lines)
    for line in lines:
        if not LOCAL_STMT_START_RE.match(line):
            continue

        stmt = line
        blocks = None
        if not line.endswith(';'):
            # XXX Support compound & multiline simple statements.
            #blocks = []
            continue

        yield (stmt, blocks)


def _extract_block(lines):
    """Return (body, end) for the block whose "{" was already consumed.

    Lines are pulled from the given iterator until the brace depth
    (starting at 1) drops to zero.  "body" is the text of the lines
    before that point, joined by newlines, and "end" is the line on
    which the block closed.  "end" is None if the lines ran out first.
    """
    end = None
    depth = 1
    body = []
    for line in lines:
        depth += line.count('{') - line.count('}')
        if depth == 0:
            end = line
            break
        body.append(line)
    return '\n'.join(body), end


def parse_func(stmt, body):
    """Return (name, signature) for the given function definition.

    "stmt" is the full text of the definition (header, body, closing
    brace) and "body" is the text of just the body.  The signature is
    the header, minus the opening brace, reduced to a single line.
    """
    if body:
        header, _, end = stmt.partition(body)
    else:
        # str.partition('') raises ValueError, so for an empty body we
        # split at the final closing brace instead.
        header, brace, end = stmt.rpartition('}')
        end = brace + end
    assert end.strip() == '}'
    assert header.strip().endswith('{')
    header, _, _ = header.rpartition('{')

    signature = ' '.join(header.strip().splitlines())

    _, _, name = signature.split('(')[0].strip().rpartition(' ')
    # For "int *foo(void)" the last word is "*foo"; the pointer marker
    # belongs to the return type, not the name.
    name = name.lstrip('*')
    assert name

    return name, signature


#TYPE_SPEC = rf'''(?:
#        )'''
#VAR_DECLARATOR = rf'''(?:
#        )'''
#VAR_DECL = rf'''(?:
#            {TYPE_SPEC}+
#            {VAR_DECLARATOR}
#            \s*
#        )'''
#VAR_DECLARATION = rf'''(?:
#            {VAR_DECL}
#            (?: = [^=] [^;]* )?
#            ;
#        )'''
#
#
#def parse_variable(decl, *, inFunc=False):
#    """Return [(name, storage, vartype)] for the given variable declaration."""
#    ...
def _parse_var(stmt):
    """Return (name, vartype) for the given variable declaration.

    "stmt" is a single-line declaration; any trailing ";" and any
    initializer (everything from the first "=") are ignored.  The
    leading part matched by LOCAL_STMT_START_RE seeds the type and the
    last whitespace-separated word of the remainder is taken as the
    name.  A parenthesized declarator, e.g. "int (*fp)(void)", gives
    the name "fp" and the type "int (*)(void)".
    """
    stmt = stmt.rstrip(';')
    m = LOCAL_STMT_START_RE.match(stmt)
    assert m
    vartype = m.group(0)
    name = stmt[len(vartype):].partition('=')[0].strip()

    if name.startswith('('):
        name, _, after = name[1:].partition(')')
        assert after
        # Separate any pointer markers from the name itself.
        name = name.replace('*', '* ')
        inside, _, name = name.strip().rpartition(' ')
        vartype = f'{vartype} ({inside.strip()}){after}'
    else:
        # Separate any pointer markers from the name itself.
        name = name.replace('*', '* ')
        before, _, name = name.rpartition(' ')
        vartype = f'{vartype} {before}'

    # Collapse each run of spaces in the type down to a single space.
    vartype = vartype.strip()
    while '  ' in vartype:
        vartype = vartype.replace('  ', ' ')

    return name, vartype


def extract_storage(decl, *, infunc=None):
    """Return the storage class for the given declaration.

    The result is "static" or "extern" if the declaration starts with
    that keyword.  Otherwise the default storage is "implicit" (or
    "local" if infunc is True).  If the declaration is UNKNOWN then it
    is returned as-is.

    NotImplementedError is raised if "static" or "extern" shows up
    anywhere other than at the start of the declaration.
    """
    if decl == UNKNOWN:
        return decl
    if decl.startswith('static '):
        return 'static'
        #return 'static', decl.partition(' ')[2].strip()
    elif decl.startswith('extern '):
        return 'extern'
        #return 'extern', decl.partition(' ')[2].strip()
    # This must be a raw string: in a regular string "\b" is a backspace
    # character rather than a word boundary.
    elif re.match(r'.*\b(static|extern)\b', decl):
        raise NotImplementedError
    elif infunc:
        return 'local'
    else:
        return 'implicit'


def parse_compound(stmt, blocks):
    """Return (headers, bodies) for the given compound statement.

    This is not implemented yet and always raises NotImplementedError.
    """
    # XXX Identify declarations inside compound statements
    # (if/switch/for/while).
    raise NotImplementedError


def iter_variables(filename, *,
                   preprocessed=False,
                   _iter_source_lines=source.iter_lines,
                   _iter_global=iter_global_declarations,
                   _iter_local=iter_local_statements,
                   _parse_func=parse_func,
                   _parse_var=_parse_var,
                   _parse_compound=parse_compound,
                   ):
    """Yield (funcname, name, vartype) for every variable in the given file.

    For global variables "funcname" is None.  The underscore-prefixed
    keyword arguments are the helpers to use for each step; they exist
    so that callers can substitute their own.

    NotImplementedError is raised if "preprocessed" is true.
    """
    if preprocessed:
        raise NotImplementedError
    lines = _iter_source_lines(filename)
    for stmt, body in _iter_global(lines):
        # At the file top-level we only have to worry about vars & funcs.
        if body is None:
            # Not a function, so it must be a variable.
            name, vartype = _parse_var(stmt)
            if name:
                yield (None, name, vartype)
        elif not body:
            # A function with an empty body has no local variables.
            # (It must not be mistaken for a variable declaration.)
            continue
        else:
            funcname, _ = _parse_func(stmt, body)
            localvars = _iter_locals(body,
                                     _iter_statements=_iter_local,
                                     _parse_var=_parse_var,
                                     _parse_compound=_parse_compound,
                                     )
            for name, vartype in localvars:
                yield (funcname, name, vartype)


def _iter_locals(lines, *,
                 _iter_statements=iter_local_statements,
                 _parse_var=_parse_var,
                 _parse_compound=parse_compound,
                 ):
    """Yield (name, vartype) for each variable found in a function body.

    "lines" is the text of the body, as a single string.  The bodies of
    any compound statements are queued up and scanned in turn.
    """
    compound = [lines]
    while compound:
        body = compound.pop(0)
        bodylines = body.splitlines()
        for stmt, blocks in _iter_statements(bodylines):
            if not blocks:
                # A simple statement.
                name, vartype = _parse_var(stmt)
                if name:
                    yield (name, vartype)
            else:
                headers, bodies = _parse_compound(stmt, blocks)
                for header in headers:
                    for line in header:
                        name, vartype = _parse_var(line)
                        if name:
                            yield (name, vartype)
                compound.extend(bodies)


def iter_all(filename, *,
             preprocessed=False,
             ):
    """Yield a Declaration for each one found.

    If there are duplicates, due to preprocessor conditionals, then
    they are checked to make sure they are the same.

    Note that, for now, only variables are reported and each one is
    yielded as a plain ("variable", funcname, name, decl) tuple.
    """
    # XXX For the moment we cheat.
    for funcname, name, decl in iter_variables(filename,
                                               preprocessed=preprocessed):
        yield 'variable', funcname, name, decl