1import re
2import shlex
3import subprocess
4
5from ..common.info import UNKNOWN
6
7from . import source
8
9
# Regex fragment meant to match a C identifier.
# NOTE(review): the character class "[a-zA-z]" ends at lowercase "z", so it
# also admits the ASCII punctuation between "Z" and "a" ("[", "\", "]",
# "^", "_", "`"), and that first alternative matches only one character.
# "[a-zA-Z]" followed by "\w*" was presumably intended -- confirm.
IDENTIFIER = r'(?:[a-zA-z]|_+[a-zA-Z0-9]\w*)'
11
# Regex fragment matching a single C type qualifier keyword.
TYPE_QUAL = r'(?:const|volatile)'
13
# Verbose-mode (re.VERBOSE) regex fragment for the type specifier at the
# start of a declaration: "void", an optionally signed/unsigned integer
# type, "float", "double", an identifier, or "struct"/"union" plus a name.
# NOTE(review): this is a plain raw string, not an f-string, so the
# "{IDENTIFIER}" placeholders are never substituted; the literal text
# "{IDENTIFIER}" ends up in every pattern built from this fragment.
# NOTE(review): the integer-type group ends with "|" right before its
# closing ")", i.e. it has an empty alternative, so this fragment can match
# the empty string.  Code using the compiled patterns may rely on that --
# confirm before changing either point.
VAR_TYPE_SPEC = r'''(?:
        void |
        (?:
         (?:(?:un)?signed\s+)?
         (?:
          char |
          short |
          int |
          long |
          long\s+int |
          long\s+long
          ) |
         ) |
        float |
        double |
        {IDENTIFIER} |
        (?:struct|union)\s+{IDENTIFIER}
        )'''
32
# Verbose-mode regex fragment matching one pointer marker ("*"), optionally
# preceded by whitespace and/or a "const" qualifier.
# (The "f" prefix has no effect here: the string has no placeholders.)
POINTER = rf'''(?:
        (?:\s+const)?\s*[*]
        )'''
36
37#STRUCT = r'''(?:
38#        (?:struct|(struct\s+%s))\s*[{]
39#            [^}]*
40#        [}]
41#        )''' % (IDENTIFIER)
42#UNION = r'''(?:
43#        (?:union|(union\s+%s))\s*[{]
44#            [^}]*
45#        [}]
46#        )''' % (IDENTIFIER)
47#DECL_SPEC = rf'''(?:
48#        ({VAR_TYPE_SPEC}) |
49#        ({STRUCT}) |
50#        ({UNION})
51#        )'''
52
# Verbose-mode regex fragment for the start of a global declaration:
# an optional "extern", "static" or "static inline" prefix followed by
# VAR_TYPE_SPEC.  The "#(?:const\s+)?" line inside the pattern is a regex
# comment under re.VERBOSE, i.e. that part is currently disabled.
FUNC_START = rf'''(?:
        (?:
          (?:
            extern |
            static |
            static\s+inline
           )\s+
         )?
        #(?:const\s+)?
        {VAR_TYPE_SPEC}
        )'''
64#GLOBAL_VAR_START = rf'''(?:
65#        (?:
66#          (?:
67#            extern |
68#            static
69#           )\s+
70#         )?
71#        (?:
72#           {TYPE_QUAL}
73#           (?:\s+{TYPE_QUAL})?
74#         )?\s+
75#        {VAR_TYPE_SPEC}
76#        )'''
# Compiled pattern, anchored at the start of the string, used by
# iter_global_declarations() to pick out lines that begin a global
# declaration.  Group 1 captures the FUNC_START portion.
# NOTE(review): the storage prefix is optional and VAR_TYPE_SPEC contains an
# empty alternative, so this appears able to match (with an empty capture)
# at the start of any line -- confirm whether that is intended.
GLOBAL_DECL_START_RE = re.compile(rf'''
        ^
        (?:
            ({FUNC_START})
         )
        ''', re.VERBOSE)
83
# Verbose-mode regex fragment for the start of a local variable
# declaration: an optional "register" or "static" prefix, then optionally
# one or two type qualifiers, then VAR_TYPE_SPEC, then at most one POINTER.
LOCAL_VAR_START = rf'''(?:
        (?:
          (?:
            register |
            static
           )\s+
         )?
        (?:
          (?:
            {TYPE_QUAL}
            (?:\s+{TYPE_QUAL})?
           )\s+
         )?
        {VAR_TYPE_SPEC}
        {POINTER}?
        )'''
# Compiled pattern, anchored at the start of the string, used by
# iter_local_statements() to select statements and by _parse_var() to split
# off the leading type text.  Group 1 captures the LOCAL_VAR_START portion.
LOCAL_STMT_START_RE = re.compile(rf'''
        ^
        (?:
            ({LOCAL_VAR_START})
         )
        ''', re.VERBOSE)
106
107
def iter_global_declarations(lines):
    """Yield (decl, body) for each function definition in the given lines.

    "decl" is the whole definition: the header folded onto a single
    line, then the body text, then the closing brace, each separated
    by a newline.  "body" is the text between the braces, as-is.

    Only function definitions are produced for now.  Lines ending
    in ";" and lines that open a brace without containing "(" (e.g.
    struct) are skipped.  Declarations inside function bodies are not
    reported on their own, though their text is part of the body.

    Iteration stops early if the input runs out in the middle of
    a definition.
    """
    # XXX Bail out upon bogus syntax.
    cleaned = source.iter_clean_lines(lines)
    for first in cleaned:
        if not GLOBAL_DECL_START_RE.match(first):
            continue
        # We only need functions here, since we only need locals for now.
        if first.endswith(';') or (first.endswith('{') and '(' not in first):
            continue

        # Gather header lines up to (and including) the one that opens
        # the body.  (assume no func is a one-liner and that there are
        # no inline structs, etc.)
        header = [first]
        while '{' not in header[-1]:
            try:
                header.append(next(cleaned))
            except StopIteration:
                return
        decl = ' '.join(header)

        # The rest of the function comes straight off the same iterator.
        body, closer = _extract_block(cleaned)
        if closer is None:
            return
        assert closer == '}'
        yield (f'{decl}\n{body}\n{closer}', body)
145
146
def iter_local_statements(lines):
    """Yield (stmt, blocks) for each statement in the given lines.

    For simple statements, "blocks" is None and the statement is a
    single line.  For compound statements, "blocks" is meant to be a
    pair of (header, body) for each block in the statement, with each
    header reduced to a single line and the bodies provided as-is.

    Currently only single-line statements that end in ";" are
    produced; everything else is skipped.
    """
    # XXX Bail out upon bogus syntax.
    for line in source.iter_clean_lines(lines):
        looks_like_decl = LOCAL_STMT_START_RE.match(line)
        # XXX Support compound & multiline simple statements.
        if looks_like_decl and line.endswith(';'):
            yield (line, None)
169
170
171def _extract_block(lines):
172    end = None
173    depth = 1
174    body = []
175    for line in lines:
176        depth += line.count('{') - line.count('}')
177        if depth == 0:
178            end = line
179            break
180        body.append(line)
181    return '\n'.join(body), end
182
183
def parse_func(stmt, body):
    """Return (name, signature) for the given function definition.

    "stmt" is the full definition text and "body" the text between its
    braces.  The signature is everything before the opening brace,
    folded onto one line.  The name is the last space-separated word
    before the first "(" of the signature.
    """
    before_body, _, after_body = stmt.partition(body)
    assert after_body.strip() == '}'
    assert before_body.strip().endswith('{')
    # Drop the opening brace (and anything after it).
    header = before_body.rpartition('{')[0]

    signature = ' '.join(header.strip().splitlines())

    leading = signature.partition('(')[0].strip()
    name = leading.rpartition(' ')[2]
    assert name

    return name, signature
197
198
199#TYPE_SPEC = rf'''(?:
200#        )'''
201#VAR_DECLARATOR = rf'''(?:
202#        )'''
203#VAR_DECL = rf'''(?:
204#            {TYPE_SPEC}+
205#            {VAR_DECLARATOR}
206#            \s*
207#        )'''
208#VAR_DECLARATION = rf'''(?:
209#            {VAR_DECL}
210#            (?: = [^=] [^;]* )?
211#            ;
212#        )'''
213#
214#
215#def parse_variable(decl, *, inFunc=False):
216#    """Return [(name, storage, vartype)] for the given variable declaration."""
217#    ...
218
219
def _parse_var(stmt):
    """Return (name, vartype) for the given variable declaration.

    The text matched by LOCAL_STMT_START_RE is the start of the type.
    Of what follows (up to any "="), the last space-separated word is
    the name and the rest is appended to the type.  A declarator that
    starts with "(" (e.g. a function pointer) is split the same way
    inside the parentheses, and the text after the closing ")" is kept
    in the type.
    """
    stmt = stmt.rstrip(';')
    matched = LOCAL_STMT_START_RE.match(stmt)
    assert matched
    prefix = matched.group(0)
    declarator = stmt[len(prefix):].partition('=')[0].strip()

    if declarator.startswith('('):
        inner, _, after = declarator[1:].partition(')')
        assert after
        # Put a space after each "*" so the name splits off cleanly.
        inner = inner.replace('*', '* ').strip()
        inside, _, name = inner.rpartition(' ')
        vartype = f'{prefix} ({inside.strip()}){after}'
    else:
        spaced = declarator.replace('*', '* ')
        before, _, name = spaced.rpartition(' ')
        vartype = f'{prefix} {before}'

    # Trim the ends and squeeze every run of spaces down to one.
    vartype = re.sub(r' {2,}', ' ', vartype.strip())

    return name, vartype
244
245
def extract_storage(decl, *, infunc=None):
    """Return the storage class implied by the given declaration.

    The result is "static" or "extern" when the declaration starts
    with that keyword.  Otherwise the default storage is "implicit"
    (or "local" if infunc is true).  If decl is UNKNOWN then it is
    returned unchanged.

    Raises NotImplementedError if "static" or "extern" appears as a
    whole word anywhere else in the declaration (e.g. after a type
    qualifier), since that form is not handled yet.
    """
    if decl == UNKNOWN:
        return decl
    # XXX Eventually return (storage, vartype), with the keyword stripped.
    if decl.startswith('static '):
        return 'static'
    if decl.startswith('extern '):
        return 'extern'
    # The pattern must be a raw string: in a normal string literal "\b"
    # is a backspace character rather than a regex word boundary, which
    # would keep this check from ever matching.
    if re.search(r'\b(?:static|extern)\b', decl):
        raise NotImplementedError
    return 'local' if infunc else 'implicit'
265
266
def parse_compound(stmt, blocks):
    """Return (headers, bodies) for the given compound statement.

    This is not implemented yet: every call raises NotImplementedError.
    """
    # XXX Identify declarations inside compound statements
    # (if/switch/for/while).
    raise NotImplementedError
272
273
def iter_variables(filename, *,
                   preprocessed=False,
                   _iter_source_lines=source.iter_lines,
                   _iter_global=iter_global_declarations,
                   _iter_local=iter_local_statements,
                   _parse_func=parse_func,
                   _parse_var=_parse_var,
                   _parse_compound=parse_compound,
                   ):
    """Yield (funcname, name, vartype) for every variable in the given file.

    "funcname" is None for a variable found at the top level of the
    file and the enclosing function's name for a local variable.

    The underscore-prefixed keyword arguments are the helpers used to
    do the work; they may be replaced (e.g. for testing).

    NotImplementedError is raised (once iteration starts) if
    "preprocessed" is true.
    """
    if preprocessed:
        raise NotImplementedError
    declarations = _iter_global(_iter_source_lines(filename))
    for stmt, body in declarations:
        # At the file top-level we only have to worry about vars & funcs.
        if body:
            # A function: report each of its locals under its name.
            funcname, _ = _parse_func(stmt, body)
            found = _iter_locals(
                body,
                _iter_statements=_iter_local,
                _parse_var=_parse_var,
                _parse_compound=_parse_compound,
            )
            for name, vartype in found:
                yield (funcname, name, vartype)
            continue

        # No body, so treat the declaration as a variable.
        name, vartype = _parse_var(stmt)
        if name:
            yield (None, name, vartype)
302
303
def _iter_locals(lines, *,
                 _iter_statements=iter_local_statements,
                 _parse_var=_parse_var,
                 _parse_compound=parse_compound,
                 ):
    """Yield (name, vartype) for each variable found in a function body.

    "lines" is the body text as a single string.  Simple statements
    are parsed directly.  For a compound statement every header line
    is parsed and the nested bodies are queued up to be scanned the
    same way, in the order they were found.  Results with an empty
    name are dropped.
    """
    pending = [lines]
    while pending:
        text = pending.pop(0)
        for stmt, blocks in _iter_statements(text.splitlines()):
            if blocks:
                headers, bodies = _parse_compound(stmt, blocks)
                for header in headers:
                    for line in header:
                        name, vartype = _parse_var(line)
                        if name:
                            yield (name, vartype)
                pending.extend(bodies)
                continue

            name, vartype = _parse_var(stmt)
            if name:
                yield (name, vartype)
326
327
def iter_all(filename, *,
             preprocessed=False,
             ):
    """Yield ("variable", funcname, name, decl) for each variable found.

    The values come straight from iter_variables(); nothing else is
    reported and duplicates are not checked.
    """
    # XXX For the moment we cheat.
    variables = iter_variables(filename, preprocessed=preprocessed)
    for funcname, name, decl in variables:
        yield ('variable', funcname, name, decl)
340