1
2from collections import namedtuple
3import glob
4import os.path
5import re
6import shutil
7import sys
8import subprocess
9
10
# Default verbosity; main() only prints the report at level 2 or above.
VERBOSITY = 2

# Directory layout, derived from the location of this script:
# <root>/<tools dir>/<this dir>/<this file>.
C_GLOBALS_DIR = os.path.dirname(os.path.abspath(__file__))
TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)
# The list of global variable names that are known and accepted.
GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')

# Sub-directories scanned for PyAPI_DATA declarations.
SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']

# Matches a single-line PyAPI_DATA(...) declaration.  Group 1 captures the
# declared name, or several names separated by ", ".
CAPI_REGEX = re.compile(
    r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$'
)
21
22
# Symbol names that find_vars() always drops from its results.
# NOTE(review): these look like toolchain/linker-generated symbols rather
# than variables declared in the C sources -- confirm before extending.
IGNORED_VARS = {
    '_DYNAMIC',
    '_GLOBAL_OFFSET_TABLE_',
    '_edata',
    '_end',
    '__JCR_END__',
    '__JCR_LIST__',
    '__TMC_END__',
    '__bss_start',
    '__data_start',
    '__dso_handle',
}
35
36
def find_capi_vars(root):
    """Map each PyAPI_DATA variable name to the file that declares it.

    Every ``.h`` and ``.c`` file found (recursively) under the SOURCE_DIRS
    sub-directories of *root* is scanned for ``PyAPI_DATA(...)``
    declarations.

    Returns a dict of ``{name: filename}``.  If a name is declared more
    than once, the later file wins; the assertions below require that the
    earlier declaration was in a ``.c`` file and the later one is not.
    """
    capi_vars = {}
    for dirname in SOURCE_DIRS:
        # Scan the tree rooted at the *root* argument (the original always
        # used ROOT_DIR and silently ignored the parameter).  The directory
        # part is escaped so glob metacharacters in the path are literal.
        pattern = os.path.join(
            glob.escape(os.path.join(root, dirname)),
            '**/*.[hc]')
        for filename in glob.glob(pattern, recursive=True):
            with open(filename) as file:
                for name in _find_capi_vars(file):
                    if name in capi_vars:
                        assert not filename.endswith('.c')
                        assert capi_vars[name].endswith('.c')
                    capi_vars[name] = filename
    return capi_vars
51
52
def _find_capi_vars(lines):
    """Yield every variable name declared via PyAPI_DATA in *lines*.

    Only lines that begin with ``PyAPI_DATA`` are considered.  Each such
    line must not contain ``{`` and must match CAPI_REGEX (both enforced
    with assertions).  A declaration listing several names separated by
    ", " yields each name in turn.
    """
    declarations = (text for text in lines if text.startswith('PyAPI_DATA'))
    for decl in declarations:
        assert '{' not in decl
        found = CAPI_REGEX.match(decl)
        assert found
        (declared,) = found.groups()
        yield from declared.split(', ')
63
64
65def _read_global_names(filename):
66    # These variables are shared between all interpreters in the process.
67    with open(filename) as file:
68        return {line.partition('#')[0].strip()
69                for line in file
70                if line.strip() and not line.startswith('#')}
71
72
def _is_global_var(name, globalnames):
    """Return True if *name* is a recognized (accepted) global variable.

    A name is accepted when any of the naming-pattern predicates matches
    it, or when it is explicitly listed in *globalnames*.
    """
    predicates = (
        _is_autogen_var,
        _is_type_var,
        _is_module,
        _is_exception,
        _is_compiler,
    )
    if any(matches(name) for matches in predicates):
        return True
    return name in globalnames
85
86
87def _is_autogen_var(name):
88    return (
89        name.startswith('PyId_') or
90        '.' in name or
91        # Objects/typeobject.c
92        name.startswith('op_id.') or
93        name.startswith('rop_id.') or
94        # Python/graminit.c
95        name.startswith('arcs_') or
96        name.startswith('states_')
97        )
98
99
100def _is_type_var(name):
101    if name.endswith(('Type', '_Type', '_type')):  # XXX Always a static type?
102        return True
103    if name.endswith('_desc'):  # for structseq types
104        return True
105    return (
106        name.startswith('doc_') or
107        name.endswith(('_doc', '__doc__', '_docstring')) or
108        name.endswith('_methods') or
109        name.endswith('_fields') or
110        name.endswith(('_memberlist', '_members')) or
111        name.endswith('_slots') or
112        name.endswith(('_getset', '_getsets', '_getsetlist')) or
113        name.endswith('_as_mapping') or
114        name.endswith('_as_number') or
115        name.endswith('_as_sequence') or
116        name.endswith('_as_buffer') or
117        name.endswith('_as_async')
118        )
119
120
121def _is_module(name):
122    if name.endswith(('_functions', 'Methods', '_Methods')):
123        return True
124    if name == 'module_def':
125        return True
126    if name == 'initialized':
127        return True
128    return name.endswith(('module', '_Module'))
129
130
131def _is_exception(name):
132    # Other vars are enumerated in globals-core.txt.
133    if not name.startswith(('PyExc_', '_PyExc_')):
134        return False
135    return name.endswith(('Error', 'Warning'))
136
137
138def _is_compiler(name):
139    return (
140        # Python/Python-ast.c
141        name.endswith('_type') or
142        name.endswith('_singleton') or
143        name.endswith('_attributes')
144        )
145
146
class Var(namedtuple('Var', 'name kind scope capi filename')):
    """A variable symbol reported by ``nm``.

    Fields:
      name     -- the symbol name
      kind     -- the single-letter nm symbol type
      scope    -- 'global' if the name is a recognized global, else None
      capi     -- True if the name was declared with PyAPI_DATA
      filename -- the defining file (relative path), or '~???~' if unknown
    """

    @classmethod
    def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
        """Parse one line of ``nm --line-numbers`` output.

        The line is treated as "<address> <kind> <name>[\\t<file>:<line>]".

        *expected* and *ignored* are collections of nm type letters and
        *capi_vars* is a collection of PyAPI_DATA names; each may be None,
        which is treated as empty.  *globalnames* is the set of explicitly
        listed global names.

        Returns None for symbols whose kind is ignored or whose name is
        auto-generated; otherwise returns a new Var.  Raises RuntimeError
        if the kind is neither ignored nor expected.
        """
        _, _, line = line.partition(' ')  # strip off the address
        line = line.strip()
        kind, _, line = line.partition(' ')
        # The parentheses matter: "kind in ignored or ()" parses as
        # "(kind in ignored) or ()", which never applies the fallback.
        if kind in (ignored or ()):
            return None
        if kind not in (expected or ()):
            raise RuntimeError('unsupported NM type {!r}'.format(kind))

        name, _, filename = line.partition('\t')
        name = name.strip()
        if _is_autogen_var(name):
            return None
        scope = 'global' if _is_global_var(name, globalnames) else None
        capi = name in (capi_vars or ())
        if filename:
            # Drop the ":<lineno>" suffix and make the path relative.
            filename = os.path.relpath(filename.partition(':')[0])
        return cls(name, kind, scope, capi, filename or '~???~')

    @property
    def external(self):
        """True if the nm type letter is upper case."""
        return self.kind.isupper()
175
176
def find_vars(root, globals_filename=GLOBALS_FILE):
    """Yield a Var for each variable symbol in the built python binary.

    The binary is expected at ``<root>/python``.  Symbols whose names are
    in IGNORED_VARS are skipped.

    Raises RuntimeError if the binary does not exist and
    NotImplementedError if no ``nm`` executable can be found.  Because this
    is a generator, those errors surface on first iteration.
    """
    binary = os.path.join(root, 'python')
    if not os.path.exists(binary):
        raise RuntimeError('python binary missing (need to build it first?)')
    capi_vars = find_capi_vars(root)
    globalnames = _read_global_names(globals_filename)

    nm = shutil.which('nm')
    if nm is None:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError

    for var in _find_var_symbols(binary, nm, capi_vars, globalnames):
        if var.name in IGNORED_VARS:
            continue
        yield var
193
194
# Single-letter symbol types as printed by nm, grouped by how they are
# handled.  Types in NM_VARS are reported; types in NM_IGNORED are skipped;
# anything else makes Var.parse_nm() raise.
NM_FUNCS = set('Tt')
NM_PUBLIC_VARS = set('BD')
NM_PRIVATE_VARS = set('bd')
NM_VARS = NM_PUBLIC_VARS.union(NM_PRIVATE_VARS)
NM_DATA = set('Rr')
NM_OTHER = set('ACGgiINpSsuUVvWw-?')
NM_IGNORED = set().union(NM_FUNCS, NM_DATA, NM_OTHER)
202
203
def _find_var_symbols(python, nm, capi_vars, globalnames):
    """Run *nm* on the *python* binary and yield a Var per variable symbol.

    nm is invoked with ``--line-numbers`` and its output is decoded as
    UTF-8.  Lines that Var.parse_nm() maps to None are dropped.
    """
    command = [nm, '--line-numbers', python]
    text = subprocess.check_output(command).decode('utf-8')
    for symbol_line in text.splitlines():
        var = Var.parse_nm(symbol_line, NM_VARS, NM_IGNORED,
                           capi_vars, globalnames)
        if var is not None:
            yield var
214
215
216#######################################
217
class Filter(namedtuple('Filter', 'name op value action')):
    """A single row filter of the form ``[+|-]<name>[=<value>]``.

    *action* is '+' (keep rows that match) or '-' (keep rows that do not).
    *op* is '=' for an equality test, or '' to test the attribute's
    truthiness.
    """

    @classmethod
    def parse(cls, raw):
        """Build a Filter from its textual form (action defaults to '+')."""
        if raw.startswith(('+', '-')):
            action, raw = raw[0], raw[1:]
        else:
            action = '+'
        # XXX Support < and >?
        name, op, value = raw.partition('=')
        return cls(name, op, value, action)

    def check(self, var):
        """Return True if *var* should be kept according to this filter.

        A missing attribute on *var* is treated as None.  Raises
        NotImplementedError for an unknown op or action.
        """
        actual = getattr(var, self.name, None)
        if self.op == '=':
            matched = (actual == self.value)
        elif not self.op:
            matched = bool(actual)
        else:
            raise NotImplementedError

        if self.action not in ('+', '-'):
            raise NotImplementedError
        return matched if self.action == '+' else not matched
245
246
def filter_var(var, filters):
    """Return True if *var* passes every filter in *filters*.

    Evaluation stops at the first filter that rejects *var*.  An empty
    *filters* accepts everything.
    """
    return all(each.check(var) for each in filters)
252
253
def make_sort_key(spec):
    """Return a key function that sorts vars by the columns in *spec*.

    Each entry of *spec* is a column (attribute) name.  If the entry starts
    with "_", leading underscores are stripped from that column's value
    before comparing.
    """
    plan = []
    for entry in spec:
        strip_chars = '_' if entry.startswith('_') else ''
        plan.append((entry.strip('_'), strip_chars))

    def sort_key(var):
        parts = []
        for attr, strip_chars in plan:
            parts.append(getattr(var, attr).lstrip(strip_chars))
        return tuple(parts)

    return sort_key
261
262
def make_groups(allvars, spec):
    """Bucket *allvars* by the value of the attribute named *spec*.

    Returns a dict mapping "<spec>: <value>" to the list of vars with that
    value, preserving the order in which the vars were seen.
    """
    buckets = {}
    for var in allvars:
        label = '{}: {}'.format(spec, getattr(var, spec))
        buckets.setdefault(label, []).append(var)
    return buckets
275
276
def format_groups(groups, columns, fmts, widths):
    """Yield ``(text, count)`` pairs rendering each group as its own table.

    Groups are emitted in sorted key order.  Each is preceded by a blank
    line and a "  # <group>" heading (both with count 0), followed by the
    lines produced by format_vars() for that group's vars.
    """
    for title in sorted(groups):
        members = groups[title]
        yield '', 0
        yield '  # {}'.format(title), 0
        for rendered in format_vars(members, columns, fmts, widths):
            yield rendered
283
284
def format_vars(allvars, columns, fmts, widths):
    """Yield ``(text, count)`` pairs rendering *allvars* as a text table.

    *fmts* maps a column name to its format spec and *widths* maps it to
    its width.  The header and divider lines carry a count of 0; each data
    row carries a count of 1.  In a row, True is shown as "X" and any other
    falsy value as an empty cell.
    """
    row_fmt = ' '.join(fmts[col] for col in columns)
    row_fmt = ''.join([' ', row_fmt.replace(' ', '   '), ' '])  # for div margin
    # The header re-uses the row layout but centers every title.
    titles = [col.upper() for col in columns]
    yield row_fmt.replace(':', ':^').format(*titles), 0

    divider = ' '.join('-' * (widths[col] + 2) for col in columns)
    yield divider, 0

    for var in allvars:
        cells = []
        for col in columns:
            val = getattr(var, col)
            cells.append('X' if val is True else val or '')
        yield row_fmt.format(*cells), 1

    yield divider, 0
298
299
300#######################################
301
# The columns available for display, filtering and sorting, in their
# default display order.
COLUMNS = 'name,external,capi,scope,filename'
COLUMN_NAMES = COLUMNS.split(',')

# Each column is as wide as its own title unless overridden below.
COLUMN_WIDTHS = dict(zip(COLUMN_NAMES, map(len, COLUMN_NAMES)))
COLUMN_WIDTHS['name'] = 50
COLUMN_WIDTHS['scope'] = 7
COLUMN_WIDTHS['filename'] = 40

# Format spec per column.  Columns that are exactly as wide as their title
# get centered; the rest keep the default alignment.
COLUMN_FORMATS = {}
for col, width in COLUMN_WIDTHS.items():
    spec = '{:%s}' % width
    if width == len(col):
        spec = spec.replace(':', ':^')
    COLUMN_FORMATS[col] = spec
317
318
def _parse_filters_arg(raw, error):
    """Parse the comma-separated ``--filters`` value into Filter objects.

    Empty entries are skipped.  An entry that cannot be parsed, or that
    names a column not in COLUMN_NAMES, is reported through *error* and is
    left out of the result if *error* returns.
    """
    filters = []
    for value in raw.split(','):
        value = value.strip()
        if not value:
            continue
        try:
            parsed = Filter.parse(value)
        except Exception as e:
            error('bad filter {!r}: {}'.format(raw, e))
            continue
        if parsed.name not in COLUMN_NAMES:
            reason = 'unsupported column {!r}'.format(parsed.name)
            error('bad filter {!r}: {}'.format(raw, reason))
            # Never keep a rejected filter: the original fell through and
            # appended it whenever the error callback returned.
            continue
        filters.append(parsed)
    return filters
333
334
def _parse_columns_arg(raw, error):
    """Split the comma-separated ``--columns`` value into a list of names.

    Every name not found in COLUMN_NAMES is reported through *error*.  The
    full list is returned regardless.
    """
    requested = raw.split(',')
    unknown = [name for name in requested if name not in COLUMN_NAMES]
    for name in unknown:
        error('unsupported column {!r}'.format(name))
    return requested
341
342
def _parse_sort_arg(raw, error):
    """Split the comma-separated ``--sort`` value into a list of sort keys.

    A key may carry leading underscores, which are ignored when validating
    it against COLUMN_NAMES.  Each invalid key is reported through *error*.
    The full list is returned regardless.
    """
    keys = raw.split(',')
    invalid = [key for key in keys if key.lstrip('_') not in COLUMN_NAMES]
    for key in invalid:
        error('unsupported column {!r}'.format(key))
    return keys
349
350
351def _parse_group_arg(raw, error):
352    if not raw:
353        return raw
354    group = raw
355    if group not in COLUMN_NAMES:
356        error('unsupported column {!r}'.format(group))
357    if group != 'filename':
358        error('unsupported group {!r}'.format(group))
359    return group
360
361
def parse_args(argv=None):
    """Parse the command line and return the resulting namespace.

    *argv* defaults to ``sys.argv[1:]``.  The returned namespace carries
    exactly the keyword arguments main() accepts: filename, filters,
    columns, sort, group, verbosity and rc.
    """
    import argparse

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()

    parser.add_argument('-v', '--verbose', action='count', default=0)
    parser.add_argument('-q', '--quiet', action='count', default=0)

    parser.add_argument('--filters', default='-scope',
                        help='[[-]<COLUMN>[=<GLOB>]] ...')

    parser.add_argument('--columns', default=COLUMNS,
                        help='a comma-separated list of columns to show')
    parser.add_argument('--sort', default='filename,_name',
                        help='a comma-separated list of columns to sort')
    parser.add_argument('--group',
                        help='group by the given column name (- to not group)')

    parser.add_argument('--rc-on-match', dest='rc', type=int)

    parser.add_argument('filename', nargs='?', default=GLOBALS_FILE)

    args = parser.parse_args(argv)

    # Fold the -v/-q counts into a single non-negative verbosity level.
    verbose, quiet = args.verbose, args.quiet
    del args.verbose
    del args.quiet
    args.verbosity = max(0, VERBOSITY + verbose - quiet)

    # Sorting by filename first implies grouping by filename.
    if not args.group and args.sort.startswith('filename'):
        args.group = 'filename'

    # Pick the default exit code from the raw (unparsed) filter string.
    if args.rc is None:
        raw_filters = args.filters
        selects_core = ('core' in raw_filters
                        and '-scope=core' not in raw_filters)
        args.rc = 1 if selects_core else 0

    args.filters = _parse_filters_arg(args.filters, parser.error)
    args.columns = _parse_columns_arg(args.columns, parser.error)
    args.sort = _parse_sort_arg(args.sort, parser.error)
    args.group = _parse_group_arg(args.group, parser.error)

    return args
407
408
def main(root=ROOT_DIR, filename=GLOBALS_FILE,
         filters=None, columns=COLUMN_NAMES, sort=None, group=None,
         verbosity=VERBOSITY, rc=1):
    """Report the variables found in the python binary under *root*.

    Arguments:
      root      -- directory containing the built ``python`` binary
      filename  -- file listing the accepted global variable names
      filters   -- iterable of Filter objects (None means no filtering)
      columns   -- names of the columns to show (never modified)
      sort      -- sort spec for make_sort_key(), or None to not sort
      group     -- column to group by, or None to not group
      verbosity -- the report is only printed at level 2 or above
      rc        -- exit code to return if any variable is reported

    Returns *rc* if at least one variable was reported and *rc* is
    non-zero (after printing an error to stderr); otherwise returns 0.
    """
    if verbosity >= 2:
        log = print
    else:
        def log(msg):
            pass

    # The default filters=None would otherwise fail inside filter_var().
    filters = filters or ()
    # Work on a copy: the default is the module-level COLUMN_NAMES list,
    # which must not lose the group column as a side effect of this call.
    columns = list(columns)

    allvars = (var
               for var in find_vars(root, filename)
               if filter_var(var, filters))
    if sort:
        allvars = sorted(allvars, key=make_sort_key(sort))

    if group:
        # The group value is shown in the heading, so drop its column.
        if group in columns:
            columns.remove(group)
        grouped = make_groups(allvars, group)
        lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
    else:
        lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)

    total = 0
    for line, count in lines:
        total += count
        log(line)
    log('\ntotal: {}'.format(total))

    if total and rc:
        print('ERROR: found unsafe globals', file=sys.stderr)
        return rc
    return 0
443
444
if __name__ == '__main__':
    # Every attribute of the parsed namespace is a keyword argument of main().
    cli_kwargs = vars(parse_args())
    sys.exit(main(**cli_kwargs))
449