"""Report global variables found in a built ``python`` binary.

The script runs ``nm`` on the binary, keeps the data symbols, classifies
each one (known/"global" scope or not, part of the C-API or not) and prints
the result as a table.
"""

from collections import namedtuple
import glob
import os.path
import re
import shutil
import sys
import subprocess


VERBOSITY = 2

C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__))
TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)
GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')

SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']

# Matches a "PyAPI_DATA(<type>) <name>[, <name>...];" declaration and
# captures the comma-separated list of declared names.
CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')


# Symbols that find_vars() never reports.
# NOTE(review): these look like toolchain-generated symbols -- confirm.
IGNORED_VARS = {
    '_DYNAMIC',
    '_GLOBAL_OFFSET_TABLE_',
    '__JCR_LIST__',
    '__JCR_END__',
    '__TMC_END__',
    '__bss_start',
    '__data_start',
    '__dso_handle',
    '_edata',
    '_end',
    }


def find_capi_vars(root):
    """Return a dict mapping each PyAPI_DATA variable name to its file.

    Every ``.h``/``.c`` file below the SOURCE_DIRS directories is scanned.

    NOTE(review): ``root`` is accepted but not used; the directories are
    always resolved relative to ROOT_DIR.
    """
    capi_vars = {}
    for dirname in SOURCE_DIRS:
        pattern = os.path.join(
            glob.escape(os.path.join(ROOT_DIR, dirname)), '**/*.[hc]')
        for filename in glob.glob(pattern, recursive=True):
            with open(filename) as file:
                for name in _find_capi_vars(file):
                    if name in capi_vars:
                        # A repeated name is only expected when it was first
                        # seen in a .c file and is now seen in a non-.c file.
                        assert not filename.endswith('.c')
                        assert capi_vars[name].endswith('.c')
                    capi_vars[name] = filename
    return capi_vars


def _find_capi_vars(lines):
    """Yield each name declared by a line that starts with PyAPI_DATA."""
    for line in lines:
        if not line.startswith('PyAPI_DATA'):
            continue
        assert '{' not in line
        match = CAPI_REGEX.match(line)
        assert match
        names, = match.groups()
        yield from names.split(', ')


def _read_global_names(filename):
    """Return the set of names listed in *filename*.

    Everything from a ``#`` to the end of the line is a comment.  Lines
    that hold no name (blank or comment-only, indented or not) are skipped.
    """
    # These variables are shared between all interpreters in the process.
    with open(filename) as file:
        names = (line.partition('#')[0].strip() for line in file)
        return {name for name in names if name}


def _is_global_var(name, globalnames):
    """Return True if *name* matches a known pattern or is in *globalnames*."""
    checks = (
        _is_autogen_var,
        _is_type_var,
        _is_module,
        _is_exception,
        _is_compiler,
    )
    if any(check(name) for check in checks):
        return True
    return name in globalnames


def _is_autogen_var(name):
    """Return True for names that match the auto-generated patterns."""
    return (
        name.startswith('PyId_') or
        '.' in name or
        # Objects/typeobject.c
        name.startswith(('op_id.', 'rop_id.')) or
        # Python/graminit.c
        name.startswith(('arcs_', 'states_'))
    )


def _is_type_var(name):
    """Return True for names that look like part of a type definition."""
    if name.endswith(('Type', '_Type', '_type')):  # XXX Always a static type?
        return True
    if name.endswith('_desc'):  # for structseq types
        return True
    if name.startswith('doc_'):
        return True
    return name.endswith((
        '_doc', '__doc__', '_docstring',
        '_methods',
        '_fields',
        '_memberlist', '_members',
        '_slots',
        '_getset', '_getsets', '_getsetlist',
        '_as_mapping',
        '_as_number',
        '_as_sequence',
        '_as_buffer',
        '_as_async',
    ))


def _is_module(name):
    """Return True for names that look like part of a module definition."""
    if name.endswith(('_functions', 'Methods', '_Methods')):
        return True
    if name in ('module_def', 'initialized'):
        return True
    return name.endswith(('module', '_Module'))


def _is_exception(name):
    """Return True for PyExc_*/_PyExc_* names ending in Error or Warning."""
    # Other vars are enumerated in globals-core.txt.
    if not name.startswith(('PyExc_', '_PyExc_')):
        return False
    return name.endswith(('Error', 'Warning'))


def _is_compiler(name):
    """Return True for names matching the Python/Python-ast.c patterns."""
    # Python/Python-ast.c
    return name.endswith(('_type', '_singleton', '_attributes'))


class Var(namedtuple('Var', 'name kind scope capi filename')):
    """A variable symbol reported by nm.

    name     -- the symbol name
    kind     -- the nm symbol-type letter
    scope    -- 'global' if the name is a recognized global, else None
    capi     -- True if the name was declared with PyAPI_DATA
    filename -- the relative source file, or '~???~' if nm gave none
    """

    @classmethod
    def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
        """Parse one line of ``nm --line-numbers`` output.

        Returns None for symbols whose kind is in *ignored* and for
        auto-generated names.  Raises RuntimeError if the kind is not in
        *expected* either.  *expected*, *ignored* and *capi_vars* may each
        be None, which is treated as empty.
        """
        _, _, line = line.partition(' ')  # strip off the address
        line = line.strip()
        kind, _, line = line.partition(' ')
        # The fallback must be inside the parentheses: "x in y or ()"
        # parses as "(x in y) or ()" and fails when y is None.
        if kind in (ignored or ()):
            return None
        if kind not in (expected or ()):
            raise RuntimeError('unsupported NM type {!r}'.format(kind))

        name, _, filename = line.partition('\t')
        name = name.strip()
        if _is_autogen_var(name):
            return None
        scope = 'global' if _is_global_var(name, globalnames) else None
        capi = name in (capi_vars or ())
        if filename:
            # Drop the ":<lineno>" suffix that --line-numbers appends.
            filename = os.path.relpath(filename.partition(':')[0])
        return cls(name, kind, scope, capi, filename or '~???~')

    @property
    def external(self):
        """True if the nm symbol-type letter is upper case."""
        return self.kind.isupper()


def find_vars(root, globals_filename=GLOBALS_FILE):
    """Yield a Var for each variable symbol in ``<root>/python``.

    Names in IGNORED_VARS are left out.  Raises RuntimeError if the binary
    does not exist and NotImplementedError if ``nm`` is not available.
    """
    python = os.path.join(root, 'python')
    if not os.path.exists(python):
        raise RuntimeError('python binary missing (need to build it first?)')
    capi_vars = find_capi_vars(root)
    globalnames = _read_global_names(globals_filename)

    nm = shutil.which('nm')
    if nm is None:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError
    for var in _find_var_symbols(python, nm, capi_vars, globalnames):
        if var.name not in IGNORED_VARS:
            yield var


# nm symbol-type letters, grouped by how parse_nm() treats them.
NM_FUNCS = set('Tt')
NM_PUBLIC_VARS = set('BD')
NM_PRIVATE_VARS = set('bd')
NM_VARS = NM_PUBLIC_VARS | NM_PRIVATE_VARS
NM_DATA = set('Rr')
NM_OTHER = set('ACGgiINpSsuUVvWw-?')
NM_IGNORED = NM_FUNCS | NM_DATA | NM_OTHER


def _find_var_symbols(python, nm, capi_vars, globalnames):
    """Run *nm* on the *python* binary and yield a Var per kept symbol."""
    args = [nm,
            '--line-numbers',
            python]
    out = subprocess.check_output(args)
    for line in out.decode('utf-8').splitlines():
        var = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames)
        if var is not None:
            yield var


#######################################

class Filter(namedtuple('Filter', 'name op value action')):
    """A single "[+|-]<column>[=<value>]" filter on Var attributes."""

    @classmethod
    def parse(cls, raw):
        """Build a Filter from its text form; the action defaults to '+'."""
        action = '+'
        if raw.startswith(('+', '-')):
            action = raw[0]
            raw = raw[1:]
        # XXX Support < and >?
        name, op, value = raw.partition('=')
        return cls(name, op, value, action)

    def check(self, var):
        """Return True if *var* passes this filter.

        Without an operator the attribute's truthiness is tested; with
        '=' it is compared to the filter value.  A '-' action inverts the
        result.
        """
        value = getattr(var, self.name, None)
        if not self.op:
            matched = bool(value)
        elif self.op == '=':
            matched = (value == self.value)
        else:
            raise NotImplementedError

        if self.action == '+':
            return matched
        elif self.action == '-':
            return not matched
        else:
            raise NotImplementedError


def filter_var(var, filters):
    """Return True if *var* passes every filter in *filters*."""
    return all(filter.check(var) for filter in filters)


def make_sort_key(spec):
    """Return a key function that sorts by the columns named in *spec*.

    A column written with a leading underscore has leading underscores
    stripped from its values before they are compared.
    """
    columns = [(col.strip('_'), '_' if col.startswith('_') else '')
               for col in spec]

    def sort_key(var):
        return tuple(getattr(var, col).lstrip(prefix)
                     for col, prefix in columns)
    return sort_key


def make_groups(allvars, spec):
    """Return a dict mapping "<spec>: <value>" to the vars with that value."""
    group = spec
    groups = {}
    for var in allvars:
        key = '{}: {}'.format(group, getattr(var, group))
        groups.setdefault(key, []).append(var)
    return groups


def format_groups(groups, columns, fmts, widths):
    """Yield (line, count) pairs for each group, in sorted key order."""
    for group in sorted(groups):
        groupvars = groups[group]
        yield '', 0
        yield ' # {}'.format(group), 0
        yield from format_vars(groupvars, columns, fmts, widths)


def format_vars(allvars, columns, fmts, widths):
    """Yield (line, count) pairs for a table of *allvars*.

    The count is 1 for a row describing a variable and 0 for the header
    and divider lines.
    """
    # Each divider segment is 2 wider than its column and segments are
    # joined by one space.  To line up, a row needs one leading space, one
    # trailing space, and three spaces between columns.
    fmt = ' ' + '   '.join(fmts[col] for col in columns) + ' '
    header = fmt.replace(':', ':^').format(*(col.upper() for col in columns))
    yield header, 0
    div = ' '.join('-'*(widths[col]+2) for col in columns)
    yield div, 0
    for var in allvars:
        values = (getattr(var, col) for col in columns)
        # True is shown as "X"; any other falsy value is shown as blank.
        row = fmt.format(*('X' if val is True else val or ''
                           for val in values))
        yield row, 1
    yield div, 0


#######################################

COLUMNS = 'name,external,capi,scope,filename'
COLUMN_NAMES = COLUMNS.split(',')

COLUMN_WIDTHS = {col: len(col)
                 for col in COLUMN_NAMES}
COLUMN_WIDTHS.update({
        'name': 50,
        'scope': 7,
        'filename': 40,
        })
COLUMN_FORMATS = {col: '{:%s}' % width
                  for col, width in COLUMN_WIDTHS.items()}
# Columns that are only as wide as their own title are centered.
for col in COLUMN_FORMATS:
    if COLUMN_WIDTHS[col] == len(col):
        COLUMN_FORMATS[col] = COLUMN_FORMATS[col].replace(':', ':^')


def _parse_filters_arg(raw, error):
    """Parse a comma-separated filter list into Filter objects.

    *error* is called with a message for each filter that names an
    unsupported column; that filter is then left out of the result.
    """
    filters = []
    for value in raw.split(','):
        value = value.strip()
        if not value:
            continue
        parsed = Filter.parse(value)
        if parsed.name not in COLUMN_NAMES:
            error('bad filter {!r}: unsupported column {!r}'.format(
                value, parsed.name))
            continue
        filters.append(parsed)
    return filters


def _parse_columns_arg(raw, error):
    """Split a comma-separated column list, reporting unknown columns."""
    columns = raw.split(',')
    for column in columns:
        if column not in COLUMN_NAMES:
            error('unsupported column {!r}'.format(column))
    return columns


def _parse_sort_arg(raw, error):
    """Split a comma-separated sort spec, reporting unknown columns.

    Leading underscores on a column name are allowed (see make_sort_key()).
    """
    sort = raw.split(',')
    for column in sort:
        if column.lstrip('_') not in COLUMN_NAMES:
            error('unsupported column {!r}'.format(column))
    return sort


def _parse_group_arg(raw, error):
    """Validate the group column; only 'filename' is supported."""
    if not raw:
        return raw
    group = raw
    if group not in COLUMN_NAMES:
        error('unsupported column {!r}'.format(group))
    if group != 'filename':
        error('unsupported group {!r}'.format(group))
    return group


def parse_args(argv=None):
    """Parse the command line into a namespace of main() keyword arguments.

    *argv* defaults to ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]

    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('-v', '--verbose', action='count', default=0)
    parser.add_argument('-q', '--quiet', action='count', default=0)

    parser.add_argument('--filters', default='-scope',
                        help='[[-]<COLUMN>[=<GLOB>]] ...')

    parser.add_argument('--columns', default=COLUMNS,
                        help='a comma-separated list of columns to show')
    parser.add_argument('--sort', default='filename,_name',
                        help='a comma-separated list of columns to sort')
    parser.add_argument('--group',
                        help='group by the given column name (- to not group)')

    parser.add_argument('--rc-on-match', dest='rc', type=int)

    parser.add_argument('filename', nargs='?', default=GLOBALS_FILE)

    args = parser.parse_args(argv)

    # Fold -v/-q into a single verbosity level and drop the raw counters,
    # since main() does not accept them.
    verbose = vars(args).pop('verbose', 0)
    quiet = vars(args).pop('quiet', 0)
    args.verbosity = max(0, VERBOSITY + verbose - quiet)

    if args.group == '-':
        # An explicit "-" turns grouping off, as the --group help says.
        args.group = None
    elif not args.group and args.sort.startswith('filename'):
        args.group = 'filename'

    if args.rc is None:
        if '-scope=core' in args.filters or 'core' not in args.filters:
            args.rc = 0
        else:
            args.rc = 1

    args.filters = _parse_filters_arg(args.filters, parser.error)
    args.columns = _parse_columns_arg(args.columns, parser.error)
    args.sort = _parse_sort_arg(args.sort, parser.error)
    args.group = _parse_group_arg(args.group, parser.error)

    return args


def main(root=ROOT_DIR, filename=GLOBALS_FILE,
         filters=None, columns=COLUMN_NAMES, sort=None, group=None,
         verbosity=VERBOSITY, rc=1):
    """Print the table of matching variables and return an exit code.

    Returns *rc* (after printing an error to stderr) if any variable was
    listed and *rc* is non-zero; otherwise returns 0.  The table itself is
    only printed when *verbosity* is at least 2.
    """
    if verbosity >= 2:
        log = print
    else:
        def log(msg):
            pass

    # None means "no filters".
    filters = filters or ()
    # Work on a copy: the default is the module-level COLUMN_NAMES list and
    # the group column is removed below.
    columns = list(columns)

    allvars = (var
               for var in find_vars(root, filename)
               if filter_var(var, filters))
    if sort:
        allvars = sorted(allvars, key=make_sort_key(sort))

    if group:
        if group in columns:
            columns.remove(group)
        grouped = make_groups(allvars, group)
        lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
    else:
        lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)

    total = 0
    for line, count in lines:
        total += count
        log(line)
    log('\ntotal: {}'.format(total))

    if total and rc:
        print('ERROR: found unsafe globals', file=sys.stderr)
        return rc
    return 0


if __name__ == '__main__':
    args = parse_args()
    sys.exit(
            main(**vars(args)))