1import os.path
2import re
3
4from c_analyzer.common.info import ID
5from c_analyzer.common.util import read_tsv, write_tsv
6
7from . import DATA_DIR
8
9# XXX need tests:
10# * generate / script
11
12
# Default location of the TSV file that lists the ignored variables.
IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')

# The columns of each row in the TSV file, in order.  ignored_from_file()
# unpacks rows in this order and _get_row() produces them in this order.
IGNORED_COLUMNS = ('filename', 'funcname', 'name', 'kind', 'reason')
# The tab-separated header line passed to read_tsv() / write_tsv().
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)
17
# Maps the name of a global variable to the reason it is ignored.
# _is_ignored() only consults this table for variables that have
# no funcname (i.e. variable.funcname is None).
# XXX Move these to ignored.tsv.
IGNORED = {
        # global
        'PyImport_FrozenModules': 'process-global',
        'M___hello__': 'process-global',
        'inittab_copy': 'process-global',
        'PyHash_Func': 'process-global',
        '_Py_HashSecret_Initialized': 'process-global',
        '_TARGET_LOCALES': 'process-global',

        # startup (only changed before/during)
        '_PyRuntime': 'runtime startup',
        'runtime_initialized': 'runtime startup',
        'static_arg_parsers': 'runtime startup',
        'orig_argv': 'runtime startup',
        'opt_ptr': 'runtime startup',
        '_preinit_warnoptions': 'runtime startup',
        '_Py_StandardStreamEncoding': 'runtime startup',
        'Py_FileSystemDefaultEncoding': 'runtime startup',
        '_Py_StandardStreamErrors': 'runtime startup',
        'Py_FileSystemDefaultEncodeErrors': 'runtime startup',
        'Py_BytesWarningFlag': 'runtime startup',
        'Py_DebugFlag': 'runtime startup',
        'Py_DontWriteBytecodeFlag': 'runtime startup',
        'Py_FrozenFlag': 'runtime startup',
        'Py_HashRandomizationFlag': 'runtime startup',
        'Py_IgnoreEnvironmentFlag': 'runtime startup',
        'Py_InspectFlag': 'runtime startup',
        'Py_InteractiveFlag': 'runtime startup',
        'Py_IsolatedFlag': 'runtime startup',
        'Py_NoSiteFlag': 'runtime startup',
        'Py_NoUserSiteDirectory': 'runtime startup',
        'Py_OptimizeFlag': 'runtime startup',
        'Py_QuietFlag': 'runtime startup',
        'Py_UTF8Mode': 'runtime startup',
        'Py_UnbufferedStdioFlag': 'runtime startup',
        'Py_VerboseFlag': 'runtime startup',
        '_Py_path_config': 'runtime startup',
        '_PyOS_optarg': 'runtime startup',
        '_PyOS_opterr': 'runtime startup',
        '_PyOS_optind': 'runtime startup',
        '_Py_HashSecret': 'runtime startup',

        # REPL
        '_PyOS_ReadlineLock': 'repl',
        '_PyOS_ReadlineTState': 'repl',

        # effectively const
        'tracemalloc_empty_traceback': 'const',
        '_empty_bitmap_node': 'const',
        'posix_constants_pathconf': 'const',
        'posix_constants_confstr': 'const',
        'posix_constants_sysconf': 'const',
        '_PySys_ImplCacheTag': 'const',
        '_PySys_ImplName': 'const',
        'PyImport_Inittab': 'const',
        '_PyImport_DynLoadFiletab': 'const',
        '_PyParser_Grammar': 'const',
        'Py_hexdigits': 'const',
        '_PyImport_Inittab': 'const',
        '_PyByteArray_empty_string': 'const',
        '_PyLong_DigitValue': 'const',
        '_Py_SwappedOp': 'const',
        'PyStructSequence_UnnamedField': 'const',

        # signals are main-thread only
        'faulthandler_handlers': 'signals are main-thread only',
        'user_signals': 'signals are main-thread only',
        'wakeup': 'signals are main-thread only',

        # hacks
        '_PySet_Dummy': 'only used as a placeholder',
        }
91
# The reason string _is_ignored() returns for variables whose data races
# are considered harmless.
BENIGN = 'races here are benign and unlikely'
93
94
def is_supported(variable, ignored=None, known=None, *,
                 _ignored=(lambda *a, **k: _is_ignored(*a, **k)),
                 _vartype_okay=(lambda *a, **k: _is_vartype_okay(*a, **k)),
                 ):
    """Return True if the given global variable is okay in CPython.

    "variable" must provide a "vartype" attribute (plus whatever the
    "_ignored" callable reads from it).

    "ignored", if provided, is a mapping that may hold a "variables"
    entry and a "types" entry; they are passed along to the helpers.
    When it is None (or empty) the helpers receive a false value
    instead.

    "known" is accepted for API compatibility but is not used here.

    "_ignored" and "_vartype_okay" are injection points for tests.
    The defaults are lambdas so that the module-level helpers get
    looked up at call time rather than at definition time.
    """
    if _ignored(variable,
                ignored and ignored.get('variables')):
        return True

    # "ignored" is optional, so it must be guarded here just like above.
    # (An unguarded ignored.get() raised AttributeError for None.)
    ignoredtypes = ignored.get('types') if ignored else None
    return bool(_vartype_okay(variable.vartype, ignoredtypes))
108
109
# Variables that are ignored by exact name, grouped by the file
# in which they are defined.  Each name maps to the reason.
_IGNORED_BY_FILE = {
    'Python/dtoa.c': {
        # guarded by lock?
        'p5s': 'dtoa is thread-safe?',
        'freelist': 'dtoa is thread-safe?',
        'private_mem': 'dtoa is thread-safe?',
        'pmem_next': 'dtoa is thread-safe?',
    },
    'Python/thread.c': {
        # Threads do not become an issue until after these have been set
        # and these never get changed after that.
        'initialized': 'thread-safe',
        'thread_debug': 'thread-safe',
    },
    'Python/getversion.c': {
        # Races are benign here, as well as unlikely.
        'version': BENIGN,
    },
    'Python/fileutils.c': {
        'force_ascii': BENIGN,
        'ioctl_works': BENIGN,
        '_Py_open_cloexec_works': BENIGN,
    },
    'Python/codecs.c': {
        'ucnhash_CAPI': BENIGN,
    },
    'Python/bootstrap_hash.c': {
        'getrandom_works': BENIGN,
    },
    'Objects/unicodeobject.c': {
        'ucnhash_CAPI': BENIGN,
        # *mostly* benign
        'bloom_linebreak': BENIGN,
    },
    'Modules/getbuildinfo.c': {
        # The static is used for pre-allocation.
        'buildinfo': BENIGN,
    },
    'Modules/posixmodule.c': {
        'ticks_per_second': BENIGN,
        'dup3_works': BENIGN,
    },
    'Modules/timemodule.c': {
        'ticks_per_second': BENIGN,
    },
    'Objects/longobject.c': {
        'log_base_BASE': BENIGN,
        'convwidth_base': BENIGN,
        'convmultmax_base': BENIGN,
    },
}


def _is_ignored(variable, ignoredvars=None, *,
                _IGNORED=IGNORED,
                ):
    """Return the reason if the variable is a supported global.

    Return None if the variable is not a supported global.

    The checks happen in this order:

    1. the caller-provided "ignoredvars" mapping (keyed by variable ID)
    2. the "_IGNORED" table (by name, only for vars with no funcname)
    3. the compiler-related files (by vartype or name suffix)
    4. specific names in specific files
    """
    if ignoredvars:
        reason = ignoredvars.get(variable.id)
        if reason:
            return reason

    if variable.funcname is None:
        reason = _IGNORED.get(variable.name)
        if reason:
            return reason

    filename = variable.filename

    # compiler
    if filename == 'Python/graminit.c':
        if variable.vartype.startswith('static state '):
            return 'compiler'
    elif filename == 'Python/symtable.c':
        if variable.vartype.startswith('static identifier '):
            return 'compiler'
    elif filename == 'Python/Python-ast.c':
        # These should be const.
        if variable.name.endswith(('_field', '_attribute')):
            return 'compiler'

    # other
    by_name = _IGNORED_BY_FILE.get(filename)
    if by_name is None:
        return None
    return by_name.get(variable.name)
194
195
def _is_vartype_okay(vartype, ignoredtypes=None):
    """Return the reason the given vartype is okay (or None if it isn't).

    Object types (per _is_object()) are never okay.  Otherwise the
    vartype text is checked, in order, for a "const" prefix, for one
    of the known type names, and finally for the shape of a function
    pointer.

    "ignoredtypes" is accepted but not currently used.
    """
    if _is_object(vartype):
        return None

    if vartype.startswith(('static const ', 'const ')):
        return 'const'

    # The order matters: the first group with a matching name wins.
    reasons = (
        ('const', (
            # components for TypeObject definitions
            'PyMethodDef', 'PyGetSetDef', 'PyMemberDef',
            'PyNumberMethods', 'PySequenceMethods', 'PyMappingMethods',
            'PyBufferProcs', 'PyAsyncMethods',
            'slotdef', 'newfunc',
            # structseq
            'PyStructSequence_Desc', 'PyStructSequence_Field',
            # other definitions
            'PyModuleDef',
        )),
        ('thread-safe', (
            '_Py_atomic_int',
            'pthread_condattr_t',
        )),
        ('startup', (
            '_Py_PreInitEntry',
        )),
    )
    for reason, typenames in reasons:
        if any(typename in vartype for typename in typenames):
            return reason

    # XXX Not handled (yet?):
    #  * PyMemAllocatorEx (global)
    #  * PyThread_type_lock
    #  * _Py_tss_t
    #  * _Py_hashtable_t
    #  * stack_t
    #  * _PyUnicode_Name_CAPI

    # functions
    looks_like_call = '(' in vartype
    looks_like_array = '[' in vartype
    if looks_like_call and not looks_like_array:
        return 'function pointer'

    # XXX finish!
    # * allow const values?
    return None
258
259
# Matches (from the start of the string) a vartype that names an
# object-like type.  It is used by _is_object() via PYOBJECT_RE.match().
# NOTE(review): "|" has the lowest precedence, so the leading "^" belongs
# only to the first top-level alternative and the optional "static "
# prefix only to the first alternative of the second group.  With match()
# the missing "^" does not matter, but it looks like
# "static _Py_IDENTIFIER(...)" is not matched -- confirm that is intended.
PYOBJECT_RE = re.compile(r'''
        ^
        (
            # must start with "static "
            static \s+
            (
                identifier
            )
            \b
        ) |
        (
            # may start with "static "
            ( static \s+ )?
            (
                .*
                (
                    PyObject |
                    PyTypeObject |
                    _? Py \w+ Object |
                    _PyArg_Parser |
                    _Py_Identifier |
                    traceback_t |
                    PyAsyncGenASend |
                    _PyAsyncGenWrappedValue |
                    PyContext |
                    method_cache_entry
                )
                \b
            ) |
            (
                (
                    _Py_IDENTIFIER |
                    _Py_static_string
                )
                [(]
            )
        )
        ''', re.VERBOSE)
298
299
def _is_object(vartype):
    """Return True if the given vartype looks like an object type.

    A vartype counts as an object if it matches PYOBJECT_RE or if it
    ends with " _Py_FalseStruct" or " _Py_TrueStruct".  Anything that
    mentions PyDictKeysObject is explicitly excluded.
    """
    # This check comes first because the name would otherwise
    # match the generic "Py...Object" part of PYOBJECT_RE.
    if 'PyDictKeysObject' in vartype:
        return False

    # XXX Add more?
    matched = PYOBJECT_RE.match(vartype) is not None
    return matched or vartype.endswith((' _Py_FalseStruct',
                                        ' _Py_TrueStruct'))
315
316
def ignored_from_file(infile, *,
                      _read_tsv=read_tsv,
                      ):
    """Return the ignored vars found in the given TSV file.

    The result is a dict with a single "variables" entry, which maps
    the ID of each ignored variable to the reason it is ignored.

    Each row must have the columns in IGNORED_COLUMNS.  An empty
    funcname (or "-") means the variable has no funcname.  The only
    supported kind is "variable"; any other kind results in
    a ValueError.
    """
    variables = {}
    for row in _read_tsv(infile, IGNORED_HEADER):
        filename, funcname, name, kind, reason = row
        varid = ID(
            filename,
            None if (not funcname or funcname == '-') else funcname,
            name,
        )
        if kind != 'variable':
            raise ValueError(f'unsupported kind in row {row}')
        variables[varid] = reason
    return {
        'variables': variables,
        #'types': {},
        #'constants': {},
        #'macros': {},
        }
338
339
340##################################
341# generate
342
def _get_row(varid, reason):
    """Return the TSV row for the given variable ID and reason.

    The values are in the same order as IGNORED_COLUMNS.  A missing
    funcname is written as "-" and the kind is always "variable".
    """
    filename = varid.filename
    funcname = varid.funcname or '-'
    name = varid.name
    return (filename, funcname, name, 'variable', str(reason))
351
352
def _get_rows(variables, ignored=None, *,
              _as_row=_get_row,
              _is_ignored=_is_ignored,
              _vartype_okay=_is_vartype_okay,
              ):
    """Yield a row for each of the given variables that is ignored.

    A variable is ignored if "_is_ignored" or (failing that)
    "_vartype_okay" gives a reason for it.  All other variables are
    skipped.  Each match is also printed, followed by the total
    number of matches once the variables are exhausted.
    """
    total = 0
    for var in variables:
        why = (
            _is_ignored(var, ignored and ignored.get('variables'))
            or _vartype_okay(var.vartype, ignored and ignored.get('types'))
        )
        if why:
            print(' ', var, repr(why))
            yield _as_row(var.id, why)
            total += 1
    print(f'total: {total}')
373
374
def _generate_ignored_file(variables, filename=None, *,
                           _generate_rows=_get_rows,
                           _write_tsv=write_tsv,
                           ):
    """Write the rows for the ignored variables to a TSV file.

    If no filename is given then IGNORED_FILE + ".new" is used.
    Note that no "ignored" data is passed along when the rows
    are generated.
    """
    target = filename or IGNORED_FILE + '.new'
    _write_tsv(target, IGNORED_HEADER, _generate_rows(variables))
383
384
# Script entry point: find the globals and write the ignored ones out
# to the default file (IGNORED_FILE + ".new", since no filename is given).
if __name__ == '__main__':
    from cpython import SOURCE_DIRS
    from cpython.known import (
        from_file as known_from_file,
        DATA_FILE as KNOWN_FILE,
        )
    # XXX This is wrong!
    # NOTE(review): a relative import only works here if the module is
    # run as part of its package (e.g. via "python -m") -- confirm how
    # this script is meant to be invoked.
    from . import find
    known = known_from_file(KNOWN_FILE)
    # "known" may be None/empty, in which case there are no known vars.
    knownvars = (known or {}).get('variables')
    variables = find.globals_from_binary(knownvars=knownvars,
                                         dirnames=SOURCE_DIRS)

    _generate_ignored_file(variables)
399