1#!/usr/bin/env python3.8
2
3import argparse
4import ast
5import os
6import sys
7import time
8import tokenize
9import _peg_parser
10from glob import glob, escape
11from pathlib import PurePath
12
13from typing import List, Optional, Any, Tuple
14
15sys.path.insert(0, os.getcwd())
16from pegen.ast_dump import ast_dump
17from pegen.testutil import print_memstats
18from scripts import show_parse
19
# ANSI escape sequences used by report_status() to colour the per-file
# status line printed in the default (non-short) output format.
SUCCESS = "\033[92m"  # bright green, used for the "OK" status
FAIL = "\033[91m"  # bright red, used for the "Fail" status
ENDC = "\033[0m"  # resets terminal attributes after the coloured text

# Values accepted for the ``mode`` argument of parse_file()/parse_directory().
COMPILE = 2  # parse_file() calls _peg_parser.compile_string()
PARSE = 1  # parse_file() calls _peg_parser.parse_string() with ast=True
# NOTE(review): NOTREE is not referenced in this file; presumably it selects
# parse_string() with ast=False (what parse_file() does for any other mode).
NOTREE = 0
27
# Command-line interface of the script.  main() reads the parsed values
# from this module-level parser.
argparser = argparse.ArgumentParser(
    description="Helper program to test directories or files for pegen",
    prog="test_parse_directory",
)

# Root of the tree that is searched for Python files.
argparser.add_argument(
    "-d",
    "--directory",
    help="Directory path containing files to test",
)

# May be given several times; every occurrence appends one glob pattern.
argparser.add_argument(
    "-e",
    "--exclude",
    default=[],
    action="append",
    help="Glob(s) for matching files to exclude",
)

# Output verbosity switches.
argparser.add_argument(
    "-s",
    "--short",
    action="store_true",
    help="Only show errors, in a more Emacs-friendly format",
)
argparser.add_argument(
    "-v",
    "--verbose",
    action="store_true",
    help="Display detailed errors for failures",
)

# Counted flag: the number of occurrences is stored (0 when absent).
argparser.add_argument(
    "-t",
    "--tree",
    default=0,
    action="count",
    help="Compare parse tree to official AST",
)
45
46
def report_status(
    succeeded: bool,
    file: str,
    verbose: bool,
    error: Optional[Exception] = None,
    short: bool = False,
) -> None:
    """Print the outcome of processing a single file.

    Args:
        succeeded: Whether the file was processed without error.
        file: Path of the file, echoed in the output.
        verbose: In the default format, also print the error's class name
            and message underneath the status line.
        error: The exception raised for the file, if any.
        short: Use the one-line ``file:line:column: message`` format and
            print nothing at all for successful files.
    """
    if short:
        # The short format only ever lists problems.
        if succeeded:
            return

        if isinstance(error, SyntaxError):
            # Fall back to 1 when the exception carries no position.
            lineno = error.lineno or 1
            offset = error.offset or 1
            message = error.args[0]
        else:
            lineno = 0
            offset = 0
            message = f"{error.__class__.__name__}: {error}"
        print(f"{file}:{lineno}:{offset}: {message}")
        return

    # Default format: coloured, column-aligned status line.
    status, COLOR = ("OK", SUCCESS) if succeeded is True else ("Fail", FAIL)
    print(f"{COLOR}{file:60} {status}{ENDC}")

    if error and verbose:
        print(f"  {str(error.__class__.__name__)}: {error}")
79
80
def compare_trees(
    actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False,
) -> int:
    """Compare *actual_tree* against the old parser's tree for *file*.

    The file is read again and parsed with ``oldparser=True`` to obtain the
    expected tree.  Both trees are dumped with ``ast_dump`` and the dumps
    are compared as text.

    Args:
        actual_tree: Tree produced by the parser under test.
        file: Path of the source file the tree was built from.
        verbose: Print the formatted tree(s) in addition to the diff.
        include_attributes: Forwarded to the dump/format/diff helpers.

    Returns:
        0 if the two dumps are identical, 1 otherwise (a diff is printed).
    """
    # tokenize.open() honours the file's BOM / coding cookie, which is how
    # parse_directory() read the same file; plain open() would use the
    # locale encoding and could decode the source differently.
    with tokenize.open(file) as f:
        expected_tree = _peg_parser.parse_string(f.read(), oldparser=True)

    expected_text = ast_dump(expected_tree, include_attributes=include_attributes)
    actual_text = ast_dump(actual_tree, include_attributes=include_attributes)
    if actual_text == expected_text:
        if verbose:
            print(f"Tree for {file}:")
            print(show_parse.format_tree(actual_tree, include_attributes))
        return 0

    print(f"Diffing ASTs for {file} ...")

    if verbose:
        # The formatted trees are only needed for the verbose listing.
        print(f"Expected for {file}:")
        print(show_parse.format_tree(expected_tree, include_attributes))
        print(f"Actual for {file}:")
        print(show_parse.format_tree(actual_tree, include_attributes))
        print(f"Diff for {file}:")

    for line in show_parse.diff_trees(expected_tree, actual_tree, include_attributes):
        print(line)

    return 1
112
113
def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]:
    """Parse or compile *source* and measure how long it takes.

    Args:
        source: Source text to process.
        file: File name passed to the parser as ``filename``.
        mode: ``COMPILE`` calls ``_peg_parser.compile_string``; any other
            value calls ``_peg_parser.parse_string``, requesting an AST
            only when the mode equals ``PARSE``.
        oldparser: Forwarded to ``_peg_parser`` unchanged.

    Returns:
        A ``(result, seconds)`` tuple: whatever ``_peg_parser`` returned and
        the elapsed time of the call in seconds.

    Exceptions raised by ``_peg_parser`` propagate to the caller.
    """
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be distorted by system clock adjustments the way a
    # difference of time.time() readings can.
    started = time.perf_counter()
    if mode == COMPILE:
        result = _peg_parser.compile_string(
            source,
            filename=file,
            oldparser=oldparser,
        )
    else:
        result = _peg_parser.parse_string(
            source,
            filename=file,
            oldparser=oldparser,
            ast=(mode == PARSE),
        )
    elapsed = time.perf_counter() - started
    return result, elapsed
131
132
def is_parsing_failure(source: str) -> bool:
    """Return True if the old parser also rejects *source*.

    parse_directory() calls this after the parser under test raised a
    SyntaxError.  A True result means the source is invalid for both
    parsers, which parse_directory() reports as "cannot be parsed by either
    parser" instead of counting it as a failure.

    Args:
        source: Source text to check, parsed in ``exec`` mode.

    Returns:
        True when the old parser raises SyntaxError, False when it parses
        the source successfully.
    """
    try:
        _peg_parser.parse_string(source, mode="exec", oldparser=True)
    except SyntaxError:
        # The old parser failed as well: a genuine parsing failure.
        return True
    return False
139
140
def generate_time_stats(files, total_seconds) -> None:
    """Print size totals and throughput for the given files.

    Args:
        files: Paths of the files to measure; each one is opened and read.
        total_seconds: Time spent on the files, in seconds.  The throughput
            line is only printed when this is greater than zero.
    """
    total_files = len(files)
    total_lines = 0
    total_bytes = 0
    for path in files:
        # Binary mode: lines are counted without decoding the content.
        with open(path, "rb") as stream:
            for _ in stream:
                total_lines += 1
            # The loop consumed the whole file, so the current position
            # equals its size in bytes.
            total_bytes += stream.tell()

    print(
        f"Checked {total_files:,} files, {total_lines:,} lines,",
        f"{total_bytes:,} bytes in {total_seconds:,.3f} seconds.",
    )

    # No meaningful rate can be computed without a positive duration.
    if not total_seconds > 0:
        return

    print(
        f"That's {total_lines / total_seconds :,.0f} lines/sec,",
        f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
    )
160
161
def parse_directory(
    directory: str,
    verbose: bool,
    excluded_files: List[str],
    tree_arg: int,
    short: bool,
    mode: int,
    oldparser: bool,
) -> int:
    """Run the parser over every ``*.py`` file below *directory*.

    Each file is reported through report_status(); afterwards timing
    statistics are printed and, when *tree_arg* is non-zero, the collected
    trees are checked with compare_trees().

    Args:
        directory: Root directory; searched recursively for ``*.py`` files.
        verbose: Forwarded to report_status() and compare_trees().
        excluded_files: Glob patterns; files whose path matches any of them
            (via ``PurePath.match``) are skipped.
        tree_arg: Non-zero to compare trees afterwards; a value of 2 or
            more also compares node attributes.
        short: Use the short report format and print memory statistics.
        mode: One of the module-level mode constants, passed to parse_file().
        oldparser: Passed to parse_file(); cannot be combined with *tree_arg*.

    Returns:
        0 if no failure was counted and every compared tree matched,
        1 otherwise (including the invalid oldparser/tree_arg combination).
    """
    if tree_arg:
        assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees"

    if oldparser and tree_arg:
        print("Cannot specify tree argument with the cpython parser.", file=sys.stderr)
        return 1

    # For a given directory, traverse files and attempt to parse each one
    # - Output success/failure for each file
    errors = 0
    files = []
    trees = {}  # Trees to compare (after everything else is done)
    total_seconds = 0

    # escape() keeps glob metacharacters in the directory name literal.
    for file in sorted(glob(os.path.join(escape(directory), "**/*.py"), recursive=True)):
        # Only attempt to parse Python files and files that are not excluded
        if any(PurePath(file).match(pattern) for pattern in excluded_files):
            continue

        # tokenize.open() decodes the file using its BOM / coding cookie.
        with tokenize.open(file) as f:
            source = f.read()

        try:
            result, dt = parse_file(source, file, mode, oldparser)
            total_seconds += dt
            if tree_arg:
                trees[file] = result
            report_status(succeeded=True, file=file, verbose=verbose, short=short)
        except SyntaxError as error:
            # Files flagged by is_parsing_failure() are only mentioned;
            # all others are reported and counted as failures.
            if is_parsing_failure(source):
                print(f"File {file} cannot be parsed by either parser.")
            else:
                report_status(
                    succeeded=False, file=file, verbose=verbose, error=error, short=short
                )
                errors += 1
        # Failed files are included in the statistics as well.
        files.append(file)

    generate_time_stats(files, total_seconds)
    if short:
        print_memstats()

    if errors:
        print(f"Encountered {errors} failures.", file=sys.stderr)

    # Compare trees (the dict is empty unless -t is given)
    compare_trees_errors = 0
    for file, tree in trees.items():
        if not short:
            print("Comparing ASTs for", file)
        if compare_trees(tree, file, verbose, tree_arg >= 2) == 1:
            compare_trees_errors += 1

    if errors or compare_trees_errors:
        return 1

    return 0
230
231
def main() -> None:
    """Parse the command line, run parse_directory() and exit with its status.

    The mode is ``PARSE`` when ``-t/--tree`` was given (trees are needed for
    the comparison) and ``COMPILE`` otherwise.  The new parser is always
    used (``oldparser=False``).
    """
    args = argparser.parse_args()
    if args.directory is None:
        # Without a directory parse_directory() would pass None to
        # glob.escape() and die with a TypeError; report a usage error.
        argparser.error("the following arguments are required: -d/--directory")

    mode = PARSE if args.tree else COMPILE
    sys.exit(
        parse_directory(
            args.directory,
            args.verbose,
            args.exclude,
            args.tree,
            args.short,
            mode,
            oldparser=False,
        )
    )


if __name__ == "__main__":
    main()
255