1import pathlib
2import shutil
3import tokenize
4import sysconfig
5import tempfile
6import itertools
7
8from typing import Optional, Tuple, List, IO, Set, Dict
9
10from pegen.c_generator import CParserGenerator
11from pegen.grammar import Grammar
12from pegen.grammar_parser import GeneratedParser as GrammarParser
13from pegen.parser import Parser
14from pegen.parser_generator import ParserGenerator
15from pegen.python_generator import PythonParserGenerator
16from pegen.tokenizer import Tokenizer
17
# Directory that contains this module. compile_c_extension() resolves the C
# sources and include directories it needs relative to this location.
MOD_DIR = pathlib.Path(__file__).resolve().parent

# Shape of the value returned by generate_token_definitions():
# (token index -> token name, exact token string -> token index,
#  names of tokens that have no exact string).
TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]
21
22
def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    """Combine two sysconfig variables into a flat list of command-line flags.

    Both arguments are *names* of configuration variables that are looked up
    with ``sysconfig.get_config_var``. When either lookup yields ``None`` an
    empty list is returned; otherwise the two values are joined with a single
    space and split on whitespace.
    """
    looked_up = [
        sysconfig.get_config_var(variable_name)
        for variable_name in (compiler_flags, compiler_py_flags_nodist)
    ]
    # Either variable being unavailable disables the extra flags entirely.
    if any(value is None for value in looked_up):
        return []
    base_value, nodist_value = looked_up
    return f"{base_value} {nodist_value}".split()
29
30
def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
) -> pathlib.Path:
    """Compile the generated source for a parser generator into an extension module.

    The extension module will be generated in the same directory as the provided path
    for the generated source, with the same basename (in addition to extension module
    metadata). For example, for the source mydir/parser.c the generated extension
    in a darwin system with python 3.8 will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path will be used as the temporary build directory
    of distutils (this is useful in case you want to use a temporary directory).

    Args:
        generated_source_path: Path of the generated C source to compile. Its
            stem becomes the extension module name.
        build_dir: Optional directory assigned to both ``build_temp`` and
            ``build_lib`` of the distutils ``build_ext`` command.
        verbose: When true, distutils logging is switched to DEBUG. This is a
            process-wide setting and it is not restored afterwards.
        keep_asserts: When true, ``-UNDEBUG`` is appended to the compile flags.

    Returns:
        The path of the built extension module, located next to the generated
        source file. Note that this is a ``pathlib.Path``, not a ``str``.
    """
    import distutils.log
    from distutils.core import Distribution, Extension
    from distutils.command.clean import clean  # type: ignore
    from distutils.command.build_ext import build_ext  # type: ignore
    from distutils.tests.support import fixup_build_ext  # type: ignore

    if verbose:
        distutils.log.set_verbosity(distutils.log.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    extension_name = source_file_path.stem
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        extra_compile_args.append("-UNDEBUG")

    # All interpreter sources/headers are addressed relative to the directory
    # three levels above this module's directory (presumably the root of the
    # CPython checkout -- confirm against the repository layout).
    root_dir = MOD_DIR.parent.parent.parent
    python_dir = root_dir / "Python"
    parser_dir = root_dir / "Parser"
    pegen_dir = parser_dir / "pegen"

    extension = [
        Extension(
            extension_name,
            sources=[
                str(python_dir / "Python-ast.c"),
                str(python_dir / "asdl.c"),
                str(parser_dir / "tokenizer.c"),
                str(pegen_dir / "pegen.c"),
                str(pegen_dir / "parse_string.c"),
                str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
                generated_source_path,
            ],
            include_dirs=[
                str(root_dir / "Include" / "internal"),
                str(parser_dir),
                str(pegen_dir),
            ],
            extra_compile_args=extra_compile_args,
            extra_link_args=extra_link_args,
        )
    ]
    dist = Distribution({"name": extension_name, "ext_modules": extension})
    build_cmd = build_ext(dist)
    # Helper taken from distutils' own test support package; it adjusts the
    # build_ext command before it is finalized.
    fixup_build_ext(build_cmd)
    build_cmd.inplace = True
    if build_dir:
        build_cmd.build_temp = build_dir
        build_cmd.build_lib = build_dir
    build_cmd.ensure_finalized()
    build_cmd.run()

    # Relocate the freshly built module so that it sits next to the generated source.
    extension_path = source_file_path.parent / build_cmd.get_ext_filename(extension_name)
    shutil.move(build_cmd.get_ext_fullpath(extension_name), extension_path)

    # Run the distutils "clean" command on the same distribution to drop build leftovers.
    clean_cmd = clean(dist)
    clean_cmd.finalize_options()
    clean_cmd.run()

    return extension_path
101
102
def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    """Parse *grammar_file* and return ``(grammar, parser, tokenizer)``.

    The file is tokenized with ``tokenize.generate_tokens`` and fed to the
    grammar parser. When the parser's ``start()`` yields a falsy result, the
    error built by ``parser.make_syntax_error(grammar_file)`` is raised.
    """
    with open(grammar_file) as grammar_stream:
        token_stream = tokenize.generate_tokens(grammar_stream.readline)
        tokenizer = Tokenizer(token_stream, verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()

        if grammar:
            return grammar, parser, tokenizer

        raise parser.make_syntax_error(grammar_file)
115
116
def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
    """Build the token tables from the lines of a Tokens file.

    Blank lines and lines starting with ``#`` are skipped. Every remaining
    line receives the next consecutive index (starting at 0) and must consist
    of either a token name alone, or a token name followed by its exact
    string (surrounding single quotes are stripped).

    Returns a tuple ``(index -> name, exact string -> index, names without
    an exact string)``.

    Raises:
        ValueError: if a line has more than two whitespace-separated pieces.
    """
    names_by_index = {}
    index_by_exact = {}
    names_without_exact = set()

    stripped_lines = (raw.strip() for raw in tokens)
    # Only lines that survive this filter consume an index.
    meaningful_lines = (
        text for text in stripped_lines if text and not text.startswith("#")
    )

    for index, text in enumerate(meaningful_lines):
        fields = text.split()
        field_count = len(fields)

        if field_count == 1:
            name = fields[0]
            names_without_exact.add(name)
        elif field_count == 2:
            name, exact = fields
            index_by_exact[exact.strip("'")] = index
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {text}")

        names_by_index[index] = name

    return names_by_index, index_by_exact, names_without_exact
144
145
def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    """Write the C parser for *grammar* to *output_file*, optionally compiling it.

    The token tables are read from *tokens_file* and handed to a
    ``CParserGenerator`` that writes into *output_file*. When
    *compile_extension* is true the generated source is then compiled with
    ``compile_c_extension`` using a temporary build directory. The generator
    instance is returned in either case.
    """
    with open(tokens_file, "r") as token_stream:
        token_tables = generate_token_definitions(token_stream)
    all_tokens, exact_tok, non_exact_tok = token_tables

    with open(output_file, "w") as output_stream:
        gen: ParserGenerator = CParserGenerator(
            grammar,
            all_tokens,
            exact_tok,
            non_exact_tok,
            output_stream,
            skip_actions=skip_actions,
        )
        gen.generate(grammar_file)

    if not compile_extension:
        return gen

    # The build directory only needs to live for the duration of the compilation.
    with tempfile.TemporaryDirectory() as build_dir:
        compile_c_extension(
            output_file,
            build_dir=build_dir,
            verbose=verbose_c_extension,
            keep_asserts=keep_asserts_in_extension,
        )
    return gen
173
174
def build_python_generator(
    grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False,
) -> ParserGenerator:
    """Write the Python parser for *grammar* to *output_file* and return the generator.

    *skip_actions* is accepted but currently not forwarded to the generator.
    """
    with open(output_file, "w") as output_stream:
        # TODO: skip_actions
        gen: ParserGenerator = PythonParserGenerator(grammar, output_stream)
        gen.generate(grammar_file)
        return gen
182
183
def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Parse a grammar and generate its C parser in one call.

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
          Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
          output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert
          statements when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.

    Returns:
        A ``(grammar, parser, tokenizer, generator)`` tuple: the first three
        come from ``build_parser`` and the last one from ``build_c_generator``.
    """
    grammar, parser, tokenizer = build_parser(
        grammar_file,
        verbose_tokenizer=verbose_tokenizer,
        verbose_parser=verbose_parser,
    )
    generator = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension=compile_extension,
        verbose_c_extension=verbose_c_extension,
        keep_asserts_in_extension=keep_asserts_in_extension,
        skip_actions=skip_actions,
    )
    return grammar, parser, tokenizer, generator
226
227
def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Parse a grammar and generate its Python parser in one call.

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.

    Returns:
        A ``(grammar, parser, tokenizer, generator)`` tuple: the first three
        come from ``build_parser`` and the last one from
        ``build_python_generator``.
    """
    grammar, parser, tokenizer = build_parser(
        grammar_file,
        verbose_tokenizer=verbose_tokenizer,
        verbose_parser=verbose_parser,
    )
    generator = build_python_generator(
        grammar, grammar_file, output_file, skip_actions=skip_actions
    )
    return grammar, parser, tokenizer, generator
249