1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""Parser engine for the grammar tables generated by pgen.
5
6The grammar table must be loaded first.
7
8See Parser/parser.c in the Python distribution for additional info on
9how this parsing engine works.
10
11"""
12
13# Local imports
14from . import token
15
16class ParseError(Exception):
17    """Exception to signal the parser is stuck."""
18
19    def __init__(self, msg, type, value, context):
20        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
21                           (msg, type, value, context))
22        self.msg = msg
23        self.type = type
24        self.value = value
25        self.context = context
26
27    def __reduce__(self):
28        return type(self), (self.msg, self.type, self.value, self.context)
29
30class Parser(object):
31    """Parser engine.
32
33    The proper usage sequence is:
34
35    p = Parser(grammar, [converter])  # create instance
36    p.setup([start])                  # prepare for parsing
37    <for each input token>:
38        if p.addtoken(...):           # parse a token; may raise ParseError
39            break
40    root = p.rootnode                 # root of abstract syntax tree
41
42    A Parser instance may be reused by calling setup() repeatedly.
43
44    A Parser instance contains state pertaining to the current token
45    sequence, and should not be used concurrently by different threads
46    to parse separate token sequences.
47
48    See driver.py for how to get input tokens by tokenizing a file or
49    string.
50
51    Parsing is complete when addtoken() returns True; the root of the
52    abstract syntax tree can then be retrieved from the rootnode
53    instance variable.  When a syntax error occurs, addtoken() raises
54    the ParseError exception.  There is no error recovery; the parser
55    cannot be used after a syntax error was reported (but it can be
56    reinitialized by calling setup()).
57
58    """
59
60    def __init__(self, grammar, convert=None):
61        """Constructor.
62
63        The grammar argument is a grammar.Grammar instance; see the
64        grammar module for more information.
65
66        The parser is not ready yet for parsing; you must call the
67        setup() method to get it started.
68
69        The optional convert argument is a function mapping concrete
70        syntax tree nodes to abstract syntax tree nodes.  If not
71        given, no conversion is done and the syntax tree produced is
72        the concrete syntax tree.  If given, it must be a function of
73        two arguments, the first being the grammar (a grammar.Grammar
74        instance), and the second being the concrete syntax tree node
75        to be converted.  The syntax tree is converted from the bottom
76        up.
77
78        A concrete syntax tree node is a (type, value, context, nodes)
79        tuple, where type is the node type (a token or symbol number),
80        value is None for symbols and a string for tokens, context is
81        None or an opaque value used for error reporting (typically a
82        (lineno, offset) pair), and nodes is a list of children for
83        symbols, and None for tokens.
84
85        An abstract syntax tree node may be anything; this is entirely
86        up to the converter function.
87
88        """
89        self.grammar = grammar
90        self.convert = convert or (lambda grammar, node: node)
91
92    def setup(self, start=None):
93        """Prepare for parsing.
94
95        This *must* be called before starting to parse.
96
97        The optional argument is an alternative start symbol; it
98        defaults to the grammar's start symbol.
99
100        You can use a Parser instance to parse any number of programs;
101        each time you call setup() the parser is reset to an initial
102        state determined by the (implicit or explicit) start symbol.
103
104        """
105        if start is None:
106            start = self.grammar.start
107        # Each stack entry is a tuple: (dfa, state, node).
108        # A node is a tuple: (type, value, context, children),
109        # where children is a list of nodes or None, and context may be None.
110        newnode = (start, None, None, [])
111        stackentry = (self.grammar.dfas[start], 0, newnode)
112        self.stack = [stackentry]
113        self.rootnode = None
114        self.used_names = set() # Aliased to self.rootnode.used_names in pop()
115
116    def addtoken(self, type, value, context):
117        """Add a token; return True iff this is the end of the program."""
118        # Map from token to label
119        ilabel = self.classify(type, value, context)
120        # Loop until the token is shifted; may raise exceptions
121        while True:
122            dfa, state, node = self.stack[-1]
123            states, first = dfa
124            arcs = states[state]
125            # Look for a state with this label
126            for i, newstate in arcs:
127                t, v = self.grammar.labels[i]
128                if ilabel == i:
129                    # Look it up in the list of labels
130                    assert t < 256
131                    # Shift a token; we're done with it
132                    self.shift(type, value, newstate, context)
133                    # Pop while we are in an accept-only state
134                    state = newstate
135                    while states[state] == [(0, state)]:
136                        self.pop()
137                        if not self.stack:
138                            # Done parsing!
139                            return True
140                        dfa, state, node = self.stack[-1]
141                        states, first = dfa
142                    # Done with this token
143                    return False
144                elif t >= 256:
145                    # See if it's a symbol and if we're in its first set
146                    itsdfa = self.grammar.dfas[t]
147                    itsstates, itsfirst = itsdfa
148                    if ilabel in itsfirst:
149                        # Push a symbol
150                        self.push(t, self.grammar.dfas[t], newstate, context)
151                        break # To continue the outer while loop
152            else:
153                if (0, state) in arcs:
154                    # An accepting state, pop it and try something else
155                    self.pop()
156                    if not self.stack:
157                        # Done parsing, but another token is input
158                        raise ParseError("too much input",
159                                         type, value, context)
160                else:
161                    # No success finding a transition
162                    raise ParseError("bad input", type, value, context)
163
164    def classify(self, type, value, context):
165        """Turn a token into a label.  (Internal)"""
166        if type == token.NAME:
167            # Keep a listing of all used names
168            self.used_names.add(value)
169            # Check for reserved words
170            ilabel = self.grammar.keywords.get(value)
171            if ilabel is not None:
172                return ilabel
173        ilabel = self.grammar.tokens.get(type)
174        if ilabel is None:
175            raise ParseError("bad token", type, value, context)
176        return ilabel
177
178    def shift(self, type, value, newstate, context):
179        """Shift a token.  (Internal)"""
180        dfa, state, node = self.stack[-1]
181        newnode = (type, value, context, None)
182        newnode = self.convert(self.grammar, newnode)
183        if newnode is not None:
184            node[-1].append(newnode)
185        self.stack[-1] = (dfa, newstate, node)
186
187    def push(self, type, newdfa, newstate, context):
188        """Push a nonterminal.  (Internal)"""
189        dfa, state, node = self.stack[-1]
190        newnode = (type, None, context, [])
191        self.stack[-1] = (dfa, newstate, node)
192        self.stack.append((newdfa, 0, newnode))
193
194    def pop(self):
195        """Pop a nonterminal.  (Internal)"""
196        popdfa, popstate, popnode = self.stack.pop()
197        newnode = self.convert(self.grammar, popnode)
198        if newnode is not None:
199            if self.stack:
200                dfa, state, node = self.stack[-1]
201                node[-1].append(newnode)
202            else:
203                self.rootnode = newnode
204                self.rootnode.used_names = self.used_names
205