1# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""Pattern compiler.
5
6The grammar is taken from PatternGrammar.txt.
7
8The compiler compiles a pattern to a pytree.*Pattern instance.
9"""
10
11__author__ = "Guido van Rossum <guido@python.org>"
12
13# Python imports
14import StringIO
15
16# Fairly local imports
17from .pgen2 import driver, literals, token, tokenize, parse, grammar
18
19# Really local imports
20from . import pytree
21from . import pygram
22
23
24class PatternSyntaxError(Exception):
25    pass
26
27
28def tokenize_wrapper(input):
29    """Tokenizes a string suppressing significant whitespace."""
30    skip = set((token.NEWLINE, token.INDENT, token.DEDENT))
31    tokens = tokenize.generate_tokens(StringIO.StringIO(input).readline)
32    for quintuple in tokens:
33        type, value, start, end, line_text = quintuple
34        if type not in skip:
35            yield quintuple
36
37
38class PatternCompiler(object):
39
40    def __init__(self, grammar_file=None):
41        """Initializer.
42
43        Takes an optional alternative filename for the pattern grammar.
44        """
45        if grammar_file is None:
46            self.grammar = pygram.pattern_grammar
47            self.syms = pygram.pattern_symbols
48        else:
49            self.grammar = driver.load_grammar(grammar_file)
50            self.syms = pygram.Symbols(self.grammar)
51        self.pygrammar = pygram.python_grammar
52        self.pysyms = pygram.python_symbols
53        self.driver = driver.Driver(self.grammar, convert=pattern_convert)
54
55    def compile_pattern(self, input, debug=False, with_tree=False):
56        """Compiles a pattern string to a nested pytree.*Pattern object."""
57        tokens = tokenize_wrapper(input)
58        try:
59            root = self.driver.parse_tokens(tokens, debug=debug)
60        except parse.ParseError as e:
61            raise PatternSyntaxError(str(e))
62        if with_tree:
63            return self.compile_node(root), root
64        else:
65            return self.compile_node(root)
66
67    def compile_node(self, node):
68        """Compiles a node, recursively.
69
70        This is one big switch on the node type.
71        """
72        # XXX Optimize certain Wildcard-containing-Wildcard patterns
73        # that can be merged
74        if node.type == self.syms.Matcher:
75            node = node.children[0] # Avoid unneeded recursion
76
77        if node.type == self.syms.Alternatives:
78            # Skip the odd children since they are just '|' tokens
79            alts = [self.compile_node(ch) for ch in node.children[::2]]
80            if len(alts) == 1:
81                return alts[0]
82            p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
83            return p.optimize()
84
85        if node.type == self.syms.Alternative:
86            units = [self.compile_node(ch) for ch in node.children]
87            if len(units) == 1:
88                return units[0]
89            p = pytree.WildcardPattern([units], min=1, max=1)
90            return p.optimize()
91
92        if node.type == self.syms.NegatedUnit:
93            pattern = self.compile_basic(node.children[1:])
94            p = pytree.NegatedPattern(pattern)
95            return p.optimize()
96
97        assert node.type == self.syms.Unit
98
99        name = None
100        nodes = node.children
101        if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
102            name = nodes[0].value
103            nodes = nodes[2:]
104        repeat = None
105        if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
106            repeat = nodes[-1]
107            nodes = nodes[:-1]
108
109        # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
110        pattern = self.compile_basic(nodes, repeat)
111
112        if repeat is not None:
113            assert repeat.type == self.syms.Repeater
114            children = repeat.children
115            child = children[0]
116            if child.type == token.STAR:
117                min = 0
118                max = pytree.HUGE
119            elif child.type == token.PLUS:
120                min = 1
121                max = pytree.HUGE
122            elif child.type == token.LBRACE:
123                assert children[-1].type == token.RBRACE
124                assert  len(children) in (3, 5)
125                min = max = self.get_int(children[1])
126                if len(children) == 5:
127                    max = self.get_int(children[3])
128            else:
129                assert False
130            if min != 1 or max != 1:
131                pattern = pattern.optimize()
132                pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
133
134        if name is not None:
135            pattern.name = name
136        return pattern.optimize()
137
138    def compile_basic(self, nodes, repeat=None):
139        # Compile STRING | NAME [Details] | (...) | [...]
140        assert len(nodes) >= 1
141        node = nodes[0]
142        if node.type == token.STRING:
143            value = unicode(literals.evalString(node.value))
144            return pytree.LeafPattern(_type_of_literal(value), value)
145        elif node.type == token.NAME:
146            value = node.value
147            if value.isupper():
148                if value not in TOKEN_MAP:
149                    raise PatternSyntaxError("Invalid token: %r" % value)
150                if nodes[1:]:
151                    raise PatternSyntaxError("Can't have details for token")
152                return pytree.LeafPattern(TOKEN_MAP[value])
153            else:
154                if value == "any":
155                    type = None
156                elif not value.startswith("_"):
157                    type = getattr(self.pysyms, value, None)
158                    if type is None:
159                        raise PatternSyntaxError("Invalid symbol: %r" % value)
160                if nodes[1:]: # Details present
161                    content = [self.compile_node(nodes[1].children[1])]
162                else:
163                    content = None
164                return pytree.NodePattern(type, content)
165        elif node.value == "(":
166            return self.compile_node(nodes[1])
167        elif node.value == "[":
168            assert repeat is None
169            subpattern = self.compile_node(nodes[1])
170            return pytree.WildcardPattern([[subpattern]], min=0, max=1)
171        assert False, node
172
173    def get_int(self, node):
174        assert node.type == token.NUMBER
175        return int(node.value)
176
177
178# Map named tokens to the type value for a LeafPattern
179TOKEN_MAP = {"NAME": token.NAME,
180             "STRING": token.STRING,
181             "NUMBER": token.NUMBER,
182             "TOKEN": None}
183
184
185def _type_of_literal(value):
186    if value[0].isalpha():
187        return token.NAME
188    elif value in grammar.opmap:
189        return grammar.opmap[value]
190    else:
191        return None
192
193
194def pattern_convert(grammar, raw_node_info):
195    """Converts raw node information to a Node or Leaf instance."""
196    type, value, context, children = raw_node_info
197    if children or type in grammar.number2symbol:
198        return pytree.Node(type, children, context=context)
199    else:
200        return pytree.Leaf(type, value, context=context)
201
202
203def compile_pattern(pattern):
204    return PatternCompiler().compile_pattern(pattern)
205