1# Copyright 2006 Google, Inc. All Rights Reserved. 2# Licensed to PSF under a Contributor Agreement. 3 4"""Pattern compiler. 5 6The grammar is taken from PatternGrammar.txt. 7 8The compiler compiles a pattern to a pytree.*Pattern instance. 9""" 10 11__author__ = "Guido van Rossum <guido@python.org>" 12 13# Python imports 14import StringIO 15 16# Fairly local imports 17from .pgen2 import driver, literals, token, tokenize, parse, grammar 18 19# Really local imports 20from . import pytree 21from . import pygram 22 23 24class PatternSyntaxError(Exception): 25 pass 26 27 28def tokenize_wrapper(input): 29 """Tokenizes a string suppressing significant whitespace.""" 30 skip = set((token.NEWLINE, token.INDENT, token.DEDENT)) 31 tokens = tokenize.generate_tokens(StringIO.StringIO(input).readline) 32 for quintuple in tokens: 33 type, value, start, end, line_text = quintuple 34 if type not in skip: 35 yield quintuple 36 37 38class PatternCompiler(object): 39 40 def __init__(self, grammar_file=None): 41 """Initializer. 42 43 Takes an optional alternative filename for the pattern grammar. 44 """ 45 if grammar_file is None: 46 self.grammar = pygram.pattern_grammar 47 self.syms = pygram.pattern_symbols 48 else: 49 self.grammar = driver.load_grammar(grammar_file) 50 self.syms = pygram.Symbols(self.grammar) 51 self.pygrammar = pygram.python_grammar 52 self.pysyms = pygram.python_symbols 53 self.driver = driver.Driver(self.grammar, convert=pattern_convert) 54 55 def compile_pattern(self, input, debug=False, with_tree=False): 56 """Compiles a pattern string to a nested pytree.*Pattern object.""" 57 tokens = tokenize_wrapper(input) 58 try: 59 root = self.driver.parse_tokens(tokens, debug=debug) 60 except parse.ParseError as e: 61 raise PatternSyntaxError(str(e)) 62 if with_tree: 63 return self.compile_node(root), root 64 else: 65 return self.compile_node(root) 66 67 def compile_node(self, node): 68 """Compiles a node, recursively. 69 70 This is one big switch on the node type. 71 """ 72 # XXX Optimize certain Wildcard-containing-Wildcard patterns 73 # that can be merged 74 if node.type == self.syms.Matcher: 75 node = node.children[0] # Avoid unneeded recursion 76 77 if node.type == self.syms.Alternatives: 78 # Skip the odd children since they are just '|' tokens 79 alts = [self.compile_node(ch) for ch in node.children[::2]] 80 if len(alts) == 1: 81 return alts[0] 82 p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1) 83 return p.optimize() 84 85 if node.type == self.syms.Alternative: 86 units = [self.compile_node(ch) for ch in node.children] 87 if len(units) == 1: 88 return units[0] 89 p = pytree.WildcardPattern([units], min=1, max=1) 90 return p.optimize() 91 92 if node.type == self.syms.NegatedUnit: 93 pattern = self.compile_basic(node.children[1:]) 94 p = pytree.NegatedPattern(pattern) 95 return p.optimize() 96 97 assert node.type == self.syms.Unit 98 99 name = None 100 nodes = node.children 101 if len(nodes) >= 3 and nodes[1].type == token.EQUAL: 102 name = nodes[0].value 103 nodes = nodes[2:] 104 repeat = None 105 if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater: 106 repeat = nodes[-1] 107 nodes = nodes[:-1] 108 109 # Now we've reduced it to: STRING | NAME [Details] | (...) | [...] 110 pattern = self.compile_basic(nodes, repeat) 111 112 if repeat is not None: 113 assert repeat.type == self.syms.Repeater 114 children = repeat.children 115 child = children[0] 116 if child.type == token.STAR: 117 min = 0 118 max = pytree.HUGE 119 elif child.type == token.PLUS: 120 min = 1 121 max = pytree.HUGE 122 elif child.type == token.LBRACE: 123 assert children[-1].type == token.RBRACE 124 assert len(children) in (3, 5) 125 min = max = self.get_int(children[1]) 126 if len(children) == 5: 127 max = self.get_int(children[3]) 128 else: 129 assert False 130 if min != 1 or max != 1: 131 pattern = pattern.optimize() 132 pattern = pytree.WildcardPattern([[pattern]], min=min, max=max) 133 134 if name is not None: 135 pattern.name = name 136 return pattern.optimize() 137 138 def compile_basic(self, nodes, repeat=None): 139 # Compile STRING | NAME [Details] | (...) | [...] 140 assert len(nodes) >= 1 141 node = nodes[0] 142 if node.type == token.STRING: 143 value = unicode(literals.evalString(node.value)) 144 return pytree.LeafPattern(_type_of_literal(value), value) 145 elif node.type == token.NAME: 146 value = node.value 147 if value.isupper(): 148 if value not in TOKEN_MAP: 149 raise PatternSyntaxError("Invalid token: %r" % value) 150 if nodes[1:]: 151 raise PatternSyntaxError("Can't have details for token") 152 return pytree.LeafPattern(TOKEN_MAP[value]) 153 else: 154 if value == "any": 155 type = None 156 elif not value.startswith("_"): 157 type = getattr(self.pysyms, value, None) 158 if type is None: 159 raise PatternSyntaxError("Invalid symbol: %r" % value) 160 if nodes[1:]: # Details present 161 content = [self.compile_node(nodes[1].children[1])] 162 else: 163 content = None 164 return pytree.NodePattern(type, content) 165 elif node.value == "(": 166 return self.compile_node(nodes[1]) 167 elif node.value == "[": 168 assert repeat is None 169 subpattern = self.compile_node(nodes[1]) 170 return pytree.WildcardPattern([[subpattern]], min=0, max=1) 171 assert False, node 172 173 def get_int(self, node): 174 assert node.type == token.NUMBER 175 return int(node.value) 176 177 178# Map named tokens to the type value for a LeafPattern 179TOKEN_MAP = {"NAME": token.NAME, 180 "STRING": token.STRING, 181 "NUMBER": token.NUMBER, 182 "TOKEN": None} 183 184 185def _type_of_literal(value): 186 if value[0].isalpha(): 187 return token.NAME 188 elif value in grammar.opmap: 189 return grammar.opmap[value] 190 else: 191 return None 192 193 194def pattern_convert(grammar, raw_node_info): 195 """Converts raw node information to a Node or Leaf instance.""" 196 type, value, context, children = raw_node_info 197 if children or type in grammar.number2symbol: 198 return pytree.Node(type, children, context=context) 199 else: 200 return pytree.Leaf(type, value, context=context) 201 202 203def compile_pattern(pattern): 204 return PatternCompiler().compile_pattern(pattern) 205