# parser for Unix yacc-based grammars
#
# Author: David Beazley (dave@dabeaz.com)
# Date  : October 2, 2006
#
# Parses a yacc grammar with PLY and prints a Python/PLY skeleton of it on
# standard output: a ``tokens`` list, a ``precedence`` list and one ``p_*``
# function per production.  Original action code is echoed as comments.
#
# NOTE: in every p_* function below the docstring is the grammar production
# consumed by ply.yacc, so it must stay the first statement and must not be
# turned into prose.  Explanations therefore live in '#' comments.  The
# functions must also stay in this order (the first one supplies the start
# symbol).

import sys

import ylex
tokens = ylex.tokens

from ply import *

# Token names gathered from the definitions section (first-seen order).
tokenlist = []
# Precedence entries of the form (assoc, name, name, ...), in source order.
preclist = []

# When true, print_code() echoes original code blocks as comments.
emit_code = 1


# Top-level production: a definitions section followed by a rules section.
def p_yacc(p):
    '''yacc : defsection rulesection'''


# End of the definitions section: emit the collected token and precedence
# tables followed by the banner that opens the rules output.
def p_defsection(p):
    '''defsection : definitions SECTION
                  | SECTION'''
    # Presumably tells the lexer that the next SECTION marker is the final
    # one -- TODO confirm against ylex.
    p.lexer.lastsection = 1
    print("tokens = ", repr(tokenlist))
    print()
    print("precedence = ", repr(preclist))
    print()
    print("# -------------- RULES ----------------")
    print()


# End of the rules section: emit the closing banner and echo the text carried
# by the closing SECTION token.
def p_rulesection(p):
    '''rulesection : rules SECTION'''

    print("# -------------- RULES END ----------------")
    print_code(p[2], 0)


# One or more definitions; nothing to do beyond parsing them.
def p_definitions(p):
    '''definitions : definitions definition
                   | definition'''


# A literal code block in the definitions section is echoed unindented.
def p_definition_literal(p):
    '''definition : LITERAL'''
    print_code(p[1], 0)


# A start-symbol declaration becomes a ``start = '...'`` assignment.
def p_definition_start(p):
    '''definition : START ID'''
    print("start = '%s'" % p[2])


# Token / precedence declaration.  Unquoted names are recorded as tokens;
# associativity declarations additionally add a precedence entry.
def p_definition_token(p):
    '''definition : toktype opttype idlist optsemi '''
    names = p[3]
    for name in names:
        # Quoted literals are not token names.  A name may be declared more
        # than once (e.g. as a token and again with an associativity); record
        # it only the first time so the emitted list has no duplicates.
        if name[0] not in "'\"" and name not in tokenlist:
            tokenlist.append(name)
    if p[1] in ('%left', '%right', '%nonassoc'):
        # NOTE(review): quoted literals are stored verbatim, quotes included;
        # confirm that is what the consumer of the generated table expects.
        preclist.append((p[1][1:],) + tuple(names))


# The declaration keyword is passed through unchanged.
def p_toktype(p):
    '''toktype : TOKEN
               | LEFT
               | RIGHT
               | NONASSOC'''
    p[0] = p[1]


# Optional <type> tag; parsed and ignored.
def p_opttype(p):
    '''opttype : '<' ID '>'
               | empty'''


# Builds the list of declared names, optionally comma separated.
def p_idlist(p):
    '''idlist  : idlist optcomma tokenid
               | tokenid'''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        # Extend the list built so far in place and pass it along.
        p[1].append(p[3])
        p[0] = p[1]


# A declared name; a trailing NUMBER is accepted and dropped.
def p_tokenid(p):
    '''tokenid : ID
               | ID NUMBER
               | QLITERAL
               | QLITERAL NUMBER'''
    p[0] = p[1]


# Optional trailing semicolon.
def p_optsemi(p):
    '''optsemi : ';'
               | empty'''


# Optional separating comma.
def p_optcomma(p):
    '''optcomma : ','
                | empty'''


def p_definition_type(p):
    '''definition : TYPE '<' ID '>' namelist optsemi'''
    # type declarations are ignored


# Name list of a type declaration; parsed and ignored.
def p_namelist(p):
    '''namelist : namelist optcomma ID
                | ID'''


def p_definition_union(p):
    '''definition : UNION CODE optsemi'''
    # Union declarations are ignored


# Runs once per reduced rule and prints its Python equivalent: one p_<name>_<n>
# function per alternative, then one function per embedded (mid-rule) action.
def p_rules(p):
    '''rules   : rules rule
               | rule'''
    # The rule just reduced is the last right-hand-side symbol.  Do not use
    # p[-1] here: negative indices on the production object do not address
    # the right-hand side.
    rule = p[1] if len(p) == 2 else p[2]
    rulename, alternatives = rule[0], rule[1]

    embedded = []    # (generated symbol name, code) for mid-rule actions

    for rulecount, alt in enumerate(alternatives, 1):
        print("def p_%s_%d(p):" % (rulename, rulecount))
        prod = []
        prodcode = ""
        last = len(alt) - 1
        for pos, item in enumerate(alt):
            if item[0] != '{':
                prod.append(item)
            elif pos == last:
                # A trailing code block is the action of this alternative.
                prodcode = item
            else:
                # A code block anywhere else is an embedded action: replace it
                # with a generated symbol and emit that symbol's rule later.
                embed_name = "_embed%d_%s" % (len(embedded), rulename)
                prod.append(embed_name)
                embedded.append((embed_name, item))
        print("    '''%s : %s'''" % (rulename, " ".join(prod)))
        # Emit code
        print_code(prodcode, 4)
        print()

    for embed_name, code in embedded:
        print("def p_%s(p):" % embed_name)
        print("    '''%s : '''" % embed_name)
        print_code(code, 4)
        print()


# The four rule forms below all produce (name, [alternative, ...]) where each
# alternative is a list of items.
def p_rule(p):
    '''rule : ID ':' rulelist ';' '''
    p[0] = (p[1], [p[3]])


def p_rule2(p):
    '''rule : ID ':' rulelist morerules ';' '''
    p[4].insert(0, p[3])
    p[0] = (p[1], p[4])


def p_rule_empty(p):
    '''rule : ID ':' ';' '''
    p[0] = (p[1], [[]])


def p_rule_empty2(p):
    '''rule : ID ':' morerules ';' '''

    # The first alternative is empty.
    p[3].insert(0, [])
    p[0] = (p[1], p[3])


# Collects the alternatives introduced by '|'; a bare '|' is an empty one.
def p_morerules(p):
    '''morerules : morerules '|' rulelist
                 | '|' rulelist
                 | '|'  '''

    if len(p) == 2:
        p[0] = [[]]
    elif len(p) == 3:
        p[0] = [p[2]]
    else:
        p[0] = p[1]
        p[0].append(p[3])


# Builds the item list of a single alternative.
def p_rulelist(p):
    '''rulelist : rulelist ruleitem
                | ruleitem'''

    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[1].append(p[2])
        p[0] = p[1]


# A single item of an alternative, passed through as its token text.
def p_ruleitem(p):
    '''ruleitem : ID
                | QLITERAL
                | CODE
                | PREC'''
    p[0] = p[1]


def p_empty(p):
    '''empty : '''


# Syntax-error hook.  Reports on stderr so that a malformed grammar is not
# silently turned into truncated output; stdout stays reserved for the
# generated code.  ``p`` is the offending token, or None at end of input.
def p_error(p):
    if p is None:
        sys.stderr.write("yparse: syntax error at end of input\n")
    else:
        sys.stderr.write("yparse: syntax error at line %s near %r\n"
                         % (p.lineno, p.value))

yacc.yacc(debug=0)


def print_code(code, indent):
    """Print each line of *code* as a '#' comment indented by *indent* spaces.

    Prints nothing when the module-level ``emit_code`` flag is false.
    """
    if not emit_code:
        return
    pad = " " * indent
    for line in code.splitlines():
        print("%s# %s" % (pad, line))