1# parser for Unix yacc-based grammars
2#
3# Author: David Beazley (dave@dabeaz.com)
4# Date  : October 2, 2006
5
6import ylex
7tokens = ylex.tokens
8
9from ply import *
10
# Token names gathered by p_definition_token; printed as the generated
# "tokens = ..." line by p_defsection.
tokenlist = []
# Precedence tuples such as ('left', ...) gathered by p_definition_token;
# printed as the generated "precedence = ..." line by p_defsection.
preclist = []

# When false, print_code() returns without printing anything.
emit_code = 1
15
16
def p_yacc(p):
    '''yacc : defsection rulesection'''
    # Top-level rule: a definitions section followed by a rules section.
    # Nothing is done here; the actions of p_defsection and p_rulesection
    # print the output.
19
20
def p_defsection(p):
    '''defsection : definitions SECTION
                  | SECTION'''
    # Mark on the lexer that the first SECTION separator has been reduced
    # (presumably consulted by ylex when it meets the next one -- confirm).
    p.lexer.lastsection = 1

    # Dump what the definitions collected, each line followed by a blank
    # line, then the banner that precedes the generated rule functions.
    for label, collected in (("tokens = ", tokenlist),
                             ("precedence = ", preclist)):
        print(label, repr(collected))
        print()
    print("# -------------- RULES ----------------")
    print()
31
32
def p_rulesection(p):
    '''rulesection : rules SECTION'''
    # Close the rules block with a banner, then pass the value of the
    # final SECTION token to print_code at indent 0.
    print("# -------------- RULES END ----------------")
    trailer = p[2]
    print_code(trailer, 0)
38
39
def p_definitions(p):
    '''definitions : definitions definition
                   | definition'''
    # Left-recursive list of one or more definitions.  Each definition's own
    # action does the work, so no value is built here.
43
44
def p_definition_literal(p):
    '''definition : LITERAL'''
    # Hand the LITERAL token's text to print_code at indent 0.
    literal_text = p[1]
    print_code(literal_text, 0)
48
49
def p_definition_start(p):
    '''definition : START ID'''
    # Emit the start-symbol assignment for the generated module.
    start_symbol = p[2]
    line = "start = '%s'" % start_symbol
    print(line)
53
54
def p_definition_token(p):
    '''definition : toktype opttype idlist optsemi '''
    # p[1] is the declaration keyword returned by p_toktype and p[3] is the
    # list of token ids built by p_idlist.

    # Record each named token once.  Quoted literals (first character is a
    # quote) are not named tokens and are skipped.  A name that appears in
    # more than one declaration, e.g. "%token PLUS" followed by
    # "%left PLUS", must not be listed twice in the generated tokens list.
    for name in p[3]:
        if name[0] not in "'\"" and name not in tokenlist:
            tokenlist.append(name)

    # Associativity declarations also add one precedence level, in the order
    # they are declared.  The level's tag is the keyword without its '%'.
    if p[1] in ('%left', '%right', '%nonassoc'):
        preclist.append((p[1][1:],) + tuple(p[3]))
66
67
def p_toktype(p):
    '''toktype : TOKEN
               | LEFT
               | RIGHT
               | NONASSOC'''
    # Pass the declaration keyword through unchanged so that
    # p_definition_token can tell which kind of declaration it is.
    keyword = p[1]
    p[0] = keyword
74
75
def p_opttype(p):
    '''opttype : '<' ID '>'
               | empty'''
    # Optional <ID> tag in a token declaration.  It is matched but no value
    # is produced (p[0] is never set).
79
80
def p_idlist(p):
    '''idlist  : idlist optcomma tokenid
               | tokenid'''
    # Single tokenid: start a fresh one-element list.
    if len(p) == 2:
        p[0] = [p[1]]
        return

    # Recursive form: grow the list built so far in place and reuse it
    # as this production's value.
    collected = p[1]
    p[0] = collected
    collected.append(p[3])
89
90
def p_tokenid(p):
    '''tokenid : ID
               | ID NUMBER
               | QLITERAL
               | QLITERAL NUMBER'''
    # Only the identifier or quoted literal is kept; an optional NUMBER
    # that follows it is dropped.
    identifier = p[1]
    p[0] = identifier
97
98
def p_optsemi(p):
    '''optsemi : ';'
               | empty'''
    # Optional ';' terminator; matched only, no value is produced.
102
103
def p_optcomma(p):
    '''optcomma : ','
                | empty'''
    # Optional ',' separator between list items; matched only, no value is
    # produced.
107
108
def p_definition_type(p):
    '''definition : TYPE '<' ID '>' namelist optsemi'''
    # Type declarations are parsed so the grammar is accepted, but they are
    # ignored: nothing is printed or recorded for them.
112
113
def p_namelist(p):
    '''namelist : namelist optcomma ID
                | ID'''
    # One or more IDs, optionally comma-separated.  Referenced only by the
    # TYPE declaration rule, which ignores it, so no list is built.
117
118
def p_definition_union(p):
    '''definition : UNION CODE optsemi'''
    # Union declarations are parsed so the grammar is accepted, but they are
    # ignored: the CODE block is not printed.
122
123
def p_rules(p):
    '''rules   : rules rule
               | rule'''
    # The rule just reduced is the last right-hand-side symbol in both
    # forms.  It is a (name, alternatives) pair built by the p_rule*
    # actions, and it is printed here as Python rule functions.
    rule = p[2] if len(p) != 2 else p[1]
    rulename = rule[0]

    # Code blocks that are not the last item of an alternative are embedded
    # actions.  Each gets a synthetic nonterminal, collected here as
    # (name, code) and emitted as an empty production after the rule's own
    # functions.
    embedded = []

    for rulecount, alternative in enumerate(rule[1], 1):
        # One generated function per alternative, numbered from 1.
        print("def p_%s_%d(p):" % (rulename, rulecount))
        symbols = []
        prodcode = ""
        last_index = len(alternative) - 1
        for index, item in enumerate(alternative):
            if item[0] != '{':
                # Ordinary grammar symbol.
                symbols.append(item)
            elif index == last_index:
                # Trailing code block: the action of this alternative.
                prodcode = item
                break
            else:
                # Embedded action: substitute a generated nonterminal whose
                # number is the count of embedded actions seen so far.
                embed_name = "_embed%d_%s" % (len(embedded), rulename)
                symbols.append(embed_name)
                embedded.append((embed_name, item))
        print("    '''%s : %s'''" % (rulename, " ".join(symbols)))
        # The original action code goes through print_code at indent 4.
        print_code(prodcode, 4)
        print()

    for embed_name, embed_code in embedded:
        print("def p_%s(p):" % embed_name)
        print("    '''%s : '''" % embed_name)
        print_code(embed_code, 4)
        print()
169
170
def p_rule(p):
    '''rule : ID ':' rulelist ';' '''
    # A rule with exactly one alternative: (name, [alternative]).
    name, only_alternative = p[1], p[3]
    p[0] = (name, [only_alternative])
174
175
def p_rule2(p):
    '''rule : ID ':' rulelist morerules ';' '''
    # Put the first alternative in front of the '|' alternatives, reusing
    # the list that morerules built (it is modified in place).
    alternatives = p[4]
    alternatives[:0] = [p[3]]
    p[0] = (p[1], alternatives)
180
181
def p_rule_empty(p):
    '''rule : ID ':' ';' '''
    # A rule whose single alternative is empty: (name, [[]]).
    name = p[1]
    empty_alternative = []
    p[0] = (name, [empty_alternative])
185
186
def p_rule_empty2(p):
    '''rule : ID ':' morerules ';' '''
    # The first alternative is empty; put it in front of the '|'
    # alternatives, reusing the list that morerules built (modified in
    # place).
    alternatives = p[3]
    alternatives[:0] = [[]]
    p[0] = (p[1], alternatives)
192
193
def p_morerules(p):
    '''morerules : morerules '|' rulelist
                 | '|' rulelist
                 | '|'  '''
    matched = len(p) - 1    # number of right-hand-side symbols

    if matched == 1:
        # A lone '|' introduces one empty alternative.
        p[0] = [[]]
        return

    if matched == 2:
        # First '|' alternative: start the list.
        p[0] = [p[2]]
        return

    # Recursive form: add the new alternative to the existing list in place
    # and reuse that list as the result.
    alternatives = p[1]
    alternatives.append(p[3])
    p[0] = alternatives
206
207#   print("morerules", len(p), p[0])
208
209
def p_rulelist(p):
    '''rulelist : rulelist ruleitem
                | ruleitem'''
    # Single item: start a fresh one-element list.
    if len(p) == 2:
        p[0] = [p[1]]
        return

    # Recursive form: add the new item to the existing list in place and
    # reuse that list as this production's value.
    items = p[1]
    p[0] = items
    items.append(p[2])
219
220
def p_ruleitem(p):
    '''ruleitem : ID
                | QLITERAL
                | CODE
                | PREC'''
    # Every kind of rule item is passed through as its token value.
    token_text = p[1]
    p[0] = token_text
227
228
def p_empty(p):
    '''empty : '''
    # Empty production, used by the optional rules (opttype, optsemi,
    # optcomma).  No value is produced.
231
232
def p_error(p):
    """Report a syntax error in the input grammar on stderr.

    The generated Python code is written to stdout, so the diagnostic goes
    to stderr and does not end up in the output.  Nothing is raised.

    p is the offending token, or None when the error is at end of input.
    """
    # Imported locally so the handler does not depend on the file's
    # top-level imports.
    import sys

    if p is None:
        print("yply: syntax error at end of input", file=sys.stderr)
        return

    # getattr with defaults keeps the handler from failing on a token-like
    # object that lacks one of these attributes.
    print("yply: syntax error at line %s near %r (token %s)" % (
        getattr(p, 'lineno', '?'),
        getattr(p, 'value', None),
        getattr(p, 'type', '?'),
    ), file=sys.stderr)
235
# Build the parser at import time with debug=0.  `yacc` is brought in by the
# `from ply import *` at the top of the file.
yacc.yacc(debug=0)
237
238
def print_code(code, indent):
    """Print each line of *code* as a '#' comment indented by *indent* spaces.

    Prints nothing when the module-level emit_code flag is false.
    """
    if emit_code:
        for source_line in code.splitlines():
            print("%s# %s" % (" " * indent, source_line))
245