1
2 /* Grammar implementation */
3
4 #include "Python.h"
5 #include "pgenheaders.h"
6
7 #include <ctype.h>
8
9 #include "token.h"
10 #include "grammar.h"
11
12 extern int Py_DebugFlag;
13
14 grammar *
newgrammar(int start)15 newgrammar(int start)
16 {
17 grammar *g;
18
19 g = (grammar *)PyObject_MALLOC(sizeof(grammar));
20 if (g == NULL)
21 Py_FatalError("no mem for new grammar");
22 g->g_ndfas = 0;
23 g->g_dfa = NULL;
24 g->g_start = start;
25 g->g_ll.ll_nlabels = 0;
26 g->g_ll.ll_label = NULL;
27 g->g_accel = 0;
28 return g;
29 }
30
31 void
freegrammar(grammar * g)32 freegrammar(grammar *g)
33 {
34 int i;
35 for (i = 0; i < g->g_ndfas; i++) {
36 free(g->g_dfa[i].d_name);
37 for (int j = 0; j < g->g_dfa[i].d_nstates; j++)
38 PyObject_FREE(g->g_dfa[i].d_state[j].s_arc);
39 PyObject_FREE(g->g_dfa[i].d_state);
40 }
41 PyObject_FREE(g->g_dfa);
42 for (i = 0; i < g->g_ll.ll_nlabels; i++)
43 free(g->g_ll.ll_label[i].lb_str);
44 PyObject_FREE(g->g_ll.ll_label);
45 PyObject_FREE(g);
46 }
47
48 dfa *
adddfa(grammar * g,int type,const char * name)49 adddfa(grammar *g, int type, const char *name)
50 {
51 dfa *d;
52
53 g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa,
54 sizeof(dfa) * (g->g_ndfas + 1));
55 if (g->g_dfa == NULL)
56 Py_FatalError("no mem to resize dfa in adddfa");
57 d = &g->g_dfa[g->g_ndfas++];
58 d->d_type = type;
59 d->d_name = strdup(name);
60 d->d_nstates = 0;
61 d->d_state = NULL;
62 d->d_initial = -1;
63 d->d_first = NULL;
64 return d; /* Only use while fresh! */
65 }
66
67 int
addstate(dfa * d)68 addstate(dfa *d)
69 {
70 state *s;
71
72 d->d_state = (state *)PyObject_REALLOC(d->d_state,
73 sizeof(state) * (d->d_nstates + 1));
74 if (d->d_state == NULL)
75 Py_FatalError("no mem to resize state in addstate");
76 s = &d->d_state[d->d_nstates++];
77 s->s_narcs = 0;
78 s->s_arc = NULL;
79 s->s_lower = 0;
80 s->s_upper = 0;
81 s->s_accel = NULL;
82 s->s_accept = 0;
83 return Py_SAFE_DOWNCAST(s - d->d_state, intptr_t, int);
84 }
85
86 void
addarc(dfa * d,int from,int to,int lbl)87 addarc(dfa *d, int from, int to, int lbl)
88 {
89 state *s;
90 arc *a;
91
92 assert(0 <= from && from < d->d_nstates);
93 assert(0 <= to && to < d->d_nstates);
94
95 s = &d->d_state[from];
96 s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1));
97 if (s->s_arc == NULL)
98 Py_FatalError("no mem to resize arc list in addarc");
99 a = &s->s_arc[s->s_narcs++];
100 a->a_lbl = lbl;
101 a->a_arrow = to;
102 }
103
104 int
addlabel(labellist * ll,int type,const char * str)105 addlabel(labellist *ll, int type, const char *str)
106 {
107 int i;
108 label *lb;
109
110 for (i = 0; i < ll->ll_nlabels; i++) {
111 if (ll->ll_label[i].lb_type == type &&
112 strcmp(ll->ll_label[i].lb_str, str) == 0)
113 return i;
114 }
115 ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label,
116 sizeof(label) * (ll->ll_nlabels + 1));
117 if (ll->ll_label == NULL)
118 Py_FatalError("no mem to resize labellist in addlabel");
119 lb = &ll->ll_label[ll->ll_nlabels++];
120 lb->lb_type = type;
121 lb->lb_str = strdup(str);
122 if (Py_DebugFlag)
123 printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels,
124 PyGrammar_LabelRepr(lb));
125 return Py_SAFE_DOWNCAST(lb - ll->ll_label, intptr_t, int);
126 }
127
128 /* Same, but rather dies than adds */
129
130 int
findlabel(labellist * ll,int type,const char * str)131 findlabel(labellist *ll, int type, const char *str)
132 {
133 int i;
134
135 for (i = 0; i < ll->ll_nlabels; i++) {
136 if (ll->ll_label[i].lb_type == type /*&&
137 strcmp(ll->ll_label[i].lb_str, str) == 0*/)
138 return i;
139 }
140 fprintf(stderr, "Label %d/'%s' not found\n", type, str);
141 Py_FatalError("grammar.c:findlabel()");
142
143 /* Py_FatalError() is declared with __attribute__((__noreturn__)).
144 GCC emits a warning without "return 0;" (compiler bug!), but Clang is
145 smarter and emits a warning on the return... */
146 #ifndef __clang__
147 return 0; /* Make gcc -Wall happy */
148 #endif
149 }
150
151 /* Forward */
152 static void translabel(grammar *, label *);
153
154 void
translatelabels(grammar * g)155 translatelabels(grammar *g)
156 {
157 int i;
158
159 #ifdef Py_DEBUG
160 printf("Translating labels ...\n");
161 #endif
162 /* Don't translate EMPTY */
163 for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++)
164 translabel(g, &g->g_ll.ll_label[i]);
165 }
166
167 static void
translabel(grammar * g,label * lb)168 translabel(grammar *g, label *lb)
169 {
170 int i;
171
172 if (Py_DebugFlag)
173 printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb));
174
175 if (lb->lb_type == NAME) {
176 for (i = 0; i < g->g_ndfas; i++) {
177 if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) {
178 if (Py_DebugFlag)
179 printf(
180 "Label %s is non-terminal %d.\n",
181 lb->lb_str,
182 g->g_dfa[i].d_type);
183 lb->lb_type = g->g_dfa[i].d_type;
184 free(lb->lb_str);
185 lb->lb_str = NULL;
186 return;
187 }
188 }
189 for (i = 0; i < (int)N_TOKENS; i++) {
190 if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) {
191 if (Py_DebugFlag)
192 printf("Label %s is terminal %d.\n",
193 lb->lb_str, i);
194 lb->lb_type = i;
195 free(lb->lb_str);
196 lb->lb_str = NULL;
197 return;
198 }
199 }
200 printf("Can't translate NAME label '%s'\n", lb->lb_str);
201 return;
202 }
203
204 if (lb->lb_type == STRING) {
205 if (isalpha(Py_CHARMASK(lb->lb_str[1])) ||
206 lb->lb_str[1] == '_') {
207 char *p;
208 char *src;
209 char *dest;
210 size_t name_len;
211 if (Py_DebugFlag)
212 printf("Label %s is a keyword\n", lb->lb_str);
213 lb->lb_type = NAME;
214 src = lb->lb_str + 1;
215 p = strchr(src, '\'');
216 if (p)
217 name_len = p - src;
218 else
219 name_len = strlen(src);
220 dest = (char *)malloc(name_len + 1);
221 if (!dest) {
222 printf("Can't alloc dest '%s'\n", src);
223 return;
224 }
225 strncpy(dest, src, name_len);
226 dest[name_len] = '\0';
227 free(lb->lb_str);
228 lb->lb_str = dest;
229 }
230 else if (lb->lb_str[2] == lb->lb_str[0]) {
231 int type = (int) PyToken_OneChar(lb->lb_str[1]);
232 if (type != OP) {
233 lb->lb_type = type;
234 free(lb->lb_str);
235 lb->lb_str = NULL;
236 }
237 else
238 printf("Unknown OP label %s\n",
239 lb->lb_str);
240 }
241 else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) {
242 int type = (int) PyToken_TwoChars(lb->lb_str[1],
243 lb->lb_str[2]);
244 if (type != OP) {
245 lb->lb_type = type;
246 free(lb->lb_str);
247 lb->lb_str = NULL;
248 }
249 else
250 printf("Unknown OP label %s\n",
251 lb->lb_str);
252 }
253 else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) {
254 int type = (int) PyToken_ThreeChars(lb->lb_str[1],
255 lb->lb_str[2],
256 lb->lb_str[3]);
257 if (type != OP) {
258 lb->lb_type = type;
259 free(lb->lb_str);
260 lb->lb_str = NULL;
261 }
262 else
263 printf("Unknown OP label %s\n",
264 lb->lb_str);
265 }
266 else
267 printf("Can't translate STRING label %s\n",
268 lb->lb_str);
269 }
270 else
271 printf("Can't translate label '%s'\n",
272 PyGrammar_LabelRepr(lb));
273 }
274