1 /* Compute lookahead criteria for Bison.
2 
3    Copyright (C) 1984, 1986, 1989, 2000-2012 Free Software Foundation,
4    Inc.
5 
6    This file is part of Bison, the GNU Compiler Compiler.
7 
8    This program is free software: you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation, either version 3 of the License, or
11    (at your option) any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20 
21 
22 /* Find which rules need lookahead in each state, and which lookahead
23    tokens they accept.  */
24 
25 #include <config.h>
26 #include "system.h"
27 
28 #include <bitset.h>
29 #include <bitsetv.h>
30 
31 #include "LR0.h"
32 #include "complain.h"
33 #include "derives.h"
34 #include "getargs.h"
35 #include "gram.h"
36 #include "lalr.h"
37 #include "muscle-tab.h"
38 #include "nullable.h"
39 #include "reader.h"
40 #include "relation.h"
41 #include "symtab.h"
42 
43 goto_number *goto_map;
44 goto_number ngotos;
45 state_number *from_state;
46 state_number *to_state;
47 bitsetv goto_follows = NULL;
48 
49 /* Linked list of goto numbers.  */
50 typedef struct goto_list
51 {
52   struct goto_list *next;
53   goto_number value;
54 } goto_list;
55 
56 
57 /* LA is an NLA by NTOKENS matrix of bits.  LA[l, i] is 1 if the rule
58    LArule[l] is applicable in the appropriate state when the next
59    token is symbol i.  If LA[l, i] and LA[l, j] are both 1 for i != j,
60    it is a conflict.  */
61 
62 static bitsetv LA = NULL;
63 size_t nLA;
64 
65 
66 static goto_number **includes;
67 static goto_list **lookback;
68 
69 
70 
71 
72 void
set_goto_map(void)73 set_goto_map (void)
74 {
75   state_number s;
76   goto_number *temp_map;
77 
78   goto_map = xcalloc (nvars + 1, sizeof *goto_map);
79   temp_map = xnmalloc (nvars + 1, sizeof *temp_map);
80 
81   ngotos = 0;
82   for (s = 0; s < nstates; ++s)
83     {
84       transitions *sp = states[s]->transitions;
85       int i;
86       for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i)
87 	{
88 	  ngotos++;
89 
90 	  /* Abort if (ngotos + 1) would overflow.  */
91 	  aver (ngotos != GOTO_NUMBER_MAXIMUM);
92 
93 	  goto_map[TRANSITION_SYMBOL (sp, i) - ntokens]++;
94 	}
95     }
96 
97   {
98     goto_number k = 0;
99     int i;
100     for (i = ntokens; i < nsyms; i++)
101       {
102 	temp_map[i - ntokens] = k;
103 	k += goto_map[i - ntokens];
104       }
105 
106     for (i = ntokens; i < nsyms; i++)
107       goto_map[i - ntokens] = temp_map[i - ntokens];
108 
109     goto_map[nsyms - ntokens] = ngotos;
110     temp_map[nsyms - ntokens] = ngotos;
111   }
112 
113   from_state = xcalloc (ngotos, sizeof *from_state);
114   to_state = xcalloc (ngotos, sizeof *to_state);
115 
116   for (s = 0; s < nstates; ++s)
117     {
118       transitions *sp = states[s]->transitions;
119       int i;
120       for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i)
121 	{
122 	  goto_number k = temp_map[TRANSITION_SYMBOL (sp, i) - ntokens]++;
123 	  from_state[k] = s;
124 	  to_state[k] = sp->states[i]->number;
125 	}
126     }
127 
128   free (temp_map);
129 }
130 
131 
132 goto_number
map_goto(state_number s0,symbol_number sym)133 map_goto (state_number s0, symbol_number sym)
134 {
135   goto_number high;
136   goto_number low;
137   goto_number middle;
138   state_number s;
139 
140   low = goto_map[sym - ntokens];
141   high = goto_map[sym - ntokens + 1] - 1;
142 
143   for (;;)
144     {
145       aver (low <= high);
146       middle = (low + high) / 2;
147       s = from_state[middle];
148       if (s == s0)
149 	return middle;
150       else if (s < s0)
151 	low = middle + 1;
152       else
153 	high = middle - 1;
154     }
155 }
156 
157 
158 static void
initialize_F(void)159 initialize_F (void)
160 {
161   goto_number **reads = xnmalloc (ngotos, sizeof *reads);
162   goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge);
163   goto_number nedges = 0;
164 
165   goto_number i;
166 
167   goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
168 
169   for (i = 0; i < ngotos; i++)
170     {
171       state_number stateno = to_state[i];
172       transitions *sp = states[stateno]->transitions;
173 
174       int j;
175       FOR_EACH_SHIFT (sp, j)
176 	bitset_set (goto_follows[i], TRANSITION_SYMBOL (sp, j));
177 
178       for (; j < sp->num; j++)
179 	{
180 	  symbol_number sym = TRANSITION_SYMBOL (sp, j);
181 	  if (nullable[sym - ntokens])
182 	    edge[nedges++] = map_goto (stateno, sym);
183 	}
184 
185       if (nedges == 0)
186 	reads[i] = NULL;
187       else
188 	{
189 	  reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]);
190 	  memcpy (reads[i], edge, nedges * sizeof edge[0]);
191 	  reads[i][nedges] = END_NODE;
192 	  nedges = 0;
193 	}
194     }
195 
196   relation_digraph (reads, ngotos, &goto_follows);
197 
198   for (i = 0; i < ngotos; i++)
199     free (reads[i]);
200 
201   free (reads);
202   free (edge);
203 }
204 
205 
206 static void
add_lookback_edge(state * s,rule * r,goto_number gotono)207 add_lookback_edge (state *s, rule *r, goto_number gotono)
208 {
209   int ri = state_reduction_find (s, r);
210   goto_list *sp = xmalloc (sizeof *sp);
211   sp->next = lookback[(s->reductions->lookahead_tokens - LA) + ri];
212   sp->value = gotono;
213   lookback[(s->reductions->lookahead_tokens - LA) + ri] = sp;
214 }
215 
216 
217 
218 static void
build_relations(void)219 build_relations (void)
220 {
221   goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge);
222   state_number *states1 = xnmalloc (ritem_longest_rhs () + 1, sizeof *states1);
223   goto_number i;
224 
225   includes = xnmalloc (ngotos, sizeof *includes);
226 
227   for (i = 0; i < ngotos; i++)
228     {
229       int nedges = 0;
230       symbol_number symbol1 = states[to_state[i]]->accessing_symbol;
231       rule **rulep;
232 
233       for (rulep = derives[symbol1 - ntokens]; *rulep; rulep++)
234 	{
235 	  bool done;
236 	  int length = 1;
237 	  item_number const *rp;
238 	  state *s = states[from_state[i]];
239 	  states1[0] = s->number;
240 
241 	  for (rp = (*rulep)->rhs; ! item_number_is_rule_number (*rp); rp++)
242 	    {
243 	      s = transitions_to (s->transitions,
244 				  item_number_as_symbol_number (*rp));
245 	      states1[length++] = s->number;
246 	    }
247 
248 	  if (!s->consistent)
249 	    add_lookback_edge (s, *rulep, i);
250 
251 	  length--;
252 	  done = false;
253 	  while (!done)
254 	    {
255 	      done = true;
256 	      /* Each rhs ends in a rule number, and there is a
257 		 sentinel (ritem[-1]=0) before the first rhs, so it is safe to
258 		 decrement RP here.  */
259 	      rp--;
260 	      if (ISVAR (*rp))
261 		{
262 		  /* Downcasting from item_number to symbol_number.  */
263 		  edge[nedges++] = map_goto (states1[--length],
264 					     item_number_as_symbol_number (*rp));
265 		  if (nullable[*rp - ntokens])
266 		    done = false;
267 		}
268 	    }
269 	}
270 
271       if (nedges == 0)
272 	includes[i] = NULL;
273       else
274 	{
275 	  int j;
276 	  includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]);
277 	  for (j = 0; j < nedges; j++)
278 	    includes[i][j] = edge[j];
279 	  includes[i][nedges] = END_NODE;
280 	}
281     }
282 
283   free (edge);
284   free (states1);
285 
286   relation_transpose (&includes, ngotos);
287 }
288 
289 
290 
291 static void
compute_FOLLOWS(void)292 compute_FOLLOWS (void)
293 {
294   goto_number i;
295 
296   relation_digraph (includes, ngotos, &goto_follows);
297 
298   for (i = 0; i < ngotos; i++)
299     free (includes[i]);
300 
301   free (includes);
302 }
303 
304 
305 static void
compute_lookahead_tokens(void)306 compute_lookahead_tokens (void)
307 {
308   size_t i;
309   goto_list *sp;
310 
311   for (i = 0; i < nLA; i++)
312     for (sp = lookback[i]; sp; sp = sp->next)
313       bitset_or (LA[i], LA[i], goto_follows[sp->value]);
314 
315   /* Free LOOKBACK. */
316   for (i = 0; i < nLA; i++)
317     LIST_FREE (goto_list, lookback[i]);
318 
319   free (lookback);
320 }
321 
322 
323 /*----------------------------------------------------.
324 | Count the number of lookahead tokens required for S |
325 | (N_LOOKAHEAD_TOKENS member).                        |
326 `----------------------------------------------------*/
327 
328 static int
state_lookahead_tokens_count(state * s,bool default_reduction_only_for_accept)329 state_lookahead_tokens_count (state *s, bool default_reduction_only_for_accept)
330 {
331   int n_lookahead_tokens = 0;
332   reductions *rp = s->reductions;
333   transitions *sp = s->transitions;
334 
335   /* Transitions are only disabled during conflict resolution, and that
336      hasn't happened yet, so there should be no need to check that
337      transition 0 hasn't been disabled before checking if it is a shift.
338      However, this check was performed at one time, so we leave it as an
339      aver.  */
340   aver (sp->num == 0 || !TRANSITION_IS_DISABLED (sp, 0));
341 
342   /* We need a lookahead either to distinguish different reductions
343      (i.e., there are two or more), or to distinguish a reduction from a
344      shift.  Otherwise, it is straightforward, and the state is
345      `consistent'.  However, do not treat a state with any reductions as
346      consistent unless it is the accepting state (because there is never
347      a lookahead token that makes sense there, and so no lookahead token
348      should be read) if the user has otherwise disabled default
349      reductions.  */
350   if (rp->num > 1
351       || (rp->num == 1 && sp->num && TRANSITION_IS_SHIFT (sp, 0))
352       || (rp->num == 1 && rp->rules[0]->number != 0
353           && default_reduction_only_for_accept))
354     n_lookahead_tokens += rp->num;
355   else
356     s->consistent = 1;
357 
358   return n_lookahead_tokens;
359 }
360 
361 
362 /*----------------------------------------------------.
363 | Compute LA, NLA, and the lookahead_tokens members.  |
364 `----------------------------------------------------*/
365 
366 void
initialize_LA(void)367 initialize_LA (void)
368 {
369   state_number i;
370   bitsetv pLA;
371   bool default_reduction_only_for_accept;
372   {
373     char *default_reductions =
374       muscle_percent_define_get ("lr.default-reductions");
375     default_reduction_only_for_accept =
376       0 == strcmp (default_reductions, "accepting");
377     free (default_reductions);
378   }
379 
380   /* Compute the total number of reductions requiring a lookahead.  */
381   nLA = 0;
382   for (i = 0; i < nstates; i++)
383     nLA +=
384       state_lookahead_tokens_count (states[i],
385                                     default_reduction_only_for_accept);
386   /* Avoid having to special case 0.  */
387   if (!nLA)
388     nLA = 1;
389 
390   pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
391 
392   /* Initialize the members LOOKAHEAD_TOKENS for each state whose reductions
393      require lookahead tokens.  */
394   for (i = 0; i < nstates; i++)
395     {
396       int count =
397         state_lookahead_tokens_count (states[i],
398                                       default_reduction_only_for_accept);
399       if (count)
400 	{
401 	  states[i]->reductions->lookahead_tokens = pLA;
402 	  pLA += count;
403 	}
404     }
405 }
406 
407 
408 /*---------------------------------------------.
409 | Output the lookahead tokens for each state.  |
410 `---------------------------------------------*/
411 
412 static void
lookahead_tokens_print(FILE * out)413 lookahead_tokens_print (FILE *out)
414 {
415   state_number i;
416   int j, k;
417   fprintf (out, "Lookahead tokens: BEGIN\n");
418   for (i = 0; i < nstates; ++i)
419     {
420       reductions *reds = states[i]->reductions;
421       bitset_iterator iter;
422       int n_lookahead_tokens = 0;
423 
424       if (reds->lookahead_tokens)
425 	for (k = 0; k < reds->num; ++k)
426 	  if (reds->lookahead_tokens[k])
427 	    ++n_lookahead_tokens;
428 
429       fprintf (out, "State %d: %d lookahead tokens\n",
430 	       i, n_lookahead_tokens);
431 
432       if (reds->lookahead_tokens)
433 	for (j = 0; j < reds->num; ++j)
434 	  BITSET_FOR_EACH (iter, reds->lookahead_tokens[j], k, 0)
435 	  {
436 	    fprintf (out, "   on %d (%s) -> rule %d\n",
437 		     k, symbols[k]->tag,
438 		     reds->rules[j]->number);
439 	  };
440     }
441   fprintf (out, "Lookahead tokens: END\n");
442 }
443 
444 void
lalr(void)445 lalr (void)
446 {
447   initialize_LA ();
448   set_goto_map ();
449   initialize_F ();
450   lookback = xcalloc (nLA, sizeof *lookback);
451   build_relations ();
452   compute_FOLLOWS ();
453   compute_lookahead_tokens ();
454 
455   if (trace_flag & trace_sets)
456     lookahead_tokens_print (stderr);
457 }
458 
459 
460 void
lalr_update_state_numbers(state_number old_to_new[],state_number nstates_old)461 lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
462 {
463   goto_number ngotos_reachable = 0;
464   symbol_number nonterminal = 0;
465   aver (nsyms == nvars + ntokens);
466   {
467     goto_number i;
468     for (i = 0; i < ngotos; ++i)
469       {
470         while (i == goto_map[nonterminal])
471           goto_map[nonterminal++] = ngotos_reachable;
472         /* If old_to_new[from_state[i]] = nstates_old, remove this goto
473            entry.  */
474         if (old_to_new[from_state[i]] != nstates_old)
475           {
476             /* from_state[i] is not removed, so it and thus to_state[i] are
477                reachable, so to_state[i] != nstates_old.  */
478             aver (old_to_new[to_state[i]] != nstates_old);
479             from_state[ngotos_reachable] = old_to_new[from_state[i]];
480             to_state[ngotos_reachable] = old_to_new[to_state[i]];
481             ++ngotos_reachable;
482           }
483       }
484   }
485   while (nonterminal <= nvars)
486     {
487       aver (ngotos == goto_map[nonterminal]);
488       goto_map[nonterminal++] = ngotos_reachable;
489     }
490   ngotos = ngotos_reachable;
491 }
492 
493 
494 void
lalr_free(void)495 lalr_free (void)
496 {
497   state_number s;
498   for (s = 0; s < nstates; ++s)
499     states[s]->reductions->lookahead_tokens = NULL;
500   bitsetv_free (LA);
501 }
502