1 /* Grammar reduction for Bison.
2 
3    Copyright (C) 1988-1989, 2000-2003, 2005-2012 Free Software
4    Foundation, Inc.
5 
6    This file is part of Bison, the GNU Compiler Compiler.
7 
8    This program is free software: you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation, either version 3 of the License, or
11    (at your option) any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20 
21 
22 /* Reduce the grammar: Find and eliminate unreachable terminals,
23    nonterminals, and productions.  David S. Bakin.  */
24 
25 /* Don't eliminate unreachable terminals: They may be used by the
26    user's parser.  */
27 
28 #include <config.h>
29 #include "system.h"
30 
31 #include <bitset.h>
32 
33 #include "complain.h"
34 #include "files.h"
35 #include "getargs.h"
36 #include "gram.h"
37 #include "print-xml.h"
38 #include "reader.h"
39 #include "reduce.h"
40 #include "symtab.h"
41 
42 /* Set of all nonterminals which are not useless.  */
43 static bitset N;
44 
45 /* Set of all rules which have no useless nonterminals in their RHS.  */
46 static bitset P;
47 
48 /* Set of all accessible symbols.  */
49 static bitset V;
50 
51 /* Set of symbols used to define rule precedence (so they are
52    `useless', but no warning should be issued).  */
53 static bitset V1;
54 
55 static rule_number nuseful_productions;
56 rule_number nuseless_productions;
57 static int nuseful_nonterminals;
58 symbol_number nuseless_nonterminals;
59 
60 /*-------------------------------------------------------------------.
61 | Another way to do this would be with a set for each production and |
62 | then do subset tests against N0, but even for the C grammar the    |
63 | whole reducing process takes only 2 seconds on my 8Mhz AT.         |
64 `-------------------------------------------------------------------*/
65 
66 static bool
useful_production(rule_number r,bitset N0)67 useful_production (rule_number r, bitset N0)
68 {
69   item_number *rhsp;
70 
71   /* A production is useful if all of the nonterminals in its appear
72      in the set of useful nonterminals.  */
73 
74   for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
75     if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
76       return false;
77   return true;
78 }
79 
80 
81 /*---------------------------------------------------------.
82 | Remember that rules are 1-origin, symbols are 0-origin.  |
83 `---------------------------------------------------------*/
84 
85 static void
useless_nonterminals(void)86 useless_nonterminals (void)
87 {
88   bitset Np, Ns;
89   rule_number r;
90 
91   /* N is set as built.  Np is set being built this iteration. P is
92      set of all productions which have a RHS all in N.  */
93 
94   Np = bitset_create (nvars, BITSET_FIXED);
95 
96 
97   /* The set being computed is a set of nonterminals which can derive
98      the empty string or strings consisting of all terminals. At each
99      iteration a nonterminal is added to the set if there is a
100      production with that nonterminal as its LHS for which all the
101      nonterminals in its RHS are already in the set.  Iterate until
102      the set being computed remains unchanged.  Any nonterminals not
103      in the set at that point are useless in that they will never be
104      used in deriving a sentence of the language.
105 
106      This iteration doesn't use any special traversal over the
107      productions.  A set is kept of all productions for which all the
108      nonterminals in the RHS are in useful.  Only productions not in
109      this set are scanned on each iteration.  At the end, this set is
110      saved to be used when finding useful productions: only
111      productions in this set will appear in the final grammar.  */
112 
113   while (1)
114     {
115       bitset_copy (Np, N);
116       for (r = 0; r < nrules; r++)
117 	if (!bitset_test (P, r)
118 	    && useful_production (r, N))
119 	  {
120 	    bitset_set (Np, rules[r].lhs->number - ntokens);
121 	    bitset_set (P, r);
122 	  }
123       if (bitset_equal_p (N, Np))
124 	break;
125       Ns = Np;
126       Np = N;
127       N = Ns;
128     }
129   bitset_free (N);
130   N = Np;
131 }
132 
133 
134 static void
inaccessable_symbols(void)135 inaccessable_symbols (void)
136 {
137   bitset Vp, Vs, Pp;
138 
139   /* Find out which productions are reachable and which symbols are
140      used.  Starting with an empty set of productions and a set of
141      symbols which only has the start symbol in it, iterate over all
142      productions until the set of productions remains unchanged for an
143      iteration.  For each production which has a LHS in the set of
144      reachable symbols, add the production to the set of reachable
145      productions, and add all of the nonterminals in the RHS of the
146      production to the set of reachable symbols.
147 
148      Consider only the (partially) reduced grammar which has only
149      nonterminals in N and productions in P.
150 
151      The result is the set P of productions in the reduced grammar,
152      and the set V of symbols in the reduced grammar.
153 
154      Although this algorithm also computes the set of terminals which
155      are reachable, no terminal will be deleted from the grammar. Some
156      terminals might not be in the grammar but might be generated by
157      semantic routines, and so the user might want them available with
158      specified numbers.  (Is this true?)  However, the nonreachable
159      terminals are printed (if running in verbose mode) so that the
160      user can know.  */
161 
162   Vp = bitset_create (nsyms, BITSET_FIXED);
163   Pp = bitset_create (nrules, BITSET_FIXED);
164 
165   /* If the start symbol isn't useful, then nothing will be useful. */
166   if (bitset_test (N, accept->number - ntokens))
167     {
168       bitset_set (V, accept->number);
169 
170       while (1)
171 	{
172 	  rule_number r;
173 	  bitset_copy (Vp, V);
174 	  for (r = 0; r < nrules; r++)
175 	    {
176 	      if (!bitset_test (Pp, r)
177 		  && bitset_test (P, r)
178 		  && bitset_test (V, rules[r].lhs->number))
179 		{
180 		  item_number *rhsp;
181 		  for (rhsp = rules[r].rhs; *rhsp >= 0; rhsp++)
182 		    if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
183 		      bitset_set (Vp, *rhsp);
184 		  bitset_set (Pp, r);
185 		}
186 	    }
187 	  if (bitset_equal_p (V, Vp))
188 	    break;
189 	  Vs = Vp;
190 	  Vp = V;
191 	  V = Vs;
192 	}
193     }
194 
195   bitset_free (V);
196   V = Vp;
197 
198   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */
199   bitset_set (V, endtoken->number);		/* end-of-input token */
200   bitset_set (V, errtoken->number);		/* error token */
201   bitset_set (V, undeftoken->number);		/* some undefined token */
202 
203   bitset_free (P);
204   P = Pp;
205 
206   nuseful_productions = bitset_count (P);
207   nuseless_productions = nrules - nuseful_productions;
208 
209   nuseful_nonterminals = 0;
210   {
211     symbol_number i;
212     for (i = ntokens; i < nsyms; i++)
213       if (bitset_test (V, i))
214 	nuseful_nonterminals++;
215   }
216   nuseless_nonterminals = nvars - nuseful_nonterminals;
217 
218   /* A token that was used in %prec should not be warned about.  */
219   {
220     rule_number r;
221     for (r = 0; r < nrules; ++r)
222       if (rules[r].precsym != 0)
223 	bitset_set (V1, rules[r].precsym->number);
224   }
225 }
226 
227 
228 /*-------------------------------------------------------------------.
229 | Put the useless productions at the end of RULES, and adjust NRULES |
230 | accordingly.                                                       |
231 `-------------------------------------------------------------------*/
232 
233 static void
reduce_grammar_tables(void)234 reduce_grammar_tables (void)
235 {
236   /* Report and flag useless productions.  */
237   {
238     rule_number r;
239     for (r = 0; r < nrules; r++)
240       rules[r].useful = bitset_test (P, r);
241     grammar_rules_useless_report (_("rule useless in grammar"));
242   }
243 
244   /* Map the nonterminals to their new index: useful first, useless
245      afterwards.  Kept for later report.  */
246   {
247     int useful = 0;
248     int useless = nrules - nuseless_productions;
249     rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
250     rule_number r;
251     for (r = 0; r < nrules; ++r)
252       rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
253     free (rules);
254     rules = rules_sorted;
255 
256     /* Renumber the rules markers in RITEMS.  */
257     for (r = 0; r < nrules; ++r)
258       {
259 	item_number *rhsp = rules[r].rhs;
260 	for (/* Nothing. */; *rhsp >= 0; ++rhsp)
261 	  /* Nothing. */;
262 	*rhsp = rule_number_as_item_number (r);
263 	rules[r].number = r;
264       }
265     nrules -= nuseless_productions;
266   }
267 
268   /* Adjust NRITEMS.  */
269   {
270     rule_number r;
271     int length;
272     for (r = nrules; r < nrules + nuseless_productions; ++r)
273       {
274 	length = rule_rhs_length (&rules[r]);
275 	nritems -= length + 1;
276       }
277   }
278 }
279 
280 
281 /*------------------------------.
282 | Remove useless nonterminals.  |
283 `------------------------------*/
284 
285 static void
nonterminals_reduce(void)286 nonterminals_reduce (void)
287 {
288   symbol_number i, n;
289 
290   /* Map the nonterminals to their new index: useful first, useless
291      afterwards.  Kept for later report.  */
292 
293   symbol_number *nontermmap = xnmalloc (nvars, sizeof *nontermmap);
294   n = ntokens;
295   for (i = ntokens; i < nsyms; i++)
296     if (bitset_test (V, i))
297       nontermmap[i - ntokens] = n++;
298   for (i = ntokens; i < nsyms; i++)
299     if (!bitset_test (V, i))
300       {
301 	nontermmap[i - ntokens] = n++;
302 	warn_at (symbols[i]->location, _("nonterminal useless in grammar: %s"),
303 		 symbols[i]->tag);
304       }
305 
306 
307   /* Shuffle elements of tables indexed by symbol number.  */
308   {
309     symbol **symbols_sorted = xnmalloc (nvars, sizeof *symbols_sorted);
310 
311     for (i = ntokens; i < nsyms; i++)
312       symbols[i]->number = nontermmap[i - ntokens];
313     for (i = ntokens; i < nsyms; i++)
314       symbols_sorted[nontermmap[i - ntokens] - ntokens] = symbols[i];
315     for (i = ntokens; i < nsyms; i++)
316       symbols[i] = symbols_sorted[i - ntokens];
317     free (symbols_sorted);
318   }
319 
320   {
321     rule_number r;
322     for (r = 0; r < nrules; ++r)
323       {
324 	item_number *rhsp;
325 	for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
326 	  if (ISVAR (*rhsp))
327 	    *rhsp =  symbol_number_as_item_number (nontermmap[*rhsp
328 							      - ntokens]);
329       }
330     accept->number = nontermmap[accept->number - ntokens];
331   }
332 
333   nsyms -= nuseless_nonterminals;
334   nvars -= nuseless_nonterminals;
335 
336   free (nontermmap);
337 }
338 
339 
340 /*------------------------------------------------------------------.
341 | Output the detailed results of the reductions.  For FILE.output.  |
342 `------------------------------------------------------------------*/
343 
344 void
reduce_output(FILE * out)345 reduce_output (FILE *out)
346 {
347   if (nuseless_nonterminals > 0)
348     {
349       int i;
350       fprintf (out, "%s\n\n", _("Nonterminals useless in grammar"));
351       for (i = 0; i < nuseless_nonterminals; ++i)
352 	fprintf (out, "   %s\n", symbols[nsyms + i]->tag);
353       fputs ("\n\n", out);
354     }
355 
356   {
357     bool b = false;
358     int i;
359     for (i = 0; i < ntokens; i++)
360       if (reduce_token_unused_in_grammar (i))
361 	{
362 	  if (!b)
363 	    fprintf (out, "%s\n\n", _("Terminals unused in grammar"));
364 	  b = true;
365 	  fprintf (out, "   %s\n", symbols[i]->tag);
366 	}
367     if (b)
368       fputs ("\n\n", out);
369   }
370 
371   if (nuseless_productions > 0)
372     grammar_rules_partial_print (out, _("Rules useless in grammar"),
373 				 rule_useless_in_grammar_p);
374 }
375 
376 
377 /*-------------------------------.
378 | Report the results to STDERR.  |
379 `-------------------------------*/
380 
381 static void
reduce_print(void)382 reduce_print (void)
383 {
384   if (nuseless_nonterminals > 0)
385     warn (ngettext ("%d nonterminal useless in grammar",
386                     "%d nonterminals useless in grammar",
387                     nuseless_nonterminals),
388           nuseless_nonterminals);
389   if (nuseless_productions > 0)
390     warn (ngettext ("%d rule useless in grammar",
391                     "%d rules useless in grammar",
392                     nuseless_productions),
393           nuseless_productions);
394 }
395 
396 void
reduce_grammar(void)397 reduce_grammar (void)
398 {
399   bool reduced;
400 
401   /* Allocate the global sets used to compute the reduced grammar */
402 
403   N = bitset_create (nvars, BITSET_FIXED);
404   P =  bitset_create (nrules, BITSET_FIXED);
405   V = bitset_create (nsyms, BITSET_FIXED);
406   V1 = bitset_create (nsyms, BITSET_FIXED);
407 
408   useless_nonterminals ();
409   inaccessable_symbols ();
410 
411   reduced = (nuseless_nonterminals + nuseless_productions > 0);
412   if (!reduced)
413     return;
414 
415   reduce_print ();
416 
417   if (!bitset_test (N, accept->number - ntokens))
418     fatal_at (startsymbol_location,
419 	      _("start symbol %s does not derive any sentence"),
420 	      startsymbol->tag);
421 
422   /* First reduce the nonterminals, as they renumber themselves in the
423      whole grammar.  If you change the order, nonterms would be
424      renumbered only in the reduced grammar.  */
425   if (nuseless_nonterminals > 0)
426     nonterminals_reduce ();
427   if (nuseless_productions > 0)
428     reduce_grammar_tables ();
429 
430   if (trace_flag & trace_grammar)
431     {
432       grammar_dump (stderr, "Reduced Grammar");
433 
434       fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
435 , and %d productions.\n",
436 	       grammar_file, ntokens, nvars, nrules);
437     }
438 }
439 
440 bool
reduce_token_unused_in_grammar(symbol_number i)441 reduce_token_unused_in_grammar (symbol_number i)
442 {
443   aver (i < ntokens);
444   return !bitset_test (V, i) && !bitset_test (V1, i);
445 }
446 
447 bool
reduce_nonterminal_useless_in_grammar(symbol_number i)448 reduce_nonterminal_useless_in_grammar (symbol_number i)
449 {
450   aver (ntokens <= i && i < nsyms + nuseless_nonterminals);
451   return nsyms <= i;
452 }
453 
454 /*-----------------------------------------------------------.
455 | Free the global sets used to compute the reduced grammar.  |
456 `-----------------------------------------------------------*/
457 
458 void
reduce_free(void)459 reduce_free (void)
460 {
461   bitset_free (N);
462   bitset_free (V);
463   bitset_free (V1);
464   bitset_free (P);
465 }
466