• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /** A generic recognizer that can handle recognizers generated from
2  *  lexer, parser, and tree grammars.  This is all the parsing
3  *  support code essentially; most of it is error recovery stuff and
4  *  backtracking.
5  *
6  *  <p>This class should not be instantiated directly.  Instead, use one of its
7  *  subclasses.</p>
8  *
9  *  @class
10  *  @param {org.antlr.runtime.RecognizerSharedState} [state] state object with
11  *      which to initialize this recognizer.
12  */
13 org.antlr.runtime.BaseRecognizer = function(state) {
14     /** State of a lexer, parser, or tree parser are collected into a state
15      *  object so the state can be shared.  This sharing is needed to
16      *  have one grammar import others and share same error variables
17      *  and other state variables.  It's a kind of explicit multiple
18      *  inheritance via delegation of methods and shared state.
19      *  @type org.antlr.runtime.RecognizerSharedState
20      */
21     this.state = state || new org.antlr.runtime.RecognizerSharedState();
22 };
23 
24 org.antlr.lang.augmentObject(org.antlr.runtime.BaseRecognizer, {
25     /**
26      * @memberOf org.antlr.runtime.BaseRecognizer
27      * @type Number
28      */
29     MEMO_RULE_FAILED: -2,
30 
31     /**
32      * @memberOf org.antlr.runtime.BaseRecognizer
33      * @type Number
34      */
35     MEMO_RULE_UNKNOWN: -1,
36 
37     /**
38      * @memberOf org.antlr.runtime.BaseRecognizer
39      * @type Number
40      */
41     INITIAL_FOLLOW_STACK_SIZE: 100,
42 
43     /**
44      * @memberOf org.antlr.runtime.BaseRecognizer
45      * @type Number
46      */
47     MEMO_RULE_FAILED_I: -2,
48 
49     /**
50      * @memberOf org.antlr.runtime.BaseRecognizer
51      * @type Number
52      */
53     DEFAULT_TOKEN_CHANNEL: org.antlr.runtime.Token.DEFAULT_CHANNEL,
54 
55     /**
56      * @memberOf org.antlr.runtime.BaseRecognizer
57      * @type Number
58      */
59     HIDDEN: org.antlr.runtime.Token.HIDDEN_CHANNEL,
60 
61     /**
62      * @memberOf org.antlr.runtime.BaseRecognizer
63      * @type String
64      */
65     NEXT_TOKEN_RULE_NAME: "nextToken"
66 });
67 
68 org.antlr.runtime.BaseRecognizer.prototype = {
69     /** Reset the parser's state.  Subclasses must rewinds the input stream */
70     reset: function() {
71         var i, len;
72 
73         // wack everything related to error recovery
74         if (!this.state) {
75             return; // no shared state work to do
76         }
77         this.state._fsp = -1;
78         this.state.errorRecovery = false;
79         this.state.lastErrorIndex = -1;
80         this.state.failed = false;
81         this.state.syntaxErrors = 0;
82         // wack everything related to backtracking and memoization
83         this.state.backtracking = 0;
84         // wipe cache
85         if (this.state.ruleMemo) {
86             for (i=0, len=this.state.ruleMemo.length; i<len; i++) {
87                 this.state.ruleMemo[i] = null;
88             }
89         }
90     },
91 
92     /** Match current input symbol against ttype.  Attempt
93      *  single token insertion or deletion error recovery.  If
94      *  that fails, throw {@link org.antlr.runtime.MismatchedTokenException}.
95      *
96      *  <p>To turn off single token insertion or deletion error
97      *  recovery, override {@link #mismatchRecover} and have it call
98      *  plain {@link #mismatch}, which does not recover.  Then any error
99      *  in a rule will cause an exception and immediate exit from
100      *  rule.  Rule would recover by resynchronizing to the set of
101      *  symbols that can follow rule ref.</p>
102      *
103      *  @param {org.antlr.runtime.IntStream} input input stream to match against.
104      *  @param {Number} ttype  input type to match.
105      *  @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the
106      *      matched token.
107      *  @returns {Object} the matched symbol
108      */
109     match: function(input, ttype, follow) {
110         var matchedSymbol = this.getCurrentInputSymbol(input);
111         if ( input.LA(1)===ttype ) {
112             input.consume();
113             this.state.errorRecovery = false;
114             this.state.failed = false;
115             return matchedSymbol;
116         }
117         if ( this.state.backtracking>0 ) {
118             this.state.failed = true;
119             return matchedSymbol;
120         }
121         matchedSymbol = this.recoverFromMismatchedToken(input, ttype, follow);
122         return matchedSymbol;
123     },
124 
125     /**
126      * Match any token.
127      * @param {org.antlr.runtime.IntStream} input input stream to match against.
128      */
129     matchAny: function(input) {
130         this.state.errorRecovery = false;
131         this.state.failed = false;
132         input.consume();
133     },
134 
135     /**
136      * Is the following token (LA(2)) the unwanted type (ttype)?
137      * @param {org.antlr.runtime.IntStream} input input stream to match against.
138      * @param {Number} ttype the undesired token type.
139      * @returns {Boolean} true if and only if the following token is the
140      *      unwanted type.
141      */
142     mismatchIsUnwantedToken: function(input, ttype) {
143         return input.LA(2)===ttype;
144     },
145 
146     /**
147      * Does the stream appear to be missing a single token?
148      * @param {org.antlr.runtime.IntStream} input input stream to match against.
149      * @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the
150      *      matched token.
151      * @returns {Boolean} true if and only if it appears that the stream is
152      *      missing a single token.
153      */
154     mismatchIsMissingToken: function(input, follow) {
155         if ( !follow ) {
156             // we have no information about the follow; we can only consume
157             // a single token and hope for the best
158             return false;
159         }
160         // compute what can follow this grammar element reference
161         if ( follow.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) ) {
162             var viableTokensFollowingThisRule = this.computeContextSensitiveRuleFOLLOW();
163             follow = follow.or(this.viableTokensFollowingThisRule);
164             if ( this.state._fsp>=0 ) { // remove EOR if we're not the start symbol
165                 follow.remove(org.antlr.runtime.Token.EOR_TOKEN_TYPE);
166             }
167         }
168         // if current token is consistent with what could come after set
169         // then we know we're missing a token; error recovery is free to
170         // "insert" the missing token
171 
172         // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
173         // in follow set to indicate that the fall of the start symbol is
174         // in the set (EOF can follow).
175         if ( follow.member(input.LA(1)) ||
176              follow.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) )
177         {
178             return true;
179         }
180         return false;
181     },
182 
183     /** Factor out what to do upon token mismatch so tree parsers can behave
184      *  differently.  Override and call {@link #mismatchRecover}
185      *  to get single token insertion and deletion.
186      *
187      *  @param {org.antlr.runtime.IntStream} input input stream to match against.
188      *  @param {Number} ttype  input type to match.
189      *  @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the
190      *      matched token.
191      */
192     mismatch: function(input, ttype, follow) {
193         if ( this.mismatchIsUnwantedToken(input, ttype) ) {
194             throw new org.antlr.runtime.UnwantedTokenException(ttype, input);
195         } else if ( this.mismatchIsMissingToken(input, follow) ) {
196             throw new org.antlr.runtime.MissingTokenException(ttype, input, null);
197         }
198         throw new org.antlr.runtime.MismatchedTokenException(ttype, input);
199     },
200 
201     /** Report a recognition problem.
202      *
203      *  <p>This method sets errorRecovery to indicate the parser is recovering
204      *  not parsing.  Once in recovery mode, no errors are generated.
205      *  To get out of recovery mode, the parser must successfully match
206      *  a token (after a resync).  So it will go:</p>
207      *  <ol>
208      *      <li>error occurs</li>
209      *      <li>enter recovery mode, report error</li>
210      *      <li>consume until token found in resynch set</li>
211      *      <li>try to resume parsing</li>
212      *      <li>next match() will reset errorRecovery mode</li>
213      *  </ol>
214      *
215      *  <p>If you override, make sure to update this.state.syntaxErrors if you
216      *  care about that.</p>
217      *  @param {org.antlr.runtime.RecognitionException} e the error to be reported.
218      */
219     reportError: function(e) {
220         // if we've already reported an error and have not matched a token
221         // yet successfully, don't report any errors.
222         if ( this.state.errorRecovery ) {
223             return;
224         }
225         this.state.syntaxErrors++;
226         this.state.errorRecovery = true;
227 
228         this.displayRecognitionError(this.getTokenNames(), e);
229     },
230 
231     /**
232      * Assemble recognition error message.
233      * @param {Array} tokenNames array of token names (strings).
234      * @param {org.antlr.runtime.RecognitionException} e the error to be reported.
235      */
236     displayRecognitionError: function(tokenNames, e) {
237         var hdr = this.getErrorHeader(e),
238             msg = this.getErrorMessage(e, tokenNames);
239         this.emitErrorMessage(hdr+" "+msg);
240     },
241 
242     /**
243      * Create error header message.  Format is <q>line
244      * lineNumber:positionInLine</q>.
245      * @param {org.antlr.runtime.RecognitionException} e the error to be reported.
246      * @returns {String} The error header.
247      */
248     getErrorHeader: function(e) {
249         /* handle null input */
250         if (!org.antlr.lang.isNumber(e.line)) {
251             e.line = 0;
252         }
253         return "line "+e.line+":"+e.charPositionInLine;
254     },
255 
256     /**
257      * Override this method to change where error messages go.
258      * Defaults to "alert"-ing the error in browsers and "print"-ing the error
259      * in other environments (e.g. Rhino, SpiderMonkey).
260      * @param {String} msg the error message to be displayed.
261      */
262     emitErrorMessage: function(msg) {
263         if (typeof(window) != 'undefined' && window.alert) {
264             alert(msg);
265         } else {
266             print(msg);
267         }
268     },
269 
270     /** What error message should be generated for the various
271      *  exception types?
272      *
273      *  <p>Not very object-oriented code, but I like having all error message
274      *  generation within one method rather than spread among all of the
275      *  exception classes. This also makes it much easier for the exception
276      *  handling because the exception classes do not have to have pointers back
277      *  to this object to access utility routines and so on. Also, changing
278      *  the message for an exception type would be difficult because you
279      *  would have to be subclassing exceptions, but then somehow get ANTLR
280      *  to make those kinds of exception objects instead of the default.</p>
281      *
282      *  <p>For grammar debugging, you will want to override this to add
283      *  more information such as the stack frame and no viable alts.</p>
284      *
285      *  <p>Override this to change the message generated for one or more
286      *  exception types.</p>
287      *
288      * @param {Array} tokenNames array of token names (strings).
289      * @param {org.antlr.runtime.RecognitionException} e the error to be reported.
290      * @returns {String} the error message to be emitted.
291      */
292     getErrorMessage: function(e, tokenNames) {
293         var msg = (e && e.getMessage) ? e.getMessage() : null,
294             mte,
295             tokenName;
296         if ( e instanceof org.antlr.runtime.UnwantedTokenException ) {
297             var ute = e;
298             tokenName="<unknown>";
299             if ( ute.expecting== org.antlr.runtime.Token.EOF ) {
300                 tokenName = "EOF";
301             } else {
302                 tokenName = tokenNames[ute.expecting];
303             }
304             msg = "extraneous input "+this.getTokenErrorDisplay(ute.getUnexpectedToken())+
305                 " expecting "+tokenName;
306         }
307         else if ( e instanceof org.antlr.runtime.MissingTokenException ) {
308             mte = e;
309             tokenName="<unknown>";
310             if ( mte.expecting== org.antlr.runtime.Token.EOF ) {
311                 tokenName = "EOF";
312             } else {
313                 tokenName = tokenNames[mte.expecting];
314             }
315             msg = "missing "+tokenName+" at "+this.getTokenErrorDisplay(e.token);
316         }
317         else if ( e instanceof org.antlr.runtime.MismatchedTokenException ) {
318             mte = e;
319             tokenName="<unknown>";
320             if ( mte.expecting== org.antlr.runtime.Token.EOF ) {
321                 tokenName = "EOF";
322             }
323             else {
324                 tokenName = tokenNames[mte.expecting];
325             }
326             msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+
327                 " expecting "+tokenName;
328         }
329         else if ( e instanceof org.antlr.runtime.NoViableAltException ) {
330             msg = "no viable alternative at input "+this.getTokenErrorDisplay(e.token);
331         }
332         else if ( e instanceof org.antlr.runtime.EarlyExitException ) {
333             msg = "required (...)+ loop did not match anything at input "+
334                 this.getTokenErrorDisplay(e.token);
335         }
336         else if ( e instanceof org.antlr.runtime.MismatchedSetException ) {
337             msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+
338                 " expecting set "+e.expecting;
339         }
340         else if ( e instanceof org.antlr.runtime.MismatchedNotSetException ) {
341             msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+
342                 " expecting set "+e.expecting;
343         }
344         else if ( e instanceof org.antlr.runtime.FailedPredicateException ) {
345             msg = "rule "+e.ruleName+" failed predicate: {"+
346                 e.predicateText+"}?";
347         }
348         return msg;
349     },
350 
351     /** <p>Get number of recognition errors (lexer, parser, tree parser).  Each
352      *  recognizer tracks its own number.  So parser and lexer each have
353      *  separate count.  Does not count the spurious errors found between
354      *  an error and next valid token match.</p>
355      *
356      *  <p>See also {@link #reportError}()
357      *  @returns {Number} number of syntax errors encountered
358      */
359     getNumberOfSyntaxErrors: function() {
360         return this.state.syntaxErrors;
361     },
362 
363     /** How should a token be displayed in an error message? The default
364      *  is to display just the text, but during development you might
365      *  want to have a lot of information spit out.  Override in that case
366      *  to use t.toString() (which, for CommonToken, dumps everything about
367      *  the token).
368      * @param {org.antlr.runtime.Token} t token that will be displayed in an error message
369      * @return {String} the string representation of the token
370      */
371     getTokenErrorDisplay: function(t) {
372         var s = t.getText();
373         if ( !org.antlr.lang.isValue(s) ) {
374             if ( t.getType()==org.antlr.runtime.Token.EOF ) {
375                 s = "<EOF>";
376             }
377             else {
378                 s = "<"+t.getType()+">";
379             }
380         }
381         s = s.replace(/\n/g,"\\n");
382         s = s.replace(/\r/g,"\\r");
383         s = s.replace(/\t/g,"\\t");
384         return "'"+s+"'";
385     },
386 
387     /** Recover from an error found on the input stream.  This is
388      *  for NoViableAlt and mismatched symbol exceptions.  If you enable
389      *  single token insertion and deletion, this will usually not
390      *  handle mismatched symbol exceptions but there could be a mismatched
391      *  token that the match() routine could not recover from.
392      *  @param {org.antlr.runtime.IntStream} input the intput stream
393      *  @param {org.antlr.runtime.RecogntionException} the error found on the input stream
394      */
395     recover: function(input, re) {
396         if ( this.state.lastErrorIndex==input.index() ) {
397             // uh oh, another error at same token index; must be a case
398             // where LT(1) is in the recovery token set so nothing is
399             // consumed; consume a single token so at least to prevent
400             // an infinite loop; this is a failsafe.
401             input.consume();
402         }
403         this.state.lastErrorIndex = input.index();
404         var followSet = this.computeErrorRecoverySet();
405         this.beginResync();
406         this.consumeUntil(input, followSet);
407         this.endResync();
408     },
409 
410     /** A hook to listen in on the token consumption during error recovery.
411      */
412     beginResync: function() {
413     },
414 
415     /** A hook to listen in on the token consumption during error recovery.
416      */
417     endResync: function() {
418     },
419 
420     /** Compute the error recovery set for the current rule.
421      *  <p>During rule invocation, the parser pushes the set of tokens that can
422      *  follow that rule reference on the stack; this amounts to
423      *  computing FIRST of what follows the rule reference in the
424      *  enclosing rule. This local follow set only includes tokens
425      *  from within the rule; i.e., the FIRST computation done by
426      *  ANTLR stops at the end of a rule.</p>
427      *
428      *  <p>EXAMPLE</p>
429      *
430      *  <p>When you find a "no viable alt exception", the input is not
431      *  consistent with any of the alternatives for rule r.  The best
432      *  thing to do is to consume tokens until you see something that
433      *  can legally follow a call to r *or* any rule that called r.
434      *  You don't want the exact set of viable next tokens because the
435      *  input might just be missing a token--you might consume the
436      *  rest of the input looking for one of the missing tokens.</p>
437      *
438      *  <p>Consider grammar:</p>
439      *  <code><pre>
440      *  a : '[' b ']'
441      *    | '(' b ')'
442      *    ;
443      *  b : c '^' INT ;
444      *  c : ID
445      *    | INT
446      *    ;
447      *  </pre></code>
448      *
449      *  <p>At each rule invocation, the set of tokens that could follow
450      *  that rule is pushed on a stack.  Here are the various "local"
451      *  follow sets:</p>
452      *
453      *  <code><pre>
454      *  FOLLOW(b1_in_a) = FIRST(']') = ']'
455      *  FOLLOW(b2_in_a) = FIRST(')') = ')'
456      *  FOLLOW(c_in_b) = FIRST('^') = '^'
457      *  </pre></code>
458      *
459      *  <p>Upon erroneous input "[]", the call chain is</p>
460      *
461      *  <code>a -> b -> c</code>
462      *
463      *  <p>and, hence, the follow context stack is:</p>
464      *
465      *  <code><pre>
466      *  depth  local follow set     after call to rule
467      *    0         <EOF>                    a (from main())
468      *    1          ']'                     b
469      *    3          '^'                     c
470      *  </pre></code>
471      *
472      *  <p>Notice that ')' is not included, because b would have to have
473      *  been called from a different context in rule a for ')' to be
474      *  included.</p>
475      *
476      *  <p>For error recovery, we cannot consider FOLLOW(c)
477      *  (context-sensitive or otherwise).  We need the combined set of
478      *  all context-sensitive FOLLOW sets--the set of all tokens that
479      *  could follow any reference in the call chain.  We need to
480      *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
481      *  we resync'd to that token, we'd consume until EOF.  We need to
482      *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
483      *  In this case, for input "[]", LA(1) is in this set so we would
484      *  not consume anything and after printing an error rule c would
485      *  return normally.  It would not find the required '^' though.
486      *  At this point, it gets a mismatched token error and throws an
487      *  exception (since LA(1) is not in the viable following token
488      *  set).  The rule exception handler tries to recover, but finds
489      *  the same recovery set and doesn't consume anything.  Rule b
490      *  exits normally returning to rule a.  Now it finds the ']' (and
491      *  with the successful match exits errorRecovery mode).</p>
492      *
493      *  <p>So, you cna see that the parser walks up call chain looking
494      *  for the token that was a member of the recovery set.</p>
495      *
496      *  <p>Errors are not generated in errorRecovery mode.</p>
497      *
498      *  <p>ANTLR's error recovery mechanism is based upon original ideas:</p>
499      *
500      *  <p>"Algorithms + Data Structures = Programs" by Niklaus Wirth</p>
501      *
502      *  <p>and</p>
503      *
504      *  <p>"A note on error recovery in recursive descent parsers":
505      *  http://portal.acm.org/citation.cfm?id=947902.947905</p>
506      *
507      *  <p>Later, Josef Grosch had some good ideas:</p>
508      *
509      *  <p>"Efficient and Comfortable Error Recovery in Recursive Descent
510      *  Parsers":
511      *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip</p>
512      *
513      *  <p>Like Grosch I implemented local FOLLOW sets that are combined
514      *  at run-time upon error to avoid overhead during parsing.</p>
515      *  @returns {org.antlr.runtime.BitSet}
516      */
517     computeErrorRecoverySet: function() {
518         return this.combineFollows(false);
519     },
520 
521 
522     /** Compute the context-sensitive FOLLOW set for current rule.
523      *  <p>This is set of token types that can follow a specific rule
524      *  reference given a specific call chain.  You get the set of
525      *  viable tokens that can possibly come next (lookahead depth 1)
526      *  given the current call chain.  Contrast this with the
527      *  definition of plain FOLLOW for rule r:</p>
528      *
529      *   <code>FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}</code>
530      *
531      *  <p>where x in T* and alpha, beta in V*; T is set of terminals and
532      *  V is the set of terminals and nonterminals.  In other words,
533      *  FOLLOW(r) is the set of all tokens that can possibly follow
534      *  references to r in *any* sentential form (context).  At
535      *  runtime, however, we know precisely which context applies as
536      *  we have the call chain.  We may compute the exact (rather
537      *  than covering superset) set of following tokens.</p>
538      *
539      *  <p>For example, consider grammar:</p>
540      *
541      *  <code><pre>
542      *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
543      *       | "return" expr '.'
544      *       ;
545      *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
546      *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
547      *       | '(' expr ')'
548      *       ;
549      *  </pre></code>
550      *
551      *  <p>The FOLLOW sets are all inclusive whereas context-sensitive
552      *  FOLLOW sets are precisely what could follow a rule reference.
553      *  For input input "i=(3);", here is the derivation:</p>
554      *
555      *  <code><pre>
556      *  stat => ID '=' expr ';'
557      *       => ID '=' atom ('+' atom)* ';'
558      *       => ID '=' '(' expr ')' ('+' atom)* ';'
559      *       => ID '=' '(' atom ')' ('+' atom)* ';'
560      *       => ID '=' '(' INT ')' ('+' atom)* ';'
561      *       => ID '=' '(' INT ')' ';'
562      *  </pre></code>
563      *
564      *  <p>At the "3" token, you'd have a call chain of</p>
565      *
566      *  <code>  stat -> expr -> atom -> expr -> atom</code>
567      *
568      *  <p>What can follow that specific nested ref to atom?  Exactly ')'
569      *  as you can see by looking at the derivation of this specific
570      *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.</p>
571      *
572      *  <p>You want the exact viable token set when recovering from a
573      *  token mismatch.  Upon token mismatch, if LA(1) is member of
574      *  the viable next token set, then you know there is most likely
575      *  a missing token in the input stream.  "Insert" one by just not
576      *  throwing an exception.</p>
577      *  @returns {org.antlr.runtime.BitSet}
578      */
579     computeContextSensitiveRuleFOLLOW: function() {
580         return this.combineFollows(true);
581     },
582 
583     /**
584      * Helper method for {@link #computeErrorRecoverySet} and
585      * {@link computeContextSensitiveRuleFOLLO}.
586      * @param {Boolean} exact
587      * @returns {org.antlr.runtime.BitSet}
588      */
589     combineFollows: function(exact) {
590         var top = this.state._fsp,
591             i,
592             localFollowSet,
593             followSet = new org.antlr.runtime.BitSet();
594         for (i=top; i>=0; i--) {
595             localFollowSet = this.state.following[i];
596             followSet.orInPlace(localFollowSet);
597             if ( exact ) {
598                 // can we see end of rule?
599                 if ( localFollowSet.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) )
600                 {
601                     // Only leave EOR in set if at top (start rule); this lets
602                     // us know if have to include follow(start rule); i.e., EOF
603                     if ( i>0 ) {
604                         followSet.remove(org.antlr.runtime.Token.EOR_TOKEN_TYPE);
605                     }
606                 }
607                 else { // can't see end of rule, quit
608                     break;
609                 }
610             }
611         }
612         return followSet;
613     },
614 
615     /** Attempt to recover from a single missing or extra token.
616      *
617      *  <p>EXTRA TOKEN</p>
618      *
619      *  <p>LA(1) is not what we are looking for.  If LA(2) has the right token,
620      *  however, then assume LA(1) is some extra spurious token.  Delete it
621      *  and LA(2) as if we were doing a normal match(), which advances the
622      *  input.</p>
623      *
624      *  <p>MISSING TOKEN</p>
625      *
626      *  <p>If current token is consistent with what could come after
627      *  ttype then it is ok to "insert" the missing token, else throw
628      *  exception For example, Input "i=(3;" is clearly missing the
629      *  ')'.  When the parser returns from the nested call to expr, it
630      *  will have call chain:</p>
631      *
632      *  <pre><code>  stat -> expr -> atom</code></pre>
633      *
634      *  <p>and it will be trying to match the ')' at this point in the
635      *  derivation:</p>
636      *
637      *  <pre><code>     => ID '=' '(' INT ')' ('+' atom)* ';'</code></pre>
638      *                          ^
639      *  <p>match() will see that ';' doesn't match ')' and report a
640      *  mismatched token error.  To recover, it sees that LA(1)==';'
641      *  is in the set of tokens that can follow the ')' token
642      *  reference in rule atom.  It can assume that you forgot the ')'.</p>
643      *
644      *  @param {org.antlr.runtime.IntStream} input
645      *  @param {Number} ttype
646      *  @param {org.antlr.runtime.BitSet} follow
647      *  @returns {Object}
648      */
649     recoverFromMismatchedToken: function(input,
650                                          ttype,
651                                          follow)
652     {
653         var e = null;
654         // if next token is what we are looking for then "delete" this token
655         if ( this.mismatchIsUnwantedToken(input, ttype) ) {
656             e = new org.antlr.runtime.UnwantedTokenException(ttype, input);
657             this.beginResync();
658             input.consume(); // simply delete extra token
659             this.endResync();
660             this.reportError(e);  // report after consuming so AW sees the token in the exception
661             // we want to return the token we're actually matching
662             var matchedSymbol = this.getCurrentInputSymbol(input);
663             input.consume(); // move past ttype token as if all were ok
664             return matchedSymbol;
665         }
666         // can't recover with single token deletion, try insertion
667         if ( this.mismatchIsMissingToken(input, follow) ) {
668             var inserted = this.getMissingSymbol(input, e, ttype, follow);
669             e = new org.antlr.runtime.MissingTokenException(ttype, input, inserted);
670             this.reportError(e);  // report after inserting so AW sees the token in the exception
671             return inserted;
672         }
673         // even that didn't work; must throw the exception
674         e = new org.antlr.runtime.MismatchedTokenException(ttype, input);
675         throw e;
676     },
677 
678     /**
679      * Recover from a mismatched set exception.
680      * @param {org.antlr.runtime.IntStream} input
681      * @param {org.antlr.runtime.RecognitionException} e
682      * @param {org.antlr.runtime.BitSet} follow
683      * @returns {Object}
684      */
685     recoverFromMismatchedSet: function(input,
686                                        e,
687                                        follow)
688     {
689         if ( this.mismatchIsMissingToken(input, follow) ) {
690             // System.out.println("missing token");
691             this.reportError(e);
692             // we don't know how to conjure up a token for sets yet
693             return this.getMissingSymbol(input, e, org.antlr.runtime.Token.INVALID_TOKEN_TYPE, follow);
694         }
695         throw e;
696     },
697 
698     /** Match needs to return the current input symbol, which gets put
699      *  into the label for the associated token ref; e.g., x=ID.  Token
700      *  and tree parsers need to return different objects. Rather than test
701      *  for input stream type or change the IntStream interface, I use
702      *  a simple method to ask the recognizer to tell me what the current
703      *  input symbol is.
704      *
705      *  <p>This is ignored for lexers.</p>
706      *  @param {org.antlr.runtime.IntStream} input
707      *  @returns {Object}
708      */
709     getCurrentInputSymbol: function(input) { return null; },
710 
711     /** Conjure up a missing token during error recovery.
712      *
713      *  <p>The recognizer attempts to recover from single missing
714      *  symbols. But, actions might refer to that missing symbol.
715      *  For example, x=ID {f($x);}. The action clearly assumes
716      *  that there has been an identifier matched previously and that
717      *  $x points at that token. If that token is missing, but
718      *  the next token in the stream is what we want we assume that
719      *  this token is missing and we keep going. Because we
720      *  have to return some token to replace the missing token,
721      *  we have to conjure one up. This method gives the user control
722      *  over the tokens returned for missing tokens. Mostly,
723      *  you will want to create something special for identifier
724      *  tokens. For literals such as '{' and ',', the default
725      *  action in the parser or tree parser works. It simply creates
726      *  a CommonToken of the appropriate type. The text will be the token.
727      *  If you change what tokens must be created by the lexer,
728      *  override this method to create the appropriate tokens.</p>
729      *
730      *  @param {org.antlr.runtime.IntStream} input
731      *  @param {org.antlr.runtime.RecognitionException} e
732      *  @param {Number} expectedTokenType
733      *  @param {org.antlr.runtime.BitSet} follow
734      *  @returns {Object}
735      */
736     getMissingSymbol: function(input,
737                                e,
738                                expectedTokenType,
739                                follow)
740     {
741         return null;
742     },
743 
744 
745     /**
746      * Consume tokens until one matches the given token or token set
747      * @param {org.antlr.runtime.IntStream} input
748      * @param {Number|org.antlr.runtime.BitSet} set
749      */
750     consumeUntil: function(input, set) {
751         var ttype = input.LA(1);
752         while (ttype != org.antlr.runtime.Token.EOF && !set.member(ttype) ) {
753             input.consume();
754             ttype = input.LA(1);
755         }
756     },
757 
758     /**
759      * Push a rule's follow set using our own hardcoded stack.
760      * @param {org.antlr.runtime.BitSet} fset
761      */
762     pushFollow: function(fset) {
763         if ( (this.state._fsp +1)>=this.state.following.length ) {
764             var f = [];
765             var i;
766             for (i=this.state.following.length-1; i>=0; i--) {
767                 f[i] = this.state.following[i];
768             }
769             this.state.following = f;
770         }
771         this.state._fsp++;
772         this.state.following[this.state._fsp] = fset;
773     },
774 
775     /**
776      * Sadly JavaScript doesn't provide a robust mechanism for runtime stack reflection.
777      * This makes implementing this function impossible without maintaining an auxillary
778      * stack data structure, which would be crazy expensive, especially in Lexers.  As such,
779      * this method remains unimplemented.
780      * @deprecated
781      */
782     getRuleInvocationStack: function(e, recognizerClassName)
783     {
784         throw new Error("Not implemented.");
785     },
786 
787     /**
788      * Get this recognizer's backtracking level.
789      * @returns {Number} backtracking level
790      */
791     getBacktrackingLevel: function() {
792         return this.state.backtracking;
793     },
794 
795     /** Used to print out token names like ID during debugging and
796      *  error reporting.  The generated parsers implement a method
797      *  that overrides this to point to their String[] tokenNames.
798      *  @returns {Array} String array of token names.
799      */
800     getTokenNames: function() {
801         return null;
802     },
803 
804     /** For debugging and other purposes, might want the grammar name.
805      *  Have ANTLR generate an implementation for this method.
806      *  @returns {String} the grammar name.
807      */
808     getGrammarFileName: function() {
809         return null;
810     },
811 
812     /** A convenience method for use most often with template rewrites.
813      *  Convert an array of Tokens to an array of Strings.
814      *  @param {Array} array of org.antlr.runtime.Token objects.
815      *  @returns {Array} array of string representations of the argument.
816      */
817     toStrings: function(tokens) {
818         if ( !tokens ) {
819             return null;
820         }
821         var strings = [];
822         var i;
823         for (i=0; i<tokens.length; i++) {
824             strings.push(tokens[i].getText());
825         }
826         return strings;
827     },
828 
829     /** Given a rule number and a start token index number, return
830      *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
831      *  start index.  If this rule has parsed input starting from the
832      *  start index before, then return where the rule stopped parsing.
833      *  It returns the index of the last token matched by the rule.
834      *
835      *  <p>For now we use a hashtable and just the slow Object-based one.
836      *  Later, we can make a special one for ints and also one that
837      *  tosses out data after we commit past input position i.</p>
838      *  @param {Number} ruleIndex
839      *  @param {Number} ruleStartIndex
840      *  @returns {Number}
841      */
842     getRuleMemoization: function(ruleIndex, ruleStartIndex) {
843         if ( !this.state.ruleMemo[ruleIndex] ) {
844             this.state.ruleMemo[ruleIndex] = {};
845         }
846         var stopIndexI =
847             this.state.ruleMemo[ruleIndex][ruleStartIndex];
848         if ( !org.antlr.lang.isNumber(stopIndexI) ) {
849             return org.antlr.runtime.BaseRecognizer.MEMO_RULE_UNKNOWN;
850         }
851         return stopIndexI;
852     },
853 
854     /** Has this rule already parsed input at the current index in the
855      *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
856      *  If we attempted but failed to parse properly before, return
857      *  MEMO_RULE_FAILED.
858      *
859      *  <p>This method has a side-effect: if we have seen this input for
860      *  this rule and successfully parsed before, then seek ahead to
861      *  1 past the stop token matched for this rule last time.</p>
862      *  @param {org.antlr.runtime.IntStream} input
863      *  @param {Number} ruleIndex
864      *  @returns {Boolean}
865      */
866     alreadyParsedRule: function(input, ruleIndex) {
867         var stopIndex = this.getRuleMemoization(ruleIndex, input.index());
868         if ( stopIndex==org.antlr.runtime.BaseRecognizer.MEMO_RULE_UNKNOWN ) {
869             return false;
870         }
871         if ( stopIndex==org.antlr.runtime.BaseRecognizer.MEMO_RULE_FAILED ) {
872             //System.out.println("rule "+ruleIndex+" will never succeed");
873             this.state.failed=true;
874         }
875         else {
876             input.seek(stopIndex+1); // jump to one past stop token
877         }
878         return true;
879     },
880 
881     /** Record whether or not this rule parsed the input at this position
882      *  successfully.  Use a standard java hashtable for now.
883      *  @param {org.antlr.runtime.IntStream} input
884      *  @param {Number} ruleIndex
885      *  @param {Number} ruleStartIndex
886      */
887     memoize: function(input,
888                       ruleIndex,
889                       ruleStartIndex)
890     {
891         var stopTokenIndex = this.state.failed ?
892             org.antlr.runtime.BaseRecognizer.MEMO_RULE_FAILED : input.index()-1;
893         if ( !org.antlr.lang.isValue(this.state.ruleMemo) ) {
894             throw new Error("!!!!!!!!! memo array is null for "+ this.getGrammarFileName());
895         }
896         if ( ruleIndex >= this.state.ruleMemo.length ) {
897             throw new Error("!!!!!!!!! memo size is "+this.state.ruleMemo.length+", but rule index is "+ruleIndex);
898         }
899         if ( org.antlr.lang.isValue(this.state.ruleMemo[ruleIndex]) ) {
900             this.state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
901         }
902     },
903 
904     /** return how many rule/input-index pairs there are in total.
905      *  TODO: this includes synpreds.
906      *  @returns {Number}
907      */
908     getRuleMemoizationCacheSize: function() {
909         var n = 0, i;
910         for (i = 0; this.state.ruleMemo && i < this.state.ruleMemo.length; i++) {
911             var ruleMap = this.state.ruleMemo[i];
912             if ( ruleMap ) {
913                 // @todo need to get size of rulemap?
914                 n += ruleMap.length; // how many input indexes are recorded?
915             }
916         }
917         return n;
918     },
919 
920     /**
921      * When a grammar is compiled with the tracing flag enabled, this method is invoked
922      * at the start of each rule.
923      * @param {String} ruleName the current ruleName
924      * @param {Number} ruleIndex
925      * @param {Object} inputSymbol
926      */
927     traceIn: function(ruleName, ruleIndex, inputSymbol)  {
928         this.emitErrorMessage("enter "+ruleName+" "+inputSymbol);
929         if ( this.state.failed ) {
930             this.emitErrorMessage(" failed="+this.failed);
931         }
932         if ( this.state.backtracking>0 ) {
933             this.emitErrorMessage(" backtracking="+this.state.backtracking);
934         }
935         // System.out.println();
936     },
937 
938     /**
939      * When a grammar is compiled with the tracing flag enabled, this method is invoked
940      * at the end of each rule.
941      * @param {String} ruleName the current ruleName
942      * @param {Number} ruleIndex
943      * @param {Object} inputSymbol
944      */
945     traceOut: function(ruleName, ruleIndex, inputSymbol) {
946         this.emitErrorMessage("exit "+ruleName+" "+inputSymbol);
947         if ( this.state.failed ) {
948             this.emitErrorMessage(" failed="+this.state.failed);
949         }
950         if ( this.state.backtracking>0 ) {
951             this.emitErrorMessage(" backtracking="+this.state.backtracking);
952         }
953     }
954 };
955