1//
2//  BaseRecognizer.m
3//  ANTLR
4//
5//  Created by Alan Condit on 6/16/10.
6// [The "BSD licence"]
7// Copyright (c) 2010 Alan Condit
8// All rights reserved.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions
12// are met:
13// 1. Redistributions of source code must retain the above copyright
14//    notice, this list of conditions and the following disclaimer.
15// 2. Redistributions in binary form must reproduce the above copyright
16//    notice, this list of conditions and the following disclaimer in the
17//    documentation and/or other materials provided with the distribution.
18// 3. The name of the author may not be used to endorse or promote products
19//    derived from this software without specific prior written permission.
20//
21// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32#import "ACNumber.h"
33#import "BaseRecognizer.h"
34#import "HashRule.h"
35#import "RuleMemo.h"
36#import "CommonToken.h"
37#import "Map.h"
38#import "NoViableAltException.h"
39
40extern NSInteger debug;
41
42@implementation BaseRecognizer
43
44static AMutableArray *_tokenNames;
45static NSString *_grammarFileName;
46static NSString *NEXT_TOKEN_RULE_NAME;
47
48@synthesize state;
49@synthesize grammarFileName;
50//@synthesize failed;
51@synthesize sourceName;
52//@synthesize numberOfSyntaxErrors;
53@synthesize tokenNames;
54
55+ (void) initialize
56{
57    NEXT_TOKEN_RULE_NAME = [NSString stringWithString:@"nextToken"];
58    [NEXT_TOKEN_RULE_NAME retain];
59}
60
61+ (BaseRecognizer *) newBaseRecognizer
62{
63    return [[BaseRecognizer alloc] init];
64}
65
66+ (BaseRecognizer *) newBaseRecognizerWithRuleLen:(NSInteger)aLen
67{
68    return [[BaseRecognizer alloc] initWithLen:aLen];
69}
70
71+ (BaseRecognizer *) newBaseRecognizer:(RecognizerSharedState *)aState
72{
73	return [[BaseRecognizer alloc] initWithState:aState];
74}
75
76+ (AMutableArray *)getTokenNames
77{
78    return _tokenNames;
79}
80
81+ (void)setTokenNames:(AMutableArray *)theTokNams
82{
83    if ( _tokenNames != theTokNams ) {
84        if ( _tokenNames ) [_tokenNames release];
85        [theTokNams retain];
86    }
87    _tokenNames = theTokNams;
88}
89
90+ (void)setGrammarFileName:(NSString *)aFileName
91{
92    if ( _grammarFileName != aFileName ) {
93        if ( _grammarFileName ) [_grammarFileName release];
94        [aFileName retain];
95    }
96    [_grammarFileName retain];
97}
98
99- (id) init
100{
101	if ((self = [super init]) != nil) {
102        if (state == nil) {
103            state = [[RecognizerSharedState newRecognizerSharedState] retain];
104        }
105        tokenNames = _tokenNames;
106        if ( tokenNames ) [tokenNames retain];
107        grammarFileName = _grammarFileName;
108        if ( grammarFileName ) [grammarFileName retain];
109        state._fsp = -1;
110        state.errorRecovery = NO;		// are we recovering?
111        state.lastErrorIndex = -1;
112        state.failed = NO;				// indicate that some match failed
113        state.syntaxErrors = 0;
114        state.backtracking = 0;			// the level of backtracking
115        state.tokenStartCharIndex = -1;
116	}
117	return self;
118}
119
120- (id) initWithLen:(NSInteger)aLen
121{
122	if ((self = [super init]) != nil) {
123        if (state == nil) {
124            state = [[RecognizerSharedState newRecognizerSharedStateWithRuleLen:aLen] retain];
125        }
126        tokenNames = _tokenNames;
127        if ( tokenNames ) [tokenNames retain];
128        grammarFileName = _grammarFileName;
129        if ( grammarFileName ) [grammarFileName retain];
130        state._fsp = -1;
131        state.errorRecovery = NO;		// are we recovering?
132        state.lastErrorIndex = -1;
133        state.failed = NO;				// indicate that some match failed
134        state.syntaxErrors = 0;
135        state.backtracking = 0;			// the level of backtracking
136        state.tokenStartCharIndex = -1;
137	}
138	return self;
139}
140
141- (id) initWithState:(RecognizerSharedState *)aState
142{
143	if ((self = [super init]) != nil) {
144		state = aState;
145        if (state == nil) {
146            state = [RecognizerSharedState newRecognizerSharedState];
147        }
148        [state retain];
149        tokenNames = _tokenNames;
150        if ( tokenNames ) [tokenNames retain];
151        grammarFileName = _grammarFileName;
152        if ( grammarFileName ) [grammarFileName retain];
153        state._fsp = -1;
154        state.errorRecovery = NO;		// are we recovering?
155        state.lastErrorIndex = -1;
156        state.failed = NO;				// indicate that some match failed
157        state.syntaxErrors = 0;
158        state.backtracking = 0;			// the level of backtracking
159        state.tokenStartCharIndex = -1;
160	}
161	return self;
162}
163
164- (void)dealloc
165{
166#ifdef DEBUG_DEALLOC
167    NSLog( @"called dealloc in BaseRecognizer" );
168#endif
169	if ( grammarFileName ) [grammarFileName release];
170	if ( tokenNames ) [tokenNames release];
171	if ( state ) [state release];
172	[super dealloc];
173}
174
175// reset the recognizer to the initial state. does not touch the token source!
176// this can be extended by the grammar writer to reset custom ivars
177- (void) reset
178{
179    if ( state == nil )
180        return;
181    if ( state.following != nil ) {
182        if ( [state.following count] )
183            [state.following removeAllObjects];
184    }
185    state._fsp = -1;
186    state.errorRecovery = NO;		// are we recovering?
187    state.lastErrorIndex = -1;
188    state.failed = NO;				// indicate that some match failed
189    state.syntaxErrors = 0;
190    state.backtracking = 0;			// the level of backtracking
191    state.tokenStartCharIndex = -1;
192    if ( state.ruleMemo != nil ) {
193        if ( [state.ruleMemo count] )
194            [state.ruleMemo removeAllObjects];
195    }
196}
197
198- (BOOL) getFailed
199{
200	return [state getFailed];
201}
202
203- (void) setFailed:(BOOL)flag
204{
205	[state setFailed:flag];
206}
207
208- (RecognizerSharedState *) getState
209{
210	return state;
211}
212
213- (void) setState:(RecognizerSharedState *) theState
214{
215	if (state != theState) {
216		if ( state ) [state release];
217		state = theState;
218		[state retain];
219	}
220}
221
222- (id)input
223{
224    return nil; // Must be overriden in inheriting class
225}
226
227- (void)skip // override in inheriting class
228{
229    return;
230}
231
232-(id) match:(id<IntStream>)anInput TokenType:(NSInteger)ttype Follow:(ANTLRBitSet *)follow
233{
234	id matchedSymbol = [self getCurrentInputSymbol:anInput];
235	if ([anInput LA:1] == ttype) {
236		[anInput consume];
237		state.errorRecovery = NO;
238		state.failed = NO;
239		return matchedSymbol;
240	}
241	if (state.backtracking > 0) {
242		state.failed = YES;
243		return matchedSymbol;
244	}
245	matchedSymbol = [self recoverFromMismatchedToken:anInput TokenType:ttype Follow:follow];
246	return matchedSymbol;
247}
248
249-(void) matchAny:(id<IntStream>)anInput
250{
251    state.errorRecovery = NO;
252    state.failed = NO;
253    [anInput consume];
254}
255
256-(BOOL) mismatchIsUnwantedToken:(id<IntStream>)anInput TokenType:(NSInteger)ttype
257{
258    return [anInput LA:2] == ttype;
259}
260
261-(BOOL) mismatchIsMissingToken:(id<IntStream>)anInput Follow:(ANTLRBitSet *) follow
262{
263    if ( follow == nil ) {
264        // we have no information about the follow; we can only consume
265        // a single token and hope for the best
266        return NO;
267    }
268    // compute what can follow this grammar element reference
269    if ( [follow member:TokenTypeEOR] ) {
270        ANTLRBitSet *viableTokensFollowingThisRule = [self computeContextSensitiveRuleFOLLOW];
271        follow = [follow or:viableTokensFollowingThisRule];
272        if ( state._fsp >= 0 ) { // remove EOR if we're not the start symbol
273            [follow remove:(TokenTypeEOR)];
274        }
275    }
276    // if current token is consistent with what could come after set
277    // then we know we're missing a token; error recovery is free to
278    // "insert" the missing token
279
280    //System.out.println("viable tokens="+follow.toString(getTokenNames()));
281    //System.out.println("LT(1)="+((TokenStream)input).LT(1));
282
283    // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
284    // in follow set to indicate that the fall of the start symbol is
285    // in the set (EOF can follow).
286    if ( [follow member:[anInput LA:1]] || [follow member:TokenTypeEOR] ) {
287        //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
288        return YES;
289    }
290    return NO;
291}
292
293/** Report a recognition problem.
294 *
295 *  This method sets errorRecovery to indicate the parser is recovering
296 *  not parsing.  Once in recovery mode, no errors are generated.
297 *  To get out of recovery mode, the parser must successfully match
298 *  a token (after a resync).  So it will go:
299 *
300 * 		1. error occurs
301 * 		2. enter recovery mode, report error
302 * 		3. consume until token found in resynch set
303 * 		4. try to resume parsing
304 * 		5. next match() will reset errorRecovery mode
305 *
306 *  If you override, make sure to update syntaxErrors if you care about that.
307 */
308-(void) reportError:(RecognitionException *) e
309{
310    // if we've already reported an error and have not matched a token
311    // yet successfully, don't report any errors.
312    if ( state.errorRecovery ) {
313        //System.err.print("[SPURIOUS] ");
314        return;
315    }
316    state.syntaxErrors++; // don't count spurious
317    state.errorRecovery = YES;
318
319    [self displayRecognitionError:[self getTokenNames] Exception:e];
320}
321
322-(void) displayRecognitionError:(AMutableArray *)theTokNams Exception:(RecognitionException *)e
323{
324    NSString *hdr = [self getErrorHeader:e];
325    NSString *msg = [self getErrorMessage:e TokenNames:theTokNams];
326    [self emitErrorMessage:[NSString stringWithFormat:@" %@ %@", hdr, msg]];
327}
328
329/** What error message should be generated for the various
330 *  exception types?
331 *
332 *  Not very object-oriented code, but I like having all error message
333 *  generation within one method rather than spread among all of the
334 *  exception classes. This also makes it much easier for the exception
335 *  handling because the exception classes do not have to have pointers back
336 *  to this object to access utility routines and so on. Also, changing
337 *  the message for an exception type would be difficult because you
338 *  would have to subclassing exception, but then somehow get ANTLR
339 *  to make those kinds of exception objects instead of the default.
340 *  This looks weird, but trust me--it makes the most sense in terms
341 *  of flexibility.
342 *
343 *  For grammar debugging, you will want to override this to add
344 *  more information such as the stack frame with
345 *  getRuleInvocationStack(e, this.getClass().getName()) and,
346 *  for no viable alts, the decision description and state etc...
347 *
348 *  Override this to change the message generated for one or more
349 *  exception types.
350 */
351- (NSString *)getErrorMessage:(RecognitionException *)e TokenNames:(AMutableArray *)theTokNams
352{
353    // NSString *msg = [e getMessage];
354    NSString *msg;
355    if ( [e isKindOfClass:[UnwantedTokenException class]] ) {
356        UnwantedTokenException *ute = (UnwantedTokenException *)e;
357        NSString *tokenName=@"<unknown>";
358        if ( ute.expecting == TokenTypeEOF ) {
359            tokenName = @"EOF";
360        }
361        else {
362            tokenName = (NSString *)[theTokNams objectAtIndex:ute.expecting];
363        }
364        msg = [NSString stringWithFormat:@"extraneous input %@ expecting %@", [self getTokenErrorDisplay:[ute getUnexpectedToken]],
365               tokenName];
366    }
367    else if ( [e isKindOfClass:[MissingTokenException class] ] ) {
368        MissingTokenException *mte = (MissingTokenException *)e;
369        NSString *tokenName=@"<unknown>";
370        if ( mte.expecting== TokenTypeEOF ) {
371            tokenName = @"EOF";
372        }
373        else {
374            tokenName = [theTokNams objectAtIndex:mte.expecting];
375        }
376        msg = [NSString stringWithFormat:@"missing %@ at %@", tokenName, [self getTokenErrorDisplay:(e.token)] ];
377    }
378    else if ( [e isKindOfClass:[MismatchedTokenException class]] ) {
379        MismatchedTokenException *mte = (MismatchedTokenException *)e;
380        NSString *tokenName=@"<unknown>";
381        if ( mte.expecting== TokenTypeEOF ) {
382            tokenName = @"EOF";
383        }
384        else {
385            tokenName = [theTokNams objectAtIndex:mte.expecting];
386        }
387        msg = [NSString stringWithFormat:@"mismatched input %@ expecting %@",[self getTokenErrorDisplay:(e.token)], tokenName];
388    }
389    else if ( [e isKindOfClass:[MismatchedTreeNodeException class]] ) {
390        MismatchedTreeNodeException *mtne = (MismatchedTreeNodeException *)e;
391        NSString *tokenName=@"<unknown>";
392        if ( mtne.expecting==TokenTypeEOF ) {
393            tokenName = @"EOF";
394        }
395        else {
396            tokenName = [theTokNams objectAtIndex:mtne.expecting];
397        }
398        msg = [NSString stringWithFormat:@"mismatched tree node: %@ expecting %@", mtne.node, tokenName];
399    }
400    else if ( [e isKindOfClass:[NoViableAltException class]] ) {
401        //NoViableAltException *nvae = (NoViableAltException *)e;
402        // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
403        // and "(decision="+nvae.decisionNumber+") and
404        // "state "+nvae.stateNumber
405        //        msg = [NSString stringWithFormat:@"no viable alternative at input %@", [self getTokenErrorDisplay:e.token]];
406        msg = [NSString stringWithFormat:@"no viable alternative decision:%d state:%d at input %@", ((NoViableAltException *)e).stateNumber, ((NoViableAltException *)e).decisionNumber, [self getTokenErrorDisplay:e.token]];
407    }
408    else if ( [e isKindOfClass:[EarlyExitException class]] ) {
409        //EarlyExitException *eee = (EarlyExitException *)e;
410        // for development, can add "(decision="+eee.decisionNumber+")"
411        msg =[NSString stringWithFormat: @"required (...)+ loop did not match anything at input ", [self getTokenErrorDisplay:e.token]];
412    }
413    else if ( [e isKindOfClass:[MismatchedSetException class]] ) {
414        MismatchedSetException *mse = (MismatchedSetException *)e;
415        msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
416               [self getTokenErrorDisplay:(e.token)],
417               mse.expecting];
418    }
419#pragma warning NotSet not yet implemented.
420    else if ( [e isKindOfClass:[MismatchedNotSetException class] ] ) {
421        MismatchedNotSetException *mse = (MismatchedNotSetException *)e;
422        msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
423               [self getTokenErrorDisplay:(e.token)],
424               mse.expecting];
425    }
426    else if ( [e isKindOfClass:[FailedPredicateException class]] ) {
427        FailedPredicateException *fpe = (FailedPredicateException *)e;
428        msg = [NSString stringWithFormat:@"rule %@ failed predicate: { %@ }?", fpe.ruleName, fpe.predicate];
429    }
430    else {
431        msg = [NSString stringWithFormat:@"Exception= %@\n", e.name];
432    }
433    return msg;
434}
435
436/** Get number of recognition errors (lexer, parser, tree parser).  Each
437 *  recognizer tracks its own number.  So parser and lexer each have
438 *  separate count.  Does not count the spurious errors found between
439 *  an error and next valid token match
440 *
441 *  See also reportError()
442 */
443- (NSInteger) getNumberOfSyntaxErrors
444{
445    return state.syntaxErrors;
446}
447
448/** What is the error header, normally line/character position information? */
449- (NSString *)getErrorHeader:(RecognitionException *)e
450{
451    return [NSString stringWithFormat:@"line %d:%d", e.line, e.charPositionInLine];
452}
453
454/** How should a token be displayed in an error message? The default
455 *  is to display just the text, but during development you might
456 *  want to have a lot of information spit out.  Override in that case
457 *  to use t.toString() (which, for CommonToken, dumps everything about
458 *  the token). This is better than forcing you to override a method in
459 *  your token objects because you don't have to go modify your lexer
460 *  so that it creates a new Java type.
461 */
462- (NSString *)getTokenErrorDisplay:(id<Token>)t
463{
464    NSString *s = t.text;
465    if ( s == nil ) {
466        if ( t.type == TokenTypeEOF ) {
467            s = @"<EOF>";
468        }
469        else {
470            s = [NSString stringWithFormat:@"<%@>", t.type];
471        }
472    }
473    s = [s stringByReplacingOccurrencesOfString:@"\n" withString:@"\\\\n"];
474    s = [s stringByReplacingOccurrencesOfString:@"\r" withString:@"\\\\r"];
475    s = [s stringByReplacingOccurrencesOfString:@"\t" withString:@"\\\\t"];
476    return [NSString stringWithFormat:@"\'%@\'", s];
477}
478
479/** Override this method to change where error messages go */
480- (void) emitErrorMessage:(NSString *) msg
481{
482//    System.err.println(msg);
483    NSLog(@"%@", msg);
484}
485
486/** Recover from an error found on the input stream.  This is
487 *  for NoViableAlt and mismatched symbol exceptions.  If you enable
488 *  single token insertion and deletion, this will usually not
489 *  handle mismatched symbol exceptions but there could be a mismatched
490 *  token that the match() routine could not recover from.
491 */
492- (void)recover:(id<IntStream>)anInput Exception:(RecognitionException *)re
493{
494    if ( state.lastErrorIndex == anInput.index ) {
495        // uh oh, another error at same token index; must be a case
496        // where LT(1) is in the recovery token set so nothing is
497        // consumed; consume a single token so at least to prevent
498        // an infinite loop; this is a failsafe.
499        [anInput consume];
500    }
501    state.lastErrorIndex = anInput.index;
502    ANTLRBitSet *followSet = [self computeErrorRecoverySet];
503    [self beginResync];
504    [self consumeUntilFollow:anInput Follow:followSet];
505    [self endResync];
506}
507
508- (void) beginResync
509{
510
511}
512
513- (void) endResync
514{
515
516}
517
518/*  Compute the error recovery set for the current rule.  During
519 *  rule invocation, the parser pushes the set of tokens that can
520 *  follow that rule reference on the stack; this amounts to
521 *  computing FIRST of what follows the rule reference in the
522 *  enclosing rule. This local follow set only includes tokens
523 *  from within the rule; i.e., the FIRST computation done by
524 *  ANTLR stops at the end of a rule.
525 *
526 *  EXAMPLE
527 *
528 *  When you find a "no viable alt exception", the input is not
529 *  consistent with any of the alternatives for rule r.  The best
530 *  thing to do is to consume tokens until you see something that
531 *  can legally follow a call to r *or* any rule that called r.
532 *  You don't want the exact set of viable next tokens because the
533 *  input might just be missing a token--you might consume the
534 *  rest of the input looking for one of the missing tokens.
535 *
536 *  Consider grammar:
537 *
538 *  a : '[' b ']'
539 *    | '(' b ')'
540 *    ;
541 *  b : c '^' INT ;
542 *  c : ID
543 *    | INT
544 *    ;
545 *
546 *  At each rule invocation, the set of tokens that could follow
547 *  that rule is pushed on a stack.  Here are the various "local"
548 *  follow sets:
549 *
550 *  FOLLOW(b1_in_a) = FIRST(']') = ']'
551 *  FOLLOW(b2_in_a) = FIRST(')') = ')'
552 *  FOLLOW(c_in_b) = FIRST('^') = '^'
553 *
554 *  Upon erroneous input "[]", the call chain is
555 *
556 *  a -> b -> c
557 *
558 *  and, hence, the follow context stack is:
559 *
560 *  depth  local follow set     after call to rule
561 *    0         <EOF>                    a (from main())
562 *    1          ']'                     b
563 *    3          '^'                     c
564 *
565 *  Notice that ')' is not included, because b would have to have
566 *  been called from a different context in rule a for ')' to be
567 *  included.
568 *
569 *  For error recovery, we cannot consider FOLLOW(c)
570 *  (context-sensitive or otherwise).  We need the combined set of
571 *  all context-sensitive FOLLOW sets--the set of all tokens that
572 *  could follow any reference in the call chain.  We need to
573 *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
574 *  we resync'd to that token, we'd consume until EOF.  We need to
575 *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
576 *  In this case, for input "[]", LA(1) is in this set so we would
577 *  not consume anything and after printing an error rule c would
578 *  return normally.  It would not find the required '^' though.
579 *  At this point, it gets a mismatched token error and throws an
580 *  exception (since LA(1) is not in the viable following token
581 *  set).  The rule exception handler tries to recover, but finds
582 *  the same recovery set and doesn't consume anything.  Rule b
583 *  exits normally returning to rule a.  Now it finds the ']' (and
584 *  with the successful match exits errorRecovery mode).
585 *
586 *  So, you cna see that the parser walks up call chain looking
587 *  for the token that was a member of the recovery set.
588 *
589 *  Errors are not generated in errorRecovery mode.
590 *
591 *  ANTLR's error recovery mechanism is based upon original ideas:
592 *
593 *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
594 *
595 *  and
596 *
597 *  "A note on error recovery in recursive descent parsers":
598 *  http://portal.acm.org/citation.cfm?id=947902.947905
599 *
600 *  Later, Josef Grosch had some good ideas:
601 *
602 *  "Efficient and Comfortable Error Recovery in Recursive Descent
603 *  Parsers":
604 *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
605 *
606 *  Like Grosch I implemented local FOLLOW sets that are combined
607 *  at run-time upon error to avoid overhead during parsing.
608 */
609- (ANTLRBitSet *) computeErrorRecoverySet
610{
611    return [self combineFollows:NO];
612}
613
614/** Compute the context-sensitive FOLLOW set for current rule.
615 *  This is set of token types that can follow a specific rule
616 *  reference given a specific call chain.  You get the set of
617 *  viable tokens that can possibly come next (lookahead depth 1)
618 *  given the current call chain.  Contrast this with the
619 *  definition of plain FOLLOW for rule r:
620 *
621 *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
622 *
623 *  where x in T* and alpha, beta in V*; T is set of terminals and
624 *  V is the set of terminals and nonterminals.  In other words,
625 *  FOLLOW(r) is the set of all tokens that can possibly follow
626 *  references to r in *any* sentential form (context).  At
627 *  runtime, however, we know precisely which context applies as
628 *  we have the call chain.  We may compute the exact (rather
629 *  than covering superset) set of following tokens.
630 *
631 *  For example, consider grammar:
632 *
633 *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
634 *       | "return" expr '.'
635 *       ;
636 *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
637 *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
638 *       | '(' expr ')'
639 *       ;
640 *
641 *  The FOLLOW sets are all inclusive whereas context-sensitive
642 *  FOLLOW sets are precisely what could follow a rule reference.
643 *  For input input "i=(3);", here is the derivation:
644 *
645 *  stat => ID '=' expr ';'
646 *       => ID '=' atom ('+' atom)* ';'
647 *       => ID '=' '(' expr ')' ('+' atom)* ';'
648 *       => ID '=' '(' atom ')' ('+' atom)* ';'
649 *       => ID '=' '(' INT ')' ('+' atom)* ';'
650 *       => ID '=' '(' INT ')' ';'
651 *
652 *  At the "3" token, you'd have a call chain of
653 *
654 *    stat -> expr -> atom -> expr -> atom
655 *
656 *  What can follow that specific nested ref to atom?  Exactly ')'
657 *  as you can see by looking at the derivation of this specific
658 *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
659 *
660 *  You want the exact viable token set when recovering from a
661 *  token mismatch.  Upon token mismatch, if LA(1) is member of
662 *  the viable next token set, then you know there is most likely
663 *  a missing token in the input stream.  "Insert" one by just not
664 *  throwing an exception.
665 */
666- (ANTLRBitSet *)computeContextSensitiveRuleFOLLOW
667{
668    return [self combineFollows:YES];
669}
670
671// what is exact? it seems to only add sets from above on stack
672// if EOR is in set i.  When it sees a set w/o EOR, it stops adding.
673// Why would we ever want them all?  Maybe no viable alt instead of
674// mismatched token?
675- (ANTLRBitSet *)combineFollows:(BOOL) exact
676{
677    NSInteger top = state._fsp;
678    ANTLRBitSet *followSet = [[ANTLRBitSet newBitSet] retain];
679    for (int i = top; i >= 0; i--) {
680        ANTLRBitSet *localFollowSet = (ANTLRBitSet *)[state.following objectAtIndex:i];
681        /*
682         System.out.println("local follow depth "+i+"="+
683         localFollowSet.toString(getTokenNames())+")");
684         */
685        [followSet orInPlace:localFollowSet];
686        if ( exact ) {
687            // can we see end of rule?
688            if ( [localFollowSet member:TokenTypeEOR] ) {
689                // Only leave EOR in set if at top (start rule); this lets
690                // us know if have to include follow(start rule); i.e., EOF
691                if ( i > 0 ) {
692                    [followSet remove:TokenTypeEOR];
693                }
694            }
695            else { // can't see end of rule, quit
696                break;
697            }
698        }
699    }
700    return followSet;
701}
702
703/** Attempt to recover from a single missing or extra token.
704 *
705 *  EXTRA TOKEN
706 *
707 *  LA(1) is not what we are looking for.  If LA(2) has the right token,
708 *  however, then assume LA(1) is some extra spurious token.  Delete it
709 *  and LA(2) as if we were doing a normal match(), which advances the
710 *  input.
711 *
712 *  MISSING TOKEN
713 *
714 *  If current token is consistent with what could come after
715 *  ttype then it is ok to "insert" the missing token, else throw
716 *  exception For example, Input "i=(3;" is clearly missing the
717 *  ')'.  When the parser returns from the nested call to expr, it
718 *  will have call chain:
719 *
720 *    stat -> expr -> atom
721 *
722 *  and it will be trying to match the ')' at this point in the
723 *  derivation:
724 *
725 *       => ID '=' '(' INT ')' ('+' atom)* ';'
726 *                          ^
727 *  match() will see that ';' doesn't match ')' and report a
728 *  mismatched token error.  To recover, it sees that LA(1)==';'
729 *  is in the set of tokens that can follow the ')' token
730 *  reference in rule atom.  It can assume that you forgot the ')'.
731 */
732- (id<Token>)recoverFromMismatchedToken:(id<IntStream>)anInput
733                       TokenType:(NSInteger)ttype
734                          Follow:(ANTLRBitSet *)follow
735{
736    RecognitionException *e = nil;
737    // if next token is what we are looking for then "delete" this token
738    if ( [self mismatchIsUnwantedToken:anInput TokenType:ttype] ) {
739        e = [UnwantedTokenException newException:ttype Stream:anInput];
740        /*
741         System.err.println("recoverFromMismatchedToken deleting "+
742         ((TokenStream)input).LT(1)+
743         " since "+((TokenStream)input).LT(2)+" is what we want");
744         */
745        [self beginResync];
746        [anInput consume]; // simply delete extra token
747        [self endResync];
748        [self reportError:e];  // report after consuming so AW sees the token in the exception
749                         // we want to return the token we're actually matching
750        id matchedSymbol = [self getCurrentInputSymbol:anInput];
751        [anInput consume]; // move past ttype token as if all were ok
752        return matchedSymbol;
753    }
754    // can't recover with single token deletion, try insertion
755    if ( [self mismatchIsMissingToken:anInput Follow:follow] ) {
756        id<Token> inserted = [self getMissingSymbol:anInput Exception:e TokenType:ttype Follow:follow];
757        e = [MissingTokenException newException:ttype Stream:anInput With:inserted];
758        [self reportError:e];  // report after inserting so AW sees the token in the exception
759        return inserted;
760    }
761    // even that didn't work; must throw the exception
762    e = [MismatchedTokenException newException:ttype Stream:anInput];
763    @throw e;
764}
765
766/** Not currently used */
767-(id) recoverFromMismatchedSet:(id<IntStream>)anInput
768                     Exception:(RecognitionException *)e
769                        Follow:(ANTLRBitSet *) follow
770{
771    if ( [self mismatchIsMissingToken:anInput Follow:follow] ) {
772        // System.out.println("missing token");
773        [self reportError:e];
774        // we don't know how to conjure up a token for sets yet
775        return [self getMissingSymbol:anInput Exception:e TokenType:TokenTypeInvalid Follow:follow];
776    }
777    // TODO do single token deletion like above for Token mismatch
778    @throw e;
779}
780
781/** Match needs to return the current input symbol, which gets put
782 *  into the label for the associated token ref; e.g., x=ID.  Token
783 *  and tree parsers need to return different objects. Rather than test
784 *  for input stream type or change the IntStream interface, I use
785 *  a simple method to ask the recognizer to tell me what the current
786 *  input symbol is.
787 *
788 *  This is ignored for lexers.
789 */
790- (id) getCurrentInputSymbol:(id<IntStream>)anInput
791{
792    return nil;
793}
794
795/** Conjure up a missing token during error recovery.
796 *
797 *  The recognizer attempts to recover from single missing
798 *  symbols. But, actions might refer to that missing symbol.
799 *  For example, x=ID {f($x);}. The action clearly assumes
800 *  that there has been an identifier matched previously and that
801 *  $x points at that token. If that token is missing, but
802 *  the next token in the stream is what we want we assume that
803 *  this token is missing and we keep going. Because we
804 *  have to return some token to replace the missing token,
805 *  we have to conjure one up. This method gives the user control
806 *  over the tokens returned for missing tokens. Mostly,
807 *  you will want to create something special for identifier
808 *  tokens. For literals such as '{' and ',', the default
809 *  action in the parser or tree parser works. It simply creates
810 *  a CommonToken of the appropriate type. The text will be the token.
811 *  If you change what tokens must be created by the lexer,
812 *  override this method to create the appropriate tokens.
813 */
814- (id)getMissingSymbol:(id<IntStream>)anInput
815             Exception:(RecognitionException *)e
816             TokenType:(NSInteger)expectedTokenType
817                Follow:(ANTLRBitSet *)follow
818{
819    return nil;
820}
821
822
823-(void) consumeUntilTType:(id<IntStream>)anInput TokenType:(NSInteger)tokenType
824{
825    //System.out.println("consumeUntil "+tokenType);
826    int ttype = [anInput LA:1];
827    while (ttype != TokenTypeEOF && ttype != tokenType) {
828        [anInput consume];
829        ttype = [anInput LA:1];
830    }
831}
832
833/** Consume tokens until one matches the given token set */
834-(void) consumeUntilFollow:(id<IntStream>)anInput Follow:(ANTLRBitSet *)set
835{
836    //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
837    int ttype = [anInput LA:1];
838    while (ttype != TokenTypeEOF && ![set member:ttype] ) {
839        //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
840        [anInput consume];
841        ttype = [anInput LA:1];
842    }
843}
844
845/** Push a rule's follow set using our own hardcoded stack */
846- (void)pushFollow:(ANTLRBitSet *)fset
847{
848    if ( (state._fsp +1) >= [state.following count] ) {
849        //        AMutableArray *f = [AMutableArray arrayWithCapacity:[[state.following] count]*2];
850        //        System.arraycopy(state.following, 0, f, 0, state.following.length);
851        //        state.following = f;
852        [state.following addObject:fset];
853        [fset retain];
854        state._fsp++;
855    }
856    else {
857        [state.following replaceObjectAtIndex:++state._fsp withObject:fset];
858    }
859}
860
861- (ANTLRBitSet *)popFollow
862{
863    ANTLRBitSet *fset;
864
865    if ( state._fsp >= 0 && [state.following count] > 0 ) {
866        fset = [state.following objectAtIndex:state._fsp--];
867        [state.following removeLastObject];
868        return fset;
869    }
870    else {
871        NSLog( @"Attempted to pop a follow when none exists on the stack\n" );
872    }
873    return nil;
874}
875
876/** Return List<String> of the rules in your parser instance
877 *  leading up to a call to this method.  You could override if
878 *  you want more details such as the file/line info of where
879 *  in the parser java code a rule is invoked.
880 *
881 *  This is very useful for error messages and for context-sensitive
882 *  error recovery.
883 */
884- (AMutableArray *)getRuleInvocationStack
885{
886    NSString *parserClassName = [[self className] retain];
887    return [self getRuleInvocationStack:[RecognitionException newException] Recognizer:parserClassName];
888}
889
890/** A more general version of getRuleInvocationStack where you can
891 *  pass in, for example, a RecognitionException to get it's rule
892 *  stack trace.  This routine is shared with all recognizers, hence,
893 *  static.
894 *
895 *  TODO: move to a utility class or something; weird having lexer call this
896 */
897- (AMutableArray *)getRuleInvocationStack:(RecognitionException *)e
898                                Recognizer:(NSString *)recognizerClassName
899{
900    // char *name;
901    AMutableArray *rules = [[AMutableArray arrayWithCapacity:20] retain];
902    NSArray *stack = [e callStackSymbols];
903    int i = 0;
904    for (i = [stack count]-1; i >= 0; i--) {
905        NSString *t = [stack objectAtIndex:i];
906        // NSLog(@"stack %d = %@\n", i, t);
907        if ( [t commonPrefixWithString:@"org.antlr.runtime." options:NSLiteralSearch] ) {
908            // id aClass = objc_getClass( [t UTF8String] );
909            continue; // skip support code such as this method
910        }
911        if ( [t isEqualTo:NEXT_TOKEN_RULE_NAME] ) {
912            // name = sel_getName(method_getName(method));
913            // NSString *aMethod = [NSString stringWithFormat:@"%s", name];
914            continue;
915        }
916        if ( ![t isEqualTo:recognizerClassName] ) {
917            // name = class_getName( [t UTF8String] );
918            continue; // must not be part of this parser
919        }
920        [rules addObject:t];
921    }
922#ifdef DONTUSEYET
923    StackTraceElement[] stack = e.getStackTrace();
924    int i = 0;
925    for (i=stack.length-1; i>=0; i--) {
926        StackTraceElement t = stack[i];
927        if ( [t getClassName().startsWith("org.antlr.runtime.") ) {
928            continue; // skip support code such as this method
929        }
930              if ( [[t getMethodName] equals:NEXT_TOKEN_RULE_NAME] ) {
931            continue;
932        }
933              if ( ![[t getClassName] equals:recognizerClassName] ) {
934            continue; // must not be part of this parser
935        }
936              [rules addObject:[t getMethodName]];
937    }
938#endif
939    [stack release];
940    return rules;
941}
942
943- (NSInteger) getBacktrackingLevel
944{
945    return [state getBacktracking];
946}
947
948- (void) setBacktrackingLevel:(NSInteger)level
949{
950    [state setBacktracking:level];
951}
952
953        /** Used to print out token names like ID during debugging and
954 *  error reporting.  The generated parsers implement a method
955 *  that overrides this to point to their String[] tokenNames.
956 */
957- (NSArray *)getTokenNames
958{
959    return tokenNames;
960}
961
962/** For debugging and other purposes, might want the grammar name.
963 *  Have ANTLR generate an implementation for this method.
964 */
965- (NSString *)getGrammarFileName
966{
967    return grammarFileName;
968}
969
970- (NSString *)getSourceName
971{
972    return nil;
973}
974
975/** A convenience method for use most often with template rewrites.
976 *  Convert a List<Token> to List<String>
977 */
978- (AMutableArray *)toStrings:(AMutableArray *)tokens
979{
980    if ( tokens == nil )
981        return nil;
982    AMutableArray *strings = [AMutableArray arrayWithCapacity:[tokens count]];
983    id object;
984    NSInteger i = 0;
985    for (object in tokens) {
986        [strings addObject:[object text]];
987        i++;
988    }
989    return strings;
990}
991
992/** Given a rule number and a start token index number, return
993 *  ANTLR_MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
994 *  start index.  If this rule has parsed input starting from the
995 *  start index before, then return where the rule stopped parsing.
996 *  It returns the index of the last token matched by the rule.
997 *
998 *  For now we use a hashtable and just the slow Object-based one.
999 *  Later, we can make a special one for ints and also one that
1000 *  tosses out data after we commit past input position i.
1001 */
1002- (NSInteger)getRuleMemoization:(NSInteger)ruleIndex StartIndex:(NSInteger)ruleStartIndex
1003{
1004    ACNumber *stopIndexI;
1005    HashRule *aHashRule;
1006    if ( (aHashRule = [state.ruleMemo objectAtIndex:ruleIndex]) == nil ) {
1007        aHashRule = [HashRule newHashRuleWithLen:17];
1008        [state.ruleMemo insertObject:aHashRule atIndex:ruleIndex];
1009    }
1010    stopIndexI = [aHashRule getRuleMemoStopIndex:ruleStartIndex];
1011    if ( stopIndexI == nil ) {
1012        return ANTLR_MEMO_RULE_UNKNOWN;
1013    }
1014    return [stopIndexI integerValue];
1015}
1016
1017/** Has this rule already parsed input at the current index in the
1018 *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
1019 *  If we attempted but failed to parse properly before, return
1020 *  MEMO_RULE_FAILED.
1021 *
1022 *  This method has a side-effect: if we have seen this input for
1023 *  this rule and successfully parsed before, then seek ahead to
1024 *  1 past the stop token matched for this rule last time.
1025 */
1026- (BOOL)alreadyParsedRule:(id<IntStream>)anInput RuleIndex:(NSInteger)ruleIndex
1027{
1028    NSInteger aStopIndex = [self getRuleMemoization:ruleIndex StartIndex:anInput.index];
1029    if ( aStopIndex == ANTLR_MEMO_RULE_UNKNOWN ) {
1030        // NSLog(@"rule %d not yet encountered\n", ruleIndex);
1031        return NO;
1032    }
1033    if ( aStopIndex == ANTLR_MEMO_RULE_FAILED ) {
1034        if (debug) NSLog(@"rule %d will never succeed\n", ruleIndex);
1035        state.failed = YES;
1036    }
1037    else {
1038        if (debug) NSLog(@"seen rule %d before; skipping ahead to %d failed = %@\n", ruleIndex, aStopIndex+1, state.failed?@"YES":@"NO");
1039        [anInput seek:(aStopIndex+1)]; // jump to one past stop token
1040    }
1041    return YES;
1042}
1043
1044/** Record whether or not this rule parsed the input at this position
1045 *  successfully.  Use a standard java hashtable for now.
1046 */
1047- (void)memoize:(id<IntStream>)anInput
1048      RuleIndex:(NSInteger)ruleIndex
1049     StartIndex:(NSInteger)ruleStartIndex
1050{
1051    RuleStack *aRuleStack;
1052    NSInteger stopTokenIndex;
1053
1054    aRuleStack = state.ruleMemo;
1055    stopTokenIndex = (state.failed ? ANTLR_MEMO_RULE_FAILED : (anInput.index-1));
1056    if ( aRuleStack == nil ) {
1057        if (debug) NSLog(@"!!!!!!!!! memo array is nil for %@", [self getGrammarFileName]);
1058        return;
1059    }
1060    if ( ruleIndex >= [aRuleStack length] ) {
1061        if (debug) NSLog(@"!!!!!!!!! memo size is %d, but rule index is %d", [state.ruleMemo length], ruleIndex);
1062        return;
1063    }
1064    if ( [aRuleStack objectAtIndex:ruleIndex] != nil ) {
1065        [aRuleStack putHashRuleAtRuleIndex:ruleIndex StartIndex:ruleStartIndex StopIndex:stopTokenIndex];
1066    }
1067    return;
1068}
1069
1070/** return how many rule/input-index pairs there are in total.
1071 *  TODO: this includes synpreds. :(
1072 */
1073- (NSInteger)getRuleMemoizationCacheSize
1074{
1075    RuleStack *aRuleStack;
1076    HashRule *aHashRule;
1077
1078    int aCnt = 0;
1079    aRuleStack = state.ruleMemo;
1080    for (NSUInteger i = 0; aRuleStack != nil && i < [aRuleStack length]; i++) {
1081        aHashRule = [aRuleStack objectAtIndex:i];
1082        if ( aHashRule != nil ) {
1083            aCnt += [aHashRule count]; // how many input indexes are recorded?
1084        }
1085    }
1086    return aCnt;
1087}
1088
1089#pragma warning Have to fix traceIn and traceOut.
1090- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
1091{
1092    NSLog(@"enter %@ %@", ruleName, inputSymbol);
1093    if ( state.backtracking > 0 ) {
1094        NSLog(@" backtracking=%s", ((state.backtracking==YES)?"YES":"NO"));
1095    }
1096    NSLog(@"\n");
1097}
1098
1099- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
1100{
1101    NSLog(@"exit %@ -- %@", ruleName, inputSymbol);
1102    if ( state.backtracking > 0 ) {
1103        NSLog(@" backtracking=%s %s", state.backtracking?"YES":"NO", state.failed ? "failed":"succeeded");
1104    }
1105    NSLog(@"\n");
1106}
1107
1108
1109// call a syntactic predicate methods using its selector. this way we can support arbitrary synpreds.
1110- (BOOL) evaluateSyntacticPredicate:(SEL)synpredFragment // stream:(id<IntStream>)input
1111{
1112    id<IntStream> input;
1113
1114    state.backtracking++;
1115    // input = state.token.input;
1116    input = self.input;
1117    int start = [input mark];
1118    @try {
1119        [self performSelector:synpredFragment];
1120    }
1121    @catch (RecognitionException *re) {
1122        NSLog(@"impossible synpred: %@", re.name);
1123    }
1124    BOOL success = (state.failed == NO);
1125    [input rewind:start];
1126    state.backtracking--;
1127    state.failed = NO;
1128    return success;
1129}
1130
1131@end
1132
1133