1// [The "BSD licence"]
2// Copyright (c) 2006-2007 Kay Roepke 2010 Alan Condit
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions
7// are met:
8// 1. Redistributions of source code must retain the above copyright
9//    notice, this list of conditions and the following disclaimer.
10// 2. Redistributions in binary form must reproduce the above copyright
11//    notice, this list of conditions and the following disclaimer in the
12//    documentation and/or other materials provided with the distribution.
13// 3. The name of the author may not be used to endorse or promote products
14//    derived from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#import <ANTLR/antlr.h>
28#import "ANTLRLexer.h"
29
30@implementation ANTLRLexer
31
32@synthesize input;
33@synthesize ruleNestingLevel;
34#pragma mark Initializer
35
/** Creates a lexer that reads from anInput and is backed by a newly
 *  allocated ANTLRRecognizerSharedState.
 *
 *  @param anInput the character stream to tokenize; it is retained.
 *  @return the initialized lexer, or nil if the superclass initializer failed.
 */
- (id) initWithCharStream:(id<ANTLRCharStream>)anInput
{
    // NOTE(review): the state allocated here is never released in this method;
    // whether -initWithState: takes over ownership is not visible here -- confirm.
    ANTLRRecognizerSharedState *freshState = [[ANTLRRecognizerSharedState alloc] init];
    self = [super initWithState:freshState];
    if ( self == nil ) {
        return nil;
    }
    input = [anInput retain];
    // If the shared state already carries a token, point it at the new stream.
    if (state.token != nil) {
        [((ANTLRCommonToken *)state.token) setInput:anInput];
    }
    ruleNestingLevel = 0;
    return self;
}
47
/** Creates a lexer that reads from anInput and uses the caller-supplied
 *  recognizer state.
 *
 *  @param anInput the character stream to tokenize; it is retained.
 *  @param aState  the shared state handed to the superclass initializer.
 *  @return the initialized lexer, or nil if the superclass initializer failed.
 */
- (id) initWithCharStream:(id<ANTLRCharStream>)anInput State:(ANTLRRecognizerSharedState *)aState
{
    self = [super initWithState:aState];
    if ( self == nil ) {
        return nil;
    }
    input = [anInput retain];
    // If the shared state already carries a token, point it at the new stream.
    if (state.token != nil) {
        [((ANTLRCommonToken *)state.token) setInput:anInput];
    }
    ruleNestingLevel = 0;
    return self;
}
59
/** Gives up the lexer's ownership of its input stream before the
 *  superclass tears down the rest of the object.
 */
- (void) dealloc
{
    // Sending -release to nil is a no-op, so no explicit nil test is required.
    [input release];
    [super dealloc];
}
65
/** Produces a new lexer of the receiver's class that shares the receiver's
 *  input stream and rule nesting level.
 *
 *  The duplicate is created with plain -init rather than through the
 *  superclass copy, and its input is assigned through the property setter.
 */
- (id) copyWithZone:(NSZone *)aZone
{
    ANTLRLexer *duplicate = [[[self class] allocWithZone:aZone] init];
    if ( input != nil ) {
        duplicate.input = input;
    }
    duplicate.ruleNestingLevel = ruleNestingLevel;
    return duplicate;
}
77
/** Returns the lexer to its initial condition: the superclass state is
 *  reset, the input (if any) is rewound to index 0, and the per-token
 *  fields of the shared state are cleared.
 */
- (void) reset
{
    // Let the superclass reset the recognizer-level state first.
    [super reset];

    // Rewind the character stream when one is attached.
    if ( input != nil ) {
        [input seek:0];
    }

    // Clear the lexer-specific fields only when a shared state exists.
    if ( state != nil ) {
        state.token = nil;
        state.type = ANTLRCommonToken.INVALID_TOKEN_TYPE;
        state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
        state.tokenStartCharIndex = -1;
        state.tokenStartCharPositionInLine = -1;
        state.tokenStartLine = -1;
        state.text = nil;
    }
}
96
97// token stuff
98#pragma mark Tokens
99
/** Returns the token currently held by the shared recognizer state. */
- (id<ANTLRToken>)getToken
{
    id<ANTLRToken> currentToken = [state getToken];
    return currentToken;
}
104
/** Stores aToken in the shared state unless it is already the current token.
 *
 *  @param aToken the token to store; may be nil to clear the slot.
 */
- (void) setToken: (id<ANTLRToken>) aToken
{
    if (state.token == aToken) {
        return; // already current, nothing to do
    }
    // NOTE(review): the token is retained here before being handed to the
    // state's setter; if that setter retains as well this over-retains -- confirm.
    [aToken retain];
    state.token = aToken;
}
112
113
114// this method may be overridden in the generated lexer if we generate a filtering lexer.
- (id<ANTLRToken>) nextToken
{
    // Loop until a token is produced; skipped tokens and reported errors
    // both fall through to another iteration.
    for (;;) {
        // Start every attempt from a clean per-token state, recording where
        // in the input the candidate token begins.
        [self setToken:nil];
        state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
        state.tokenStartCharIndex = input.index;
        state.tokenStartCharPositionInLine = input.charPositionInLine;
        state.tokenStartLine = input.line;
        state.text = nil;

        // End of input: hand back an EOF token positioned at the current index.
        if ([input LA:1] == ANTLRCharStreamEOF) {
            ANTLRCommonToken *eofToken = [ANTLRCommonToken newToken:input
                                                               Type:ANTLRTokenTypeEOF
                                                            Channel:ANTLRCommonToken.DEFAULT_CHANNEL
                                                              Start:input.index
                                                               Stop:input.index];
            [eofToken setLine:input.line];
            [eofToken setCharPositionInLine:input.charPositionInLine];
            return eofToken;
        }

        @try {
            [self mTokens];
            if ( state.token == nil ) {
                // The rule matched without emitting; emit the default token.
                [self emit];
            }
            else if ( state.token == [ANTLRCommonToken skipToken] ) {
                // The rule asked for this match to be skipped; try again.
                continue;
            }
            return state.token;
        }
        @catch (ANTLRNoViableAltException *noViableAlt) {
            // Report, then recover before retrying.
            [self reportError:noViableAlt];
            [self recover:noViableAlt];
        }
        @catch (ANTLRRecognitionException *recognitionError) {
            // Report only; no recovery is performed in this handler.
            [self reportError:recognitionError];
        }
    }
}
156
/** Abstract entry point for token matching; the generated lexer source is
 *  expected to define it. This base version only raises the standard
 *  "does not recognize selector" error.
 */
- (void) mTokens
{
    [self doesNotRecognizeSelector:@selector(mTokens)];
}
161
/** Marks the current match as skipped by storing the shared skip token in
 *  the recognizer state; -nextToken compares against that same token.
 */
- (void) skip
{
    id<ANTLRToken> skipMarker = [ANTLRCommonToken skipToken];
    state.token = skipMarker;
}
166
/** Returns the character stream this lexer reads from (may be nil). */
- (id<ANTLRCharStream>) input
{
    id<ANTLRCharStream> currentStream = input;
    return currentStream;
}
171
/** Replaces the lexer's character stream and resets the lexer.
 *
 *  The new stream is retained before the old one is released, so assigning
 *  the stream the lexer already holds leaves its retain count balanced
 *  (previously that case retained again without a matching release).
 *
 *  @param anInput the new character stream; may be nil to detach the lexer.
 */
- (void) setInput:(id<ANTLRCharStream>) anInput
{
    id<ANTLRCharStream> previousInput = input;

    // Take ownership of the incoming stream first; this keeps it alive even
    // when it is the very object being replaced.
    [anInput retain];

    // Reset with no stream attached so -reset does not rewind either stream.
    input = nil;
    [self reset];

    input = anInput;

    // Drop the ownership held on the old stream (no-op when it was nil).
    [previousInput release];
}
182
183/** Currently does not support multiple emits per nextToken invocation
184 *  for efficiency reasons.  Subclass and override this method and
185 *  nextToken (to push tokens into a list and pull from that list rather
186 *  than a single variable as this implementation does).
187 */
- (void) emit:(id<ANTLRToken>)aToken
{
    // Single-slot emission: the token simply becomes the shared state's
    // current token, replacing whatever was stored before.
    state.token = aToken;
}
192
193/** The standard method called to automatically emit a token at the
194 *  outermost lexical rule.  The token object should point into the
195 *  char buffer start..stop.  If there is a text override in 'text',
196 *  use that to set the token's text.  Override this method to emit
197 *  custom Token objects.
198 *
199 *  If you are building trees, then you should also override
200 *  Parser or TreeParser.getMissingSymbol().
201 */
- (void) emit
{
    // Build a token from the type, channel and start index recorded in the
    // shared state; it stops one position before the stream's current index.
    id<ANTLRToken> freshToken = [ANTLRCommonToken newToken:input
                                                      Type:state.type
                                                   Channel:state.channel
                                                     Start:state.tokenStartCharIndex
                                                      Stop:input.index-1];
    [freshToken setLine:state.tokenStartLine];
    // -text returns the override text when one is set, otherwise the matched input.
    freshToken.text = [self text];
    [freshToken setCharPositionInLine:state.tokenStartCharPositionInLine];

    // NOTE(review): the token is retained here and never released in this
    // method; whether that balances with ownership elsewhere is not visible -- confirm.
    [freshToken retain];
    [self emit:freshToken];
}
216
217// matching
218#pragma mark Matching
/** Matches the characters of aString against the input, consuming one input
 *  character per matched character.
 *
 *  On the first mismatch: while backtracking, state.failed is set to YES and
 *  the method returns; otherwise an ANTLRMismatchedTokenException carrying
 *  the offending character is passed to -recover: and then thrown.
 *
 *  @param aString the literal text expected next in the input.
 */
- (void) matchString:(NSString *)aString
{
    // NSUInteger matches the type returned by -length and taken by
    // -characterAtIndex:, avoiding a narrowing conversion on 64-bit targets.
    NSUInteger stringLength = [aString length];
    NSUInteger i = 0;
    while ( i < stringLength ) {
        unichar c = [input LA:1];
        unichar expected = [aString characterAtIndex:i];
        if ( c != expected ) {
            if ([state getBacktracking] > 0) {
                state.failed = YES;
                return;
            }
            ANTLRMismatchedTokenException *mte = [ANTLRMismatchedTokenException newExceptionChar:expected Stream:input];
            mte.c = c;
            [self recover:mte];
            @throw mte;
        }
        i++;
        [input consume];
        state.failed = NO;
    }
}
241
/** Accepts whatever character is next by consuming it unconditionally. */
- (void) matchAny
{
    [input consume];
}
246
/** Matches a single expected character against the next input character.
 *
 *  On success the character is consumed and state.failed is cleared. On a
 *  mismatch while backtracking, state.failed is set to YES; otherwise an
 *  ANTLRMismatchedTokenException is passed to -recover: and then thrown.
 *
 *  @param aChar the character expected next in the input.
 */
- (void) matchChar:(unichar) aChar
{
    // TODO: -LA: returns an int because the generated parser sometimes compares
    //       lookahead against a token type. Switch those uses to -LT: if ANTLR
    //       can be made to generate LA only for lexer code.
    unichar lookahead = [input LA:1];

    if ( lookahead == aChar ) {
        [input consume];
        state.failed = NO;
        return;
    }

    if ([state getBacktracking] > 0) {
        state.failed = YES;
        return;
    }

    ANTLRMismatchedTokenException *mismatch = [ANTLRMismatchedTokenException newExceptionChar:aChar Stream:input];
    mismatch.c = lookahead;
    [self recover:mismatch];
    @throw mismatch;
}
266
/** Matches the next input character against the inclusive range
 *  fromChar..toChar.
 *
 *  On success the character is consumed and state.failed is cleared. On a
 *  mismatch while backtracking, state.failed is set to YES; otherwise an
 *  ANTLRMismatchedRangeException is passed to -recover: and then thrown.
 *
 *  The exception's NSRange is built as (location = fromChar,
 *  length = toChar - fromChar + 1), so that location + length - 1 is the
 *  upper bound, which is how -getErrorMessage:TokenNames: reads it.
 *
 *  @param fromChar lowest acceptable character (inclusive).
 *  @param toChar   highest acceptable character (inclusive).
 */
- (void) matchRangeFromChar:(unichar)fromChar to:(unichar)toChar
{
    unichar charLA = (unichar)[input LA:1];
    if ( charLA >= fromChar && charLA <= toChar ) {
        [input consume];
        state.failed = NO;
        return;
    }

    if ([state getBacktracking] > 0) {
        state.failed = YES;
        return;
    }

    // An inverted range can never match; report it with length 0 rather
    // than letting the subtraction wrap around.
    NSUInteger rangeLength = 0;
    if ( toChar >= fromChar ) {
        rangeLength = (NSUInteger)(toChar - fromChar) + 1;
    }
    ANTLRMismatchedRangeException  *mre = [ANTLRMismatchedRangeException
                newException:NSMakeRange((NSUInteger)fromChar, rangeLength)
                      stream:input];
    mre.c = charLA;
    [self recover:mre];
    @throw mre;
}
285
286	// info
287#pragma mark Informational
288
/** Returns the line number reported by the input stream. */
- (NSUInteger) line
{
    NSUInteger currentLine = input.line;
    return currentLine;
}
293
/** Returns the in-line character position reported by the input stream. */
- (NSUInteger) charPositionInLine
{
    NSUInteger currentPosition = input.charPositionInLine;
    return currentPosition;
}
298
/** Always returns 0.
 *
 *  NOTE(review): this looks like a placeholder -- it does not consult the
 *  input stream's index. Confirm the intended meaning before relying on it.
 */
- (NSInteger) index
{
    const NSInteger fixedIndex = 0;
    return fixedIndex;
}
303
/** Returns the text of the token being matched: the override stored in the
 *  shared state when there is one, otherwise the input characters from the
 *  token's start index up to (not including) the stream's current index.
 */
- (NSString *) text
{
    NSString *overrideText = state.text;
    if (overrideText != nil) {
        return overrideText;
    }
    NSRange matchedRange = NSMakeRange(state.tokenStartCharIndex, input.index-state.tokenStartCharIndex);
    return [input substringWithRange:matchedRange];
}
311
/** Stores an override text in the shared state; -text returns it in
 *  preference to the matched input while it is non-nil.
 *
 *  @param theText the replacement text, or nil to remove the override.
 */
- (void) setText:(NSString *) theText
{
    state.text = theText;
}
316
317	// error handling
/** Reports a recognition error by displaying it together with the
 *  receiver's token names.
 *
 *  @param e the recognition exception to report.
 */
- (void) reportError:(ANTLRRecognitionException *)e
{
    /* TODO: lexer error recovery has not been designed yet. The intended
     * guard would suppress further reports until a token matches again:
     *
     *   if ( errorRecovery ) {
     *       //System.err.print("[SPURIOUS] ");
     *       return;
     *   }
     *   errorRecovery = true;
     */
    id names = [self getTokenNames];
    [self displayRecognitionError:names Exception:e];
}
333
/** Builds a human-readable message for a lexer recognition error.
 *
 *  The exception classes are tested in a fixed order (the not-set mismatch
 *  before the plain set mismatch); anything unrecognized is delegated to the
 *  superclass. The tokenNames argument is not consulted: the fallback asks
 *  the receiver for its own token names.
 *
 *  @param e          the exception to describe.
 *  @param tokenNames unused by this implementation.
 *  @return the formatted message.
 */
- (NSString *)getErrorMessage:(ANTLRRecognitionException *)e TokenNames:(AMutableArray *)tokenNames
{
    if ( [e isKindOfClass:[ANTLRMismatchedTokenException class]] ) {
        ANTLRMismatchedTokenException *tokenMismatch = (ANTLRMismatchedTokenException *)e;
        return [NSString stringWithFormat:@"mismatched character \"%@\" expecting \"%@\"",
                [self getCharErrorDisplay:tokenMismatch.c],
                [self getCharErrorDisplay:tokenMismatch.expecting]];
    }

    if ( [e isKindOfClass:[ANTLRNoViableAltException class]] ) {
        // During development the decision description, decision number and
        // state number carried by the exception can be appended here.
        ANTLRNoViableAltException *noViableAlt = (ANTLRNoViableAltException *)e;
        return [NSString stringWithFormat:@"no viable alternative at character \"%@\"",
                [self getCharErrorDisplay:(noViableAlt.c)]];
    }

    if ( [e isKindOfClass:[ANTLREarlyExitException class]] ) {
        // During development the decision number can be appended here.
        ANTLREarlyExitException *earlyExit = (ANTLREarlyExitException *)e;
        return [NSString stringWithFormat:@"required (...)+ loop did not match anything at character \"%@\"",
                [self getCharErrorDisplay:(earlyExit.c)]];
    }

    if ( [e isKindOfClass:[ANTLRMismatchedNotSetException class]] ) {
        ANTLRMismatchedNotSetException *notSetMismatch = (ANTLRMismatchedNotSetException *)e;
        return [NSString stringWithFormat:@"mismatched character \"%@\"  expecting set \"%@\"",
                [self getCharErrorDisplay:(notSetMismatch.c)], notSetMismatch.expecting];
    }

    if ( [e isKindOfClass:[ANTLRMismatchedSetException class]] ) {
        ANTLRMismatchedSetException *setMismatch = (ANTLRMismatchedSetException *)e;
        return [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@\"",
                [self getCharErrorDisplay:(setMismatch.c)], setMismatch.expecting];
    }

    if ( [e isKindOfClass:[ANTLRMismatchedRangeException class]] ) {
        // The upper bound is derived from the range as location + length - 1.
        ANTLRMismatchedRangeException *rangeMismatch = (ANTLRMismatchedRangeException *)e;
        return [NSString stringWithFormat:@"mismatched character \"%@\" \"%@..%@\"",
                [self getCharErrorDisplay:(rangeMismatch.c)],
                [self getCharErrorDisplay:(rangeMismatch.range.location)],
                [self getCharErrorDisplay:(rangeMismatch.range.location+rangeMismatch.range.length-1)]];
    }

    return [super getErrorMessage:e TokenNames:[self getTokenNames]];
}
380
/** Returns a printable representation of a character code for use in
 *  error messages.
 *
 *  EOF is shown as "<EOF>", and newline, tab and carriage return as their
 *  backslash escapes. Any other value in the UTF-16 code unit range is
 *  formatted with %C so non-ASCII characters are no longer truncated to a
 *  single byte. Values outside that range keep the previous byte-wide
 *  formatting.
 *
 *  @param c the character code to display.
 *  @return the display string for c.
 */
- (NSString *)getCharErrorDisplay:(NSInteger)c
{
    switch ( c ) {
        case ANTLRTokenTypeEOF :
            return @"<EOF>";
        case '\n' :
            return @"\\n";
        case '\t' :
            return @"\\t";
        case '\r' :
            return @"\\r";
        default:
            break;
    }

    if ( c >= 0 && c <= 0xFFFF ) {
        // %C formats a 16-bit unichar, preserving characters above 0x7F.
        return [NSString stringWithFormat:@"%C", (unichar)c];
    }

    // Out-of-range codes: keep the legacy single-byte rendering.
    return [NSString stringWithFormat:@"%c", (char)c];
}
403
/** Lexers can normally match any char in its vocabulary after matching
405 *  a token, so do the easy thing and just kill a character and hope
406 *  it all works out.  You can instead use the rule invocation stack
407 *  to do sophisticated error recovery if you are in a fragment rule.
408 */
- (void)recover:(ANTLRRecognitionException *)re
{
    // Simplest possible recovery: drop one character from the input. The
    // exception itself is not inspected.
    // (debug aid in the Java original: print the consumed char and the stack trace)
    [input consume];
}
415
/** Traces entry into a rule, passing the superclass a description of the
 *  next input character and the current line/column.
 *
 *  The format arguments are cast explicitly so they match their conversion
 *  specifiers on both 32- and 64-bit targets; the produced text is unchanged.
 *
 *  @param ruleName  name of the rule being entered.
 *  @param ruleIndex index of the rule being entered.
 */
- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    // assumes -LT: on a char stream yields an integer character code -- TODO confirm
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%lu:%lu\n",
                             (int)[input LT:1],
                             (unsigned long)input.line,
                             (unsigned long)input.charPositionInLine];
    [super traceIn:ruleName Index:ruleIndex Object:inputSymbol];
}
421
/** Traces exit from a rule, passing the superclass a description of the
 *  next input character and the current line/column.
 *
 *  The format arguments are cast explicitly so they match their conversion
 *  specifiers on both 32- and 64-bit targets; the produced text is unchanged.
 *
 *  @param ruleName  name of the rule being left.
 *  @param ruleIndex index of the rule being left.
 */
- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    // assumes -LT: on a char stream yields an integer character code -- TODO confirm
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%lu:%lu\n",
                             (int)[input LT:1],
                             (unsigned long)input.line,
                             (unsigned long)input.charPositionInLine];
    [super traceOut:ruleName Index:ruleIndex Object:inputSymbol];
}
427
428@end
429