1 /*
2  * [The "BSD license"]
3  * Copyright (c) 2011 Terence Parr
4  * All rights reserved.
5  *
6  * Conversion to C#:
7  * Copyright (c) 2011 Sam Harwell, Pixel Mine, Inc.
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 namespace Antlr.Runtime
34 {
35     public abstract class SlimLexer
36         : BaseRecognizer
37         , ITokenSource<SlimToken>
38     {
39         /** <summary>Where is the lexer drawing characters from?</summary> */
40         protected SlimStringStream input;
41         SlimToken _token;
42         bool _emitted;
43         bool _skip;
44 
SlimLexer()45         public SlimLexer()
46         {
47         }
48 
SlimLexer( ICharStream input )49         public SlimLexer( ICharStream input )
50         {
51             this.input = (SlimStringStream)input;
52         }
53 
SlimLexer( ICharStream input, RecognizerSharedState state )54         public SlimLexer( ICharStream input, RecognizerSharedState state )
55             : base( state )
56         {
57             this.input = (SlimStringStream)input;
58         }
59 
60         #region Properties
61         public string Text
62         {
63             /** <summary>Return the text matched so far for the current token or any text override.</summary> */
64             get
65             {
66                 if ( state.text != null )
67                 {
68                     return state.text;
69                 }
70                 return input.Substring( state.tokenStartCharIndex, CharIndex - state.tokenStartCharIndex );
71             }
72             /** <summary>Set the complete text of this token; it wipes any previous changes to the text.</summary> */
73             set
74             {
75                 state.text = value;
76             }
77         }
78         public int Line
79         {
80             get
81             {
82                 return input.Line;
83             }
84             set
85             {
86                 input.Line = value;
87             }
88         }
89         public int CharPositionInLine
90         {
91             get
92             {
93                 return input.CharPositionInLine;
94             }
95             set
96             {
97                 input.CharPositionInLine = value;
98             }
99         }
100         #endregion
101 
Reset()102         public override void Reset()
103         {
104             base.Reset(); // reset all recognizer state variables
105             // wack Lexer state variables
106             if ( input != null )
107             {
108                 input.Seek( 0 ); // rewind the input
109             }
110             if ( state == null )
111             {
112                 return; // no shared state work to do
113             }
114             _token = default( SlimToken );
115             _emitted = false;
116             _skip = false;
117             //state.token = null;
118             state.type = TokenTypes.Invalid;
119             state.channel = TokenChannels.Default;
120             state.tokenStartCharIndex = -1;
121 #if TRACK_POSITION
122             state.tokenStartCharPositionInLine = -1;
123             state.tokenStartLine = -1;
124 #endif
125             state.text = null;
126         }
127 
128         /** <summary>Return a token from this source; i.e., match a token on the char stream.</summary> */
NextToken()129         public virtual SlimToken NextToken()
130         {
131             for ( ; ; )
132             {
133                 _token = default( SlimToken );
134                 _emitted = false;
135                 _skip = false;
136                 //state.token = null;
137                 state.channel = TokenChannels.Default;
138                 state.tokenStartCharIndex = input.Index;
139 #if TRACK_POSITION
140                 state.tokenStartCharPositionInLine = input.CharPositionInLine;
141                 state.tokenStartLine = input.Line;
142 #endif
143                 state.text = null;
144                 if ( input.LA( 1 ) == CharStreamConstants.EndOfFile )
145                 {
146                     return new SlimToken(TokenTypes.EndOfFile);
147                 }
148                 try
149                 {
150                     mTokens();
151                     if ( _skip )
152                     {
153                         continue;
154                     }
155                     else if ( !_emitted )
156                     {
157                         Emit();
158                     }
159 
160                     return _token;
161                 }
162                 catch ( NoViableAltException nva )
163                 {
164                     ReportError( nva );
165                     Recover( nva ); // throw out current char and try again
166                 }
167                 catch ( RecognitionException re )
168                 {
169                     ReportError( re );
170                     // match() routine has already called recover()
171                 }
172             }
173         }
ITokenSource.NextToken()174         IToken ITokenSource.NextToken()
175         {
176             return NextToken();
177         }
178 
179         /** <summary>
180          *  Instruct the lexer to skip creating a token for current lexer rule
181          *  and look for another token.  nextToken() knows to keep looking when
182          *  a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
183          *  if token==null at end of any token rule, it creates one for you
184          *  and emits it.
185          *  </summary>
186          */
Skip()187         public virtual void Skip()
188         {
189             _skip = true;
190             //state.token = Tokens.Skip;
191         }
192 
        /** <summary>
         *  Lexer entry point, implemented by concrete lexers. NextToken() calls it once per
         *  attempt to match a token, and afterwards inspects the skip and emitted flags to
         *  decide whether to emit and return a token.
         *  </summary>
         */
        public abstract void mTokens();
195 
196         public ICharStream CharStream
197         {
198             get
199             {
200                 return input;
201             }
202             /** <summary>Set the char stream and reset the lexer</summary> */
203             set
204             {
205                 input = null;
206                 Reset();
207                 input = (SlimStringStream)value;
208             }
209         }
210 
211         public override string SourceName
212         {
213             get
214             {
215                 return input.SourceName;
216             }
217         }
218 
219         ///** <summary>
220         // *  Currently does not support multiple emits per nextToken invocation
221         // *  for efficiency reasons.  Subclass and override this method and
222         // *  nextToken (to push tokens into a list and pull from that list rather
223         // *  than a single variable as this implementation does).
224         // *  </summary>
225         // */
226         //public void Emit( T token )
227         //{
228         //    _token = token;
229         //}
230 
231         /** <summary>
232          *  The standard method called to automatically emit a token at the
233          *  outermost lexical rule.  The token object should point into the
234          *  char buffer start..stop.  If there is a text override in 'text',
235          *  use that to set the token's text.  Override this method to emit
236          *  custom Token objects.
237          *  </summary>
238          *
239          *  <remarks>
240          *  If you are building trees, then you should also override
241          *  Parser or TreeParser.getMissingSymbol().
242          *  </remarks>
243          */
Emit()244         public void Emit()
245         {
246             _token = new SlimToken()
247             {
248                 //InputStream = input,
249                 Type = state.type,
250                 Channel = state.channel,
251                 //CharPositionInLine = state.tokenStartCharPositionInLine,
252                 //Line = state.tokenStartLine,
253                 //Text = state.text
254             };
255             //Emit( t );
256             //return t;
257 
258             //IToken t = new CommonToken( input, state.type, state.channel, state.tokenStartCharIndex, CharIndex - 1 );
259             //t.Line = state.tokenStartLine;
260             //t.Text = state.text;
261             //t.CharPositionInLine = state.tokenStartCharPositionInLine;
262             //Emit( t );
263             //return t;
264         }
265 
Match( string s )266         public void Match( string s )
267         {
268             int i = 0;
269             while ( i < s.Length )
270             {
271                 if ( input.LA( 1 ) != s[i] )
272                 {
273                     if ( state.backtracking > 0 )
274                     {
275                         state.failed = true;
276                         return;
277                     }
278                     MismatchedTokenException mte = new MismatchedTokenException(s[i], input, TokenNames);
279                     Recover( mte );
280                     throw mte;
281                 }
282                 i++;
283                 input.Consume();
284                 state.failed = false;
285             }
286         }
287 
MatchAny()288         public void MatchAny()
289         {
290             input.Consume();
291         }
292 
Match( int c )293         public void Match( int c )
294         {
295             if ( input.LA( 1 ) != c )
296             {
297                 if ( state.backtracking > 0 )
298                 {
299                     state.failed = true;
300                     return;
301                 }
302                 MismatchedTokenException mte = new MismatchedTokenException(c, input, TokenNames);
303                 Recover( mte );  // don't really recover; just consume in lexer
304                 throw mte;
305             }
306             input.Consume();
307             state.failed = false;
308         }
309 
MatchRange( int a, int b )310         public void MatchRange( int a, int b )
311         {
312             if ( input.LA( 1 ) < a || input.LA( 1 ) > b )
313             {
314                 if ( state.backtracking > 0 )
315                 {
316                     state.failed = true;
317                     return;
318                 }
319                 MismatchedRangeException mre =
320                     new MismatchedRangeException( a, b, input );
321                 Recover( mre );
322                 throw mre;
323             }
324             input.Consume();
325             state.failed = false;
326         }
327 
328         /** <summary>What is the index of the current character of lookahead?</summary> */
329         public int CharIndex
330         {
331             get
332             {
333                 return input.Index;
334             }
335         }
336 
ReportError( RecognitionException e )337         public override void ReportError( RecognitionException e )
338         {
339             /** TODO: not thought about recovery in lexer yet.
340              *
341             // if we've already reported an error and have not matched a token
342             // yet successfully, don't report any errors.
343             if ( errorRecovery ) {
344                 //System.err.print("[SPURIOUS] ");
345                 return;
346             }
347             errorRecovery = true;
348              */
349 
350             DisplayRecognitionError( this.TokenNames, e );
351         }
352 
GetErrorMessage( RecognitionException e, string[] tokenNames )353         public override string GetErrorMessage( RecognitionException e, string[] tokenNames )
354         {
355             string msg = null;
356             if ( e is MismatchedTokenException )
357             {
358                 MismatchedTokenException mte = (MismatchedTokenException)e;
359                 msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting " + GetCharErrorDisplay( mte.Expecting );
360             }
361             else if ( e is NoViableAltException )
362             {
363                 NoViableAltException nvae = (NoViableAltException)e;
364                 // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
365                 // and "(decision="+nvae.decisionNumber+") and
366                 // "state "+nvae.stateNumber
367                 msg = "no viable alternative at character " + GetCharErrorDisplay( e.Character );
368             }
369             else if ( e is EarlyExitException )
370             {
371                 EarlyExitException eee = (EarlyExitException)e;
372                 // for development, can add "(decision="+eee.decisionNumber+")"
373                 msg = "required (...)+ loop did not match anything at character " + GetCharErrorDisplay( e.Character );
374             }
375             else if ( e is MismatchedNotSetException )
376             {
377                 MismatchedNotSetException mse = (MismatchedNotSetException)e;
378                 msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mse.Expecting;
379             }
380             else if ( e is MismatchedSetException )
381             {
382                 MismatchedSetException mse = (MismatchedSetException)e;
383                 msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mse.Expecting;
384             }
385             else if ( e is MismatchedRangeException )
386             {
387                 MismatchedRangeException mre = (MismatchedRangeException)e;
388                 msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " +
389                       GetCharErrorDisplay( mre.A ) + ".." + GetCharErrorDisplay( mre.B );
390             }
391             else
392             {
393                 msg = base.GetErrorMessage( e, tokenNames );
394             }
395             return msg;
396         }
397 
GetCharErrorDisplay( int c )398         public virtual string GetCharErrorDisplay( int c )
399         {
400             string s = ( (char)c ).ToString();
401             switch ( c )
402             {
403             case TokenTypes.EndOfFile:
404                 s = "<EOF>";
405                 break;
406             case '\n':
407                 s = "\\n";
408                 break;
409             case '\t':
410                 s = "\\t";
411                 break;
412             case '\r':
413                 s = "\\r";
414                 break;
415             }
416             return "'" + s + "'";
417         }
418 
419         /** <summary>
420          *  Lexers can normally match any char in it's vocabulary after matching
421          *  a token, so do the easy thing and just kill a character and hope
422          *  it all works out.  You can instead use the rule invocation stack
423          *  to do sophisticated error recovery if you are in a fragment rule.
424          *  </summary>
425          */
Recover( RecognitionException re )426         public virtual void Recover( RecognitionException re )
427         {
428             //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
429             //re.printStackTrace();
430             input.Consume();
431         }
432 
TraceIn( string ruleName, int ruleIndex )433         public virtual void TraceIn( string ruleName, int ruleIndex )
434         {
435             string inputSymbol = ( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
436             base.TraceIn( ruleName, ruleIndex, inputSymbol );
437         }
438 
TraceOut( string ruleName, int ruleIndex )439         public virtual void TraceOut( string ruleName, int ruleIndex )
440         {
441             string inputSymbol = ( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
442             base.TraceOut( ruleName, ruleIndex, inputSymbol );
443         }
444     }
445 }
446