/*
 * [The "BSD licence"]
 * Copyright (c) 2005-2008 Terence Parr
 * All rights reserved.
 *
 * Conversion to C#:
 * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

namespace Antlr.Runtime
{
    using ConditionalAttribute = System.Diagnostics.ConditionalAttribute;

    /** <summary>
     *  A lexer is a recognizer that draws input symbols from a character stream.
     *  Lexer grammars result in a subclass of this object. A Lexer object
     *  uses simplified match() and error recovery mechanisms in the interest
     *  of speed.
     *  </summary>
     */
    public abstract class Lexer : BaseRecognizer, ITokenSource
    {
        /** <summary>Where is the lexer drawing characters from?</summary> */
        protected ICharStream input;

        /** <summary>
         *  Creates a lexer without an input stream. The stream is left unset
         *  here; assign <see cref="CharStream"/> before requesting tokens.
         *  </summary>
         */
        public Lexer()
        {
        }

        /** <summary>Creates a lexer that reads from the given character stream.</summary> */
        public Lexer( ICharStream input )
        {
            this.input = input;
        }

        /** <summary>
         *  Creates a lexer that reads from the given character stream and hands
         *  the supplied recognizer state to the base recognizer.
         *  </summary>
         */
        public Lexer( ICharStream input, RecognizerSharedState state )
            : base( state )
        {
            this.input = input;
        }

        #region Properties

        /** <summary>
         *  Gets the text matched so far for the current token, or the text
         *  override if one was set. Setting this property replaces the complete
         *  text of the token and wipes any previous override.
         *  </summary>
         */
        public string Text
        {
            get
            {
                // An explicit override always wins over the characters in the stream.
                if ( state.text != null )
                {
                    return state.text;
                }

                return input.Substring( state.tokenStartCharIndex, CharIndex - state.tokenStartCharIndex );
            }
            set
            {
                state.text = value;
            }
        }

        /** <summary>Gets or sets the line value of the underlying input stream.</summary> */
        public int Line
        {
            get
            {
                return input.Line;
            }
            set
            {
                input.Line = value;
            }
        }

        /** <summary>Gets or sets the character-position-in-line value of the underlying input stream.</summary> */
        public int CharPositionInLine
        {
            get
            {
                return input.CharPositionInLine;
            }
            set
            {
                input.CharPositionInLine = value;
            }
        }

        #endregion

        /** <summary>
         *  Resets the base recognizer, rewinds the input stream (when one is
         *  attached) to index 0 and clears the per-token lexer state (when a
         *  shared state object exists).
         *  </summary>
         */
        public override void Reset()
        {
            base.Reset(); // reset all recognizer state variables

            // wack Lexer state variables
            if ( input != null )
            {
                input.Seek( 0 ); // rewind the input
            }

            if ( state == null )
            {
                return; // no shared state work to do
            }

            state.token = null;
            state.type = TokenTypes.Invalid;
            state.channel = TokenChannels.Default;
            state.tokenStartCharIndex = -1;
            state.tokenStartCharPositionInLine = -1;
            state.tokenStartLine = -1;
            state.text = null;
        }

        /** <summary>
         *  Return a token from this source; i.e., match a token on the char stream.
         *  </summary>
         *
         *  <remarks>
         *  Loops until a token can be returned: rules that finish with the token
         *  set to Tokens.Skip and recognition errors both cause another attempt.
         *  When the next character is the end-of-file marker the result of
         *  <see cref="GetEndOfFileToken"/> is returned.
         *  </remarks>
         */
        public virtual IToken NextToken()
        {
            for ( ; ; )
            {
                // Start every attempt from a clean per-token state.
                state.token = null;
                state.channel = TokenChannels.Default;
                state.tokenStartCharIndex = input.Index;
                state.tokenStartCharPositionInLine = input.CharPositionInLine;
                state.tokenStartLine = input.Line;
                state.text = null;

                if ( input.LA( 1 ) == CharStreamConstants.EndOfFile )
                {
                    return GetEndOfFileToken();
                }

                try
                {
                    ParseNextToken();
                    if ( state.token == null )
                    {
                        // The rule did not emit anything itself; build the default token.
                        Emit();
                    }
                    else if ( state.token == Tokens.Skip )
                    {
                        continue;
                    }

                    return state.token;
                }
                catch ( MismatchedRangeException mre )
                {
                    ReportError( mre );
                    // MatchRange() routine has already called Recover()
                }
                catch ( MismatchedTokenException mte )
                {
                    ReportError( mte );
                    // Match() routine has already called Recover()
                }
                catch ( RecognitionException re )
                {
                    ReportError( re );
                    Recover( re ); // throw out current char and try again
                }
            }
        }

        /** <summary>
         *  Returns the EOF token (default). If you need to return a custom
         *  token instead, override this method.
         *  </summary>
         */
        public virtual IToken GetEndOfFileToken()
        {
            IToken eof = new CommonToken( input, CharStreamConstants.EndOfFile, TokenChannels.Default, input.Index, input.Index );
            eof.Line = Line;
            eof.CharPositionInLine = CharPositionInLine;
            return eof;
        }

        /** <summary>
         *  Instruct the lexer to skip creating a token for the current lexer rule
         *  and look for another token. NextToken() knows to keep looking when
         *  a lexer rule finishes with the token set to Tokens.Skip. Recall that
         *  if the token is null at the end of any token rule, NextToken() creates
         *  one for you and emits it.
         *  </summary>
         */
        public virtual void Skip()
        {
            state.token = Tokens.Skip;
        }

        /** <summary>This is the lexer entry point that sets instance var 'token'</summary> */
        public abstract void mTokens();

        /** <summary>
         *  Gets the character stream the lexer reads from. Setting it resets
         *  the lexer and then attaches the new stream.
         *  </summary>
         */
        public virtual ICharStream CharStream
        {
            get
            {
                return input;
            }
            set
            {
                // Detach the old stream first so Reset() does not seek it.
                input = null;
                Reset();
                input = value;
            }
        }

        /** <summary>
         *  Gets the source name reported by the input stream, or null when no
         *  input stream is attached.
         *  </summary>
         */
        public override string SourceName
        {
            get
            {
                // A lexer built with the parameterless constructor has no stream yet;
                // report "no name" rather than failing with a NullReferenceException.
                if ( input == null )
                {
                    return null;
                }

                return input.SourceName;
            }
        }

        /** <summary>
         *  Currently does not support multiple emits per NextToken invocation
         *  for efficiency reasons. Subclass and override this method and
         *  NextToken (to push tokens into a list and pull from that list rather
         *  than a single variable as this implementation does).
         *  </summary>
         */
        public virtual void Emit( IToken token )
        {
            state.token = token;
        }

        /** <summary>
         *  The standard method called to automatically emit a token at the
         *  outermost lexical rule. The token object should point into the
         *  char buffer start..stop. If there is a text override in 'text',
         *  use that to set the token's text. Override this method to emit
         *  custom Token objects.
         *  </summary>
         *
         *  <remarks>
         *  If you are building trees, then you should also override
         *  Parser or TreeParser.getMissingSymbol().
         *  </remarks>
         */
        public virtual IToken Emit()
        {
            // The token stops at the last consumed character, i.e. one before the current index.
            IToken t = new CommonToken( input, state.type, state.channel, state.tokenStartCharIndex, CharIndex - 1 );
            t.Line = state.tokenStartLine;
            t.Text = state.text;
            t.CharPositionInLine = state.tokenStartCharPositionInLine;
            Emit( t );
            return t;
        }

        /** <summary>
         *  Matches the characters of <paramref name="s"/>, in order, against the input.
         *  </summary>
         *
         *  <remarks>
         *  Characters matched before a mismatch stay consumed. On a mismatch while
         *  backtracking, state.failed is set and the method returns; otherwise a
         *  MismatchedTokenException is created, passed to Recover() and thrown.
         *  </remarks>
         */
        public virtual void Match( string s )
        {
            int i = 0;
            while ( i < s.Length )
            {
                if ( input.LA( 1 ) != s[i] )
                {
                    if ( state.backtracking > 0 )
                    {
                        state.failed = true;
                        return;
                    }

                    MismatchedTokenException mte = new MismatchedTokenException( s[i], input, TokenNames );
                    Recover( mte );
                    throw mte;
                }

                i++;
                input.Consume();
                state.failed = false;
            }
        }

        /** <summary>Consumes the next input character without checking it.</summary> */
        public virtual void MatchAny()
        {
            input.Consume();
        }

        /** <summary>
         *  Matches the single character <paramref name="c"/> against the input.
         *  </summary>
         *
         *  <remarks>
         *  On a mismatch while backtracking, state.failed is set and the method
         *  returns; otherwise a MismatchedTokenException is created, passed to
         *  Recover() and thrown.
         *  </remarks>
         */
        public virtual void Match( int c )
        {
            if ( input.LA( 1 ) != c )
            {
                if ( state.backtracking > 0 )
                {
                    state.failed = true;
                    return;
                }

                MismatchedTokenException mte = new MismatchedTokenException( c, input, TokenNames );
                Recover( mte ); // don't really recover; just consume in lexer
                throw mte;
            }

            input.Consume();
            state.failed = false;
        }

        /** <summary>
         *  Matches one input character in the inclusive range
         *  <paramref name="a"/>..<paramref name="b"/>.
         *  </summary>
         *
         *  <remarks>
         *  On a mismatch while backtracking, state.failed is set and the method
         *  returns; otherwise a MismatchedRangeException is created, passed to
         *  Recover() and thrown.
         *  </remarks>
         */
        public virtual void MatchRange( int a, int b )
        {
            // Read the lookahead once; both bounds are tested against the same character.
            int next = input.LA( 1 );
            if ( next < a || next > b )
            {
                if ( state.backtracking > 0 )
                {
                    state.failed = true;
                    return;
                }

                MismatchedRangeException mre = new MismatchedRangeException( a, b, input );
                Recover( mre );
                throw mre;
            }

            input.Consume();
            state.failed = false;
        }

        /** <summary>What is the index of the current character of lookahead?</summary> */
        public virtual int CharIndex
        {
            get
            {
                return input.Index;
            }
        }

        /** <summary>Reports a recognition error by displaying it with this lexer's token names.</summary> */
        public override void ReportError( RecognitionException e )
        {
            /* TODO: not thought about recovery in lexer yet.
             *
            // if we've already reported an error and have not matched a token
            // yet successfully, don't report any errors.
            if ( errorRecovery ) {
                //System.err.print("[SPURIOUS] ");
                return;
            }
            errorRecovery = true;
             */

            DisplayRecognitionError( this.TokenNames, e );
        }

        /** <summary>
         *  Builds the error message for a recognition exception raised by the
         *  lexer. Exception types not handled here are delegated to the base
         *  implementation.
         *  </summary>
         */
        public override string GetErrorMessage( RecognitionException e, string[] tokenNames )
        {
            string msg = null;
            if ( e is MismatchedTokenException )
            {
                MismatchedTokenException mte = (MismatchedTokenException)e;
                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting " + GetCharErrorDisplay( mte.Expecting );
            }
            else if ( e is NoViableAltException )
            {
                // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
                // and "(decision="+nvae.decisionNumber+") and
                // "state "+nvae.stateNumber
                msg = "no viable alternative at character " + GetCharErrorDisplay( e.Character );
            }
            else if ( e is EarlyExitException )
            {
                // for development, can add "(decision="+eee.decisionNumber+")"
                msg = "required (...)+ loop did not match anything at character " + GetCharErrorDisplay( e.Character );
            }
            else if ( e is MismatchedNotSetException )
            {
                MismatchedNotSetException mse = (MismatchedNotSetException)e;
                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mse.Expecting;
            }
            else if ( e is MismatchedSetException )
            {
                MismatchedSetException mse = (MismatchedSetException)e;
                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mse.Expecting;
            }
            else if ( e is MismatchedRangeException )
            {
                MismatchedRangeException mre = (MismatchedRangeException)e;
                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " +
                      GetCharErrorDisplay( mre.A ) + ".." + GetCharErrorDisplay( mre.B );
            }
            else
            {
                msg = base.GetErrorMessage( e, tokenNames );
            }

            return msg;
        }

        /** <summary>
         *  Returns a single-quoted, printable form of the character
         *  <paramref name="c"/> for use in error messages. The end-of-file value
         *  is shown as &lt;EOF&gt;; newline, tab and carriage return are shown
         *  as escape sequences.
         *  </summary>
         */
        public virtual string GetCharErrorDisplay( int c )
        {
            string s;
            switch ( c )
            {
                case TokenTypes.EndOfFile:
                    s = "<EOF>";
                    break;
                case '\n':
                    s = "\\n";
                    break;
                case '\t':
                    s = "\\t";
                    break;
                case '\r':
                    s = "\\r";
                    break;
                default:
                    // Only ordinary characters are converted; the EOF sentinel is not a
                    // real character and is handled above before any cast to char.
                    s = ( (char)c ).ToString();
                    break;
            }

            return "'" + s + "'";
        }

        /** <summary>
         *  Lexers can normally match any char in its vocabulary after matching
         *  a token, so do the easy thing and just kill a character and hope
         *  it all works out. You can instead use the rule invocation stack
         *  to do sophisticated error recovery if you are in a fragment rule.
         *  </summary>
         */
        public virtual void Recover( RecognitionException re )
        {
            //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
            //re.printStackTrace();
            input.Consume();
        }

        /** <summary>
         *  Forwards a rule-entry trace event to the base recognizer, describing
         *  the next input character together with the current line and position.
         *  Calls are compiled only when ANTLR_TRACE is defined.
         *  </summary>
         */
        [Conditional( "ANTLR_TRACE" )]
        public virtual void TraceIn( string ruleName, int ruleIndex )
        {
            string inputSymbol = ( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
            base.TraceIn( ruleName, ruleIndex, inputSymbol );
        }

        /** <summary>
         *  Forwards a rule-exit trace event to the base recognizer, describing
         *  the next input character together with the current line and position.
         *  Calls are compiled only when ANTLR_TRACE is defined.
         *  </summary>
         */
        [Conditional( "ANTLR_TRACE" )]
        public virtual void TraceOut( string ruleName, int ruleIndex )
        {
            string inputSymbol = ( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
            base.TraceOut( ruleName, ruleIndex, inputSymbol );
        }

        /** <summary>
         *  Hook invoked by NextToken() to match one token; the default
         *  implementation calls <see cref="mTokens"/>.
         *  </summary>
         */
        protected virtual void ParseNextToken()
        {
            mTokens();
        }
    }
}