1 /** \file
2  *
3  * Base implementation of an antlr 3 lexer.
4  *
 * An ANTLR3 lexer implements a base recognizer, a token source and
 * a lexer interface. It constructs a base recognizer with default
 * functions, then overrides any of these that are parser specific (usually
 * the default implementation of the base recognizer).
9  */
10 
11 // [The "BSD licence"]
12 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13 // http://www.temporal-wave.com
14 // http://www.linkedin.com/in/jimidle
15 //
16 // All rights reserved.
17 //
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions
20 // are met:
21 // 1. Redistributions of source code must retain the above copyright
22 //    notice, this list of conditions and the following disclaimer.
23 // 2. Redistributions in binary form must reproduce the above copyright
24 //    notice, this list of conditions and the following disclaimer in the
25 //    documentation and/or other materials provided with the distribution.
26 // 3. The name of the author may not be used to endorse or promote products
27 //    derived from this software without specific prior written permission.
28 //
29 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 
40 #include    <antlr3lexer.h>
41 
42 static void					mTokens						(pANTLR3_LEXER lexer);
43 static void					setCharStream				(pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input);
44 static void					pushCharStream				(pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input);
45 static void					popCharStream				(pANTLR3_LEXER lexer);
46 
47 static void					emitNew						(pANTLR3_LEXER lexer,  pANTLR3_COMMON_TOKEN token);
48 static pANTLR3_COMMON_TOKEN emit						(pANTLR3_LEXER lexer);
49 static ANTLR3_BOOLEAN	    matchs						(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string);
50 static ANTLR3_BOOLEAN	    matchc						(pANTLR3_LEXER lexer, ANTLR3_UCHAR c);
51 static ANTLR3_BOOLEAN	    matchRange					(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high);
52 static void					matchAny					(pANTLR3_LEXER lexer);
53 static void					recover						(pANTLR3_LEXER lexer);
54 static ANTLR3_UINT32	    getLine						(pANTLR3_LEXER lexer);
55 static ANTLR3_MARKER	    getCharIndex				(pANTLR3_LEXER lexer);
56 static ANTLR3_UINT32	    getCharPositionInLine		(pANTLR3_LEXER lexer);
57 static pANTLR3_STRING	    getText						(pANTLR3_LEXER lexer);
58 static pANTLR3_COMMON_TOKEN nextToken					(pANTLR3_TOKEN_SOURCE toksource);
59 
60 static void					displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 * tokenNames);
61 static void					reportError					(pANTLR3_BASE_RECOGNIZER rec);
62 static void *				getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
63 static void *				getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
64 															ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
65 
66 static void					reset						(pANTLR3_BASE_RECOGNIZER rec);
67 
68 static void					freeLexer					(pANTLR3_LEXER lexer);
69 
70 
71 ANTLR3_API pANTLR3_LEXER
antlr3LexerNew(ANTLR3_UINT32 sizeHint,pANTLR3_RECOGNIZER_SHARED_STATE state)72 antlr3LexerNew(ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
73 {
74     pANTLR3_LEXER   lexer;
75     pANTLR3_COMMON_TOKEN	specialT;
76 
77 	/* Allocate memory
78 	*/
79 	lexer   = (pANTLR3_LEXER) ANTLR3_MALLOC(sizeof(ANTLR3_LEXER));
80 
81 	if	(lexer == NULL)
82 	{
83 		return	NULL;
84 	}
85 
86 	/* Now we need to create the base recognizer
87 	*/
88 	lexer->rec	    =  antlr3BaseRecognizerNew(ANTLR3_TYPE_LEXER, sizeHint, state);
89 
90 	if	(lexer->rec == NULL)
91 	{
92 		lexer->free(lexer);
93 		return	NULL;
94 	}
95 	lexer->rec->super  =  lexer;
96 
97 	lexer->rec->displayRecognitionError	    = displayRecognitionError;
98 	lexer->rec->reportError					= reportError;
99 	lexer->rec->reset						= reset;
100 	lexer->rec->getCurrentInputSymbol		= getCurrentInputSymbol;
101 	lexer->rec->getMissingSymbol			= getMissingSymbol;
102 
103 	/* Now install the token source interface
104 	*/
105 	if	(lexer->rec->state->tokSource == NULL)
106 	{
107 		lexer->rec->state->tokSource	= (pANTLR3_TOKEN_SOURCE)ANTLR3_CALLOC(1, sizeof(ANTLR3_TOKEN_SOURCE));
108 
109 		if	(lexer->rec->state->tokSource == NULL)
110 		{
111 			lexer->rec->free(lexer->rec);
112 			lexer->free(lexer);
113 
114 			return	NULL;
115 		}
116 		lexer->rec->state->tokSource->super    =  lexer;
117 
118 		/* Install the default nextToken() method, which may be overridden
119 		 * by generated code, or by anything else in fact.
120 		 */
121 		lexer->rec->state->tokSource->nextToken	    =  nextToken;
122 		lexer->rec->state->tokSource->strFactory    = NULL;
123 
124 		lexer->rec->state->tokFactory				= NULL;
125 	}
126 
127     /* Install the lexer API
128      */
129     lexer->setCharStream			=  setCharStream;
130     lexer->mTokens					= (void (*)(void *))(mTokens);
131     lexer->setCharStream			=  setCharStream;
132     lexer->pushCharStream			=  pushCharStream;
133     lexer->popCharStream			=  popCharStream;
134     lexer->emit						=  emit;
135     lexer->emitNew					=  emitNew;
136     lexer->matchs					=  matchs;
137     lexer->matchc					=  matchc;
138     lexer->matchRange				=  matchRange;
139     lexer->matchAny					=  matchAny;
140     lexer->recover					=  recover;
141     lexer->getLine					=  getLine;
142     lexer->getCharIndex				=  getCharIndex;
143     lexer->getCharPositionInLine    =  getCharPositionInLine;
144     lexer->getText					=  getText;
145     lexer->free						=  freeLexer;
146 
147     /* Initialise the eof token
148      */
149     specialT					= &(lexer->rec->state->tokSource->eofToken);
150     antlr3SetTokenAPI	  (specialT);
151     specialT->setType	  (specialT, ANTLR3_TOKEN_EOF);
152     specialT->factoryMade		= ANTLR3_TRUE;					// Prevent things trying to free() it
153     specialT->strFactory        = NULL;
154 	specialT->textState			= ANTLR3_TEXT_NONE;
155 	specialT->custom			= NULL;
156 	specialT->user1				= 0;
157 	specialT->user2				= 0;
158 	specialT->user3				= 0;
159 
160 	// Initialize the skip token.
161 	//
162     specialT					= &(lexer->rec->state->tokSource->skipToken);
163     antlr3SetTokenAPI	  (specialT);
164     specialT->setType	  (specialT, ANTLR3_TOKEN_INVALID);
165     specialT->factoryMade		= ANTLR3_TRUE;					// Prevent things trying to free() it
166     specialT->strFactory        = NULL;
167 	specialT->custom			= NULL;
168 	specialT->user1				= 0;
169 	specialT->user2				= 0;
170 	specialT->user3				= 0;
171     return  lexer;
172 }
173 
174 static void
reset(pANTLR3_BASE_RECOGNIZER rec)175 reset	(pANTLR3_BASE_RECOGNIZER rec)
176 {
177     pANTLR3_LEXER   lexer;
178 
179     lexer   = (pANTLR3_LEXER)rec->super;
180 
181     lexer->rec->state->token			    = NULL;
182     lexer->rec->state->type			    = ANTLR3_TOKEN_INVALID;
183     lexer->rec->state->channel			    = ANTLR3_TOKEN_DEFAULT_CHANNEL;
184     lexer->rec->state->tokenStartCharIndex	    = -1;
185     lexer->rec->state->tokenStartCharPositionInLine = -1;
186     lexer->rec->state->tokenStartLine		    = -1;
187 
188     lexer->rec->state->text	                    = NULL;
189 
190     // OK - that's all hunky dory, but we may well have had
191     // a token factory that needs a reset. Do that here
192     //
193     if  (lexer->rec->state->tokFactory != NULL)
194     {
195         lexer->rec->state->tokFactory->reset(lexer->rec->state->tokFactory);
196     }
197 }
198 
199 ///
200 /// \brief
201 /// Returns the next available token from the current input stream.
202 ///
203 /// \param toksource
204 /// Points to the implementation of a token source. The lexer is
205 /// addressed by the super structure pointer.
206 ///
207 /// \returns
208 /// The next token in the current input stream or the EOF token
209 /// if there are no more tokens.
210 ///
211 /// \remarks
212 /// Write remarks for nextToken here.
213 ///
214 /// \see nextToken
215 ///
216 ANTLR3_INLINE static pANTLR3_COMMON_TOKEN
nextTokenStr(pANTLR3_TOKEN_SOURCE toksource)217 nextTokenStr	    (pANTLR3_TOKEN_SOURCE toksource)
218 {
219     pANTLR3_LEXER                   lexer;
220     pANTLR3_RECOGNIZER_SHARED_STATE state;
221     pANTLR3_INPUT_STREAM            input;
222     pANTLR3_INT_STREAM              istream;
223 
224     lexer   = (pANTLR3_LEXER)(toksource->super);
225     state   = lexer->rec->state;
226     input   = lexer->input;
227     istream = input->istream;
228 
229     /// Loop until we get a non skipped token or EOF
230     ///
231     for	(;;)
232     {
233         // Get rid of any previous token (token factory takes care of
234         // any de-allocation when this token is finally used up.
235         //
236         state->token		    = NULL;
237         state->error		    = ANTLR3_FALSE;	    // Start out without an exception
238         state->failed		    = ANTLR3_FALSE;
239 
240         // Now call the matching rules and see if we can generate a new token
241         //
242         for	(;;)
243         {
244             // Record the start of the token in our input stream.
245             //
246             state->channel			    = ANTLR3_TOKEN_DEFAULT_CHANNEL;
247             state->tokenStartCharIndex	            = (ANTLR3_MARKER)(((pANTLR3_UINT8)input->nextChar));
248             state->tokenStartCharPositionInLine     = input->charPositionInLine;
249             state->tokenStartLine		    = input->line;
250             state->text			            = NULL;
251             state->custom                           = NULL;
252             state->user1                            = 0;
253             state->user2                            = 0;
254             state->user3                            = 0;
255 
256             if  (istream->_LA(istream, 1) == ANTLR3_CHARSTREAM_EOF)
257             {
258                 // Reached the end of the current stream, nothing more to do if this is
259                 // the last in the stack.
260                 //
261                 pANTLR3_COMMON_TOKEN    teof = &(toksource->eofToken);
262 
263                 teof->setStartIndex (teof, lexer->getCharIndex(lexer));
264                 teof->setStopIndex  (teof, lexer->getCharIndex(lexer));
265                 teof->setLine	    (teof, lexer->getLine(lexer));
266                 teof->factoryMade = ANTLR3_TRUE;	// This isn't really manufactured but it stops things from trying to free it
267                 return  teof;
268             }
269 
270             state->token		= NULL;
271             state->error		= ANTLR3_FALSE;	    // Start out without an exception
272             state->failed		= ANTLR3_FALSE;
273 
274             // Call the generated lexer, see if it can get a new token together.
275             //
276             lexer->mTokens(lexer->ctx);
277 
278             if  (state->error  == ANTLR3_TRUE)
279             {
280                 // Recognition exception, report it and try to recover.
281                 //
282                 state->failed	    = ANTLR3_TRUE;
283                 lexer->rec->reportError(lexer->rec);
284                 lexer->recover(lexer);
285             }
286             else
287             {
288                 if (state->token == NULL)
289                 {
290                     // Emit the real token, which adds it in to the token stream basically
291                     //
292                     emit(lexer);
293                 }
294                 else if	(state->token ==  &(toksource->skipToken))
295                 {
296                     // A real token could have been generated, but "Computer say's naaaaah" and it
297                     // it is just something we need to skip altogether.
298                     //
299                     continue;
300                 }
301 
302                 // Good token, not skipped, not EOF token
303                 //
304                 return  state->token;
305             }
306         }
307     }
308 }
309 
310 /**
311  * \brief
312  * Default implementation of the nextToken() call for a lexer.
313  *
314  * \param toksource
315  * Points to the implementation of a token source. The lexer is
316  * addressed by the super structure pointer.
317  *
318  * \returns
319  * The next token in the current input stream or the EOF token
320  * if there are no more tokens in any input stream in the stack.
321  *
322  * Write detailed description for nextToken here.
323  *
324  * \remarks
325  * Write remarks for nextToken here.
326  *
327  * \see nextTokenStr
328  */
329 static pANTLR3_COMMON_TOKEN
nextToken(pANTLR3_TOKEN_SOURCE toksource)330 nextToken	    (pANTLR3_TOKEN_SOURCE toksource)
331 {
332 	pANTLR3_COMMON_TOKEN tok;
333 
334 	// Find the next token in the current stream
335 	//
336 	tok = nextTokenStr(toksource);
337 
338 	// If we got to the EOF token then switch to the previous
339 	// input stream if there were any and just return the
340 	// EOF if there are none. We must check the next token
341 	// in any outstanding input stream we pop into the active
342 	// role to see if it was sitting at EOF after PUSHing the
343 	// stream we just consumed, otherwise we will return EOF
344 	// on the reinstalled input stream, when in actual fact
345 	// there might be more input streams to POP before the
346 	// real EOF of the whole logical input stream. Hence we
347 	// use a while loop here until we find something in the stream
348 	// that isn't EOF or we reach the actual end of the last input
349 	// stream on the stack.
350 	//
351 	while	((tok != NULL) && (tok->type == ANTLR3_TOKEN_EOF))
352 	{
353 		pANTLR3_LEXER   lexer;
354 
355 		lexer   = (pANTLR3_LEXER)(toksource->super);
356 
357 		if  (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
358 		{
359 			// We have another input stream in the stack so we
360 			// need to revert to it, then resume the loop to check
361 			// it wasn't sitting at EOF itself.
362 			//
363 			lexer->popCharStream(lexer);
364 			tok = nextTokenStr(toksource);
365 		}
366 		else
367 		{
368 			// There were no more streams on the input stack
369 			// so this EOF is the 'real' logical EOF for
370 			// the input stream. So we just exit the loop and
371 			// return the EOF we have found.
372 			//
373 			break;
374 		}
375 
376 	}
377 
378 	// return whatever token we have, which may be EOF
379 	//
380 	return  tok;
381 }
382 
383 ANTLR3_API pANTLR3_LEXER
antlr3LexerNewStream(ANTLR3_UINT32 sizeHint,pANTLR3_INPUT_STREAM input,pANTLR3_RECOGNIZER_SHARED_STATE state)384 antlr3LexerNewStream(ANTLR3_UINT32 sizeHint, pANTLR3_INPUT_STREAM input, pANTLR3_RECOGNIZER_SHARED_STATE state)
385 {
386     pANTLR3_LEXER   lexer;
387 
388     // Create a basic lexer first
389     //
390     lexer   = antlr3LexerNew(sizeHint, state);
391 
392     if	(lexer != NULL)
393     {
394 		// Install the input stream and reset the lexer
395 		//
396 		setCharStream(lexer, input);
397     }
398 
399     return  lexer;
400 }
401 
/**
 * Placeholder rule entry point installed by antlr3LexerNew(). It only
 * complains on stderr that no lexer rules have been added.
 *
 * \param lexer
 * The lexer; only tested against NULL, which also keeps compilers from
 * warning about an unused parameter.
 */
static void
mTokens(pANTLR3_LEXER lexer)
{
    if  (lexer == NULL)
    {
        return;
    }

    ANTLR3_FPRINTF(stderr, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n");
}
409 
410 static void
reportError(pANTLR3_BASE_RECOGNIZER rec)411 reportError		    (pANTLR3_BASE_RECOGNIZER rec)
412 {
413     // Indicate this recognizer had an error while processing.
414 	//
415 	rec->state->errorCount++;
416 
417     rec->displayRecognitionError(rec, rec->state->tokenNames);
418 }
419 
420 #ifdef	ANTLR3_WINDOWS
421 #pragma warning( disable : 4100 )
422 #endif
423 
424 /** Default lexer error handler (works for 8 bit streams only!!!)
425  */
426 static void
displayRecognitionError(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_UINT8 * tokenNames)427 displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
428 {
429     pANTLR3_LEXER			lexer;
430 	pANTLR3_EXCEPTION	    ex;
431 	pANTLR3_STRING			ftext;
432 
433     lexer   = (pANTLR3_LEXER)(recognizer->super);
434 	ex		= lexer->rec->state->exception;
435 
436 	// See if there is a 'filename' we can use
437     //
438     if	(ex->name == NULL)
439     {
440 		ANTLR3_FPRINTF(stderr, "-unknown source-(");
441     }
442     else
443     {
444 		ftext = ex->streamName->to8(ex->streamName);
445 		ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
446     }
447 
448     ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
449     ANTLR3_FPRINTF(stderr, ": lexer error %d :\n\t%s at offset %d, ",
450 						ex->type,
451 						(pANTLR3_UINT8)	   (ex->message),
452 					    ex->charPositionInLine+1
453 		    );
454 	{
455 		ANTLR3_INT32	width;
456 
457 		width	= ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)(lexer->input->data) + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index));
458 
459 		if	(width >= 1)
460 		{
461 			if	(isprint(ex->c))
462 			{
463 				ANTLR3_FPRINTF(stderr, "near '%c' :\n", ex->c);
464 			}
465 			else
466 			{
467 				ANTLR3_FPRINTF(stderr, "near char(%#02X) :\n", (ANTLR3_UINT8)(ex->c));
468 			}
469 			ANTLR3_FPRINTF(stderr, "\t%.*s\n", width > 20 ? 20 : width ,((pANTLR3_UINT8)ex->index));
470 		}
471 		else
472 		{
473 			ANTLR3_FPRINTF(stderr, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n");
474 			ANTLR3_FPRINTF(stderr, "\t The lexer was matching from line %d, offset %d, which\n\t ",
475 								(ANTLR3_UINT32)(lexer->rec->state->tokenStartLine),
476 								(ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine)
477 								);
478 			width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)(lexer->input->data)+(lexer->input->size(lexer->input))) - (pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
479 
480 			if	(width >= 1)
481 			{
482 				ANTLR3_FPRINTF(stderr, "looks like this:\n\t\t%.*s\n", width > 20 ? 20 : width ,(pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
483 			}
484 			else
485 			{
486 				ANTLR3_FPRINTF(stderr, "is also the end of the line, so you must check your lexer rules\n");
487 			}
488 		}
489 	}
490 }
491 
/**
 * Installs an input stream as the lexer's current character stream.
 *
 * \param lexer     The lexer to receive the stream.
 * \param input     The stream to install.
 *
 * The token factory is created from the stream on first use and merely
 * re-pointed at the new stream afterwards. The token source's string
 * factory (and the EOF token's) is taken from the stream only if not
 * already set. The token in progress is cleared and the stream's file name
 * is copied to the token source.
 */
static void
setCharStream(pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input)
{
    pANTLR3_RECOGNIZER_SHARED_STATE shared  = lexer->rec->state;
    pANTLR3_TOKEN_SOURCE            source  = shared->tokSource;

    /* Install the input interface
     */
    lexer->input    = input;

    /* An existing token factory is never destroyed here, only when the
     * lexer is destroyed, as people may still be using the tokens it
     * produced. It is just told which input stream to adorn new tokens
     * with, so that they can supply their text from the correct stream.
     * Without a factory, one is created for this stream.
     */
    if  (shared->tokFactory != NULL)
    {
        shared->tokFactory->setInputStream(shared->tokFactory, input);
    }
    else
    {
        shared->tokFactory  = antlr3TokenFactoryNew(input);
    }

    /* Propagate the string factory so that we preserve the encoding form
     * from the input stream, including for the pre-made EOF token.
     */
    if  (source->strFactory == NULL)
    {
        source->strFactory  = input->strFactory;

        if  (source->eofToken.strFactory == NULL)
        {
            source->eofToken.strFactory = input->strFactory;
        }
    }

    /* This is a lexer, install the appropriate exception creator
     */
    lexer->rec->exConstruct = antlr3RecognitionExceptionNew;

    /* Set the current token to nothing
     */
    shared->token               = NULL;
    shared->text                = NULL;
    shared->tokenStartCharIndex = -1;

    /* Copy the name of the char stream to the token source
     */
    source->fileName    = input->fileName;
}
548 
549 /*!
550  * \brief
551  * Change to a new input stream, remembering the old one.
552  *
553  * \param lexer
554  * Pointer to the lexer instance to switch input streams for.
555  *
556  * \param input
557  * New input stream to install as the current one.
558  *
559  * Switches the current character input stream to
560  * a new one, saving the old one, which we will revert to at the end of this
561  * new one.
562  */
563 static void
pushCharStream(pANTLR3_LEXER lexer,pANTLR3_INPUT_STREAM input)564 pushCharStream  (pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input)
565 {
566 	// Do we need a new input stream stack?
567 	//
568 	if	(lexer->rec->state->streams == NULL)
569 	{
570 		// This is the first call to stack a new
571 		// stream and so we must create the stack first.
572 		//
573 		lexer->rec->state->streams = antlr3StackNew(0);
574 
575 		if  (lexer->rec->state->streams == NULL)
576 		{
577 			// Could not do this, we just fail to push it.
578 			// TODO: Consider if this is what we want to do, but then
579 			//       any programmer can override this method to do something else.
580 			return;
581 		}
582 	}
583 
584 	// We have a stack, so we can save the current input stream
585 	// into it.
586 	//
587 	lexer->input->istream->mark(lexer->input->istream);
588 	lexer->rec->state->streams->push(lexer->rec->state->streams, lexer->input, NULL);
589 
590 	// And now we can install this new one
591 	//
592 	lexer->setCharStream(lexer, input);
593 }
594 
595 /*!
596  * \brief
597  * Stops using the current input stream and reverts to any prior
598  * input stream on the stack.
599  *
600  * \param lexer
601  * Description of parameter lexer.
602  *
603  * Pointer to a function that abandons the current input stream, whether it
604  * is empty or not and reverts to the previous stacked input stream.
605  *
606  * \remark
607  * The function fails silently if there are no prior input streams.
608  */
609 static void
popCharStream(pANTLR3_LEXER lexer)610 popCharStream   (pANTLR3_LEXER lexer)
611 {
612     pANTLR3_INPUT_STREAM input;
613 
614     // If we do not have a stream stack or we are already at the
615     // stack bottom, then do nothing.
616     //
617     if	(lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
618     {
619 	// We just leave the current stream to its fate, we do not close
620 	// it or anything as we do not know what the programmer intended
621 	// for it. This method can always be overridden of course.
622 	// So just find out what was currently saved on the stack and use
623 	// that now, then pop it from the stack.
624 	//
625 	input	= (pANTLR3_INPUT_STREAM)(lexer->rec->state->streams->top);
626 	lexer->rec->state->streams->pop(lexer->rec->state->streams);
627 
628 	// Now install the stream as the current one.
629 	//
630 	lexer->setCharStream(lexer, input);
631 	lexer->input->istream->rewindLast(lexer->input->istream);
632     }
633     return;
634 }
635 
/**
 * Records a caller supplied token as the lexer's current token.
 *
 * \param lexer     The lexer whose shared state receives the token.
 * \param token     The token to record; it is stored as is.
 */
static void
emitNew(pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token)
{
    pANTLR3_RECOGNIZER_SHARED_STATE shared = lexer->rec->state;

    shared->token = token;
}
640 
641 static pANTLR3_COMMON_TOKEN
emit(pANTLR3_LEXER lexer)642 emit	    (pANTLR3_LEXER lexer)
643 {
644     pANTLR3_COMMON_TOKEN	token;
645 
646     /* We could check pointers to token factories and so on, but
647     * we are in code that we want to run as fast as possible
648     * so we are not checking any errors. So make sure you have installed an input stream before
649     * trying to emit a new token.
650     */
651     token   = lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory);
652 	if (token == NULL) { return NULL; }
653 
654     /* Install the supplied information, and some other bits we already know
655     * get added automatically, such as the input stream it is associated with
656     * (though it can all be overridden of course)
657     */
658     token->type		    = lexer->rec->state->type;
659     token->channel	    = lexer->rec->state->channel;
660     token->start	    = lexer->rec->state->tokenStartCharIndex;
661     token->stop		    = lexer->getCharIndex(lexer) - 1;
662     token->line		    = lexer->rec->state->tokenStartLine;
663     token->charPosition	= lexer->rec->state->tokenStartCharPositionInLine;
664 
665     if	(lexer->rec->state->text != NULL)
666     {
667         token->textState	    = ANTLR3_TEXT_STRING;
668         token->tokText.text	    = lexer->rec->state->text;
669     }
670     else
671     {
672         token->textState	= ANTLR3_TEXT_NONE;
673     }
674     token->lineStart	= lexer->input->currentLine;
675     token->user1	= lexer->rec->state->user1;
676     token->user2	= lexer->rec->state->user2;
677     token->user3	= lexer->rec->state->user3;
678     token->custom	= lexer->rec->state->custom;
679 
680     lexer->rec->state->token	    = token;
681 
682     return  token;
683 }
684 
685 /**
686  * Free the resources allocated by a lexer
687  */
688 static void
freeLexer(pANTLR3_LEXER lexer)689 freeLexer    (pANTLR3_LEXER lexer)
690 {
691 	// This may have ben a delegate or delegator lexer, in which case the
692 	// state may already have been freed (and set to NULL therefore)
693 	// so we ignore the state if we don't have it.
694 	//
695 	if	(lexer->rec->state != NULL)
696 	{
697 		if	(lexer->rec->state->streams != NULL)
698 		{
699 			lexer->rec->state->streams->free(lexer->rec->state->streams);
700 		}
701 		if	(lexer->rec->state->tokFactory != NULL)
702 		{
703 			lexer->rec->state->tokFactory->close(lexer->rec->state->tokFactory);
704 			lexer->rec->state->tokFactory = NULL;
705 		}
706 		if	(lexer->rec->state->tokSource != NULL)
707 		{
708 			ANTLR3_FREE(lexer->rec->state->tokSource);
709 			lexer->rec->state->tokSource = NULL;
710 		}
711 	}
712 	if	(lexer->rec != NULL)
713 	{
714 		lexer->rec->free(lexer->rec);
715 		lexer->rec = NULL;
716 	}
717 	ANTLR3_FREE(lexer);
718 }
719 
720 /** Implementation of matchs for the lexer, overrides any
721  *  base implementation in the base recognizer.
722  *
723  *  \remark
724  *  Note that the generated code lays down arrays of ints for constant
725  *  strings so that they are int UTF32 form!
726  */
727 static ANTLR3_BOOLEAN
matchs(pANTLR3_LEXER lexer,ANTLR3_UCHAR * string)728 matchs(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string)
729 {
730 	while   (*string != ANTLR3_STRING_TERMINATOR)
731 	{
732 		if  (lexer->input->istream->_LA(lexer->input->istream, 1) != (*string))
733 		{
734 			if	(lexer->rec->state->backtracking > 0)
735 			{
736 				lexer->rec->state->failed = ANTLR3_TRUE;
737 				return ANTLR3_FALSE;
738 			}
739 
740 			lexer->rec->exConstruct(lexer->rec);
741 			lexer->rec->state->failed	 = ANTLR3_TRUE;
742 
743 			/* TODO: Implement exception creation more fully perhaps
744 			 */
745 			lexer->recover(lexer);
746 			return  ANTLR3_FALSE;
747 		}
748 
749 		/* Matched correctly, do consume it
750 		 */
751 		lexer->input->istream->consume(lexer->input->istream);
752 		string++;
753 
754 		/* Reset any failed indicator
755 		 */
756 		lexer->rec->state->failed = ANTLR3_FALSE;
757 	}
758 
759 
760 	return  ANTLR3_TRUE;
761 }
762 
763 /** Implementation of matchc for the lexer, overrides any
764  *  base implementation in the base recognizer.
765  *
766  *  \remark
767  *  Note that the generated code lays down arrays of ints for constant
768  *  strings so that they are int UTF32 form!
769  */
770 static ANTLR3_BOOLEAN
matchc(pANTLR3_LEXER lexer,ANTLR3_UCHAR c)771 matchc(pANTLR3_LEXER lexer, ANTLR3_UCHAR c)
772 {
773 	if	(lexer->input->istream->_LA(lexer->input->istream, 1) == c)
774 	{
775 		/* Matched correctly, do consume it
776 		 */
777 		lexer->input->istream->consume(lexer->input->istream);
778 
779 		/* Reset any failed indicator
780 		 */
781 		lexer->rec->state->failed = ANTLR3_FALSE;
782 
783 		return	ANTLR3_TRUE;
784 	}
785 
786 	/* Failed to match, exception and recovery time.
787 	 */
788 	if	(lexer->rec->state->backtracking > 0)
789 	{
790 		lexer->rec->state->failed  = ANTLR3_TRUE;
791 		return	ANTLR3_FALSE;
792 	}
793 
794 	lexer->rec->exConstruct(lexer->rec);
795 
796 	/* TODO: Implement exception creation more fully perhaps
797 	 */
798 	lexer->recover(lexer);
799 
800 	return  ANTLR3_FALSE;
801 }
802 
803 /** Implementation of match range for the lexer, overrides any
804  *  base implementation in the base recognizer.
805  *
806  *  \remark
807  *  Note that the generated code lays down arrays of ints for constant
808  *  strings so that they are int UTF32 form!
809  */
810 static ANTLR3_BOOLEAN
matchRange(pANTLR3_LEXER lexer,ANTLR3_UCHAR low,ANTLR3_UCHAR high)811 matchRange(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high)
812 {
813     ANTLR3_UCHAR    c;
814 
815     /* What is in the stream at the moment?
816      */
817     c	= lexer->input->istream->_LA(lexer->input->istream, 1);
818     if	( c >= low && c <= high)
819     {
820 	/* Matched correctly, consume it
821 	 */
822 	lexer->input->istream->consume(lexer->input->istream);
823 
824 	/* Reset any failed indicator
825 	 */
826 	lexer->rec->state->failed = ANTLR3_FALSE;
827 
828 	return	ANTLR3_TRUE;
829     }
830 
831     /* Failed to match, execption and recovery time.
832      */
833 
834     if	(lexer->rec->state->backtracking > 0)
835     {
836 	lexer->rec->state->failed  = ANTLR3_TRUE;
837 	return	ANTLR3_FALSE;
838     }
839 
840     lexer->rec->exConstruct(lexer->rec);
841 
842     /* TODO: Implement exception creation more fully
843      */
844     lexer->recover(lexer);
845 
846     return  ANTLR3_FALSE;
847 }
848 
849 static void
matchAny(pANTLR3_LEXER lexer)850 matchAny	    (pANTLR3_LEXER lexer)
851 {
852     lexer->input->istream->consume(lexer->input->istream);
853 }
854 
855 static void
recover(pANTLR3_LEXER lexer)856 recover	    (pANTLR3_LEXER lexer)
857 {
858     lexer->input->istream->consume(lexer->input->istream);
859 }
860 
861 static ANTLR3_UINT32
getLine(pANTLR3_LEXER lexer)862 getLine	    (pANTLR3_LEXER lexer)
863 {
864     return  lexer->input->getLine(lexer->input);
865 }
866 
867 static ANTLR3_UINT32
getCharPositionInLine(pANTLR3_LEXER lexer)868 getCharPositionInLine	(pANTLR3_LEXER lexer)
869 {
870     return  lexer->input->charPositionInLine;
871 }
872 
getCharIndex(pANTLR3_LEXER lexer)873 static ANTLR3_MARKER	getCharIndex	    (pANTLR3_LEXER lexer)
874 {
875     return lexer->input->istream->index(lexer->input->istream);
876 }
877 
878 static pANTLR3_STRING
getText(pANTLR3_LEXER lexer)879 getText	    (pANTLR3_LEXER lexer)
880 {
881 	if (lexer->rec->state->text)
882 	{
883 		return	lexer->rec->state->text;
884 
885 	}
886 	return  lexer->input->substr(
887 									lexer->input,
888 									lexer->rec->state->tokenStartCharIndex,
889 									lexer->getCharIndex(lexer) - lexer->input->charByteSize
890 							);
891 
892 }
893 
894 static void *
getCurrentInputSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream)895 getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
896 {
897 	return NULL;
898 }
899 
900 static void *
getMissingSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream,pANTLR3_EXCEPTION e,ANTLR3_UINT32 expectedTokenType,pANTLR3_BITSET_LIST follow)901 getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
902 									ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
903 {
904 	return NULL;
905 }
906