1 /** \file
2  * Contains the base functions that all recognizers require.
3  * Any function can be overridden by a lexer/parser/tree parser or by the
4  * ANTLR3 programmer.
5  *
6  * \addtogroup pANTLR3_BASE_RECOGNIZER
7  * @{
8  */
9 #include    <antlr3baserecognizer.h>
10 
11 // [The "BSD licence"]
12 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13 // http://www.temporal-wave.com
14 // http://www.linkedin.com/in/jimidle
15 //
16 // All rights reserved.
17 //
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions
20 // are met:
21 // 1. Redistributions of source code must retain the above copyright
22 //    notice, this list of conditions and the following disclaimer.
23 // 2. Redistributions in binary form must reproduce the above copyright
24 //    notice, this list of conditions and the following disclaimer in the
25 //    documentation and/or other materials provided with the distribution.
26 // 3. The name of the author may not be used to endorse or promote products
27 //    derived from this software without specific prior written permission.
28 //
29 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 
40 #ifdef	ANTLR3_WINDOWS
41 #pragma warning( disable : 4100 )
42 #endif
43 
44 /* Interface functions -standard implementations cover parser and treeparser
45  * almost completely but are overridden by the parser or tree parser as needed. Lexer overrides
46  * most of these functions.
47  */
48 static void					beginResync					(pANTLR3_BASE_RECOGNIZER recognizer);
49 static pANTLR3_BITSET		computeErrorRecoverySet	    (pANTLR3_BASE_RECOGNIZER recognizer);
50 static void					endResync					(pANTLR3_BASE_RECOGNIZER recognizer);
51 static void					beginBacktrack				(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level);
52 static void					endBacktrack				(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
53 
54 static void *				match						(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
55 static void					matchAny					(pANTLR3_BASE_RECOGNIZER recognizer);
56 static void					mismatch					(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
57 static ANTLR3_BOOLEAN		mismatchIsUnwantedToken		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype);
58 static ANTLR3_BOOLEAN		mismatchIsMissingToken		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow);
59 static void					reportError					(pANTLR3_BASE_RECOGNIZER recognizer);
60 static pANTLR3_BITSET		computeCSRuleFollow			(pANTLR3_BASE_RECOGNIZER recognizer);
61 static pANTLR3_BITSET		combineFollows				(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact);
62 static void					displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames);
63 static void					recover						(pANTLR3_BASE_RECOGNIZER recognizer);
64 static void	*				recoverFromMismatchedToken  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
65 static void	*				recoverFromMismatchedSet    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
66 static ANTLR3_BOOLEAN		recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
67 static void					consumeUntil				(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType);
68 static void					consumeUntilSet				(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set);
69 static pANTLR3_STACK		getRuleInvocationStack	    (pANTLR3_BASE_RECOGNIZER recognizer);
70 static pANTLR3_STACK		getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name);
71 static pANTLR3_HASH_TABLE	toStrings					(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE);
72 static ANTLR3_MARKER		getRuleMemoization			(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart);
73 static ANTLR3_BOOLEAN		alreadyParsedRule			(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex);
74 static void					memoize						(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart);
75 static ANTLR3_BOOLEAN		synpred						(pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx));
76 static void					reset						(pANTLR3_BASE_RECOGNIZER recognizer);
77 static void					freeBR						(pANTLR3_BASE_RECOGNIZER recognizer);
78 static void *				getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
79 static void *				getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
80 															ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
81 static ANTLR3_UINT32		getNumberOfSyntaxErrors		(pANTLR3_BASE_RECOGNIZER recognizer);
82 
83 ANTLR3_API pANTLR3_BASE_RECOGNIZER
antlr3BaseRecognizerNew(ANTLR3_UINT32 type,ANTLR3_UINT32 sizeHint,pANTLR3_RECOGNIZER_SHARED_STATE state)84 antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
85 {
86     pANTLR3_BASE_RECOGNIZER recognizer;
87 
88     // Allocate memory for the structure
89     //
90     recognizer	    = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER));
91 
92     if	(recognizer == NULL)
93     {
94 		// Allocation failed
95 		//
96 		return	NULL;
97     }
98 
99 
100 	// If we have been supplied with a pre-existing recognizer state
101 	// then we just install it, otherwise we must create one from scratch
102 	//
103 	if	(state == NULL)
104 	{
105 		recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE));
106 
107 		if	(recognizer->state == NULL)
108 		{
109 			ANTLR3_FREE(recognizer);
110 			return	NULL;
111 		}
112 
113 		// Initialize any new recognizer state
114 		//
115 		recognizer->state->errorRecovery	= ANTLR3_FALSE;
116 		recognizer->state->lastErrorIndex	= -1;
117 		recognizer->state->failed		= ANTLR3_FALSE;
118 		recognizer->state->errorCount		= 0;
119 		recognizer->state->backtracking		= 0;
120 		recognizer->state->following		= NULL;
121 		recognizer->state->ruleMemo		= NULL;
122 		recognizer->state->tokenNames		= NULL;
123 		recognizer->state->sizeHint             = sizeHint;
124 		recognizer->state->tokSource		= NULL;
125                 recognizer->state->tokFactory           = NULL;
126 
127 		// Rather than check to see if we must initialize
128 		// the stack every time we are asked for an new rewrite stream
129 		// we just always create an empty stack and then just
130 		// free it when the base recognizer is freed.
131 		//
132 		recognizer->state->rStreams		= antlr3VectorNew(0);  // We don't know the size.
133 
134 		if	(recognizer->state->rStreams == NULL)
135 		{
136 			// Out of memory
137 			//
138 			ANTLR3_FREE(recognizer->state);
139 			ANTLR3_FREE(recognizer);
140 			return	NULL;
141 		}
142 	}
143 	else
144 	{
145 		// Install the one we were given, and do not reset it here
146 		// as it will either already have been initialized or will
147 		// be in a state that needs to be preserved.
148 		//
149 		recognizer->state = state;
150 	}
151 
152     // Install the BR API
153     //
154     recognizer->alreadyParsedRule           = alreadyParsedRule;
155     recognizer->beginResync                 = beginResync;
156     recognizer->combineFollows              = combineFollows;
157     recognizer->beginBacktrack              = beginBacktrack;
158     recognizer->endBacktrack                = endBacktrack;
159     recognizer->computeCSRuleFollow         = computeCSRuleFollow;
160     recognizer->computeErrorRecoverySet     = computeErrorRecoverySet;
161     recognizer->consumeUntil                = consumeUntil;
162     recognizer->consumeUntilSet             = consumeUntilSet;
163     recognizer->displayRecognitionError     = displayRecognitionError;
164     recognizer->endResync                   = endResync;
165     recognizer->exConstruct                 = antlr3MTExceptionNew;
166     recognizer->getRuleInvocationStack      = getRuleInvocationStack;
167     recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed;
168     recognizer->getRuleMemoization          = getRuleMemoization;
169     recognizer->match                       = match;
170     recognizer->matchAny                    = matchAny;
171     recognizer->memoize                     = memoize;
172     recognizer->mismatch                    = mismatch;
173     recognizer->mismatchIsUnwantedToken     = mismatchIsUnwantedToken;
174     recognizer->mismatchIsMissingToken      = mismatchIsMissingToken;
175     recognizer->recover                     = recover;
176     recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement;
177     recognizer->recoverFromMismatchedSet    = recoverFromMismatchedSet;
178     recognizer->recoverFromMismatchedToken  = recoverFromMismatchedToken;
179     recognizer->getNumberOfSyntaxErrors     = getNumberOfSyntaxErrors;
180     recognizer->reportError                 = reportError;
181     recognizer->reset                       = reset;
182     recognizer->synpred                     = synpred;
183     recognizer->toStrings                   = toStrings;
184     recognizer->getCurrentInputSymbol       = getCurrentInputSymbol;
185     recognizer->getMissingSymbol            = getMissingSymbol;
186     recognizer->debugger                    = NULL;
187 
188     recognizer->free = freeBR;
189 
190     /* Initialize variables
191      */
192     recognizer->type			= type;
193 
194 
195     return  recognizer;
196 }
197 static void
freeBR(pANTLR3_BASE_RECOGNIZER recognizer)198 freeBR	    (pANTLR3_BASE_RECOGNIZER recognizer)
199 {
200     pANTLR3_EXCEPTION thisE;
201 
202 	// Did we have a state allocated?
203 	//
204 	if	(recognizer->state != NULL)
205 	{
206 		// Free any rule memoization we set up
207 		//
208 		if	(recognizer->state->ruleMemo != NULL)
209 		{
210 			recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
211 			recognizer->state->ruleMemo = NULL;
212 		}
213 
214 		// Free any exception space we have left around
215 		//
216 		thisE = recognizer->state->exception;
217 		if	(thisE != NULL)
218 		{
219 			thisE->freeEx(thisE);
220 		}
221 
222 		// Free any rewrite streams we have allocated
223 		//
224 		if	(recognizer->state->rStreams != NULL)
225 		{
226 			recognizer->state->rStreams->free(recognizer->state->rStreams);
227 		}
228 
229 		// Free up any token factory we created (error recovery for instance)
230 		//
231 		if	(recognizer->state->tokFactory != NULL)
232 		{
233 			recognizer->state->tokFactory->close(recognizer->state->tokFactory);
234 		}
235 		// Free the shared state memory
236 		//
237 		ANTLR3_FREE(recognizer->state);
238 	}
239 
240 	// Free the actual recognizer space
241 	//
242     ANTLR3_FREE(recognizer);
243 }
244 
245 /**
246  * Creates a new Mismatched Token Exception and inserts in the recognizer
247  * exception stack.
248  *
249  * \param recognizer
250  * Context pointer for this recognizer
251  *
252  */
253 ANTLR3_API	void
antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)254 antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
255 {
256     /* Create a basic recognition exception structure
257      */
258     antlr3RecognitionExceptionNew(recognizer);
259 
260     /* Now update it to indicate this is a Mismatched token exception
261      */
262     recognizer->state->exception->name		= ANTLR3_MISMATCHED_EX_NAME;
263     recognizer->state->exception->type		= ANTLR3_MISMATCHED_TOKEN_EXCEPTION;
264 
265     return;
266 }
267 
268 ANTLR3_API	void
antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)269 antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
270 {
271 	pANTLR3_EXCEPTION				ex;
272 	pANTLR3_LEXER					lexer;
273 	pANTLR3_PARSER					parser;
274 	pANTLR3_TREE_PARSER				tparser;
275 
276 	pANTLR3_INPUT_STREAM			ins;
277 	pANTLR3_INT_STREAM				is;
278 	pANTLR3_COMMON_TOKEN_STREAM	    cts;
279 	pANTLR3_TREE_NODE_STREAM	    tns;
280 
281 	ins	    = NULL;
282 	cts	    = NULL;
283 	tns	    = NULL;
284 	is	    = NULL;
285 	lexer   = NULL;
286 	parser  = NULL;
287 	tparser = NULL;
288 
289 	switch	(recognizer->type)
290 	{
291 	case	ANTLR3_TYPE_LEXER:
292 
293 		lexer	= (pANTLR3_LEXER) (recognizer->super);
294 		ins	= lexer->input;
295 		is	= ins->istream;
296 
297 		break;
298 
299 	case	ANTLR3_TYPE_PARSER:
300 
301 		parser  = (pANTLR3_PARSER) (recognizer->super);
302 		cts	= (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super);
303 		is	= parser->tstream->istream;
304 
305 		break;
306 
307 	case	ANTLR3_TYPE_TREE_PARSER:
308 
309 		tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
310 		tns	= tparser->ctnstream->tnstream;
311 		is	= tns->istream;
312 
313 		break;
314 
315 	default:
316 
317 		ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n");
318 		return;
319 
320 		break;
321 	}
322 
323 	/* Create a basic exception structure
324 	 */
325 	ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION,
326 		(void *)ANTLR3_RECOGNITION_EX_NAME,
327 		NULL,
328 		ANTLR3_FALSE);
329 
330 	/* Rest of information depends on the base type of the
331 	 * input stream.
332 	 */
333 	switch  (is->type & ANTLR3_INPUT_MASK)
334 	{
335 	case    ANTLR3_CHARSTREAM:
336 
337 		ex->c			= is->_LA		    	(is, 1);					/* Current input character			*/
338 		ex->line		= ins->getLine			(ins);						/* Line number comes from stream		*/
339 		ex->charPositionInLine	= ins->getCharPositionInLine	(ins);	    /* Line offset also comes from the stream   */
340 		ex->index		= is->index			(is);
341 		ex->streamName		= ins->fileName;
342 		ex->message		= "Unexpected character";
343 		break;
344 
345 	case    ANTLR3_TOKENSTREAM:
346 
347 		ex->token		= cts->tstream->_LT						(cts->tstream, 1);	    /* Current input token			    */
348 		ex->line		= ((pANTLR3_COMMON_TOKEN)(ex->token))->getLine			(ex->token);
349 		ex->charPositionInLine	= ((pANTLR3_COMMON_TOKEN)(ex->token))->getCharPositionInLine	(ex->token);
350 		ex->index		= cts->tstream->istream->index					(cts->tstream->istream);
351 		if	(((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
352 		{
353 			ex->streamName		= NULL;
354 		}
355 		else
356 		{
357 			ex->streamName		= ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName;
358 		}
359 		ex->message		= "Unexpected token";
360 		break;
361 
362 	case    ANTLR3_COMMONTREENODE:
363 
364 		ex->token		= tns->_LT						    (tns, 1);	    /* Current input tree node			    */
365 		ex->line		= ((pANTLR3_BASE_TREE)(ex->token))->getLine		    (ex->token);
366 		ex->charPositionInLine	= ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine   (ex->token);
367 		ex->index		= tns->istream->index					    (tns->istream);
368 
369 		// Are you ready for this? Deep breath now...
370 		//
371 		{
372 			pANTLR3_COMMON_TREE tnode;
373 
374 			tnode		= ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super));
375 
376 			if	(tnode->token    == NULL)
377 			{
378 				ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-");
379 			}
380 			else
381 			{
382 				if	(tnode->token->input == NULL)
383 				{
384 					ex->streamName		= NULL;
385 				}
386 				else
387 				{
388 					ex->streamName		= tnode->token->input->fileName;
389 				}
390 			}
391 			ex->message		= "Unexpected node";
392 		}
393 		break;
394 	}
395 
396 	ex->input						= is;
397 	ex->nextException				= recognizer->state->exception;	/* So we don't leak the memory */
398 	recognizer->state->exception	= ex;
399 	recognizer->state->error	    = ANTLR3_TRUE;	    /* Exception is outstanding	*/
400 
401 	return;
402 }
403 
404 
405 /// Match current input symbol against ttype.  Upon error, do one token
406 /// insertion or deletion if possible.
407 /// To turn off single token insertion or deletion error
408 /// recovery, override mismatchRecover() and have it call
409 /// plain mismatch(), which does not recover.  Then any error
410 /// in a rule will cause an exception and immediate exit from
411 /// rule.  Rule would recover by resynchronizing to the set of
412 /// symbols that can follow rule ref.
413 ///
414 static void *
match(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 ttype,pANTLR3_BITSET_LIST follow)415 match(	pANTLR3_BASE_RECOGNIZER recognizer,
416 		ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
417 {
418     pANTLR3_PARSER			parser;
419     pANTLR3_TREE_PARSER	    tparser;
420     pANTLR3_INT_STREAM	    is;
421 	void					* matchedSymbol;
422 
423     switch	(recognizer->type)
424     {
425 		case	ANTLR3_TYPE_PARSER:
426 
427 			parser  = (pANTLR3_PARSER) (recognizer->super);
428 			tparser	= NULL;
429 			is	= parser->tstream->istream;
430 
431 			break;
432 
433 		case	ANTLR3_TYPE_TREE_PARSER:
434 
435 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
436 			parser	= NULL;
437 			is	= tparser->ctnstream->tnstream->istream;
438 
439 			break;
440 
441 		default:
442 
443 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n");
444 			return ANTLR3_FALSE;
445 
446 			break;
447     }
448 
449 	// Pick up the current input token/node for assignment to labels
450 	//
451 	matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
452 
453     if	(is->_LA(is, 1) == ttype)
454     {
455 		// The token was the one we were told to expect
456 		//
457 		is->consume(is);									// Consume that token from the stream
458 		recognizer->state->errorRecovery	= ANTLR3_FALSE;	// Not in error recovery now (if we were)
459 		recognizer->state->failed			= ANTLR3_FALSE;	// The match was a success
460 		return matchedSymbol;								// We are done
461     }
462 
463     // We did not find the expected token type, if we are backtracking then
464     // we just set the failed flag and return.
465     //
466     if	(recognizer->state->backtracking > 0)
467     {
468 		// Backtracking is going on
469 		//
470 		recognizer->state->failed  = ANTLR3_TRUE;
471 		return matchedSymbol;
472 	}
473 
474     // We did not find the expected token and there is no backtracking
475     // going on, so we mismatch, which creates an exception in the recognizer exception
476     // stack.
477     //
478 	matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
479     return matchedSymbol;
480 }
481 
482 /// Consumes the next token, whatever it is, and resets the recognizer state
483 /// so that it is not in error.
484 ///
485 /// \param recognizer
486 /// Recognizer context pointer
487 ///
488 static void
matchAny(pANTLR3_BASE_RECOGNIZER recognizer)489 matchAny(pANTLR3_BASE_RECOGNIZER recognizer)
490 {
491     pANTLR3_PARSER	    parser;
492     pANTLR3_TREE_PARSER	    tparser;
493     pANTLR3_INT_STREAM	    is;
494 
495     switch	(recognizer->type)
496     {
497 		case	ANTLR3_TYPE_PARSER:
498 
499 			parser  = (pANTLR3_PARSER) (recognizer->super);
500 			tparser	= NULL;
501 			is	= parser->tstream->istream;
502 
503 			break;
504 
505 		case	ANTLR3_TYPE_TREE_PARSER:
506 
507 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
508 			parser	= NULL;
509 			is	= tparser->ctnstream->tnstream->istream;
510 
511 			break;
512 
513 		default:
514 
515 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n");
516 			return;
517 
518 		break;
519     }
520     recognizer->state->errorRecovery	= ANTLR3_FALSE;
521     recognizer->state->failed		    = ANTLR3_FALSE;
522     is->consume(is);
523 
524     return;
525 }
526 ///
527 ///
528 static ANTLR3_BOOLEAN
mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM is,ANTLR3_UINT32 ttype)529 mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype)
530 {
531 	ANTLR3_UINT32 nextt;
532 
533 	nextt = is->_LA(is, 2);
534 
535 	if	(nextt == ttype)
536 	{
537 		if	(recognizer->state->exception != NULL)
538 		{
539 			recognizer->state->exception->expecting = nextt;
540 		}
541 		return ANTLR3_TRUE;		// This token is unknown, but the next one is the one we wanted
542 	}
543 	else
544 	{
545 		return ANTLR3_FALSE;	// Neither this token, nor the one following is the one we wanted
546 	}
547 }
548 
549 ///
550 ///
551 static ANTLR3_BOOLEAN
mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM is,pANTLR3_BITSET_LIST follow)552 mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow)
553 {
554 	ANTLR3_BOOLEAN	retcode;
555 	pANTLR3_BITSET	followClone;
556 	pANTLR3_BITSET	viableTokensFollowingThisRule;
557 
558 	if	(follow == NULL)
559 	{
560 		// There is no information about the tokens that can follow the last one
561 		// hence we must say that the current one we found is not a member of the
562 		// follow set and does not indicate a missing token. We will just consume this
563 		// single token and see if the parser works it out from there.
564 		//
565 		return	ANTLR3_FALSE;
566 	}
567 
568 	followClone						= NULL;
569 	viableTokensFollowingThisRule	= NULL;
570 
571 	// The C bitset maps are laid down at compile time by the
572 	// C code generation. Hence we cannot remove things from them
573 	// and so on. So, in order to remove EOR (if we need to) then
574 	// we clone the static bitset.
575 	//
576 	followClone = antlr3BitsetLoad(follow);
577 	if	(followClone == NULL)
578 	{
579 		return ANTLR3_FALSE;
580 	}
581 
582 	// Compute what can follow this grammar reference
583 	//
584 	if	(followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE))
585 	{
586 		// EOR can follow, but if we are not the start symbol, we
587 		// need to remove it.
588 		//
589 		if	(recognizer->state->following->vector->count >= 0)
590 		{
591 			followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE);
592 		}
593 
594 		// Now compute the visiable tokens that can follow this rule, according to context
595 		// and make them part of the follow set.
596 		//
597 		viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer);
598 		followClone->borInPlace(followClone, viableTokensFollowingThisRule);
599 	}
600 
601 	/// if current token is consistent with what could come after set
602 	/// then we know we're missing a token; error recovery is free to
603 	/// "insert" the missing token
604 	///
605 	/// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
606 	/// in follow set to indicate that the fall of the start symbol is
607 	/// in the set (EOF can follow).
608 	///
609 	if	(		followClone->isMember(followClone, is->_LA(is, 1))
610 			||	followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)
611 		)
612 	{
613 		retcode = ANTLR3_TRUE;
614 	}
615 	else
616 	{
617 		retcode	= ANTLR3_FALSE;
618 	}
619 
620 	if	(viableTokensFollowingThisRule != NULL)
621 	{
622 		viableTokensFollowingThisRule->free(viableTokensFollowingThisRule);
623 	}
624 	if	(followClone != NULL)
625 	{
626 		followClone->free(followClone);
627 	}
628 
629 	return retcode;
630 
631 }
632 
633 /// Factor out what to do upon token mismatch so tree parsers can behave
634 /// differently.  Override and call mismatchRecover(input, ttype, follow)
635 /// to get single token insertion and deletion.  Use this to turn off
636 /// single token insertion and deletion. Override mismatchRecover
637 /// to call this instead.
638 ///
639 /// \remark mismatch only works for parsers and must be overridden for anything else.
640 ///
641 static	void
mismatch(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 ttype,pANTLR3_BITSET_LIST follow)642 mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
643 {
644     pANTLR3_PARSER	    parser;
645     pANTLR3_TREE_PARSER	    tparser;
646     pANTLR3_INT_STREAM	    is;
647 
648     // Install a mismatched token exception in the exception stack
649     //
650     antlr3MTExceptionNew(recognizer);
651     recognizer->state->exception->expecting    = ttype;
652 
653     switch	(recognizer->type)
654     {
655 		case	ANTLR3_TYPE_PARSER:
656 
657 			parser  = (pANTLR3_PARSER) (recognizer->super);
658 			tparser	= NULL;
659 			is	= parser->tstream->istream;
660 
661 			break;
662 
663 		default:
664 
665 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n");
666 			return;
667 
668 			break;
669     }
670 
671 	if	(mismatchIsUnwantedToken(recognizer, is, ttype))
672 	{
673 		// Create a basic recognition exception structure
674 		//
675 	    antlr3RecognitionExceptionNew(recognizer);
676 
677 		// Now update it to indicate this is an unwanted token exception
678 		//
679 		recognizer->state->exception->name		= ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
680 		recognizer->state->exception->type		= ANTLR3_UNWANTED_TOKEN_EXCEPTION;
681 
682 		return;
683 	}
684 
685 	if	(mismatchIsMissingToken(recognizer, is, follow))
686 	{
687 		// Create a basic recognition exception structure
688 		//
689 	    antlr3RecognitionExceptionNew(recognizer);
690 
691 		// Now update it to indicate this is an unwanted token exception
692 		//
693 		recognizer->state->exception->name		= ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
694 		recognizer->state->exception->type		= ANTLR3_MISSING_TOKEN_EXCEPTION;
695 
696 		return;
697 	}
698 
699 	// Just a mismatched token is all we can dtermine
700 	//
701 	antlr3MTExceptionNew(recognizer);
702 
703 	return;
704 }
705 /// Report a recognition problem.
706 ///
707 /// This method sets errorRecovery to indicate the parser is recovering
708 /// not parsing.  Once in recovery mode, no errors are generated.
709 /// To get out of recovery mode, the parser must successfully match
710 /// a token (after a resync).  So it will go:
711 ///
712 ///		1. error occurs
713 ///		2. enter recovery mode, report error
714 ///		3. consume until token found in resynch set
715 ///		4. try to resume parsing
716 ///		5. next match() will reset errorRecovery mode
717 ///
718 /// If you override, make sure to update errorCount if you care about that.
719 ///
720 static void
reportError(pANTLR3_BASE_RECOGNIZER recognizer)721 reportError		    (pANTLR3_BASE_RECOGNIZER recognizer)
722 {
723     	// Invoke the debugger event if there is a debugger listening to us
724 	//
725 	if	(recognizer->debugger != NULL)
726 	{
727 		recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception);
728 	}
729 
730     if	(recognizer->state->errorRecovery == ANTLR3_TRUE)
731     {
732 		// Already in error recovery so don't display another error while doing so
733 		//
734 		return;
735     }
736 
737     // Signal we are in error recovery now
738     //
739     recognizer->state->errorRecovery = ANTLR3_TRUE;
740 
741 	// Indicate this recognizer had an error while processing.
742 	//
743 	recognizer->state->errorCount++;
744 
745 	// Call the error display routine
746 	//
747     recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
748 }
749 
750 static void
beginBacktrack(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 level)751 beginBacktrack		(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level)
752 {
753 	if	(recognizer->debugger != NULL)
754 	{
755 		recognizer->debugger->beginBacktrack(recognizer->debugger, level);
756 	}
757 }
758 
759 static void
endBacktrack(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 level,ANTLR3_BOOLEAN successful)760 endBacktrack		(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful)
761 {
762 	if	(recognizer->debugger != NULL)
763 	{
764 		recognizer->debugger->endBacktrack(recognizer->debugger, level, successful);
765 	}
766 }
767 static void
beginResync(pANTLR3_BASE_RECOGNIZER recognizer)768 beginResync		    (pANTLR3_BASE_RECOGNIZER recognizer)
769 {
770 	if	(recognizer->debugger != NULL)
771 	{
772 		recognizer->debugger->beginResync(recognizer->debugger);
773 	}
774 }
775 
776 static void
endResync(pANTLR3_BASE_RECOGNIZER recognizer)777 endResync		    (pANTLR3_BASE_RECOGNIZER recognizer)
778 {
779 	if	(recognizer->debugger != NULL)
780 	{
781 		recognizer->debugger->endResync(recognizer->debugger);
782 	}
783 }
784 
785 /// Compute the error recovery set for the current rule.
786 /// Documentation below is from the Java implementation.
787 ///
788 /// During rule invocation, the parser pushes the set of tokens that can
789 /// follow that rule reference on the stack; this amounts to
790 /// computing FIRST of what follows the rule reference in the
791 /// enclosing rule. This local follow set only includes tokens
792 /// from within the rule; i.e., the FIRST computation done by
793 /// ANTLR stops at the end of a rule.
794 //
795 /// EXAMPLE
796 //
797 /// When you find a "no viable alt exception", the input is not
798 /// consistent with any of the alternatives for rule r.  The best
799 /// thing to do is to consume tokens until you see something that
800 /// can legally follow a call to r *or* any rule that called r.
801 /// You don't want the exact set of viable next tokens because the
802 /// input might just be missing a token--you might consume the
803 /// rest of the input looking for one of the missing tokens.
804 ///
805 /// Consider grammar:
806 ///
807 /// a : '[' b ']'
808 ///   | '(' b ')'
809 ///   ;
810 /// b : c '^' INT ;
811 /// c : ID
812 ///   | INT
813 ///   ;
814 ///
815 /// At each rule invocation, the set of tokens that could follow
816 /// that rule is pushed on a stack.  Here are the various "local"
817 /// follow sets:
818 ///
819 /// FOLLOW(b1_in_a) = FIRST(']') = ']'
820 /// FOLLOW(b2_in_a) = FIRST(')') = ')'
821 /// FOLLOW(c_in_b) = FIRST('^') = '^'
822 ///
823 /// Upon erroneous input "[]", the call chain is
824 ///
825 /// a -> b -> c
826 ///
827 /// and, hence, the follow context stack is:
828 ///
829 /// depth  local follow set     after call to rule
830 ///   0         <EOF>                    a (from main())
831 ///   1          ']'                     b
///   2          '^'                     c
833 ///
834 /// Notice that ')' is not included, because b would have to have
835 /// been called from a different context in rule a for ')' to be
836 /// included.
837 ///
838 /// For error recovery, we cannot consider FOLLOW(c)
839 /// (context-sensitive or otherwise).  We need the combined set of
840 /// all context-sensitive FOLLOW sets--the set of all tokens that
841 /// could follow any reference in the call chain.  We need to
842 /// resync to one of those tokens.  Note that FOLLOW(c)='^' and if
843 /// we resync'd to that token, we'd consume until EOF.  We need to
844 /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
845 /// In this case, for input "[]", LA(1) is in this set so we would
846 /// not consume anything and after printing an error rule c would
847 /// return normally.  It would not find the required '^' though.
848 /// At this point, it gets a mismatched token error and throws an
849 /// exception (since LA(1) is not in the viable following token
850 /// set).  The rule exception handler tries to recover, but finds
851 /// the same recovery set and doesn't consume anything.  Rule b
852 /// exits normally returning to rule a.  Now it finds the ']' (and
853 /// with the successful match exits errorRecovery mode).
854 ///
855 /// So, you can see that the parser walks up call chain looking
856 /// for the token that was a member of the recovery set.
857 ///
858 /// Errors are not generated in errorRecovery mode.
859 ///
860 /// ANTLR's error recovery mechanism is based upon original ideas:
861 ///
862 /// "Algorithms + Data Structures = Programs" by Niklaus Wirth
863 ///
864 /// and
865 ///
866 /// "A note on error recovery in recursive descent parsers":
867 /// http://portal.acm.org/citation.cfm?id=947902.947905
868 ///
869 /// Later, Josef Grosch had some good ideas:
870 ///
871 /// "Efficient and Comfortable Error Recovery in Recursive Descent
872 /// Parsers":
873 /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
874 ///
875 /// Like Grosch I implemented local FOLLOW sets that are combined
876 /// at run-time upon error to avoid overhead during parsing.
877 ///
878 static pANTLR3_BITSET
computeErrorRecoverySet(pANTLR3_BASE_RECOGNIZER recognizer)879 computeErrorRecoverySet	    (pANTLR3_BASE_RECOGNIZER recognizer)
880 {
881     return   recognizer->combineFollows(recognizer, ANTLR3_FALSE);
882 }
883 
884 /// Compute the context-sensitive FOLLOW set for current rule.
885 /// Documentation below is from the Java runtime.
886 ///
887 /// This is the set of token types that can follow a specific rule
888 /// reference given a specific call chain.  You get the set of
889 /// viable tokens that can possibly come next (look ahead depth 1)
890 /// given the current call chain.  Contrast this with the
891 /// definition of plain FOLLOW for rule r:
892 ///
893 ///  FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
894 ///
895 /// where x in T* and alpha, beta in V*; T is set of terminals and
896 /// V is the set of terminals and non terminals.  In other words,
897 /// FOLLOW(r) is the set of all tokens that can possibly follow
/// references to r in *any* sentential form (context).  At
899 /// runtime, however, we know precisely which context applies as
900 /// we have the call chain.  We may compute the exact (rather
901 /// than covering superset) set of following tokens.
902 ///
903 /// For example, consider grammar:
904 ///
905 /// stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
906 ///      | "return" expr '.'
907 ///      ;
908 /// expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
909 /// atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
910 ///      | '(' expr ')'
911 ///      ;
912 ///
913 /// The FOLLOW sets are all inclusive whereas context-sensitive
914 /// FOLLOW sets are precisely what could follow a rule reference.
/// For input "i=(3);", here is the derivation:
916 ///
917 /// stat => ID '=' expr ';'
918 ///      => ID '=' atom ('+' atom)* ';'
919 ///      => ID '=' '(' expr ')' ('+' atom)* ';'
920 ///      => ID '=' '(' atom ')' ('+' atom)* ';'
921 ///      => ID '=' '(' INT ')' ('+' atom)* ';'
922 ///      => ID '=' '(' INT ')' ';'
923 ///
924 /// At the "3" token, you'd have a call chain of
925 ///
926 ///   stat -> expr -> atom -> expr -> atom
927 ///
928 /// What can follow that specific nested ref to atom?  Exactly ')'
929 /// as you can see by looking at the derivation of this specific
930 /// input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
931 ///
932 /// You want the exact viable token set when recovering from a
933 /// token mismatch.  Upon token mismatch, if LA(1) is member of
934 /// the viable next token set, then you know there is most likely
935 /// a missing token in the input stream.  "Insert" one by just not
936 /// throwing an exception.
937 ///
938 static pANTLR3_BITSET
computeCSRuleFollow(pANTLR3_BASE_RECOGNIZER recognizer)939 computeCSRuleFollow	    (pANTLR3_BASE_RECOGNIZER recognizer)
940 {
941     return   recognizer->combineFollows(recognizer, ANTLR3_FALSE);
942 }
943 
944 /// Compute the current followset for the input stream.
945 ///
946 static pANTLR3_BITSET
combineFollows(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_BOOLEAN exact)947 combineFollows		    (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact)
948 {
949     pANTLR3_BITSET	followSet;
950     pANTLR3_BITSET	localFollowSet;
951     ANTLR3_UINT32	top;
952     ANTLR3_UINT32	i;
953 
954     top	= recognizer->state->following->size(recognizer->state->following);
955 
956     followSet	    = antlr3BitsetNew(0);
957 	localFollowSet	= NULL;
958 
959     for (i = top; i>0; i--)
960     {
961 		localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) recognizer->state->following->get(recognizer->state->following, i-1));
962 
963 		if  (localFollowSet != NULL)
964 		{
965 			followSet->borInPlace(followSet, localFollowSet);
966 
967 			if	(exact == ANTLR3_TRUE)
968 			{
969 				if	(localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE)
970 				{
971 					// Only leave EOR in the set if at top (start rule); this lets us know
972 					// if we have to include the follow(start rule); I.E., EOF
973 					//
974 					if	(i>1)
975 					{
976 						followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE);
977 					}
978 				}
979 				else
980 				{
981 					break;	// Cannot see End Of Rule from here, just drop out
982 				}
983 			}
984 			localFollowSet->free(localFollowSet);
985 			localFollowSet = NULL;
986 		}
987     }
988 
989 	if	(localFollowSet != NULL)
990 	{
991 		localFollowSet->free(localFollowSet);
992 	}
993     return  followSet;
994 }
995 
996 /// Standard/Example error display method.
997 /// No generic error message display funciton coudl possibly do everything correctly
998 /// for all possible parsers. Hence you are provided with this example routine, which
999 /// you should override in your parser/tree parser to do as you will.
1000 ///
1001 /// Here we depart somewhat from the Java runtime as that has now split up a lot
1002 /// of the error display routines into spearate units. However, ther is little advantage
1003 /// to this in the C version as you will probably implement all such routines as a
1004 /// separate translation unit, rather than install them all as pointers to functions
1005 /// in the base recognizer.
1006 ///
1007 static void
displayRecognitionError(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_UINT8 * tokenNames)1008 displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
1009 {
1010 	pANTLR3_PARSER			parser;
1011 	pANTLR3_TREE_PARSER	    tparser;
1012 	pANTLR3_INT_STREAM	    is;
1013 	pANTLR3_STRING			ttext;
1014 	pANTLR3_STRING			ftext;
1015 	pANTLR3_EXCEPTION	    ex;
1016 	pANTLR3_COMMON_TOKEN    theToken;
1017 	pANTLR3_BASE_TREE	    theBaseTree;
1018 	pANTLR3_COMMON_TREE	    theCommonTree;
1019 
1020 	// Retrieve some info for easy reading.
1021 	//
1022 	ex	    =		recognizer->state->exception;
1023 	ttext   =		NULL;
1024 
1025 	// See if there is a 'filename' we can use
1026 	//
1027 	if	(ex->streamName == NULL)
1028 	{
1029 		if	(((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
1030 		{
1031 			ANTLR3_FPRINTF(stderr, "-end of input-(");
1032 		}
1033 		else
1034 		{
1035 			ANTLR3_FPRINTF(stderr, "-unknown source-(");
1036 		}
1037 	}
1038 	else
1039 	{
1040 		ftext = ex->streamName->to8(ex->streamName);
1041 		ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
1042 	}
1043 
1044 	// Next comes the line number
1045 	//
1046 
1047 	ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
1048 	ANTLR3_FPRINTF(stderr, " : error %d : %s",
1049 										recognizer->state->exception->type,
1050 					(pANTLR3_UINT8)	   (recognizer->state->exception->message));
1051 
1052 
1053 	// How we determine the next piece is dependent on which thing raised the
1054 	// error.
1055 	//
1056 	switch	(recognizer->type)
1057 	{
1058 	case	ANTLR3_TYPE_PARSER:
1059 
1060 		// Prepare the knowledge we know we have
1061 		//
1062 		parser	    = (pANTLR3_PARSER) (recognizer->super);
1063 		tparser	    = NULL;
1064 		is			= parser->tstream->istream;
1065 		theToken    = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);
1066 		ttext	    = theToken->toString(theToken);
1067 
1068 		ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine);
1069 		if  (theToken != NULL)
1070 		{
1071 			if (theToken->type == ANTLR3_TOKEN_EOF)
1072 			{
1073 				ANTLR3_FPRINTF(stderr, ", at <EOF>");
1074 			}
1075 			else
1076 			{
1077 				// Guard against null text in a token
1078 				//
1079 				ANTLR3_FPRINTF(stderr, "\n    near %s\n    ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
1080 			}
1081 		}
1082 		break;
1083 
1084 	case	ANTLR3_TYPE_TREE_PARSER:
1085 
1086 		tparser		= (pANTLR3_TREE_PARSER) (recognizer->super);
1087 		parser		= NULL;
1088 		is			= tparser->ctnstream->tnstream->istream;
1089 		theBaseTree	= (pANTLR3_BASE_TREE)(recognizer->state->exception->token);
1090 		ttext		= theBaseTree->toStringTree(theBaseTree);
1091 
1092 		if  (theBaseTree != NULL)
1093 		{
1094 			theCommonTree	= (pANTLR3_COMMON_TREE)	    theBaseTree->super;
1095 
1096 			if	(theCommonTree != NULL)
1097 			{
1098 				theToken	= (pANTLR3_COMMON_TOKEN)    theBaseTree->getToken(theBaseTree);
1099 			}
1100 			ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
1101 			ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars);
1102 		}
1103 		break;
1104 
1105 	default:
1106 
1107 		ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n");
1108 		return;
1109 		break;
1110 	}
1111 
1112 	// Although this function should generally be provided by the implementation, this one
1113 	// should be as helpful as possible for grammar developers and serve as an example
1114 	// of what you can do with each exception type. In general, when you make up your
1115 	// 'real' handler, you should debug the routine with all possible errors you expect
1116 	// which will then let you be as specific as possible about all circumstances.
1117 	//
1118 	// Note that in the general case, errors thrown by tree parsers indicate a problem
1119 	// with the output of the parser or with the tree grammar itself. The job of the parser
1120 	// is to produce a perfect (in traversal terms) syntactically correct tree, so errors
1121 	// at that stage should really be semantic errors that your own code determines and handles
1122 	// in whatever way is appropriate.
1123 	//
1124 	switch  (ex->type)
1125 	{
1126 	case	ANTLR3_UNWANTED_TOKEN_EXCEPTION:
1127 
1128 		// Indicates that the recognizer was fed a token which seesm to be
1129 		// spurious input. We can detect this when the token that follows
1130 		// this unwanted token would normally be part of the syntactically
1131 		// correct stream. Then we can see that the token we are looking at
1132 		// is just something that should not be there and throw this exception.
1133 		//
1134 		if	(tokenNames == NULL)
1135 		{
1136 			ANTLR3_FPRINTF(stderr, " : Extraneous input...");
1137 		}
1138 		else
1139 		{
1140 			if	(ex->expecting == ANTLR3_TOKEN_EOF)
1141 			{
1142 				ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
1143 			}
1144 			else
1145 			{
1146 				ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
1147 			}
1148 		}
1149 		break;
1150 
1151 	case	ANTLR3_MISSING_TOKEN_EXCEPTION:
1152 
1153 		// Indicates that the recognizer detected that the token we just
1154 		// hit would be valid syntactically if preceeded by a particular
1155 		// token. Perhaps a missing ';' at line end or a missing ',' in an
1156 		// expression list, and such like.
1157 		//
1158 		if	(tokenNames == NULL)
1159 		{
1160 			ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting);
1161 		}
1162 		else
1163 		{
1164 			if	(ex->expecting == ANTLR3_TOKEN_EOF)
1165 			{
1166 				ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n");
1167 			}
1168 			else
1169 			{
1170 				ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]);
1171 			}
1172 		}
1173 		break;
1174 
1175 	case	ANTLR3_RECOGNITION_EXCEPTION:
1176 
1177 		// Indicates that the recognizer received a token
1178 		// in the input that was not predicted. This is the basic exception type
1179 		// from which all others are derived. So we assume it was a syntax error.
1180 		// You may get this if there are not more tokens and more are needed
1181 		// to complete a parse for instance.
1182 		//
1183 		ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1184 		break;
1185 
1186 	case    ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
1187 
1188 		// We were expecting to see one thing and got another. This is the
1189 		// most common error if we coudl not detect a missing or unwanted token.
1190 		// Here you can spend your efforts to
1191 		// derive more useful error messages based on the expected
1192 		// token set and the last token and so on. The error following
1193 		// bitmaps do a good job of reducing the set that we were looking
1194 		// for down to something small. Knowing what you are parsing may be
1195 		// able to allow you to be even more specific about an error.
1196 		//
1197 		if	(tokenNames == NULL)
1198 		{
1199 			ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1200 		}
1201 		else
1202 		{
1203 			if	(ex->expecting == ANTLR3_TOKEN_EOF)
1204 			{
1205 				ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
1206 			}
1207 			else
1208 			{
1209 				ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
1210 			}
1211 		}
1212 		break;
1213 
1214 	case	ANTLR3_NO_VIABLE_ALT_EXCEPTION:
1215 
1216 		// We could not pick any alt decision from the input given
1217 		// so god knows what happened - however when you examine your grammar,
1218 		// you should. It means that at the point where the current token occurred
1219 		// that the DFA indicates nowhere to go from here.
1220 		//
1221 		ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
1222 
1223 		break;
1224 
1225 	case	ANTLR3_MISMATCHED_SET_EXCEPTION:
1226 
1227 		{
1228 			ANTLR3_UINT32	  count;
1229 			ANTLR3_UINT32	  bit;
1230 			ANTLR3_UINT32	  size;
1231 			ANTLR3_UINT32	  numbits;
1232 			pANTLR3_BITSET	  errBits;
1233 
1234 			// This means we were able to deal with one of a set of
1235 			// possible tokens at this point, but we did not see any
1236 			// member of that set.
1237 			//
1238 			ANTLR3_FPRINTF(stderr, " : unexpected input...\n  expected one of : ");
1239 
1240 			// What tokens could we have accepted at this point in the
1241 			// parse?
1242 			//
1243 			count   = 0;
1244 			errBits = antlr3BitsetLoad		(ex->expectingSet);
1245 			numbits = errBits->numBits		(errBits);
1246 			size    = errBits->size			(errBits);
1247 
1248 			if  (size > 0)
1249 			{
1250 				// However many tokens we could have dealt with here, it is usually
1251 				// not useful to print ALL of the set here. I arbitrarily chose 8
1252 				// here, but you should do whatever makes sense for you of course.
1253 				// No token number 0, so look for bit 1 and on.
1254 				//
1255 				for	(bit = 1; bit < numbits && count < 8 && count < size; bit++)
1256 				{
1257 					// TODO: This doesn;t look right - should be asking if the bit is set!!
1258 					//
1259 					if  (tokenNames[bit])
1260 					{
1261 						ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
1262 						count++;
1263 					}
1264 				}
1265 				ANTLR3_FPRINTF(stderr, "\n");
1266 			}
1267 			else
1268 			{
1269 				ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
1270 				ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
1271 			}
1272 		}
1273 		break;
1274 
1275 	case	ANTLR3_EARLY_EXIT_EXCEPTION:
1276 
1277 		// We entered a loop requiring a number of token sequences
1278 		// but found a token that ended that sequence earlier than
1279 		// we should have done.
1280 		//
1281 		ANTLR3_FPRINTF(stderr, " : missing elements...\n");
1282 		break;
1283 
1284 	default:
1285 
1286 		// We don't handle any other exceptions here, but you can
1287 		// if you wish. If we get an exception that hits this point
1288 		// then we are just going to report what we know about the
1289 		// token.
1290 		//
1291 		ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
1292 		break;
1293 	}
1294 
1295 	// Here you have the token that was in error which if this is
1296 	// the standard implementation will tell you the line and offset
1297 	// and also record the address of the start of the line in the
1298 	// input stream. You could therefore print the source line and so on.
1299 	// Generally though, I would expect that your lexer/parser will keep
1300 	// its own map of lines and source pointers or whatever as there
1301 	// are a lot of specific things you need to know about the input
1302 	// to do something like that.
1303 	// Here is where you do it though :-).
1304 	//
1305 }
1306 
1307 /// Return how many syntax errors were detected by this recognizer
1308 ///
1309 static ANTLR3_UINT32
getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)1310 getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)
1311 {
1312 	return	recognizer->state->errorCount;
1313 }
1314 
1315 /// Recover from an error found on the input stream.  Mostly this is
1316 /// NoViableAlt exceptions, but could be a mismatched token that
1317 /// the match() routine could not recover from.
1318 ///
1319 static void
recover(pANTLR3_BASE_RECOGNIZER recognizer)1320 recover			    (pANTLR3_BASE_RECOGNIZER recognizer)
1321 {
1322     // Used to compute the follow set of tokens
1323     //
1324     pANTLR3_BITSET			followSet;
1325     pANTLR3_PARSER			parser;
1326     pANTLR3_TREE_PARSER	    tparser;
1327     pANTLR3_INT_STREAM	    is;
1328 
1329     switch	(recognizer->type)
1330     {
1331 		case	ANTLR3_TYPE_PARSER:
1332 
1333 		parser  = (pANTLR3_PARSER) (recognizer->super);
1334 		tparser	= NULL;
1335 		is		= parser->tstream->istream;
1336 
1337 	break;
1338 
1339     case	ANTLR3_TYPE_TREE_PARSER:
1340 
1341 		tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1342 		parser	= NULL;
1343 		is		= tparser->ctnstream->tnstream->istream;
1344 
1345 	break;
1346 
1347     default:
1348 
1349 		ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1350 		return;
1351 
1352 	break;
1353     }
1354 
1355 	// Are we about to repeat the same error?
1356 	//
1357     if	(recognizer->state->lastErrorIndex == is->index(is))
1358     {
1359 		// The last error was at the same token index point. This must be a case
1360 		// where LT(1) is in the recovery token set so nothing is
1361 		// consumed. Consume a single token so at least to prevent
1362 		// an infinite loop; this is a failsafe.
1363 		//
1364 		is->consume(is);
1365     }
1366 
1367     // Record error index position
1368     //
1369     recognizer->state->lastErrorIndex	 = is->index(is);
1370 
1371     // Work out the follows set for error recovery
1372     //
1373     followSet	= recognizer->computeErrorRecoverySet(recognizer);
1374 
1375     // Call resync hook (for debuggers and so on)
1376     //
1377     recognizer->beginResync(recognizer);
1378 
1379     // Consume tokens until we have resynced to something in the follows set
1380     //
1381     recognizer->consumeUntilSet(recognizer, followSet);
1382 
1383     // End resync hook
1384     //
1385     recognizer->endResync(recognizer);
1386 
1387     // Destroy the temporary bitset we produced.
1388     //
1389     followSet->free(followSet);
1390 
1391     // Reset the inError flag so we don't re-report the exception
1392     //
1393     recognizer->state->error	= ANTLR3_FALSE;
1394     recognizer->state->failed	= ANTLR3_FALSE;
1395 }
1396 
1397 
1398 /// Attempt to recover from a single missing or extra token.
1399 ///
1400 /// EXTRA TOKEN
1401 ///
1402 /// LA(1) is not what we are looking for.  If LA(2) has the right token,
1403 /// however, then assume LA(1) is some extra spurious token.  Delete it
1404 /// and LA(2) as if we were doing a normal match(), which advances the
1405 /// input.
1406 ///
1407 /// MISSING TOKEN
1408 ///
1409 /// If current token is consistent with what could come after
1410 /// ttype then it is ok to "insert" the missing token, else throw
1411 /// exception For example, Input "i=(3;" is clearly missing the
1412 /// ')'.  When the parser returns from the nested call to expr, it
1413 /// will have call chain:
1414 ///
1415 ///    stat -> expr -> atom
1416 ///
1417 /// and it will be trying to match the ')' at this point in the
1418 /// derivation:
1419 ///
1420 ///       => ID '=' '(' INT ')' ('+' atom)* ';'
1421 ///                          ^
1422 /// match() will see that ';' doesn't match ')' and report a
1423 /// mismatched token error.  To recover, it sees that LA(1)==';'
1424 /// is in the set of tokens that can follow the ')' token
1425 /// reference in rule atom.  It can assume that you forgot the ')'.
1426 ///
1427 /// The exception that was passed in, in the java implementation is
1428 /// sorted in the recognizer exception stack in the C version. To 'throw' it we set the
1429 /// error flag and rules cascade back when this is set.
1430 ///
1431 static void *
recoverFromMismatchedToken(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 ttype,pANTLR3_BITSET_LIST follow)1432 recoverFromMismatchedToken  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
1433 {
1434 	pANTLR3_PARSER			  parser;
1435 	pANTLR3_TREE_PARSER	      tparser;
1436 	pANTLR3_INT_STREAM	      is;
1437 	void					* matchedSymbol;
1438 
1439 
1440 
1441 	switch	(recognizer->type)
1442 	{
1443 	case	ANTLR3_TYPE_PARSER:
1444 
1445 		parser  = (pANTLR3_PARSER) (recognizer->super);
1446 		tparser	= NULL;
1447 		is	= parser->tstream->istream;
1448 
1449 		break;
1450 
1451 	case	ANTLR3_TYPE_TREE_PARSER:
1452 
1453 		tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1454 		parser	= NULL;
1455 		is	= tparser->ctnstream->tnstream->istream;
1456 
1457 		break;
1458 
1459 	default:
1460 
1461 		ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n");
1462 		return NULL;
1463 
1464 		break;
1465 	}
1466 
1467 	// Create an exception if we need one
1468 	//
1469 	if	(recognizer->state->exception == NULL)
1470 	{
1471 		antlr3RecognitionExceptionNew(recognizer);
1472 	}
1473 
1474 	// If the next token after the one we are looking at in the input stream
1475 	// is what we are looking for then we remove the one we have discovered
1476 	// from the stream by consuming it, then consume this next one along too as
1477 	// if nothing had happened.
1478 	//
1479 	if	( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE)
1480 	{
1481 		recognizer->state->exception->type		= ANTLR3_UNWANTED_TOKEN_EXCEPTION;
1482 		recognizer->state->exception->message	= ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
1483 
1484 		// Call resync hook (for debuggers and so on)
1485 		//
1486 		if	(recognizer->debugger != NULL)
1487 		{
1488 			recognizer->debugger->beginResync(recognizer->debugger);
1489 		}
1490 
1491 		// "delete" the extra token
1492 		//
1493 		recognizer->beginResync(recognizer);
1494 		is->consume(is);
1495 		recognizer->endResync(recognizer);
1496 		// End resync hook
1497 		//
1498 		if	(recognizer->debugger != NULL)
1499 		{
1500 			recognizer->debugger->endResync(recognizer->debugger);
1501 		}
1502 
1503 		// Print out the error after we consume so that ANTLRWorks sees the
1504 		// token in the exception.
1505 		//
1506 		recognizer->reportError(recognizer);
1507 
1508 		// Return the token we are actually matching
1509 		//
1510 		matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
1511 
1512 		// Consume the token that the rule actually expected to get as if everything
1513 		// was hunky dory.
1514 		//
1515 		is->consume(is);
1516 
1517 		recognizer->state->error  = ANTLR3_FALSE;	// Exception is not outstanding any more
1518 
1519 		return	matchedSymbol;
1520 	}
1521 
1522 	// Single token deletion (Unwanted above) did not work
1523 	// so we see if we can insert a token instead by calculating which
1524 	// token would be missing
1525 	//
1526 	if	(mismatchIsMissingToken(recognizer, is, follow))
1527 	{
1528 		// We can fake the missing token and proceed
1529 		//
1530 		matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow);
1531 		recognizer->state->exception->type		= ANTLR3_MISSING_TOKEN_EXCEPTION;
1532 		recognizer->state->exception->message	= ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
1533 		recognizer->state->exception->token		= matchedSymbol;
1534 		recognizer->state->exception->expecting	= ttype;
1535 
1536 		// Print out the error after we insert so that ANTLRWorks sees the
1537 		// token in the exception.
1538 		//
1539 		recognizer->reportError(recognizer);
1540 
1541 		recognizer->state->error  = ANTLR3_FALSE;	// Exception is not outstanding any more
1542 
1543 		return	matchedSymbol;
1544 	}
1545 
1546 
1547 	// Neither deleting nor inserting tokens allows recovery
1548 	// must just report the exception.
1549 	//
1550 	recognizer->state->error	    = ANTLR3_TRUE;
1551 	return NULL;
1552 }
1553 
1554 static void *
recoverFromMismatchedSet(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_BITSET_LIST follow)1555 recoverFromMismatchedSet	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow)
1556 {
1557     pANTLR3_PARSER			parser;
1558     pANTLR3_TREE_PARSER	    tparser;
1559     pANTLR3_INT_STREAM	    is;
1560 	pANTLR3_COMMON_TOKEN	matchedSymbol;
1561 
1562     switch	(recognizer->type)
1563     {
1564     case	ANTLR3_TYPE_PARSER:
1565 
1566 		parser  = (pANTLR3_PARSER) (recognizer->super);
1567 		tparser	= NULL;
1568 		is	= parser->tstream->istream;
1569 
1570 	break;
1571 
1572     case	ANTLR3_TYPE_TREE_PARSER:
1573 
1574 		tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1575 		parser	= NULL;
1576 		is	= tparser->ctnstream->tnstream->istream;
1577 
1578 	break;
1579 
1580     default:
1581 
1582 		ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n");
1583 		return NULL;
1584 
1585 	break;
1586     }
1587 
1588 	if	(recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE)
1589 	{
1590 		// We can fake the missing token and proceed
1591 		//
1592 		matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow);
1593 		recognizer->state->exception->type	= ANTLR3_MISSING_TOKEN_EXCEPTION;
1594 		recognizer->state->exception->token	= matchedSymbol;
1595 
1596 		// Print out the error after we insert so that ANTLRWorks sees the
1597 		// token in the exception.
1598 		//
1599 		recognizer->reportError(recognizer);
1600 
1601 		recognizer->state->error  = ANTLR3_FALSE;	// Exception is not outstanding any more
1602 
1603 		return	matchedSymbol;
1604 	}
1605 
1606     // TODO - Single token deletion like in recoverFromMismatchedToken()
1607     //
1608     recognizer->state->error	= ANTLR3_TRUE;
1609 	recognizer->state->failed	= ANTLR3_TRUE;
1610 	return NULL;
1611 }
1612 
1613 /// This code is factored out from mismatched token and mismatched set
1614 ///  recovery.  It handles "single token insertion" error recovery for
1615 /// both.  No tokens are consumed to recover from insertions.  Return
1616 /// true if recovery was possible else return false.
1617 ///
1618 static ANTLR3_BOOLEAN
recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_BITSET_LIST followBits)1619 recoverFromMismatchedElement	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits)
1620 {
1621     pANTLR3_BITSET	    viableToksFollowingRule;
1622     pANTLR3_BITSET	    follow;
1623     pANTLR3_PARSER	    parser;
1624     pANTLR3_TREE_PARSER	    tparser;
1625     pANTLR3_INT_STREAM	    is;
1626 
1627     switch	(recognizer->type)
1628     {
1629     case	ANTLR3_TYPE_PARSER:
1630 
1631 		parser  = (pANTLR3_PARSER) (recognizer->super);
1632 		tparser	= NULL;
1633 		is	= parser->tstream->istream;
1634 
1635 	break;
1636 
1637     case	ANTLR3_TYPE_TREE_PARSER:
1638 
1639 		tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1640 		parser	= NULL;
1641 		is	= tparser->ctnstream->tnstream->istream;
1642 
1643 	break;
1644 
1645     default:
1646 
1647 		ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1648 		return ANTLR3_FALSE;
1649 
1650 	break;
1651     }
1652 
1653     follow	= antlr3BitsetLoad(followBits);
1654 
1655     if	(follow == NULL)
1656     {
1657 		/* The follow set is NULL, which means we don't know what can come
1658 		 * next, so we "hit and hope" by just signifying that we cannot
1659 		 * recover, which will just cause the next token to be consumed,
1660 		 * which might dig us out.
1661 		 */
1662 		return	ANTLR3_FALSE;
1663     }
1664 
1665     /* We have a bitmap for the follow set, hence we can compute
1666      * what can follow this grammar element reference.
1667      */
1668     if	(follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE)
1669     {
1670 		/* First we need to know which of the available tokens are viable
1671 		 * to follow this reference.
1672 		 */
1673 		viableToksFollowingRule	= recognizer->computeCSRuleFollow(recognizer);
1674 
1675 		/* Remove the EOR token, which we do not wish to compute with
1676 		 */
1677 		follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE);
1678 		viableToksFollowingRule->free(viableToksFollowingRule);
1679 		/* We now have the computed set of what can follow the current token
1680 		 */
1681     }
1682 
1683     /* We can now see if the current token works with the set of tokens
1684      * that could follow the current grammar reference. If it looks like it
1685      * is consistent, then we can "insert" that token by not throwing
1686      * an exception and assuming that we saw it.
1687      */
1688     if	( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE)
1689     {
1690 		/* report the error, but don't cause any rules to abort and stuff
1691 		 */
1692 		recognizer->reportError(recognizer);
1693 		if	(follow != NULL)
1694 		{
1695 			follow->free(follow);
1696 		}
1697 		recognizer->state->error			= ANTLR3_FALSE;
1698 		recognizer->state->failed			= ANTLR3_FALSE;
1699 		return ANTLR3_TRUE;	/* Success in recovery	*/
1700     }
1701 
1702     if	(follow != NULL)
1703     {
1704 		follow->free(follow);
1705     }
1706 
1707     /* We could not find anything viable to do, so this is going to
1708      * cause an exception.
1709      */
1710     return  ANTLR3_FALSE;
1711 }
1712 
1713 /// Eat tokens from the input stream until we get one of JUST the right type
1714 ///
1715 static void
consumeUntil(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 tokenType)1716 consumeUntil	(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType)
1717 {
1718     ANTLR3_UINT32			ttype;
1719     pANTLR3_PARSER			parser;
1720     pANTLR3_TREE_PARSER	    tparser;
1721     pANTLR3_INT_STREAM	    is;
1722 
1723     switch	(recognizer->type)
1724     {
1725 		case	ANTLR3_TYPE_PARSER:
1726 
1727 			parser  = (pANTLR3_PARSER) (recognizer->super);
1728 			tparser	= NULL;
1729 			is	= parser->tstream->istream;
1730 
1731 			break;
1732 
1733 		case	ANTLR3_TYPE_TREE_PARSER:
1734 
1735 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1736 			parser	= NULL;
1737 			is	= tparser->ctnstream->tnstream->istream;
1738 
1739 			break;
1740 
1741 		default:
1742 
1743 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n");
1744 			return;
1745 
1746 			break;
1747     }
1748 
1749     // What do have at the moment?
1750     //
1751     ttype	= is->_LA(is, 1);
1752 
1753     // Start eating tokens until we get to the one we want.
1754     //
1755     while   (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType)
1756     {
1757 		is->consume(is);
1758 		ttype	= is->_LA(is, 1);
1759     }
1760 }
1761 
1762 /// Eat tokens from the input stream until we find one that
1763 /// belongs to the supplied set.
1764 ///
1765 static void
consumeUntilSet(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_BITSET set)1766 consumeUntilSet			    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set)
1767 {
1768     ANTLR3_UINT32	    ttype;
1769     pANTLR3_PARSER	    parser;
1770     pANTLR3_TREE_PARSER	    tparser;
1771     pANTLR3_INT_STREAM	    is;
1772 
1773     switch	(recognizer->type)
1774     {
1775 		case	ANTLR3_TYPE_PARSER:
1776 
1777 			parser  = (pANTLR3_PARSER) (recognizer->super);
1778 			tparser	= NULL;
1779 			is	= parser->tstream->istream;
1780 
1781 			break;
1782 
1783 		case	ANTLR3_TYPE_TREE_PARSER:
1784 
1785 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1786 			parser	= NULL;
1787 			is	= tparser->ctnstream->tnstream->istream;
1788 
1789 			break;
1790 
1791 		default:
1792 
1793 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n");
1794 			return;
1795 
1796 			break;
1797     }
1798 
1799     // What do have at the moment?
1800     //
1801     ttype	= is->_LA(is, 1);
1802 
1803     // Start eating tokens until we get to one we want.
1804     //
1805     while   (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE)
1806     {
1807 		is->consume(is);
1808 		ttype	= is->_LA(is, 1);
1809     }
1810 }
1811 
1812 /** Return the rule invocation stack (how we got here in the parse.
1813  *  In the java version Ter just asks the JVM for all the information
1814  *  but in C we don't get this information, so I am going to do nothing
1815  *  right now.
1816  */
1817 static pANTLR3_STACK
getRuleInvocationStack(pANTLR3_BASE_RECOGNIZER recognizer)1818 getRuleInvocationStack		    (pANTLR3_BASE_RECOGNIZER recognizer)
1819 {
1820     return NULL;
1821 }
1822 
1823 static pANTLR3_STACK
getRuleInvocationStackNamed(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_UINT8 name)1824 getRuleInvocationStackNamed	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name)
1825 {
1826     return NULL;
1827 }
1828 
1829 /** Convenience method for template rewrites - NYI.
1830  */
1831 static pANTLR3_HASH_TABLE
toStrings(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_HASH_TABLE tokens)1832 toStrings			    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens)
1833 {
1834     return NULL;
1835 }
1836 
1837 static	void ANTLR3_CDECL
freeIntTrie(void * trie)1838 freeIntTrie    (void * trie)
1839 {
1840     ((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie);
1841 }
1842 
1843 
1844 /** Pointer to a function to return whether the rule has parsed input starting at the supplied
1845  *  start index before. If the rule has not parsed input starting from the supplied start index,
1846  *  then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point
1847  *  then it will return the point where it last stopped parsing after that start point.
1848  *
1849  * \remark
1850  * The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance
1851  * issue (it probably won't, the hash tables are pretty quick) then we could make a special int only
1852  * version of the table.
1853  */
1854 static ANTLR3_MARKER
getRuleMemoization(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_INTKEY ruleIndex,ANTLR3_MARKER ruleParseStart)1855 getRuleMemoization		    (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart)
1856 {
1857     /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1858      */
1859     pANTLR3_INT_TRIE	ruleList;
1860     ANTLR3_MARKER	stopIndex;
1861     pANTLR3_TRIE_ENTRY	entry;
1862 
1863     /* See if we have a list in the ruleMemos for this rule, and if not, then create one
1864      * as we will need it eventually if we are being asked for the memo here.
1865      */
1866     entry	= recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
1867 
1868     if	(entry == NULL)
1869     {
1870 		/* Did not find it, so create a new one for it, with a bit depth based on the
1871 		 * size of the input stream. We need the bit depth to incorporate the number if
1872 		 * bits required to represent the largest possible stop index in the input, which is the
1873 		 * last character. An int stream is free to return the largest 64 bit offset if it has
1874 		 * no idea of the size, but you should remember that this will cause the leftmost
1875 		 * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-)
1876 		 */
1877 		ruleList    = antlr3IntTrieNew(63);	/* Depth is theoretically 64 bits, but probably not ;-)	*/
1878 
1879 		if (ruleList != NULL)
1880 		{
1881 			recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie);
1882 		}
1883 
1884 		/* We cannot have a stopIndex in a trie we have just created of course
1885 		 */
1886 		return	MEMO_RULE_UNKNOWN;
1887     }
1888 
1889     ruleList	= (pANTLR3_INT_TRIE) (entry->data.ptr);
1890 
1891     /* See if there is a stop index associated with the supplied start index.
1892      */
1893     stopIndex	= 0;
1894 
1895     entry = ruleList->get(ruleList, ruleParseStart);
1896     if (entry != NULL)
1897     {
1898 		stopIndex = (ANTLR3_MARKER)(entry->data.intVal);
1899     }
1900 
1901     if	(stopIndex == 0)
1902     {
1903 		return MEMO_RULE_UNKNOWN;
1904     }
1905 
1906     return  stopIndex;
1907 }
1908 
1909 /** Has this rule already parsed input at the current index in the
1910  *  input stream?  Return ANTLR3_TRUE if we have and ANTLR3_FALSE
1911  *  if we have not.
1912  *
1913  *  This method has a side-effect: if we have seen this input for
1914  *  this rule and successfully parsed before, then seek ahead to
1915  *  1 past the stop token matched for this rule last time.
1916  */
1917 static ANTLR3_BOOLEAN
alreadyParsedRule(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_MARKER ruleIndex)1918 alreadyParsedRule		    (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex)
1919 {
1920     ANTLR3_MARKER			stopIndex;
1921     pANTLR3_LEXER			lexer;
1922     pANTLR3_PARSER			parser;
1923     pANTLR3_TREE_PARSER	    tparser;
1924     pANTLR3_INT_STREAM	    is;
1925 
1926     switch	(recognizer->type)
1927     {
1928 		case	ANTLR3_TYPE_PARSER:
1929 
1930 			parser  = (pANTLR3_PARSER) (recognizer->super);
1931 			tparser	= NULL;
1932 			lexer	= NULL;
1933 			is	= parser->tstream->istream;
1934 
1935 			break;
1936 
1937 		case	ANTLR3_TYPE_TREE_PARSER:
1938 
1939 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1940 			parser	= NULL;
1941 			lexer	= NULL;
1942 			is	= tparser->ctnstream->tnstream->istream;
1943 
1944 			break;
1945 
1946 		case	ANTLR3_TYPE_LEXER:
1947 
1948 			lexer	= (pANTLR3_LEXER)   (recognizer->super);
1949 			parser	= NULL;
1950 			tparser	= NULL;
1951 			is	= lexer->input->istream;
1952 			break;
1953 
1954 		default:
1955 
1956 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n");
1957 			return ANTLR3_FALSE;
1958 
1959 			break;
1960     }
1961 
1962     /* See if we have a memo marker for this.
1963      */
1964     stopIndex	    = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is));
1965 
1966     if	(stopIndex  == MEMO_RULE_UNKNOWN)
1967     {
1968 		return ANTLR3_FALSE;
1969     }
1970 
1971     if	(stopIndex == MEMO_RULE_FAILED)
1972     {
1973 		recognizer->state->failed = ANTLR3_TRUE;
1974     }
1975     else
1976     {
1977 		is->seek(is, stopIndex+1);
1978     }
1979 
1980     /* If here then the rule was executed for this input already
1981      */
1982     return  ANTLR3_TRUE;
1983 }
1984 
1985 /** Record whether or not this rule parsed the input at this position
1986  *  successfully.
1987  */
1988 static void
memoize(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_MARKER ruleIndex,ANTLR3_MARKER ruleParseStart)1989 memoize	(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart)
1990 {
1991     /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1992      */
1993     pANTLR3_INT_TRIE	    ruleList;
1994     pANTLR3_TRIE_ENTRY	    entry;
1995     ANTLR3_MARKER	    stopIndex;
1996     pANTLR3_LEXER	    lexer;
1997     pANTLR3_PARSER	    parser;
1998     pANTLR3_TREE_PARSER	    tparser;
1999     pANTLR3_INT_STREAM	    is;
2000 
2001     switch	(recognizer->type)
2002     {
2003 		case	ANTLR3_TYPE_PARSER:
2004 
2005 			parser  = (pANTLR3_PARSER) (recognizer->super);
2006 			tparser	= NULL;
2007 			is	= parser->tstream->istream;
2008 
2009 			break;
2010 
2011 		case	ANTLR3_TYPE_TREE_PARSER:
2012 
2013 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2014 			parser	= NULL;
2015 			is	= tparser->ctnstream->tnstream->istream;
2016 
2017 			break;
2018 
2019 		case	ANTLR3_TYPE_LEXER:
2020 
2021 			lexer	= (pANTLR3_LEXER)   (recognizer->super);
2022 			parser	= NULL;
2023 			tparser	= NULL;
2024 			is		= lexer->input->istream;
2025 			break;
2026 
2027 		default:
2028 
2029 			ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n");
2030 			return;
2031 
2032 			break;
2033     }
2034 
2035     stopIndex	= recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1;
2036 
2037     entry	= recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
2038 
2039     if	(entry != NULL)
2040     {
2041 		ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr);
2042 
2043 		/* If we don't already have this entry, append it. The memoize trie does not
2044 		 * accept duplicates so it won't add it if already there and we just ignore the
2045 		 * return code as we don't care if it is there already.
2046 		 */
2047 		ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL);
2048     }
2049 }
2050 /** A syntactic predicate.  Returns true/false depending on whether
2051  *  the specified grammar fragment matches the current input stream.
2052  *  This resets the failed instance var afterwards.
2053  */
2054 static ANTLR3_BOOLEAN
synpred(pANTLR3_BASE_RECOGNIZER recognizer,void * ctx,void (* predicate)(void * ctx))2055 synpred	(pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx))
2056 {
2057     ANTLR3_MARKER   start;
2058     pANTLR3_PARSER	    parser;
2059     pANTLR3_TREE_PARSER	    tparser;
2060     pANTLR3_INT_STREAM	    is;
2061 
2062     switch	(recognizer->type)
2063     {
2064 		case	ANTLR3_TYPE_PARSER:
2065 
2066 			parser  = (pANTLR3_PARSER) (recognizer->super);
2067 			tparser	= NULL;
2068 			is	= parser->tstream->istream;
2069 
2070 			break;
2071 
2072 		case	ANTLR3_TYPE_TREE_PARSER:
2073 
2074 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2075 			parser	= NULL;
2076 			is	= tparser->ctnstream->tnstream->istream;
2077 
2078 			break;
2079 
2080 		default:
2081 
2082 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n");
2083 			return ANTLR3_FALSE;
2084 
2085 			break;
2086     }
2087 
2088     /* Begin backtracking so we can get back to where we started after trying out
2089      * the syntactic predicate.
2090      */
2091     start   = is->mark(is);
2092     recognizer->state->backtracking++;
2093 
2094     /* Try the syntactical predicate
2095      */
2096     predicate(ctx);
2097 
2098     /* Reset
2099      */
2100     is->rewind(is, start);
2101     recognizer->state->backtracking--;
2102 
2103     if	(recognizer->state->failed == ANTLR3_TRUE)
2104     {
2105 		/* Predicate failed
2106 		 */
2107 		recognizer->state->failed = ANTLR3_FALSE;
2108 		return	ANTLR3_FALSE;
2109     }
2110     else
2111     {
2112 		/* Predicate was successful
2113 		 */
2114 		recognizer->state->failed	= ANTLR3_FALSE;
2115 		return	ANTLR3_TRUE;
2116     }
2117 }
2118 
2119 static void
reset(pANTLR3_BASE_RECOGNIZER recognizer)2120 reset(pANTLR3_BASE_RECOGNIZER recognizer)
2121 {
2122     if	(recognizer->state->following != NULL)
2123     {
2124 		recognizer->state->following->free(recognizer->state->following);
2125     }
2126 
2127 	// Reset the state flags
2128 	//
2129 	recognizer->state->errorRecovery	= ANTLR3_FALSE;
2130 	recognizer->state->lastErrorIndex	= -1;
2131 	recognizer->state->failed			= ANTLR3_FALSE;
2132 	recognizer->state->errorCount		= 0;
2133 	recognizer->state->backtracking		= 0;
2134 	recognizer->state->following		= NULL;
2135 
2136 	if	(recognizer->state != NULL)
2137 	{
2138 		if	(recognizer->state->ruleMemo != NULL)
2139 		{
2140 			recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
2141 			recognizer->state->ruleMemo = antlr3IntTrieNew(15);	/* 16 bit depth is enough for 32768 rules! */
2142 		}
2143 	}
2144 
2145 
2146     // Install a new following set
2147     //
2148     recognizer->state->following   = antlr3StackNew(8);
2149 
2150 }
2151 
2152 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2153 // You MAY need override this function if the standard TOKEN_STREAM is not what you are using.
2154 //
2155 static void *
getCurrentInputSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream)2156 getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
2157 {
2158 	return ((pANTLR3_TOKEN_STREAM)istream->super)->_LT((pANTLR3_TOKEN_STREAM)istream->super, 1);
2159 }
2160 
2161 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2162 // You MAY need override this function if the standard COMMON_TOKEN_STREAM is not what you are using.
2163 //
2164 static void *
getMissingSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream,pANTLR3_EXCEPTION e,ANTLR3_UINT32 expectedTokenType,pANTLR3_BITSET_LIST follow)2165 getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
2166 									ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
2167 {
2168 	pANTLR3_TOKEN_STREAM			ts;
2169 	pANTLR3_COMMON_TOKEN_STREAM		cts;
2170 	pANTLR3_COMMON_TOKEN			token;
2171 	pANTLR3_COMMON_TOKEN			current;
2172 	pANTLR3_STRING					text;
2173 
2174 	// Dereference the standard pointers
2175 	//
2176 	ts		= (pANTLR3_TOKEN_STREAM)istream->super;
2177 	cts		= (pANTLR3_COMMON_TOKEN_STREAM)ts->super;
2178 
2179 	// Work out what to use as the current symbol to make a line and offset etc
2180 	// If we are at EOF, we use the token before EOF
2181 	//
2182 	current	= ts->_LT(ts, 1);
2183 	if	(current->getType(current) == ANTLR3_TOKEN_EOF)
2184 	{
2185 		current = ts->_LT(ts, -1);
2186 	}
2187 
2188 	// Create a new empty token
2189 	//
2190 	if	(recognizer->state->tokFactory == NULL)
2191 	{
2192 		// We don't yet have a token factory for making tokens
2193 		// we just need a fake one using the input stream of the current
2194 		// token.
2195 		//
2196 		recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input);
2197 	}
2198 	token	= recognizer->state->tokFactory->newToken(recognizer->state->tokFactory);
2199 
2200 	// Set some of the token properties based on the current token
2201 	//
2202 	token->setLine					(token, current->getLine(current));
2203 	token->setCharPositionInLine	(token, current->getCharPositionInLine(current));
2204 	token->setChannel				(token, ANTLR3_TOKEN_DEFAULT_CHANNEL);
2205 	token->setType					(token, expectedTokenType);
2206     token->user1                    = current->user1;
2207     token->user2                    = current->user2;
2208     token->user3                    = current->user3;
2209     token->custom                   = current->custom;
2210     token->lineStart                = current->lineStart;
2211 
2212 	// Create the token text that shows it has been inserted
2213 	//
2214 	token->setText8(token, (pANTLR3_UINT8)"<missing ");
2215 	text = token->getText(token);
2216 
2217 	if	(text != NULL)
2218 	{
2219 		text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]);
2220 		text->append8(text, (const char *)">");
2221 	}
2222 
2223 	// Finally return the pointer to our new token
2224 	//
2225 	return	token;
2226 }
2227 
2228 
2229 #ifdef	ANTLR3_WINDOWS
2230 #pragma warning( default : 4100 )
2231 #endif
2232 
2233 /// @}
2234 ///
2235 
2236