1 /**
2  * Contains the default implementation of the common token used within
3  * java. Custom tokens should create this structure and then append to it using the
4  * custom pointer to install their own structure and API.
5  */
6 
7 // [The "BSD licence"]
8 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
9 // http://www.temporal-wave.com
10 // http://www.linkedin.com/in/jimidle
11 //
12 // All rights reserved.
13 //
14 // Redistribution and use in source and binary forms, with or without
15 // modification, are permitted provided that the following conditions
16 // are met:
17 // 1. Redistributions of source code must retain the above copyright
18 //    notice, this list of conditions and the following disclaimer.
19 // 2. Redistributions in binary form must reproduce the above copyright
20 //    notice, this list of conditions and the following disclaimer in the
21 //    documentation and/or other materials provided with the distribution.
22 // 3. The name of the author may not be used to endorse or promote products
23 //    derived from this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
26 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
28 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
29 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
34 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 
36 #include    <antlr3.h>
37 
38 /* Token API
39  */
40 static  pANTLR3_STRING	getText					(pANTLR3_COMMON_TOKEN token);
41 static  void			setText					(pANTLR3_COMMON_TOKEN token, pANTLR3_STRING text);
42 static  void			setText8				(pANTLR3_COMMON_TOKEN token, pANTLR3_UINT8 text);
43 static	ANTLR3_UINT32   getType					(pANTLR3_COMMON_TOKEN token);
44 static  void			setType					(pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 type);
45 static  ANTLR3_UINT32   getLine					(pANTLR3_COMMON_TOKEN token);
46 static  void			setLine					(pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 line);
47 static  ANTLR3_INT32    getCharPositionInLine	(pANTLR3_COMMON_TOKEN token);
48 static  void			setCharPositionInLine	(pANTLR3_COMMON_TOKEN token, ANTLR3_INT32 pos);
49 static  ANTLR3_UINT32   getChannel				(pANTLR3_COMMON_TOKEN token);
50 static  void			setChannel				(pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 channel);
51 static  ANTLR3_MARKER   getTokenIndex			(pANTLR3_COMMON_TOKEN token);
52 static  void			setTokenIndex			(pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER);
53 static  ANTLR3_MARKER   getStartIndex			(pANTLR3_COMMON_TOKEN token);
54 static  void			setStartIndex			(pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER index);
55 static  ANTLR3_MARKER   getStopIndex			(pANTLR3_COMMON_TOKEN token);
56 static  void			setStopIndex			(pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER index);
57 static  pANTLR3_STRING  toString				(pANTLR3_COMMON_TOKEN token);
58 
59 /* Factory API
60  */
61 static	void			factoryClose	(pANTLR3_TOKEN_FACTORY factory);
62 static	pANTLR3_COMMON_TOKEN	newToken	(void);
63 static  void			setInputStream	(pANTLR3_TOKEN_FACTORY factory, pANTLR3_INPUT_STREAM input);
64 static	void                    factoryReset    (pANTLR3_TOKEN_FACTORY factory);
65 
66 /* Internal management functions
67  */
68 static	ANTLR3_BOOLEAN			newPool		(pANTLR3_TOKEN_FACTORY factory);
69 static	pANTLR3_COMMON_TOKEN    newPoolToken	(pANTLR3_TOKEN_FACTORY factory);
70 
71 
72 ANTLR3_API pANTLR3_COMMON_TOKEN
antlr3CommonTokenNew(ANTLR3_UINT32 ttype)73 antlr3CommonTokenNew(ANTLR3_UINT32 ttype)
74 {
75 	pANTLR3_COMMON_TOKEN    token;
76 
77 	// Create a raw token with the interface installed
78 	//
79 	token   = newToken();
80 
81 	if	(token != NULL)
82 	{
83 		token->setType(token, ttype);
84 	}
85 
86 	// All good
87 	//
88 	return  token;
89 }
90 
91 ANTLR3_API pANTLR3_TOKEN_FACTORY
antlr3TokenFactoryNew(pANTLR3_INPUT_STREAM input)92 antlr3TokenFactoryNew(pANTLR3_INPUT_STREAM input)
93 {
94     pANTLR3_TOKEN_FACTORY   factory;
95 
96     /* allocate memory
97      */
98     factory	= (pANTLR3_TOKEN_FACTORY) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_TOKEN_FACTORY));
99 
100     if	(factory == NULL)
101     {
102 	return	NULL;
103     }
104 
105     /* Install factory API
106      */
107     factory->newToken	    = newPoolToken;
108     factory->close	    = factoryClose;
109     factory->setInputStream = setInputStream;
110     factory->reset          = factoryReset;
111 
112     /* Allocate the initial pool
113      */
114     factory->thisPool	= -1;
115     factory->pools      = NULL;
116     factory->maxPool    = -1;
117     newPool(factory);
118 
119     /* Factory space is good, we now want to initialize our cheating token
120      * which one it is initialized is the model for all tokens we manufacture
121      */
122     antlr3SetTokenAPI(&factory->unTruc);
123 
124     /* Set some initial variables for future copying
125      */
126     factory->unTruc.factoryMade	= ANTLR3_TRUE;
127 
128     // Input stream
129     //
130     setInputStream(factory, input);
131 
132     return  factory;
133 
134 }
135 
136 static void
setInputStream(pANTLR3_TOKEN_FACTORY factory,pANTLR3_INPUT_STREAM input)137 setInputStream	(pANTLR3_TOKEN_FACTORY factory, pANTLR3_INPUT_STREAM input)
138 {
139     factory->input          =  input;
140     factory->unTruc.input   =  input;
141 	if	(input != NULL)
142 	{
143 		factory->unTruc.strFactory	= input->strFactory;
144 	}
145 	else
146 	{
147 		factory->unTruc.strFactory = NULL;
148     }
149 }
150 
151 static ANTLR3_BOOLEAN
newPool(pANTLR3_TOKEN_FACTORY factory)152 newPool(pANTLR3_TOKEN_FACTORY factory)
153 {
154     /* Increment factory count
155      */
156     ++(factory->thisPool);
157 
158     // If we were reusing this token factory then we may already have a pool
159     // allocated. If we exceeded the max available then we must allocate a new
160     // one.
161     if  (factory->thisPool > factory->maxPool)
162     {
163         /* Ensure we have enough pointers allocated
164          */
165 		pANTLR3_COMMON_TOKEN *newPools = (pANTLR3_COMMON_TOKEN *)
166 			ANTLR3_REALLOC((void *)factory->pools,	    /* Current pools pointer (starts at NULL)	*/
167 		                   (ANTLR3_UINT32)((factory->thisPool + 1) * sizeof(pANTLR3_COMMON_TOKEN *))	/* Memory for new pool pointers */
168 			);
169 		if (newPools == NULL)
170 		{
171 			// We are out of memory, but the old allocation is still valid for now
172 			--(factory->thisPool);
173 			return ANTLR3_FALSE;
174 		}
175 
176         factory->pools = newPools;
177 
178         /* Allocate a new pool for the factory
179          */
180         factory->pools[factory->thisPool]	=
181 			        (pANTLR3_COMMON_TOKEN)
182 				    ANTLR3_CALLOC(1, (size_t)(sizeof(ANTLR3_COMMON_TOKEN) * ANTLR3_FACTORY_POOL_SIZE));
183 		if (factory->pools[factory->thisPool] == NULL)
184 		{
185 			// Allocation failed
186 			--(factory->thisPool);
187 			return ANTLR3_FALSE;
188 		}
189 
190         // We now have a new pool and can track it as the maximum we have created so far
191         //
192         factory->maxPool = factory->thisPool;
193     }
194 
195     /* Reset the counters
196      */
197     factory->nextToken	= 0;
198 
199     /* Done
200      */
201     return ANTLR3_TRUE;
202 }
203 
204 static pANTLR3_COMMON_TOKEN
newPoolToken(pANTLR3_TOKEN_FACTORY factory)205 newPoolToken(pANTLR3_TOKEN_FACTORY factory)
206 {
207     pANTLR3_COMMON_TOKEN token;
208 
209 	if (factory == NULL) { return NULL; }
210 
211     /* See if we need a new token pool before allocating a new
212      * one
213      */
214     if (factory->nextToken >= ANTLR3_FACTORY_POOL_SIZE)
215     {
216         /* We ran out of tokens in the current pool, so we need a new pool
217          */
218         if (!newPool(factory))
219 		{
220 			return NULL;
221 		}
222     }
223 
224 	// make sure the factory is sane
225 	if (factory->pools == NULL) { return NULL; }
226 	if (factory->pools[factory->thisPool] == NULL) { return NULL; }
227 
228     /* Assuming everything went well (we are trying for performance here so doing minimal
229      * error checking. Then we can work out what the pointer is to the next token.
230      */
231     token = factory->pools[factory->thisPool] + factory->nextToken;
232     factory->nextToken++;
233 
234     /* We have our token pointer now, so we can initialize it to the predefined model.
235      * We only need do this though if the token is not already initialized, we just check
236      * an api function pointer for this as they are allocated via calloc.
237      */
238     if  (token->setStartIndex == NULL)
239     {
240         antlr3SetTokenAPI(token);
241 
242         // It is factory made, and we need to copy the string factory pointer
243         //
244         token->factoryMade  = ANTLR3_TRUE;
245         token->strFactory   = factory->input == NULL ? NULL : factory->input->strFactory;
246         token->input        = factory->input;
247     }
248 
249     /* And we are done
250      */
251     return token;
252 }
253 
254 static	void
factoryReset(pANTLR3_TOKEN_FACTORY factory)255 factoryReset	    (pANTLR3_TOKEN_FACTORY factory)
256 {
257     // Just start again with pool #0 when we are
258     // called.
259     //
260     factory->thisPool   = -1;
261     newPool(factory);
262 }
263 
264 static	void
factoryClose(pANTLR3_TOKEN_FACTORY factory)265 factoryClose	    (pANTLR3_TOKEN_FACTORY factory)
266 {
267     pANTLR3_COMMON_TOKEN    pool;
268     ANTLR3_INT32	    poolCount;
269     ANTLR3_UINT32	    limit;
270     ANTLR3_UINT32	    token;
271     pANTLR3_COMMON_TOKEN    check;
272 
273     /* We iterate the token pools one at a time
274      */
275     for	(poolCount = 0; poolCount <= factory->thisPool; poolCount++)
276     {
277 	/* Pointer to current pool
278 	 */
279 	pool	= factory->pools[poolCount];
280 
281 	/* Work out how many tokens we need to check in this pool.
282 	 */
283 	limit	= (poolCount == factory->thisPool ? factory->nextToken : ANTLR3_FACTORY_POOL_SIZE);
284 
285 	/* Marginal condition, we might be at the start of a brand new pool
286 	 * where the nextToken is 0 and nothing has been allocated.
287 	 */
288 	if  (limit > 0)
289 	{
290 	    /* We have some tokens allocated from this pool
291 	     */
292 	    for (token = 0; token < limit; token++)
293 	    {
294 		/* Next one in the chain
295 		 */
296 		check	= pool + token;
297 
298 		/* If the programmer made this a custom token, then
299 		 * see if we need to call their free routine.
300 		 */
301 		if  (check->custom != NULL && check->freeCustom != NULL)
302 		{
303 		    check->freeCustom(check->custom);
304 		    check->custom = NULL;
305 		}
306 	    }
307 	}
308 
309 	/* We can now free this pool allocation
310 	 */
311 	ANTLR3_FREE(factory->pools[poolCount]);
312 	factory->pools[poolCount] = NULL;
313     }
314 
315     /* All the pools are deallocated we can free the pointers to the pools
316      * now.
317      */
318     ANTLR3_FREE(factory->pools);
319 
320     /* Finally, we can free the space for the factory itself
321      */
322     ANTLR3_FREE(factory);
323 }
324 
325 
326 static	pANTLR3_COMMON_TOKEN
newToken(void)327 newToken(void)
328 {
329     pANTLR3_COMMON_TOKEN    token;
330 
331     /* Allocate memory for this
332      */
333     token   = (pANTLR3_COMMON_TOKEN) ANTLR3_CALLOC(1, (size_t)(sizeof(ANTLR3_COMMON_TOKEN)));
334 
335     if	(token == NULL)
336     {
337 	return	NULL;
338     }
339 
340     // Install the API
341     //
342     antlr3SetTokenAPI(token);
343     token->factoryMade = ANTLR3_FALSE;
344 
345     return  token;
346 }
347 
348 ANTLR3_API void
antlr3SetTokenAPI(pANTLR3_COMMON_TOKEN token)349 antlr3SetTokenAPI(pANTLR3_COMMON_TOKEN token)
350 {
351     token->getText		    = getText;
352     token->setText		    = setText;
353     token->setText8		    = setText8;
354     token->getType		    = getType;
355     token->setType		    = setType;
356     token->getLine		    = getLine;
357     token->setLine		    = setLine;
358     token->setLine		    = setLine;
359     token->getCharPositionInLine    = getCharPositionInLine;
360     token->setCharPositionInLine    = setCharPositionInLine;
361     token->getChannel		    = getChannel;
362     token->setChannel		    = setChannel;
363     token->getTokenIndex	    = getTokenIndex;
364     token->setTokenIndex	    = setTokenIndex;
365     token->getStartIndex	    = getStartIndex;
366     token->setStartIndex	    = setStartIndex;
367     token->getStopIndex		    = getStopIndex;
368     token->setStopIndex		    = setStopIndex;
369     token->toString		    = toString;
370 
371     return;
372 }
373 
getText(pANTLR3_COMMON_TOKEN token)374 static  pANTLR3_STRING  getText			(pANTLR3_COMMON_TOKEN token)
375 {
376 	switch (token->textState)
377 	{
378 		case ANTLR3_TEXT_STRING:
379 
380 			// Someone already created a string for this token, so we just
381 			// use it.
382 			//
383 			return	token->tokText.text;
384 			break;
385 
386 		case ANTLR3_TEXT_CHARP:
387 
388 			// We had a straight text pointer installed, now we
389 			// must convert it to a string. Note we have to do this here
390 			// or otherwise setText8() will just install the same char*
391 			//
392 			if	(token->strFactory != NULL)
393 			{
394 				token->tokText.text	= token->strFactory->newStr8(token->strFactory, (pANTLR3_UINT8)token->tokText.chars);
395 				token->textState	= ANTLR3_TEXT_STRING;
396 				return token->tokText.text;
397 			}
398 			else
399 			{
400 				// We cannot do anything here
401 				//
402 				return NULL;
403 			}
404 			break;
405 
406 		default:
407 
408 			// EOF is a special case
409 			//
410 			if (token->type == ANTLR3_TOKEN_EOF)
411 			{
412 				token->tokText.text				= token->strFactory->newStr8(token->strFactory, (pANTLR3_UINT8)"<EOF>");
413 				token->textState				= ANTLR3_TEXT_STRING;
414 				token->tokText.text->factory	= token->strFactory;
415 				return token->tokText.text;
416 			}
417 
418 
419 			// We had nothing installed in the token, create a new string
420 			// from the input stream
421 			//
422 
423 			if	(token->input != NULL)
424 			{
425 
426 				return	token->input->substr(	token->input,
427 												token->getStartIndex(token),
428  												token->getStopIndex(token)
429 											);
430 			}
431 
432 			// Nothing to return, there is no input stream
433 			//
434 			return NULL;
435 			break;
436 	}
437 }
/**
 * Set the token text from an 8 bit character pointer.
 *
 * If the token has no text yet, or currently holds a plain character
 * pointer, the supplied pointer itself is stored (the characters are not
 * copied here) and the token is marked ANTLR3_TEXT_CHARP. If the token
 * already holds a string, that string's set8() is used to replace its
 * contents.
 *
 * @param token  Token to update.
 * @param text   New text; NULL is ignored and leaves the token unchanged.
 */
static void
setText8(pANTLR3_COMMON_TOKEN token, pANTLR3_UINT8 text)
{
    // No text to set, so ignore
    //
    if (text == NULL)
    {
        return;
    }

    switch (token->textState)
    {
        case ANTLR3_TEXT_NONE:
        case ANTLR3_TEXT_CHARP:     // Caller must free before setting again, if it needs to be freed

            // Nothing in there yet, or just a char *, so just set the
            // text as a pointer
            //
            token->textState        = ANTLR3_TEXT_CHARP;
            token->tokText.chars    = (pANTLR3_UCHAR)text;
            break;

        default:

            if (token->tokText.text == NULL)
            {
                // Marked as a string but no string object is present
                // (e.g. setText() was given NULL), so there is nothing to
                // override; fall back to storing the pointer.
                //
                token->textState        = ANTLR3_TEXT_CHARP;
                token->tokText.chars    = (pANTLR3_UCHAR)text;
            }
            else
            {
                // It was already a pANTLR3_STRING, so just override it
                //
                token->tokText.text->set8(token->tokText.text, (const char *)text);
            }
            break;
    }
}
468 
469 /** \brief Install the supplied text string as teh text for the token.
470  * The method assumes that the existing text (if any) was created by a factory
471  * and so does not attempt to release any memory it is using.Text not created
472  * by a string fctory (not advised) should be released prior to this call.
473  */
setText(pANTLR3_COMMON_TOKEN token,pANTLR3_STRING text)474 static  void		setText			(pANTLR3_COMMON_TOKEN token, pANTLR3_STRING text)
475 {
476 	// Merely replaces and existing pre-defined text with the supplied
477 	// string
478 	//
479 	token->textState	= ANTLR3_TEXT_STRING;
480 	token->tokText.text	= text;
481 
482 	/* We are done
483 	*/
484 	return;
485 }
486 
getType(pANTLR3_COMMON_TOKEN token)487 static	ANTLR3_UINT32   getType			(pANTLR3_COMMON_TOKEN token)
488 {
489     return  token->type;
490 }
491 
/** Store the supplied token type in the token. */
static void
setType(pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 type)
{
    token->type = type;
}
496 
getLine(pANTLR3_COMMON_TOKEN token)497 static  ANTLR3_UINT32   getLine			(pANTLR3_COMMON_TOKEN token)
498 {
499     return  token->line;
500 }
501 
/** Store the supplied line number in the token. */
static void
setLine(pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 line)
{
    token->line = line;
}
506 
getCharPositionInLine(pANTLR3_COMMON_TOKEN token)507 static  ANTLR3_INT32    getCharPositionInLine	(pANTLR3_COMMON_TOKEN token)
508 {
509     return  token->charPosition;
510 }
511 
/** Store the supplied character position within the line in the token. */
static void
setCharPositionInLine(pANTLR3_COMMON_TOKEN token, ANTLR3_INT32 pos)
{
    token->charPosition = pos;
}
516 
getChannel(pANTLR3_COMMON_TOKEN token)517 static  ANTLR3_UINT32   getChannel		(pANTLR3_COMMON_TOKEN token)
518 {
519     return  token->channel;
520 }
521 
/** Store the supplied channel number in the token. */
static void
setChannel(pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 channel)
{
    token->channel = channel;
}
526 
getTokenIndex(pANTLR3_COMMON_TOKEN token)527 static  ANTLR3_MARKER   getTokenIndex		(pANTLR3_COMMON_TOKEN token)
528 {
529     return  token->index;
530 }
531 
/** Store the supplied token index in the token. */
static void
setTokenIndex(pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER index)
{
    token->index = index;
}
536 
getStartIndex(pANTLR3_COMMON_TOKEN token)537 static  ANTLR3_MARKER   getStartIndex		(pANTLR3_COMMON_TOKEN token)
538 {
539 	return  token->start == -1 ? (ANTLR3_MARKER)(token->input->data) : token->start;
540 }
541 
/** Store the supplied start index in the token. */
static void
setStartIndex(pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER start)
{
    token->start = start;
}
546 
getStopIndex(pANTLR3_COMMON_TOKEN token)547 static  ANTLR3_MARKER   getStopIndex		(pANTLR3_COMMON_TOKEN token)
548 {
549     return  token->stop;
550 }
551 
/** Store the supplied stop index in the token. */
static void
setStopIndex(pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER stop)
{
    token->stop = stop;
}
556 
toString(pANTLR3_COMMON_TOKEN token)557 static  pANTLR3_STRING    toString		(pANTLR3_COMMON_TOKEN token)
558 {
559     pANTLR3_STRING  text;
560     pANTLR3_STRING  outtext;
561 
562     text    =	token->getText(token);
563 
564     if	(text == NULL)
565     {
566 		return NULL;
567     }
568 
569 	if	(text->factory == NULL)
570 	{
571 		return text;		// This usally means it is the EOF token
572 	}
573 
574     /* A new empty string to assemble all the stuff in
575      */
576     outtext = text->factory->newRaw(text->factory);
577 
578     /* Now we use our handy dandy string utility to assemble the
579      * the reporting string
580      * return "[@"+getTokenIndex()+","+start+":"+stop+"='"+txt+"',<"+type+">"+channelStr+","+line+":"+getCharPositionInLine()+"]";
581      */
582     outtext->append8(outtext, "[Index: ");
583     outtext->addi   (outtext, (ANTLR3_INT32)token->getTokenIndex(token));
584     outtext->append8(outtext, " (Start: ");
585     outtext->addi   (outtext, (ANTLR3_INT32)token->getStartIndex(token));
586     outtext->append8(outtext, "-Stop: ");
587     outtext->addi   (outtext, (ANTLR3_INT32)token->getStopIndex(token));
588     outtext->append8(outtext, ") ='");
589     outtext->appendS(outtext, text);
590     outtext->append8(outtext, "', type<");
591     outtext->addi   (outtext, token->type);
592     outtext->append8(outtext, "> ");
593 
594     if	(token->getChannel(token) > ANTLR3_TOKEN_DEFAULT_CHANNEL)
595     {
596 		outtext->append8(outtext, "(channel = ");
597 		outtext->addi	(outtext, (ANTLR3_INT32)token->getChannel(token));
598 		outtext->append8(outtext, ") ");
599     }
600 
601     outtext->append8(outtext, "Line: ");
602     outtext->addi   (outtext, (ANTLR3_INT32)token->getLine(token));
603     outtext->append8(outtext, " LinePos:");
604     outtext->addi   (outtext, token->getCharPositionInLine(token));
605     outtext->addc   (outtext, ']');
606 
607     return  outtext;
608 }
609 
610