1ANTLR_BEGIN_NAMESPACE()
2
3template<class ImplTraits, class SuperType>
4ANTLR_INLINE IntStream<ImplTraits, SuperType>::IntStream()
5{
6	m_lastMarker = 0;
7	m_upper_case = false;
8}
9
10template<class ImplTraits, class SuperType>
11ANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType	IntStream<ImplTraits, SuperType>::getSourceName()
12{
13	return m_streamName;
14}
15
16template<class ImplTraits, class SuperType>
17ANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType& 	IntStream<ImplTraits, SuperType>::get_streamName()
18{
19	return m_streamName;
20}
21
22template<class ImplTraits, class SuperType>
23ANTLR_INLINE const typename IntStream<ImplTraits, SuperType>::StringType& 	IntStream<ImplTraits, SuperType>::get_streamName() const
24{
25	return m_streamName;
26}
27
28template<class ImplTraits, class SuperType>
29ANTLR_INLINE ANTLR_MARKER IntStream<ImplTraits, SuperType>::get_lastMarker() const
30{
31	return m_lastMarker;
32}
33
34template<class ImplTraits, class SuperType>
35ANTLR_INLINE void	IntStream<ImplTraits, SuperType>::setUcaseLA(bool flag)
36{
37	m_upper_case = flag;
38}
39
40template<class ImplTraits, class SuperType>
41ANTLR_INLINE SuperType* IntStream<ImplTraits, SuperType>::get_super()
42{
43	return static_cast<SuperType*>(this);
44}
45
46template<class ImplTraits, class SuperType>
47void	IntStream<ImplTraits, SuperType>::consume()
48{
49	SuperType* input = this->get_super();
50
51	const ANTLR_UINT8* nextChar = input->get_nextChar();
52	const ANTLR_UINT8* data = input->get_data();
53	ANTLR_UINT32 sizeBuf = input->get_sizeBuf();
54
55    if	( nextChar < ( data + sizeBuf ) )
56    {
57		/* Indicate one more character in this line
58		 */
59		input->inc_charPositionInLine();
60
61		if  ((ANTLR_UCHAR)(*(nextChar)) == input->get_newlineChar() )
62		{
63			/* Reset for start of a new line of input
64			 */
65			input->inc_line();
66			input->set_charPositionInLine(0);
67			input->set_currentLine(nextChar + 1);
68		}
69
70		/* Increment to next character position
71		 */
72		input->set_nextChar( nextChar + 1 );
73    }
74}
75
76template<class ImplTraits, class SuperType>
77ANTLR_UINT32	IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la )
78{
79	SuperType* input = this->get_super();
80	const ANTLR_UINT8* nextChar = input->get_nextChar();
81	const ANTLR_UINT8* data = input->get_data();
82	ANTLR_UINT32 sizeBuf = input->get_sizeBuf();
83
84    if	(( nextChar + la - 1) >= (data + sizeBuf))
85    {
86		return	ANTLR_CHARSTREAM_EOF;
87    }
88    else
89    {
90		if( !m_upper_case )
91			return	(ANTLR_UCHAR)(*(nextChar + la - 1));
92		else
93			return	(ANTLR_UCHAR)toupper(*(nextChar + la - 1));
94    }
95}
96
97template<class ImplTraits, class SuperType>
98ANTLR_MARKER IntStream<ImplTraits, SuperType>::mark()
99{
100	LexState<ImplTraits>*	    state;
101    SuperType* input = this->get_super();
102
103    /* New mark point
104     */
105    input->inc_markDepth();
106
107    /* See if we are revisiting a mark as we can just reuse the vector
108     * entry if we are, otherwise, we need a new one
109     */
110    if	(input->get_markDepth() > input->get_markers().size() )
111    {
112		input->get_markers().push_back( LexState<ImplTraits>() );
113		LexState<ImplTraits>& state_r = input->get_markers().back();
114		state = &state_r;
115    }
116    else
117    {
118		LexState<ImplTraits>& state_r = input->get_markers().at( input->get_markDepth() - 1 );
119		state	= &state_r;
120
121		/* Assume no errors for speed, it will just blow up if the table failed
122		 * for some reasons, hence lots of unit tests on the tables ;-)
123		 */
124    }
125
126    /* We have created or retrieved the state, so update it with the current
127     * elements of the lexer state.
128     */
129    state->set_charPositionInLine( input->get_charPositionInLine() );
130    state->set_currentLine( input->get_currentLine() );
131    state->set_line( input->get_line() );
132    state->set_nextChar( input->get_nextChar() );
133
134    m_lastMarker = input->get_markDepth();
135
136    /* And that's it
137     */
138    return  input->get_markDepth();
139}
140
141template<class ImplTraits, class SuperType>
142ANTLR_MARKER	IntStream<ImplTraits, SuperType>::index()
143{
144	SuperType* input = this->get_super();
145	return input->index_impl();
146}
147
148template<class ImplTraits, class SuperType>
149void	IntStream<ImplTraits, SuperType>::rewind(ANTLR_MARKER mark)
150{
151    SuperType* input = this->get_super();
152
153    /* Perform any clean up of the marks
154     */
155    this->release(mark);
156
157    /* Find the supplied mark state
158     */
159	ANTLR_UINT32 idx = static_cast<ANTLR_UINT32>( mark-1 );
160    typename ImplTraits::LexStateType&   state = input->get_markers().at( idx );
161
162    /* Seek input pointer to the requested point (note we supply the void *pointer
163     * to whatever is implementing the int stream to seek).
164     */
165	this->seek( (ANTLR_MARKER)state.get_nextChar() );
166
167    /* Reset to the reset of the information in the mark
168     */
169    input->set_charPositionInLine( state.get_charPositionInLine() );
170    input->set_currentLine( state.get_currentLine() );
171    input->set_line( state.get_line() );
172    input->set_nextChar( state.get_nextChar() );
173
174    /* And we are done
175     */
176}
177
178template<class ImplTraits, class SuperType>
179void	IntStream<ImplTraits, SuperType>::rewindLast()
180{
181	this->rewind(m_lastMarker);
182}
183
184template<class ImplTraits, class SuperType>
185void	IntStream<ImplTraits, SuperType>::release(ANTLR_MARKER mark)
186{
187	SuperType* input = this->get_super();
188
189	/* We don't do much here in fact as we never free any higher marks in
190     * the hashtable as we just resuse any memory allocated for them.
191     */
192    input->set_markDepth( (ANTLR_UINT32)(mark - 1) );
193
194}
195
/**
 * Stream setup hook of the base integer stream.
 *
 * This implementation ignores both (unnamed) arguments and does nothing.
 */
template<class ImplTraits, class SuperType>
void IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool)
{
}
200
201template<class ImplTraits, class SuperType>
202void	IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
203{
204	ANTLR_INT32   count;
205	SuperType* input = this->get_super();
206
207	ANTLR_MARKER nextChar = (ANTLR_MARKER) input->get_nextChar();
208	/* If the requested seek point is less than the current
209	* input point, then we assume that we are resetting from a mark
210	* and do not need to scan, but can just set to there.
211	*/
212	if	(seekPoint <= nextChar)
213	{
214		input->set_nextChar((ANTLR_UINT8*) seekPoint);
215	}
216	else
217	{
218		count	= (ANTLR_UINT32)(seekPoint - nextChar);
219
220		while (count--)
221		{
222			this->consume();
223		}
224	}
225}
226
/**
 * Destructor. There is no explicit clean-up to perform here.
 */
template<class ImplTraits, class SuperType>
IntStream<ImplTraits, SuperType>::~IntStream()
{
}
231
232template<class ImplTraits, class SuperType>
233ANTLR_UINT32	EBCDIC_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la)
234{
235	// EBCDIC to ASCII conversion table
236	//
237	// This for EBCDIC EDF04 translated to ISO-8859.1 which is the usually accepted POSIX
238	// translation and the character tables are published all over the interweb.
239	//
240	const ANTLR_UCHAR e2a[256] =
241	{
242		0x00, 0x01, 0x02, 0x03, 0x85, 0x09, 0x86, 0x7f,
243		0x87, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
244		0x10, 0x11, 0x12, 0x13, 0x8f, 0x0a, 0x08, 0x97,
245		0x18, 0x19, 0x9c, 0x9d, 0x1c, 0x1d, 0x1e, 0x1f,
246		0x80, 0x81, 0x82, 0x83, 0x84, 0x92, 0x17, 0x1b,
247		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
248		0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
249		0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a,
250		0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5,
251		0xe7, 0xf1, 0x60, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
252		0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef,
253		0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x9f,
254		0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5,
255		0xc7, 0xd1, 0x5e, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
256		0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
257		0xcc, 0xa8, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
258		0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
259		0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1,
260		0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
261		0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4,
262		0xb5, 0xaf, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
263		0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0xdd, 0xde, 0xae,
264		0xa2, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc,
265		0xbd, 0xbe, 0xac, 0x5b, 0x5c, 0x5d, 0xb4, 0xd7,
266		0xf9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
267		0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
268		0xa6, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
269		0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xdb, 0xfa, 0xff,
270		0xd9, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
271		0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5,
272		0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
273		0x38, 0x39, 0xb3, 0x7b, 0xdc, 0x7d, 0xda, 0x7e
274	};
275
276	SuperType* input = this->get_super();
277
278    if	(( input->get_nextChar() + la - 1) >= ( input->get_data() + input->get_sizeBuf() ))
279    {
280        return	ANTLR_CHARSTREAM_EOF;
281    }
282    else
283    {
284        // Translate the required character via the constant conversion table
285        //
286        return	e2a[(*(input->get_nextChar() + la - 1))];
287    }
288}
289
290template<class ImplTraits, class SuperType>
291void EBCDIC_IntStream<ImplTraits, SuperType>::setupIntStream()
292{
293	SuperType* super = this->get_super();
294	super->set_charByteSize(1);
295}
296
297template<class ImplTraits, class SuperType>
298ANTLR_UINT32	UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 i)
299{
300	return this->_LA(i, ClassForwarder< typename ImplTraits::Endianness >() );
301}
302
303template<class ImplTraits, class SuperType>
304void UTF16_IntStream<ImplTraits, SuperType>::consume()
305{
306	this->consume( ClassForwarder< typename ImplTraits::Endianness >() );
307}
308
309template<class ImplTraits, class SuperType>
310ANTLR_MARKER	UTF16_IntStream<ImplTraits, SuperType>::index()
311{
312	SuperType* input = this->get_super();
313    return  (ANTLR_MARKER)(input->get_nextChar());
314}
315
316template<class ImplTraits, class SuperType>
317void UTF16_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
318{
319	SuperType* input = this->get_super();
320
321	// If the requested seek point is less than the current
322	// input point, then we assume that we are resetting from a mark
323	// and do not need to scan, but can just set to there as rewind will
324    // reset line numbers and so on.
325	//
326	if	(seekPoint <= (ANTLR_MARKER)(input->get_nextChar()))
327	{
328		input->set_nextChar( seekPoint );
329	}
330	else
331	{
332        // Call consume until we reach the asked for seek point or EOF
333        //
334        while( (this->_LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar() ) )
335	    {
336			this->consume();
337	    }
338	}
339}
340
341template<class ImplTraits, class SuperType>
342void IntStream<ImplTraits, SuperType>::findout_endian_spec(bool machineBigEndian, bool inputBigEndian)
343{
344	// We must install different UTF16 routines according to whether the input
345	// is the same endianess as the machine we are executing upon or not. If it is not
346	// then we must install methods that can convert the endianess on the fly as they go
347	//
348
349	if(machineBigEndian == true)
350	{
351		// Machine is Big Endian, if the input is also then install the
352		// methods that do not access input by bytes and reverse them.
353		// Otherwise install endian aware methods.
354		//
355		if  (inputBigEndian == true)
356		{
357			// Input is machine compatible
358			//
359			m_endian_spec = 1;
360		}
361		else
362		{
363			// Need to use methods that know that the input is little endian
364			//
365			m_endian_spec = 2;
366		}
367	}
368	else
369	{
370		// Machine is Little Endian, if the input is also then install the
371		// methods that do not access input by bytes and reverse them.
372		// Otherwise install endian aware methods.
373		//
374		if  (inputBigEndian == false)
375		{
376			// Input is machine compatible
377			//
378			m_endian_spec =  1;
379		}
380		else
381		{
382			// Need to use methods that know that the input is Big Endian
383			//
384			m_endian_spec	= 3;
385		}
386	}
387}
388
389template<class ImplTraits, class SuperType>
390void UTF16_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian)
391{
392	SuperType* super = this->get_super();
393	super->set_charByteSize(2);
394
395	this->findout_endian_spec( machineBigEndian, inputBigEndian );
396}
397
398template<class ImplTraits, class SuperType>
399ANTLR_UINT32 IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> )
400{
401	assert( (m_endian_spec >= 1) && (m_endian_spec <= 3));
402	switch(m_endian_spec)
403	{
404	case 1:
405		return this->_LA(i, ClassForwarder<BYTE_AGNOSTIC>() );
406		break;
407	case 2:
408		return this->_LA(i, ClassForwarder<ANTLR_LITTLE_ENDIAN>() );
409		break;
410	case 3:
411		return this->_LA(i, ClassForwarder<ANTLR_BIG_ENDIAN>() );
412		break;
413	default:
414		break;
415	}
416	return 0;
417}
418
419template<class ImplTraits, class SuperType>
420void	IntStream<ImplTraits, SuperType>::consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> )
421{
422	assert( (m_endian_spec >= 1) && (m_endian_spec <= 3));
423	switch(m_endian_spec)
424	{
425	case 1:
426		this->consume( ClassForwarder<BYTE_AGNOSTIC>() );
427		break;
428	case 2:
429		this->consume( ClassForwarder<ANTLR_LITTLE_ENDIAN>() );
430		break;
431	case 3:
432		this->consume( ClassForwarder<ANTLR_BIG_ENDIAN>() );
433		break;
434	default:
435		break;
436	}
437}
438
439template<class ImplTraits, class SuperType>
440ANTLR_UINT32	UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> )
441{
442	SuperType* input;
443    UTF32   ch;
444    UTF32   ch2;
445    UTF16*	nextChar;
446
447    // Find the input interface and where we are currently pointing to
448    // in the input stream
449    //
450	input   = this->get_super;
451	nextChar    = input->get_nextChar();
452
453    // If a positive offset then advance forward, else retreat
454    //
455    if  (la >= 0)
456    {
457        while   (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
458        {
459            // Advance our copy of the input pointer
460            //
461            // Next char in natural machine byte order
462            //
463            ch  = *nextChar++;
464
465            // If we have a surrogate pair then we need to consume
466            // a following valid LO surrogate.
467            //
468            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
469            {
470                // If the 16 bits following the high surrogate are in the source buffer...
471                //
472                if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
473                {
474                    // Next character is in natural machine byte order
475                    //
476                    ch2 = *nextChar;
477
478                    // If it's a valid low surrogate, consume it
479                    //
480                    if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
481                    {
482                        // We consumed one 16 bit character
483                        //
484						nextChar++;
485                    }
486                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
487                    // it.
488                    //
489                }
490                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
491                // it because the buffer ended
492                //
493            }
494            // Note that we did not check for an invalid low surrogate here, or that fact that the
495            // lo surrogate was missing. We just picked out one 16 bit character unless the character
496            // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
497            //
498        }
499    }
500    else
501    {
502        // We need to go backwards from our input point
503        //
504        while   (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
505        {
506            // Get the previous 16 bit character
507            //
508            ch = *--nextChar;
509
510            // If we found a low surrogate then go back one more character if
511            // the hi surrogate is there
512            //
513            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
514            {
515                ch2 = *(nextChar-1);
516                if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
517                {
518                    // Yes, there is a high surrogate to match it so decrement one more and point to that
519                    //
520                    nextChar--;
521                }
522            }
523        }
524    }
525
526    // Our local copy of nextChar is now pointing to either the correct character or end of file
527    //
528    // Input buffer size is always in bytes
529    //
530	if	( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
531	{
532		return	ANTLR_CHARSTREAM_EOF;
533	}
534	else
535	{
536        // Pick up the next 16 character (native machine byte order)
537        //
538        ch = *nextChar++;
539
540        // If we have a surrogate pair then we need to consume
541        // a following valid LO surrogate.
542        //
543        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
544        {
545            // If the 16 bits following the high surrogate are in the source buffer...
546            //
547            if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
548            {
549                // Next character is in natural machine byte order
550                //
551                ch2 = *nextChar;
552
553                // If it's a valid low surrogate, consume it
554                //
555                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
556                {
557                    // Construct the UTF32 code point
558                    //
559                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
560								+ (ch2 - UNI_SUR_LOW_START) + halfBase;
561                }
562                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
563                // it.
564                //
565            }
566            // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
567            // it because the buffer ended
568            //
569        }
570    }
571    return ch;
572}
573
574template<class ImplTraits, class SuperType>
575ANTLR_UINT32	UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> )
576{
577	SuperType* input;
578    UTF32           ch;
579    UTF32           ch2;
580    ANTLR_UCHAR*   nextChar;
581
582    // Find the input interface and where we are currently pointing to
583    // in the input stream
584    //
585	input       = this->get_super();
586    nextChar    = input->get_nextChar();
587
588    // If a positive offset then advance forward, else retreat
589    //
590    if  (la >= 0)
591    {
592        while   (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
593        {
594            // Advance our copy of the input pointer
595            //
596            // Next char in Little Endian byte order
597            //
598            ch  = (*nextChar) + (*(nextChar+1) << 8);
599            nextChar += 2;
600
601            // If we have a surrogate pair then we need to consume
602            // a following valid LO surrogate.
603            //
604            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
605            {
606                // If the 16 bits following the high surrogate are in the source buffer...
607                //
608                if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
609                {
610                    // Next character is in little endian byte order
611                    //
612                    ch2 = (*nextChar) + (*(nextChar+1) << 8);
613
614                    // If it's a valid low surrogate, consume it
615                    //
616                    if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
617                    {
618                        // We consumed one 16 bit character
619                        //
620						nextChar += 2;
621                    }
622                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
623                    // it.
624                    //
625                }
626                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
627                // it because the buffer ended
628                //
629            }
630            // Note that we did not check for an invalid low surrogate here, or that fact that the
631            // lo surrogate was missing. We just picked out one 16 bit character unless the character
632            // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
633            //
634        }
635    }
636    else
637    {
638        // We need to go backwards from our input point
639        //
640        while   (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
641        {
642            // Get the previous 16 bit character
643            //
644            ch = (*nextChar - 2) + ((*nextChar -1) << 8);
645            nextChar -= 2;
646
647            // If we found a low surrogate then go back one more character if
648            // the hi surrogate is there
649            //
650            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
651            {
652                ch2 = (*nextChar - 2) + ((*nextChar -1) << 8);
653                if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
654                {
655                    // Yes, there is a high surrogate to match it so decrement one more and point to that
656                    //
657                    nextChar -=2;
658                }
659            }
660        }
661    }
662
663    // Our local copy of nextChar is now pointing to either the correct character or end of file
664    //
665    // Input buffer size is always in bytes
666    //
667	if	( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
668	{
669		return	ANTLR_CHARSTREAM_EOF;
670	}
671	else
672	{
673        // Pick up the next 16 character (little endian byte order)
674        //
675        ch = (*nextChar) + (*(nextChar+1) << 8);
676        nextChar += 2;
677
678        // If we have a surrogate pair then we need to consume
679        // a following valid LO surrogate.
680        //
681        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
682        {
683            // If the 16 bits following the high surrogate are in the source buffer...
684            //
685            if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
686            {
687                // Next character is in little endian byte order
688                //
689                ch2 = (*nextChar) + (*(nextChar+1) << 8);
690
691                // If it's a valid low surrogate, consume it
692                //
693                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
694                {
695                    // Construct the UTF32 code point
696                    //
697                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
698								+ (ch2 - UNI_SUR_LOW_START) + halfBase;
699                }
700                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
701                // it.
702                //
703            }
704            // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
705            // it because the buffer ended
706            //
707        }
708    }
709    return ch;
710}
711
712template<class ImplTraits, class SuperType>
713ANTLR_UINT32	UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> )
714{
715	SuperType* input;
716    UTF32           ch;
717    UTF32           ch2;
718    ANTLR_UCHAR*   nextChar;
719
720    // Find the input interface and where we are currently pointing to
721    // in the input stream
722    //
723	input       = this->get_super();
724    nextChar    = input->get_nextChar();
725
726    // If a positive offset then advance forward, else retreat
727    //
728    if  (la >= 0)
729    {
730        while   (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
731        {
732            // Advance our copy of the input pointer
733            //
734            // Next char in Big Endian byte order
735            //
736            ch  = ((*nextChar) << 8) + *(nextChar+1);
737            nextChar += 2;
738
739            // If we have a surrogate pair then we need to consume
740            // a following valid LO surrogate.
741            //
742            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
743            {
744                // If the 16 bits following the high surrogate are in the source buffer...
745                //
746                if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
747                {
748                    // Next character is in big endian byte order
749                    //
750                    ch2 = ((*nextChar) << 8) + *(nextChar+1);
751
752                    // If it's a valid low surrogate, consume it
753                    //
754                    if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
755                    {
756                        // We consumed one 16 bit character
757                        //
758						nextChar += 2;
759                    }
760                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
761                    // it.
762                    //
763                }
764                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
765                // it because the buffer ended
766                //
767            }
768            // Note that we did not check for an invalid low surrogate here, or that fact that the
769            // lo surrogate was missing. We just picked out one 16 bit character unless the character
770            // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
771            //
772        }
773    }
774    else
775    {
776        // We need to go backwards from our input point
777        //
778        while   (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
779        {
780            // Get the previous 16 bit character
781            //
782            ch = ((*nextChar - 2) << 8) + (*nextChar -1);
783            nextChar -= 2;
784
785            // If we found a low surrogate then go back one more character if
786            // the hi surrogate is there
787            //
788            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
789            {
790                ch2 = ((*nextChar - 2) << 8) + (*nextChar -1);
791                if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
792                {
793                    // Yes, there is a high surrogate to match it so decrement one more and point to that
794                    //
795                    nextChar -=2;
796                }
797            }
798        }
799    }
800
801    // Our local copy of nextChar is now pointing to either the correct character or end of file
802    //
803    // Input buffer size is always in bytes
804    //
805	if	( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
806	{
807		return	ANTLR_CHARSTREAM_EOF;
808	}
809	else
810	{
811        // Pick up the next 16 character (big endian byte order)
812        //
813        ch = ((*nextChar) << 8) + *(nextChar+1);
814        nextChar += 2;
815
816        // If we have a surrogate pair then we need to consume
817        // a following valid LO surrogate.
818        //
819        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
820        {
821            // If the 16 bits following the high surrogate are in the source buffer...
822            //
823            if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
824            {
825                // Next character is in big endian byte order
826                //
827                ch2 = ((*nextChar) << 8) + *(nextChar+1);
828
829                // If it's a valid low surrogate, consume it
830                //
831                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
832                {
833                    // Construct the UTF32 code point
834                    //
835                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
836								+ (ch2 - UNI_SUR_LOW_START) + halfBase;
837                }
838                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
839                // it.
840                //
841            }
842            // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
843            // it because the buffer ended
844            //
845        }
846    }
847    return ch;
848}
849
850template<class ImplTraits, class SuperType>
851void	UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<BYTE_AGNOSTIC> )
852{
853	SuperType* input;
854    UTF32   ch;
855    UTF32   ch2;
856
857	input   = this->get_super();
858
859    // Buffer size is always in bytes
860    //
861	if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
862	{
863		// Indicate one more character in this line
864		//
865		input->inc_charPositionInLine();
866
867		if  ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
868		{
869			// Reset for start of a new line of input
870			//
871			input->inc_line();
872			input->set_charPositionInLine(0);
873			input->set_currentLine( input->get_nextChar() + 1 );
874		}
875
876		// Increment to next character position, accounting for any surrogates
877		//
878        // Next char in natural machine byte order
879        //
880        ch  = *(input->get_nextChar());
881
882        // We consumed one 16 bit character
883        //
884		input->set_nextChar( input->get_nextChar() + 1 );
885
886        // If we have a surrogate pair then we need to consume
887        // a following valid LO surrogate.
888        //
889        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
890
891            // If the 16 bits following the high surrogate are in the source buffer...
892            //
893            if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
894            {
895                // Next character is in natural machine byte order
896                //
897                ch2 = *(input->get_nextChar());
898
899                // If it's a valid low surrogate, consume it
900                //
901                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
902                {
903                    // We consumed one 16 bit character
904                    //
905					input->set_nextChar( input->get_nextChar() + 1 );
906                }
907                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
908                // it.
909                //
910            }
911            // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
912            // it because the buffer ended
913            //
914        }
915        // Note that we did not check for an invalid low surrogate here, or that fact that the
916        // lo surrogate was missing. We just picked out one 16 bit character unless the character
917        // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
918        //
919	}
920
921}
922
923template<class ImplTraits, class SuperType>
924void	UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> )
925{
926	SuperType* input;
927    UTF32   ch;
928    UTF32   ch2;
929
930	input   = this->get_super();
931
932    // Buffer size is always in bytes
933    //
934	if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
935	{
936		// Indicate one more character in this line
937		//
938		input->inc_charPositionInLine();
939
940		if  ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
941		{
942			// Reset for start of a new line of input
943			//
944			input->inc_line();
945			input->set_charPositionInLine(0);
946			input->set_currentLine(input->get_nextChar() + 1);
947		}
948
949		// Increment to next character position, accounting for any surrogates
950		//
951        // Next char in litle endian form
952        //
953        ch  = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8);
954
955        // We consumed one 16 bit character
956        //
957		input->set_nextChar( input->get_nextChar() + 1);
958
959        // If we have a surrogate pair then we need to consume
960        // a following valid LO surrogate.
961        //
962        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
963		{
964            // If the 16 bits following the high surrogate are in the source buffer...
965            //
966            if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
967            {
968                ch2 = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8);
969
970                // If it's a valid low surrogate, consume it
971                //
972                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
973                {
974                    // We consumed one 16 bit character
975                    //
976					input->set_nextChar( input->get_nextChar() + 1);
977                }
978                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
979                // it.
980                //
981            }
982            // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
983            // it because the buffer ended
984            //
985        }
986        // Note that we did not check for an invalid low surrogate here, or that fact that the
987        // lo surrogate was missing. We just picked out one 16 bit character unless the character
988        // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
989        //
990	}
991}
992
993template<class ImplTraits, class SuperType>
994void	UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_BIG_ENDIAN> )
995{
996	SuperType* input;
997    UTF32   ch;
998    UTF32   ch2;
999
1000	input   = this->get_super();
1001
1002    // Buffer size is always in bytes
1003    //
1004	if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
1005	{
1006		// Indicate one more character in this line
1007		//
1008		input->inc_charPositionInLine();
1009
1010		if  ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
1011		{
1012			// Reset for start of a new line of input
1013			//
1014			input->inc_line();
1015			input->set_charPositionInLine(0);
1016			input->set_currentLine(input->get_nextChar() + 1);
1017		}
1018
1019		// Increment to next character position, accounting for any surrogates
1020		//
1021        // Next char in big endian form
1022        //
1023        ch  = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8);
1024
1025        // We consumed one 16 bit character
1026        //
1027		input->set_nextChar( input->get_nextChar() + 1);
1028
1029        // If we have a surrogate pair then we need to consume
1030        // a following valid LO surrogate.
1031        //
1032        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
1033		{
1034            // If the 16 bits following the high surrogate are in the source buffer...
1035            //
1036            if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
1037            {
1038                // Big endian
1039                //
1040                ch2 = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8);
1041
1042                // If it's a valid low surrogate, consume it
1043                //
1044                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
1045                {
1046                    // We consumed one 16 bit character
1047                    //
1048					input->set_nextChar( input->get_nextChar() + 1);
1049                }
1050                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1051                // it.
1052                //
1053            }
1054            // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1055            // it because the buffer ended
1056            //
1057        }
1058        // Note that we did not check for an invalid low surrogate here, or that fact that the
1059        // lo surrogate was missing. We just picked out one 16 bit character unless the character
1060        // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
1061        //
1062	}
1063}
1064
1065template<class ImplTraits, class SuperType>
1066ANTLR_UINT32	UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 i)
1067{
1068	return this->_LA( i, ClassForwarder<typename ImplTraits::Endianness>() );
1069}
1070
1071template<class ImplTraits, class SuperType>
1072ANTLR_MARKER	UTF32_IntStream<ImplTraits, SuperType>::index()
1073{
1074	SuperType* input = this->get_super();
1075    return  (ANTLR_MARKER)(input->get_nextChar());
1076}
1077
1078template<class ImplTraits, class SuperType>
1079void UTF32_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
1080{
1081	SuperType* input;
1082
1083	input   = this->get_super();
1084
1085	// If the requested seek point is less than the current
1086	// input point, then we assume that we are resetting from a mark
1087	// and do not need to scan, but can just set to there as rewind will
1088        // reset line numbers and so on.
1089	//
1090	if	(seekPoint <= (ANTLR_MARKER)(input->get_nextChar()))
1091	{
1092		input->set_nextChar( static_cast<typename ImplTraits::DataType*>(seekPoint) );
1093	}
1094	else
1095	{
1096        // Call consume until we reach the asked for seek point or EOF
1097        //
1098        while( (this->_LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar()) )
1099	    {
1100			this->consume();
1101	    }
1102	}
1103
1104}
1105
1106template<class ImplTraits, class SuperType>
1107void UTF32_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian)
1108{
1109	SuperType* super = this->get_super();
1110	super->set_charByteSize(4);
1111
1112	this->findout_endian_spec(machineBigEndian, inputBigEndian);
1113}
1114
1115template<class ImplTraits, class SuperType>
1116ANTLR_UINT32	UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> )
1117{
1118    SuperType* input = this->get_super();
1119
1120    if	(( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 ))
1121    {
1122		return	ANTLR_CHARSTREAM_EOF;
1123    }
1124    else
1125    {
1126		return	(ANTLR_UCHAR)(*(input->get_nextChar() + la - 1));
1127    }
1128}
1129
1130template<class ImplTraits, class SuperType>
1131ANTLR_UINT32	UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> )
1132{
1133	SuperType* input = this->get_super();
1134
1135    if	(( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 ))
1136    {
1137		return	ANTLR_CHARSTREAM_EOF;
1138    }
1139    else
1140    {
1141        ANTLR_UCHAR   c;
1142
1143        c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1));
1144
1145        // Swap Endianess to Big Endian
1146        //
1147        return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24);
1148    }
1149}
1150
1151template<class ImplTraits, class SuperType>
1152ANTLR_UINT32	UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> )
1153{
1154	SuperType* input = this->get_super();
1155
1156    if	(( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 ))
1157    {
1158		return	ANTLR_CHARSTREAM_EOF;
1159    }
1160    else
1161    {
1162        ANTLR_UCHAR   c;
1163
1164        c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1));
1165
1166        // Swap Endianess to Little Endian
1167        //
1168        return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24);
1169    }
1170}
1171
1172template<class ImplTraits, class SuperType>
1173void	UTF32_IntStream<ImplTraits, SuperType>::consume()
1174{
1175	SuperType* input = this->get_super();
1176
1177    // SizeBuf is always in bytes
1178    //
1179	if	( input->get_nextChar()  < (input->get_data() + input->get_sizeBuf()/4 ))
1180    {
1181		/* Indicate one more character in this line
1182		 */
1183		input->inc_charPositionInLine();
1184
1185		if  ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
1186		{
1187			/* Reset for start of a new line of input
1188			 */
1189			input->inc_line();
1190			input->set_charPositionInLine(0);
1191			input->set_currentLine(	input->get_nextChar() + 1 );
1192		}
1193
1194		/* Increment to next character position
1195		 */
1196		input->set_nextChar( input->get_nextChar() + 1 );
1197    }
1198}
1199
1200template<class ImplTraits, class SuperType>
1201void UTF8_IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool)
1202{
1203	SuperType* super = this->get_super();
1204	super->set_charByteSize(0);
1205}
1206
1207// ------------------------------------------------------
1208// Following is from Unicode.org (see antlr3convertutf.c)
1209//
1210
1211/// Index into the table below with the first byte of a UTF-8 sequence to
1212/// get the number of trailing bytes that are supposed to follow it.
1213/// Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
1214/// left as-is for anyone who may want to do such conversion, which was
1215/// allowed in earlier algorithms.
1216///
1217template<class ImplTraits, class SuperType>
1218const ANTLR_UINT32* UTF8_IntStream<ImplTraits, SuperType>::TrailingBytesForUTF8()
1219{
1220	static const ANTLR_UINT32 trailingBytesForUTF8[256] = {
1221		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1222		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1223		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1224		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1225		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1226		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1227		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1228		2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
1229	};
1230
1231	return trailingBytesForUTF8;
1232}
1233
1234/// Magic values subtracted from a buffer value during UTF8 conversion.
1235/// This table contains as many values as there might be trailing bytes
1236/// in a UTF-8 sequence.
1237///
1238template<class ImplTraits, class SuperType>
1239const UTF32* UTF8_IntStream<ImplTraits, SuperType>::OffsetsFromUTF8()
1240{
1241	static const UTF32 offsetsFromUTF8[6] =
1242		{   0x00000000UL, 0x00003080UL, 0x000E2080UL,
1243			0x03C82080UL, 0xFA082080UL, 0x82082080UL
1244		};
1245	return 	offsetsFromUTF8;
1246}
1247
1248// End of Unicode.org tables
1249// -------------------------
1250
1251
1252/** \brief Consume the next character in a UTF8 input stream
1253 *
1254 * \param input Input stream context pointer
1255 */
1256template<class ImplTraits, class SuperType>
1257void UTF8_IntStream<ImplTraits, SuperType>::consume()
1258{
1259    SuperType* input = this->get_super();
1260	const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8();
1261	const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8();
1262
1263    ANTLR_UINT32           extraBytesToRead;
1264    ANTLR_UCHAR            ch;
1265    ANTLR_UINT8*           nextChar;
1266
1267    nextChar = input->get_nextChar();
1268
1269    if	(nextChar < (input->get_data() + input->get_sizeBuf()))
1270    {
1271		// Indicate one more character in this line
1272		//
1273		input->inc_charPositionInLine();
1274
1275        // Are there more bytes needed to make up the whole thing?
1276        //
1277        extraBytesToRead = trailingBytesForUTF8[*nextChar];
1278
1279        if	((nextChar + extraBytesToRead) >= (input->get_data() + input->get_sizeBuf()))
1280        {
1281            input->set_nextChar( input->get_data() + input->get_sizeBuf() );
1282            return;
1283        }
1284
1285        // Cases deliberately fall through (see note A in antlrconvertutf.c)
1286        // Legal UTF8 is only 4 bytes but 6 bytes could be used in old UTF8 so
1287        // we allow it.
1288        //
1289        ch  = 0;
1290       	switch (extraBytesToRead)
1291		{
1292			case 5: ch += *nextChar++; ch <<= 6;
1293			case 4: ch += *nextChar++; ch <<= 6;
1294			case 3: ch += *nextChar++; ch <<= 6;
1295			case 2: ch += *nextChar++; ch <<= 6;
1296			case 1: ch += *nextChar++; ch <<= 6;
1297			case 0: ch += *nextChar++;
1298		}
1299
1300        // Magically correct the input value
1301        //
1302		ch -= offsetsFromUTF8[extraBytesToRead];
1303		if  (ch == input->get_newlineChar())
1304		{
1305			/* Reset for start of a new line of input
1306			 */
1307			input->inc_line();
1308			input->set_charPositionInLine(0);
1309			input->set_currentLine(nextChar);
1310		}
1311
1312        // Update input pointer
1313        //
1314        input->set_nextChar(nextChar);
1315    }
1316}
1317
1318/** \brief Return the input element assuming a UTF8 input
1319 *
1320 * \param[in] input Input stream context pointer
1321 * \param[in] la 1 based offset of next input stream element
1322 *
1323 * \return Next input character in internal ANTLR3 encoding (UTF32)
1324 */
1325template<class ImplTraits, class SuperType>
1326ANTLR_UCHAR UTF8_IntStream<ImplTraits, SuperType>::_LA(ANTLR_INT32 la)
1327{
1328    SuperType* input = this->get_super();
1329	const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8();
1330	const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8();
1331    ANTLR_UINT32           extraBytesToRead;
1332    ANTLR_UCHAR            ch;
1333    ANTLR_UINT8*           nextChar;
1334
1335    nextChar = input->get_nextChar();
1336
1337    // Do we need to traverse forwards or backwards?
1338    // - LA(0) is treated as LA(1) and we assume that the nextChar is
1339    //   already positioned.
1340    // - LA(n+) ; n>1 means we must traverse forward n-1 characters catering for UTF8 encoding
1341    // - LA(-n) means we must traverse backwards n chracters
1342    //
1343    if (la > 1) {
1344
1345        // Make sure that we have at least one character left before trying to
1346        // loop through the buffer.
1347        //
1348        if	(nextChar < (input->get_data() + input->get_sizeBuf()))
1349        {
1350            // Now traverse n-1 characters forward
1351            //
1352            while (--la > 0)
1353            {
1354                // Does the next character require trailing bytes?
1355                // If so advance the pointer by that many bytes as well as advancing
1356                // one position for what will be at least a single byte character.
1357                //
1358                nextChar += trailingBytesForUTF8[*nextChar] + 1;
1359
1360                // Does that calculation take us past the byte length of the buffer?
1361                //
1362                if	(nextChar >= (input->get_data() + input->get_sizeBuf()))
1363                {
1364                    return ANTLR_CHARSTREAM_EOF;
1365                }
1366            }
1367        }
1368        else
1369        {
1370            return ANTLR_CHARSTREAM_EOF;
1371        }
1372    }
1373    else
1374    {
1375        // LA is negative so we decrease the pointer by n character positions
1376        //
1377        while   (nextChar > input->get_data() && la++ < 0)
1378        {
1379            // Traversing backwards in UTF8 means decermenting by one
1380            // then continuing to decrement while ever a character pattern
1381            // is flagged as being a trailing byte of an encoded code point.
1382            // Trailing UTF8 bytes always start with 10 in binary. We assumne that
1383            // the UTF8 is well formed and do not check boundary conditions
1384            //
1385            nextChar--;
1386            while ((*nextChar & 0xC0) == 0x80)
1387            {
1388                nextChar--;
1389            }
1390        }
1391    }
1392
1393    // nextChar is now pointing at the UTF8 encoded character that we need to
1394    // decode and return.
1395    //
1396    // Are there more bytes needed to make up the whole thing?
1397    //
1398    extraBytesToRead = trailingBytesForUTF8[*nextChar];
1399    if	(nextChar + extraBytesToRead >= (input->get_data() + input->get_sizeBuf()))
1400    {
1401        return ANTLR_CHARSTREAM_EOF;
1402    }
1403
1404    // Cases deliberately fall through (see note A in antlrconvertutf.c)
1405    //
1406    ch  = 0;
1407    switch (extraBytesToRead)
1408	{
1409        case 5: ch += *nextChar++; ch <<= 6;
1410        case 4: ch += *nextChar++; ch <<= 6;
1411        case 3: ch += *nextChar++; ch <<= 6;
1412        case 2: ch += *nextChar++; ch <<= 6;
1413        case 1: ch += *nextChar++; ch <<= 6;
1414        case 0: ch += *nextChar++;
1415    }
1416
1417    // Magically correct the input value
1418    //
1419    ch -= offsetsFromUTF8[extraBytesToRead];
1420
1421    return ch;
1422}
1423
1424template<class ImplTraits>
1425TokenIntStream<ImplTraits>::TokenIntStream()
1426{
1427	m_cachedSize = 0;
1428}
1429
1430template<class ImplTraits>
1431ANTLR_UINT32 TokenIntStream<ImplTraits>::get_cachedSize() const
1432{
1433	return m_cachedSize;
1434}
1435
1436template<class ImplTraits>
1437void TokenIntStream<ImplTraits>::set_cachedSize( ANTLR_UINT32 cachedSize )
1438{
1439	m_cachedSize = cachedSize;
1440}
1441
1442/** Move the input pointer to the next incoming token.  The stream
1443 *  must become active with LT(1) available.  consume() simply
1444 *  moves the input pointer so that LT(1) points at the next
1445 *  input symbol. Consume at least one token.
1446 *
1447 *  Walk past any token not on the channel the parser is listening to.
1448 */
1449template<class ImplTraits>
1450void TokenIntStream<ImplTraits>::consume()
1451{
1452	TokenStreamType* cts = static_cast<TokenStreamType*>(this);
1453
1454    if((ANTLR_UINT32)cts->get_p() < m_cachedSize )
1455	{
1456		cts->inc_p();
1457		cts->set_p( cts->skipOffTokenChannels(cts->get_p()) );
1458	}
1459}
1460template<class ImplTraits>
1461void  TokenIntStream<ImplTraits>::consumeInitialHiddenTokens()
1462{
1463	ANTLR_MARKER	first;
1464	ANTLR_INT32	i;
1465	TokenStreamType*	ts;
1466
1467	ts	    = this->get_super();
1468	first	= this->index();
1469
1470	for	(i=0; i<first; i++)
1471	{
1472		ts->get_debugger()->consumeHiddenToken(ts->get(i));
1473	}
1474
1475	ts->set_initialStreamState(false);
1476}
1477
1478
1479template<class ImplTraits>
1480ANTLR_UINT32	TokenIntStream<ImplTraits>::_LA( ANTLR_INT32 i )
1481{
1482	const CommonTokenType*    tok;
1483	TokenStreamType*    ts	    = static_cast<TokenStreamType*>(this);
1484
1485	tok	    =  ts->_LT(i);
1486
1487	if	(tok != NULL)
1488	{
1489		return	tok->get_type();
1490	}
1491	else
1492	{
1493		return	CommonTokenType::TOKEN_INVALID;
1494	}
1495
1496}
1497
1498template<class ImplTraits>
1499ANTLR_MARKER	TokenIntStream<ImplTraits>::mark()
1500{
1501    BaseType::m_lastMarker = this->index();
1502    return  BaseType::m_lastMarker;
1503}
1504
1505template<class ImplTraits>
1506ANTLR_UINT32 TokenIntStream<ImplTraits>::size()
1507{
1508    if (this->get_cachedSize() > 0)
1509    {
1510		return  this->get_cachedSize();
1511    }
1512    TokenStreamType* cts   = this->get_super();
1513
1514    this->set_cachedSize( static_cast<ANTLR_UINT32>(cts->get_tokens().size()) );
1515    return  this->get_cachedSize();
1516}
1517
1518template<class ImplTraits>
1519void	TokenIntStream<ImplTraits>::release()
1520{
1521    return;
1522}
1523
1524template<class ImplTraits>
1525ANTLR_MARKER   TokenIntStream<ImplTraits>::tindex()
1526{
1527	return this->get_super()->get_p();
1528}
1529
1530template<class ImplTraits>
1531void	TokenIntStream<ImplTraits>::rewindLast()
1532{
1533    this->rewind( this->get_lastMarker() );
1534}
1535
1536template<class ImplTraits>
1537void	TokenIntStream<ImplTraits>::rewind(ANTLR_MARKER marker)
1538{
1539	return this->seek(marker);
1540}
1541
1542template<class ImplTraits>
1543void	TokenIntStream<ImplTraits>::seek(ANTLR_MARKER index)
1544{
1545    TokenStreamType* cts = static_cast<TokenStreamType*>(this);
1546
1547    cts->set_p( static_cast<ANTLR_INT32>(index) );
1548}
1549
1550
1551/// Return a string that represents the name assoicated with the input source
1552///
1553/// /param[in] is The ANTLR3_INT_STREAM interface that is representing this token stream.
1554///
1555/// /returns
1556/// /implements ANTLR3_INT_STREAM_struct::getSourceName()
1557///
1558template<class ImplTraits>
1559typename TokenIntStream<ImplTraits>::StringType
1560TokenIntStream<ImplTraits>::getSourceName()
1561{
1562	// Slightly convoluted as we must trace back to the lexer's input source
1563	// via the token source. The streamName that is here is not initialized
1564	// because this is a token stream, not a file or string stream, which are the
1565	// only things that have a context for a source name.
1566	//
1567	return this->get_super()->get_tokenSource()->get_fileName();
1568}
1569
1570template<class ImplTraits>
1571void  TreeNodeIntStream<ImplTraits>::consume()
1572{
1573	CommonTreeNodeStreamType* ctns = this->get_super();
1574	if( ctns->get_p() == -1 )
1575		ctns->fillBufferRoot();
1576	ctns->inc_p();
1577}
1578template<class ImplTraits>
1579ANTLR_MARKER		TreeNodeIntStream<ImplTraits>::tindex()
1580{
1581	CommonTreeNodeStreamType* ctns = this->get_super();
1582	return (ANTLR_MARKER)(ctns->get_p());
1583}
1584
1585template<class ImplTraits>
1586ANTLR_UINT32		TreeNodeIntStream<ImplTraits>::_LA(ANTLR_INT32 i)
1587{
1588	CommonTreeNodeStreamType* tns	    = this->get_super();
1589
1590	// Ask LT for the 'token' at that position
1591	//
1592	TreeType* t = tns->_LT(i);
1593
1594	if	(t == NULL)
1595	{
1596		return	CommonTokenType::TOKEN_INVALID;
1597	}
1598
1599	// Token node was there so return the type of it
1600	//
1601	return  t->get_type();
1602}
1603
1604template<class ImplTraits>
1605ANTLR_MARKER	TreeNodeIntStream<ImplTraits>::mark()
1606{
1607	CommonTreeNodeStreamType* ctns	    = this->get_super();
1608
1609	if	(ctns->get_p() == -1)
1610	{
1611		ctns->fillBufferRoot();
1612	}
1613
1614	// Return the current mark point
1615	//
1616	this->set_lastMarker( this->index() );
1617
1618	return this->get_lastMarker();
1619
1620}
1621
1622template<class ImplTraits>
1623void  TreeNodeIntStream<ImplTraits>::release(ANTLR_MARKER marker)
1624{
1625
1626}
1627
1628template<class ImplTraits>
1629void TreeNodeIntStream<ImplTraits>::rewindMark(ANTLR_MARKER marker)
1630{
1631	this->seek(marker);
1632}
1633
1634template<class ImplTraits>
1635void TreeNodeIntStream<ImplTraits>::rewindLast()
1636{
1637	this->seek( this->get_lastMarker() );
1638}
1639
1640template<class ImplTraits>
1641void	TreeNodeIntStream<ImplTraits>::seek(ANTLR_MARKER index)
1642{
1643	CommonTreeNodeStreamType* ctns	    = this->get_super();
1644	ctns->set_p( ANTLR_UINT32_CAST(index) );
1645}
1646
1647template<class ImplTraits>
1648ANTLR_UINT32	TreeNodeIntStream<ImplTraits>::size()
1649{
1650	CommonTreeNodeStreamType* ctns	    = this->get_super();
1651
1652	if	(ctns->get_p() == -1)
1653	{
1654		ctns->fillBufferRoot();
1655	}
1656
1657	return ctns->get_nodes().size();
1658}
1659
1660
1661ANTLR_END_NAMESPACE()
1662