1ANTLR_BEGIN_NAMESPACE()
2
3template<class ImplTraits>
4InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* fileName, ANTLR_UINT32 encoding)
5{
6    // First order of business is to read the file into some buffer space
7    // as just straight 8 bit bytes. Then we will work out the encoding and
8    // byte order and adjust the API functions that are installed for the
9    // default 8Bit stream accordingly.
10    //
11    this->createFileStream(fileName);
12
13    // We have the data in memory now so we can deal with it according to
14    // the encoding scheme we were given by the user.
15    //
16    m_encoding = encoding;
17
18    // Now we need to work out the endian type and install any
19    // API functions that differ from 8Bit
20    //
21    this->setupInputStream();
22
23    // Now we can set up the file name
24    //
25    BaseType::m_streamName	= (const char* )fileName;
26    m_fileName		= BaseType::m_streamName;
27}
28
29template<class ImplTraits>
30InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name)
31{
32	// First order of business is to set up the stream and install the data pointer.
33    // Then we will work out the encoding and byte order and adjust the API functions that are installed for the
34    // default 8Bit stream accordingly.
35    //
36    this->createStringStream(data);
37
38    // Size (in bytes) of the given 'string'
39    //
40    m_sizeBuf		= size;
41
42    // We have the data in memory now so we can deal with it according to
43    // the encoding scheme we were given by the user.
44    //
45    m_encoding = encoding;
46
47    // Now we need to work out the endian type and install any
48    // API functions that differ from 8Bit
49    //
50    this->setupInputStream();
51
52    // Now we can set up the file name
53    //
54    BaseType::m_streamName	= (name == NULL ) ? "" : (const char*)name;
55    m_fileName		= BaseType::m_streamName;
56
57}
58
59template<class ImplTraits>
60void InputStream<ImplTraits>::createStringStream(const ANTLR_UINT8* data)
61{
62	if	(data == NULL)
63	{
64		ParseNullStringException ex;
65		throw ex;
66	}
67
68	// Structure was allocated correctly, now we can install the pointer
69	//
70    m_data             = data;
71    m_isAllocated	   = false;
72
73	// Call the common 8 bit input stream handler
74	// initialization.
75	//
76	this->genericSetupStream();
77}
78
79template<class ImplTraits>
80void InputStream<ImplTraits>::createFileStream(const ANTLR_UINT8* fileName)
81{
82	if	(fileName == NULL)
83	{
84		ParseFileAbsentException ex;
85		throw ex;
86	}
87
88	// Structure was allocated correctly, now we can read the file.
89	//
90	FileUtils<ImplTraits>::AntlrRead8Bit(this, fileName);
91
92	// Call the common 8 bit input stream handler
93	// initialization.
94	//
95	this->genericSetupStream();
96}
97
98template<class ImplTraits>
99void InputStream<ImplTraits>::genericSetupStream()
100{
101	this->set_charByteSize(1);
102
103    /* Set up the input stream brand new
104     */
105    this->reset();
106
107    /* Install default line separator character (it can be replaced
108     * by the grammar programmer later)
109     */
110    this->set_newLineChar((ANTLR_UCHAR)'\n');
111}
112
113template<class ImplTraits>
114InputStream<ImplTraits>::~InputStream()
115{
116	// Free the input stream buffer if we allocated it
117    //
118    if	(m_isAllocated && (m_data != NULL))
119		AllocPolicyType::free((void*)m_data); //const_cast is required
120}
121
122template<class ImplTraits>
123ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_data() const
124{
125	return m_data;
126}
127template<class ImplTraits>
128ANTLR_INLINE bool InputStream<ImplTraits>::get_isAllocated() const
129{
130	return m_isAllocated;
131}
132template<class ImplTraits>
133ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_nextChar() const
134{
135	return m_nextChar;
136}
137template<class ImplTraits>
138ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_sizeBuf() const
139{
140	return m_sizeBuf;
141}
142template<class ImplTraits>
143ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_line() const
144{
145	return m_line;
146}
147template<class ImplTraits>
148ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_currentLine() const
149{
150	return m_currentLine;
151}
152template<class ImplTraits>
153ANTLR_INLINE ANTLR_INT32 InputStream<ImplTraits>::get_charPositionInLine() const
154{
155	return m_charPositionInLine;
156}
157template<class ImplTraits>
158ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_markDepth() const
159{
160	return m_markDepth;
161}
162template<class ImplTraits>
163ANTLR_INLINE typename InputStream<ImplTraits>::MarkersType& InputStream<ImplTraits>::get_markers()
164{
165	return m_markers;
166}
167template<class ImplTraits>
168ANTLR_INLINE const typename InputStream<ImplTraits>::StringType& InputStream<ImplTraits>::get_fileName() const
169{
170	return m_fileName;
171}
172template<class ImplTraits>
173ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_fileNo() const
174{
175	return m_fileNo;
176}
177template<class ImplTraits>
178ANTLR_INLINE ANTLR_UCHAR InputStream<ImplTraits>::get_newlineChar() const
179{
180	return m_newlineChar;
181}
182template<class ImplTraits>
183ANTLR_INLINE ANTLR_UINT8 InputStream<ImplTraits>::get_charByteSize() const
184{
185	return m_charByteSize;
186}
187template<class ImplTraits>
188ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_encoding() const
189{
190	return m_encoding;
191}
192template<class ImplTraits>
193ANTLR_INLINE void InputStream<ImplTraits>::set_data( DataType* data )
194{
195	m_data = data;
196}
197template<class ImplTraits>
198ANTLR_INLINE void InputStream<ImplTraits>::set_isAllocated( bool isAllocated )
199{
200	m_isAllocated = isAllocated;
201}
202template<class ImplTraits>
203ANTLR_INLINE void InputStream<ImplTraits>::set_nextChar( const DataType* nextChar )
204{
205	m_nextChar = nextChar;
206}
207template<class ImplTraits>
208ANTLR_INLINE void InputStream<ImplTraits>::set_sizeBuf( ANTLR_UINT32 sizeBuf )
209{
210	m_sizeBuf = sizeBuf;
211}
212template<class ImplTraits>
213ANTLR_INLINE void InputStream<ImplTraits>::set_line( ANTLR_UINT32 line )
214{
215	m_line = line;
216}
217template<class ImplTraits>
218ANTLR_INLINE void InputStream<ImplTraits>::set_currentLine( const DataType* currentLine )
219{
220	m_currentLine = currentLine;
221}
222template<class ImplTraits>
223ANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine )
224{
225	m_charPositionInLine = charPositionInLine;
226}
227template<class ImplTraits>
228ANTLR_INLINE void InputStream<ImplTraits>::set_markDepth( ANTLR_UINT32 markDepth )
229{
230	m_markDepth = markDepth;
231}
232template<class ImplTraits>
233ANTLR_INLINE void InputStream<ImplTraits>::set_markers( const MarkersType& markers )
234{
235	m_markers = markers;
236}
237template<class ImplTraits>
238ANTLR_INLINE void InputStream<ImplTraits>::set_fileName( const StringType& fileName )
239{
240	m_fileName = fileName;
241}
242template<class ImplTraits>
243ANTLR_INLINE void InputStream<ImplTraits>::set_fileNo( ANTLR_UINT32 fileNo )
244{
245	m_fileNo = fileNo;
246}
247template<class ImplTraits>
248ANTLR_INLINE void InputStream<ImplTraits>::set_newlineChar( ANTLR_UCHAR newlineChar )
249{
250	m_newlineChar = newlineChar;
251}
252template<class ImplTraits>
253ANTLR_INLINE void InputStream<ImplTraits>::set_charByteSize( ANTLR_UINT8 charByteSize )
254{
255	m_charByteSize = charByteSize;
256}
257template<class ImplTraits>
258ANTLR_INLINE void InputStream<ImplTraits>::set_encoding( ANTLR_UINT32 encoding )
259{
260	m_encoding = encoding;
261}
262
263template<class ImplTraits>
264ANTLR_INLINE void InputStream<ImplTraits>::inc_charPositionInLine()
265{
266	++m_charPositionInLine;
267}
268
269template<class ImplTraits>
270ANTLR_INLINE void InputStream<ImplTraits>::inc_line()
271{
272	++m_line;
273}
274
275template<class ImplTraits>
276ANTLR_INLINE void InputStream<ImplTraits>::inc_markDepth()
277{
278	++m_markDepth;
279}
280
281template<class ImplTraits>
282ANTLR_INLINE void	InputStream<ImplTraits>::reset()
283{
284	m_nextChar		= m_data;	/* Input at first character */
285    m_line			= 1;		/* starts at line 1	    */
286    m_charPositionInLine	= 0;
287    m_currentLine		= m_data;
288    m_markDepth		= 0;		/* Reset markers	    */
289
290    /* Clear out up the markers table if it is there
291     */
292	m_markers.clear();
293}
294
295template<class ImplTraits>
296void    InputStream<ImplTraits>::reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name)
297{
298	m_isAllocated	= false;
299    m_data		= inString;
300    m_sizeBuf	= size;
301
302    // Now we can set up the file name. As we are reusing the stream, there may already
303    // be a string that we can reuse for holding the filename.
304    //
305	if	( BaseType::m_streamName.empty() )
306	{
307		BaseType::m_streamName	= ((name == NULL) ? "-memory-" : (const char *)name);
308		m_fileName		= BaseType::m_streamName;
309	}
310	else
311	{
312		BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name);
313	}
314
315    this->reset();
316}
317
318/*
319template<class ImplTraits>
320typename InputStream<ImplTraits>::DataType*	InputStream<ImplTraits>::_LT(ANTLR_INT32 lt)
321{
322	return this->_LA(lt);
323}
324*/
325
326template<class ImplTraits>
327ANTLR_UINT32	InputStream<ImplTraits>::size()
328{
329	return m_sizeBuf;
330}
331
332template<class ImplTraits>
333ANTLR_MARKER	InputStream<ImplTraits>::index_impl()
334{
335	return (ANTLR_MARKER)m_nextChar;
336}
337
338
339template<class ImplTraits>
340typename InputStream<ImplTraits>::StringType	InputStream<ImplTraits>::substr(ANTLR_MARKER start, ANTLR_MARKER stop)
341{
342	std::size_t len = static_cast<std::size_t>( (stop-start)/sizeof(DataType) + 1 );
343	StringType str( (const char*)start, len );
344	return str;
345}
346
347template<class ImplTraits>
348ANTLR_UINT32	InputStream<ImplTraits>::get_line()
349{
350	return m_line;
351}
352
353template<class ImplTraits>
354const typename InputStream<ImplTraits>::DataType*	InputStream<ImplTraits>::getLineBuf()
355{
356	return m_currentLine;
357}
358
359template<class ImplTraits>
360ANTLR_INLINE ANTLR_UINT32	InputStream<ImplTraits>::get_charPositionInLine()
361{
362	return m_charPositionInLine;
363}
364
365template<class ImplTraits>
366ANTLR_INLINE void	InputStream<ImplTraits>::set_charPositionInLine(ANTLR_UINT32 position)
367{
368	m_charPositionInLine = position;
369}
370
371template<class ImplTraits>
372void	InputStream<ImplTraits>::set_newLineChar(ANTLR_UINT32 newlineChar)
373{
374	m_newlineChar = newlineChar;
375}
376
377template<class ImplTraits>
378ANTLR_INLINE LexState<ImplTraits>::LexState()
379{
380	m_nextChar = NULL;
381	m_line = 0;
382	m_currentLine = NULL;
383	m_charPositionInLine = 0;
384}
385
386template<class ImplTraits>
387ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_nextChar() const
388{
389	return m_nextChar;
390}
391
392template<class ImplTraits>
393ANTLR_INLINE ANTLR_UINT32 LexState<ImplTraits>::get_line() const
394{
395	return m_line;
396}
397
398template<class ImplTraits>
399ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_currentLine() const
400{
401	return m_currentLine;
402}
403
404template<class ImplTraits>
405ANTLR_INLINE ANTLR_INT32 LexState<ImplTraits>::get_charPositionInLine() const
406{
407	return m_charPositionInLine;
408}
409
410template<class ImplTraits>
411ANTLR_INLINE void LexState<ImplTraits>::set_nextChar( const DataType* nextChar )
412{
413	m_nextChar = nextChar;
414}
415
416template<class ImplTraits>
417ANTLR_INLINE void LexState<ImplTraits>::set_line( ANTLR_UINT32 line )
418{
419	m_line = line;
420}
421
422template<class ImplTraits>
423ANTLR_INLINE void LexState<ImplTraits>::set_currentLine( const DataType* currentLine )
424{
425	m_currentLine = currentLine;
426}
427
428template<class ImplTraits>
429ANTLR_INLINE void LexState<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine )
430{
431	m_charPositionInLine = charPositionInLine;
432}
433
434template<class ImplTraits>
435ANTLR_INLINE typename InputStream<ImplTraits>::IntStreamType*	InputStream<ImplTraits>::get_istream()
436{
437	return this;
438}
439
440template<class ImplTraits>
441void InputStream<ImplTraits>::setupInputStream()
442{
443	bool  isBigEndian;
444
445    // Used to determine the endianness of the machine we are currently
446    // running on.
447    //
448    ANTLR_UINT16 bomTest = 0xFEFF;
449
450    // What endianess is the machine we are running on? If the incoming
451    // encoding endianess is the same as this machine's natural byte order
452    // then we can use more efficient API calls.
453    //
454    if  (*((ANTLR_UINT8*)(&bomTest)) == 0xFE)
455    {
456        isBigEndian = true;
457    }
458    else
459    {
460        isBigEndian = false;
461    }
462
463    // What encoding did the user tell us {s}he thought it was? I am going
464    // to get sick of the questions on antlr-interest, I know I am.
465    //
466    switch  (m_encoding)
467    {
468        case    ANTLR_ENC_UTF8:
469
470            // See if there is a BOM at the start of this UTF-8 sequence
471            // and just eat it if there is. Windows .TXT files have this for instance
472            // as it identifies UTF-8 even though it is of no consequence for byte order
473            // as UTF-8 does not have a byte order.
474            //
475            if  (       (*(m_nextChar))      == 0xEF
476                    &&  (*(m_nextChar+1))    == 0xBB
477                    &&  (*(m_nextChar+2))    == 0xBF
478                )
479            {
480                // The UTF8 BOM is present so skip it
481                //
482                m_nextChar += 3;
483            }
484
485            // Install the UTF8 input routines
486            //
487			this->setupIntStream( isBigEndian, isBigEndian );
488			this->set_charByteSize(0);
489            break;
490
491        case    ANTLR_ENC_UTF16:
492
493            // See if there is a BOM at the start of the input. If not then
494            // we assume that the byte order is the natural order of this
495            // machine (or it is really UCS2). If there is a BOM we determine if the encoding
496            // is the same as the natural order of this machine.
497            //
498            if  (       (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar))      == 0xFE
499                    &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1))    == 0xFF
500                )
501            {
502                // BOM Present, indicates Big Endian
503                //
504                m_nextChar += 1;
505
506				this->setupIntStream( isBigEndian, true );
507            }
508            else if  (      (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar))      == 0xFF
509                        &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1))    == 0xFE
510                )
511            {
512                // BOM present, indicates Little Endian
513                //
514                m_nextChar += 1;
515
516                this->setupIntStream( isBigEndian, false );
517            }
518            else
519            {
520                // No BOM present, assume local computer byte order
521                //
522                this->setupIntStream(isBigEndian, isBigEndian);
523            }
524			this->set_charByteSize(2);
525            break;
526
527        case    ANTLR_ENC_UTF32:
528
529            // See if there is a BOM at the start of the input. If not then
530            // we assume that the byte order is the natural order of this
531            // machine. If there is we determine if the encoding
532            // is the same as the natural order of this machine.
533            //
534            if  (       (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar))      == 0x00
535                    &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1))    == 0x00
536                    &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+2))    == 0xFE
537                    &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+3))    == 0xFF
538                )
539            {
540                // BOM Present, indicates Big Endian
541                //
542                m_nextChar += 1;
543
544                this->setupIntStream(isBigEndian, true);
545            }
546            else if  (      (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar))      == 0xFF
547                        &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1))    == 0xFE
548                        &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1))    == 0x00
549                        &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1))    == 0x00
550                )
551            {
552                // BOM present, indicates Little Endian
553                //
554                m_nextChar += 1;
555
556				this->setupIntStream( isBigEndian, false );
557            }
558            else
559            {
560                // No BOM present, assume local computer byte order
561                //
562				this->setupIntStream( isBigEndian, isBigEndian );
563            }
564			this->set_charByteSize(4);
565            break;
566
567        case    ANTLR_ENC_UTF16BE:
568
569            // Encoding is definately Big Endian with no BOM
570            //
571			this->setupIntStream( isBigEndian, true );
572			this->set_charByteSize(2);
573            break;
574
575        case    ANTLR_ENC_UTF16LE:
576
577            // Encoding is definately Little Endian with no BOM
578            //
579            this->setupIntStream( isBigEndian, false );
580			this->set_charByteSize(2);
581            break;
582
583        case    ANTLR_ENC_UTF32BE:
584
585            // Encoding is definately Big Endian with no BOM
586            //
587			this->setupIntStream( isBigEndian, true );
588			this->set_charByteSize(4);
589            break;
590
591        case    ANTLR_ENC_UTF32LE:
592
593            // Encoding is definately Little Endian with no BOM
594            //
595			this->setupIntStream( isBigEndian, false );
596			this->set_charByteSize(4);
597            break;
598
599        case    ANTLR_ENC_EBCDIC:
600
601            // EBCDIC is basically the same as ASCII but with an on the
602            // fly translation to ASCII
603            //
604            this->setupIntStream( isBigEndian, isBigEndian );
605			this->set_charByteSize(1);
606            break;
607
608        case    ANTLR_ENC_8BIT:
609        default:
610
611            // Standard 8bit/ASCII
612            //
613            this->setupIntStream( isBigEndian, isBigEndian );
614			this->set_charByteSize(1);
615            break;
616    }
617}
618
619ANTLR_END_NAMESPACE()
620