1 /** \file
2  * Defines the interface for an ANTLR3 common token stream. Custom token streams should create
3  * one of these and then override any functions by installing their own pointers
4  * to implement the various functions.
5  */
6 #ifndef	_ANTLR3_TOKENSTREAM_HPP
7 #define	_ANTLR3_TOKENSTREAM_HPP
8 
9 // [The "BSD licence"]
10 // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
11 
12 //
13 // All rights reserved.
14 //
15 // Redistribution and use in source and binary forms, with or without
16 // modification, are permitted provided that the following conditions
17 // are met:
18 // 1. Redistributions of source code must retain the above copyright
19 //    notice, this list of conditions and the following disclaimer.
20 // 2. Redistributions in binary form must reproduce the above copyright
21 //    notice, this list of conditions and the following disclaimer in the
22 //    documentation and/or other materials provided with the distribution.
23 // 3. The name of the author may not be used to endorse or promote products
24 //    derived from this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
27 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
28 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
30 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
31 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
35 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 
37 #include    "antlr3defs.hpp"
38 
39 /** Definition of a token source, which has a pointer to a function that
40  *  returns the next token (using a token factory if it is going to be
41  *  efficient) and a pointer to an ANTLR3_INPUT_STREAM. This is slightly
42  *  different to the Java interface because we have no way to implement
43  *  multiple interfaces without defining them in the interface structure
44  *  or casting (void *), which is too convoluted.
45  */
46 ANTLR_BEGIN_NAMESPACE()
47 
48 //We are not making it subclass AllocPolicy, as this will always be a base class
49 template<class ImplTraits>
50 class TokenSource
51 {
52 public:
53 	typedef typename ImplTraits::CommonTokenType TokenType;
54 	typedef TokenType CommonTokenType;
55 	typedef typename ImplTraits::StringType StringType;
56 	typedef typename ImplTraits::LexerType LexerType;
57 
58 private:
59     /** A special pre-allocated token, which signifies End Of Tokens. Because this must
60      *  be set up with the current input index and so on, we embed the structure and
61      *  return the address of it. It is marked as factoryMade, so that it is never
62      *  attempted to be freed.
63      */
64     TokenType				m_eofToken;
65 
66 	/// A special pre-allocated token, which is returned by mTokens() if the
67 	/// lexer rule said to just skip the generated token altogether.
68 	/// Having this single token stops us wasting memory by have the token factory
69 	/// actually create something that we are going to SKIP(); anyway.
70 	///
71 	TokenType				m_skipToken;
72 
73     /** When the token source is constructed, it is populated with the file
74      *  name from whence the tokens were produced by the lexer. This pointer is a
75      *  copy of the one supplied by the CharStream (and may be NULL) so should
76      *  not be manipulated other than to copy or print it.
77      */
78     StringType				m_fileName;
79 
80 public:
81 	TokenType& get_eofToken();
82 	const TokenType& get_eofToken() const;
83 	TokenType& get_skipToken();
84 	StringType& get_fileName();
85 	LexerType* get_super();
86 
87 	void set_fileName( const StringType& fileName );
88 
89 	/**
90 	 * \brief
91 	 * Default implementation of the nextToken() call for a lexer.
92 	 *
93 	 * \param toksource
94 	 * Points to the implementation of a token source. The lexer is
95 	 * addressed by the super structure pointer.
96 	 *
97 	 * \returns
98 	 * The next token in the current input stream or the EOF token
99 	 * if there are no more tokens in any input stream in the stack.
100 	 *
101 	 * Write detailed description for nextToken here.
102 	 *
103 	 * \remarks
104 	 * Write remarks for nextToken here.
105 	 *
106 	 * \see nextTokenStr
107 	 */
108     TokenType*  nextToken();
109 	CommonTokenType* nextToken( BoolForwarder<true> /*isFiltered*/ );
110 	CommonTokenType* nextToken( BoolForwarder<false> /*isFiltered*/ );
111 
112 	///
113 	/// \brief
114 	/// Returns the next available token from the current input stream.
115 	///
116 	/// \param toksource
117 	/// Points to the implementation of a token source. The lexer is
118 	/// addressed by the super structure pointer.
119 	///
120 	/// \returns
121 	/// The next token in the current input stream or the EOF token
122 	/// if there are no more tokens.
123 	///
124 	/// \remarks
125 	/// Write remarks for nextToken here.
126 	///
127 	/// \see nextToken
128 	///
129 	TokenType*	nextTokenStr();
130 
131 protected:
132 	TokenSource();
133 };
134 
135 /** Definition of the ANTLR3 common token stream interface.
136  * \remark
137  * Much of the documentation for this interface is stolen from Ter's Java implementation.
138  */
139 template<class ImplTraits>
140 class TokenStream  : public ImplTraits::TokenIntStreamType
141 {
142 public:
143 	typedef typename ImplTraits::TokenSourceType TokenSourceType;
144 	typedef typename ImplTraits::TokenIntStreamType IntStreamType;
145 	typedef typename ImplTraits::CommonTokenType TokenType;
146 	typedef TokenType UnitType;
147 	typedef typename ImplTraits::StringType StringType;
148 	typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType;
149 	typedef typename ImplTraits::TokenStreamType TokenStreamType;
150 	typedef typename ImplTraits::ParserType ComponentType;
151 
152 protected:
153     /** Pointer to the token source for this stream
154      */
155     TokenSourceType*    m_tokenSource;
156 
157 	/// Debugger interface, is this is a debugging token stream
158 	///
159 	DebugEventListenerType*	m_debugger;
160 
161 	/// Indicates the initial stream state for dbgConsume()
162 	///
163 	bool				m_initialStreamState;
164 
165 public:
166 	TokenStream(TokenSourceType* source, DebugEventListenerType* debugger);
167 	IntStreamType* get_istream();
168 	TokenSourceType* get_tokenSource() const;
169 	void set_tokenSource( TokenSourceType* tokenSource );
170 
171     /** Get Token at current input pointer + i ahead where i=1 is next Token.
172      *  i<0 indicates tokens in the past.  So -1 is previous token and -2 is
173      *  two tokens ago. LT(0) is undefined.  For i>=n, return Token.EOFToken.
174      *  Return null for LT(0) and any index that results in an absolute address
175      *  that is negative.
176      */
177     const TokenType*  _LT(ANTLR_INT32 k);
178 
179     /** Where is this stream pulling tokens from?  This is not the name, but
180      *  a pointer into an interface that contains a ANTLR3_TOKEN_SOURCE interface.
181      *  The Token Source interface contains a pointer to the input stream and a pointer
182      *  to a function that returns the next token.
183      */
184     TokenSourceType*   getTokenSource();
185 
186     /** Function that installs a token source for teh stream
187      */
188     void	setTokenSource(TokenSourceType*   tokenSource);
189 
190     /** Return the text of all the tokens in the stream, as the old tramp in
191      *  Leeds market used to say; "Get the lot!"
192      */
193     StringType	toString();
194 
195     /** Return the text of all tokens from start to stop, inclusive.
196      *  If the stream does not buffer all the tokens then it can just
197      *  return an empty ANTLR3_STRING or NULL;  Grammars should not access $ruleLabel.text in
198      *  an action in that case.
199      */
200     StringType	 toStringSS(ANTLR_MARKER start, ANTLR_MARKER stop);
201 
202     /** Because the user is not required to use a token with an index stored
203      *  in it, we must provide a means for two token objects themselves to
204      *  indicate the start/end location.  Most often this will just delegate
205      *  to the other toString(int,int).  This is also parallel with
206      *  the pTREENODE_STREAM->toString(Object,Object).
207      */
208     StringType	 toStringTT(const TokenType* start, const TokenType* stop);
209 
210 
211     /** Function that sets the token stream into debugging mode
212      */
213     void	setDebugListener(DebugEventListenerType* debugger);
214 
215 	TokenStream();
216 
217 };
218 
/** CommonTokenStream (declared below, after the TokenStoreSelector helper) is an
 *  implementation of ANTLR_TOKEN_STREAM for the default parsers and recognizers.
 *  You may of course build your own implementation if you are so inclined.
 */
/// Compile-time switch that picks the container type used to store tokens.
///
/// Primary template: chosen whenever the boolean flag is false. It exposes
/// the sequence container as TokensType and ignores the map type.
template<bool UseMapStorage, class SequenceT, class AssociativeT>
class TokenStoreSelector
{
public:
    typedef SequenceT TokensType;
};

/// Partial specialization for a true flag: exposes the map container as
/// TokensType and ignores the sequence type.
template<class SequenceT, class AssociativeT>
class TokenStoreSelector<true, SequenceT, AssociativeT>
{
public:
    typedef AssociativeT TokensType;
};
236 
237 template<class ImplTraits>
238 class	CommonTokenStream : public TokenStream<ImplTraits>
239 {
240 public:
241 	typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
242 	typedef typename ImplTraits::BitsetType BitsetType;
243 	typedef typename ImplTraits::CommonTokenType TokenType;
244 	typedef typename ImplTraits::TokenSourceType TokenSourceType;
245 	typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType;
246 	typedef typename AllocPolicyType::template ListType<TokenType> TokensListType;
247 	typedef typename AllocPolicyType::template OrderedMapType<ANTLR_MARKER, TokenType> TokensMapType;
248 	typedef typename TokenStoreSelector< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE,
249 	                                       TokensListType, TokensMapType >::TokensType TokensType;
250 
251 	typedef typename AllocPolicyType::template UnOrderedMapType<ANTLR_UINT32, ANTLR_UINT32> ChannelOverridesType;
252 	typedef typename AllocPolicyType::template OrderedSetType<ANTLR_UINT32> DiscardSetType;
253 	typedef typename AllocPolicyType::template ListType<ANTLR_UINT32> IntListType;
254 	typedef TokenStream<ImplTraits> BaseType;
255 
256 private:
257     /** Records every single token pulled from the source indexed by the token index.
258      *  There might be more efficient ways to do this, such as referencing directly in to
259      *  the token factory pools, but for now this is convenient and the ANTLR3_LIST is not
260      *  a huge overhead as it only stores pointers anyway, but allows for iterations and
261      *  so on.
262      */
263     TokensType			m_tokens;
264 
265     /** Override map of tokens. If a token type has an entry in here, then
266      *  the pointer in the table points to an int, being the override channel number
267      *  that should always be used for this token type.
268      */
269     ChannelOverridesType	m_channelOverrides;
270 
271     /** Discared set. If a token has an entry in this table, then it is thrown
272      *  away (data pointer is always NULL).
273      */
274     DiscardSetType			m_discardSet;
275 
276     /* The channel number that this token stream is tuned to. For instance, whitespace
277      * is usually tuned to channel 99, which no token stream would normally tune to and
278      * so it is thrown away.
279      */
280     ANTLR_UINT32			m_channel;
281 
282 	/** The index into the tokens list of the current token (the next one that will be
283      *  consumed. p = -1 indicates that the token list is empty.
284      */
285     ANTLR_INT32				m_p;
286 
287 	/* The total number of tokens issued till now. For streams that delete tokens,
288 	   this helps in issuing the index
289 	 */
290 	ANTLR_UINT32			m_nissued;
291 
292     /** If this flag is set to true, then tokens that the stream sees that are not
293      *  in the channel that this stream is tuned to, are not tracked in the
294      *  tokens table. When set to false, ALL tokens are added to the tracking.
295      */
296     bool					m_discardOffChannel;
297 
298 public:
299 	CommonTokenStream(ANTLR_UINT32 hint, TokenSourceType* source = NULL,
300 										DebugEventListenerType* debugger = NULL);
301 	~CommonTokenStream();
302 	TokensType& get_tokens();
303 	const TokensType& get_tokens() const;
304 	DiscardSetType& get_discardSet();
305 	const DiscardSetType& get_discardSet() const;
306 	ANTLR_INT32 get_p() const;
307 	void set_p( ANTLR_INT32 p );
308 	void inc_p();
309 	void dec_p();
310 
311     /** A simple filter mechanism whereby you can tell this token stream
312      *  to force all tokens of type ttype to be on channel.  For example,
313      *  when interpreting, we cannot exec actions so we need to tell
314      *  the stream to force all WS and NEWLINE to be a different, ignored
315      *  channel.
316      */
317     void setTokenTypeChannel(ANTLR_UINT32 ttype, ANTLR_UINT32 channel);
318 
319     /** Add a particular token type to the discard set. If a token is found to belong
320      *  to this set, then it is skipped/thrown away
321      */
322     void discardTokenType(ANTLR_INT32 ttype);
323 
324 	//This will discard tokens of a particular rule after the rule execution completion
325 	void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop );
326 	void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop,
327 								BoolForwarder<true>  tokens_accessed_from_owning_rule  );
328 	void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop,
329 								BoolForwarder<false>  tokens_accessed_from_owning_rule  );
330 
331 	void insertToken( const TokenType& tok );
332 	void insertToken( const TokenType& tok, BoolForwarder<true>  tokens_accessed_from_owning_rule  );
333 	void insertToken( const TokenType& tok, BoolForwarder<false>  tokens_accessed_from_owning_rule  );
334 
335 	/** Get a token at an absolute index i; 0..n-1.  This is really only
336      *  needed for profiling and debugging and token stream rewriting.
337      *  If you don't want to buffer up tokens, then this method makes no
338      *  sense for you.  Naturally you can't use the rewrite stream feature.
339      *  I believe DebugTokenStream can easily be altered to not use
340      *  this method, removing the dependency.
341      */
342     const TokenType*   get(ANTLR_MARKER i);
343 	const TokenType*   getToken(ANTLR_MARKER i);
344 	const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<true>  tokens_accessed_from_owning_rule );
345 	const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<false>  tokens_accessed_from_owning_rule  );
346 
347     /** Signal to discard off channel tokens from here on in.
348      */
349     void discardOffChannelToks(bool discard);
350 
351     /** Function that returns a pointer to the ANTLR3_LIST of all tokens
352      *  in the stream (this causes the buffer to fill if we have not get any yet)
353      */
354     TokensType*	getTokens();
355 
356     /** Function that returns all the tokens between a start and a stop index.
357      */
358     void getTokenRange(ANTLR_UINT32 start, ANTLR_UINT32 stop, TokensListType& tokenRange);
359 
360     /** Function that returns all the tokens indicated by the specified bitset, within a range of tokens
361      */
362     void getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 stop, BitsetType* types, TokensListType& tokenSet);
363 
364     /** Function that returns all the tokens indicated by being a member of the supplied List
365      */
366     void getTokensList(ANTLR_UINT32 start, ANTLR_UINT32 stop,
367 									const IntListType& list, TokensListType& tokenList);
368 
369     /** Function that returns all tokens of a certain type within a range.
370      */
371     void getTokensType(ANTLR_UINT32 start, ANTLR_UINT32 stop, ANTLR_UINT32 type, TokensListType& tokens);
372 
373     /** Function that resets the token stream so that it can be reused, but
374      *  but that does not free up any resources, such as the token factory
375      *  the factory pool and so on. This prevents the need to keep freeing
376      *  and reallocating the token pools if the thing you are building is
377      *  a multi-shot dameon or somethign like that. It is much faster to
378      *  just reuse all the vectors.
379      */
380     void  reset();
381 
382 	const TokenType* LB(ANTLR_INT32 k);
383 
384 
385 	void fillBufferExt();
386 	void fillBuffer();
387 
388 	bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<true>  tokens_accessed_from_owning_rule  );
389 	bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<false>  tokens_accessed_from_owning_rule  );
390 
391 	ANTLR_UINT32 skipOffTokenChannels(ANTLR_INT32 i);
392 	ANTLR_UINT32 skipOffTokenChannelsReverse(ANTLR_INT32 x);
393 	ANTLR_MARKER index_impl();
394 };
395 
/// Exception whose message reports an attempted access on a deleted token.
/// NOTE(review): presumably thrown by the token lookup code in the .inl when
/// a token that has already been discarded is requested; confirm there.
class TokenAccessException : public std::exception
{
public:
	/// Returns a fixed, statically allocated description of the failure.
	/// Declared public so the message can be read directly from a
	/// TokenAccessException as well as through a std::exception reference.
	virtual const char* what() const throw()
	{
		return " Attempted access on Deleted Token";
	}
};
403 
404 ANTLR_END_NAMESPACE()
405 
406 #include "antlr3tokenstream.inl"
407 
408 #endif
409