/** \file
 * While the C runtime does not need to model the state of
 * multiple lexers and parsers in the same way as the Java runtime does,
 * it is no overhead to reflect that model. In fact the
 * C runtime has always been able to share recognizer state.
 *
 * This 'class' therefore defines all the elements of a recognizer
 * (either lexer, parser or tree parser) that are needed to
 * track the current recognition state. Multiple recognizers
 * may then share this state, for instance when one grammar
 * imports another.
 */

#ifndef _ANTLR3_RECOGNIZER_SHARED_STATE_HPP
#define _ANTLR3_RECOGNIZER_SHARED_STATE_HPP

// [The "BSD licence"]
// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB

//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
37 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 38 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 39 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 40 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 41 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 42 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 43 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 44 45 #include "antlr3defs.hpp" 46 47 ANTLR_BEGIN_NAMESPACE() 48 49 /** All the data elements required to track the current state 50 * of any recognizer (lexer, parser, tree parser). 51 * May be share between multiple recognizers such that 52 * grammar inheritance is easily supported. 53 */ 54 template<class ImplTraits, class StreamType> 55 class RecognizerSharedState : public ImplTraits::AllocPolicyType 56 { 57 public: 58 typedef typename ImplTraits::AllocPolicyType AllocPolicyType; 59 typedef typename StreamType::UnitType TokenType; 60 typedef typename ImplTraits::CommonTokenType CommonTokenType; 61 62 typedef typename ComponentTypeFinder<ImplTraits, StreamType>::ComponentType ComponentType; 63 typedef typename ImplTraits::template RewriteStreamType< ComponentType > RewriteStreamType; 64 typedef typename ImplTraits::StringType StringType; 65 typedef typename ImplTraits::TokenSourceType TokenSourceType; 66 typedef typename ImplTraits::template ExceptionBaseType<StreamType> ExceptionBaseType; 67 typedef typename ImplTraits::BitsetType BitsetType; 68 typedef typename ImplTraits::BitsetListType BitsetListType; 69 70 typedef typename AllocPolicyType::template StackType< BitsetListType > FollowingType; 71 typedef typename AllocPolicyType::template StackType< typename ImplTraits::InputStreamType* > InputStreamsType; 72 typedef InputStreamsType StreamsType; 73 typedef typename AllocPolicyType::template VectorType<RewriteStreamType> 
RewriteStreamsType; 74 75 typedef IntTrie<ImplTraits, ANTLR_MARKER> RuleListType; 76 typedef IntTrie<ImplTraits, RuleListType*> RuleMemoType; 77 78 private: 79 /** Points to the first in a possible chain of exceptions that the 80 * recognizer has discovered. 81 */ 82 ExceptionBaseType* m_exception; 83 84 85 /** Track the set of token types that can follow any rule invocation. 86 * Stack structure, to support: List<BitSet>. 87 */ 88 FollowingType m_following; 89 90 /** Track around a hint from the creator of the recognizer as to how big this 91 * thing is going to get, as the actress said to the bishop. This allows us 92 * to tune hash tables accordingly. This might not be the best place for this 93 * in the end but we will see. 94 */ 95 ANTLR_UINT32 m_sizeHint; 96 97 98 /** If set to true then the recognizer has an exception 99 * condition (this is tested by the generated code for the rules of 100 * the grammar). 101 */ 102 bool m_error; 103 104 105 /** This is true when we see an error and before having successfully 106 * matched a token. Prevents generation of more than one error message 107 * per error. 108 */ 109 bool m_errorRecovery; 110 111 /** In lieu of a return value, this indicates that a rule or token 112 * has failed to match. Reset to false upon valid token match. 113 */ 114 bool m_failed; 115 116 /* 117 Instead of allocating CommonTokenType, we do it in the stack. hence we need a null indicator 118 */ 119 bool m_token_present; 120 121 /** The index into the input stream where the last error occurred. 122 * This is used to prevent infinite loops where an error is found 123 * but no token is consumed during recovery...another error is found, 124 * ad nauseam. This is a failsafe mechanism to guarantee that at least 125 * one token/tree node is consumed for two errors. 
126 */ 127 ANTLR_MARKER m_lastErrorIndex; 128 129 /** When the recognizer terminates, the error handling functions 130 * will have incremented this value if any error occurred (that was displayed). It can then be 131 * used by the grammar programmer without having to use static globals. 132 */ 133 ANTLR_UINT32 m_errorCount; 134 135 /** If 0, no backtracking is going on. Safe to exec actions etc... 136 * If >0 then it's the level of backtracking. 137 */ 138 ANTLR_INT32 m_backtracking; 139 140 /** ANTLR3_VECTOR of ANTLR3_LIST for rule memoizing. 141 * Tracks the stop token index for each rule. ruleMemo[ruleIndex] is 142 * the memoization table for ruleIndex. For key ruleStartIndex, you 143 * get back the stop token for associated rule or MEMO_RULE_FAILED. 144 * 145 * This is only used if rule memoization is on. 146 */ 147 RuleMemoType* m_ruleMemo; 148 149 /** Pointer to an array of token names 150 * that are generally useful in error reporting. The generated parsers install 151 * this pointer. The table it points to is statically allocated as 8 bit ascii 152 * at parser compile time - grammar token names are thus restricted in character 153 * sets, which does not seem to terrible. 154 */ 155 ANTLR_UINT8** m_tokenNames; 156 157 /** The goal of all lexer rules/methods is to create a token object. 158 * This is an instance variable as multiple rules may collaborate to 159 * create a single token. For example, NUM : INT | FLOAT ; 160 * In this case, you want the INT or FLOAT rule to set token and not 161 * have it reset to a NUM token in rule NUM. 162 */ 163 CommonTokenType m_token; 164 165 /** A lexer is a source of tokens, produced by all the generated (or 166 * hand crafted if you like) matching rules. As such it needs to provide 167 * a token source interface implementation. 
For others, this will become a empty class 168 */ 169 TokenSourceType* m_tokSource; 170 171 /** The channel number for the current token 172 */ 173 ANTLR_UINT32 m_channel; 174 175 /** The token type for the current token 176 */ 177 ANTLR_UINT32 m_type; 178 179 /** The input line (where it makes sense) on which the first character of the current 180 * token resides. 181 */ 182 ANTLR_INT32 m_tokenStartLine; 183 184 /** The character position of the first character of the current token 185 * within the line specified by tokenStartLine 186 */ 187 ANTLR_INT32 m_tokenStartCharPositionInLine; 188 189 /** What character index in the stream did the current token start at? 190 * Needed, for example, to get the text for current token. Set at 191 * the start of nextToken. 192 */ 193 ANTLR_MARKER m_tokenStartCharIndex; 194 195 /** Text for the current token. This can be overridden by setting this 196 * variable directly or by using the SETTEXT() macro (preferred) in your 197 * lexer rules. 198 */ 199 StringType m_text; 200 201 /** Input stream stack, which allows the C programmer to switch input streams 202 * easily and allow the standard nextToken() implementation to deal with it 203 * as this is a common requirement. 
204 */ 205 InputStreamsType m_streams; 206 207 public: 208 RecognizerSharedState(); 209 ExceptionBaseType* get_exception() const; 210 FollowingType& get_following(); 211 ANTLR_UINT32 get_sizeHint() const; 212 bool get_error() const; 213 bool get_errorRecovery() const; 214 bool get_failed() const; 215 bool get_token_present() const; 216 ANTLR_MARKER get_lastErrorIndex() const; 217 ANTLR_UINT32 get_errorCount() const; 218 ANTLR_INT32 get_backtracking() const; 219 RuleMemoType* get_ruleMemo() const; 220 ANTLR_UINT8** get_tokenNames() const; 221 ANTLR_UINT8* get_tokenName( ANTLR_UINT32 i ) const; 222 CommonTokenType* get_token(); 223 TokenSourceType* get_tokSource() const; 224 ANTLR_UINT32& get_channel(); 225 ANTLR_UINT32 get_type() const; 226 ANTLR_INT32 get_tokenStartLine() const; 227 ANTLR_INT32 get_tokenStartCharPositionInLine() const; 228 ANTLR_MARKER get_tokenStartCharIndex() const; 229 StringType& get_text(); 230 InputStreamsType& get_streams(); 231 232 void set_following( const FollowingType& following ); 233 void set_sizeHint( ANTLR_UINT32 sizeHint ); 234 void set_error( bool error ); 235 void set_errorRecovery( bool errorRecovery ); 236 void set_failed( bool failed ); 237 void set_token_present(bool token_present); 238 void set_lastErrorIndex( ANTLR_MARKER lastErrorIndex ); 239 void set_errorCount( ANTLR_UINT32 errorCount ); 240 void set_backtracking( ANTLR_INT32 backtracking ); 241 void set_ruleMemo( RuleMemoType* ruleMemo ); 242 void set_tokenNames( ANTLR_UINT8** tokenNames ); 243 void set_tokSource( TokenSourceType* tokSource ); 244 void set_channel( ANTLR_UINT32 channel ); 245 void set_exception( ExceptionBaseType* exception ); 246 void set_type( ANTLR_UINT32 type ); 247 void set_token( const CommonTokenType* tok); 248 void set_tokenStartLine( ANTLR_INT32 tokenStartLine ); 249 void set_tokenStartCharPositionInLine( ANTLR_INT32 tokenStartCharPositionInLine ); 250 void set_tokenStartCharIndex( ANTLR_MARKER tokenStartCharIndex ); 251 void set_text( const 
StringType& text ); 252 void set_streams( const InputStreamsType& streams ); 253 254 void inc_errorCount(); 255 void inc_backtracking(); 256 void dec_backtracking(); 257 }; 258 259 ANTLR_END_NAMESPACE() 260 261 #include "antlr3recognizersharedstate.inl" 262 263 #endif 264 265 266