Cpp/include/antlr3recognizersharedstate.hpp

/** \file
 * While the C runtime does not need to model the state of
 * multiple lexers and parsers in the same way as the Java runtime does,
 * there is no overhead in reflecting that model. In fact the
 * C runtime has always been able to share recognizer state.
 *
 * This 'class' therefore defines all the elements of a recognizer
 * (either lexer, parser or tree parser) that are needed to
 * track the current recognition state. Multiple recognizers
 * may then share this state, for instance when one grammar
 * imports another.
 */

#ifndef	_ANTLR3_RECOGNIZER_SHARED_STATE_HPP
#define	_ANTLR3_RECOGNIZER_SHARED_STATE_HPP

// [The "BSD licence"]
// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB

//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "antlr3defs.hpp"

ANTLR_BEGIN_NAMESPACE()

/** All the data elements required to track the current state
 *  of any recognizer (lexer, parser, tree parser).
 *  May be shared between multiple recognizers such that
 *  grammar inheritance is easily supported.
 *
 *  \tparam ImplTraits  Traits bundle that supplies the concrete types used
 *                      below (allocation policy, token, string, bitset,
 *                      token source, exception base, ...).
 *  \tparam StreamType  The stream type the recognizer works on; its
 *                      UnitType is exposed here as TokenType.
 *
 *  This header only declares the class; the member functions are not
 *  defined inside the class body.
 */
template<class ImplTraits, class StreamType>
class RecognizerSharedState  : public ImplTraits::AllocPolicyType
{
public:
	// Types taken directly from the traits bundle / stream type.
	typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
	typedef typename StreamType::UnitType TokenType;
	typedef typename ImplTraits::CommonTokenType CommonTokenType;

	typedef typename ComponentTypeFinder<ImplTraits, StreamType>::ComponentType  ComponentType;
	typedef typename ImplTraits::template RewriteStreamType< ComponentType > RewriteStreamType;
	typedef typename ImplTraits::StringType StringType;
	typedef typename ImplTraits::TokenSourceType TokenSourceType;
	typedef typename ImplTraits::template ExceptionBaseType<StreamType> ExceptionBaseType;
	typedef typename ImplTraits::BitsetType BitsetType;
	typedef typename ImplTraits::BitsetListType BitsetListType;

	// Container types, built from the allocation policy's container templates.
	typedef typename AllocPolicyType::template StackType< BitsetListType > FollowingType;
	typedef typename AllocPolicyType::template StackType< typename ImplTraits::InputStreamType* > InputStreamsType;
	typedef InputStreamsType StreamsType;
	typedef typename AllocPolicyType::template VectorType<RewriteStreamType> RewriteStreamsType;

	// Rule memoization tables: RuleMemoType is a trie whose values are
	// pointers to RuleListType tries, which in turn hold ANTLR_MARKER values.
	typedef IntTrie<ImplTraits, ANTLR_MARKER> RuleListType;
	typedef IntTrie<ImplTraits, RuleListType*> RuleMemoType;

private:
	/** Points to the first in a possible chain of exceptions that the
     *  recognizer has discovered.
     */
    ExceptionBaseType*			m_exception;


    /** Tracks the set of token types that can follow any rule invocation.
     *  Stack structure (FollowingType is a stack of BitsetListType).
     */
    FollowingType		m_following;

    /** A hint from the creator of the recognizer as to how big the input
     *  is expected to get. This allows hash tables to be tuned accordingly.
     *  This might not be the best place for it in the end, but we will see.
     */
    ANTLR_UINT32		m_sizeHint;


    /** If set to true then the recognizer has an exception
     *  condition (this is tested by the generated code for the rules of
     *  the grammar).
     */
    bool				m_error;


    /** This is true when we have seen an error and have not yet successfully
     *  matched a token. Prevents generation of more than one error message
     *  per error.
     */
    bool				m_errorRecovery;

	/** In lieu of a return value, this indicates that a rule or token
     *  has failed to match.  Reset to false upon valid token match.
     */
    bool				m_failed;

	/** m_token is held by value rather than being allocated separately,
	 *  so there is no null pointer to signal "no token"; this flag serves
	 *  as the null indicator instead.
	 */
	bool				m_token_present;

    /** The index into the input stream where the last error occurred.
     *  This is used to prevent infinite loops where an error is found
     *  but no token is consumed during recovery...another error is found,
     *  ad nauseam.  This is a failsafe mechanism to guarantee that at least
     *  one token/tree node is consumed for two errors.
     */
    ANTLR_MARKER		m_lastErrorIndex;

    /** When the recognizer terminates, the error handling functions
     *  will have incremented this value if any error occurred (that was
     *  displayed). It can then be used by the grammar programmer without
     *  having to use static globals.
     */
    ANTLR_UINT32		m_errorCount;

    /** If 0, no backtracking is going on.  Safe to exec actions etc...
     *  If >0 then it's the level of backtracking.
     */
    ANTLR_INT32			m_backtracking;

    /** Rule memoization table: a trie of per-rule tries (see RuleMemoType
     *  and RuleListType above).
     *  Tracks the stop token index for each rule.  ruleMemo[ruleIndex] is
     *  the memoization table for ruleIndex.  For key ruleStartIndex, you
     *  get back the stop token for the associated rule or MEMO_RULE_FAILED.
     *
     *  This is only used if rule memoization is on.
     */
    RuleMemoType*		m_ruleMemo;

    /** Pointer to an array of token names
     *  that are generally useful in error reporting. The generated parsers install
     *  this pointer. The table it points to is statically allocated as 8 bit ascii
     *  at parser compile time - grammar token names are thus restricted in character
     *  sets, which does not seem too terrible.
     */
    ANTLR_UINT8**		m_tokenNames;

    /** The goal of all lexer rules/methods is to create a token object.
     *  This is an instance variable as multiple rules may collaborate to
     *  create a single token.  For example, NUM : INT | FLOAT ;
     *  In this case, you want the INT or FLOAT rule to set token and not
     *  have it reset to a NUM token in rule NUM.
     *
     *  Stored by value; see m_token_present for the "no token" indicator.
     */
    CommonTokenType		m_token;

    /** A lexer is a source of tokens, produced by all the generated (or
     *  hand crafted if you like) matching rules. As such it needs to provide
     *  a token source interface implementation. For other recognizers, this
     *  will become an empty class.
     */
    TokenSourceType*	m_tokSource;

    /** The channel number for the current token.
     */
    ANTLR_UINT32			m_channel;

    /** The token type for the current token.
     */
    ANTLR_UINT32			m_type;

    /** The input line (where it makes sense) on which the first character of the current
     *  token resides.
     */
    ANTLR_INT32			m_tokenStartLine;

    /** The character position of the first character of the current token
     *  within the line specified by m_tokenStartLine.
     */
    ANTLR_INT32		m_tokenStartCharPositionInLine;

    /** What character index in the stream did the current token start at?
     *  Needed, for example, to get the text for the current token.  Set at
     *  the start of nextToken.
     */
    ANTLR_MARKER		m_tokenStartCharIndex;

    /** Text for the current token. This can be overridden by setting this
     *  variable directly or by using the SETTEXT() macro (preferred) in your
     *  lexer rules.
     */
    StringType			m_text;

    /** Input stream stack, which allows the programmer to switch input streams
     *  easily and allows the standard nextToken() implementation to deal with it,
     *  as this is a common requirement.
     */
    InputStreamsType	m_streams;

public:
	/** Constructor. Declared here only; the initial values given to the
	 *  members are set by its out-of-line definition.
	 */
	RecognizerSharedState();

	// ---- Accessors ------------------------------------------------------
	// One getter per data member above (named get_<member>). Most return by
	// value or pointer and are const. get_following(), get_channel(),
	// get_text() and get_streams() are non-const and return a mutable
	// reference, and get_token() is non-const and returns a pointer.
	// NOTE(review): the bodies are not visible in this header; each getter
	// presumably returns the like-named member - confirm in the definitions.
	ExceptionBaseType* get_exception() const;
	FollowingType& get_following();
	ANTLR_UINT32 get_sizeHint() const;
	bool get_error() const;
	bool get_errorRecovery() const;
	bool get_failed() const;
	bool get_token_present() const;
	ANTLR_MARKER get_lastErrorIndex() const;
	ANTLR_UINT32 get_errorCount() const;
	ANTLR_INT32 get_backtracking() const;
	RuleMemoType* get_ruleMemo() const;
	ANTLR_UINT8** get_tokenNames() const;
	// Single-entry lookup; presumably indexes the token name table with i
	// (no bounds information is visible here - confirm in the definition).
	ANTLR_UINT8* get_tokenName( ANTLR_UINT32 i ) const;
	CommonTokenType* get_token();
	TokenSourceType* get_tokSource() const;
	ANTLR_UINT32& get_channel();
	ANTLR_UINT32 get_type() const;
	ANTLR_INT32 get_tokenStartLine() const;
	ANTLR_INT32 get_tokenStartCharPositionInLine() const;
	ANTLR_MARKER get_tokenStartCharIndex() const;
	StringType& get_text();
	InputStreamsType& get_streams();

	// ---- Mutators -------------------------------------------------------
	// One setter per data member (named set_<member>). Containers and strings
	// are taken by const reference, scalars and pointers by value.
	// set_token() takes a pointer to a const token although m_token is held
	// by value. NOTE(review): how a null pointer is handled and whether
	// m_token_present is updated is decided by the definition - confirm there.
	void  set_following( const FollowingType& following );
	void  set_sizeHint( ANTLR_UINT32 sizeHint );
	void  set_error( bool error );
	void  set_errorRecovery( bool errorRecovery );
	void  set_failed( bool failed );
	void  set_token_present(bool token_present);
	void  set_lastErrorIndex( ANTLR_MARKER lastErrorIndex );
	void  set_errorCount( ANTLR_UINT32 errorCount );
	void  set_backtracking( ANTLR_INT32 backtracking );
	void  set_ruleMemo( RuleMemoType* ruleMemo );
	void  set_tokenNames( ANTLR_UINT8** tokenNames );
	void  set_tokSource( TokenSourceType* tokSource );
	void  set_channel( ANTLR_UINT32 channel );
	void  set_exception( ExceptionBaseType* exception );
	void  set_type( ANTLR_UINT32 type );
	void  set_token( const CommonTokenType* tok);
	void  set_tokenStartLine( ANTLR_INT32 tokenStartLine );
	void  set_tokenStartCharPositionInLine( ANTLR_INT32 tokenStartCharPositionInLine );
	void  set_tokenStartCharIndex( ANTLR_MARKER tokenStartCharIndex );
	void  set_text( const StringType& text );
	void  set_streams( const InputStreamsType& streams );

	// ---- Counter helpers --------------------------------------------------
	// Judging by their names these adjust m_errorCount / m_backtracking by
	// one step; the exact behaviour is in the definitions - TODO confirm.
	void inc_errorCount();
	void inc_backtracking();
	void dec_backtracking();
};

ANTLR_END_NAMESPACE()

#include "antlr3recognizersharedstate.inl"

#endif