1 #ifndef	ANTLR3COLLECTIONS_HPP
2 #define	ANTLR3COLLECTIONS_HPP
3 
4 // [The "BSD licence"]
5 // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
6 
7 //
8 // All rights reserved.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions
12 // are met:
13 // 1. Redistributions of source code must retain the above copyright
14 //    notice, this list of conditions and the following disclaimer.
15 // 2. Redistributions in binary form must reproduce the above copyright
16 //    notice, this list of conditions and the following disclaimer in the
17 //    documentation and/or other materials provided with the distribution.
18 // 3. The name of the author may not be used to endorse or promote products
19 //    derived from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 
32 #include    "antlr3defs.hpp"
33 
34 ANTLR_BEGIN_NAMESPACE()
35 
36 /* -------------- TRIE Interfaces ---------------- */
37 
38 /** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE
39  */
40 template< class ImplTraits, class DataType >
41 class TrieEntry : public ImplTraits::AllocPolicyType
42 {
43 public:
44 	typedef typename ImplTraits::AllocPolicyType AllocPolicy;
45 
46 private:
47 	DataType			m_data;
48 	TrieEntry*			m_next;	    /* Allows duplicate entries for same key in insertion order	*/
49 
50 public:
51 	TrieEntry(const DataType& data, TrieEntry* next);
52 	DataType& get_data();
53 	const DataType& get_data() const;
54 	TrieEntry* get_next() const;
55 	void set_next( TrieEntry* next );
56 };
57 
58 /** Structure that defines an element/node in an ANTLR_INT_TRIE
59  */
60 template< class ImplTraits, class DataType >
61 class IntTrieNode : public ImplTraits::AllocPolicyType
62 {
63 public:
64 	typedef TrieEntry<ImplTraits, DataType> TrieEntryType;
65 	typedef TrieEntryType BucketsType;
66 
67 private:
68     ANTLR_UINT32	m_bitNum;	/**< This is the left/right bit index for traversal along the nodes				*/
69     ANTLR_INTKEY	m_key;		/**< This is the actual key that the entry represents if it is a terminal node  */
70     BucketsType*	m_buckets;	/**< This is the data bucket(s) that the key indexes, which may be NULL			*/
71     IntTrieNode*	m_leftN;	/**< Pointer to the left node from here when sKey & bitNum = 0					*/
72     IntTrieNode*	m_rightN;	/**< Pointer to the right node from here when sKey & bitNum, = 1				*/
73 
74 public:
75 	IntTrieNode();
76 	~IntTrieNode();
77 
78 	ANTLR_UINT32 get_bitNum() const;
79 	ANTLR_INTKEY get_key() const;
80 	BucketsType* get_buckets() const;
81 	IntTrieNode* get_leftN() const;
82 	IntTrieNode* get_rightN() const;
83 	void  set_bitNum( ANTLR_UINT32 bitNum );
84 	void  set_key( ANTLR_INTKEY key );
85 	void  set_buckets( BucketsType* buckets );
86 	void  set_leftN( IntTrieNode* leftN );
87 	void  set_rightN( IntTrieNode* rightN );
88 };
89 
90 /** Structure that defines an ANTLR3_INT_TRIE. For this particular implementation,
91  *  as you might expect, the key is turned into a "string" by looking at bit(key, depth)
92  *  of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63)
93  *  and potentially a huge trie. This is the algorithm for a Patricia Trie.
94  *  Note also that this trie [can] accept multiple entries for the same key and is
95  *  therefore a kind of elastic bucket patricia trie.
96  *
97  *  If you find this code useful, please feel free to 'steal' it for any purpose
98  *  as covered by the BSD license under which ANTLR is issued. You can cut the code
99  *  but as the ANTLR library is only about 50K (Windows Vista), you might find it
100  *  easier to just link the library. Please keep all comments and licenses and so on
101  *  in any version of this you create of course.
102  *
103  *  Jim Idle.
104  *
105  */
106 class IntTrieBase
107 {
108 public:
109 	static const ANTLR_UINT8* get_bitIndex();
110 	static const ANTLR_UINT64* get_bitMask();
111 };
112 
113 template< class ImplTraits, class DataType >
114 class IntTrie : public ImplTraits::AllocPolicyType, public IntTrieBase
115 {
116 public:
117 	typedef TrieEntry<ImplTraits, DataType> TrieEntryType;
118 	typedef IntTrieNode<ImplTraits, DataType> IntTrieNodeType;
119 
120 private:
121     IntTrieNodeType*	m_root;			/* Root node of this integer trie					*/
122     IntTrieNodeType*	m_current;		/* Used to traverse the TRIE with the next() method	*/
123     ANTLR_UINT32	m_count;			/* Current entry count								*/
124     bool			m_allowDups;		/* Whether this trie accepts duplicate keys			*/
125 
126 public:
127 	/* INT TRIE Implementation of depth 64 bits, being the number of bits
128 	 * in a 64 bit integer.
129 	 */
130     IntTrie( ANTLR_UINT32 depth );
131 
132 	/** Search the int Trie and return a pointer to the first bucket indexed
133 	 *  by the key if it is contained in the trie, otherwise NULL.
134 	 */
135     TrieEntryType*	get( ANTLR_INTKEY key);
136     bool		del( ANTLR_INTKEY key);
137 
138 	/** Add an entry into the INT trie.
139 	 *  Basically we descend the trie as we do when searching it, which will
140 	 *  locate the only node in the trie that can be reached by the bit pattern of the
141 	 *  key. If the key is actually at that node, then if the trie accepts duplicates
142 	 *  we add the supplied data in a new chained bucket to that data node. If it does
143 	 *  not accept duplicates then we merely return FALSE in case the caller wants to know
144 	 *  whether the key was already in the trie.
145 	 *  If the node we locate is not the key we are looking to add, then we insert a new node
146 	 *  into the trie with a bit index of the leftmost differing bit and the left or right
147 	 *  node pointing to itself or the data node we are inserting 'before'.
148 	 */
149     bool		add( ANTLR_INTKEY key, const DataType& data );
150     ~IntTrie();
151 };
152 
153 /**
154  * A topological sort system that given a set of dependencies of a node m on node n,
155  * can sort them in dependency order. This is a generally useful utility object
156  * that does not care what the things are it is sorting. Generally the set
157  * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR.
158  * I have provided a sort method that given ANTLR3_VECTOR as an input will sort
159  * the vector entries in place, as well as a sort method that just returns an
160  * array of the sorted noded indexes, in case you are not sorting ANTLR3_VECTORS but
161  * some set of your own device.
162  *
163  * Of the two main algorithms that could be used, I chose to use the depth first
164  * search for unvisited nodes as a) This runs in linear time, and b) it is what
165  * we used in the ANTLR Tool to perform a topological sort of the input grammar files
166  * based on their dependencies.
167  */
168 template<class ImplTraits>
169 class Topo : public ImplTraits::AllocPolicyType
170 {
171 public:
172 	typedef typename ImplTraits::BitsetType BitsetType;
173 	typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
174 
175 private:
176     /**
177      * A vector of vectors of edges, built by calling the addEdge method()
178      * to indicate that node number n depends on node number m. Each entry in the vector
179      * contains a bitset, which has a bit index set for each node upon which the
180      * entry node depends.
181      */
182     BitsetType**	m_edges;
183 
184     /**
185      * A vector used to build up the sorted output order. Note that
186      * as the vector contains UINT32 then the maximum node index is
187      * 'limited' to 2^32, as nodes should be zero based.
188      */
189     ANTLR_UINT32*				m_sorted;
190 
191     /**
192      * A vector used to detect cycles in the edge dependecies. It is used
193      * as a stack and each time we descend a node to one of its edges we
194      * add the node into this stack. If we find a node that we have already
195      * visited in the stack, then it means there wasa cycle such as 9->8->1->9
196      * as the only way a node can be on the stack is if we are currently
197      * descnding from it as we remove it from the stack as we exit from
198      * descending its dependencies
199      */
200     ANTLR_UINT32*		m_cycle;
201 
202     /**
203      * A flag that indicates the algorithm found a cycle in the edges
204      * such as 9->8->1->9
205      * If this flag is set after you have called one of the sort routines
206      * then the detected cycle will be contained in the cycle array and
207      * cycleLimit will point to the one after the last entry in the cycle.
208      */
209     bool				m_hasCycle;
210 
211     /**
212      * A watermark used to accumulate potential cycles in the cycle array.
213      * This should be zero when we are done. Check hasCycle after calling one
214      * of the sort methods and if it is true then you can find the cycle
215      * in cycle[0]...cycle[cycleMark-1]
216      */
217     ANTLR_UINT32		m_cycleMark;
218 
219     /**
220      * One more than the largest node index that is contained in edges/sorted.
221      */
222     ANTLR_UINT32		m_limit;
223 
224     /**
225      * The set of visited nodes as determined by a set entry in
226      * the bitmap.
227      */
228     BitsetType*			m_visited;
229 
230 public:
231 	Topo();
232     /**
233      * A method that adds an edge from one node to another. An edge
234      * of n -> m indicates that node n is dependent on node m. Note that
235      * while building these edges, it is perfectly OK to add nodes out of
236      * sequence. So, if you have edges:
237      *
238      * 3 -> 0
239      * 2 -> 1
240      * 1 -> 3
241      *
242      * The you can add them in that order and so add node 3 before nodes 2 and 1
243      *
244      */
245     void  addEdge(ANTLR_UINT32 edge, ANTLR_UINT32 dependency);
246 
247 
248     /**
249      * A method that returns a pointer to an array of sorted node indexes.
250      * The array is sorted in topological sorted order. Note that the array
251      * is only as large as the largest node index you created an edge for. This means
252      * that if you had an input of 32 nodes, but that largest node with an edge
253      * was 16, then the returned array will be the sorted order of the first 16
254      * nodes and the last 16 nodes of your array are basically fine as they are
255      * as they had no dependencies and do not need any particular sort order.
256      *
257      * NB: If the structure that contains the array is freed, then the sorted
258      * array will be freed too so you should use the value of limit to
259      * make a long term copy of this array if you do not want to keep the topo
260      * structure around as well.
261      */
262     ANTLR_UINT32*  sortToArray();
263 
264     /**
265      * A method that sorts the supplied ANTLR3_VECTOR in place based
266      * on the previously supplied edge data.
267      */
268 	template<typename DataType>
269     void   sortVector( typename ImplTraits::template VectorType<DataType>& v);
270 
271 	void   DFS(ANTLR_UINT32 node);
272 
273     /**
274      *  A method to free this structure and any associated memory.
275      */
276 	~Topo();
277 };
278 
279 ANTLR_END_NAMESPACE()
280 
281 #include "antlr3collections.inl"
282 
283 #endif
284 
285 
286