1 //===- llvm/ADT/Trie.h ---- Generic trie structure --------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This class defines a generic trie structure. The trie structure
11 // is immutable after creation, but the payload contained within it is not.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_ADT_TRIE_H
16 #define LLVM_ADT_TRIE_H
17 
18 #include "llvm/ADT/GraphTraits.h"
19 #include "llvm/Support/DOTGraphTraits.h"
20 
21 #include <cassert>
22 #include <vector>
23 
24 namespace llvm {
25 
26 // FIXME:
27 // - Labels are usually small, maybe it's better to use SmallString
28 // - Should we use char* during construction?
29 // - Should we templatize Empty with traits-like interface?
30 
31 template<class Payload>
32 class Trie {
33   friend class GraphTraits<Trie<Payload> >;
34   friend class DOTGraphTraits<Trie<Payload> >;
35 public:
36   class Node {
37     friend class Trie;
38 
39   public:
40     typedef std::vector<Node*> NodeVectorType;
41     typedef typename NodeVectorType::iterator iterator;
42     typedef typename NodeVectorType::const_iterator const_iterator;
43 
44   private:
45     enum QueryResult {
46       Same           = -3,
47       StringIsPrefix = -2,
48       LabelIsPrefix  = -1,
49       DontMatch      = 0,
50       HaveCommonPart
51     };
52 
53     struct NodeCmp {
operatorNodeCmp54       bool operator() (Node* N1, Node* N2) {
55         return (N1->Label[0] < N2->Label[0]);
56       }
operatorNodeCmp57       bool operator() (Node* N, char Id) {
58         return (N->Label[0] < Id);
59       }
60     };
61 
62     std::string Label;
63     Payload Data;
64     NodeVectorType Children;
65 
66     // Do not implement
67     Node(const Node&);
68     Node& operator=(const Node&);
69 
addEdge(Node * N)70     inline void addEdge(Node* N) {
71       if (Children.empty())
72         Children.push_back(N);
73       else {
74         iterator I = std::lower_bound(Children.begin(), Children.end(),
75                                       N, NodeCmp());
76         // FIXME: no dups are allowed
77         Children.insert(I, N);
78       }
79     }
80 
setEdge(Node * N)81     inline void setEdge(Node* N) {
82       char Id = N->Label[0];
83       iterator I = std::lower_bound(Children.begin(), Children.end(),
84                                      Id, NodeCmp());
85       assert(I != Children.end() && "Node does not exists!");
86       *I = N;
87     }
88 
query(const std::string & s)89     QueryResult query(const std::string& s) const {
90       unsigned i, l;
91       unsigned l1 = s.length();
92       unsigned l2 = Label.length();
93 
94       // Find the length of common part
95       l = std::min(l1, l2);
96       i = 0;
97       while ((i < l) && (s[i] == Label[i]))
98         ++i;
99 
100       if (i == l) { // One is prefix of another, find who is who
101         if (l1 == l2)
102           return Same;
103         else if (i == l1)
104           return StringIsPrefix;
105         else
106           return LabelIsPrefix;
107       } else // s and Label have common (possible empty) part, return its length
108         return (QueryResult)i;
109     }
110 
111   public:
112     inline explicit Node(const Payload& data, const std::string& label = ""):
Label(label)113         Label(label), Data(data) { }
114 
data()115     inline const Payload& data() const { return Data; }
setData(const Payload & data)116     inline void setData(const Payload& data) { Data = data; }
117 
label()118     inline const std::string& label() const { return Label; }
119 
120 #if 0
dump()121     inline void dump() {
122       llvm::cerr << "Node: " << this << "\n"
123                 << "Label: " << Label << "\n"
124                 << "Children:\n";
125 
126       for (iterator I = Children.begin(), E = Children.end(); I != E; ++I)
127         llvm::cerr << (*I)->Label << "\n";
128     }
129 #endif
130 
getEdge(char Id)131     inline Node* getEdge(char Id) {
132       Node* fNode = NULL;
133       iterator I = std::lower_bound(Children.begin(), Children.end(),
134                                           Id, NodeCmp());
135       if (I != Children.end() && (*I)->Label[0] == Id)
136         fNode = *I;
137 
138       return fNode;
139     }
140 
begin()141     inline iterator       begin()       { return Children.begin(); }
begin()142     inline const_iterator begin() const { return Children.begin(); }
end()143     inline iterator       end  ()       { return Children.end();   }
end()144     inline const_iterator end  () const { return Children.end();   }
145 
size()146     inline size_t         size () const { return Children.size();  }
empty()147     inline bool           empty() const { return Children.empty(); }
front()148     inline const Node*   &front() const { return Children.front(); }
front()149     inline       Node*   &front()       { return Children.front(); }
back()150     inline const Node*   &back()  const { return Children.back();  }
back()151     inline       Node*   &back()        { return Children.back();  }
152 
153   };
154 
155 private:
156   std::vector<Node*> Nodes;
157   Payload Empty;
158 
159   inline Node* addNode(const Payload& data, const std::string label = "") {
160     Node* N = new Node(data, label);
161     Nodes.push_back(N);
162     return N;
163   }
164 
splitEdge(Node * N,char Id,size_t index)165   inline Node* splitEdge(Node* N, char Id, size_t index) {
166     Node* eNode = N->getEdge(Id);
167     assert(eNode && "Node doesn't exist");
168 
169     const std::string &l = eNode->Label;
170     assert(index > 0 && index < l.length() && "Trying to split too far!");
171     std::string l1 = l.substr(0, index);
172     std::string l2 = l.substr(index);
173 
174     Node* nNode = addNode(Empty, l1);
175     N->setEdge(nNode);
176 
177     eNode->Label = l2;
178     nNode->addEdge(eNode);
179 
180     return nNode;
181   }
182 
183   // Do not implement
184   Trie(const Trie&);
185   Trie& operator=(const Trie&);
186 
187 public:
Trie(const Payload & empty)188   inline explicit Trie(const Payload& empty):Empty(empty) {
189     addNode(Empty);
190   }
~Trie()191   inline ~Trie() {
192     for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
193       delete Nodes[i];
194   }
195 
getRoot()196   inline Node* getRoot() const { return Nodes[0]; }
197 
198   bool addString(const std::string& s, const Payload& data);
199   const Payload& lookup(const std::string& s) const;
200 
201 };
202 
203 // Define this out-of-line to dissuade the C++ compiler from inlining it.
204 template<class Payload>
addString(const std::string & s,const Payload & data)205 bool Trie<Payload>::addString(const std::string& s, const Payload& data) {
206   Node* cNode = getRoot();
207   Node* tNode = NULL;
208   std::string s1(s);
209 
210   while (tNode == NULL) {
211     char Id = s1[0];
212     if (Node* nNode = cNode->getEdge(Id)) {
213       typename Node::QueryResult r = nNode->query(s1);
214 
215       switch (r) {
216       case Node::Same:
217       case Node::StringIsPrefix:
218         // Currently we don't allow to have two strings in the trie one
219         // being a prefix of another. This should be fixed.
220         assert(0 && "FIXME!");
221         return false;
222       case Node::DontMatch:
223         assert(0 && "Impossible!");
224         return false;
225       case Node::LabelIsPrefix:
226         s1 = s1.substr(nNode->label().length());
227         cNode = nNode;
228         break;
229       default:
230         nNode = splitEdge(cNode, Id, r);
231         tNode = addNode(data, s1.substr(r));
232         nNode->addEdge(tNode);
233       }
234     } else {
235       tNode = addNode(data, s1);
236       cNode->addEdge(tNode);
237     }
238   }
239 
240   return true;
241 }
242 
243 template<class Payload>
lookup(const std::string & s)244 const Payload& Trie<Payload>::lookup(const std::string& s) const {
245   Node* cNode = getRoot();
246   Node* tNode = NULL;
247   std::string s1(s);
248 
249   while (tNode == NULL) {
250     char Id = s1[0];
251     if (Node* nNode = cNode->getEdge(Id)) {
252       typename Node::QueryResult r = nNode->query(s1);
253 
254       switch (r) {
255       case Node::Same:
256         tNode = nNode;
257         break;
258       case Node::StringIsPrefix:
259         return Empty;
260       case Node::DontMatch:
261         assert(0 && "Impossible!");
262         return Empty;
263       case Node::LabelIsPrefix:
264         s1 = s1.substr(nNode->label().length());
265         cNode = nNode;
266         break;
267       default:
268         return Empty;
269       }
270     } else
271       return Empty;
272   }
273 
274   return tNode->data();
275 }
276 
277 template<class Payload>
278 struct GraphTraits<Trie<Payload> > {
279   typedef Trie<Payload> TrieType;
280   typedef typename TrieType::Node NodeType;
281   typedef typename NodeType::iterator ChildIteratorType;
282 
283   static inline NodeType *getEntryNode(const TrieType& T) {
284     return T.getRoot();
285   }
286 
287   static inline ChildIteratorType child_begin(NodeType *N) {
288     return N->begin();
289   }
290   static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
291 
292   typedef typename std::vector<NodeType*>::const_iterator nodes_iterator;
293 
294   static inline nodes_iterator nodes_begin(const TrieType& G) {
295     return G.Nodes.begin();
296   }
297   static inline nodes_iterator nodes_end(const TrieType& G) {
298     return G.Nodes.end();
299   }
300 
301 };
302 
303 template<class Payload>
304 struct DOTGraphTraits<Trie<Payload> > : public DefaultDOTGraphTraits {
305   typedef typename Trie<Payload>::Node NodeType;
306   typedef typename GraphTraits<Trie<Payload> >::ChildIteratorType EdgeIter;
307 
308   static std::string getGraphName(const Trie<Payload>& T) {
309     return "Trie";
310   }
311 
312   static std::string getNodeLabel(NodeType* Node, const Trie<Payload>& T) {
313     if (T.getRoot() == Node)
314       return "<Root>";
315     else
316       return Node->label();
317   }
318 
319   static std::string getEdgeSourceLabel(NodeType* Node, EdgeIter I) {
320     NodeType* N = *I;
321     return N->label().substr(0, 1);
322   }
323 
324   static std::string getNodeAttributes(const NodeType* Node,
325                                        const Trie<Payload>& T) {
326     if (Node->data() != T.Empty)
327       return "color=blue";
328 
329     return "";
330   }
331 
332 };
333 
334 } // end of llvm namespace
335 
336 #endif // LLVM_ADT_TRIE_H
337