1 // bi-table.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Classes for representing a bijective mapping between an arbitrary entry
20 // of type T and a signed integral ID.
21 
22 #ifndef FST_LIB_BI_TABLE_H__
23 #define FST_LIB_BI_TABLE_H__
24 
25 #include <deque>
26 using std::deque;
27 #include <functional>
28 #include <vector>
29 using std::vector;
30 
31 #include <tr1/unordered_set>
32 using std::tr1::unordered_set;
33 using std::tr1::unordered_multiset;
34 
35 namespace fst {
36 
37 // BI TABLES - these determine a bijective mapping between an
38 // arbitrary entry of type T and an signed integral ID of type I. The IDs are
39 // allocated starting from 0 in order.
40 //
41 // template <class I, class T>
42 // class BiTable {
43 //  public:
44 //
45 //   // Required constructors.
46 //   BiTable();
47 //
48 //   // Lookup integer ID from entry. If it doesn't exist and 'insert'
49 //   / is true, then add it. Otherwise return -1.
50 //   I FindId(const T &entry, bool insert = true);
51 //   // Lookup entry from integer ID.
52 //   const T &FindEntry(I) const;
53 //   // # of stored entries.
54 //   I Size() const;
55 // };
56 
57 // An implementation using a hash map for the entry to ID mapping.
58 // H is the hash function and E is the equality function.
59 // If passed to the constructor, ownership is given to this class.
60 
61 template <class I, class T, class H, class E = std::equal_to<T> >
62 class HashBiTable {
63  public:
64   // Reserves space for 'table_size' elements.
65   explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
hash_func_(h)66       : hash_func_(h),
67         hash_equal_(e),
68         entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) {
69     if (table_size)
70       id2entry_.reserve(table_size);
71   }
72 
HashBiTable(const HashBiTable<I,T,H,E> & table)73   HashBiTable(const HashBiTable<I, T, H, E> &table)
74       : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
75         hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
76         entry2id_(table.entry2id_.begin(), table.entry2id_.end(),
77                   table.entry2id_.size(),
78                   (hash_func_ ? *hash_func_ : H()),
79                   (hash_equal_ ? *hash_equal_ : E())),
80         id2entry_(table.id2entry_) { }
81 
~HashBiTable()82   ~HashBiTable() {
83     delete hash_func_;
84     delete hash_equal_;
85   }
86 
87   I FindId(const T &entry, bool insert = true) {
88     I &id_ref = entry2id_[entry];
89     if (id_ref == 0) {  // T not found
90       if (insert) {     // store and assign it a new ID
91         id2entry_.push_back(entry);
92         id_ref = id2entry_.size();
93       } else {
94         return -1;
95       }
96     }
97     return id_ref - 1;  // NB: id_ref = ID + 1
98   }
99 
FindEntry(I s)100   const T &FindEntry(I s) const {
101     return id2entry_[s];
102   }
103 
Size()104   I Size() const { return id2entry_.size(); }
105 
106  private:
107   H *hash_func_;
108   E *hash_equal_;
109   unordered_map<T, I, H, E> entry2id_;
110   vector<T> id2entry_;
111 
112   void operator=(const HashBiTable<I, T, H, E> &table);  // disallow
113 };
114 
115 
116 // Enables alternative hash set representations below.
117 // typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType;
118 typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType;
119 
120 // Default hash set is STL hash_set
121 template<class K, class H, class E, HSType>
122 struct  HashSet : public unordered_set<K, H, E> {
123   HashSet(size_t n = 0, const H &h = H(), const E &e = E())
124       : unordered_set<K, H, E>(n, h, e)  { }
125 
rehashHashSet126   void rehash(size_t n) { }
127 };
128 
129 
130 // An implementation using a hash set for the entry to ID mapping.
131 // The hash set holds 'keys' which are either the ID or kCurrentKey.
132 // These keys can be mapped to entrys either by looking up in the
133 // entry vector or, if kCurrentKey, in current_entry_ member. The hash
134 // and key equality functions map to entries first. H
135 // is the hash function and E is the equality function. If passed to
136 // the constructor, ownership is given to this class.
137 template <class I, class T, class H,
138           class E = std::equal_to<T>, HSType HS = HS_DENSE>
139 class CompactHashBiTable {
140  public:
141   friend class HashFunc;
142   friend class HashEqual;
143 
144   // Reserves space for 'table_size' elements.
145   explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
hash_func_(h)146       : hash_func_(h),
147         hash_equal_(e),
148         compact_hash_func_(*this),
149         compact_hash_equal_(*this),
150         keys_(table_size, compact_hash_func_, compact_hash_equal_) {
151     if (table_size)
152       id2entry_.reserve(table_size);
153   }
154 
CompactHashBiTable(const CompactHashBiTable<I,T,H,E,HS> & table)155   CompactHashBiTable(const CompactHashBiTable<I, T, H, E, HS> &table)
156       : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
157         hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
158         compact_hash_func_(*this),
159         compact_hash_equal_(*this),
160         keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_),
161         id2entry_(table.id2entry_) {
162     keys_.insert(table.keys_.begin(), table.keys_.end());
163  }
164 
~CompactHashBiTable()165   ~CompactHashBiTable() {
166     delete hash_func_;
167     delete hash_equal_;
168   }
169 
170   I FindId(const T &entry, bool insert = true) {
171     current_entry_ = &entry;
172     typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
173     if (it == keys_.end()) {  // T not found
174       if (insert) {           // store and assign it a new ID
175         I key = id2entry_.size();
176         id2entry_.push_back(entry);
177         keys_.insert(key);
178         return key;
179       } else {
180         return -1;
181       }
182     } else {
183       return *it;
184     }
185   }
186 
FindEntry(I s)187   const T &FindEntry(I s) const { return id2entry_[s]; }
188 
Size()189   I Size() const { return id2entry_.size(); }
190 
191   // Clear content. With argument, erases last n IDs.
192   void Clear(ssize_t n = -1) {
193     if (n < 0 || n > id2entry_.size())
194       n = id2entry_.size();
195     while (n-- > 0) {
196       I key = id2entry_.size() - 1;
197       keys_.erase(key);
198       id2entry_.pop_back();
199     }
200     keys_.rehash(0);
201   }
202 
203  private:
204   static const I kCurrentKey;    // -1
205   static const I kEmptyKey;      // -2
206   static const I kDeletedKey;    // -3
207 
208   class HashFunc {
209    public:
HashFunc(const CompactHashBiTable & ht)210     HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {}
211 
operator()212     size_t operator()(I k) const {
213       if (k >= kCurrentKey) {
214         return (*ht_->hash_func_)(ht_->Key2Entry(k));
215       } else {
216         return 0;
217       }
218     }
219 
220    private:
221     const CompactHashBiTable *ht_;
222   };
223 
224   class HashEqual {
225    public:
HashEqual(const CompactHashBiTable & ht)226     HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {}
227 
operator()228     bool operator()(I k1, I k2) const {
229       if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
230         return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2));
231       } else {
232         return k1 == k2;
233       }
234     }
235    private:
236     const CompactHashBiTable *ht_;
237   };
238 
239   typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;
240 
Key2Entry(I k)241   const T &Key2Entry(I k) const {
242     if (k == kCurrentKey)
243       return *current_entry_;
244     else
245       return id2entry_[k];
246   }
247 
248   H *hash_func_;
249   E *hash_equal_;
250   HashFunc compact_hash_func_;
251   HashEqual compact_hash_equal_;
252   KeyHashSet keys_;
253   vector<T> id2entry_;
254   const T *current_entry_;
255 
256   void operator=(const CompactHashBiTable<I, T, H, E, HS> &table);  // disallow
257 };
258 
259 
260 template <class I, class T, class H, class E, HSType HS>
261 const I CompactHashBiTable<I, T, H, E, HS>::kCurrentKey = -1;
262 
263 template <class I, class T, class H, class E, HSType HS>
264 const I CompactHashBiTable<I, T, H, E, HS>::kEmptyKey = -2;
265 
266 template <class I, class T, class H, class E, HSType HS>
267 const I CompactHashBiTable<I, T, H, E, HS>::kDeletedKey = -3;
268 
269 
270 // An implementation using a vector for the entry to ID mapping.
271 // It is passed a function object FP that should fingerprint entries
272 // uniquely to an integer that can used as a vector index. Normally,
273 // VectorBiTable constructs the FP object.  The user can instead
274 // pass in this object; in that case, VectorBiTable takes its
275 // ownership.
276 template <class I, class T, class FP>
277 class VectorBiTable {
278  public:
279   // Reserves space for 'table_size' elements.
280   explicit VectorBiTable(FP *fp = 0, size_t table_size = 0)
281       : fp_(fp ? fp : new FP()) {
282     if (table_size)
283       id2entry_.reserve(table_size);
284   }
285 
VectorBiTable(const VectorBiTable<I,T,FP> & table)286   VectorBiTable(const VectorBiTable<I, T, FP> &table)
287       : fp_(table.fp_ ? new FP(*table.fp_) : 0),
288         fp2id_(table.fp2id_),
289         id2entry_(table.id2entry_) { }
290 
~VectorBiTable()291   ~VectorBiTable() { delete fp_; }
292 
293   I FindId(const T &entry, bool insert = true) {
294     ssize_t fp = (*fp_)(entry);
295     if (fp >= fp2id_.size())
296       fp2id_.resize(fp + 1);
297     I &id_ref = fp2id_[fp];
298     if (id_ref == 0) {  // T not found
299       if (insert) {     // store and assign it a new ID
300         id2entry_.push_back(entry);
301         id_ref = id2entry_.size();
302       } else {
303         return -1;
304       }
305     }
306     return id_ref - 1;  // NB: id_ref = ID + 1
307   }
308 
FindEntry(I s)309   const T &FindEntry(I s) const { return id2entry_[s]; }
310 
Size()311   I Size() const { return id2entry_.size(); }
312 
Fingerprint()313   const FP &Fingerprint() const { return *fp_; }
314 
315  private:
316   FP *fp_;
317   vector<I> fp2id_;
318   vector<T> id2entry_;
319 
320   void operator=(const VectorBiTable<I, T, FP> &table);  // disallow
321 };
322 
323 
324 // An implementation using a vector and a compact hash table. The
325 // selecting functor S returns true for entries to be hashed in the
326 // vector.  The fingerprinting functor FP returns a unique fingerprint
327 // for each entry to be hashed in the vector (these need to be
328 // suitable for indexing in a vector).  The hash functor H is used
329 // when hashing entry into the compact hash table.  If passed to the
330 // constructor, ownership is given to this class.
331 template <class I, class T, class S, class FP, class H, HSType HS = HS_DENSE>
332 class VectorHashBiTable {
333  public:
334   friend class HashFunc;
335   friend class HashEqual;
336 
337   explicit VectorHashBiTable(S *s, FP *fp = 0, H *h = 0,
338                              size_t vector_size = 0,
339                              size_t entry_size = 0)
selector_(s)340       : selector_(s),
341         fp_(fp ? fp : new FP()),
342         h_(h ? h : new H()),
343         hash_func_(*this),
344         hash_equal_(*this),
345         keys_(0, hash_func_, hash_equal_) {
346     if (vector_size)
347       fp2id_.reserve(vector_size);
348     if (entry_size)
349       id2entry_.reserve(entry_size);
350  }
351 
VectorHashBiTable(const VectorHashBiTable<I,T,S,FP,H,HS> & table)352   VectorHashBiTable(const VectorHashBiTable<I, T, S, FP, H, HS> &table)
353       : selector_(new S(table.s_)),
354         fp_(table.fp_ ? new FP(*table.fp_) : 0),
355         h_(table.h_ ? new H(*table.h_) : 0),
356         id2entry_(table.id2entry_),
357         fp2id_(table.fp2id_),
358         hash_func_(*this),
359         hash_equal_(*this),
360         keys_(table.keys_.size(), hash_func_, hash_equal_) {
361      keys_.insert(table.keys_.begin(), table.keys_.end());
362   }
363 
~VectorHashBiTable()364   ~VectorHashBiTable() {
365     delete selector_;
366     delete fp_;
367     delete h_;
368   }
369 
370   I FindId(const T &entry, bool insert = true) {
371     if ((*selector_)(entry)) {  // Use the vector if 'selector_(entry) == true'
372       uint64 fp = (*fp_)(entry);
373       if (fp2id_.size() <= fp)
374         fp2id_.resize(fp + 1, 0);
375       if (fp2id_[fp] == 0) {         // T not found
376         if (insert) {                // store and assign it a new ID
377           id2entry_.push_back(entry);
378           fp2id_[fp] = id2entry_.size();
379         } else {
380           return -1;
381         }
382       }
383       return fp2id_[fp] - 1;  // NB: assoc_value = ID + 1
384     } else {  // Use the hash table otherwise.
385       current_entry_ = &entry;
386       typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
387       if (it == keys_.end()) {
388         if (insert) {
389           I key = id2entry_.size();
390           id2entry_.push_back(entry);
391           keys_.insert(key);
392           return key;
393         } else {
394           return -1;
395         }
396       } else {
397         return *it;
398       }
399     }
400   }
401 
FindEntry(I s)402   const T &FindEntry(I s) const {
403     return id2entry_[s];
404   }
405 
Size()406   I Size() const { return id2entry_.size(); }
407 
Selector()408   const S &Selector() const { return *selector_; }
409 
Fingerprint()410   const FP &Fingerprint() const { return *fp_; }
411 
Hash()412   const H &Hash() const { return *h_; }
413 
414  private:
415   static const I kCurrentKey;  // -1
416   static const I kEmptyKey;    // -2
417 
418   class HashFunc {
419    public:
HashFunc(const VectorHashBiTable & ht)420     HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {}
421 
operator()422     size_t operator()(I k) const {
423       if (k >= kCurrentKey) {
424         return (*(ht_->h_))(ht_->Key2Entry(k));
425       } else {
426         return 0;
427       }
428     }
429    private:
430     const VectorHashBiTable *ht_;
431   };
432 
433   class HashEqual {
434    public:
HashEqual(const VectorHashBiTable & ht)435     HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {}
436 
operator()437     bool operator()(I k1, I k2) const {
438       if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
439         return ht_->Key2Entry(k1) == ht_->Key2Entry(k2);
440       } else {
441         return k1 == k2;
442       }
443     }
444    private:
445     const VectorHashBiTable *ht_;
446   };
447 
448   typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;
449 
Key2Entry(I k)450   const T &Key2Entry(I k) const {
451     if (k == kCurrentKey)
452       return *current_entry_;
453     else
454       return id2entry_[k];
455   }
456 
457   S *selector_;  // Returns true if entry hashed into vector
458   FP *fp_;       // Fingerprint used when hashing entry into vector
459   H *h_;         // Hash function used when hashing entry into hash_set
460 
461   vector<T> id2entry_;  // Maps state IDs to entry
462   vector<I> fp2id_;     // Maps entry fingerprints to IDs
463 
464   // Compact implementation of the hash table mapping entrys to
465   // state IDs using the hash function 'h_'
466   HashFunc hash_func_;
467   HashEqual hash_equal_;
468   KeyHashSet keys_;
469   const T *current_entry_;
470 
471   // disallow
472   void operator=(const VectorHashBiTable<I, T, S, FP, H, HS> &table);
473 };
474 
475 template <class I, class T, class S, class FP, class H, HSType HS>
476 const I VectorHashBiTable<I, T, S, FP, H, HS>::kCurrentKey = -1;
477 
478 template <class I, class T, class S, class FP, class H, HSType HS>
479 const I VectorHashBiTable<I, T, S, FP, H, HS>::kEmptyKey = -3;
480 
481 
482 // An implementation using a hash map for the entry to ID
483 // mapping. This version permits erasing of arbitrary states.  The
484 // entry T must have == defined and its default constructor must
485 // produce a entry that will never be seen. F is the hash function.
486 template <class I, class T, class F>
487 class ErasableBiTable {
488  public:
ErasableBiTable()489   ErasableBiTable() : first_(0) {}
490 
491   I FindId(const T &entry, bool insert = true) {
492     I &id_ref = entry2id_[entry];
493     if (id_ref == 0) {  // T not found
494       if (insert) {     // store and assign it a new ID
495         id2entry_.push_back(entry);
496         id_ref = id2entry_.size() + first_;
497       } else {
498         return -1;
499       }
500     }
501     return id_ref - 1;  // NB: id_ref = ID + 1
502   }
503 
FindEntry(I s)504   const T &FindEntry(I s) const { return id2entry_[s - first_]; }
505 
Size()506   I Size() const { return id2entry_.size(); }
507 
Erase(I s)508   void Erase(I s) {
509     T &entry = id2entry_[s - first_];
510     typename unordered_map<T, I, F>::iterator it =
511         entry2id_.find(entry);
512     entry2id_.erase(it);
513     id2entry_[s - first_] = empty_entry_;
514     while (!id2entry_.empty() && id2entry_.front() == empty_entry_) {
515       id2entry_.pop_front();
516       ++first_;
517     }
518   }
519 
520  private:
521   unordered_map<T, I, F> entry2id_;
522   deque<T> id2entry_;
523   const T empty_entry_;
524   I first_;        // I of first element in the deque;
525 
526   // disallow
527   void operator=(const ErasableBiTable<I, T, F> &table);  //disallow
528 };
529 
530 }  // namespace fst
531 
532 #endif  // FST_LIB_BI_TABLE_H__
533