1 // state-table.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Classes for representing the mapping between state tuples and state Ids.
20 
21 #ifndef FST_LIB_STATE_TABLE_H__
22 #define FST_LIB_STATE_TABLE_H__
23 
24 #include <deque>
25 using std::deque;
26 #include <vector>
27 using std::vector;
28 
29 #include <fst/bi-table.h>
30 #include <fst/expanded-fst.h>
31 
32 
33 namespace fst {
34 
35 // STATE TABLES - these determine the bijective mapping between state
36 // tuples (e.g. in composition triples of two FST states and a
37 // composition filter state) and their corresponding state IDs.
38 // They are classes, templated on state tuples, of the form:
39 //
40 // template <class T>
41 // class StateTable {
42 //  public:
43 //   typedef typename T StateTuple;
44 //
45 //   // Required constructors.
46 //   StateTable();
47 //
48 //   // Lookup state ID by tuple. If it doesn't exist, then add it.
49 //   StateId FindState(const StateTuple &);
50 //   // Lookup state tuple by state ID.
51 //   const StateTuple<StateId> &Tuple(StateId) const;
52 //   // # of stored tuples.
53 //   StateId Size() const;
54 // };
55 //
56 // A state tuple has the form:
57 //
58 // template <class S>
59 // struct StateTuple {
60 //   typedef typename S StateId;
61 //
62 //   // Required constructors.
63 //   StateTuple();
64 //   StateTuple(const StateTuple &);
65 // };
66 
67 
68 // An implementation using a hash map for the tuple to state ID mapping.
69 // The state tuple T must have == defined. H is the hash function.
70 template <class T, class H>
71 class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
72  public:
73   typedef T StateTuple;
74   typedef typename StateTuple::StateId StateId;
75   using HashBiTable<StateId, T, H>::FindId;
76   using HashBiTable<StateId, T, H>::FindEntry;
77   using HashBiTable<StateId, T, H>::Size;
78 
HashStateTable()79   HashStateTable() : HashBiTable<StateId, T, H>() {}
80 
81   // Reserves space for table_size elements.
HashStateTable(size_t table_size)82   explicit HashStateTable(size_t table_size)
83       : HashBiTable<StateId, T, H>(table_size) {}
84 
FindState(const StateTuple & tuple)85   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
Tuple(StateId s)86   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
87 };
88 
89 
90 // An implementation using a hash map for the tuple to state ID mapping.
91 // The state tuple T must have == defined. H is the hash function.
92 template <class T, class H>
93 class CompactHashStateTable
94     : public CompactHashBiTable<typename T::StateId, T, H> {
95  public:
96   typedef T StateTuple;
97   typedef typename StateTuple::StateId StateId;
98   using CompactHashBiTable<StateId, T, H>::FindId;
99   using CompactHashBiTable<StateId, T, H>::FindEntry;
100   using CompactHashBiTable<StateId, T, H>::Size;
101 
CompactHashStateTable()102   CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {}
103 
104   // Reserves space for 'table_size' elements.
CompactHashStateTable(size_t table_size)105   explicit CompactHashStateTable(size_t table_size)
106       : CompactHashBiTable<StateId, T, H>(table_size) {}
107 
FindState(const StateTuple & tuple)108   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
Tuple(StateId s)109   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
110 };
111 
112 // An implementation using a vector for the tuple to state mapping.
113 // It is passed a function object FP that should fingerprint tuples
114 // uniquely to an integer that can used as a vector index. Normally,
115 // VectorStateTable constructs the FP object.  The user can instead
116 // pass in this object; in that case, VectorStateTable takes its
117 // ownership.
118 template <class T, class FP>
119 class VectorStateTable
120     : public VectorBiTable<typename T::StateId, T, FP> {
121  public:
122   typedef T StateTuple;
123   typedef typename StateTuple::StateId StateId;
124   using VectorBiTable<StateId, T, FP>::FindId;
125   using VectorBiTable<StateId, T, FP>::FindEntry;
126   using VectorBiTable<StateId, T, FP>::Size;
127   using VectorBiTable<StateId, T, FP>::Fingerprint;
128 
129   // Reserves space for 'table_size' elements.
130   explicit VectorStateTable(FP *fp = 0, size_t table_size = 0)
131       : VectorBiTable<StateId, T, FP>(fp, table_size) {}
132 
FindState(const StateTuple & tuple)133   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
Tuple(StateId s)134   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
135 };
136 
137 
138 // An implementation using a vector and a compact hash table. The
139 // selecting functor S returns true for tuples to be hashed in the
140 // vector.  The fingerprinting functor FP returns a unique fingerprint
141 // for each tuple to be hashed in the vector (these need to be
142 // suitable for indexing in a vector).  The hash functor H is used when
143 // hashing tuple into the compact hash table.
144 template <class T, class S, class FP, class H>
145 class VectorHashStateTable
146     : public VectorHashBiTable<typename T::StateId, T, S, FP, H> {
147  public:
148   typedef T StateTuple;
149   typedef typename StateTuple::StateId StateId;
150   using VectorHashBiTable<StateId, T, S, FP, H>::FindId;
151   using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry;
152   using VectorHashBiTable<StateId, T, S, FP, H>::Size;
153   using VectorHashBiTable<StateId, T, S, FP, H>::Selector;
154   using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint;
155   using VectorHashBiTable<StateId, T, S, FP, H>::Hash;
156 
157   VectorHashStateTable(S *s, FP *fp, H *h,
158                        size_t vector_size = 0,
159                        size_t tuple_size = 0)
160       : VectorHashBiTable<StateId, T, S, FP, H>(
161           s, fp, h, vector_size, tuple_size) {}
162 
FindState(const StateTuple & tuple)163   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
Tuple(StateId s)164   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
165 };
166 
167 
168 // An implementation using a hash map for the tuple to state ID
169 // mapping. This version permits erasing of states.  The state tuple T
170 // must have == defined and its default constructor must produce a
171 // tuple that will never be seen. F is the hash function.
172 template <class T, class F>
173 class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> {
174  public:
175   typedef T StateTuple;
176   typedef typename StateTuple::StateId StateId;
177   using ErasableBiTable<StateId, T, F>::FindId;
178   using ErasableBiTable<StateId, T, F>::FindEntry;
179   using ErasableBiTable<StateId, T, F>::Size;
180   using ErasableBiTable<StateId, T, F>::Erase;
181 
ErasableStateTable()182   ErasableStateTable() : ErasableBiTable<StateId, T, F>() {}
FindState(const StateTuple & tuple)183   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
Tuple(StateId s)184   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
185 };
186 
187 //
188 // COMPOSITION STATE TUPLES AND TABLES
189 //
190 // The composition state table has the form:
191 //
192 // template <class A, class F>
193 // class ComposeStateTable {
194 //  public:
195 //   typedef A Arc;
196 //   typedef F FilterState;
197 //   typedef typename A::StateId StateId;
198 //   typedef ComposeStateTuple<StateId> StateTuple;
199 //
200 //   // Required constructors. Copy constructor does not copy state.
201 //   ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
202 //   ComposeStateTable(const ComposeStateTable<A, F> &table);
203 //   // Lookup state ID by tuple. If it doesn't exist, then add it.
204 //   StateId FindState(const StateTuple &);
205 //   // Lookup state tuple by state ID.
206 //   const StateTuple<StateId> &Tuple(StateId) const;
207 //   // # of stored tuples.
208 //   StateId Size() const;
209 //   // Return true if error encountered
210 //   bool Error() const;
211 // };
212 
213 // Represents the composition state.
214 template <typename S, typename F>
215 struct ComposeStateTuple {
216   typedef S StateId;
217   typedef F FilterState;
218 
ComposeStateTupleComposeStateTuple219   ComposeStateTuple()
220       : state_id1(kNoStateId), state_id2(kNoStateId),
221         filter_state(FilterState::NoState()) {}
222 
ComposeStateTupleComposeStateTuple223   ComposeStateTuple(StateId s1, StateId s2, const FilterState &f)
224       : state_id1(s1), state_id2(s2), filter_state(f) {}
225 
226   StateId state_id1;         // State Id on fst1
227   StateId state_id2;         // State Id on fst2
228   FilterState filter_state;  // State of composition filter
229 };
230 
231 // Equality of composition state tuples.
232 template <typename S, typename F>
233 inline bool operator==(const ComposeStateTuple<S, F>& x,
234                        const ComposeStateTuple<S, F>& y) {
235   if (&x == &y)
236     return true;
237   return x.state_id1 == y.state_id1 &&
238       x.state_id2 == y.state_id2 &&
239       x.filter_state == y.filter_state;
240 }
241 
242 
243 // Hashing of composition state tuples.
244 template <typename S, typename F>
245 class ComposeHash {
246  public:
operator()247   size_t operator()(const ComposeStateTuple<S, F>& t) const {
248     return t.state_id1 + t.state_id2 * kPrime0 +
249         t.filter_state.Hash() * kPrime1;
250   }
251  private:
252   static const size_t kPrime0;
253   static const size_t kPrime1;
254 };
255 
256 template <typename S, typename F>
257 const size_t ComposeHash<S, F>::kPrime0 = 7853;
258 
259 template <typename S, typename F>
260 const size_t ComposeHash<S, F>::kPrime1 = 7867;
261 
262 
263 // A HashStateTable over composition tuples.
264 template <typename A,
265           typename F,
266           typename H =
267           CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>,
268                                 ComposeHash<typename A::StateId, F> > >
269 class GenericComposeStateTable : public H {
270  public:
271   typedef A Arc;
272   typedef typename A::StateId StateId;
273   typedef F FilterState;
274   typedef ComposeStateTuple<StateId, F> StateTuple;
275 
GenericComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)276   GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
277 
278   // Reserves space for 'table_size' elements.
GenericComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2,size_t table_size)279   GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2,
280                            size_t table_size) : H(table_size) {}
281 
Error()282   bool Error() const { return false; }
283 
284  private:
285   void operator=(const GenericComposeStateTable<A, F> &table);  // disallow
286 };
287 
288 
289 //  Fingerprint for general composition tuples.
290 template  <typename S, typename F>
291 class ComposeFingerprint {
292  public:
293   typedef S StateId;
294   typedef F FilterState;
295   typedef ComposeStateTuple<S, F> StateTuple;
296 
297   // Required but suboptimal constructor.
ComposeFingerprint()298   ComposeFingerprint() : mult1_(8192), mult2_(8192) {
299     LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
300   }
301 
302   // Constructor is provided the sizes of the input FSTs
ComposeFingerprint(StateId nstates1,StateId nstates2)303   ComposeFingerprint(StateId nstates1, StateId nstates2)
304       : mult1_(nstates1), mult2_(nstates1 * nstates2) { }
305 
operator()306   size_t operator()(const StateTuple &tuple) {
307     return tuple.state_id1 + tuple.state_id2 * mult1_ +
308         tuple.filter_state.Hash() * mult2_;
309   }
310 
311  private:
312   ssize_t mult1_;
313   ssize_t mult2_;
314 };
315 
316 
317 // Useful when the first composition state determines the tuple.
318 template  <typename S, typename F>
319 class ComposeState1Fingerprint {
320  public:
321   typedef S StateId;
322   typedef F FilterState;
323   typedef ComposeStateTuple<S, F> StateTuple;
324 
operator()325   size_t operator()(const StateTuple &tuple) { return tuple.state_id1; }
326 };
327 
328 
329 // Useful when the second composition state determines the tuple.
330 template  <typename S, typename F>
331 class ComposeState2Fingerprint {
332  public:
333   typedef S StateId;
334   typedef F FilterState;
335   typedef ComposeStateTuple<S, F> StateTuple;
336 
operator()337   size_t operator()(const StateTuple &tuple) { return tuple.state_id2; }
338 };
339 
340 
341 // A VectorStateTable over composition tuples.  This can be used when
342 // the product of number of states in FST1 and FST2 (and the
343 // composition filter state hash) is manageable. If the FSTs are not
344 // expanded Fsts, they will first have their states counted.
345 template  <typename A, typename F>
346 class ProductComposeStateTable : public
347 VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
348                  ComposeFingerprint<typename A::StateId, F> > {
349  public:
350   typedef A Arc;
351   typedef typename A::StateId StateId;
352   typedef F FilterState;
353   typedef ComposeStateTuple<StateId, F> StateTuple;
354   typedef VectorStateTable<StateTuple,
355                            ComposeFingerprint<StateId, F> > StateTable;
356 
357   // Reserves space for 'table_size' elements.
358   ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2,
359                            size_t table_size = 0)
StateTable(new ComposeFingerprint<StateId,F> (CountStates (fst1),CountStates (fst2)),table_size)360       : StateTable(new ComposeFingerprint<StateId, F>(CountStates(fst1),
361                                                       CountStates(fst2)),
362                    table_size) {}
363 
ProductComposeStateTable(const ProductComposeStateTable<A,F> & table)364   ProductComposeStateTable(const ProductComposeStateTable<A, F> &table)
365       : StateTable(new ComposeFingerprint<StateId, F>(table.Fingerprint())) {}
366 
Error()367   bool Error() const { return false; }
368 
369  private:
370   void operator=(const ProductComposeStateTable<A, F> &table);  // disallow
371 };
372 
373 // A VectorStateTable over composition tuples.  This can be used when
374 // FST1 is a string (satisfies kStringProperties) and FST2 is
375 // epsilon-free and deterministic. It should be used with a
376 // composition filter that creates at most one filter state per tuple
377 // under these conditions (e.g. SequenceComposeFilter or
378 // MatchComposeFilter).
379 template <typename A, typename F>
380 class StringDetComposeStateTable : public
381 VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
382                  ComposeState1Fingerprint<typename A::StateId, F> > {
383  public:
384   typedef A Arc;
385   typedef typename A::StateId StateId;
386   typedef F FilterState;
387   typedef ComposeStateTuple<StateId, F> StateTuple;
388   typedef VectorStateTable<StateTuple,
389                            ComposeState1Fingerprint<StateId, F> > StateTable;
390 
StringDetComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)391   StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
392       : error_(false) {
393     uint64 props1 = kString;
394     uint64 props2 = kIDeterministic | kNoIEpsilons;
395     if (fst1.Properties(props1, true) != props1 ||
396         fst2.Properties(props2, true) != props2) {
397       FSTERROR() << "StringDetComposeStateTable: fst1 not a string or"
398                  << " fst2 not input deterministic and epsilon-free";
399       error_ = true;
400     }
401   }
402 
StringDetComposeStateTable(const StringDetComposeStateTable<A,F> & table)403   StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table)
404       : StateTable(table), error_(table.error_) {}
405 
Error()406   bool Error() const { return error_; }
407 
408  private:
409   bool error_;
410 
411   void operator=(const StringDetComposeStateTable<A, F> &table);  // disallow
412 };
413 
414 
415 // A VectorStateTable over composition tuples.  This can be used when
416 // FST2 is a string (satisfies kStringProperties) and FST1 is
417 // epsilon-free and deterministic. It should be used with a
418 // composition filter that creates at most one filter state per tuple
419 // under these conditions (e.g. SequenceComposeFilter or
420 // MatchComposeFilter).
421 template <typename A, typename F>
422 class DetStringComposeStateTable : public
423 VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
424                  ComposeState2Fingerprint<typename A::StateId, F> > {
425  public:
426   typedef A Arc;
427   typedef typename A::StateId StateId;
428   typedef F FilterState;
429   typedef ComposeStateTuple<StateId, F> StateTuple;
430   typedef VectorStateTable<StateTuple,
431                            ComposeState2Fingerprint<StateId, F> > StateTable;
432 
DetStringComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)433   DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
434       :error_(false) {
435     uint64 props1 = kODeterministic | kNoOEpsilons;
436     uint64 props2 = kString;
437     if (fst1.Properties(props1, true) != props1 ||
438         fst2.Properties(props2, true) != props2) {
439       FSTERROR() << "StringDetComposeStateTable: fst2 not a string or"
440                  << " fst1 not output deterministic and epsilon-free";
441       error_ = true;
442     }
443   }
444 
DetStringComposeStateTable(const DetStringComposeStateTable<A,F> & table)445   DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table)
446       : StateTable(table), error_(table.error_) {}
447 
Error()448   bool Error() const { return error_; }
449 
450  private:
451   bool error_;
452 
453   void operator=(const DetStringComposeStateTable<A, F> &table);  // disallow
454 };
455 
456 
457 // An ErasableStateTable over composition tuples. The Erase(StateId) method
458 // can be called if the user either is sure that composition will never return
459 // to that tuple or doesn't care that if it does, it is assigned a new
460 // state ID.
461 template <typename A, typename F>
462 class ErasableComposeStateTable : public
463 ErasableStateTable<ComposeStateTuple<typename A::StateId, F>,
464                    ComposeHash<typename A::StateId, F> > {
465  public:
466   typedef A Arc;
467   typedef typename A::StateId StateId;
468   typedef F FilterState;
469   typedef ComposeStateTuple<StateId, F> StateTuple;
470 
ErasableComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)471   ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
472 
Error()473   bool Error() const { return false; }
474 
475  private:
476   void operator=(const ErasableComposeStateTable<A, F> &table);  // disallow
477 };
478 
479 }  // namespace fst
480 
481 #endif  // FST_LIB_STATE_TABLE_H__
482