1 // state-table.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley@google.com (Michael Riley) 17 // 18 // \file 19 // Classes for representing the mapping between state tuples and state Ids. 20 21 #ifndef FST_LIB_STATE_TABLE_H__ 22 #define FST_LIB_STATE_TABLE_H__ 23 24 #include <deque> 25 using std::deque; 26 #include <vector> 27 using std::vector; 28 29 #include <fst/bi-table.h> 30 #include <fst/expanded-fst.h> 31 32 33 namespace fst { 34 35 // STATE TABLES - these determine the bijective mapping between state 36 // tuples (e.g. in composition triples of two FST states and a 37 // composition filter state) and their corresponding state IDs. 38 // They are classes, templated on state tuples, of the form: 39 // 40 // template <class T> 41 // class StateTable { 42 // public: 43 // typedef typename T StateTuple; 44 // 45 // // Required constructors. 46 // StateTable(); 47 // 48 // // Lookup state ID by tuple. If it doesn't exist, then add it. 49 // StateId FindState(const StateTuple &); 50 // // Lookup state tuple by state ID. 51 // const StateTuple<StateId> &Tuple(StateId) const; 52 // // # of stored tuples. 53 // StateId Size() const; 54 // }; 55 // 56 // A state tuple has the form: 57 // 58 // template <class S> 59 // struct StateTuple { 60 // typedef typename S StateId; 61 // 62 // // Required constructors. 63 // StateTuple(); 64 // StateTuple(const StateTuple &); 65 // }; 66 67 68 // An implementation using a hash map for the tuple to state ID mapping. 69 // The state tuple T must have == defined. H is the hash function. 70 template <class T, class H> 71 class HashStateTable : public HashBiTable<typename T::StateId, T, H> { 72 public: 73 typedef T StateTuple; 74 typedef typename StateTuple::StateId StateId; 75 using HashBiTable<StateId, T, H>::FindId; 76 using HashBiTable<StateId, T, H>::FindEntry; 77 using HashBiTable<StateId, T, H>::Size; 78 HashStateTable()79 HashStateTable() : HashBiTable<StateId, T, H>() {} 80 81 // Reserves space for table_size elements. HashStateTable(size_t table_size)82 explicit HashStateTable(size_t table_size) 83 : HashBiTable<StateId, T, H>(table_size) {} 84 FindState(const StateTuple & tuple)85 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } Tuple(StateId s)86 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 87 }; 88 89 90 // An implementation using a hash map for the tuple to state ID mapping. 91 // The state tuple T must have == defined. H is the hash function. 92 template <class T, class H> 93 class CompactHashStateTable 94 : public CompactHashBiTable<typename T::StateId, T, H> { 95 public: 96 typedef T StateTuple; 97 typedef typename StateTuple::StateId StateId; 98 using CompactHashBiTable<StateId, T, H>::FindId; 99 using CompactHashBiTable<StateId, T, H>::FindEntry; 100 using CompactHashBiTable<StateId, T, H>::Size; 101 CompactHashStateTable()102 CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {} 103 104 // Reserves space for 'table_size' elements. CompactHashStateTable(size_t table_size)105 explicit CompactHashStateTable(size_t table_size) 106 : CompactHashBiTable<StateId, T, H>(table_size) {} 107 FindState(const StateTuple & tuple)108 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } Tuple(StateId s)109 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 110 }; 111 112 // An implementation using a vector for the tuple to state mapping. 113 // It is passed a function object FP that should fingerprint tuples 114 // uniquely to an integer that can used as a vector index. Normally, 115 // VectorStateTable constructs the FP object. The user can instead 116 // pass in this object; in that case, VectorStateTable takes its 117 // ownership. 118 template <class T, class FP> 119 class VectorStateTable 120 : public VectorBiTable<typename T::StateId, T, FP> { 121 public: 122 typedef T StateTuple; 123 typedef typename StateTuple::StateId StateId; 124 using VectorBiTable<StateId, T, FP>::FindId; 125 using VectorBiTable<StateId, T, FP>::FindEntry; 126 using VectorBiTable<StateId, T, FP>::Size; 127 using VectorBiTable<StateId, T, FP>::Fingerprint; 128 129 // Reserves space for 'table_size' elements. 130 explicit VectorStateTable(FP *fp = 0, size_t table_size = 0) 131 : VectorBiTable<StateId, T, FP>(fp, table_size) {} 132 FindState(const StateTuple & tuple)133 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } Tuple(StateId s)134 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 135 }; 136 137 138 // An implementation using a vector and a compact hash table. The 139 // selecting functor S returns true for tuples to be hashed in the 140 // vector. The fingerprinting functor FP returns a unique fingerprint 141 // for each tuple to be hashed in the vector (these need to be 142 // suitable for indexing in a vector). The hash functor H is used when 143 // hashing tuple into the compact hash table. 144 template <class T, class S, class FP, class H> 145 class VectorHashStateTable 146 : public VectorHashBiTable<typename T::StateId, T, S, FP, H> { 147 public: 148 typedef T StateTuple; 149 typedef typename StateTuple::StateId StateId; 150 using VectorHashBiTable<StateId, T, S, FP, H>::FindId; 151 using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry; 152 using VectorHashBiTable<StateId, T, S, FP, H>::Size; 153 using VectorHashBiTable<StateId, T, S, FP, H>::Selector; 154 using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint; 155 using VectorHashBiTable<StateId, T, S, FP, H>::Hash; 156 157 VectorHashStateTable(S *s, FP *fp, H *h, 158 size_t vector_size = 0, 159 size_t tuple_size = 0) 160 : VectorHashBiTable<StateId, T, S, FP, H>( 161 s, fp, h, vector_size, tuple_size) {} 162 FindState(const StateTuple & tuple)163 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } Tuple(StateId s)164 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 165 }; 166 167 168 // An implementation using a hash map for the tuple to state ID 169 // mapping. This version permits erasing of states. The state tuple T 170 // must have == defined and its default constructor must produce a 171 // tuple that will never be seen. F is the hash function. 172 template <class T, class F> 173 class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> { 174 public: 175 typedef T StateTuple; 176 typedef typename StateTuple::StateId StateId; 177 using ErasableBiTable<StateId, T, F>::FindId; 178 using ErasableBiTable<StateId, T, F>::FindEntry; 179 using ErasableBiTable<StateId, T, F>::Size; 180 using ErasableBiTable<StateId, T, F>::Erase; 181 ErasableStateTable()182 ErasableStateTable() : ErasableBiTable<StateId, T, F>() {} FindState(const StateTuple & tuple)183 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } Tuple(StateId s)184 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 185 }; 186 187 // 188 // COMPOSITION STATE TUPLES AND TABLES 189 // 190 // The composition state table has the form: 191 // 192 // template <class A, class F> 193 // class ComposeStateTable { 194 // public: 195 // typedef A Arc; 196 // typedef F FilterState; 197 // typedef typename A::StateId StateId; 198 // typedef ComposeStateTuple<StateId> StateTuple; 199 // 200 // // Required constructors. Copy constructor does not copy state. 201 // ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2); 202 // ComposeStateTable(const ComposeStateTable<A, F> &table); 203 // // Lookup state ID by tuple. If it doesn't exist, then add it. 204 // StateId FindState(const StateTuple &); 205 // // Lookup state tuple by state ID. 206 // const StateTuple<StateId> &Tuple(StateId) const; 207 // // # of stored tuples. 208 // StateId Size() const; 209 // // Return true if error encountered 210 // bool Error() const; 211 // }; 212 213 // Represents the composition state. 214 template <typename S, typename F> 215 struct ComposeStateTuple { 216 typedef S StateId; 217 typedef F FilterState; 218 ComposeStateTupleComposeStateTuple219 ComposeStateTuple() 220 : state_id1(kNoStateId), state_id2(kNoStateId), 221 filter_state(FilterState::NoState()) {} 222 ComposeStateTupleComposeStateTuple223 ComposeStateTuple(StateId s1, StateId s2, const FilterState &f) 224 : state_id1(s1), state_id2(s2), filter_state(f) {} 225 226 StateId state_id1; // State Id on fst1 227 StateId state_id2; // State Id on fst2 228 FilterState filter_state; // State of composition filter 229 }; 230 231 // Equality of composition state tuples. 232 template <typename S, typename F> 233 inline bool operator==(const ComposeStateTuple<S, F>& x, 234 const ComposeStateTuple<S, F>& y) { 235 if (&x == &y) 236 return true; 237 return x.state_id1 == y.state_id1 && 238 x.state_id2 == y.state_id2 && 239 x.filter_state == y.filter_state; 240 } 241 242 243 // Hashing of composition state tuples. 244 template <typename S, typename F> 245 class ComposeHash { 246 public: operator()247 size_t operator()(const ComposeStateTuple<S, F>& t) const { 248 return t.state_id1 + t.state_id2 * kPrime0 + 249 t.filter_state.Hash() * kPrime1; 250 } 251 private: 252 static const size_t kPrime0; 253 static const size_t kPrime1; 254 }; 255 256 template <typename S, typename F> 257 const size_t ComposeHash<S, F>::kPrime0 = 7853; 258 259 template <typename S, typename F> 260 const size_t ComposeHash<S, F>::kPrime1 = 7867; 261 262 263 // A HashStateTable over composition tuples. 264 template <typename A, 265 typename F, 266 typename H = 267 CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>, 268 ComposeHash<typename A::StateId, F> > > 269 class GenericComposeStateTable : public H { 270 public: 271 typedef A Arc; 272 typedef typename A::StateId StateId; 273 typedef F FilterState; 274 typedef ComposeStateTuple<StateId, F> StateTuple; 275 GenericComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)276 GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {} 277 278 // Reserves space for 'table_size' elements. GenericComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2,size_t table_size)279 GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2, 280 size_t table_size) : H(table_size) {} 281 Error()282 bool Error() const { return false; } 283 284 private: 285 void operator=(const GenericComposeStateTable<A, F> &table); // disallow 286 }; 287 288 289 // Fingerprint for general composition tuples. 290 template <typename S, typename F> 291 class ComposeFingerprint { 292 public: 293 typedef S StateId; 294 typedef F FilterState; 295 typedef ComposeStateTuple<S, F> StateTuple; 296 297 // Required but suboptimal constructor. ComposeFingerprint()298 ComposeFingerprint() : mult1_(8192), mult2_(8192) { 299 LOG(WARNING) << "TupleFingerprint: # of FST states should be provided."; 300 } 301 302 // Constructor is provided the sizes of the input FSTs ComposeFingerprint(StateId nstates1,StateId nstates2)303 ComposeFingerprint(StateId nstates1, StateId nstates2) 304 : mult1_(nstates1), mult2_(nstates1 * nstates2) { } 305 operator()306 size_t operator()(const StateTuple &tuple) { 307 return tuple.state_id1 + tuple.state_id2 * mult1_ + 308 tuple.filter_state.Hash() * mult2_; 309 } 310 311 private: 312 ssize_t mult1_; 313 ssize_t mult2_; 314 }; 315 316 317 // Useful when the first composition state determines the tuple. 318 template <typename S, typename F> 319 class ComposeState1Fingerprint { 320 public: 321 typedef S StateId; 322 typedef F FilterState; 323 typedef ComposeStateTuple<S, F> StateTuple; 324 operator()325 size_t operator()(const StateTuple &tuple) { return tuple.state_id1; } 326 }; 327 328 329 // Useful when the second composition state determines the tuple. 330 template <typename S, typename F> 331 class ComposeState2Fingerprint { 332 public: 333 typedef S StateId; 334 typedef F FilterState; 335 typedef ComposeStateTuple<S, F> StateTuple; 336 operator()337 size_t operator()(const StateTuple &tuple) { return tuple.state_id2; } 338 }; 339 340 341 // A VectorStateTable over composition tuples. This can be used when 342 // the product of number of states in FST1 and FST2 (and the 343 // composition filter state hash) is manageable. If the FSTs are not 344 // expanded Fsts, they will first have their states counted. 345 template <typename A, typename F> 346 class ProductComposeStateTable : public 347 VectorStateTable<ComposeStateTuple<typename A::StateId, F>, 348 ComposeFingerprint<typename A::StateId, F> > { 349 public: 350 typedef A Arc; 351 typedef typename A::StateId StateId; 352 typedef F FilterState; 353 typedef ComposeStateTuple<StateId, F> StateTuple; 354 typedef VectorStateTable<StateTuple, 355 ComposeFingerprint<StateId, F> > StateTable; 356 357 // Reserves space for 'table_size' elements. 358 ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2, 359 size_t table_size = 0) StateTable(new ComposeFingerprint<StateId,F> (CountStates (fst1),CountStates (fst2)),table_size)360 : StateTable(new ComposeFingerprint<StateId, F>(CountStates(fst1), 361 CountStates(fst2)), 362 table_size) {} 363 ProductComposeStateTable(const ProductComposeStateTable<A,F> & table)364 ProductComposeStateTable(const ProductComposeStateTable<A, F> &table) 365 : StateTable(new ComposeFingerprint<StateId, F>(table.Fingerprint())) {} 366 Error()367 bool Error() const { return false; } 368 369 private: 370 void operator=(const ProductComposeStateTable<A, F> &table); // disallow 371 }; 372 373 // A VectorStateTable over composition tuples. This can be used when 374 // FST1 is a string (satisfies kStringProperties) and FST2 is 375 // epsilon-free and deterministic. It should be used with a 376 // composition filter that creates at most one filter state per tuple 377 // under these conditions (e.g. SequenceComposeFilter or 378 // MatchComposeFilter). 379 template <typename A, typename F> 380 class StringDetComposeStateTable : public 381 VectorStateTable<ComposeStateTuple<typename A::StateId, F>, 382 ComposeState1Fingerprint<typename A::StateId, F> > { 383 public: 384 typedef A Arc; 385 typedef typename A::StateId StateId; 386 typedef F FilterState; 387 typedef ComposeStateTuple<StateId, F> StateTuple; 388 typedef VectorStateTable<StateTuple, 389 ComposeState1Fingerprint<StateId, F> > StateTable; 390 StringDetComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)391 StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) 392 : error_(false) { 393 uint64 props1 = kString; 394 uint64 props2 = kIDeterministic | kNoIEpsilons; 395 if (fst1.Properties(props1, true) != props1 || 396 fst2.Properties(props2, true) != props2) { 397 FSTERROR() << "StringDetComposeStateTable: fst1 not a string or" 398 << " fst2 not input deterministic and epsilon-free"; 399 error_ = true; 400 } 401 } 402 StringDetComposeStateTable(const StringDetComposeStateTable<A,F> & table)403 StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table) 404 : StateTable(table), error_(table.error_) {} 405 Error()406 bool Error() const { return error_; } 407 408 private: 409 bool error_; 410 411 void operator=(const StringDetComposeStateTable<A, F> &table); // disallow 412 }; 413 414 415 // A VectorStateTable over composition tuples. This can be used when 416 // FST2 is a string (satisfies kStringProperties) and FST1 is 417 // epsilon-free and deterministic. It should be used with a 418 // composition filter that creates at most one filter state per tuple 419 // under these conditions (e.g. SequenceComposeFilter or 420 // MatchComposeFilter). 421 template <typename A, typename F> 422 class DetStringComposeStateTable : public 423 VectorStateTable<ComposeStateTuple<typename A::StateId, F>, 424 ComposeState2Fingerprint<typename A::StateId, F> > { 425 public: 426 typedef A Arc; 427 typedef typename A::StateId StateId; 428 typedef F FilterState; 429 typedef ComposeStateTuple<StateId, F> StateTuple; 430 typedef VectorStateTable<StateTuple, 431 ComposeState2Fingerprint<StateId, F> > StateTable; 432 DetStringComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)433 DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) 434 :error_(false) { 435 uint64 props1 = kODeterministic | kNoOEpsilons; 436 uint64 props2 = kString; 437 if (fst1.Properties(props1, true) != props1 || 438 fst2.Properties(props2, true) != props2) { 439 FSTERROR() << "StringDetComposeStateTable: fst2 not a string or" 440 << " fst1 not output deterministic and epsilon-free"; 441 error_ = true; 442 } 443 } 444 DetStringComposeStateTable(const DetStringComposeStateTable<A,F> & table)445 DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table) 446 : StateTable(table), error_(table.error_) {} 447 Error()448 bool Error() const { return error_; } 449 450 private: 451 bool error_; 452 453 void operator=(const DetStringComposeStateTable<A, F> &table); // disallow 454 }; 455 456 457 // An ErasableStateTable over composition tuples. The Erase(StateId) method 458 // can be called if the user either is sure that composition will never return 459 // to that tuple or doesn't care that if it does, it is assigned a new 460 // state ID. 461 template <typename A, typename F> 462 class ErasableComposeStateTable : public 463 ErasableStateTable<ComposeStateTuple<typename A::StateId, F>, 464 ComposeHash<typename A::StateId, F> > { 465 public: 466 typedef A Arc; 467 typedef typename A::StateId StateId; 468 typedef F FilterState; 469 typedef ComposeStateTuple<StateId, F> StateTuple; 470 ErasableComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)471 ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {} 472 Error()473 bool Error() const { return false; } 474 475 private: 476 void operator=(const ErasableComposeStateTable<A, F> &table); // disallow 477 }; 478 479 } // namespace fst 480 481 #endif // FST_LIB_STATE_TABLE_H__ 482