1 // equivalent.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: wojciech@google.com (Wojciech Skut) 17 // 18 // \file Functions and classes to determine the equivalence of two 19 // FSTs. 20 21 #ifndef FST_LIB_EQUIVALENT_H__ 22 #define FST_LIB_EQUIVALENT_H__ 23 24 #include <algorithm> 25 #include <deque> 26 using std::deque; 27 #include <tr1/unordered_map> 28 using std::tr1::unordered_map; 29 using std::tr1::unordered_multimap; 30 #include <utility> 31 using std::pair; using std::make_pair; 32 #include <vector> 33 using std::vector; 34 35 #include <fst/encode.h> 36 #include <fst/push.h> 37 #include <fst/union-find.h> 38 #include <fst/vector-fst.h> 39 40 41 namespace fst { 42 43 // Traits-like struct holding utility functions/typedefs/constants for 44 // the equivalence algorithm. 45 // 46 // Encoding device: in order to make the statesets of the two acceptors 47 // disjoint, we map Arc::StateId on the type MappedId. The states of 48 // the first acceptor are mapped on odd numbers (s -> 2s + 1), and 49 // those of the second one on even numbers (s -> 2s + 2). The number 0 50 // is reserved for an implicit (non-final) 'dead state' (required for 51 // the correct treatment of non-coaccessible states; kNoStateId is 52 // mapped to kDeadState for both acceptors). The union-find algorithm 53 // operates on the mapped IDs. 54 template <class Arc> 55 struct EquivalenceUtil { 56 typedef typename Arc::StateId StateId; 57 typedef typename Arc::Weight Weight; 58 typedef StateId MappedId; // ID for an equivalence class. 59 60 // MappedId for an implicit dead state. 61 static const MappedId kDeadState = 0; 62 63 // MappedId for lookup failure. 64 static const MappedId kInvalidId = -1; 65 66 // Maps state ID to the representative of the corresponding 67 // equivalence class. The parameter 'which_fst' takes the values 1 68 // and 2, identifying the input FST. MapStateEquivalenceUtil69 static MappedId MapState(StateId s, int32 which_fst) { 70 return 71 (kNoStateId == s) 72 ? 73 kDeadState 74 : 75 (static_cast<MappedId>(s) << 1) + which_fst; 76 } 77 // Maps set ID to State ID. UnMapStateEquivalenceUtil78 static StateId UnMapState(MappedId id) { 79 return static_cast<StateId>((--id) >> 1); 80 } 81 // Convenience function: checks if state with MappedId 's' is final 82 // in acceptor 'fa'. IsFinalEquivalenceUtil83 static bool IsFinal(const Fst<Arc> &fa, MappedId s) { 84 return 85 (kDeadState == s) ? 86 false : (fa.Final(UnMapState(s)) != Weight::Zero()); 87 } 88 // Convenience function: returns the representative of 'id' in 'sets', 89 // creating a new set if needed. FindSetEquivalenceUtil90 static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) { 91 MappedId repr = sets->FindSet(id); 92 if (repr != kInvalidId) { 93 return repr; 94 } else { 95 sets->MakeSet(id); 96 return id; 97 } 98 } 99 }; 100 101 template <class Arc> const 102 typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kDeadState; 103 104 template <class Arc> const 105 typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kInvalidId; 106 107 108 // Equivalence checking algorithm: determines if the two FSTs 109 // <code>fst1</code> and <code>fst2</code> are equivalent. The input 110 // FSTs must be deterministic input-side epsilon-free acceptors, 111 // unweighted or with weights over a left semiring. Two acceptors are 112 // considered equivalent if they accept exactly the same set of 113 // strings (with the same weights). 114 // 115 // The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and 116 // Analysis of Computer Programs") successively constructs sets of 117 // states that can be reached by the same prefixes, starting with a 118 // set containing the start states of both acceptors. A disjoint tree 119 // forest (the union-find algorithm) is used to represent the sets of 120 // states. The algorithm returns 'false' if one of the constructed 121 // sets contains both final and non-final states. Returns optional error 122 // value (when FLAGS_error_fatal = false). 123 // 124 // Complexity: quasi-linear, i.e. O(n G(n)), where 125 // n = |S1| + |S2| is the number of states in both acceptors 126 // G(n) is a very slowly growing function that can be approximated 127 // by 4 by all practical purposes. 128 // 129 template <class Arc> 130 bool Equivalent(const Fst<Arc> &fst1, 131 const Fst<Arc> &fst2, 132 double delta = kDelta, bool *error = 0) { 133 typedef typename Arc::Weight Weight; 134 if (error) *error = false; 135 136 // Check that the symbol table are compatible 137 if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) || 138 !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) { 139 FSTERROR() << "Equivalent: input/output symbol tables of 1st argument " 140 << "do not match input/output symbol tables of 2nd argument"; 141 if (error) *error = true; 142 return false; 143 } 144 // Check properties first: 145 uint64 props = kNoEpsilons | kIDeterministic | kAcceptor; 146 if (fst1.Properties(props, true) != props) { 147 FSTERROR() << "Equivalent: first argument not an" 148 << " epsilon-free deterministic acceptor"; 149 if (error) *error = true; 150 return false; 151 } 152 if (fst2.Properties(props, true) != props) { 153 FSTERROR() << "Equivalent: second argument not an" 154 << " epsilon-free deterministic acceptor"; 155 if (error) *error = true; 156 return false; 157 } 158 159 if ((fst1.Properties(kUnweighted , true) != kUnweighted) 160 || (fst2.Properties(kUnweighted , true) != kUnweighted)) { 161 VectorFst<Arc> efst1(fst1); 162 VectorFst<Arc> efst2(fst2); 163 Push(&efst1, REWEIGHT_TO_INITIAL, delta); 164 Push(&efst2, REWEIGHT_TO_INITIAL, delta); 165 ArcMap(&efst1, QuantizeMapper<Arc>(delta)); 166 ArcMap(&efst2, QuantizeMapper<Arc>(delta)); 167 EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE); 168 ArcMap(&efst1, &mapper); 169 ArcMap(&efst2, &mapper); 170 return Equivalent(efst1, efst2); 171 } 172 173 // Convenience typedefs: 174 typedef typename Arc::StateId StateId; 175 typedef EquivalenceUtil<Arc> Util; 176 typedef typename Util::MappedId MappedId; 177 enum { FST1 = 1, FST2 = 2 }; // Required by Util::MapState(...) 178 179 MappedId s1 = Util::MapState(fst1.Start(), FST1); 180 MappedId s2 = Util::MapState(fst2.Start(), FST2); 181 182 // The union-find structure. 183 UnionFind<MappedId> eq_classes(1000, Util::kInvalidId); 184 185 // Initialize the union-find structure. 186 eq_classes.MakeSet(s1); 187 eq_classes.MakeSet(s2); 188 189 // Data structure for the (partial) acceptor transition function of 190 // fst1 and fst2: input labels mapped to pairs of MappedId's 191 // representing destination states of the corresponding arcs in fst1 192 // and fst2, respectively. 193 typedef 194 unordered_map<typename Arc::Label, pair<MappedId, MappedId> > 195 Label2StatePairMap; 196 197 Label2StatePairMap arc_pairs; 198 199 // Pairs of MappedId's to be processed, organized in a queue. 200 deque<pair<MappedId, MappedId> > q; 201 202 bool ret = true; 203 // Early return if the start states differ w.r.t. being final. 204 if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) { 205 ret = false; 206 } 207 208 // Main loop: explores the two acceptors in a breadth-first manner, 209 // updating the equivalence relation on the statesets. Loop 210 // invariant: each block of states contains either final states only 211 // or non-final states only. 212 for (q.push_back(make_pair(s1, s2)); ret && !q.empty(); q.pop_front()) { 213 s1 = q.front().first; 214 s2 = q.front().second; 215 216 // Representatives of the equivalence classes of s1/s2. 217 MappedId rep1 = Util::FindSet(&eq_classes, s1); 218 MappedId rep2 = Util::FindSet(&eq_classes, s2); 219 220 if (rep1 != rep2) { 221 eq_classes.Union(rep1, rep2); 222 arc_pairs.clear(); 223 224 // Copy outgoing arcs starting at s1 into the hashtable. 225 if (Util::kDeadState != s1) { 226 ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1)); 227 for (; !arc_iter.Done(); arc_iter.Next()) { 228 const Arc &arc = arc_iter.Value(); 229 if (arc.weight != Weight::Zero()) { // Zero-weight arcs 230 // are treated as 231 // non-exisitent. 232 arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1); 233 } 234 } 235 } 236 // Copy outgoing arcs starting at s2 into the hashtable. 237 if (Util::kDeadState != s2) { 238 ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2)); 239 for (; !arc_iter.Done(); arc_iter.Next()) { 240 const Arc &arc = arc_iter.Value(); 241 if (arc.weight != Weight::Zero()) { // Zero-weight arcs 242 // are treated as 243 // non-existent. 244 arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2); 245 } 246 } 247 } 248 // Iterate through the hashtable and process pairs of target 249 // states. 250 for (typename Label2StatePairMap::const_iterator 251 arc_iter = arc_pairs.begin(); 252 arc_iter != arc_pairs.end(); 253 ++arc_iter) { 254 const pair<MappedId, MappedId> &p = arc_iter->second; 255 if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) { 256 // Detected inconsistency: return false. 257 ret = false; 258 break; 259 } 260 q.push_back(p); 261 } 262 } 263 } 264 265 if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) { 266 if (error) *error = true; 267 return false; 268 } 269 270 return ret; 271 } 272 273 } // namespace fst 274 275 #endif // FST_LIB_EQUIVALENT_H__ 276