1 // compose.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Class to compute the composition of two FSTs
20 
21 #ifndef FST_LIB_COMPOSE_H__
22 #define FST_LIB_COMPOSE_H__
23 
24 #include <algorithm>
25 #include <string>
26 #include <vector>
27 using std::vector;
28 
29 #include <fst/cache.h>
30 #include <fst/compose-filter.h>
31 #include <fst/lookahead-filter.h>
32 #include <fst/matcher.h>
33 #include <fst/state-table.h>
34 #include <fst/test-properties.h>
35 
36 
37 namespace fst {
38 
39 // Delayed composition options templated on the arc type, the matcher,
40 // the composition filter, and the composition state table.  By
41 // default, the matchers, filter, and state table are constructed by
42 // composition. If set below, the user can instead pass in these
43 // objects; in that case, ComposeFst takes their ownership. This
44 // version controls composition implemented between generic Fst<Arc>
45 // types and a shared matcher type M for Fst<Arc>. This should be
46 // adequate for most applications, giving a reasonable tradeoff
47 // between efficiency and code sharing (but see ComposeFstImplOptions).
48 template <class A,
49           class M = Matcher<Fst<A> >,
50           class F = SequenceComposeFilter<M>,
51           class T = GenericComposeStateTable<A, typename F::FilterState> >
52 struct ComposeFstOptions : public CacheOptions {
53   M *matcher1;      // FST1 matcher (see matcher.h)
54   M *matcher2;      // FST2 matcher
55   F *filter;        // Composition filter (see compose-filter.h)
56   T *state_table;   // Composition state table (see compose-state-table.h)
57 
58   explicit ComposeFstOptions(const CacheOptions &opts,
59                              M *mat1 = 0, M *mat2 = 0,
60                              F *filt = 0, T *sttable= 0)
CacheOptionsComposeFstOptions61       : CacheOptions(opts), matcher1(mat1), matcher2(mat2),
62         filter(filt), state_table(sttable) {}
63 
ComposeFstOptionsComposeFstOptions64   ComposeFstOptions() : matcher1(0), matcher2(0), filter(0), state_table(0) {}
65 };
66 
67 
68 // Delayed composition options templated on the two matcher types, the
69 // composition filter, and the composition state table.  By default,
70 // the matchers, filter, and state table are constructed by
71 // composition. If set below, the user can instead pass in these
72 // objects; in that case, ComposeFst takes their ownership. This
73 // version controls composition implemented using arbitrary matchers
74 // (of the same Arc type but otherwise arbitrary Fst type). The user
75 // must ensure the matchers are compatible. These options permit the
76 // most efficient use, but shares the least code. This is for advanced
77 // use only in the most demanding or specialized applications that can
78 // benefit from it (o.w. prefer ComposeFstOptions).
79 template <class M1, class M2,
80           class F = SequenceComposeFilter<M1, M2>,
81           class T = GenericComposeStateTable<typename M1::Arc,
82                                              typename F::FilterState> >
83 struct ComposeFstImplOptions : public CacheOptions {
84   M1 *matcher1;     // FST1 matcher (see matcher.h)
85   M2 *matcher2;     // FST2 matcher
86   F *filter;        // Composition filter (see compose-filter.h)
87   T *state_table;   // Composition state table (see compose-state-table.h)
88 
89   explicit ComposeFstImplOptions(const CacheOptions &opts,
90                                  M1 *mat1 = 0, M2 *mat2 = 0,
91                                  F *filt = 0, T *sttable= 0)
CacheOptionsComposeFstImplOptions92       : CacheOptions(opts), matcher1(mat1), matcher2(mat2),
93         filter(filt), state_table(sttable) {}
94 
ComposeFstImplOptionsComposeFstImplOptions95   ComposeFstImplOptions()
96   : matcher1(0), matcher2(0), filter(0), state_table(0) {}
97 };
98 
99 
100 // Implementation of delayed composition. This base class is
101 // common to the variants with different matchers, composition filters
102 // and state tables.
103 template <class A>
104 class ComposeFstImplBase : public CacheImpl<A> {
105  public:
106   using FstImpl<A>::SetType;
107   using FstImpl<A>::SetProperties;
108   using FstImpl<A>::Properties;
109   using FstImpl<A>::SetInputSymbols;
110   using FstImpl<A>::SetOutputSymbols;
111 
112   using CacheBaseImpl< CacheState<A> >::HasStart;
113   using CacheBaseImpl< CacheState<A> >::HasFinal;
114   using CacheBaseImpl< CacheState<A> >::HasArcs;
115   using CacheBaseImpl< CacheState<A> >::SetFinal;
116   using CacheBaseImpl< CacheState<A> >::SetStart;
117 
118   typedef typename A::Label Label;
119   typedef typename A::Weight Weight;
120   typedef typename A::StateId StateId;
121   typedef CacheState<A> State;
122 
ComposeFstImplBase(const Fst<A> & fst1,const Fst<A> & fst2,const CacheOptions & opts)123   ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2,
124                      const CacheOptions &opts)
125       : CacheImpl<A>(opts) {
126     VLOG(2) << "ComposeFst(" << this << "): Begin";
127     SetType("compose");
128 
129     if (!CompatSymbols(fst2.InputSymbols(), fst1.OutputSymbols())) {
130       FSTERROR() << "ComposeFst: output symbol table of 1st argument "
131                  << "does not match input symbol table of 2nd argument";
132       SetProperties(kError, kError);
133     }
134 
135     SetInputSymbols(fst1.InputSymbols());
136     SetOutputSymbols(fst2.OutputSymbols());
137   }
138 
ComposeFstImplBase(const ComposeFstImplBase<A> & impl)139   ComposeFstImplBase(const ComposeFstImplBase<A> &impl)
140       : CacheImpl<A>(impl, true) {
141     SetProperties(impl.Properties(), kCopyProperties);
142     SetInputSymbols(impl.InputSymbols());
143     SetOutputSymbols(impl.OutputSymbols());
144   }
145 
146   virtual ComposeFstImplBase<A> *Copy() = 0;
147 
~ComposeFstImplBase()148   virtual ~ComposeFstImplBase() {}
149 
Start()150   StateId Start() {
151     if (!HasStart()) {
152       StateId start = ComputeStart();
153       if (start != kNoStateId) {
154         SetStart(start);
155       }
156     }
157     return CacheImpl<A>::Start();
158   }
159 
Final(StateId s)160   Weight Final(StateId s) {
161     if (!HasFinal(s)) {
162       Weight final = ComputeFinal(s);
163       SetFinal(s, final);
164     }
165     return CacheImpl<A>::Final(s);
166   }
167 
168   virtual void Expand(StateId s) = 0;
169 
NumArcs(StateId s)170   size_t NumArcs(StateId s) {
171     if (!HasArcs(s))
172       Expand(s);
173     return CacheImpl<A>::NumArcs(s);
174   }
175 
NumInputEpsilons(StateId s)176   size_t NumInputEpsilons(StateId s) {
177     if (!HasArcs(s))
178       Expand(s);
179     return CacheImpl<A>::NumInputEpsilons(s);
180   }
181 
NumOutputEpsilons(StateId s)182   size_t NumOutputEpsilons(StateId s) {
183     if (!HasArcs(s))
184       Expand(s);
185     return CacheImpl<A>::NumOutputEpsilons(s);
186   }
187 
InitArcIterator(StateId s,ArcIteratorData<A> * data)188   void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
189     if (!HasArcs(s))
190       Expand(s);
191     CacheImpl<A>::InitArcIterator(s, data);
192   }
193 
194  protected:
195   virtual StateId ComputeStart() = 0;
196   virtual Weight ComputeFinal(StateId s) = 0;
197 };
198 
199 
200 // Implementaion of delayed composition templated on the matchers (see
201 // matcher.h), composition filter (see compose-filter-inl.h) and
202 // the composition state table (see compose-state-table.h).
203 template <class M1, class M2, class F, class T>
204 class ComposeFstImpl : public ComposeFstImplBase<typename M1::Arc> {
205   typedef typename M1::FST FST1;
206   typedef typename M2::FST FST2;
207   typedef typename M1::Arc Arc;
208   typedef typename Arc::StateId StateId;
209   typedef typename Arc::Label Label;
210   typedef typename Arc::Weight Weight;
211   typedef typename F::FilterState FilterState;
212   typedef typename F::Matcher1 Matcher1;
213   typedef typename F::Matcher2 Matcher2;
214 
215   using CacheBaseImpl<CacheState<Arc> >::SetArcs;
216   using FstImpl<Arc>::SetType;
217   using FstImpl<Arc>::SetProperties;
218 
219   typedef ComposeStateTuple<StateId, FilterState> StateTuple;
220 
221  public:
222   ComposeFstImpl(const FST1 &fst1, const FST2 &fst2,
223                  const ComposeFstImplOptions<M1, M2, F, T> &opts);
224 
ComposeFstImpl(const ComposeFstImpl<M1,M2,F,T> & impl)225   ComposeFstImpl(const ComposeFstImpl<M1, M2, F, T> &impl)
226       : ComposeFstImplBase<Arc>(impl),
227         filter_(new F(*impl.filter_, true)),
228         matcher1_(filter_->GetMatcher1()),
229         matcher2_(filter_->GetMatcher2()),
230         fst1_(matcher1_->GetFst()),
231         fst2_(matcher2_->GetFst()),
232         state_table_(new T(*impl.state_table_)),
233         match_type_(impl.match_type_) {}
234 
~ComposeFstImpl()235   ~ComposeFstImpl() {
236     VLOG(2) << "ComposeFst(" << this
237             << "): End: # of visited states: " << state_table_->Size();
238 
239     delete filter_;
240     delete state_table_;
241   }
242 
Copy()243   virtual ComposeFstImpl<M1, M2, F, T> *Copy() {
244     return new ComposeFstImpl<M1, M2, F, T>(*this);
245   }
246 
Properties()247   uint64 Properties() const { return Properties(kFstProperties); }
248 
249   // Set error if found; return FST impl properties.
Properties(uint64 mask)250   uint64 Properties(uint64 mask) const {
251     if ((mask & kError) &&
252         (fst1_.Properties(kError, false) ||
253          fst2_.Properties(kError, false) ||
254          (matcher1_->Properties(0) & kError) ||
255          (matcher2_->Properties(0) & kError) |
256          (filter_->Properties(0) & kError) ||
257          state_table_->Error())) {
258       SetProperties(kError, kError);
259     }
260     return FstImpl<Arc>::Properties(mask);
261   }
262 
263   // Arranges it so that the first arg to OrderedExpand is the Fst
264   // that will be matched on.
Expand(StateId s)265   void Expand(StateId s) {
266     const StateTuple &tuple = state_table_->Tuple(s);
267     StateId s1 = tuple.state_id1;
268     StateId s2 = tuple.state_id2;
269     filter_->SetState(s1, s2, tuple.filter_state);
270     if (match_type_ == MATCH_OUTPUT ||
271         (match_type_ == MATCH_BOTH &&
272          internal::NumArcs(fst1_, s1) > internal::NumArcs(fst2_, s2)))
273       OrderedExpand(s, fst1_, s1, fst2_, s2, matcher1_, false);
274     else
275       OrderedExpand(s, fst2_, s2, fst1_, s1, matcher2_, true);
276   }
277 
GetFst1()278   const FST1 &GetFst1() { return fst1_; }
GetFst2()279   const FST2 &GetFst2() { return fst2_; }
GetMatcher1()280   M1 *GetMatcher1() { return matcher1_; }
GetMatcher2()281   M2 *GetMatcher2() { return matcher2_; }
GetFilter()282   F *GetFilter() { return filter_; }
GetStateTable()283   T *GetStateTable() { return state_table_; }
284 
285  private:
286   // This does that actual matching of labels in the composition. The
287   // arguments are ordered so matching is called on state 'sa' of
288   // 'fsta' for each arc leaving state 'sb' of 'fstb'. The 'match_input' arg
289   // determines whether the input or output label of arcs at 'sb' is
290   // the one to match on.
291   template <class FST, class Matcher>
OrderedExpand(StateId s,const Fst<Arc> &,StateId sa,const FST & fstb,StateId sb,Matcher * matchera,bool match_input)292   void OrderedExpand(StateId s, const Fst<Arc> &, StateId sa,
293                      const FST &fstb, StateId sb,
294                      Matcher *matchera,  bool match_input) {
295     matchera->SetState(sa);
296 
297     // First process non-consuming symbols (e.g., epsilons) on FSTA.
298     Arc loop(match_input ? 0 : kNoLabel, match_input ? kNoLabel : 0,
299            Weight::One(), sb);
300     MatchArc(s, matchera, loop, match_input);
301 
302     // Then process matches on FSTB.
303     for (ArcIterator<FST> iterb(fstb, sb); !iterb.Done(); iterb.Next())
304       MatchArc(s, matchera, iterb.Value(), match_input);
305 
306     SetArcs(s);
307   }
308 
309   // Matches a single transition from 'fstb' against 'fata' at 's'.
310   template <class Matcher>
MatchArc(StateId s,Matcher * matchera,const Arc & arc,bool match_input)311   void MatchArc(StateId s, Matcher *matchera,
312                 const Arc &arc, bool match_input) {
313     if (matchera->Find(match_input ? arc.olabel : arc.ilabel)) {
314       for (; !matchera->Done(); matchera->Next()) {
315         Arc arca = matchera->Value();
316         Arc arcb = arc;
317         if (match_input) {
318           const FilterState &f = filter_->FilterArc(&arcb, &arca);
319           if (f != FilterState::NoState())
320             AddArc(s, arcb, arca, f);
321         } else {
322           const FilterState &f = filter_->FilterArc(&arca, &arcb);
323           if (f != FilterState::NoState())
324             AddArc(s, arca, arcb, f);
325         }
326       }
327     }
328   }
329 
330   // Add a matching transition at 's'.
AddArc(StateId s,const Arc & arc1,const Arc & arc2,const FilterState & f)331    void AddArc(StateId s, const Arc &arc1, const Arc &arc2,
332                const FilterState &f) {
333     StateTuple tuple(arc1.nextstate, arc2.nextstate, f);
334     Arc oarc(arc1.ilabel, arc2.olabel, Times(arc1.weight, arc2.weight),
335            state_table_->FindState(tuple));
336     CacheImpl<Arc>::PushArc(s, oarc);
337   }
338 
ComputeStart()339   StateId ComputeStart() {
340     StateId s1 = fst1_.Start();
341     if (s1 == kNoStateId)
342       return kNoStateId;
343 
344     StateId s2 = fst2_.Start();
345     if (s2 == kNoStateId)
346       return kNoStateId;
347 
348     const FilterState &f = filter_->Start();
349     StateTuple tuple(s1, s2, f);
350     return state_table_->FindState(tuple);
351   }
352 
ComputeFinal(StateId s)353   Weight ComputeFinal(StateId s) {
354     const StateTuple &tuple = state_table_->Tuple(s);
355     StateId s1 = tuple.state_id1;
356     Weight final1 = internal::Final(fst1_, s1);
357     if (final1 == Weight::Zero())
358       return final1;
359 
360     StateId s2 = tuple.state_id2;
361     Weight final2 = internal::Final(fst2_, s2);
362     if (final2 == Weight::Zero())
363       return final2;
364 
365     filter_->SetState(s1, s2, tuple.filter_state);
366     filter_->FilterFinal(&final1, &final2);
367     return Times(final1, final2);
368   }
369 
370   // Identifies and verifies the capabilities of the matcher to be used for
371   // composition.
372   void SetMatchType();
373 
374   F *filter_;
375   Matcher1 *matcher1_;
376   Matcher2 *matcher2_;
377   const FST1 &fst1_;
378   const FST2 &fst2_;
379   T *state_table_;
380 
381   MatchType match_type_;
382 
383   void operator=(const ComposeFstImpl<M1, M2, F, T> &);  // disallow
384 };
385 
386 template <class M1, class M2, class F, class T> inline
ComposeFstImpl(const FST1 & fst1,const FST2 & fst2,const ComposeFstImplOptions<M1,M2,F,T> & opts)387 ComposeFstImpl<M1, M2, F, T>::ComposeFstImpl(
388     const FST1 &fst1, const FST2 &fst2,
389     const ComposeFstImplOptions<M1, M2, F, T> &opts)
390     : ComposeFstImplBase<Arc>(fst1, fst2, opts),
391       filter_(opts.filter ? opts.filter :
392               new F(fst1, fst2, opts.matcher1, opts.matcher2)),
393       matcher1_(filter_->GetMatcher1()),
394       matcher2_(filter_->GetMatcher2()),
395       fst1_(matcher1_->GetFst()),
396       fst2_(matcher2_->GetFst()),
397       state_table_(opts.state_table ? opts.state_table :
398                    new T(fst1_, fst2_)) {
399   SetMatchType();
400   if (match_type_ == MATCH_NONE)
401     SetProperties(kError, kError);
402   VLOG(2) << "ComposeFst(" << this << "): Match type: "
403           << (match_type_ == MATCH_OUTPUT ? "output" :
404               (match_type_ == MATCH_INPUT ? "input" :
405                (match_type_ == MATCH_BOTH ? "both" :
406                 (match_type_ == MATCH_NONE ? "none" : "unknown"))));
407 
408   uint64 fprops1 = fst1.Properties(kFstProperties, false);
409   uint64 fprops2 = fst2.Properties(kFstProperties, false);
410   uint64 mprops1 = matcher1_->Properties(fprops1);
411   uint64 mprops2 = matcher2_->Properties(fprops2);
412   uint64 cprops = ComposeProperties(mprops1, mprops2);
413   SetProperties(filter_->Properties(cprops), kCopyProperties);
414   if (state_table_->Error()) SetProperties(kError, kError);
415   VLOG(2) << "ComposeFst(" << this << "): Initialized";
416 }
417 
418 template <class M1, class M2, class F, class T>
SetMatchType()419 void ComposeFstImpl<M1, M2, F, T>::SetMatchType() {
420   MatchType type1 = matcher1_->Type(false);
421   MatchType type2 = matcher2_->Type(false);
422   uint32 flags1 = matcher1_->Flags();
423   uint32 flags2 = matcher2_->Flags();
424   if (flags1 & flags2 & kRequireMatch) {
425     FSTERROR() << "ComposeFst: only one argument can require matching.";
426     match_type_ = MATCH_NONE;
427   } else if (flags1 & kRequireMatch) {
428     if (matcher1_->Type(true) != MATCH_OUTPUT) {
429       FSTERROR() << "ComposeFst: 1st argument requires matching but cannot.";
430       match_type_ = MATCH_NONE;
431     }
432     match_type_ = MATCH_OUTPUT;
433   } else if (flags2 & kRequireMatch) {
434     if (matcher2_->Type(true) != MATCH_INPUT) {
435       FSTERROR() << "ComposeFst: 2nd argument requires matching but cannot.";
436       match_type_ = MATCH_NONE;
437     }
438     match_type_ = MATCH_INPUT;
439   } else if (flags1 & flags2 & kPreferMatch &&
440              type1 == MATCH_OUTPUT && type2  == MATCH_INPUT) {
441     match_type_ = MATCH_BOTH;
442   } else if (flags1 & kPreferMatch && type1 == MATCH_OUTPUT) {
443     match_type_ = MATCH_OUTPUT;
444   } else if  (flags2 & kPreferMatch && type2 == MATCH_INPUT) {
445     match_type_ = MATCH_INPUT;
446   } else if (type1 == MATCH_OUTPUT && type2  == MATCH_INPUT) {
447     match_type_ = MATCH_BOTH;
448   } else if (type1 == MATCH_OUTPUT) {
449     match_type_ = MATCH_OUTPUT;
450   } else if (type2 == MATCH_INPUT) {
451     match_type_ = MATCH_INPUT;
452   } else if (flags1 & kPreferMatch && matcher1_->Type(true) == MATCH_OUTPUT) {
453     match_type_ = MATCH_OUTPUT;
454   } else if  (flags2 & kPreferMatch && matcher2_->Type(true) == MATCH_INPUT) {
455     match_type_ = MATCH_INPUT;
456   } else if (matcher1_->Type(true) == MATCH_OUTPUT) {
457     match_type_ = MATCH_OUTPUT;
458   } else if (matcher2_->Type(true) == MATCH_INPUT) {
459     match_type_ = MATCH_INPUT;
460   } else {
461     FSTERROR() << "ComposeFst: 1st argument cannot match on output labels "
462                << "and 2nd argument cannot match on input labels (sort?).";
463     match_type_ = MATCH_NONE;
464   }
465 }
466 
467 
468 // Computes the composition of two transducers. This version is a
469 // delayed Fst. If FST1 transduces string x to y with weight a and FST2
470 // transduces y to z with weight b, then their composition transduces
471 // string x to z with weight Times(x, z).
472 //
473 // The output labels of the first transducer or the input labels of
474 // the second transducer must be sorted (with the default matcher).
475 // The weights need to form a commutative semiring (valid for
476 // TropicalWeight and LogWeight).
477 //
478 // Complexity:
479 // Assuming the first FST is unsorted and the second is sorted:
480 // - Time: O(v1 v2 d1 (log d2 + m2)),
481 // - Space: O(v1 v2)
482 // where vi = # of states visited, di = maximum out-degree, and mi the
483 // maximum multiplicity of the states visited for the ith
484 // FST. Constant time and space to visit an input state or arc is
485 // assumed and exclusive of caching.
486 //
487 // Caveats:
488 // - ComposeFst does not trim its output (since it is a delayed operation).
489 // - The efficiency of composition can be strongly affected by several factors:
490 //   - the choice of which tnansducer is sorted - prefer sorting the FST
491 //     that has the greater average out-degree.
492 //   - the amount of non-determinism
493 //   - the presence and location of epsilon transitions - avoid epsilon
494 //     transitions on the output side of the first transducer or
495 //     the input side of the second transducer or prefer placing
496 //     them later in a path since they delay matching and can
497 //     introduce non-coaccessible states and transitions.
498 //
499 // This class attaches interface to implementation and handles
500 // reference counting, delegating most methods to ImplToFst.
501 template <class A>
502 class ComposeFst : public ImplToFst< ComposeFstImplBase<A> > {
503  public:
504   friend class ArcIterator< ComposeFst<A> >;
505   friend class StateIterator< ComposeFst<A> >;
506 
507   typedef A Arc;
508   typedef typename A::Weight Weight;
509   typedef typename A::StateId StateId;
510   typedef CacheState<A> State;
511   typedef ComposeFstImplBase<A> Impl;
512 
513   using ImplToFst<Impl>::SetImpl;
514 
515   // Compose specifying only caching options.
516   ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
517              const CacheOptions &opts = CacheOptions())
CreateBase(fst1,fst2,opts)518       : ImplToFst<Impl>(CreateBase(fst1, fst2, opts)) {}
519 
520   // Compose specifying one shared matcher type M.  Requires input
521   // Fsts and matcher FST type (M::FST) be Fst<A>. Recommended for
522   // best code-sharing and matcher compatiblity.
523   template <class M, class F, class T>
ComposeFst(const Fst<A> & fst1,const Fst<A> & fst2,const ComposeFstOptions<A,M,F,T> & opts)524   ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
525              const ComposeFstOptions<A, M, F, T> &opts)
526       : ImplToFst<Impl>(CreateBase1(fst1, fst2, opts)) {}
527 
528   // Compose specifying two matcher types M1 and M2.  Requires input
529   // Fsts (of the same Arc type but o.w. arbitrary) match the
530   // corresponding matcher FST types (M1::FST, M2::FST). Recommended
531   // only for advanced use in demanding or specialized applications
532   // due to potential code bloat and matcher incompatibilities.
533   template <class M1, class M2, class F, class T>
ComposeFst(const typename M1::FST & fst1,const typename M2::FST & fst2,const ComposeFstImplOptions<M1,M2,F,T> & opts)534   ComposeFst(const typename M1::FST &fst1, const typename M2::FST &fst2,
535              const ComposeFstImplOptions<M1, M2, F, T> &opts)
536       : ImplToFst<Impl>(CreateBase2(fst1, fst2, opts)) {}
537 
538   // See Fst<>::Copy() for doc.
539   ComposeFst(const ComposeFst<A> &fst, bool safe = false) {
540     if (safe)
541       SetImpl(fst.GetImpl()->Copy());
542     else
543       SetImpl(fst.GetImpl(), false);
544   }
545 
546   // Get a copy of this ComposeFst. See Fst<>::Copy() for further doc.
547   virtual ComposeFst<A> *Copy(bool safe = false) const {
548     return new ComposeFst<A>(*this, safe);
549   }
550 
551   virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
552 
InitArcIterator(StateId s,ArcIteratorData<A> * data)553   virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
554     GetImpl()->InitArcIterator(s, data);
555   }
556 
557  protected:
ComposeFst()558   ComposeFst() {}
559 
560   // Create compose implementation specifying two matcher types.
561   template <class M1, class M2, class F, class T>
CreateBase2(const typename M1::FST & fst1,const typename M2::FST & fst2,const ComposeFstImplOptions<M1,M2,F,T> & opts)562   static Impl *CreateBase2(
563       const typename M1::FST &fst1, const typename M2::FST &fst2,
564       const ComposeFstImplOptions<M1, M2, F, T> &opts) {
565     Impl *impl = new ComposeFstImpl<M1, M2, F, T>(fst1, fst2, opts);
566     if (!(Weight::Properties() & kCommutative)) {
567       int64 props1 = fst1.Properties(kUnweighted, true);
568       int64 props2 = fst2.Properties(kUnweighted, true);
569       if (!(props1 & kUnweighted) && !(props2 & kUnweighted)) {
570         FSTERROR() << "ComposeFst: Weights must be a commutative semiring: "
571                    << Weight::Type();
572         impl->SetProperties(kError, kError);
573       }
574     }
575     return impl;
576   }
577 
578   // Create compose implementation specifying one matcher type.
579   //  Requires input Fsts and matcher FST type (M::FST) be Fst<A>
580   template <class M, class F, class T>
CreateBase1(const Fst<A> & fst1,const Fst<A> & fst2,const ComposeFstOptions<A,M,F,T> & opts)581   static Impl *CreateBase1(const Fst<A> &fst1, const Fst<A> &fst2,
582                            const ComposeFstOptions<A, M, F, T> &opts) {
583     ComposeFstImplOptions<M, M, F, T> nopts(opts, opts.matcher1, opts.matcher2,
584                                             opts.filter, opts.state_table);
585     return CreateBase2(fst1, fst2, nopts);
586   }
587 
588   // Create compose implementation specifying no matcher type.
CreateBase(const Fst<A> & fst1,const Fst<A> & fst2,const CacheOptions & opts)589   static Impl *CreateBase(const Fst<A> &fst1, const Fst<A> &fst2,
590                           const CacheOptions &opts) {
591     switch (LookAheadMatchType(fst1, fst2)) {  // Check for lookahead matchers
592       default:
593       case MATCH_NONE: {     // Default composition (no look-ahead)
594         VLOG(2) << "ComposeFst: Default composition (no look-ahead)";
595         ComposeFstOptions<Arc> nopts(opts);
596         return CreateBase1(fst1, fst2, nopts);
597       }
598       case MATCH_OUTPUT: {   // Lookahead on fst1
599         VLOG(2) << "ComposeFst: Lookahead on fst1";
600         typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M;
601         typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F;
602         ComposeFstOptions<Arc, M, F> nopts(opts);
603         return CreateBase1(fst1, fst2, nopts);
604       }
605       case MATCH_INPUT: {    // Lookahead on fst2
606         VLOG(2) << "ComposeFst: Lookahead on fst2";
607         typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M;
608         typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F;
609         ComposeFstOptions<Arc, M, F> nopts(opts);
610         return CreateBase1(fst1, fst2, nopts);
611       }
612     }
613   }
614 
615  private:
616   // Makes visible to friends.
GetImpl()617   Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
618 
619   void operator=(const ComposeFst<A> &fst);  // disallow
620 };
621 
622 
623 // Specialization for ComposeFst.
624 template<class A>
625 class StateIterator< ComposeFst<A> >
626     : public CacheStateIterator< ComposeFst<A> > {
627  public:
StateIterator(const ComposeFst<A> & fst)628   explicit StateIterator(const ComposeFst<A> &fst)
629       : CacheStateIterator< ComposeFst<A> >(fst, fst.GetImpl()) {}
630 };
631 
632 
633 // Specialization for ComposeFst.
634 template <class A>
635 class ArcIterator< ComposeFst<A> >
636     : public CacheArcIterator< ComposeFst<A> > {
637  public:
638   typedef typename A::StateId StateId;
639 
ArcIterator(const ComposeFst<A> & fst,StateId s)640   ArcIterator(const ComposeFst<A> &fst, StateId s)
641       : CacheArcIterator< ComposeFst<A> >(fst.GetImpl(), s) {
642     if (!fst.GetImpl()->HasArcs(s))
643       fst.GetImpl()->Expand(s);
644   }
645 
646  private:
647   DISALLOW_COPY_AND_ASSIGN(ArcIterator);
648 };
649 
650 template <class A> inline
InitStateIterator(StateIteratorData<A> * data)651 void ComposeFst<A>::InitStateIterator(StateIteratorData<A> *data) const {
652   data->base = new StateIterator< ComposeFst<A> >(*this);
653 }
654 
655 // Useful alias when using StdArc.
656 typedef ComposeFst<StdArc> StdComposeFst;
657 
658 enum ComposeFilter { AUTO_FILTER, SEQUENCE_FILTER, ALT_SEQUENCE_FILTER,
659                      MATCH_FILTER };
660 
661 struct ComposeOptions {
662   bool connect;  // Connect output
663   ComposeFilter filter_type;  // Which pre-defined filter to use
664 
665   ComposeOptions(bool c, ComposeFilter ft = AUTO_FILTER)
connectComposeOptions666       : connect(c), filter_type(ft) {}
ComposeOptionsComposeOptions667   ComposeOptions() : connect(true), filter_type(AUTO_FILTER) {}
668 };
669 
670 // Computes the composition of two transducers. This version writes
671 // the composed FST into a MurableFst. If FST1 transduces string x to
672 // y with weight a and FST2 transduces y to z with weight b, then
673 // their composition transduces string x to z with weight
674 // Times(x, z).
675 //
676 // The output labels of the first transducer or the input labels of
677 // the second transducer must be sorted.  The weights need to form a
678 // commutative semiring (valid for TropicalWeight and LogWeight).
679 //
680 // Complexity:
681 // Assuming the first FST is unsorted and the second is sorted:
682 // - Time: O(V1 V2 D1 (log D2 + M2)),
683 // - Space: O(V1 V2 D1 M2)
684 // where Vi = # of states, Di = maximum out-degree, and Mi is
685 // the maximum multiplicity for the ith FST.
686 //
687 // Caveats:
688 // - Compose trims its output.
689 // - The efficiency of composition can be strongly affected by several factors:
690 //   - the choice of which tnansducer is sorted - prefer sorting the FST
691 //     that has the greater average out-degree.
692 //   - the amount of non-determinism
693 //   - the presence and location of epsilon transitions - avoid epsilon
694 //     transitions on the output side of the first transducer or
695 //     the input side of the second transducer or prefer placing
696 //     them later in a path since they delay matching and can
697 //     introduce non-coaccessible states and transitions.
698 template<class Arc>
699 void Compose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
700              MutableFst<Arc> *ofst,
701              const ComposeOptions &opts = ComposeOptions()) {
702   typedef Matcher< Fst<Arc> > M;
703 
704   if (opts.filter_type == AUTO_FILTER) {
705      CacheOptions nopts;
706      nopts.gc_limit = 0;  // Cache only the last state for fastest copy.
707      *ofst = ComposeFst<Arc>(ifst1, ifst2, nopts);
708   } else if (opts.filter_type == SEQUENCE_FILTER) {
709     ComposeFstOptions<Arc> copts;
710     copts.gc_limit = 0;  // Cache only the last state for fastest copy.
711     *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
712   } else if (opts.filter_type == ALT_SEQUENCE_FILTER) {
713     ComposeFstOptions<Arc, M,  AltSequenceComposeFilter<M> > copts;
714     copts.gc_limit = 0;  // Cache only the last state for fastest copy.
715     *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
716   } else if (opts.filter_type == MATCH_FILTER) {
717     ComposeFstOptions<Arc, M,  MatchComposeFilter<M> > copts;
718     copts.gc_limit = 0;  // Cache only the last state for fastest copy.
719     *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
720   }
721 
722   if (opts.connect)
723     Connect(ofst);
724 }
725 
726 }  // namespace fst
727 
728 #endif  // FST_LIB_COMPOSE_H__
729