1 // compose-filter.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Classes for filtering the composition matches, e.g. for correct epsilon
20 // handling.
21 
22 #ifndef FST_LIB_COMPOSE_FILTER_H__
23 #define FST_LIB_COMPOSE_FILTER_H__
24 
25 #include <fst/fst.h>
26 #include <fst/fst-decl.h>  // For optional argument declarations
27 #include <fst/matcher.h>
28 
29 
30 namespace fst {
31 
32 
33 // COMPOSITION FILTER STATE - this represents the state of
34 // the composition filter. It has the form:
35 //
36 // class FilterState {
37 //  public:
38 //   // Required constructors
39 //   FilterState();
40 //   FilterState(const FilterState &f);
41 //   // An invalid filter state.
42 //   static const FilterState NoState();
43 //   // Maps state to integer for hashing.
44 //   size_t Hash() const;
45 //   // Equality of filter states.
46 //   bool operator==(const FilterState &f) const;
47 //   // Inequality of filter states.
48 //   bool operator!=(const FilterState &f) const;
49 //   // Assignment to filter states.
50 //   FilterState& operator=(const FilterState& f);
51 // };
52 
53 
54 // Filter state that is a signed integral type.
55 template <typename T>
56 class IntegerFilterState {
57  public:
IntegerFilterState()58   IntegerFilterState() : state_(kNoStateId) {}
IntegerFilterState(T s)59   explicit IntegerFilterState(T s) : state_(s) {}
60 
NoState()61   static const IntegerFilterState NoState() { return IntegerFilterState(); }
62 
Hash()63   size_t Hash() const { return static_cast<size_t>(state_); }
64 
65   bool operator==(const IntegerFilterState &f) const {
66     return state_ == f.state_;
67   }
68 
69   bool operator!=(const IntegerFilterState &f) const {
70     return state_ != f.state_;
71   }
72 
GetState()73   T GetState() const { return state_; }
74 
SetState(T state)75   void SetState(T state) { state_ = state; }
76 
77 private:
78   T state_;
79 };
80 
81 typedef IntegerFilterState<signed char> CharFilterState;
82 typedef IntegerFilterState<short> ShortFilterState;
83 typedef IntegerFilterState<int> IntFilterState;
84 
85 
86 // Filter state that is a weight (class).
87 template <class W>
88 class WeightFilterState {
89  public:
WeightFilterState()90   WeightFilterState() : weight_(W::Zero()) {}
WeightFilterState(W w)91   explicit WeightFilterState(W w) : weight_(w) {}
92 
NoState()93   static const WeightFilterState NoState() { return WeightFilterState(); }
94 
Hash()95   size_t Hash() const { return weight_.Hash(); }
96 
97   bool operator==(const WeightFilterState &f) const {
98     return weight_ == f.weight_;
99   }
100 
101   bool operator!=(const WeightFilterState &f) const {
102     return weight_ != f.weight_;
103   }
104 
GetWeight()105   W GetWeight() const { return weight_; }
106 
SetWeight(W w)107   void SetWeight(W w) { weight_ = w; }
108 
109 private:
110   W weight_;
111 };
112 
113 
114 // Filter state that is the combination of two filter states.
115 template <class F1, class F2>
116 class PairFilterState {
117  public:
PairFilterState()118   PairFilterState() : f1_(F1::NoState()), f2_(F2::NoState()) {}
119 
PairFilterState(const F1 & f1,const F2 & f2)120   PairFilterState(const F1 &f1, const F2 &f2) : f1_(f1), f2_(f2) {}
121 
NoState()122   static const PairFilterState NoState() { return PairFilterState(); }
123 
Hash()124   size_t Hash() const {
125     size_t h1 = f1_.Hash();
126     size_t h2 = f2_.Hash();
127     const int lshift = 5;
128     const int rshift = CHAR_BIT * sizeof(size_t) - 5;
129     return h1 << lshift ^ h1 >> rshift ^ h2;
130   }
131 
132   bool operator==(const PairFilterState &f) const {
133     return f1_ == f.f1_ && f2_ == f.f2_;
134   }
135 
136   bool operator!=(const PairFilterState &f) const {
137     return f1_ != f.f1_ || f2_ != f.f2_;
138   }
139 
GetState1()140   const F1 &GetState1() const { return f1_; }
GetState2()141   const F2 &GetState2() const { return f2_; }
142 
SetState(const F1 & f1,const F2 & f2)143   void SetState(const F1 &f1, const F2 &f2) {
144     f1_ = f1;
145     f2_ = f2;
146   }
147 
148 private:
149   F1 f1_;
150   F2 f2_;
151 };
152 
153 
154 // COMPOSITION FILTERS - these determine which matches are allowed to
155 // proceed. The filter's state is represented by the type
156 // ComposeFilter::FilterState. The basic filters handle correct
157 // epsilon matching.  Their interface is:
158 //
159 // template <class M1, class M2>
160 // class ComposeFilter {
161 //  public:
162 //   typedef typename M1::FST1 FST1;
163 //   typedef typename M1::FST2 FST2;
164 //   typedef typename FST1::Arc Arc;
165 //   typedef ... FilterState;
166 //   typedef ... Matcher1;
167 //   typedef ... Matcher2;
168 //
169 //   // Required constructors.
170 //   ComposeFilter(const FST1 &fst1, const FST2 &fst2,
171 //   //            M1 *matcher1 = 0, M2 *matcher2 = 0);
172 //   // If safe=true, the copy is thread-safe. See Fst<>::Copy()
173 //   // for further doc.
174 //   ComposeFilter(const ComposeFilter<M1, M2> &filter,
175 //   //            bool safe = false);
176 //   // Return start state of filter.
177 //   FilterState Start() const;
178 //   // Specifies current composition state.
179 //   void SetState(StateId s1, StateId s2, const FilterState &f);
180 //
181 //   // Apply filter at current composition state to these transitions.
182 //   // If an arc label to be matched is kNolabel, then that side
183 //   // does not consume a symbol. Returns the new filter state or,
184 //   // if disallowed, FilterState::NoState(). The filter is permitted to
185 //   // modify its inputs, e.g. for optimizations.
186 //   FilterState FilterArc(Arc *arc1, Arc *arc2) const;
187 
188 //   // Apply filter at current composition state to these final weights
189 //   // (cf. superfinal transitions). The filter may modify its inputs,
190 //   // e.g. for optimizations.
191 //   void FilterFinal(Weight *final1, Weight *final2) const;
192 //
193 //   // Return resp matchers. Ownership stays with filter. These
194 //   // methods allow the filter to access and possibly modify
195 //   // the composition matchers (useful e.g. with lookahead).
196 //   Matcher1 *GetMatcher1();
197 //   Matcher2 *GetMatcher2();
198 //
199 //   // This specifies how the filter affects the composition result
200 //   // properties. It takes as argument the properties that would
201 //   // apply with a trivial composition fitler.
202 //   uint64 Properties(uint64 props) const;
203 // };
204 
205 // This filter requires epsilons on FST1 to be read before epsilons on FST2.
206 template <class M1, class M2>
207 class SequenceComposeFilter {
208  public:
209   typedef typename M1::FST FST1;
210   typedef typename M2::FST FST2;
211   typedef typename FST1::Arc Arc;
212   typedef CharFilterState FilterState;
213   typedef M1 Matcher1;
214   typedef M2 Matcher2;
215 
216   typedef typename Arc::StateId StateId;
217   typedef typename Arc::Label Label;
218   typedef typename Arc::Weight Weight;
219 
220   SequenceComposeFilter(const FST1 &fst1, const FST2 &fst2,
221                         M1 *matcher1 = 0, M2 *matcher2 = 0)
222       : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
223         matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
224         fst1_(matcher1_->GetFst()),
225         s1_(kNoStateId),
226         s2_(kNoStateId),
227         f_(kNoStateId) {}
228 
229   SequenceComposeFilter(const SequenceComposeFilter<M1, M2> &filter,
230                         bool safe = false)
231       : matcher1_(filter.matcher1_->Copy(safe)),
232         matcher2_(filter.matcher2_->Copy(safe)),
233         fst1_(matcher1_->GetFst()),
234         s1_(kNoStateId),
235         s2_(kNoStateId),
236         f_(kNoStateId) {}
237 
~SequenceComposeFilter()238   ~SequenceComposeFilter() {
239     delete matcher1_;
240     delete matcher2_;
241   }
242 
Start()243   FilterState Start() const { return FilterState(0); }
244 
SetState(StateId s1,StateId s2,const FilterState & f)245   void SetState(StateId s1, StateId s2, const FilterState &f) {
246     if (s1_ == s1 && s2_ == s2 && f == f_)
247       return;
248     s1_ = s1;
249     s2_ = s2;
250     f_ = f;
251     size_t na1 = internal::NumArcs(fst1_, s1);
252     size_t ne1 = internal::NumOutputEpsilons(fst1_, s1);
253     bool fin1 = internal::Final(fst1_, s1) != Weight::Zero();
254     alleps1_ = na1 == ne1 && !fin1;
255     noeps1_ = ne1 == 0;
256   }
257 
FilterArc(Arc * arc1,Arc * arc2)258   FilterState FilterArc(Arc *arc1, Arc *arc2) const {
259     if (arc1->olabel == kNoLabel)
260       return alleps1_ ? FilterState::NoState() :
261         noeps1_ ? FilterState(0) : FilterState(1);
262     else if (arc2->ilabel == kNoLabel)
263       return f_ != FilterState(0) ? FilterState::NoState() : FilterState(0);
264     else
265       return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0);
266   }
267 
FilterFinal(Weight *,Weight *)268   void FilterFinal(Weight *, Weight *) const {}
269 
270   // Return resp matchers. Ownership stays with filter.
GetMatcher1()271   Matcher1 *GetMatcher1() { return matcher1_; }
GetMatcher2()272   Matcher2 *GetMatcher2() { return matcher2_; }
273 
Properties(uint64 props)274   uint64 Properties(uint64 props) const { return props; }
275 
276  private:
277   Matcher1 *matcher1_;
278   Matcher2 *matcher2_;
279   const FST1 &fst1_;
280   StateId s1_;     // Current fst1_ state;
281   StateId s2_;     // Current fst2_ state;
282   FilterState f_;  // Current filter state
283   bool alleps1_;   // Only epsilons (and non-final) leaving s1_?
284   bool noeps1_;    // No epsilons leaving s1_?
285 
286   void operator=(const SequenceComposeFilter<M1, M2> &);  // disallow
287 };
288 
289 
290 // This filter requires epsilons on FST2 to be read before epsilons on FST1.
291 template <class M1, class M2>
292 class AltSequenceComposeFilter {
293  public:
294   typedef typename M1::FST FST1;
295   typedef typename M2::FST FST2;
296   typedef typename FST1::Arc Arc;
297   typedef CharFilterState FilterState;
298   typedef M1 Matcher1;
299   typedef M2 Matcher2;
300 
301   typedef typename Arc::StateId StateId;
302   typedef typename Arc::Label Label;
303   typedef typename Arc::Weight Weight;
304 
305   AltSequenceComposeFilter(const FST1 &fst1, const FST2 &fst2,
306                         M1 *matcher1 = 0, M2 *matcher2 = 0)
307       : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
308         matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
309         fst2_(matcher2_->GetFst()),
310         s1_(kNoStateId),
311         s2_(kNoStateId),
312         f_(kNoStateId) {}
313 
314   AltSequenceComposeFilter(const AltSequenceComposeFilter<M1, M2> &filter,
315                            bool safe = false)
316       : matcher1_(filter.matcher1_->Copy(safe)),
317         matcher2_(filter.matcher2_->Copy(safe)),
318         fst2_(matcher2_->GetFst()),
319         s1_(kNoStateId),
320         s2_(kNoStateId),
321         f_(kNoStateId) {}
322 
~AltSequenceComposeFilter()323   ~AltSequenceComposeFilter() {
324     delete matcher1_;
325     delete matcher2_;
326   }
327 
Start()328   FilterState Start() const { return FilterState(0); }
329 
SetState(StateId s1,StateId s2,const FilterState & f)330   void SetState(StateId s1, StateId s2, const FilterState &f) {
331     if (s1_ == s1 && s2_ == s2 && f == f_)
332       return;
333     s1_ = s1;
334     s2_ = s2;
335     f_ = f;
336     size_t na2 = internal::NumArcs(fst2_, s2);
337     size_t ne2 = internal::NumInputEpsilons(fst2_, s2);
338     bool fin2 = internal::Final(fst2_, s2) != Weight::Zero();
339     alleps2_ = na2 == ne2 && !fin2;
340     noeps2_ = ne2 == 0;
341   }
342 
FilterArc(Arc * arc1,Arc * arc2)343   FilterState FilterArc(Arc *arc1, Arc *arc2) const {
344     if (arc2->ilabel == kNoLabel)
345       return alleps2_ ? FilterState::NoState() :
346         noeps2_ ? FilterState(0) : FilterState(1);
347     else if (arc1->olabel == kNoLabel)
348       return f_ == FilterState(1) ? FilterState::NoState() : FilterState(0);
349     else
350       return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0);
351   }
352 
FilterFinal(Weight *,Weight *)353   void FilterFinal(Weight *, Weight *) const {}
354 
355   // Return resp matchers. Ownership stays with filter.
GetMatcher1()356   Matcher1 *GetMatcher1() { return matcher1_; }
GetMatcher2()357   Matcher2 *GetMatcher2() { return matcher2_; }
358 
Properties(uint64 props)359   uint64 Properties(uint64 props) const { return props; }
360 
361  private:
362   Matcher1 *matcher1_;
363   Matcher2 *matcher2_;
364   const FST2 &fst2_;
365   StateId s1_;     // Current fst1_ state;
366   StateId s2_;     // Current fst2_ state;
367   FilterState f_;  // Current filter state
368   bool alleps2_;   // Only epsilons (and non-final) leaving s2_?
369   bool noeps2_;    // No epsilons leaving s2_?
370 
371 void operator=(const AltSequenceComposeFilter<M1, M2> &);  // disallow
372 };
373 
374 
375 // This filter requires epsilons on FST1 to be matched with epsilons on FST2
376 // whenever possible.
377 template <class M1, class M2>
378 class MatchComposeFilter {
379  public:
380   typedef typename M1::FST FST1;
381   typedef typename M2::FST FST2;
382   typedef typename FST1::Arc Arc;
383   typedef CharFilterState FilterState;
384   typedef M1 Matcher1;
385   typedef M2 Matcher2;
386 
387   typedef typename Arc::StateId StateId;
388   typedef typename Arc::Label Label;
389   typedef typename Arc::Weight Weight;
390 
391   MatchComposeFilter(const FST1 &fst1, const FST2 &fst2,
392                      M1 *matcher1 = 0, M2 *matcher2 = 0)
393       : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
394         matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
395         fst1_(matcher1_->GetFst()),
396         fst2_(matcher2_->GetFst()),
397         s1_(kNoStateId),
398         s2_(kNoStateId),
399         f_(kNoStateId) {}
400 
401   MatchComposeFilter(const MatchComposeFilter<M1, M2> &filter,
402                      bool safe = false)
403       : matcher1_(filter.matcher1_->Copy(safe)),
404         matcher2_(filter.matcher2_->Copy(safe)),
405         fst1_(matcher1_->GetFst()),
406         fst2_(matcher2_->GetFst()),
407         s1_(kNoStateId),
408         s2_(kNoStateId),
409         f_(kNoStateId) {}
410 
~MatchComposeFilter()411   ~MatchComposeFilter() {
412     delete matcher1_;
413     delete matcher2_;
414   }
415 
Start()416   FilterState Start() const { return FilterState(0); }
417 
SetState(StateId s1,StateId s2,const FilterState & f)418   void SetState(StateId s1, StateId s2, const FilterState &f) {
419     if (s1_ == s1 && s2_ == s2 && f == f_)
420       return;
421     s1_ = s1;
422     s2_ = s2;
423     f_ = f;
424     size_t na1 = internal::NumArcs(fst1_, s1);
425     size_t ne1 = internal::NumOutputEpsilons(fst1_, s1);
426     bool f1 = internal::Final(fst1_, s1) != Weight::Zero();
427     alleps1_ = na1 == ne1 && !f1;
428     noeps1_ = ne1 == 0;
429     size_t na2 = internal::NumArcs(fst2_, s2);
430     size_t ne2 = internal::NumInputEpsilons(fst2_, s2);
431     bool f2 = internal::Final(fst2_, s2) != Weight::Zero();
432     alleps2_ = na2 == ne2 && !f2;
433     noeps2_ = ne2 == 0;
434   }
435 
FilterArc(Arc * arc1,Arc * arc2)436   FilterState FilterArc(Arc *arc1, Arc *arc2) const {
437     if (arc2->ilabel == kNoLabel)  // Epsilon on Fst1
438       return f_ == FilterState(0) ?
439           (noeps2_ ? FilterState(0) :
440            (alleps2_ ? FilterState::NoState(): FilterState(1))) :
441           (f_ == FilterState(1) ? FilterState(1) : FilterState::NoState());
442     else if (arc1->olabel == kNoLabel)  // Epsilon on Fst2
443       return f_ == FilterState(0) ?
444           (noeps1_ ? FilterState(0) :
445            (alleps1_ ? FilterState::NoState() : FilterState(2))) :
446           (f_ == FilterState(2) ? FilterState(2) : FilterState::NoState());
447     else if (arc1->olabel == 0)  // Epsilon on both
448       return f_ == FilterState(0) ? FilterState(0) : FilterState::NoState();
449     else  // Both are non-epsilons
450       return FilterState(0);
451   }
452 
FilterFinal(Weight *,Weight *)453   void FilterFinal(Weight *, Weight *) const {}
454 
455   // Return resp matchers. Ownership stays with filter.
GetMatcher1()456   Matcher1 *GetMatcher1() { return matcher1_; }
GetMatcher2()457   Matcher2 *GetMatcher2() { return matcher2_; }
458 
Properties(uint64 props)459   uint64 Properties(uint64 props) const { return props; }
460 
461  private:
462   Matcher1 *matcher1_;
463   Matcher2 *matcher2_;
464   const FST1 &fst1_;
465   const FST2 &fst2_;
466   StateId s1_;              // Current fst1_ state;
467   StateId s2_;              // Current fst2_ state;
468   FilterState f_;           // Current filter state ID
469   bool alleps1_, alleps2_;  // Only epsilons (and non-final) leaving s1, s2?
470   bool noeps1_, noeps2_;    // No epsilons leaving s1, s2?
471 
472   void operator=(const MatchComposeFilter<M1, M2> &);  // disallow
473 };
474 
475 
476 // This filter works with the MultiEpsMatcher to determine if
477 // 'multi-epsilons' are preserved in the composition output
478 // (rather than rewritten as 0) and ensures correct properties.
479 template <class F>
480 class MultiEpsFilter {
481  public:
482   typedef typename F::FST1 FST1;
483   typedef typename F::FST2 FST2;
484   typedef typename F::Arc Arc;
485   typedef typename F::Matcher1 Matcher1;
486   typedef typename F::Matcher2 Matcher2;
487   typedef typename F::FilterState FilterState;
488   typedef MultiEpsFilter<F> Filter;
489 
490   typedef typename Arc::StateId StateId;
491   typedef typename Arc::Label Label;
492   typedef typename Arc::Weight Weight;
493 
494   MultiEpsFilter(const FST1 &fst1, const FST2 &fst2,
495                  Matcher1 *matcher1 = 0,  Matcher2 *matcher2 = 0,
496                  bool keep_multi_eps = false)
filter_(fst1,fst2,matcher1,matcher2)497       : filter_(fst1, fst2, matcher1, matcher2),
498         keep_multi_eps_(keep_multi_eps) {}
499 
500   MultiEpsFilter(const Filter &filter, bool safe = false)
501       : filter_(filter.filter_, safe),
502         keep_multi_eps_(filter.keep_multi_eps_) {}
503 
Start()504   FilterState Start() const { return filter_.Start(); }
505 
SetState(StateId s1,StateId s2,const FilterState & f)506   void SetState(StateId s1, StateId s2, const FilterState &f) {
507     return filter_.SetState(s1, s2, f);
508   }
509 
FilterArc(Arc * arc1,Arc * arc2)510   FilterState FilterArc(Arc *arc1, Arc *arc2) const {
511     FilterState f = filter_.FilterArc(arc1, arc2);
512     if (keep_multi_eps_) {
513       if (arc1->olabel == kNoLabel)
514         arc1->ilabel = arc2->ilabel;
515       if (arc2->ilabel == kNoLabel)
516         arc2->olabel = arc1->olabel;
517     }
518     return f;
519   }
520 
FilterFinal(Weight * w1,Weight * w2)521   void FilterFinal(Weight *w1, Weight *w2) const {
522     return filter_.FilterFinal(w1, w2);
523   }
524 
525   // Return resp matchers. Ownership stays with filter.
GetMatcher1()526   Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); }
GetMatcher2()527   Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); }
528 
Properties(uint64 iprops)529   uint64 Properties(uint64 iprops) const {
530     uint64 oprops = filter_.Properties(iprops);
531     return oprops & kILabelInvariantProperties & kOLabelInvariantProperties;
532   }
533 
534  private:
535   F filter_;
536   bool keep_multi_eps_;
537 };
538 
539 }  // namespace fst
540 
541 
542 #endif  // FST_LIB_COMPOSE_FILTER_H__
543