1 // matcher-fst.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Class to add a matcher to an FST.
20 
21 #ifndef FST_LIB_MATCHER_FST_FST_H__
22 #define FST_LIB_MATCHER_FST_FST_H__
23 
24 #include <fst/add-on.h>
25 #include <fst/const-fst.h>
26 #include <fst/lookahead-matcher.h>
27 
28 
29 namespace fst {
30 
31 // WRITABLE MATCHERS - these have the interface of Matchers (see
32 // matcher.h) and these additional methods:
33 //
34 // template <class F>
35 // class Matcher {
36 //  public:
37 //   typedef ... MatcherData;  // Initialization data
38 //  ...
39 //   // Constructor with additional argument for external initialization
40 //   // data; matcher increments its reference count on construction and
41 //   // decrements the reference count, and if 0 deletes, on destruction.
42 //   Matcher(const F &fst, MatchType type, MatcherData *data);
43 //
44 //   // Returns pointer to initialization data that can be
45 //   // passed to a Matcher constructor.
46 //   MatcherData *GetData() const;
47 // };
48 
49 // The matcher initialization data class must have the form:
50 // class MatcherData {
51 // public:
52 //   // Required copy constructor.
53 //   MatcherData(const MatcherData &);
54 //   //
55 //   // Required I/O methods.
56 //   static MatcherData *Read(istream &istrm);
57 //   bool Write(ostream &ostrm);
58 //
59 //   // Required reference counting.
60 //   int RefCount() const;
61 //   int IncrRefCount();
62 //   int DecrRefCount();
63 // };
64 
65 // Default MatcherFst initializer - does nothing.
66 template <class M>
67 class NullMatcherFstInit {
68  public:
69   typedef AddOnPair<typename M::MatcherData, typename M::MatcherData> D;
70   typedef AddOnImpl<typename M::FST, D> Impl;
NullMatcherFstInit(Impl **)71   NullMatcherFstInit(Impl **) {}
72 };
73 
74 // Class to add a matcher M to an Fst F. Creates a new Fst of type name N.
75 // Optional function object I can be used to initialize the Fst.
76 template <class F, class M, const char* N,
77           class I = NullMatcherFstInit<M> >
78 class MatcherFst
79     : public ImplToExpandedFst<
80                AddOnImpl<F,
81                          AddOnPair<typename M::MatcherData,
82                                    typename M::MatcherData> > > {
83  public:
84   friend class StateIterator< MatcherFst<F, M, N, I> >;
85   friend class ArcIterator< MatcherFst<F, M, N, I> >;
86 
87   typedef F FST;
88   typedef M FstMatcher;
89   typedef typename F::Arc Arc;
90   typedef typename Arc::StateId StateId;
91   typedef AddOnPair<typename M::MatcherData, typename M::MatcherData> D;
92   typedef AddOnImpl<F, D> Impl;
93 
MatcherFst()94   MatcherFst() : ImplToExpandedFst<Impl>(new Impl(F(), N)) {}
95 
MatcherFst(const F & fst)96   explicit MatcherFst(const F &fst)
97       : ImplToExpandedFst<Impl>(CreateImpl(fst, N)) {}
98 
MatcherFst(const Fst<Arc> & fst)99   explicit MatcherFst(const Fst<Arc> &fst)
100       : ImplToExpandedFst<Impl>(CreateImpl(fst, N)) {}
101 
102   // See Fst<>::Copy() for doc.
103   MatcherFst(const MatcherFst<F, M, N, I> &fst, bool safe = false)
104       : ImplToExpandedFst<Impl>(fst, safe) {}
105 
106   // Get a copy of this MatcherFst. See Fst<>::Copy() for further doc.
107   virtual MatcherFst<F, M, N, I> *Copy(bool safe = false) const {
108     return new MatcherFst<F, M, N, I>(*this, safe);
109   }
110 
111   // Read a MatcherFst from an input stream; return NULL on error
Read(istream & strm,const FstReadOptions & opts)112   static MatcherFst<F, M, N, I> *Read(istream &strm,
113                                       const FstReadOptions &opts) {
114     Impl *impl = Impl::Read(strm, opts);
115     return impl ? new MatcherFst<F, M, N, I>(impl) : 0;
116   }
117 
118   // Read a MatcherFst from a file; return NULL on error
119   // Empty filename reads from standard input
Read(const string & filename)120   static MatcherFst<F, M, N, I> *Read(const string &filename) {
121     Impl *impl = ImplToExpandedFst<Impl>::Read(filename);
122     return impl ? new MatcherFst<F, M, N, I>(impl) : 0;
123   }
124 
Write(ostream & strm,const FstWriteOptions & opts)125   virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
126     return GetImpl()->Write(strm, opts);
127   }
128 
Write(const string & filename)129   virtual bool Write(const string &filename) const {
130     return Fst<Arc>::WriteFile(filename);
131   }
132 
InitStateIterator(StateIteratorData<Arc> * data)133   virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
134     return GetImpl()->InitStateIterator(data);
135   }
136 
InitArcIterator(StateId s,ArcIteratorData<Arc> * data)137   virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
138     return GetImpl()->InitArcIterator(s, data);
139   }
140 
InitMatcher(MatchType match_type)141   virtual M *InitMatcher(MatchType match_type) const {
142     return new M(GetFst(), match_type, GetData(match_type));
143   }
144 
145   // Allows access to MatcherFst components.
GetImpl()146   Impl *GetImpl() const {
147     return ImplToFst<Impl, ExpandedFst<Arc> >::GetImpl();
148   }
149 
GetFst()150   F& GetFst() const { return GetImpl()->GetFst(); }
151 
GetData(MatchType match_type)152   typename M::MatcherData *GetData(MatchType match_type) const {
153     D *data = GetImpl()->GetAddOn();
154     return match_type == MATCH_INPUT ? data->First() : data->Second();
155   }
156 
157  private:
CreateImpl(const F & fst,const string & name)158   static Impl *CreateImpl(const F &fst, const string &name) {
159     M imatcher(fst, MATCH_INPUT);
160     M omatcher(fst, MATCH_OUTPUT);
161     D *data = new D(imatcher.GetData(), omatcher.GetData());
162     Impl *impl = new Impl(fst, name);
163     impl->SetAddOn(data);
164     I init(&impl);
165     data->DecrRefCount();
166     return impl;
167   }
168 
CreateImpl(const Fst<Arc> & fst,const string & name)169   static Impl *CreateImpl(const Fst<Arc> &fst, const string &name) {
170     F ffst(fst);
171     return CreateImpl(ffst, name);
172   }
173 
MatcherFst(Impl * impl)174   explicit MatcherFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
175 
176   // Makes visible to friends.
177   void SetImpl(Impl *impl, bool own_impl = true) {
178     ImplToFst< Impl, ExpandedFst<Arc> >::SetImpl(impl, own_impl);
179   }
180 
181   void operator=(const MatcherFst<F, M, N, I> &fst);  // disallow
182 };
183 
184 
185 // Specialization fo MatcherFst.
186 template <class F, class M, const char* N, class I>
187 class StateIterator< MatcherFst<F, M, N, I> > : public StateIterator<F> {
188  public:
StateIterator(const MatcherFst<F,M,N,I> & fst)189   explicit StateIterator(const MatcherFst<F, M, N, I> &fst) :
190       StateIterator<F>(fst.GetImpl()->GetFst()) {}
191 };
192 
193 
194 // Specialization for MatcherFst.
195 template <class F, class M, const char* N, class I>
196 class ArcIterator< MatcherFst<F, M, N, I> > : public ArcIterator<F> {
197  public:
ArcIterator(const MatcherFst<F,M,N,I> & fst,typename F::Arc::StateId s)198   ArcIterator(const MatcherFst<F, M, N, I> &fst, typename F::Arc::StateId s)
199       : ArcIterator<F>(fst.GetImpl()->GetFst(), s) {}
200 };
201 
202 
203 // Specialization for MatcherFst
204 template <class F, class M, const char* N, class I>
205 class Matcher< MatcherFst<F, M, N, I> > {
206  public:
207   typedef MatcherFst<F, M, N, I> FST;
208   typedef typename F::Arc Arc;
209   typedef typename Arc::StateId StateId;
210   typedef typename Arc::Label Label;
211 
Matcher(const FST & fst,MatchType match_type)212   Matcher(const FST &fst, MatchType match_type) {
213     matcher_ = fst.InitMatcher(match_type);
214   }
215 
Matcher(const Matcher<FST> & matcher)216   Matcher(const Matcher<FST> &matcher) {
217     matcher_ = matcher.matcher_->Copy();
218   }
219 
~Matcher()220   ~Matcher() { delete matcher_; }
221 
Copy()222   Matcher<FST> *Copy() const {
223     return new Matcher<FST>(*this);
224   }
225 
Type(bool test)226   MatchType Type(bool test) const { return matcher_->Type(test); }
SetState(StateId s)227   void SetState(StateId s) { matcher_->SetState(s); }
Find(Label label)228   bool Find(Label label) { return matcher_->Find(label); }
Done()229   bool Done() const { return matcher_->Done(); }
Value()230   const Arc& Value() const { return matcher_->Value(); }
Next()231   void Next() { matcher_->Next(); }
Properties(uint64 props)232   uint64 Properties(uint64 props) const { return matcher_->Properties(props); }
Flags()233   uint32 Flags() const { return matcher_->Flags(); }
234 
235  private:
236   M *matcher_;
237 
238   void operator=(const Matcher<Arc> &);  // disallow
239 };
240 
241 
242 // Specialization for MatcherFst
243 template <class F, class M, const char* N, class I>
244 class LookAheadMatcher< MatcherFst<F, M, N, I> > {
245  public:
246   typedef MatcherFst<F, M, N, I> FST;
247   typedef typename F::Arc Arc;
248   typedef typename Arc::StateId StateId;
249   typedef typename Arc::Label Label;
250   typedef typename Arc::Weight Weight;
251 
LookAheadMatcher(const FST & fst,MatchType match_type)252   LookAheadMatcher(const FST &fst, MatchType match_type) {
253     matcher_ = fst.InitMatcher(match_type);
254   }
255 
256   LookAheadMatcher(const LookAheadMatcher<FST> &matcher, bool safe = false) {
257     matcher_ = matcher.matcher_->Copy(safe);
258   }
259 
~LookAheadMatcher()260   ~LookAheadMatcher() { delete matcher_; }
261 
262   // General matcher methods
263   LookAheadMatcher<FST> *Copy(bool safe = false) const {
264     return new LookAheadMatcher<FST>(*this, safe);
265   }
266 
Type(bool test)267   MatchType Type(bool test) const { return matcher_->Type(test); }
SetState(StateId s)268   void SetState(StateId s) { matcher_->SetState(s); }
Find(Label label)269   bool Find(Label label) { return matcher_->Find(label); }
Done()270   bool Done() const { return matcher_->Done(); }
Value()271   const Arc& Value() const { return matcher_->Value(); }
Next()272   void Next() { matcher_->Next(); }
GetFst()273   const FST &GetFst() const { return matcher_->GetFst(); }
Properties(uint64 props)274   uint64 Properties(uint64 props) const { return matcher_->Properties(props); }
Flags()275   uint32 Flags() const { return matcher_->Flags(); }
276 
277   // Look-ahead methods
LookAheadLabel(Label label)278   bool LookAheadLabel(Label label) const {
279     return matcher_->LookAheadLabel(label);
280   }
281 
LookAheadFst(const Fst<Arc> & fst,StateId s)282   bool LookAheadFst(const Fst<Arc> &fst, StateId s) {
283     return matcher_->LookAheadFst(fst, s);
284   }
285 
LookAheadWeight()286   Weight LookAheadWeight() const { return matcher_->LookAheadWeight(); }
287 
LookAheadPrefix(Arc * arc)288   bool LookAheadPrefix(Arc *arc) const {
289     return matcher_->LookAheadPrefix(arc);
290   }
291 
292   void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) {
293     matcher_->InitLookAheadFst(fst, copy);
294   }
295 
296  private:
297   M *matcher_;
298 
299   void operator=(const LookAheadMatcher<FST> &);  // disallow
300 };
301 
302 //
303 // Useful aliases when using StdArc and LogArc.
304 //
305 
306 // Arc look-ahead matchers
307 extern const char arc_lookahead_fst_type[];
308 
309 typedef MatcherFst<ConstFst<StdArc>,
310                    ArcLookAheadMatcher<SortedMatcher<ConstFst<StdArc> > >,
311                    arc_lookahead_fst_type> StdArcLookAheadFst;
312 
313 typedef MatcherFst<ConstFst<LogArc>,
314                    ArcLookAheadMatcher<SortedMatcher<ConstFst<LogArc> > >,
315                    arc_lookahead_fst_type> LogArcLookAheadFst;
316 
317 
318 // Label look-ahead matchers
319 extern const char ilabel_lookahead_fst_type[];
320 extern const char olabel_lookahead_fst_type[];
321 
322 static const uint32 ilabel_lookahead_flags = kInputLookAheadMatcher |
323     kLookAheadWeight | kLookAheadPrefix |
324     kLookAheadEpsilons | kLookAheadNonEpsilonPrefix;
325 static const uint32 olabel_lookahead_flags = kOutputLookAheadMatcher |
326     kLookAheadWeight | kLookAheadPrefix |
327     kLookAheadEpsilons | kLookAheadNonEpsilonPrefix;
328 
329 typedef MatcherFst<ConstFst<StdArc>,
330                    LabelLookAheadMatcher<SortedMatcher<ConstFst<StdArc> >,
331                                          ilabel_lookahead_flags,
332                                          FastLogAccumulator<StdArc> >,
333                    ilabel_lookahead_fst_type,
334                    LabelLookAheadRelabeler<StdArc> > StdILabelLookAheadFst;
335 
336 typedef MatcherFst<ConstFst<LogArc>,
337                    LabelLookAheadMatcher<SortedMatcher<ConstFst<LogArc> >,
338                                          ilabel_lookahead_flags,
339                                          FastLogAccumulator<LogArc> >,
340                    ilabel_lookahead_fst_type,
341                    LabelLookAheadRelabeler<LogArc> > LogILabelLookAheadFst;
342 
343 typedef MatcherFst<ConstFst<StdArc>,
344                    LabelLookAheadMatcher<SortedMatcher<ConstFst<StdArc> >,
345                                          olabel_lookahead_flags,
346                                          FastLogAccumulator<StdArc> >,
347                    olabel_lookahead_fst_type,
348                    LabelLookAheadRelabeler<StdArc> > StdOLabelLookAheadFst;
349 
350 typedef MatcherFst<ConstFst<LogArc>,
351                    LabelLookAheadMatcher<SortedMatcher<ConstFst<LogArc> >,
352                                          olabel_lookahead_flags,
353                                          FastLogAccumulator<LogArc> >,
354                    olabel_lookahead_fst_type,
355                    LabelLookAheadRelabeler<LogArc> > LogOLabelLookAheadFst;
356 
357 }  // namespace fst
358 
359 #endif  // FST_LIB_MATCHER_FST_FST_H__
360