1 // fst.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Finite-State Transducer (FST) - abstract base class definition,
20 // state and arc iterator interface, and suggested base implementation.
21 //
22 
23 #ifndef FST_LIB_FST_H__
24 #define FST_LIB_FST_H__
25 
26 #include <stddef.h>
27 #include <sys/types.h>
28 #include <cmath>
29 #include <string>
30 
31 #include <fst/compat.h>
32 #include <fst/types.h>
33 
34 #include <fst/arc.h>
35 #include <fst/properties.h>
36 #include <fst/register.h>
37 #include <iostream>
38 #include <fstream>
39 #include <sstream>
40 #include <fst/symbol-table.h>
41 #include <fst/util.h>
42 
43 
44 DECLARE_bool(fst_align);
45 
46 namespace fst {
47 
48 bool IsFstHeader(istream &, const string &);
49 
50 class FstHeader;
51 template <class A> class StateIteratorData;
52 template <class A> class ArcIteratorData;
53 template <class A> class MatcherBase;
54 
55 struct FstReadOptions {
56   // FileReadMode(s) are advisory, there are many conditions than prevent a
57   // file from being mapped, READ mode will be selected in these cases with
58   // a warning indicating why it was chosen.
59   enum FileReadMode { READ, MAP };
60 
61   string source;                // Where you're reading from
62   const FstHeader *header;      // Pointer to Fst header. If non-zero, use
63                                 // this info (don't read a stream header)
64   const SymbolTable* isymbols;  // Pointer to input symbols. If non-zero, use
65                                 // this info (read and skip stream isymbols)
66   const SymbolTable* osymbols;  // Pointer to output symbols. If non-zero, use
67                                 // this info (read and skip stream osymbols)
68   FileReadMode mode;            // Read or map files (advisory, if possible)
69 
70   explicit FstReadOptions(const string& src = "<unspecified>",
71                           const FstHeader *hdr = 0,
72                           const SymbolTable* isym = 0,
73                           const SymbolTable* osym = 0);
74 
75   explicit FstReadOptions(const string& src,
76                           const SymbolTable* isym,
77                           const SymbolTable* osym = 0);
78 
79   // Helper function to convert strings FileReadModes into their enum value.
80   static FileReadMode ReadMode(const string &mode);
81 };
82 
83 struct FstWriteOptions {
84   string source;                 // Where you're writing to
85   bool write_header;             // Write the header?
86   bool write_isymbols;           // Write input symbols?
87   bool write_osymbols;           // Write output symbols?
88   bool align;                    // Write data aligned where appropriate;
89                                  // this may fail on pipes
90 
91   explicit FstWriteOptions(const string& src = "<unspecifed>",
92                            bool hdr = true, bool isym = true,
93                            bool osym = true, bool alig = FLAGS_fst_align)
sourceFstWriteOptions94       : source(src), write_header(hdr),
95         write_isymbols(isym), write_osymbols(osym), align(alig) {}
96 };
97 
98 //
99 // Fst HEADER CLASS
100 //
101 // This is the recommended Fst file header representation.
102 //
103 class FstHeader {
104  public:
105   enum {
106     HAS_ISYMBOLS = 0x1,          // Has input symbol table
107     HAS_OSYMBOLS = 0x2,          // Has output symbol table
108     IS_ALIGNED   = 0x4,          // Memory-aligned (where appropriate)
109   } Flags;
110 
FstHeader()111   FstHeader() : version_(0), flags_(0), properties_(0), start_(-1),
112                 numstates_(0), numarcs_(0) {}
FstType()113   const string &FstType() const { return fsttype_; }
ArcType()114   const string &ArcType() const { return arctype_; }
Version()115   int32 Version() const { return version_; }
GetFlags()116   int32 GetFlags() const { return flags_; }
Properties()117   uint64 Properties() const { return properties_; }
Start()118   int64 Start() const { return start_; }
NumStates()119   int64 NumStates() const { return numstates_; }
NumArcs()120   int64 NumArcs() const { return numarcs_; }
121 
SetFstType(const string & type)122   void SetFstType(const string& type) { fsttype_ = type; }
SetArcType(const string & type)123   void SetArcType(const string& type) { arctype_ = type; }
SetVersion(int32 version)124   void SetVersion(int32 version) { version_ = version; }
SetFlags(int32 flags)125   void SetFlags(int32 flags) { flags_ = flags; }
SetProperties(uint64 properties)126   void SetProperties(uint64 properties) { properties_ = properties; }
SetStart(int64 start)127   void SetStart(int64 start) { start_ = start; }
SetNumStates(int64 numstates)128   void SetNumStates(int64 numstates) { numstates_ = numstates; }
SetNumArcs(int64 numarcs)129   void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; }
130 
131   bool Read(istream &strm, const string &source, bool rewind = false);
132   bool Write(ostream &strm, const string &source) const;
133 
134  private:
135 
136   string fsttype_;                   // E.g. "vector"
137   string arctype_;                   // E.g. "standard"
138   int32 version_;                    // Type version #
139   int32 flags_;                      // File format bits
140   uint64 properties_;                // FST property bits
141   int64 start_;                      // Start state
142   int64 numstates_;                  // # of states
143   int64 numarcs_;                    // # of arcs
144 };
145 
146 
147 // Specifies matcher action.
148 enum MatchType { MATCH_INPUT,      // Match input label.
149                  MATCH_OUTPUT,     // Match output label.
150                  MATCH_BOTH,       // Match input or output label.
151                  MATCH_NONE,       // Match nothing.
152                  MATCH_UNKNOWN };  // Match type unknown.
153 
154 //
155 // Fst INTERFACE CLASS DEFINITION
156 //
157 
158 // A generic FST, templated on the arc definition, with
159 // common-demoninator methods (use StateIterator and ArcIterator to
160 // iterate over its states and arcs).
161 template <class A>
162 class Fst {
163  public:
164   typedef A Arc;
165   typedef typename A::Weight Weight;
166   typedef typename A::StateId StateId;
167 
~Fst()168   virtual ~Fst() {}
169 
170   virtual StateId Start() const = 0;          // Initial state
171 
172   virtual Weight Final(StateId) const = 0;    // State's final weight
173 
174   virtual size_t NumArcs(StateId) const = 0;  // State's arc count
175 
176   virtual size_t NumInputEpsilons(StateId)
177       const = 0;                              // State's input epsilon count
178 
179   virtual size_t NumOutputEpsilons(StateId)
180       const = 0;                              // State's output epsilon count
181 
182   // If test=false, return stored properties bits for mask (some poss. unknown)
183   // If test=true, return property bits for mask (computing o.w. unknown)
184   virtual uint64 Properties(uint64 mask, bool test)
185       const = 0;  // Property bits
186 
187   virtual const string& Type() const = 0;    // Fst type name
188 
189   // Get a copy of this Fst. The copying behaves as follows:
190   //
191   // (1) The copying is constant time if safe = false or if safe = true
192   // and is on an otherwise unaccessed Fst.
193   //
194   // (2) If safe = true, the copy is thread-safe in that the original
195   // and copy can be safely accessed (but not necessarily mutated) by
196   // separate threads. For some Fst types, 'Copy(true)' should only be
197   // called on an Fst that has not otherwise been accessed. Its behavior
198   // is undefined otherwise.
199   //
200   // (3) If a MutableFst is copied and then mutated, then the original is
201   // unmodified and vice versa (often by a copy-on-write on the initial
202   // mutation, which may not be constant time).
203   virtual Fst<A> *Copy(bool safe = false) const = 0;
204 
205   // Read an Fst from an input stream; returns NULL on error
Read(istream & strm,const FstReadOptions & opts)206   static Fst<A> *Read(istream &strm, const FstReadOptions &opts) {
207     FstReadOptions ropts(opts);
208     FstHeader hdr;
209     if (ropts.header)
210       hdr = *opts.header;
211     else {
212       if (!hdr.Read(strm, opts.source))
213         return 0;
214       ropts.header = &hdr;
215     }
216     FstRegister<A> *registr = FstRegister<A>::GetRegister();
217     const typename FstRegister<A>::Reader reader =
218       registr->GetReader(hdr.FstType());
219     if (!reader) {
220       LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType()
221                  << "\" (arc type = \"" << A::Type()
222                  << "\"): " << ropts.source;
223       return 0;
224     }
225     return reader(strm, ropts);
226   };
227 
228   // Read an Fst from a file; return NULL on error
229   // Empty filename reads from standard input
Read(const string & filename)230   static Fst<A> *Read(const string &filename) {
231     if (!filename.empty()) {
232       ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
233       if (!strm) {
234         LOG(ERROR) << "Fst::Read: Can't open file: " << filename;
235         return 0;
236       }
237       return Read(strm, FstReadOptions(filename));
238     } else {
239       return Read(cin, FstReadOptions("standard input"));
240     }
241   }
242 
243   // Write an Fst to an output stream; return false on error
Write(ostream & strm,const FstWriteOptions & opts)244   virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
245     LOG(ERROR) << "Fst::Write: No write stream method for " << Type()
246                << " Fst type";
247     return false;
248   }
249 
250   // Write an Fst to a file; return false on error
251   // Empty filename writes to standard output
Write(const string & filename)252   virtual bool Write(const string &filename) const {
253     LOG(ERROR) << "Fst::Write: No write filename method for " << Type()
254                << " Fst type";
255     return false;
256   }
257 
258   // Return input label symbol table; return NULL if not specified
259   virtual const SymbolTable* InputSymbols() const = 0;
260 
261   // Return output label symbol table; return NULL if not specified
262   virtual const SymbolTable* OutputSymbols() const = 0;
263 
264   // For generic state iterator construction; not normally called
265   // directly by users.
266   virtual void InitStateIterator(StateIteratorData<A> *) const = 0;
267 
268   // For generic arc iterator construction; not normally called
269   // directly by users.
270   virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0;
271 
272   // For generic matcher construction; not normally called
273   // directly by users.
274   virtual MatcherBase<A> *InitMatcher(MatchType match_type) const;
275 
276  protected:
WriteFile(const string & filename)277   bool WriteFile(const string &filename) const {
278     if (!filename.empty()) {
279       ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
280       if (!strm) {
281         LOG(ERROR) << "Fst::Write: Can't open file: " << filename;
282         return false;
283       }
284       return Write(strm, FstWriteOptions(filename));
285     } else {
286       return Write(cout, FstWriteOptions("standard output"));
287     }
288   }
289 };
290 
291 
292 //
293 // STATE and ARC ITERATOR DEFINITIONS
294 //
295 
296 // State iterator interface templated on the Arc definition; used
297 // for StateIterator specializations returned by the InitStateIterator
298 // Fst method.
299 template <class A>
300 class StateIteratorBase {
301  public:
302   typedef A Arc;
303   typedef typename A::StateId StateId;
304 
~StateIteratorBase()305   virtual ~StateIteratorBase() {}
306 
Done()307   bool Done() const { return Done_(); }       // End of iterator?
Value()308   StateId Value() const { return Value_(); }  // Current state (when !Done)
Next()309   void Next() { Next_(); }      // Advance to next state (when !Done)
Reset()310   void Reset() { Reset_(); }    // Return to initial condition
311 
312  private:
313   // This allows base class virtual access to non-virtual derived-
314   // class members of the same name. It makes the derived class more
315   // efficient to use but unsafe to further derive.
316   virtual bool Done_() const = 0;
317   virtual StateId Value_() const = 0;
318   virtual void Next_() = 0;
319   virtual void Reset_() = 0;
320 };
321 
322 
323 // StateIterator initialization data
324 
325 template <class A> struct StateIteratorData {
326   StateIteratorBase<A> *base;   // Specialized iterator if non-zero
327   typename A::StateId nstates;  // O.w. total # of states
328 };
329 
330 
331 // Generic state iterator, templated on the FST definition
332 // - a wrapper around pointer to specific one.
333 // Here is a typical use: \code
334 //   for (StateIterator<StdFst> siter(fst);
335 //        !siter.Done();
336 //        siter.Next()) {
337 //     StateId s = siter.Value();
338 //     ...
339 //   } \endcode
340 template <class F>
341 class StateIterator {
342  public:
343   typedef F FST;
344   typedef typename F::Arc Arc;
345   typedef typename Arc::StateId StateId;
346 
StateIterator(const F & fst)347   explicit StateIterator(const F &fst) : s_(0) {
348     fst.InitStateIterator(&data_);
349   }
350 
~StateIterator()351   ~StateIterator() { if (data_.base) delete data_.base; }
352 
Done()353   bool Done() const {
354     return data_.base ? data_.base->Done() : s_ >= data_.nstates;
355   }
356 
Value()357   StateId Value() const { return data_.base ? data_.base->Value() : s_; }
358 
Next()359   void Next() {
360     if (data_.base)
361       data_.base->Next();
362     else
363       ++s_;
364   }
365 
Reset()366   void Reset() {
367     if (data_.base)
368       data_.base->Reset();
369     else
370       s_ = 0;
371   }
372 
373  private:
374   StateIteratorData<Arc> data_;
375   StateId s_;
376 
377   DISALLOW_COPY_AND_ASSIGN(StateIterator);
378 };
379 
380 
381 // Flags to control the behavior on an arc iterator:
382 static const uint32 kArcILabelValue    = 0x0001;  // Value() gives valid ilabel
383 static const uint32 kArcOLabelValue    = 0x0002;  //  "       "     "    olabel
384 static const uint32 kArcWeightValue    = 0x0004;  //  "       "     "    weight
385 static const uint32 kArcNextStateValue = 0x0008;  //  "       "     " nextstate
386 static const uint32 kArcNoCache   = 0x0010;       // No need to cache arcs
387 
388 static const uint32 kArcValueFlags =
389                   kArcILabelValue | kArcOLabelValue |
390                   kArcWeightValue | kArcNextStateValue;
391 
392 static const uint32 kArcFlags = kArcValueFlags | kArcNoCache;
393 
394 
395 // Arc iterator interface, templated on the Arc definition; used
396 // for Arc iterator specializations that are returned by the InitArcIterator
397 // Fst method.
398 template <class A>
399 class ArcIteratorBase {
400  public:
401   typedef A Arc;
402   typedef typename A::StateId StateId;
403 
~ArcIteratorBase()404   virtual ~ArcIteratorBase() {}
405 
Done()406   bool Done() const { return Done_(); }            // End of iterator?
Value()407   const A& Value() const { return Value_(); }      // Current arc (when !Done)
Next()408   void Next() { Next_(); }           // Advance to next arc (when !Done)
Position()409   size_t Position() const { return Position_(); }  // Return current position
Reset()410   void Reset() { Reset_(); }         // Return to initial condition
Seek(size_t a)411   void Seek(size_t a) { Seek_(a); }  // Random arc access by position
Flags()412   uint32 Flags() const { return Flags_(); }  // Return current behavorial flags
SetFlags(uint32 flags,uint32 mask)413   void SetFlags(uint32 flags, uint32 mask) {  // Set behavorial flags
414     SetFlags_(flags, mask);
415   }
416 
417  private:
418   // This allows base class virtual access to non-virtual derived-
419   // class members of the same name. It makes the derived class more
420   // efficient to use but unsafe to further derive.
421   virtual bool Done_() const = 0;
422   virtual const A& Value_() const = 0;
423   virtual void Next_() = 0;
424   virtual size_t Position_() const = 0;
425   virtual void Reset_() = 0;
426   virtual void Seek_(size_t a) = 0;
427   virtual uint32 Flags_() const = 0;
428   virtual void SetFlags_(uint32 flags, uint32 mask) = 0;
429 };
430 
431 
432 // ArcIterator initialization data
433 template <class A> struct ArcIteratorData {
434   ArcIteratorBase<A> *base;  // Specialized iterator if non-zero
435   const A *arcs;             // O.w. arcs pointer
436   size_t narcs;              // ... and arc count
437   int *ref_count;            // ... and reference count if non-zero
438 };
439 
440 
441 // Generic arc iterator, templated on the FST definition
442 // - a wrapper around pointer to specific one.
443 // Here is a typical use: \code
444 //   for (ArcIterator<StdFst> aiter(fst, s));
445 //        !aiter.Done();
446 //         aiter.Next()) {
447 //     StdArc &arc = aiter.Value();
448 //     ...
449 //   } \endcode
450 template <class F>
451 class ArcIterator {
452    public:
453   typedef F FST;
454   typedef typename F::Arc Arc;
455   typedef typename Arc::StateId StateId;
456 
ArcIterator(const F & fst,StateId s)457   ArcIterator(const F &fst, StateId s) : i_(0) {
458     fst.InitArcIterator(s, &data_);
459   }
460 
ArcIterator(const ArcIteratorData<Arc> & data)461   explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) {
462     if (data_.ref_count)
463       ++(*data_.ref_count);
464   }
465 
~ArcIterator()466   ~ArcIterator() {
467     if (data_.base)
468       delete data_.base;
469     else if (data_.ref_count)
470       --(*data_.ref_count);
471   }
472 
Done()473   bool Done() const {
474     return data_.base ?  data_.base->Done() : i_ >= data_.narcs;
475   }
476 
Value()477   const Arc& Value() const {
478     return data_.base ? data_.base->Value() : data_.arcs[i_];
479   }
480 
Next()481   void Next() {
482     if (data_.base)
483       data_.base->Next();
484     else
485       ++i_;
486   }
487 
Reset()488   void Reset() {
489     if (data_.base)
490       data_.base->Reset();
491     else
492       i_ = 0;
493   }
494 
Seek(size_t a)495   void Seek(size_t a) {
496     if (data_.base)
497       data_.base->Seek(a);
498     else
499       i_ = a;
500   }
501 
Position()502   size_t Position() const {
503     return data_.base ? data_.base->Position() : i_;
504   }
505 
Flags()506   uint32 Flags() const {
507     if (data_.base)
508       return data_.base->Flags();
509     else
510       return kArcValueFlags;
511   }
512 
SetFlags(uint32 flags,uint32 mask)513   void SetFlags(uint32 flags, uint32 mask) {
514     if (data_.base)
515       data_.base->SetFlags(flags, mask);
516   }
517 
518  private:
519   ArcIteratorData<Arc> data_;
520   size_t i_;
521   DISALLOW_COPY_AND_ASSIGN(ArcIterator);
522 };
523 
524 //
525 // MATCHER DEFINITIONS
526 //
527 
528 template <class A>
InitMatcher(MatchType match_type)529 MatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const {
530   return 0;  // Use the default matcher
531 }
532 
533 
534 //
535 // FST ACCESSORS - Useful functions in high-performance cases.
536 //
537 
538 namespace internal {
539 
540 // General case - requires non-abstract, 'final' methods. Use for inlining.
541 template <class F> inline
Final(const F & fst,typename F::Arc::StateId s)542 typename F::Arc::Weight Final(const F &fst, typename F::Arc::StateId s) {
543   return fst.F::Final(s);
544 }
545 
546 template <class F> inline
NumArcs(const F & fst,typename F::Arc::StateId s)547 ssize_t NumArcs(const F &fst, typename F::Arc::StateId s) {
548   return fst.F::NumArcs(s);
549 }
550 
551 template <class F> inline
NumInputEpsilons(const F & fst,typename F::Arc::StateId s)552 ssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) {
553   return fst.F::NumInputEpsilons(s);
554 }
555 
556 template <class F> inline
NumOutputEpsilons(const F & fst,typename F::Arc::StateId s)557 ssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) {
558   return fst.F::NumOutputEpsilons(s);
559 }
560 
561 
562 //  Fst<A> case - abstract methods.
563 template <class A> inline
Final(const Fst<A> & fst,typename A::StateId s)564 typename A::Weight Final(const Fst<A> &fst, typename A::StateId s) {
565   return fst.Final(s);
566 }
567 
568 template <class A> inline
NumArcs(const Fst<A> & fst,typename A::StateId s)569 ssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) {
570   return fst.NumArcs(s);
571 }
572 
573 template <class A> inline
NumInputEpsilons(const Fst<A> & fst,typename A::StateId s)574 ssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) {
575   return fst.NumInputEpsilons(s);
576 }
577 
578 template <class A> inline
NumOutputEpsilons(const Fst<A> & fst,typename A::StateId s)579 ssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) {
580   return fst.NumOutputEpsilons(s);
581 }
582 
583 }  // namespace internal
584 
585 // A useful alias when using StdArc.
586 typedef Fst<StdArc> StdFst;
587 
588 
589 //
590 //  CONSTANT DEFINITIONS
591 //
592 
593 const int kNoStateId   =  -1;  // Not a valid state ID
594 const int kNoLabel     =  -1;  // Not a valid label
595 
596 //
597 // Fst IMPLEMENTATION BASE
598 //
599 // This is the recommended Fst implementation base class. It will
600 // handle reference counts, property bits, type information and symbols.
601 //
602 
603 template <class A> class FstImpl {
604  public:
605   typedef typename A::Weight Weight;
606   typedef typename A::StateId StateId;
607 
FstImpl()608   FstImpl()
609       : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {}
610 
FstImpl(const FstImpl<A> & impl)611   FstImpl(const FstImpl<A> &impl)
612       : properties_(impl.properties_), type_(impl.type_),
613         isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0),
614         osymbols_(impl.osymbols_ ? impl.osymbols_->Copy() : 0) {}
615 
~FstImpl()616   virtual ~FstImpl() {
617     delete isymbols_;
618     delete osymbols_;
619   }
620 
Type()621   const string& Type() const { return type_; }
622 
SetType(const string & type)623   void SetType(const string &type) { type_ = type; }
624 
Properties()625   virtual uint64 Properties() const { return properties_; }
626 
Properties(uint64 mask)627   virtual uint64 Properties(uint64 mask) const { return properties_ & mask; }
628 
SetProperties(uint64 props)629   void SetProperties(uint64 props) {
630     properties_ &= kError;          // kError can't be cleared
631     properties_ |= props;
632   }
633 
SetProperties(uint64 props,uint64 mask)634   void SetProperties(uint64 props, uint64 mask) {
635     properties_ &= ~mask | kError;  // kError can't be cleared
636     properties_ |= props & mask;
637   }
638 
639   // Allows (only) setting error bit on const FST impls
SetProperties(uint64 props,uint64 mask)640   void SetProperties(uint64 props, uint64 mask) const {
641     if (mask != kError)
642       FSTERROR() << "FstImpl::SetProperties() const: can only set kError";
643     properties_ |= kError;
644   }
645 
InputSymbols()646   const SymbolTable* InputSymbols() const { return isymbols_; }
647 
OutputSymbols()648   const SymbolTable* OutputSymbols() const { return osymbols_; }
649 
InputSymbols()650   SymbolTable* InputSymbols() { return isymbols_; }
651 
OutputSymbols()652   SymbolTable* OutputSymbols() { return osymbols_; }
653 
SetInputSymbols(const SymbolTable * isyms)654   void SetInputSymbols(const SymbolTable* isyms) {
655     if (isymbols_) delete isymbols_;
656     isymbols_ = isyms ? isyms->Copy() : 0;
657   }
658 
SetOutputSymbols(const SymbolTable * osyms)659   void SetOutputSymbols(const SymbolTable* osyms) {
660     if (osymbols_) delete osymbols_;
661     osymbols_ = osyms ? osyms->Copy() : 0;
662   }
663 
RefCount()664   int RefCount() const {
665     return ref_count_.count();
666   }
667 
IncrRefCount()668   int IncrRefCount() {
669     return ref_count_.Incr();
670   }
671 
DecrRefCount()672   int DecrRefCount() {
673     return ref_count_.Decr();
674   }
675 
676   // Read-in header and symbols from input stream, initialize Fst, and
677   // return the header.  If opts.header is non-null, skip read-in and
678   // use the option value.  If opts.[io]symbols is non-null, read-in
679   // (if present), but use the option value.
680   bool ReadHeader(istream &strm, const FstReadOptions& opts,
681                   int min_version, FstHeader *hdr);
682 
683   // Write-out header and symbols from output stream.
684   // If a opts.header is false, skip writing header.
685   // If opts.[io]symbols is false, skip writing those symbols.
686   // This method is needed for Impl's that implement Write methods.
WriteHeader(ostream & strm,const FstWriteOptions & opts,int version,FstHeader * hdr)687   void WriteHeader(ostream &strm, const FstWriteOptions& opts,
688                    int version, FstHeader *hdr) const {
689     if (opts.write_header) {
690       hdr->SetFstType(type_);
691       hdr->SetArcType(A::Type());
692       hdr->SetVersion(version);
693       hdr->SetProperties(properties_);
694       int32 file_flags = 0;
695       if (isymbols_ && opts.write_isymbols)
696         file_flags |= FstHeader::HAS_ISYMBOLS;
697       if (osymbols_ && opts.write_osymbols)
698         file_flags |= FstHeader::HAS_OSYMBOLS;
699       if (opts.align)
700         file_flags |= FstHeader::IS_ALIGNED;
701       hdr->SetFlags(file_flags);
702       hdr->Write(strm, opts.source);
703     }
704     if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm);
705     if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm);
706   }
707 
708   // Write-out header and symbols to output stream.
709   // If a opts.header is false, skip writing header.
710   // If opts.[io]symbols is false, skip writing those symbols.
711   // type is the Fst type being written.
712   // This method is used in the cross-type serialization methods Fst::WriteFst.
WriteFstHeader(const Fst<A> & fst,ostream & strm,const FstWriteOptions & opts,int version,const string & type,uint64 properties,FstHeader * hdr)713   static void WriteFstHeader(const Fst<A> &fst, ostream &strm,
714                              const FstWriteOptions& opts, int version,
715                              const string &type, uint64 properties,
716                              FstHeader *hdr) {
717     if (opts.write_header) {
718       hdr->SetFstType(type);
719       hdr->SetArcType(A::Type());
720       hdr->SetVersion(version);
721       hdr->SetProperties(properties);
722       int32 file_flags = 0;
723       if (fst.InputSymbols() && opts.write_isymbols)
724         file_flags |= FstHeader::HAS_ISYMBOLS;
725       if (fst.OutputSymbols() && opts.write_osymbols)
726         file_flags |= FstHeader::HAS_OSYMBOLS;
727       if (opts.align)
728         file_flags |= FstHeader::IS_ALIGNED;
729       hdr->SetFlags(file_flags);
730       hdr->Write(strm, opts.source);
731     }
732     if (fst.InputSymbols() && opts.write_isymbols) {
733       fst.InputSymbols()->Write(strm);
734     }
735     if (fst.OutputSymbols() && opts.write_osymbols) {
736       fst.OutputSymbols()->Write(strm);
737     }
738   }
739 
740   // In serialization routines where the header cannot be written until after
741   // the machine has been serialized, this routine can be called to seek to
742   // the beginning of the file an rewrite the header with updated fields.
743   // It repositions the file pointer back at the end of the file.
744   // returns true on success, false on failure.
UpdateFstHeader(const Fst<A> & fst,ostream & strm,const FstWriteOptions & opts,int version,const string & type,uint64 properties,FstHeader * hdr,size_t header_offset)745   static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm,
746                               const FstWriteOptions& opts, int version,
747                               const string &type, uint64 properties,
748                               FstHeader *hdr, size_t header_offset) {
749     strm.seekp(header_offset);
750     if (!strm) {
751       LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
752       return false;
753     }
754     WriteFstHeader(fst, strm, opts, version, type, properties, hdr);
755     if (!strm) {
756       LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
757       return false;
758     }
759     strm.seekp(0, ios_base::end);
760     if (!strm) {
761       LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
762       return false;
763     }
764     return true;
765   }
766 
767  protected:
768   mutable uint64 properties_;           // Property bits
769 
770  private:
771   string type_;                 // Unique name of Fst class
772   SymbolTable *isymbols_;       // Ilabel symbol table
773   SymbolTable *osymbols_;       // Olabel symbol table
774   RefCounter ref_count_;        // Reference count
775 
776   void operator=(const FstImpl<A> &impl);  // disallow
777 };
778 
779 template <class A> inline
ReadHeader(istream & strm,const FstReadOptions & opts,int min_version,FstHeader * hdr)780 bool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions& opts,
781                             int min_version, FstHeader *hdr) {
782   if (opts.header)
783     *hdr = *opts.header;
784   else if (!hdr->Read(strm, opts.source))
785     return false;
786 
787   if (FLAGS_v >= 2) {
788     LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source
789               << ", fst_type: " << hdr->FstType()
790               << ", arc_type: " << A::Type()
791               << ", version: " << hdr->Version()
792               << ", flags: " << hdr->GetFlags();
793   }
794 
795   if (hdr->FstType() != type_) {
796     LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_
797                << "\": " << opts.source;
798     return false;
799   }
800   if (hdr->ArcType() != A::Type()) {
801     LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type()
802                << "\": " << opts.source;
803     return false;
804   }
805   if (hdr->Version() < min_version) {
806     LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_
807                << " Fst version: " << opts.source;
808     return false;
809   }
810   properties_ = hdr->Properties();
811   if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS)
812     isymbols_ = SymbolTable::Read(strm, opts.source);
813   if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS)
814     osymbols_ =SymbolTable::Read(strm, opts.source);
815 
816   if (opts.isymbols) {
817     delete isymbols_;
818     isymbols_ = opts.isymbols->Copy();
819   }
820   if (opts.osymbols) {
821     delete osymbols_;
822     osymbols_ = opts.osymbols->Copy();
823   }
824   return true;
825 }
826 
827 
828 template<class Arc>
829 uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known);
830 
831 
832 // This is a helper class template useful for attaching an Fst interface to
833 // its implementation, handling reference counting.
834 template < class I, class F = Fst<typename I::Arc> >
835 class ImplToFst : public F {
836  public:
837   typedef typename I::Arc Arc;
838   typedef typename Arc::Weight Weight;
839   typedef typename Arc::StateId StateId;
840 
~ImplToFst()841   virtual ~ImplToFst() { if (!impl_->DecrRefCount()) delete impl_;  }
842 
Start()843   virtual StateId Start() const { return impl_->Start(); }
844 
Final(StateId s)845   virtual Weight Final(StateId s) const { return impl_->Final(s); }
846 
NumArcs(StateId s)847   virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); }
848 
NumInputEpsilons(StateId s)849   virtual size_t NumInputEpsilons(StateId s) const {
850     return impl_->NumInputEpsilons(s);
851   }
852 
NumOutputEpsilons(StateId s)853   virtual size_t NumOutputEpsilons(StateId s) const {
854     return impl_->NumOutputEpsilons(s);
855   }
856 
Properties(uint64 mask,bool test)857   virtual uint64 Properties(uint64 mask, bool test) const {
858     if (test) {
859       uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops);
860       impl_->SetProperties(testprops, knownprops);
861       return testprops & mask;
862     } else {
863       return impl_->Properties(mask);
864     }
865   }
866 
Type()867   virtual const string& Type() const { return impl_->Type(); }
868 
InputSymbols()869   virtual const SymbolTable* InputSymbols() const {
870     return impl_->InputSymbols();
871   }
872 
OutputSymbols()873   virtual const SymbolTable* OutputSymbols() const {
874     return impl_->OutputSymbols();
875   }
876 
877  protected:
ImplToFst()878   ImplToFst() : impl_(0) {}
879 
ImplToFst(I * impl)880   ImplToFst(I *impl) : impl_(impl) {}
881 
ImplToFst(const ImplToFst<I,F> & fst)882   ImplToFst(const ImplToFst<I, F> &fst) {
883     impl_ = fst.impl_;
884     impl_->IncrRefCount();
885   }
886 
887   // This constructor presumes there is a copy constructor for the
888   // implementation.
ImplToFst(const ImplToFst<I,F> & fst,bool safe)889   ImplToFst(const ImplToFst<I, F> &fst, bool safe) {
890     if (safe) {
891       impl_ = new I(*(fst.impl_));
892     } else {
893       impl_ = fst.impl_;
894       impl_->IncrRefCount();
895     }
896   }
897 
GetImpl()898   I *GetImpl() const { return impl_; }
899 
900   // Change Fst implementation pointer. If 'own_impl' is true,
901   // ownership of the input implementation is given to this
902   // object; otherwise, the input implementation's reference count
903   // should be incremented.
904   void SetImpl(I *impl, bool own_impl = true) {
905     if (!own_impl)
906       impl->IncrRefCount();
907     if (impl_ && !impl_->DecrRefCount()) delete impl_;
908     impl_ = impl;
909   }
910 
911  private:
912   // Disallow
913   ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst);
914 
915   ImplToFst<I, F> &operator=(const Fst<Arc> &fst) {
916     FSTERROR() << "ImplToFst: Assignment operator disallowed";
917     GetImpl()->SetProperties(kError, kError);
918     return *this;
919   }
920 
921   I *impl_;
922 };
923 
924 
925 // Converts FSTs by casting their implementations, where this makes
926 // sense (which excludes implementations with weight-dependent virtual
927 // methods). Must be a friend of the Fst classes involved (currently
928 // the concrete Fsts: VectorFst, ConstFst, CompactFst).
Cast(const F & ifst,G * ofst)929 template<class F, class G> void Cast(const F &ifst, G *ofst) {
930   ofst->SetImpl(reinterpret_cast<typename G::Impl *>(ifst.GetImpl()), false);
931 }
932 
933 // Fst Serialization
934 template <class A>
FstToString(const Fst<A> & fst,string * result)935 void FstToString(const Fst<A> &fst, string *result) {
936   ostringstream ostrm;
937   fst.Write(ostrm, FstWriteOptions("FstToString"));
938   *result = ostrm.str();
939 }
940 
941 template <class A>
StringToFst(const string & s)942 Fst<A> *StringToFst(const string &s) {
943   istringstream istrm(s);
944   return Fst<A>::Read(istrm, FstReadOptions("StringToFst"));
945 }
946 
947 }  // namespace fst
948 
949 #endif  // FST_LIB_FST_H__
950