1 // fst.h
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Finite-State Transducer (FST) - abstract base class definition,
20 // state and arc iterator interface, and suggested base implementation.
21 //
22
23 #ifndef FST_LIB_FST_H__
24 #define FST_LIB_FST_H__
25
26 #include <stddef.h>
27 #include <sys/types.h>
28 #include <cmath>
29 #include <string>
30
31 #include <fst/compat.h>
32 #include <fst/types.h>
33
34 #include <fst/arc.h>
35 #include <fst/properties.h>
36 #include <fst/register.h>
37 #include <iostream>
38 #include <fstream>
39 #include <sstream>
40 #include <fst/symbol-table.h>
41 #include <fst/util.h>
42
43
44 DECLARE_bool(fst_align);
45
46 namespace fst {
47
48 bool IsFstHeader(istream &, const string &);
49
50 class FstHeader;
51 template <class A> class StateIteratorData;
52 template <class A> class ArcIteratorData;
53 template <class A> class MatcherBase;
54
55 struct FstReadOptions {
56 // FileReadMode(s) are advisory, there are many conditions than prevent a
57 // file from being mapped, READ mode will be selected in these cases with
58 // a warning indicating why it was chosen.
59 enum FileReadMode { READ, MAP };
60
61 string source; // Where you're reading from
62 const FstHeader *header; // Pointer to Fst header. If non-zero, use
63 // this info (don't read a stream header)
64 const SymbolTable* isymbols; // Pointer to input symbols. If non-zero, use
65 // this info (read and skip stream isymbols)
66 const SymbolTable* osymbols; // Pointer to output symbols. If non-zero, use
67 // this info (read and skip stream osymbols)
68 FileReadMode mode; // Read or map files (advisory, if possible)
69
70 explicit FstReadOptions(const string& src = "<unspecified>",
71 const FstHeader *hdr = 0,
72 const SymbolTable* isym = 0,
73 const SymbolTable* osym = 0);
74
75 explicit FstReadOptions(const string& src,
76 const SymbolTable* isym,
77 const SymbolTable* osym = 0);
78
79 // Helper function to convert strings FileReadModes into their enum value.
80 static FileReadMode ReadMode(const string &mode);
81 };
82
83 struct FstWriteOptions {
84 string source; // Where you're writing to
85 bool write_header; // Write the header?
86 bool write_isymbols; // Write input symbols?
87 bool write_osymbols; // Write output symbols?
88 bool align; // Write data aligned where appropriate;
89 // this may fail on pipes
90
91 explicit FstWriteOptions(const string& src = "<unspecifed>",
92 bool hdr = true, bool isym = true,
93 bool osym = true, bool alig = FLAGS_fst_align)
sourceFstWriteOptions94 : source(src), write_header(hdr),
95 write_isymbols(isym), write_osymbols(osym), align(alig) {}
96 };
97
98 //
99 // Fst HEADER CLASS
100 //
101 // This is the recommended Fst file header representation.
102 //
103 class FstHeader {
104 public:
105 enum {
106 HAS_ISYMBOLS = 0x1, // Has input symbol table
107 HAS_OSYMBOLS = 0x2, // Has output symbol table
108 IS_ALIGNED = 0x4, // Memory-aligned (where appropriate)
109 } Flags;
110
FstHeader()111 FstHeader() : version_(0), flags_(0), properties_(0), start_(-1),
112 numstates_(0), numarcs_(0) {}
FstType()113 const string &FstType() const { return fsttype_; }
ArcType()114 const string &ArcType() const { return arctype_; }
Version()115 int32 Version() const { return version_; }
GetFlags()116 int32 GetFlags() const { return flags_; }
Properties()117 uint64 Properties() const { return properties_; }
Start()118 int64 Start() const { return start_; }
NumStates()119 int64 NumStates() const { return numstates_; }
NumArcs()120 int64 NumArcs() const { return numarcs_; }
121
SetFstType(const string & type)122 void SetFstType(const string& type) { fsttype_ = type; }
SetArcType(const string & type)123 void SetArcType(const string& type) { arctype_ = type; }
SetVersion(int32 version)124 void SetVersion(int32 version) { version_ = version; }
SetFlags(int32 flags)125 void SetFlags(int32 flags) { flags_ = flags; }
SetProperties(uint64 properties)126 void SetProperties(uint64 properties) { properties_ = properties; }
SetStart(int64 start)127 void SetStart(int64 start) { start_ = start; }
SetNumStates(int64 numstates)128 void SetNumStates(int64 numstates) { numstates_ = numstates; }
SetNumArcs(int64 numarcs)129 void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; }
130
131 bool Read(istream &strm, const string &source, bool rewind = false);
132 bool Write(ostream &strm, const string &source) const;
133
134 private:
135
136 string fsttype_; // E.g. "vector"
137 string arctype_; // E.g. "standard"
138 int32 version_; // Type version #
139 int32 flags_; // File format bits
140 uint64 properties_; // FST property bits
141 int64 start_; // Start state
142 int64 numstates_; // # of states
143 int64 numarcs_; // # of arcs
144 };
145
146
147 // Specifies matcher action.
148 enum MatchType { MATCH_INPUT, // Match input label.
149 MATCH_OUTPUT, // Match output label.
150 MATCH_BOTH, // Match input or output label.
151 MATCH_NONE, // Match nothing.
152 MATCH_UNKNOWN }; // Match type unknown.
153
154 //
155 // Fst INTERFACE CLASS DEFINITION
156 //
157
158 // A generic FST, templated on the arc definition, with
159 // common-demoninator methods (use StateIterator and ArcIterator to
160 // iterate over its states and arcs).
161 template <class A>
162 class Fst {
163 public:
164 typedef A Arc;
165 typedef typename A::Weight Weight;
166 typedef typename A::StateId StateId;
167
~Fst()168 virtual ~Fst() {}
169
170 virtual StateId Start() const = 0; // Initial state
171
172 virtual Weight Final(StateId) const = 0; // State's final weight
173
174 virtual size_t NumArcs(StateId) const = 0; // State's arc count
175
176 virtual size_t NumInputEpsilons(StateId)
177 const = 0; // State's input epsilon count
178
179 virtual size_t NumOutputEpsilons(StateId)
180 const = 0; // State's output epsilon count
181
182 // If test=false, return stored properties bits for mask (some poss. unknown)
183 // If test=true, return property bits for mask (computing o.w. unknown)
184 virtual uint64 Properties(uint64 mask, bool test)
185 const = 0; // Property bits
186
187 virtual const string& Type() const = 0; // Fst type name
188
189 // Get a copy of this Fst. The copying behaves as follows:
190 //
191 // (1) The copying is constant time if safe = false or if safe = true
192 // and is on an otherwise unaccessed Fst.
193 //
194 // (2) If safe = true, the copy is thread-safe in that the original
195 // and copy can be safely accessed (but not necessarily mutated) by
196 // separate threads. For some Fst types, 'Copy(true)' should only be
197 // called on an Fst that has not otherwise been accessed. Its behavior
198 // is undefined otherwise.
199 //
200 // (3) If a MutableFst is copied and then mutated, then the original is
201 // unmodified and vice versa (often by a copy-on-write on the initial
202 // mutation, which may not be constant time).
203 virtual Fst<A> *Copy(bool safe = false) const = 0;
204
205 // Read an Fst from an input stream; returns NULL on error
Read(istream & strm,const FstReadOptions & opts)206 static Fst<A> *Read(istream &strm, const FstReadOptions &opts) {
207 FstReadOptions ropts(opts);
208 FstHeader hdr;
209 if (ropts.header)
210 hdr = *opts.header;
211 else {
212 if (!hdr.Read(strm, opts.source))
213 return 0;
214 ropts.header = &hdr;
215 }
216 FstRegister<A> *registr = FstRegister<A>::GetRegister();
217 const typename FstRegister<A>::Reader reader =
218 registr->GetReader(hdr.FstType());
219 if (!reader) {
220 LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType()
221 << "\" (arc type = \"" << A::Type()
222 << "\"): " << ropts.source;
223 return 0;
224 }
225 return reader(strm, ropts);
226 };
227
228 // Read an Fst from a file; return NULL on error
229 // Empty filename reads from standard input
Read(const string & filename)230 static Fst<A> *Read(const string &filename) {
231 if (!filename.empty()) {
232 ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
233 if (!strm) {
234 LOG(ERROR) << "Fst::Read: Can't open file: " << filename;
235 return 0;
236 }
237 return Read(strm, FstReadOptions(filename));
238 } else {
239 return Read(cin, FstReadOptions("standard input"));
240 }
241 }
242
243 // Write an Fst to an output stream; return false on error
Write(ostream & strm,const FstWriteOptions & opts)244 virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
245 LOG(ERROR) << "Fst::Write: No write stream method for " << Type()
246 << " Fst type";
247 return false;
248 }
249
250 // Write an Fst to a file; return false on error
251 // Empty filename writes to standard output
Write(const string & filename)252 virtual bool Write(const string &filename) const {
253 LOG(ERROR) << "Fst::Write: No write filename method for " << Type()
254 << " Fst type";
255 return false;
256 }
257
258 // Return input label symbol table; return NULL if not specified
259 virtual const SymbolTable* InputSymbols() const = 0;
260
261 // Return output label symbol table; return NULL if not specified
262 virtual const SymbolTable* OutputSymbols() const = 0;
263
264 // For generic state iterator construction; not normally called
265 // directly by users.
266 virtual void InitStateIterator(StateIteratorData<A> *) const = 0;
267
268 // For generic arc iterator construction; not normally called
269 // directly by users.
270 virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0;
271
272 // For generic matcher construction; not normally called
273 // directly by users.
274 virtual MatcherBase<A> *InitMatcher(MatchType match_type) const;
275
276 protected:
WriteFile(const string & filename)277 bool WriteFile(const string &filename) const {
278 if (!filename.empty()) {
279 ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
280 if (!strm) {
281 LOG(ERROR) << "Fst::Write: Can't open file: " << filename;
282 return false;
283 }
284 return Write(strm, FstWriteOptions(filename));
285 } else {
286 return Write(cout, FstWriteOptions("standard output"));
287 }
288 }
289 };
290
291
292 //
293 // STATE and ARC ITERATOR DEFINITIONS
294 //
295
296 // State iterator interface templated on the Arc definition; used
297 // for StateIterator specializations returned by the InitStateIterator
298 // Fst method.
299 template <class A>
300 class StateIteratorBase {
301 public:
302 typedef A Arc;
303 typedef typename A::StateId StateId;
304
~StateIteratorBase()305 virtual ~StateIteratorBase() {}
306
Done()307 bool Done() const { return Done_(); } // End of iterator?
Value()308 StateId Value() const { return Value_(); } // Current state (when !Done)
Next()309 void Next() { Next_(); } // Advance to next state (when !Done)
Reset()310 void Reset() { Reset_(); } // Return to initial condition
311
312 private:
313 // This allows base class virtual access to non-virtual derived-
314 // class members of the same name. It makes the derived class more
315 // efficient to use but unsafe to further derive.
316 virtual bool Done_() const = 0;
317 virtual StateId Value_() const = 0;
318 virtual void Next_() = 0;
319 virtual void Reset_() = 0;
320 };
321
322
323 // StateIterator initialization data
324
325 template <class A> struct StateIteratorData {
326 StateIteratorBase<A> *base; // Specialized iterator if non-zero
327 typename A::StateId nstates; // O.w. total # of states
328 };
329
330
331 // Generic state iterator, templated on the FST definition
332 // - a wrapper around pointer to specific one.
333 // Here is a typical use: \code
334 // for (StateIterator<StdFst> siter(fst);
335 // !siter.Done();
336 // siter.Next()) {
337 // StateId s = siter.Value();
338 // ...
339 // } \endcode
340 template <class F>
341 class StateIterator {
342 public:
343 typedef F FST;
344 typedef typename F::Arc Arc;
345 typedef typename Arc::StateId StateId;
346
StateIterator(const F & fst)347 explicit StateIterator(const F &fst) : s_(0) {
348 fst.InitStateIterator(&data_);
349 }
350
~StateIterator()351 ~StateIterator() { if (data_.base) delete data_.base; }
352
Done()353 bool Done() const {
354 return data_.base ? data_.base->Done() : s_ >= data_.nstates;
355 }
356
Value()357 StateId Value() const { return data_.base ? data_.base->Value() : s_; }
358
Next()359 void Next() {
360 if (data_.base)
361 data_.base->Next();
362 else
363 ++s_;
364 }
365
Reset()366 void Reset() {
367 if (data_.base)
368 data_.base->Reset();
369 else
370 s_ = 0;
371 }
372
373 private:
374 StateIteratorData<Arc> data_;
375 StateId s_;
376
377 DISALLOW_COPY_AND_ASSIGN(StateIterator);
378 };
379
380
381 // Flags to control the behavior on an arc iterator:
382 static const uint32 kArcILabelValue = 0x0001; // Value() gives valid ilabel
383 static const uint32 kArcOLabelValue = 0x0002; // " " " olabel
384 static const uint32 kArcWeightValue = 0x0004; // " " " weight
385 static const uint32 kArcNextStateValue = 0x0008; // " " " nextstate
386 static const uint32 kArcNoCache = 0x0010; // No need to cache arcs
387
388 static const uint32 kArcValueFlags =
389 kArcILabelValue | kArcOLabelValue |
390 kArcWeightValue | kArcNextStateValue;
391
392 static const uint32 kArcFlags = kArcValueFlags | kArcNoCache;
393
394
395 // Arc iterator interface, templated on the Arc definition; used
396 // for Arc iterator specializations that are returned by the InitArcIterator
397 // Fst method.
398 template <class A>
399 class ArcIteratorBase {
400 public:
401 typedef A Arc;
402 typedef typename A::StateId StateId;
403
~ArcIteratorBase()404 virtual ~ArcIteratorBase() {}
405
Done()406 bool Done() const { return Done_(); } // End of iterator?
Value()407 const A& Value() const { return Value_(); } // Current arc (when !Done)
Next()408 void Next() { Next_(); } // Advance to next arc (when !Done)
Position()409 size_t Position() const { return Position_(); } // Return current position
Reset()410 void Reset() { Reset_(); } // Return to initial condition
Seek(size_t a)411 void Seek(size_t a) { Seek_(a); } // Random arc access by position
Flags()412 uint32 Flags() const { return Flags_(); } // Return current behavorial flags
SetFlags(uint32 flags,uint32 mask)413 void SetFlags(uint32 flags, uint32 mask) { // Set behavorial flags
414 SetFlags_(flags, mask);
415 }
416
417 private:
418 // This allows base class virtual access to non-virtual derived-
419 // class members of the same name. It makes the derived class more
420 // efficient to use but unsafe to further derive.
421 virtual bool Done_() const = 0;
422 virtual const A& Value_() const = 0;
423 virtual void Next_() = 0;
424 virtual size_t Position_() const = 0;
425 virtual void Reset_() = 0;
426 virtual void Seek_(size_t a) = 0;
427 virtual uint32 Flags_() const = 0;
428 virtual void SetFlags_(uint32 flags, uint32 mask) = 0;
429 };
430
431
432 // ArcIterator initialization data
433 template <class A> struct ArcIteratorData {
434 ArcIteratorBase<A> *base; // Specialized iterator if non-zero
435 const A *arcs; // O.w. arcs pointer
436 size_t narcs; // ... and arc count
437 int *ref_count; // ... and reference count if non-zero
438 };
439
440
441 // Generic arc iterator, templated on the FST definition
442 // - a wrapper around pointer to specific one.
443 // Here is a typical use: \code
444 // for (ArcIterator<StdFst> aiter(fst, s));
445 // !aiter.Done();
446 // aiter.Next()) {
447 // StdArc &arc = aiter.Value();
448 // ...
449 // } \endcode
450 template <class F>
451 class ArcIterator {
452 public:
453 typedef F FST;
454 typedef typename F::Arc Arc;
455 typedef typename Arc::StateId StateId;
456
ArcIterator(const F & fst,StateId s)457 ArcIterator(const F &fst, StateId s) : i_(0) {
458 fst.InitArcIterator(s, &data_);
459 }
460
ArcIterator(const ArcIteratorData<Arc> & data)461 explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) {
462 if (data_.ref_count)
463 ++(*data_.ref_count);
464 }
465
~ArcIterator()466 ~ArcIterator() {
467 if (data_.base)
468 delete data_.base;
469 else if (data_.ref_count)
470 --(*data_.ref_count);
471 }
472
Done()473 bool Done() const {
474 return data_.base ? data_.base->Done() : i_ >= data_.narcs;
475 }
476
Value()477 const Arc& Value() const {
478 return data_.base ? data_.base->Value() : data_.arcs[i_];
479 }
480
Next()481 void Next() {
482 if (data_.base)
483 data_.base->Next();
484 else
485 ++i_;
486 }
487
Reset()488 void Reset() {
489 if (data_.base)
490 data_.base->Reset();
491 else
492 i_ = 0;
493 }
494
Seek(size_t a)495 void Seek(size_t a) {
496 if (data_.base)
497 data_.base->Seek(a);
498 else
499 i_ = a;
500 }
501
Position()502 size_t Position() const {
503 return data_.base ? data_.base->Position() : i_;
504 }
505
Flags()506 uint32 Flags() const {
507 if (data_.base)
508 return data_.base->Flags();
509 else
510 return kArcValueFlags;
511 }
512
SetFlags(uint32 flags,uint32 mask)513 void SetFlags(uint32 flags, uint32 mask) {
514 if (data_.base)
515 data_.base->SetFlags(flags, mask);
516 }
517
518 private:
519 ArcIteratorData<Arc> data_;
520 size_t i_;
521 DISALLOW_COPY_AND_ASSIGN(ArcIterator);
522 };
523
524 //
525 // MATCHER DEFINITIONS
526 //
527
528 template <class A>
InitMatcher(MatchType match_type)529 MatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const {
530 return 0; // Use the default matcher
531 }
532
533
534 //
535 // FST ACCESSORS - Useful functions in high-performance cases.
536 //
537
538 namespace internal {
539
540 // General case - requires non-abstract, 'final' methods. Use for inlining.
541 template <class F> inline
Final(const F & fst,typename F::Arc::StateId s)542 typename F::Arc::Weight Final(const F &fst, typename F::Arc::StateId s) {
543 return fst.F::Final(s);
544 }
545
546 template <class F> inline
NumArcs(const F & fst,typename F::Arc::StateId s)547 ssize_t NumArcs(const F &fst, typename F::Arc::StateId s) {
548 return fst.F::NumArcs(s);
549 }
550
551 template <class F> inline
NumInputEpsilons(const F & fst,typename F::Arc::StateId s)552 ssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) {
553 return fst.F::NumInputEpsilons(s);
554 }
555
556 template <class F> inline
NumOutputEpsilons(const F & fst,typename F::Arc::StateId s)557 ssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) {
558 return fst.F::NumOutputEpsilons(s);
559 }
560
561
562 // Fst<A> case - abstract methods.
563 template <class A> inline
Final(const Fst<A> & fst,typename A::StateId s)564 typename A::Weight Final(const Fst<A> &fst, typename A::StateId s) {
565 return fst.Final(s);
566 }
567
568 template <class A> inline
NumArcs(const Fst<A> & fst,typename A::StateId s)569 ssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) {
570 return fst.NumArcs(s);
571 }
572
573 template <class A> inline
NumInputEpsilons(const Fst<A> & fst,typename A::StateId s)574 ssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) {
575 return fst.NumInputEpsilons(s);
576 }
577
578 template <class A> inline
NumOutputEpsilons(const Fst<A> & fst,typename A::StateId s)579 ssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) {
580 return fst.NumOutputEpsilons(s);
581 }
582
583 } // namespace internal
584
585 // A useful alias when using StdArc.
586 typedef Fst<StdArc> StdFst;
587
588
589 //
590 // CONSTANT DEFINITIONS
591 //
592
593 const int kNoStateId = -1; // Not a valid state ID
594 const int kNoLabel = -1; // Not a valid label
595
596 //
597 // Fst IMPLEMENTATION BASE
598 //
599 // This is the recommended Fst implementation base class. It will
600 // handle reference counts, property bits, type information and symbols.
601 //
602
603 template <class A> class FstImpl {
604 public:
605 typedef typename A::Weight Weight;
606 typedef typename A::StateId StateId;
607
FstImpl()608 FstImpl()
609 : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {}
610
FstImpl(const FstImpl<A> & impl)611 FstImpl(const FstImpl<A> &impl)
612 : properties_(impl.properties_), type_(impl.type_),
613 isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0),
614 osymbols_(impl.osymbols_ ? impl.osymbols_->Copy() : 0) {}
615
~FstImpl()616 virtual ~FstImpl() {
617 delete isymbols_;
618 delete osymbols_;
619 }
620
Type()621 const string& Type() const { return type_; }
622
SetType(const string & type)623 void SetType(const string &type) { type_ = type; }
624
Properties()625 virtual uint64 Properties() const { return properties_; }
626
Properties(uint64 mask)627 virtual uint64 Properties(uint64 mask) const { return properties_ & mask; }
628
SetProperties(uint64 props)629 void SetProperties(uint64 props) {
630 properties_ &= kError; // kError can't be cleared
631 properties_ |= props;
632 }
633
SetProperties(uint64 props,uint64 mask)634 void SetProperties(uint64 props, uint64 mask) {
635 properties_ &= ~mask | kError; // kError can't be cleared
636 properties_ |= props & mask;
637 }
638
639 // Allows (only) setting error bit on const FST impls
SetProperties(uint64 props,uint64 mask)640 void SetProperties(uint64 props, uint64 mask) const {
641 if (mask != kError)
642 FSTERROR() << "FstImpl::SetProperties() const: can only set kError";
643 properties_ |= kError;
644 }
645
InputSymbols()646 const SymbolTable* InputSymbols() const { return isymbols_; }
647
OutputSymbols()648 const SymbolTable* OutputSymbols() const { return osymbols_; }
649
InputSymbols()650 SymbolTable* InputSymbols() { return isymbols_; }
651
OutputSymbols()652 SymbolTable* OutputSymbols() { return osymbols_; }
653
SetInputSymbols(const SymbolTable * isyms)654 void SetInputSymbols(const SymbolTable* isyms) {
655 if (isymbols_) delete isymbols_;
656 isymbols_ = isyms ? isyms->Copy() : 0;
657 }
658
SetOutputSymbols(const SymbolTable * osyms)659 void SetOutputSymbols(const SymbolTable* osyms) {
660 if (osymbols_) delete osymbols_;
661 osymbols_ = osyms ? osyms->Copy() : 0;
662 }
663
RefCount()664 int RefCount() const {
665 return ref_count_.count();
666 }
667
IncrRefCount()668 int IncrRefCount() {
669 return ref_count_.Incr();
670 }
671
DecrRefCount()672 int DecrRefCount() {
673 return ref_count_.Decr();
674 }
675
676 // Read-in header and symbols from input stream, initialize Fst, and
677 // return the header. If opts.header is non-null, skip read-in and
678 // use the option value. If opts.[io]symbols is non-null, read-in
679 // (if present), but use the option value.
680 bool ReadHeader(istream &strm, const FstReadOptions& opts,
681 int min_version, FstHeader *hdr);
682
683 // Write-out header and symbols from output stream.
684 // If a opts.header is false, skip writing header.
685 // If opts.[io]symbols is false, skip writing those symbols.
686 // This method is needed for Impl's that implement Write methods.
WriteHeader(ostream & strm,const FstWriteOptions & opts,int version,FstHeader * hdr)687 void WriteHeader(ostream &strm, const FstWriteOptions& opts,
688 int version, FstHeader *hdr) const {
689 if (opts.write_header) {
690 hdr->SetFstType(type_);
691 hdr->SetArcType(A::Type());
692 hdr->SetVersion(version);
693 hdr->SetProperties(properties_);
694 int32 file_flags = 0;
695 if (isymbols_ && opts.write_isymbols)
696 file_flags |= FstHeader::HAS_ISYMBOLS;
697 if (osymbols_ && opts.write_osymbols)
698 file_flags |= FstHeader::HAS_OSYMBOLS;
699 if (opts.align)
700 file_flags |= FstHeader::IS_ALIGNED;
701 hdr->SetFlags(file_flags);
702 hdr->Write(strm, opts.source);
703 }
704 if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm);
705 if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm);
706 }
707
708 // Write-out header and symbols to output stream.
709 // If a opts.header is false, skip writing header.
710 // If opts.[io]symbols is false, skip writing those symbols.
711 // type is the Fst type being written.
712 // This method is used in the cross-type serialization methods Fst::WriteFst.
WriteFstHeader(const Fst<A> & fst,ostream & strm,const FstWriteOptions & opts,int version,const string & type,uint64 properties,FstHeader * hdr)713 static void WriteFstHeader(const Fst<A> &fst, ostream &strm,
714 const FstWriteOptions& opts, int version,
715 const string &type, uint64 properties,
716 FstHeader *hdr) {
717 if (opts.write_header) {
718 hdr->SetFstType(type);
719 hdr->SetArcType(A::Type());
720 hdr->SetVersion(version);
721 hdr->SetProperties(properties);
722 int32 file_flags = 0;
723 if (fst.InputSymbols() && opts.write_isymbols)
724 file_flags |= FstHeader::HAS_ISYMBOLS;
725 if (fst.OutputSymbols() && opts.write_osymbols)
726 file_flags |= FstHeader::HAS_OSYMBOLS;
727 if (opts.align)
728 file_flags |= FstHeader::IS_ALIGNED;
729 hdr->SetFlags(file_flags);
730 hdr->Write(strm, opts.source);
731 }
732 if (fst.InputSymbols() && opts.write_isymbols) {
733 fst.InputSymbols()->Write(strm);
734 }
735 if (fst.OutputSymbols() && opts.write_osymbols) {
736 fst.OutputSymbols()->Write(strm);
737 }
738 }
739
740 // In serialization routines where the header cannot be written until after
741 // the machine has been serialized, this routine can be called to seek to
742 // the beginning of the file an rewrite the header with updated fields.
743 // It repositions the file pointer back at the end of the file.
744 // returns true on success, false on failure.
UpdateFstHeader(const Fst<A> & fst,ostream & strm,const FstWriteOptions & opts,int version,const string & type,uint64 properties,FstHeader * hdr,size_t header_offset)745 static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm,
746 const FstWriteOptions& opts, int version,
747 const string &type, uint64 properties,
748 FstHeader *hdr, size_t header_offset) {
749 strm.seekp(header_offset);
750 if (!strm) {
751 LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
752 return false;
753 }
754 WriteFstHeader(fst, strm, opts, version, type, properties, hdr);
755 if (!strm) {
756 LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
757 return false;
758 }
759 strm.seekp(0, ios_base::end);
760 if (!strm) {
761 LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
762 return false;
763 }
764 return true;
765 }
766
767 protected:
768 mutable uint64 properties_; // Property bits
769
770 private:
771 string type_; // Unique name of Fst class
772 SymbolTable *isymbols_; // Ilabel symbol table
773 SymbolTable *osymbols_; // Olabel symbol table
774 RefCounter ref_count_; // Reference count
775
776 void operator=(const FstImpl<A> &impl); // disallow
777 };
778
779 template <class A> inline
ReadHeader(istream & strm,const FstReadOptions & opts,int min_version,FstHeader * hdr)780 bool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions& opts,
781 int min_version, FstHeader *hdr) {
782 if (opts.header)
783 *hdr = *opts.header;
784 else if (!hdr->Read(strm, opts.source))
785 return false;
786
787 if (FLAGS_v >= 2) {
788 LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source
789 << ", fst_type: " << hdr->FstType()
790 << ", arc_type: " << A::Type()
791 << ", version: " << hdr->Version()
792 << ", flags: " << hdr->GetFlags();
793 }
794
795 if (hdr->FstType() != type_) {
796 LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_
797 << "\": " << opts.source;
798 return false;
799 }
800 if (hdr->ArcType() != A::Type()) {
801 LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type()
802 << "\": " << opts.source;
803 return false;
804 }
805 if (hdr->Version() < min_version) {
806 LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_
807 << " Fst version: " << opts.source;
808 return false;
809 }
810 properties_ = hdr->Properties();
811 if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS)
812 isymbols_ = SymbolTable::Read(strm, opts.source);
813 if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS)
814 osymbols_ =SymbolTable::Read(strm, opts.source);
815
816 if (opts.isymbols) {
817 delete isymbols_;
818 isymbols_ = opts.isymbols->Copy();
819 }
820 if (opts.osymbols) {
821 delete osymbols_;
822 osymbols_ = opts.osymbols->Copy();
823 }
824 return true;
825 }
826
827
828 template<class Arc>
829 uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known);
830
831
832 // This is a helper class template useful for attaching an Fst interface to
833 // its implementation, handling reference counting.
834 template < class I, class F = Fst<typename I::Arc> >
835 class ImplToFst : public F {
836 public:
837 typedef typename I::Arc Arc;
838 typedef typename Arc::Weight Weight;
839 typedef typename Arc::StateId StateId;
840
~ImplToFst()841 virtual ~ImplToFst() { if (!impl_->DecrRefCount()) delete impl_; }
842
Start()843 virtual StateId Start() const { return impl_->Start(); }
844
Final(StateId s)845 virtual Weight Final(StateId s) const { return impl_->Final(s); }
846
NumArcs(StateId s)847 virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); }
848
NumInputEpsilons(StateId s)849 virtual size_t NumInputEpsilons(StateId s) const {
850 return impl_->NumInputEpsilons(s);
851 }
852
NumOutputEpsilons(StateId s)853 virtual size_t NumOutputEpsilons(StateId s) const {
854 return impl_->NumOutputEpsilons(s);
855 }
856
Properties(uint64 mask,bool test)857 virtual uint64 Properties(uint64 mask, bool test) const {
858 if (test) {
859 uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops);
860 impl_->SetProperties(testprops, knownprops);
861 return testprops & mask;
862 } else {
863 return impl_->Properties(mask);
864 }
865 }
866
Type()867 virtual const string& Type() const { return impl_->Type(); }
868
InputSymbols()869 virtual const SymbolTable* InputSymbols() const {
870 return impl_->InputSymbols();
871 }
872
OutputSymbols()873 virtual const SymbolTable* OutputSymbols() const {
874 return impl_->OutputSymbols();
875 }
876
877 protected:
ImplToFst()878 ImplToFst() : impl_(0) {}
879
ImplToFst(I * impl)880 ImplToFst(I *impl) : impl_(impl) {}
881
ImplToFst(const ImplToFst<I,F> & fst)882 ImplToFst(const ImplToFst<I, F> &fst) {
883 impl_ = fst.impl_;
884 impl_->IncrRefCount();
885 }
886
887 // This constructor presumes there is a copy constructor for the
888 // implementation.
ImplToFst(const ImplToFst<I,F> & fst,bool safe)889 ImplToFst(const ImplToFst<I, F> &fst, bool safe) {
890 if (safe) {
891 impl_ = new I(*(fst.impl_));
892 } else {
893 impl_ = fst.impl_;
894 impl_->IncrRefCount();
895 }
896 }
897
GetImpl()898 I *GetImpl() const { return impl_; }
899
900 // Change Fst implementation pointer. If 'own_impl' is true,
901 // ownership of the input implementation is given to this
902 // object; otherwise, the input implementation's reference count
903 // should be incremented.
904 void SetImpl(I *impl, bool own_impl = true) {
905 if (!own_impl)
906 impl->IncrRefCount();
907 if (impl_ && !impl_->DecrRefCount()) delete impl_;
908 impl_ = impl;
909 }
910
911 private:
912 // Disallow
913 ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst);
914
915 ImplToFst<I, F> &operator=(const Fst<Arc> &fst) {
916 FSTERROR() << "ImplToFst: Assignment operator disallowed";
917 GetImpl()->SetProperties(kError, kError);
918 return *this;
919 }
920
921 I *impl_;
922 };
923
924
925 // Converts FSTs by casting their implementations, where this makes
926 // sense (which excludes implementations with weight-dependent virtual
927 // methods). Must be a friend of the Fst classes involved (currently
928 // the concrete Fsts: VectorFst, ConstFst, CompactFst).
Cast(const F & ifst,G * ofst)929 template<class F, class G> void Cast(const F &ifst, G *ofst) {
930 ofst->SetImpl(reinterpret_cast<typename G::Impl *>(ifst.GetImpl()), false);
931 }
932
933 // Fst Serialization
934 template <class A>
FstToString(const Fst<A> & fst,string * result)935 void FstToString(const Fst<A> &fst, string *result) {
936 ostringstream ostrm;
937 fst.Write(ostrm, FstWriteOptions("FstToString"));
938 *result = ostrm.str();
939 }
940
941 template <class A>
StringToFst(const string & s)942 Fst<A> *StringToFst(const string &s) {
943 istringstream istrm(s);
944 return Fst<A>::Read(istrm, FstReadOptions("StringToFst"));
945 }
946
947 } // namespace fst
948
949 #endif // FST_LIB_FST_H__
950