1 // far.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Finite-State Transducer (FST) archive classes.
20 //
21 
22 #ifndef FST_EXTENSIONS_FAR_FAR_H__
23 #define FST_EXTENSIONS_FAR_FAR_H__
24 
25 #include <fst/extensions/far/stlist.h>
26 #include <fst/extensions/far/sttable.h>
27 #include <fst/fst.h>
28 #include <fst/vector-fst.h>
29 
30 namespace fst {
31 
// Kinds of FAR entry.
// NOTE(review): presumably FET_LINE makes one archive entry per input line
// and FET_FILE one entry per input file -- confirm against the users of
// this enum, which are not in this header.
enum FarEntryType {
  FET_LINE,
  FET_FILE
};

// Kinds of FAR token.
// NOTE(review): presumably selects whether input text is interpreted as
// symbols, raw bytes, or UTF-8 -- confirm against the users of this enum.
enum FarTokenType {
  FTT_SYMBOL,
  FTT_BYTE,
  FTT_UTF8
};
34 
IsFst(const string & filename)35 inline bool IsFst(const string &filename) {
36   ifstream strm(filename.c_str());
37   if (!strm)
38     return false;
39   return IsFstHeader(strm, filename);
40 }
41 
42 // FST archive header class
43 class FarHeader {
44  public:
FarType()45   const string &FarType() const { return fartype_; }
ArcType()46   const string &ArcType() const { return arctype_; }
47 
Read(const string & filename)48   bool Read(const string &filename) {
49     FstHeader fsthdr;
50     if (filename.empty()) {
51       // Header reading unsupported on stdin. Assumes STList and StdArc.
52       fartype_ = "stlist";
53       arctype_ = "standard";
54       return true;
55     } else if (IsSTTable(filename)) {  // Check if STTable
56       ReadSTTableHeader(filename, &fsthdr);
57       fartype_ = "sttable";
58       arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
59       return true;
60     } else if (IsSTList(filename)) {  // Check if STList
61       ReadSTListHeader(filename, &fsthdr);
62       fartype_ = "sttable";
63       arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
64       return true;
65     } else if (IsFst(filename)) {  // Check if Fst
66       ifstream istrm(filename.c_str());
67       fsthdr.Read(istrm, filename);
68       fartype_ = "fst";
69       arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
70       return true;
71     }
72     return false;
73   }
74 
75  private:
76   string fartype_;
77   string arctype_;
78 };
79 
// Archive storage formats, as passed to FarWriter::Create() and reported by
// the Type() accessors of the reader and writer classes.
enum FarType {
  // Let FarWriter::Create() choose: STList when the filename is empty,
  // STTable otherwise.
  FAR_DEFAULT = 0,
  // Archive stored as an STTable.
  FAR_STTABLE = 1,
  // Archive stored as an STList.
  FAR_STLIST = 2,
  // A plain FST file treated as an archive.
  FAR_FST = 3
};
86 
87 // This class creates an archive of FSTs.
88 template <class A>
89 class FarWriter {
90  public:
91   typedef A Arc;
92 
93   // Creates a new (empty) FST archive; returns NULL on error.
94   static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);
95 
96   // Adds an FST to the end of an archive. Keys must be non-empty and
97   // in lexicographic order. FSTs must have a suitable write method.
98   virtual void Add(const string &key, const Fst<A> &fst) = 0;
99 
100   virtual FarType Type() const = 0;
101 
102   virtual bool Error() const = 0;
103 
~FarWriter()104   virtual ~FarWriter() {}
105 
106  protected:
FarWriter()107   FarWriter() {}
108 
109  private:
110   DISALLOW_COPY_AND_ASSIGN(FarWriter);
111 };
112 
113 
114 // This class iterates through an existing archive of FSTs.
115 template <class A>
116 class FarReader {
117  public:
118  typedef A Arc;
119 
120   // Opens an existing FST archive in a single file; returns NULL on error.
121   // Sets current position to the beginning of the achive.
122   static FarReader *Open(const string &filename);
123 
124   // Opens an existing FST archive in multiple files; returns NULL on error.
125   // Sets current position to the beginning of the achive.
126   static FarReader *Open(const vector<string> &filenames);
127 
128   // Resets current posision to beginning of archive.
129   virtual void Reset() = 0;
130 
131   // Sets current position to first entry >= key.  Returns true if a match.
132   virtual bool Find(const string &key) = 0;
133 
134   // Current position at end of archive?
135   virtual bool Done() const = 0;
136 
137   // Move current position to next FST.
138   virtual void Next() = 0;
139 
140   // Returns key at the current position. This reference is invalidated if
141   // the current position in the archive is changed.
142   virtual const string &GetKey() const = 0;
143 
144   // Returns FST at the current position. This reference is invalidated if
145   // the current position in the archive is changed.
146   virtual const Fst<A> &GetFst() const = 0;
147 
148   virtual FarType Type() const = 0;
149 
150   virtual bool Error() const = 0;
151 
~FarReader()152   virtual ~FarReader() {}
153 
154  protected:
FarReader()155   FarReader() {}
156 
157  private:
158   DISALLOW_COPY_AND_ASSIGN(FarReader);
159 };
160 
161 
162 template <class A>
163 class FstWriter {
164  public:
operator()165   void operator()(ostream &strm, const Fst<A> &fst) const {
166     fst.Write(strm, FstWriteOptions());
167   }
168 };
169 
170 
171 template <class A>
172 class STTableFarWriter : public FarWriter<A> {
173  public:
174   typedef A Arc;
175 
Create(const string & filename)176   static STTableFarWriter *Create(const string &filename) {
177     STTableWriter<Fst<A>, FstWriter<A> > *writer =
178         STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
179     return new STTableFarWriter(writer);
180   }
181 
Add(const string & key,const Fst<A> & fst)182   void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
183 
Type()184   FarType Type() const { return FAR_STTABLE; }
185 
Error()186   bool Error() const { return writer_->Error(); }
187 
~STTableFarWriter()188   ~STTableFarWriter() { delete writer_; }
189 
190  private:
STTableFarWriter(STTableWriter<Fst<A>,FstWriter<A>> * writer)191   explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer)
192       : writer_(writer) {}
193 
194  private:
195   STTableWriter<Fst<A>, FstWriter<A> > *writer_;
196 
197   DISALLOW_COPY_AND_ASSIGN(STTableFarWriter);
198 };
199 
200 
201 template <class A>
202 class STListFarWriter : public FarWriter<A> {
203  public:
204   typedef A Arc;
205 
Create(const string & filename)206   static STListFarWriter *Create(const string &filename) {
207     STListWriter<Fst<A>, FstWriter<A> > *writer =
208         STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
209     return new STListFarWriter(writer);
210   }
211 
Add(const string & key,const Fst<A> & fst)212   void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
213 
Type()214   FarType Type() const { return FAR_STLIST; }
215 
Error()216   bool Error() const { return writer_->Error(); }
217 
~STListFarWriter()218   ~STListFarWriter() { delete writer_; }
219 
220  private:
STListFarWriter(STListWriter<Fst<A>,FstWriter<A>> * writer)221   explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer)
222       : writer_(writer) {}
223 
224  private:
225   STListWriter<Fst<A>, FstWriter<A> > *writer_;
226 
227   DISALLOW_COPY_AND_ASSIGN(STListFarWriter);
228 };
229 
230 
231 template <class A>
232 class FstFarWriter : public FarWriter<A> {
233  public:
234   typedef A Arc;
235 
FstFarWriter(const string & filename)236   explicit FstFarWriter(const string &filename)
237       : filename_(filename), error_(false), written_(false) {}
238 
Create(const string & filename)239   static FstFarWriter *Create(const string &filename) {
240     return new FstFarWriter(filename);
241   }
242 
Add(const string & key,const Fst<A> & fst)243   void Add(const string &key, const Fst<A> &fst) {
244     if (written_) {
245       LOG(WARNING) << "FstFarWriter::Add: only one Fst supported,"
246                  << " subsequent entries discarded.";
247     } else {
248       error_ = !fst.Write(filename_);
249       written_ = true;
250     }
251   }
252 
Type()253   FarType Type() const { return FAR_FST; }
254 
Error()255   bool Error() const { return error_; }
256 
~FstFarWriter()257   ~FstFarWriter() {}
258 
259  private:
260   string filename_;
261   bool error_;
262   bool written_;
263 
264   DISALLOW_COPY_AND_ASSIGN(FstFarWriter);
265 };
266 
267 
268 template <class A>
Create(const string & filename,FarType type)269 FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
270   switch(type) {
271     case FAR_DEFAULT:
272       if (filename.empty())
273         return STListFarWriter<A>::Create(filename);
274     case FAR_STTABLE:
275       return STTableFarWriter<A>::Create(filename);
276     case FAR_STLIST:
277       return STListFarWriter<A>::Create(filename);
278     case FAR_FST:
279       return FstFarWriter<A>::Create(filename);
280     default:
281       LOG(ERROR) << "FarWriter::Create: unknown far type";
282       return 0;
283   }
284 }
285 
286 
287 template <class A>
288 class FstReader {
289  public:
operator()290   Fst<A> *operator()(istream &strm) const {
291     return Fst<A>::Read(strm, FstReadOptions());
292   }
293 };
294 
295 
296 template <class A>
297 class STTableFarReader : public FarReader<A> {
298  public:
299   typedef A Arc;
300 
Open(const string & filename)301   static STTableFarReader *Open(const string &filename) {
302     STTableReader<Fst<A>, FstReader<A> > *reader =
303         STTableReader<Fst<A>, FstReader<A> >::Open(filename);
304     // TODO: error check
305     return new STTableFarReader(reader);
306   }
307 
Open(const vector<string> & filenames)308   static STTableFarReader *Open(const vector<string> &filenames) {
309     STTableReader<Fst<A>, FstReader<A> > *reader =
310         STTableReader<Fst<A>, FstReader<A> >::Open(filenames);
311     // TODO: error check
312     return new STTableFarReader(reader);
313   }
314 
Reset()315   void Reset() { reader_->Reset(); }
316 
Find(const string & key)317   bool Find(const string &key) { return reader_->Find(key); }
318 
Done()319   bool Done() const { return reader_->Done(); }
320 
Next()321   void Next() { return reader_->Next(); }
322 
GetKey()323   const string &GetKey() const { return reader_->GetKey(); }
324 
GetFst()325   const Fst<A> &GetFst() const { return reader_->GetEntry(); }
326 
Type()327   FarType Type() const { return FAR_STTABLE; }
328 
Error()329   bool Error() const { return reader_->Error(); }
330 
~STTableFarReader()331   ~STTableFarReader() { delete reader_; }
332 
333  private:
STTableFarReader(STTableReader<Fst<A>,FstReader<A>> * reader)334   explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader)
335       : reader_(reader) {}
336 
337  private:
338   STTableReader<Fst<A>, FstReader<A> > *reader_;
339 
340   DISALLOW_COPY_AND_ASSIGN(STTableFarReader);
341 };
342 
343 
344 template <class A>
345 class STListFarReader : public FarReader<A> {
346  public:
347   typedef A Arc;
348 
Open(const string & filename)349   static STListFarReader *Open(const string &filename) {
350     STListReader<Fst<A>, FstReader<A> > *reader =
351         STListReader<Fst<A>, FstReader<A> >::Open(filename);
352     // TODO: error check
353     return new STListFarReader(reader);
354   }
355 
Open(const vector<string> & filenames)356   static STListFarReader *Open(const vector<string> &filenames) {
357     STListReader<Fst<A>, FstReader<A> > *reader =
358         STListReader<Fst<A>, FstReader<A> >::Open(filenames);
359     // TODO: error check
360     return new STListFarReader(reader);
361   }
362 
Reset()363   void Reset() { reader_->Reset(); }
364 
Find(const string & key)365   bool Find(const string &key) { return reader_->Find(key); }
366 
Done()367   bool Done() const { return reader_->Done(); }
368 
Next()369   void Next() { return reader_->Next(); }
370 
GetKey()371   const string &GetKey() const { return reader_->GetKey(); }
372 
GetFst()373   const Fst<A> &GetFst() const { return reader_->GetEntry(); }
374 
Type()375   FarType Type() const { return FAR_STLIST; }
376 
Error()377   bool Error() const { return reader_->Error(); }
378 
~STListFarReader()379   ~STListFarReader() { delete reader_; }
380 
381  private:
STListFarReader(STListReader<Fst<A>,FstReader<A>> * reader)382   explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader)
383       : reader_(reader) {}
384 
385  private:
386   STListReader<Fst<A>, FstReader<A> > *reader_;
387 
388   DISALLOW_COPY_AND_ASSIGN(STListFarReader);
389 };
390 
391 template <class A>
392 class FstFarReader : public FarReader<A> {
393  public:
394   typedef A Arc;
395 
Open(const string & filename)396   static FstFarReader *Open(const string &filename) {
397     vector<string> filenames;
398     filenames.push_back(filename);
399     return new FstFarReader<A>(filenames);
400   }
401 
Open(const vector<string> & filenames)402   static FstFarReader *Open(const vector<string> &filenames) {
403     return new FstFarReader<A>(filenames);
404   }
405 
FstFarReader(const vector<string> & filenames)406   FstFarReader(const vector<string> &filenames)
407       : keys_(filenames), has_stdin_(false), pos_(0), fst_(0), error_(false) {
408     sort(keys_.begin(), keys_.end());
409     streams_.resize(keys_.size(), 0);
410     for (size_t i = 0; i < keys_.size(); ++i) {
411       if (keys_[i].empty()) {
412         if (!has_stdin_) {
413           streams_[i] = &cin;
414           //sources_[i] = "stdin";
415           has_stdin_ = true;
416         } else {
417           FSTERROR() << "FstFarReader::FstFarReader: stdin should only "
418                      << "appear once in the input file list.";
419           error_ = true;
420           return;
421         }
422       } else {
423         streams_[i] = new ifstream(
424             keys_[i].c_str(), ifstream::in | ifstream::binary);
425       }
426     }
427     if (pos_ >= keys_.size()) return;
428     ReadFst();
429   }
430 
Reset()431   void Reset() {
432     if (has_stdin_) {
433       FSTERROR() << "FstFarReader::Reset: operation not supported on stdin";
434       error_ = true;
435       return;
436     }
437     pos_ = 0;
438     ReadFst();
439   }
440 
Find(const string & key)441   bool Find(const string &key) {
442     if (has_stdin_) {
443       FSTERROR() << "FstFarReader::Find: operation not supported on stdin";
444       error_ = true;
445       return false;
446     }
447     pos_ = 0;//TODO
448     ReadFst();
449     return true;
450   }
451 
Done()452   bool Done() const { return error_ || pos_ >= keys_.size(); }
453 
Next()454   void Next() {
455     ++pos_;
456     ReadFst();
457   }
458 
GetKey()459   const string &GetKey() const {
460     return keys_[pos_];
461   }
462 
GetFst()463   const Fst<A> &GetFst() const {
464     return *fst_;
465   }
466 
Type()467   FarType Type() const { return FAR_FST; }
468 
Error()469   bool Error() const { return error_; }
470 
~FstFarReader()471   ~FstFarReader() {
472     if (fst_) delete fst_;
473     for (size_t i = 0; i < keys_.size(); ++i)
474       delete streams_[i];
475   }
476 
477  private:
ReadFst()478   void ReadFst() {
479     if (fst_) delete fst_;
480     if (pos_ >= keys_.size()) return;
481     streams_[pos_]->seekg(0);
482     fst_ = Fst<A>::Read(*streams_[pos_], FstReadOptions());
483     if (!fst_) {
484       FSTERROR() << "FstFarReader: error reading Fst from: " << keys_[pos_];
485       error_ = true;
486     }
487   }
488 
489  private:
490   vector<string> keys_;
491   vector<istream*> streams_;
492   bool has_stdin_;
493   size_t pos_;
494   mutable Fst<A> *fst_;
495   mutable bool error_;
496 
497   DISALLOW_COPY_AND_ASSIGN(FstFarReader);
498 };
499 
500 template <class A>
Open(const string & filename)501 FarReader<A> *FarReader<A>::Open(const string &filename) {
502   if (filename.empty())
503     return STListFarReader<A>::Open(filename);
504   else if (IsSTTable(filename))
505     return STTableFarReader<A>::Open(filename);
506   else if (IsSTList(filename))
507     return STListFarReader<A>::Open(filename);
508   else if (IsFst(filename))
509     return FstFarReader<A>::Open(filename);
510   return 0;
511 }
512 
513 
514 template <class A>
Open(const vector<string> & filenames)515 FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
516   if (!filenames.empty() && filenames[0].empty())
517     return STListFarReader<A>::Open(filenames);
518   else if (!filenames.empty() && IsSTTable(filenames[0]))
519     return STTableFarReader<A>::Open(filenames);
520   else if (!filenames.empty() && IsSTList(filenames[0]))
521     return STListFarReader<A>::Open(filenames);
522   else if (!filenames.empty() && IsFst(filenames[0]))
523     return FstFarReader<A>::Open(filenames);
524   return 0;
525 }
526 
527 }  // namespace fst
528 
529 #endif  // FST_EXTENSIONS_FAR_FAR_H__
530