1 
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 //     http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 //
14 // Copyright 2005-2010 Google, Inc.
15 // Author: allauzen@google.com (Cyril Allauzen)
16 //
17 // \file
18 // A generic (string,type) list file format.
19 //
20 // This is a stripped-down version of STTable that does
21 // not support the Find() operation but that does support
// reading/writing from standard in/out.
23 
24 #ifndef FST_EXTENSIONS_FAR_STLIST_H_
25 #define FST_EXTENSIONS_FAR_STLIST_H_
26 
27 #include <iostream>
28 #include <fstream>
29 #include <sstream>
30 #include <fst/util.h>
31 
32 #include <algorithm>
33 #include <functional>
34 #include <queue>
35 #include <string>
36 #include <utility>
37 using std::pair; using std::make_pair;
38 #include <vector>
39 using std::vector;
40 
41 namespace fst {
42 
// Magic number that STListWriter emits first on its output stream; the
// readers in this file reject any input whose leading int32 differs from it.
static const int32 kSTListMagicNumber = 5656924;
// Format version emitted right after the magic number; the readers in this
// file reject any input whose version differs from it.
static const int32 kSTListFileVersion = 1;
45 
// String-type list writing class for objects of type 'T' using functor 'W'
// to write an object of type 'T' to a stream. 'W' must conform to the
// following interface:
49 //
50 //   struct Writer {
51 //     void operator()(ostream &, const T &) const;
52 //   };
53 //
54 template <class T, class W>
55 class STListWriter {
56  public:
57   typedef T EntryType;
58   typedef W EntryWriter;
59 
STListWriter(const string filename)60   explicit STListWriter(const string filename)
61       : stream_(
62           filename.empty() ? &cout :
63           new ofstream(filename.c_str(), ofstream::out | ofstream::binary)),
64         error_(false) {
65     WriteType(*stream_, kSTListMagicNumber);
66     WriteType(*stream_, kSTListFileVersion);
67     if (!stream_) {
68       FSTERROR() << "STListWriter::STListWriter: error writing to file: "
69                  << filename;
70       error_ = true;
71     }
72   }
73 
Create(const string & filename)74   static STListWriter<T, W> *Create(const string &filename) {
75     return new STListWriter<T, W>(filename);
76   }
77 
Add(const string & key,const T & t)78   void Add(const string &key, const T &t) {
79     if (key == "") {
80       FSTERROR() << "STListWriter::Add: key empty: " << key;
81       error_ = true;
82     } else if (key < last_key_) {
83       FSTERROR() << "STListWriter::Add: key disorder: " << key;
84       error_ = true;
85     }
86     if (error_) return;
87     last_key_ = key;
88     WriteType(*stream_, key);
89     entry_writer_(*stream_, t);
90   }
91 
Error()92   bool Error() const { return error_; }
93 
~STListWriter()94   ~STListWriter() {
95     WriteType(*stream_, string());
96     if (stream_ != &cout)
97       delete stream_;
98   }
99 
100  private:
101   EntryWriter entry_writer_;  // Write functor for 'EntryType'
102   ostream *stream_;           // Output stream
103   string last_key_;           // Last key
104   bool error_;
105 
106   DISALLOW_COPY_AND_ASSIGN(STListWriter);
107 };
108 
109 
// String-type list reading class for objects of type 'T' using functor 'R'
// to read an object of type 'T' from a stream. 'R' must conform to the
// following interface:
113 //
114 //   struct Reader {
115 //     T *operator()(istream &) const;
116 //   };
117 //
118 template <class T, class R>
119 class STListReader {
120  public:
121   typedef T EntryType;
122   typedef R EntryReader;
123 
STListReader(const vector<string> & filenames)124   explicit STListReader(const vector<string> &filenames)
125       : sources_(filenames), entry_(0), error_(false) {
126     streams_.resize(filenames.size(), 0);
127     bool has_stdin = false;
128     for (size_t i = 0; i < filenames.size(); ++i) {
129       if (filenames[i].empty()) {
130         if (!has_stdin) {
131           streams_[i] = &cin;
132           sources_[i] = "stdin";
133           has_stdin = true;
134         } else {
135           FSTERROR() << "STListReader::STListReader: stdin should only "
136                      << "appear once in the input file list.";
137           error_ = true;
138           return;
139         }
140       } else {
141         streams_[i] = new ifstream(
142             filenames[i].c_str(), ifstream::in | ifstream::binary);
143       }
144       int32 magic_number = 0, file_version = 0;
145       ReadType(*streams_[i], &magic_number);
146       ReadType(*streams_[i], &file_version);
147       if (magic_number != kSTListMagicNumber) {
148         FSTERROR() << "STListReader::STListReader: wrong file type: "
149                    << filenames[i];
150         error_ = true;
151         return;
152       }
153       if (file_version != kSTListFileVersion) {
154         FSTERROR() << "STListReader::STListReader: wrong file version: "
155                    << filenames[i];
156         error_ = true;
157         return;
158       }
159       string key;
160       ReadType(*streams_[i], &key);
161       if (!key.empty())
162         heap_.push(make_pair(key, i));
163       if (!*streams_[i]) {
164         FSTERROR() << "STListReader: error reading file: " << sources_[i];
165         error_ = true;
166         return;
167       }
168     }
169     if (heap_.empty()) return;
170     size_t current = heap_.top().second;
171     entry_ = entry_reader_(*streams_[current]);
172     if (!entry_ || !*streams_[current]) {
173       FSTERROR() << "STListReader: error reading entry for key: "
174                  << heap_.top().first << ", file: " << sources_[current];
175       error_ = true;
176     }
177   }
178 
~STListReader()179   ~STListReader() {
180     for (size_t i = 0; i < streams_.size(); ++i) {
181       if (streams_[i] != &cin)
182         delete streams_[i];
183     }
184     if (entry_)
185       delete entry_;
186   }
187 
Open(const string & filename)188   static STListReader<T, R> *Open(const string &filename) {
189     vector<string> filenames;
190     filenames.push_back(filename);
191     return new STListReader<T, R>(filenames);
192   }
193 
Open(const vector<string> & filenames)194   static STListReader<T, R> *Open(const vector<string> &filenames) {
195     return new STListReader<T, R>(filenames);
196   }
197 
Reset()198   void Reset() {
199     FSTERROR()
200         << "STListReader::Reset: stlist does not support reset operation";
201     error_ = true;
202   }
203 
Find(const string & key)204   bool Find(const string &key) {
205     FSTERROR()
206         << "STListReader::Find: stlist does not support find operation";
207     error_ = true;
208     return false;
209   }
210 
Done()211   bool Done() const {
212     return error_ || heap_.empty();
213   }
214 
Next()215   void Next() {
216     if (error_) return;
217     size_t current = heap_.top().second;
218     string key;
219     heap_.pop();
220     ReadType(*(streams_[current]), &key);
221     if (!*streams_[current]) {
222       FSTERROR() << "STListReader: error reading file: "
223                  << sources_[current];
224       error_ = true;
225       return;
226     }
227     if (!key.empty())
228       heap_.push(make_pair(key, current));
229 
230     if(!heap_.empty()) {
231       current = heap_.top().second;
232       if (entry_)
233         delete entry_;
234       entry_ = entry_reader_(*streams_[current]);
235       if (!entry_ || !*streams_[current]) {
236         FSTERROR() << "STListReader: error reading entry for key: "
237                    << heap_.top().first << ", file: " << sources_[current];
238         error_ = true;
239       }
240     }
241   }
242 
GetKey()243   const string &GetKey() const {
244     return heap_.top().first;
245   }
246 
GetEntry()247   const EntryType &GetEntry() const {
248     return *entry_;
249   }
250 
Error()251   bool Error() const { return error_; }
252 
253  private:
254   EntryReader entry_reader_;   // Read functor for 'EntryType'
255   vector<istream*> streams_;   // Input streams
256   vector<string> sources_;     // and corresponding file names
257   priority_queue<
258     pair<string, size_t>, vector<pair<string, size_t> >,
259     greater<pair<string, size_t> > > heap_;  // (Key, stream id) heap
260   mutable EntryType *entry_;   // Pointer to the currently read entry
261   bool error_;
262 
263   DISALLOW_COPY_AND_ASSIGN(STListReader);
264 };
265 
266 
// String-type list header reading function template on the entry header
// type 'H' having a member function:
//   Read(istream &strm, const string &filename);
// Checks that 'filename' is an STList and calls H::Read() on the first
// entry in the STList.
// Does not support reading from stdin.
273 template <class H>
ReadSTListHeader(const string & filename,H * header)274 bool ReadSTListHeader(const string &filename, H *header) {
275   if (filename.empty()) {
276     LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin";
277     return false;
278   }
279   ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
280   int32 magic_number = 0, file_version = 0;
281   ReadType(strm, &magic_number);
282   ReadType(strm, &file_version);
283   if (magic_number != kSTListMagicNumber) {
284     LOG(ERROR) << "ReadSTListHeader: wrong file type: " << filename;
285     return false;
286   }
287   if (file_version != kSTListFileVersion) {
288     LOG(ERROR) << "ReadSTListHeader: wrong file version: " << filename;
289     return false;
290   }
291   string key;
292   ReadType(strm, &key);
293   header->Read(strm, filename + ":" + key);
294   if (!strm) {
295     LOG(ERROR) << "ReadSTListHeader: error reading file: " << filename;
296     return false;
297   }
298   return true;
299 }
300 
// Declared here; the definition is not part of this header.
// NOTE(review): presumably returns true iff 'filename' names a file in STList
// format -- confirm against the definition.
bool IsSTList(const string &filename);
302 
303 }  // namespace fst
304 
305 #endif  // FST_EXTENSIONS_FAR_STLIST_H_
306