1
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 //
14 // Copyright 2005-2010 Google, Inc.
15 // Author: allauzen@google.com (Cyril Allauzen)
16 //
17 // \file
18 // A generic (string,type) list file format.
19 //
// This is a stripped-down version of STTable that does
// not support the Find() operation but that does support
// reading/writing from standard in/out.
23
24 #ifndef FST_EXTENSIONS_FAR_STLIST_H_
25 #define FST_EXTENSIONS_FAR_STLIST_H_
26
27 #include <iostream>
28 #include <fstream>
29 #include <sstream>
30 #include <fst/util.h>
31
32 #include <algorithm>
33 #include <functional>
34 #include <queue>
35 #include <string>
36 #include <utility>
37 using std::pair; using std::make_pair;
38 #include <vector>
39 using std::vector;
40
41 namespace fst {
42
43 static const int32 kSTListMagicNumber = 5656924;
44 static const int32 kSTListFileVersion = 1;
45
46 // String-type list writing class for object of type 'T' using functor 'W'
47 // to write an object of type 'T' from a stream. 'W' must conform to the
48 // following interface:
49 //
50 // struct Writer {
51 // void operator()(ostream &, const T &) const;
52 // };
53 //
54 template <class T, class W>
55 class STListWriter {
56 public:
57 typedef T EntryType;
58 typedef W EntryWriter;
59
STListWriter(const string filename)60 explicit STListWriter(const string filename)
61 : stream_(
62 filename.empty() ? &cout :
63 new ofstream(filename.c_str(), ofstream::out | ofstream::binary)),
64 error_(false) {
65 WriteType(*stream_, kSTListMagicNumber);
66 WriteType(*stream_, kSTListFileVersion);
67 if (!stream_) {
68 FSTERROR() << "STListWriter::STListWriter: error writing to file: "
69 << filename;
70 error_ = true;
71 }
72 }
73
Create(const string & filename)74 static STListWriter<T, W> *Create(const string &filename) {
75 return new STListWriter<T, W>(filename);
76 }
77
Add(const string & key,const T & t)78 void Add(const string &key, const T &t) {
79 if (key == "") {
80 FSTERROR() << "STListWriter::Add: key empty: " << key;
81 error_ = true;
82 } else if (key < last_key_) {
83 FSTERROR() << "STListWriter::Add: key disorder: " << key;
84 error_ = true;
85 }
86 if (error_) return;
87 last_key_ = key;
88 WriteType(*stream_, key);
89 entry_writer_(*stream_, t);
90 }
91
Error()92 bool Error() const { return error_; }
93
~STListWriter()94 ~STListWriter() {
95 WriteType(*stream_, string());
96 if (stream_ != &cout)
97 delete stream_;
98 }
99
100 private:
101 EntryWriter entry_writer_; // Write functor for 'EntryType'
102 ostream *stream_; // Output stream
103 string last_key_; // Last key
104 bool error_;
105
106 DISALLOW_COPY_AND_ASSIGN(STListWriter);
107 };
108
109
110 // String-type list reading class for object of type 'T' using functor 'R'
111 // to read an object of type 'T' form a stream. 'R' must conform to the
112 // following interface:
113 //
114 // struct Reader {
115 // T *operator()(istream &) const;
116 // };
117 //
118 template <class T, class R>
119 class STListReader {
120 public:
121 typedef T EntryType;
122 typedef R EntryReader;
123
STListReader(const vector<string> & filenames)124 explicit STListReader(const vector<string> &filenames)
125 : sources_(filenames), entry_(0), error_(false) {
126 streams_.resize(filenames.size(), 0);
127 bool has_stdin = false;
128 for (size_t i = 0; i < filenames.size(); ++i) {
129 if (filenames[i].empty()) {
130 if (!has_stdin) {
131 streams_[i] = &cin;
132 sources_[i] = "stdin";
133 has_stdin = true;
134 } else {
135 FSTERROR() << "STListReader::STListReader: stdin should only "
136 << "appear once in the input file list.";
137 error_ = true;
138 return;
139 }
140 } else {
141 streams_[i] = new ifstream(
142 filenames[i].c_str(), ifstream::in | ifstream::binary);
143 }
144 int32 magic_number = 0, file_version = 0;
145 ReadType(*streams_[i], &magic_number);
146 ReadType(*streams_[i], &file_version);
147 if (magic_number != kSTListMagicNumber) {
148 FSTERROR() << "STListReader::STListReader: wrong file type: "
149 << filenames[i];
150 error_ = true;
151 return;
152 }
153 if (file_version != kSTListFileVersion) {
154 FSTERROR() << "STListReader::STListReader: wrong file version: "
155 << filenames[i];
156 error_ = true;
157 return;
158 }
159 string key;
160 ReadType(*streams_[i], &key);
161 if (!key.empty())
162 heap_.push(make_pair(key, i));
163 if (!*streams_[i]) {
164 FSTERROR() << "STListReader: error reading file: " << sources_[i];
165 error_ = true;
166 return;
167 }
168 }
169 if (heap_.empty()) return;
170 size_t current = heap_.top().second;
171 entry_ = entry_reader_(*streams_[current]);
172 if (!entry_ || !*streams_[current]) {
173 FSTERROR() << "STListReader: error reading entry for key: "
174 << heap_.top().first << ", file: " << sources_[current];
175 error_ = true;
176 }
177 }
178
~STListReader()179 ~STListReader() {
180 for (size_t i = 0; i < streams_.size(); ++i) {
181 if (streams_[i] != &cin)
182 delete streams_[i];
183 }
184 if (entry_)
185 delete entry_;
186 }
187
Open(const string & filename)188 static STListReader<T, R> *Open(const string &filename) {
189 vector<string> filenames;
190 filenames.push_back(filename);
191 return new STListReader<T, R>(filenames);
192 }
193
Open(const vector<string> & filenames)194 static STListReader<T, R> *Open(const vector<string> &filenames) {
195 return new STListReader<T, R>(filenames);
196 }
197
Reset()198 void Reset() {
199 FSTERROR()
200 << "STListReader::Reset: stlist does not support reset operation";
201 error_ = true;
202 }
203
Find(const string & key)204 bool Find(const string &key) {
205 FSTERROR()
206 << "STListReader::Find: stlist does not support find operation";
207 error_ = true;
208 return false;
209 }
210
Done()211 bool Done() const {
212 return error_ || heap_.empty();
213 }
214
Next()215 void Next() {
216 if (error_) return;
217 size_t current = heap_.top().second;
218 string key;
219 heap_.pop();
220 ReadType(*(streams_[current]), &key);
221 if (!*streams_[current]) {
222 FSTERROR() << "STListReader: error reading file: "
223 << sources_[current];
224 error_ = true;
225 return;
226 }
227 if (!key.empty())
228 heap_.push(make_pair(key, current));
229
230 if(!heap_.empty()) {
231 current = heap_.top().second;
232 if (entry_)
233 delete entry_;
234 entry_ = entry_reader_(*streams_[current]);
235 if (!entry_ || !*streams_[current]) {
236 FSTERROR() << "STListReader: error reading entry for key: "
237 << heap_.top().first << ", file: " << sources_[current];
238 error_ = true;
239 }
240 }
241 }
242
GetKey()243 const string &GetKey() const {
244 return heap_.top().first;
245 }
246
GetEntry()247 const EntryType &GetEntry() const {
248 return *entry_;
249 }
250
Error()251 bool Error() const { return error_; }
252
253 private:
254 EntryReader entry_reader_; // Read functor for 'EntryType'
255 vector<istream*> streams_; // Input streams
256 vector<string> sources_; // and corresponding file names
257 priority_queue<
258 pair<string, size_t>, vector<pair<string, size_t> >,
259 greater<pair<string, size_t> > > heap_; // (Key, stream id) heap
260 mutable EntryType *entry_; // Pointer to the currently read entry
261 bool error_;
262
263 DISALLOW_COPY_AND_ASSIGN(STListReader);
264 };
265
266
267 // String-type list header reading function template on the entry header
268 // type 'H' having a member function:
269 // Read(istream &strm, const string &filename);
270 // Checks that 'filename' is an STList and call the H::Read() on the last
271 // entry in the STList.
272 // Does not support reading from stdin.
273 template <class H>
ReadSTListHeader(const string & filename,H * header)274 bool ReadSTListHeader(const string &filename, H *header) {
275 if (filename.empty()) {
276 LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin";
277 return false;
278 }
279 ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
280 int32 magic_number = 0, file_version = 0;
281 ReadType(strm, &magic_number);
282 ReadType(strm, &file_version);
283 if (magic_number != kSTListMagicNumber) {
284 LOG(ERROR) << "ReadSTListHeader: wrong file type: " << filename;
285 return false;
286 }
287 if (file_version != kSTListFileVersion) {
288 LOG(ERROR) << "ReadSTListHeader: wrong file version: " << filename;
289 return false;
290 }
291 string key;
292 ReadType(strm, &key);
293 header->Read(strm, filename + ":" + key);
294 if (!strm) {
295 LOG(ERROR) << "ReadSTListHeader: error reading file: " << filename;
296 return false;
297 }
298 return true;
299 }
300
// Declared here, defined elsewhere. Presumably reports whether 'filename'
// names a file in the STList format -- NOTE(review): confirm against the
// definition.
bool IsSTList(const std::string &filename);
302
303 } // namespace fst
304
305 #endif // FST_EXTENSIONS_FAR_STLIST_H_
306