1
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 //
14 // Copyright 2005-2010 Google, Inc.
15 // Author: sorenj@google.com (Jeffrey Sorensen)
16
17 #ifndef FST_LIB_SYMBOL_TABLE_OPS_H_
18 #define FST_LIB_SYMBOL_TABLE_OPS_H_
19
20 #include <vector>
21 using std::vector;
22 #include <string>
23 #include <tr1/unordered_set>
24 using std::tr1::unordered_set;
25 using std::tr1::unordered_multiset;
26
27
28 #include <fst/fst.h>
29 #include <fst/symbol-table.h>
30
31
32 namespace fst {
33
34 // Returns a minimal symbol table containing only symbols referenced by the
35 // passed fst. Symbols preserve their original numbering, so fst does not
36 // require relabeling.
37 template<class Arc>
PruneSymbolTable(const Fst<Arc> & fst,const SymbolTable & syms,bool input)38 SymbolTable *PruneSymbolTable(const Fst<Arc> &fst, const SymbolTable &syms,
39 bool input) {
40 unordered_set<typename Arc::Label> seen;
41 seen.insert(0); // Always keep epslion
42 StateIterator<Fst<Arc> > siter(fst);
43 for (; !siter.Done(); siter.Next()) {
44 ArcIterator<Fst<Arc> > aiter(fst, siter.Value());
45 for (; !aiter.Done(); aiter.Next()) {
46 typename Arc::Label sym = (input) ? aiter.Value().ilabel :
47 aiter.Value().olabel;
48 seen.insert(sym);
49 }
50 }
51 SymbolTable *pruned = new SymbolTable(syms.Name() + "_pruned");
52 for (SymbolTableIterator stiter(syms); !stiter.Done(); stiter.Next()) {
53 typename Arc::Label label = stiter.Value();
54 if (seen.find(label) != seen.end()) {
55 pruned->AddSymbol(stiter.Symbol(), stiter.Value());
56 }
57 }
58 return pruned;
59 }
60
61 // Relabels a symbol table to make it a contiguous mapping.
62 SymbolTable *CompactSymbolTable(const SymbolTable &syms);
63
64 // Merges two SymbolTables, all symbols from left will be merged into right
65 // with the same ids. Symbols in right that have conflicting ids with those
66 // in left will be assigned to value assigned from the left SymbolTable.
67 // The returned symbol table will never modify symbol assignments from the left
68 // side, but may do so on the right. If right_relabel_output is non-NULL, it
69 // will be assigned true if the symbols from the right table needed to be
70 // reassigned.
71 // A potential use case is to Compose two Fst's that have different symbol
72 // tables. You can reconcile them in the following way:
73 // Fst<Arc> a, b;
74 // bool relabel;
75 // SymbolTable *bnew = MergeSymbolTable(a.OutputSymbols(),
76 // b.InputSymbols(), &relabel);
77 // if (relabel) {
78 // Relabel(b, bnew, NULL);
79 // }
80 // b.SetInputSymbols(bnew);
81 // delete bnew;
82 SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right,
83 bool *right_relabel_output = 0);
84
85 // Read the symbol table from any Fst::Read()able file, without loading the
86 // corresponding Fst. Returns NULL if the Fst does not contain a symbol table
87 // or the symbol table cannot be read.
88 SymbolTable *FstReadSymbols(const string &filename, bool input);
89
90 } // namespace fst
91 #endif // FST_LIB_SYMBOL_TABLE_OPS_H_
92