1 // properties.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: Michael Riley <riley@google.com>
17 // \file
18 // FST property bits.
19 
20 #ifndef FST_LIB_PROPERTIES_H__
21 #define FST_LIB_PROPERTIES_H__
22 
23 #include <sys/types.h>
24 #include <vector>
25 using std::vector;
26 
27 #include <fst/compat.h>
28 
29 namespace fst {
30 
31 // The property bits here assert facts about an FST. If individual
32 // bits are added, then the composite properties below, the property
33 // functions and property names in properties.cc, and
34 // TestProperties() in test-properties.h should be updated.
35 
36 //
37 // BINARY PROPERTIES
38 //
39 // For each property below, there is a single bit. If it is set,
40 // the property is true. If it is not set, the property is false.
41 //
42 
43 // The Fst is an ExpandedFst
44 const uint64 kExpanded =          0x0000000000000001ULL;
45 
46 // The Fst is a MutableFst
47 const uint64 kMutable =           0x0000000000000002ULL;
48 
49 // An error was detected while constructing/using the FST
50 const uint64 kError =             0x0000000000000004ULL;
51 
52 //
53 // TRINARY PROPERTIES
54 //
55 // For each of these properties below there is a pair of property bits
56 // - one positive and one negative. If the positive bit is set, the
57 // property is true. If the negative bit is set, the property is
58 // false. If neither is set, the property has unknown value. Both
59 // should never be simultaneously set. The individual positive and
60 // negative bit pairs should be adjacent with the positive bit
61 // at an odd and lower position.
62 
63 // ilabel == olabel for each arc
64 const uint64 kAcceptor =          0x0000000000010000ULL;
65 // ilabel != olabel for some arc
66 const uint64 kNotAcceptor =       0x0000000000020000ULL;
67 
68 // ilabels unique leaving each state
69 const uint64 kIDeterministic =    0x0000000000040000ULL;
70 // ilabels not unique leaving some state
71 const uint64 kNonIDeterministic = 0x0000000000080000ULL;
72 
73 // olabels unique leaving each state
74 const uint64 kODeterministic =    0x0000000000100000ULL;
75 // olabels not unique leaving some state
76 const uint64 kNonODeterministic = 0x0000000000200000ULL;
77 
78 // FST has input/output epsilons
79 const uint64 kEpsilons =          0x0000000000400000ULL;
80 // FST has no input/output epsilons
81 const uint64 kNoEpsilons =        0x0000000000800000ULL;
82 
83 // FST has input epsilons
84 const uint64 kIEpsilons =         0x0000000001000000ULL;
85 // FST has no input epsilons
86 const uint64 kNoIEpsilons =       0x0000000002000000ULL;
87 
88 // FST has output epsilons
89 const uint64 kOEpsilons =         0x0000000004000000ULL;
90 // FST has no output epsilons
91 const uint64 kNoOEpsilons =       0x0000000008000000ULL;
92 
93 // ilabels sorted wrt < for each state
94 const uint64 kILabelSorted =      0x0000000010000000ULL;
95 // ilabels not sorted wrt < for some state
96 const uint64 kNotILabelSorted =   0x0000000020000000ULL;
97 
98 // olabels sorted wrt < for each state
99 const uint64 kOLabelSorted =      0x0000000040000000ULL;
100 // olabels not sorted wrt < for some state
101 const uint64 kNotOLabelSorted =   0x0000000080000000ULL;
102 
103 // Non-trivial arc or final weights
104 const uint64 kWeighted =          0x0000000100000000ULL;
105 // Only trivial arc and final weights
106 const uint64 kUnweighted =        0x0000000200000000ULL;
107 
108 // FST has cycles
109 const uint64 kCyclic =            0x0000000400000000ULL;
110 // FST has no cycles
111 const uint64 kAcyclic =           0x0000000800000000ULL;
112 
113 // FST has cycles containing the initial state
114 const uint64 kInitialCyclic =     0x0000001000000000ULL;
115 // FST has no cycles containing the initial state
116 const uint64 kInitialAcyclic =    0x0000002000000000ULL;
117 
118 // FST is topologically sorted
119 const uint64 kTopSorted =         0x0000004000000000ULL;
120 // FST is not topologically sorted
121 const uint64 kNotTopSorted =      0x0000008000000000ULL;
122 
123 // All states reachable from the initial state
124 const uint64 kAccessible =        0x0000010000000000ULL;
125 // Not all states reachable from the initial state
126 const uint64 kNotAccessible =     0x0000020000000000ULL;
127 
128 // All states can reach a final state
129 const uint64 kCoAccessible =      0x0000040000000000ULL;
130 // Not all states can reach a final state
131 const uint64 kNotCoAccessible =   0x0000080000000000ULL;
132 
133 // If NumStates() > 0, then state 0 is initial, state NumStates()-1 is
134 // final, there is a transition from each non-final state i to
135 // state i+1, and there are no other transitions.
136 const uint64 kString =            0x0000100000000000ULL;
137 
138 // Not a string FST
139 const uint64 kNotString =         0x0000200000000000ULL;
140 
141 //
142 // COMPOSITE PROPERTIES
143 //
144 
145 // Properties of an empty machine
146 const uint64 kNullProperties
147   = kAcceptor | kIDeterministic | kODeterministic | kNoEpsilons |
148     kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted |
149     kUnweighted | kAcyclic | kInitialAcyclic | kTopSorted |
150     kAccessible | kCoAccessible | kString;
151 
152 // Properties that are preserved when an FST is copied
153 const uint64 kCopyProperties
154   = kError | kAcceptor | kNotAcceptor | kIDeterministic | kNonIDeterministic |
155     kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons |
156     kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons |
157     kILabelSorted | kNotILabelSorted | kOLabelSorted |
158     kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic |
159     kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
160     kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
161     kString | kNotString;
162 
163 // Properites that are intrinsic to the FST
164 const uint64 kIntrinsicProperties
165   = kExpanded | kMutable | kAcceptor | kNotAcceptor | kIDeterministic |
166     kNonIDeterministic | kODeterministic | kNonODeterministic |
167     kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
168     kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted |
169     kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic |
170     kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
171     kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
172     kString | kNotString;
173 
174 // Properites that are (potentially) extrinsic to the FST
175 const uint64 kExtrinsicProperties = kError;
176 
177 // Properties that are preserved when an FST start state is set
178 const uint64 kSetStartProperties
179   = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
180     kIDeterministic | kNonIDeterministic | kODeterministic |
181     kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
182     kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
183     kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted |
184     kUnweighted | kCyclic | kAcyclic | kTopSorted | kNotTopSorted |
185     kCoAccessible | kNotCoAccessible;
186 
187 // Properties that are preserved when an FST final weight is set
188 const uint64 kSetFinalProperties
189   = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
190     kIDeterministic | kNonIDeterministic | kODeterministic |
191     kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
192     kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
193     kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kCyclic |
194     kAcyclic | kInitialCyclic | kInitialAcyclic | kTopSorted |
195     kNotTopSorted | kAccessible | kNotAccessible;
196 
197 // Properties that are preserved when an FST state is added
198 const uint64 kAddStateProperties
199   = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
200     kIDeterministic | kNonIDeterministic | kODeterministic |
201     kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
202     kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
203     kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted |
204     kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
205     kInitialAcyclic | kTopSorted | kNotTopSorted | kNotAccessible |
206     kNotCoAccessible | kNotString;
207 
208 // Properties that are preserved when an FST arc is added
209 const uint64 kAddArcProperties = kExpanded | kMutable | kError | kNotAcceptor |
210     kNonIDeterministic | kNonODeterministic | kEpsilons | kIEpsilons |
211     kOEpsilons | kNotILabelSorted | kNotOLabelSorted | kWeighted |
212     kCyclic | kInitialCyclic | kNotTopSorted | kAccessible | kCoAccessible;
213 
214 // Properties that are preserved when an FST arc is set
215 const uint64 kSetArcProperties = kExpanded | kMutable | kError;
216 
217 // Properties that are preserved when FST states are deleted
218 const uint64 kDeleteStatesProperties
219   = kExpanded | kMutable | kError | kAcceptor | kIDeterministic |
220     kODeterministic | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
221     kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic |
222     kInitialAcyclic | kTopSorted;
223 
224 // Properties that are preserved when FST arcs are deleted
225 const uint64 kDeleteArcsProperties
226   = kExpanded | kMutable | kError | kAcceptor | kIDeterministic |
227     kODeterministic | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
228     kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic |
229     kInitialAcyclic | kTopSorted |  kNotAccessible | kNotCoAccessible;
230 
231 // Properties that are preserved when an FST's states are reordered
232 const uint64 kStateSortProperties = kExpanded | kMutable | kError | kAcceptor |
233     kNotAcceptor | kIDeterministic | kNonIDeterministic |
234     kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons |
235     kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons |
236     kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted
237     | kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
238     kInitialAcyclic | kAccessible | kNotAccessible | kCoAccessible |
239     kNotCoAccessible;
240 
241 // Properties that are preserved when an FST's arcs are reordered
242 const uint64 kArcSortProperties =
243   kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic |
244   kNonIDeterministic | kODeterministic | kNonODeterministic |
245   kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
246   kNoOEpsilons | kWeighted | kUnweighted | kCyclic | kAcyclic |
247   kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
248   kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
249   kString | kNotString;
250 
251 // Properties that are preserved when an FST's input labels are changed.
252 const uint64 kILabelInvariantProperties =
253   kExpanded | kMutable | kError | kODeterministic | kNonODeterministic |
254   kOEpsilons | kNoOEpsilons | kOLabelSorted | kNotOLabelSorted |
255   kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
256   kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible |
257   kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString;
258 
259 // Properties that are preserved when an FST's output labels are changed.
260 const uint64 kOLabelInvariantProperties =
261   kExpanded | kMutable | kError | kIDeterministic | kNonIDeterministic |
262   kIEpsilons | kNoIEpsilons | kILabelSorted | kNotILabelSorted |
263   kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
264   kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible |
265   kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString;
266 
267 // Properties that are preserved when an FST's weights are changed.
268 // This assumes that the set of states that are non-final is not changed.
269 const uint64 kWeightInvariantProperties =
270   kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic |
271   kNonIDeterministic | kODeterministic | kNonODeterministic |
272   kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
273   kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted |
274   kNotOLabelSorted | kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
275   kTopSorted | kNotTopSorted | kAccessible | kNotAccessible | kCoAccessible |
276   kNotCoAccessible | kString | kNotString;
277 
278 // Properties that are preserved when a superfinal state is added
279 // and an FSTs final weights are directed to it via new transitions.
280 const uint64 kAddSuperFinalProperties  = kExpanded | kMutable | kError |
281     kAcceptor | kNotAcceptor | kNonIDeterministic | kNonODeterministic |
282     kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted | kNotOLabelSorted |
283     kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
284     kInitialAcyclic | kNotTopSorted | kNotAccessible | kCoAccessible |
285     kNotCoAccessible | kNotString;
286 
287 // Properties that are preserved when a superfinal state is removed
288 // and the epsilon transitions directed to it are made final weights.
289 const uint64 kRmSuperFinalProperties  = kExpanded | kMutable | kError |
290     kAcceptor | kNotAcceptor | kIDeterministic | kODeterministic |
291     kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted |
292     kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
293     kInitialAcyclic | kTopSorted | kAccessible | kCoAccessible |
294     kNotCoAccessible | kString;
295 
296 // All binary properties
297 const uint64 kBinaryProperties =  0x0000000000000007ULL;
298 
299 // All trinary properties
300 const uint64 kTrinaryProperties = 0x00003fffffff0000ULL;
301 
302 //
303 // COMPUTED PROPERTIES
304 //
305 
306 // 1st bit of trinary properties
307 const uint64 kPosTrinaryProperties =
308   kTrinaryProperties & 0x5555555555555555ULL;
309 
310 // 2nd bit of trinary properties
311 const uint64 kNegTrinaryProperties =
312   kTrinaryProperties & 0xaaaaaaaaaaaaaaaaULL;
313 
314 // All properties
315 const uint64 kFstProperties = kBinaryProperties | kTrinaryProperties;
316 
317 //
318 // PROPERTY FUNCTIONS and STRING NAMES (defined in properties.cc)
319 //
320 
321 // Below are functions for getting property bit vectors when executing
322 // mutating fst operations.
323 inline uint64 SetStartProperties(uint64 inprops);
324 template <typename Weight>
325 uint64 SetFinalProperties(uint64 inprops, Weight old_weight,
326                           Weight new_weight);
327 inline uint64 AddStateProperties(uint64 inprops);
328 template <typename A>
329 uint64 AddArcProperties(uint64 inprops, typename A::StateId s, const A &arc,
330                            const A *prev_arc);
331 inline uint64 DeleteStatesProperties(uint64 inprops);
332 inline uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticProps);
333 inline uint64 DeleteArcsProperties(uint64 inprops);
334 
335 uint64 ClosureProperties(uint64 inprops, bool star, bool delayed = false);
336 uint64 ComplementProperties(uint64 inprops);
337 uint64 ComposeProperties(uint64 inprops1, uint64 inprops2);
338 uint64 ConcatProperties(uint64 inprops1, uint64 inprops2,
339                         bool delayed = false);
340 uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label);
341 uint64 FactorWeightProperties(uint64 inprops);
342 uint64 InvertProperties(uint64 inprops);
343 uint64 ProjectProperties(uint64 inprops, bool project_input);
344 uint64 RandGenProperties(uint64 inprops, bool weighted);
345 uint64 RelabelProperties(uint64 inprops);
346 uint64 ReplaceProperties(const vector<uint64>& inprops,
347                          ssize_t root,
348                          bool epsilon_on_replace,
349                          bool no_empty_fst);
350 uint64 ReverseProperties(uint64 inprops);
351 uint64 ReweightProperties(uint64 inprops);
352 uint64 RmEpsilonProperties(uint64 inprops, bool delayed = false);
353 uint64 ShortestPathProperties(uint64 props);
354 uint64 SynchronizeProperties(uint64 inprops);
355 uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed = false);
356 
357 // Definitions of inlined functions.
358 
SetStartProperties(uint64 inprops)359 uint64 SetStartProperties(uint64 inprops) {
360   uint64 outprops = inprops & kSetStartProperties;
361   if (inprops & kAcyclic) {
362     outprops |= kInitialAcyclic;
363   }
364   return outprops;
365 }
366 
AddStateProperties(uint64 inprops)367 uint64 AddStateProperties(uint64 inprops) {
368   return inprops & kAddStateProperties;
369 }
370 
DeleteStatesProperties(uint64 inprops)371 uint64 DeleteStatesProperties(uint64 inprops) {
372   return inprops & kDeleteStatesProperties;
373 }
374 
DeleteAllStatesProperties(uint64 inprops,uint64 staticprops)375 uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticprops) {
376   uint64 outprops = inprops & kError;
377   return outprops | kNullProperties | staticprops;
378 }
379 
DeleteArcsProperties(uint64 inprops)380 uint64 DeleteArcsProperties(uint64 inprops) {
381   return inprops & kDeleteArcsProperties;
382 }
383 
384 // Definitions of template functions.
385 
386 //
387 template <typename Weight>
SetFinalProperties(uint64 inprops,Weight old_weight,Weight new_weight)388 uint64 SetFinalProperties(uint64 inprops, Weight old_weight,
389                           Weight new_weight) {
390   uint64 outprops = inprops;
391   if (old_weight != Weight::Zero() && old_weight != Weight::One()) {
392     outprops &= ~kWeighted;
393   }
394   if (new_weight != Weight::Zero() && new_weight != Weight::One()) {
395     outprops |= kWeighted;
396     outprops &= ~kUnweighted;
397   }
398   outprops &= kSetFinalProperties | kWeighted | kUnweighted;
399   return outprops;
400 }
401 
402 /// Gets the properties for the MutableFst::AddArc method.
403 ///
404 /// \param inprops  the current properties of the fst
405 /// \param s        the id of the state to which an arc is being added
406 /// \param arc      the arc being added to the state with the specified id
407 /// \param prev_arc the previously-added (or "last") arc of state s, or NULL if
408 ///                 s currently has no arcs
409 template <typename A>
AddArcProperties(uint64 inprops,typename A::StateId s,const A & arc,const A * prev_arc)410 uint64 AddArcProperties(uint64 inprops, typename A::StateId s,
411                         const A &arc, const A *prev_arc) {
412   uint64 outprops = inprops;
413   if (arc.ilabel != arc.olabel) {
414     outprops |= kNotAcceptor;
415     outprops &= ~kAcceptor;
416   }
417   if (arc.ilabel == 0) {
418     outprops |= kIEpsilons;
419     outprops &= ~kNoIEpsilons;
420     if (arc.olabel == 0) {
421       outprops |= kEpsilons;
422       outprops &= ~kNoEpsilons;
423     }
424   }
425   if (arc.olabel == 0) {
426     outprops |= kOEpsilons;
427     outprops &= ~kNoOEpsilons;
428   }
429   if (prev_arc != 0) {
430     if (prev_arc->ilabel > arc.ilabel) {
431       outprops |= kNotILabelSorted;
432       outprops &= ~kILabelSorted;
433     }
434     if (prev_arc->olabel > arc.olabel) {
435       outprops |= kNotOLabelSorted;
436       outprops &= ~kOLabelSorted;
437     }
438   }
439   if (arc.weight != A::Weight::Zero() && arc.weight != A::Weight::One()) {
440     outprops |= kWeighted;
441     outprops &= ~kUnweighted;
442   }
443   if (arc.nextstate <= s) {
444     outprops |= kNotTopSorted;
445     outprops &= ~kTopSorted;
446   }
447   outprops &= kAddArcProperties | kAcceptor |
448               kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
449               kILabelSorted | kOLabelSorted | kUnweighted | kTopSorted;
450   if (outprops & kTopSorted) {
451     outprops |= kAcyclic | kInitialAcyclic;
452   }
453   return outprops;
454 }
455 
456 extern const char *PropertyNames[];
457 
458 }  // namespace fst
459 
460 #endif  // FST_LIB_PROPERTIES_H__
461