1 // properties.cc
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Functions for updating property bits for various FST operations and
20 // string names of the properties.
21
22 #include <fst/properties.h>
23
24 #include <stddef.h>
25 #include <vector>
26 using std::vector;
27
28 namespace fst {
29
30 // These functions determine the properties associated with the FST
31 // result of various finite-state operations. The property arguments
32 // correspond to the operation's FST arguments. The properties
33 // returned assume the operation modifies its first argument.
34 // Bitwise-and this result with kCopyProperties for the case when a
35 // new (possibly delayed) FST is instead constructed.
36
37 // Properties for a concatenatively-closed FST.
ClosureProperties(uint64 inprops,bool star,bool delayed)38 uint64 ClosureProperties(uint64 inprops, bool star, bool delayed) {
39 uint64 outprops = (kError | kAcceptor | kUnweighted | kAccessible) & inprops;
40 if (!delayed)
41 outprops |= (kExpanded | kMutable | kCoAccessible |
42 kNotTopSorted | kNotString) & inprops;
43 if (!delayed || inprops & kAccessible)
44 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
45 kNotILabelSorted | kNotOLabelSorted | kWeighted |
46 kNotAccessible | kNotCoAccessible) & inprops;
47 return outprops;
48 }
49
50 // Properties for a complemented FST.
ComplementProperties(uint64 inprops)51 uint64 ComplementProperties(uint64 inprops) {
52 uint64 outprops = kAcceptor | kUnweighted | kNoEpsilons |
53 kNoIEpsilons | kNoOEpsilons |
54 kIDeterministic | kODeterministic | kAccessible;
55 outprops |= (kError | kILabelSorted | kOLabelSorted | kInitialCyclic) &
56 inprops;
57 if (inprops & kAccessible)
58 outprops |= kNotILabelSorted | kNotOLabelSorted | kCyclic;
59 return outprops;
60 }
61
62 // Properties for a composed FST.
ComposeProperties(uint64 inprops1,uint64 inprops2)63 uint64 ComposeProperties(uint64 inprops1, uint64 inprops2) {
64 uint64 outprops = kError & (inprops1 | inprops2);
65 if (inprops1 & kAcceptor && inprops2 & kAcceptor) {
66 outprops |= kAcceptor | kAccessible;
67 outprops |= (kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kAcyclic |
68 kInitialAcyclic) & inprops1 & inprops2;
69 if (kNoIEpsilons & inprops1 & inprops2)
70 outprops |= (kIDeterministic | kODeterministic) & inprops1 & inprops2;
71 } else {
72 outprops |= kAccessible;
73 outprops |= (kAcceptor | kNoIEpsilons | kAcyclic | kInitialAcyclic) &
74 inprops1 & inprops2;
75 if (kNoIEpsilons & inprops1 & inprops2)
76 outprops |= kIDeterministic & inprops1 & inprops2;
77 }
78 return outprops;
79 }
80
81 // Properties for a concatenated FST.
ConcatProperties(uint64 inprops1,uint64 inprops2,bool delayed)82 uint64 ConcatProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
83 uint64 outprops =
84 (kAcceptor | kUnweighted | kAcyclic) & inprops1 & inprops2;
85 outprops |= kError & (inprops1 | inprops2);
86
87 bool empty1 = delayed; // Can fst1 be the empty machine?
88 bool empty2 = delayed; // Can fst2 be the empty machine?
89
90 if (!delayed) {
91 outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
92 outprops |= (kNotTopSorted | kNotString) & inprops2;
93 }
94 if (!empty1)
95 outprops |= (kInitialAcyclic | kInitialCyclic) & inprops1;
96 if (!delayed || inprops1 & kAccessible)
97 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
98 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
99 kNotOLabelSorted | kWeighted | kCyclic |
100 kNotAccessible | kNotCoAccessible) & inprops1;
101 if ((inprops1 & (kAccessible | kCoAccessible)) ==
102 (kAccessible | kCoAccessible) && !empty1) {
103 outprops |= kAccessible & inprops2;
104 if (!empty2)
105 outprops |= kCoAccessible & inprops2;
106 if (!delayed || inprops2 & kAccessible)
107 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
108 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
109 kNotOLabelSorted | kWeighted | kCyclic |
110 kNotAccessible | kNotCoAccessible) & inprops2;
111 }
112 return outprops;
113 }
114
115 // Properties for a determinized FST.
DeterminizeProperties(uint64 inprops,bool has_subsequential_label)116 uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label) {
117 uint64 outprops = kAccessible;
118 if (((kAcceptor | kNoIEpsilons) & inprops) || has_subsequential_label)
119 outprops |= kIDeterministic;
120 outprops |= (kError | kAcceptor | kAcyclic |
121 kInitialAcyclic | kCoAccessible | kString) & inprops;
122 if (inprops & kNoIEpsilons)
123 outprops |= kNoEpsilons & inprops;
124 if (inprops & kAccessible)
125 outprops |= (kNotAcceptor | kEpsilons | kIEpsilons | kOEpsilons |
126 kCyclic) & inprops;
127 if (inprops & kAcceptor)
128 outprops |= (kNoIEpsilons | kNoOEpsilons) & inprops;
129 if ((inprops & kNoIEpsilons) && has_subsequential_label)
130 outprops |= kNoIEpsilons;
131 return outprops;
132 }
133
134 // Properties for factored weight FST.
FactorWeightProperties(uint64 inprops)135 uint64 FactorWeightProperties(uint64 inprops) {
136 uint64 outprops = (kExpanded | kMutable | kError | kAcceptor |
137 kAcyclic | kAccessible | kCoAccessible) & inprops;
138 if (inprops & kAccessible)
139 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
140 kEpsilons | kIEpsilons | kOEpsilons | kCyclic |
141 kNotILabelSorted | kNotOLabelSorted)
142 & inprops;
143 return outprops;
144 }
145
146 // Properties for an inverted FST.
InvertProperties(uint64 inprops)147 uint64 InvertProperties(uint64 inprops) {
148 uint64 outprops = (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
149 kEpsilons | kNoEpsilons | kWeighted | kUnweighted |
150 kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
151 kTopSorted | kNotTopSorted |
152 kAccessible | kNotAccessible |
153 kCoAccessible | kNotCoAccessible |
154 kString | kNotString) & inprops;
155 if (kIDeterministic & inprops)
156 outprops |= kODeterministic;
157 if (kNonIDeterministic & inprops)
158 outprops |= kNonODeterministic;
159 if (kODeterministic & inprops)
160 outprops |= kIDeterministic;
161 if (kNonODeterministic & inprops)
162 outprops |= kNonIDeterministic;
163
164 if (kIEpsilons & inprops)
165 outprops |= kOEpsilons;
166 if (kNoIEpsilons & inprops)
167 outprops |= kNoOEpsilons;
168 if (kOEpsilons & inprops)
169 outprops |= kIEpsilons;
170 if (kNoOEpsilons & inprops)
171 outprops |= kNoIEpsilons;
172
173 if (kILabelSorted & inprops)
174 outprops |= kOLabelSorted;
175 if (kNotILabelSorted & inprops)
176 outprops |= kNotOLabelSorted;
177 if (kOLabelSorted & inprops)
178 outprops |= kILabelSorted;
179 if (kNotOLabelSorted & inprops)
180 outprops |= kNotILabelSorted;
181 return outprops;
182 }
183
184 // Properties for a projected FST.
ProjectProperties(uint64 inprops,bool project_input)185 uint64 ProjectProperties(uint64 inprops, bool project_input) {
186 uint64 outprops = kAcceptor;
187 outprops |= (kExpanded | kMutable | kError | kWeighted | kUnweighted |
188 kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
189 kTopSorted | kNotTopSorted | kAccessible | kNotAccessible |
190 kCoAccessible | kNotCoAccessible |
191 kString | kNotString) & inprops;
192 if (project_input) {
193 outprops |= (kIDeterministic | kNonIDeterministic |
194 kIEpsilons | kNoIEpsilons |
195 kILabelSorted | kNotILabelSorted) & inprops;
196
197 if (kIDeterministic & inprops)
198 outprops |= kODeterministic;
199 if (kNonIDeterministic & inprops)
200 outprops |= kNonODeterministic;
201
202 if (kIEpsilons & inprops)
203 outprops |= kOEpsilons | kEpsilons;
204 if (kNoIEpsilons & inprops)
205 outprops |= kNoOEpsilons | kNoEpsilons;
206
207 if (kILabelSorted & inprops)
208 outprops |= kOLabelSorted;
209 if (kNotILabelSorted & inprops)
210 outprops |= kNotOLabelSorted;
211 } else {
212 outprops |= (kODeterministic | kNonODeterministic |
213 kOEpsilons | kNoOEpsilons |
214 kOLabelSorted | kNotOLabelSorted) & inprops;
215
216 if (kODeterministic & inprops)
217 outprops |= kIDeterministic;
218 if (kNonODeterministic & inprops)
219 outprops |= kNonIDeterministic;
220
221 if (kOEpsilons & inprops)
222 outprops |= kIEpsilons | kEpsilons;
223 if (kNoOEpsilons & inprops)
224 outprops |= kNoIEpsilons | kNoEpsilons;
225
226 if (kOLabelSorted & inprops)
227 outprops |= kILabelSorted;
228 if (kNotOLabelSorted & inprops)
229 outprops |= kNotILabelSorted;
230 }
231 return outprops;
232 }
233
234 // Properties for a randgen FST.
RandGenProperties(uint64 inprops,bool weighted)235 uint64 RandGenProperties(uint64 inprops, bool weighted) {
236 uint64 outprops = kAcyclic | kInitialAcyclic | kAccessible;
237 outprops |= inprops & kError;
238 if (weighted) {
239 outprops |= kTopSorted;
240 outprops |= (kAcceptor | kNoEpsilons |
241 kNoIEpsilons | kNoOEpsilons |
242 kIDeterministic | kODeterministic |
243 kILabelSorted | kOLabelSorted) & inprops;
244 } else {
245 outprops |= kUnweighted;
246 outprops |= (kAcceptor | kILabelSorted | kOLabelSorted) & inprops;
247 }
248 return outprops;
249 }
250
251 // Properties for a replace FST.
ReplaceProperties(const vector<uint64> & inprops,ssize_t root,bool epsilon_on_replace,bool no_empty_fsts)252 uint64 ReplaceProperties(const vector<uint64>& inprops,
253 ssize_t root,
254 bool epsilon_on_replace,
255 bool no_empty_fsts) {
256 if (inprops.size() == 0)
257 return kNullProperties;
258 uint64 outprops = 0;
259 for (size_t i = 0; i < inprops.size(); ++i)
260 outprops |= kError & inprops[i];
261 uint64 access_props = no_empty_fsts ? kAccessible | kCoAccessible : 0;
262 for (size_t i = 0; i < inprops.size(); ++i)
263 access_props &= (inprops[i] & (kAccessible | kCoAccessible));
264 if (access_props == (kAccessible | kCoAccessible)) {
265 outprops |= access_props;
266 if (inprops[root] & kInitialCyclic)
267 outprops |= kInitialCyclic;
268 uint64 props = 0;
269 bool string = true;
270 for (size_t i = 0; i < inprops.size(); ++i) {
271 if (epsilon_on_replace == false)
272 props |= kNotAcceptor & inprops[i];
273 props |= (kNonIDeterministic | kNonODeterministic | kEpsilons |
274 kIEpsilons | kOEpsilons | kWeighted | kCyclic |
275 kNotTopSorted | kNotString) & inprops[i];
276 if (!(inprops[i] & kString))
277 string = false;
278 }
279 outprops |= props;
280 if (string)
281 outprops |= kString;
282 }
283 bool acceptor = epsilon_on_replace;
284 bool ideterministic = !epsilon_on_replace;
285 bool no_iepsilons = !epsilon_on_replace;
286 bool acyclic = true;
287 bool unweighted = true;
288 for (size_t i = 0; i < inprops.size(); ++i) {
289 if (!(inprops[i] & kAcceptor))
290 acceptor = false;
291 if (!(inprops[i] & kIDeterministic))
292 ideterministic = false;
293 if (!(inprops[i] & kNoIEpsilons))
294 no_iepsilons = false;
295 if (!(inprops[i] & kAcyclic))
296 acyclic = false;
297 if (!(inprops[i] & kUnweighted))
298 unweighted = false;
299 }
300 if (acceptor)
301 outprops |= kAcceptor;
302 if (ideterministic)
303 outprops |= kIDeterministic;
304 if (no_iepsilons)
305 outprops |= kNoIEpsilons;
306 if (acyclic)
307 outprops |= kAcyclic;
308 if (unweighted)
309 outprops |= kUnweighted;
310 if (inprops[root] & kInitialAcyclic)
311 outprops |= kInitialAcyclic;
312 return outprops;
313 }
314
315 // Properties for a relabeled FST.
RelabelProperties(uint64 inprops)316 uint64 RelabelProperties(uint64 inprops) {
317 uint64 outprops = (kExpanded | kMutable | kError |
318 kWeighted | kUnweighted |
319 kCyclic | kAcyclic |
320 kInitialCyclic | kInitialAcyclic |
321 kTopSorted | kNotTopSorted |
322 kAccessible | kNotAccessible |
323 kCoAccessible | kNotCoAccessible |
324 kString | kNotString) & inprops;
325 return outprops;
326 }
327
328 // Properties for a reversed FST. (the superinitial state limits this set)
ReverseProperties(uint64 inprops)329 uint64 ReverseProperties(uint64 inprops) {
330 uint64 outprops =
331 (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kEpsilons |
332 kIEpsilons | kOEpsilons | kWeighted | kUnweighted |
333 kCyclic | kAcyclic) & inprops;
334 return outprops;
335 }
336
337 // Properties for re-weighted FST.
ReweightProperties(uint64 inprops)338 uint64 ReweightProperties(uint64 inprops) {
339 uint64 outprops = inprops & kWeightInvariantProperties;
340 outprops = outprops & ~kCoAccessible;
341 return outprops;
342 }
343
344 // Properties for an epsilon-removed FST.
RmEpsilonProperties(uint64 inprops,bool delayed)345 uint64 RmEpsilonProperties(uint64 inprops, bool delayed) {
346 uint64 outprops = kNoEpsilons;
347 outprops |= (kError | kAcceptor | kAcyclic | kInitialAcyclic) & inprops;
348 if (inprops & kAcceptor)
349 outprops |= kNoIEpsilons | kNoOEpsilons;
350 if (!delayed) {
351 outprops |= kExpanded | kMutable;
352 outprops |= kTopSorted & inprops;
353 }
354 if (!delayed || inprops & kAccessible)
355 outprops |= kNotAcceptor & inprops;
356 return outprops;
357 }
358
359 // Properties for shortest path. This function computes how the properties
360 // of the output of shortest path need to be updated, given that 'props' is
361 // already known.
ShortestPathProperties(uint64 props)362 uint64 ShortestPathProperties(uint64 props) {
363 return props | kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
364 }
365
366 // Properties for a synchronized FST.
SynchronizeProperties(uint64 inprops)367 uint64 SynchronizeProperties(uint64 inprops) {
368 uint64 outprops = (kError | kAcceptor | kAcyclic | kAccessible |
369 kCoAccessible | kUnweighted) & inprops;
370 if (inprops & kAccessible)
371 outprops |= (kCyclic | kNotCoAccessible | kWeighted) & inprops;
372 return outprops;
373 }
374
375 // Properties for a unioned FST.
UnionProperties(uint64 inprops1,uint64 inprops2,bool delayed)376 uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
377 uint64 outprops = (kAcceptor | kUnweighted | kAcyclic | kAccessible)
378 & inprops1 & inprops2;
379 outprops |= kError & (inprops1 | inprops2);
380 outprops |= kInitialAcyclic;
381
382 bool empty1 = delayed; // Can fst1 be the empty machine?
383 bool empty2 = delayed; // Can fst2 be the empty machine?
384 if (!delayed) {
385 outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
386 outprops |= (kNotTopSorted | kNotString) & inprops2;
387 }
388 if (!empty1 && !empty2) {
389 outprops |= kEpsilons | kIEpsilons | kOEpsilons;
390 outprops |= kCoAccessible & inprops1 & inprops2;
391 }
392 // Note kNotCoAccessible does not hold because of kInitialAcyclic opt.
393 if (!delayed || inprops1 & kAccessible)
394 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
395 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
396 kNotOLabelSorted | kWeighted | kCyclic |
397 kNotAccessible) & inprops1;
398 if (!delayed || inprops2 & kAccessible)
399 outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
400 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
401 kNotOLabelSorted | kWeighted | kCyclic |
402 kNotAccessible | kNotCoAccessible) & inprops2;
403 return outprops;
404 }
405
406
407 // Property string names (indexed by bit position).
408 const char *PropertyNames[] = {
409 // binary
410 "expanded", "mutable", "error", "", "", "", "", "",
411 "", "", "", "", "", "", "", "",
412 // trinary
413 "acceptor", "not acceptor",
414 "input deterministic", "non input deterministic",
415 "output deterministic", "non output deterministic",
416 "input/output epsilons", "no input/output epsilons",
417 "input epsilons", "no input epsilons",
418 "output epsilons", "no output epsilons",
419 "input label sorted", "not input label sorted",
420 "output label sorted", "not output label sorted",
421 "weighted", "unweighted",
422 "cyclic", "acyclic",
423 "cyclic at initial state", "acyclic at initial state",
424 "top sorted", "not top sorted",
425 "accessible", "not accessible",
426 "coaccessible", "not coaccessible",
427 "string", "not string",
428 };
429
430 } // namespace fst
431