1 // string-weight.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // String weight set and associated semiring operation definitions.
20 
21 #ifndef FST_LIB_STRING_WEIGHT_H__
22 #define FST_LIB_STRING_WEIGHT_H__
23 
24 #include <list>
25 #include <string>
26 
27 #include <fst/product-weight.h>
28 #include <fst/weight.h>
29 
30 namespace fst {
31 
32 const int kStringInfinity = -1;      // Label for the infinite string
33 const int kStringBad = -2;           // Label for a non-string
34 const char kStringSeparator = '_';   // Label separator in strings
35 
36 // Determines whether to use left or right string semiring.  Includes
37 // restricted versions that signal an error if proper prefixes
38 // (suffixes) would otherwise be returned by Plus, useful with various
39 // algorithms that require functional transducer input with the
40 // string semirings.
41 enum StringType { STRING_LEFT = 0, STRING_RIGHT = 1 ,
42                   STRING_LEFT_RESTRICT = 2, STRING_RIGHT_RESTRICT };
43 
44 #define REVERSE_STRING_TYPE(S)                                  \
45    ((S) == STRING_LEFT ? STRING_RIGHT :                         \
46     ((S) == STRING_RIGHT ? STRING_LEFT :                        \
47      ((S) == STRING_LEFT_RESTRICT ? STRING_RIGHT_RESTRICT :     \
48       STRING_LEFT_RESTRICT)))
49 
50 template <typename L, StringType S = STRING_LEFT>
51 class StringWeight;
52 
53 template <typename L, StringType S = STRING_LEFT>
54 class StringWeightIterator;
55 
56 template <typename L, StringType S = STRING_LEFT>
57 class StringWeightReverseIterator;
58 
59 template <typename L, StringType S>
60 bool operator==(const StringWeight<L, S> &,  const StringWeight<L, S> &);
61 
62 
63 // String semiring: (longest_common_prefix/suffix, ., Infinity, Epsilon)
64 template <typename L, StringType S>
65 class StringWeight {
66  public:
67   typedef L Label;
68   typedef StringWeight<L, REVERSE_STRING_TYPE(S)> ReverseWeight;
69 
70   friend class StringWeightIterator<L, S>;
71   friend class StringWeightReverseIterator<L, S>;
72   friend bool operator==<>(const StringWeight<L, S> &,
73                            const StringWeight<L, S> &);
74 
StringWeight()75   StringWeight() { Init(); }
76 
77   template <typename Iter>
StringWeight(const Iter & begin,const Iter & end)78   StringWeight(const Iter &begin, const Iter &end) {
79     Init();
80     for (Iter iter = begin; iter != end; ++iter)
81       PushBack(*iter);
82   }
83 
StringWeight(L l)84   explicit StringWeight(L l) { Init(); PushBack(l); }
85 
Zero()86   static const StringWeight<L, S> &Zero() {
87     static const StringWeight<L, S> zero(kStringInfinity);
88     return zero;
89   }
90 
One()91   static const StringWeight<L, S> &One() {
92     static const StringWeight<L, S> one;
93     return one;
94   }
95 
NoWeight()96   static const StringWeight<L, S> &NoWeight() {
97     static const StringWeight<L, S> no_weight(kStringBad);
98     return no_weight;
99   }
100 
Type()101   static const string &Type() {
102     static const string type =
103         S == STRING_LEFT ? "string" :
104         (S == STRING_RIGHT ? "right_string" :
105          (S == STRING_LEFT_RESTRICT ? "restricted_string" :
106           "right_restricted_string"));
107     return type;
108   }
109 
110   bool Member() const;
111 
112   istream &Read(istream &strm);
113 
114   ostream &Write(ostream &strm) const;
115 
116   size_t Hash() const;
117 
118   StringWeight<L, S> Quantize(float delta = kDelta) const {
119     return *this;
120   }
121 
122   ReverseWeight Reverse() const;
123 
Properties()124   static uint64 Properties() {
125     return (S == STRING_LEFT || S == STRING_LEFT_RESTRICT ?
126             kLeftSemiring : kRightSemiring) | kIdempotent;
127   }
128 
129   // NB: This needs to be uncommented only if default fails for this impl.
130   // StringWeight<L, S> &operator=(const StringWeight<L, S> &w);
131 
132   // These operations combined with the StringWeightIterator and
133   // StringWeightReverseIterator provide the access and mutation of
134   // the string internal elements.
135 
136   // Common initializer among constructors.
Init()137   void Init() { first_ = 0; }
138 
139   // Clear existing StringWeight.
Clear()140   void Clear() { first_ = 0; rest_.clear(); }
141 
Size()142   size_t Size() const { return first_ ? rest_.size() + 1 : 0; }
143 
PushFront(L l)144   void PushFront(L l) {
145     if (first_)
146       rest_.push_front(first_);
147     first_ = l;
148   }
149 
PushBack(L l)150   void PushBack(L l) {
151     if (!first_)
152       first_ = l;
153     else
154       rest_.push_back(l);
155   }
156 
157  private:
158   L first_;         // first label in string (0 if empty)
159   list<L> rest_;    // remaining labels in string
160 };
161 
162 
163 // Traverses string in forward direction.
164 template <typename L, StringType S>
165 class StringWeightIterator {
166  public:
StringWeightIterator(const StringWeight<L,S> & w)167   explicit StringWeightIterator(const StringWeight<L, S>& w)
168       : first_(w.first_), rest_(w.rest_), init_(true),
169         iter_(rest_.begin()) {}
170 
Done()171   bool Done() const {
172     if (init_) return first_ == 0;
173     else return iter_ == rest_.end();
174   }
175 
Value()176   const L& Value() const { return init_ ? first_ : *iter_; }
177 
Next()178   void Next() {
179     if (init_) init_ = false;
180     else  ++iter_;
181   }
182 
Reset()183   void Reset() {
184     init_ = true;
185     iter_ = rest_.begin();
186   }
187 
188  private:
189   const L &first_;
190   const list<L> &rest_;
191   bool init_;   // in the initialized state?
192   typename list<L>::const_iterator iter_;
193 
194   DISALLOW_COPY_AND_ASSIGN(StringWeightIterator);
195 };
196 
197 
198 // Traverses string in backward direction.
199 template <typename L, StringType S>
200 class StringWeightReverseIterator {
201  public:
StringWeightReverseIterator(const StringWeight<L,S> & w)202   explicit StringWeightReverseIterator(const StringWeight<L, S>& w)
203       : first_(w.first_), rest_(w.rest_), fin_(first_ == 0),
204         iter_(rest_.rbegin()) {}
205 
Done()206   bool Done() const { return fin_; }
207 
Value()208   const L& Value() const { return iter_ == rest_.rend() ? first_ : *iter_; }
209 
Next()210   void Next() {
211     if (iter_ == rest_.rend()) fin_ = true;
212     else  ++iter_;
213   }
214 
Reset()215   void Reset() {
216     fin_ = false;
217     iter_ = rest_.rbegin();
218   }
219 
220  private:
221   const L &first_;
222   const list<L> &rest_;
223   bool fin_;   // in the final state?
224   typename list<L>::const_reverse_iterator iter_;
225 
226   DISALLOW_COPY_AND_ASSIGN(StringWeightReverseIterator);
227 };
228 
229 
230 // StringWeight member functions follow that require
231 // StringWeightIterator or StringWeightReverseIterator.
232 
233 template <typename L, StringType S>
Read(istream & strm)234 inline istream &StringWeight<L, S>::Read(istream &strm) {
235   Clear();
236   int32 size;
237   ReadType(strm, &size);
238   for (int i = 0; i < size; ++i) {
239     L label;
240     ReadType(strm, &label);
241     PushBack(label);
242   }
243   return strm;
244 }
245 
246 template <typename L, StringType S>
Write(ostream & strm)247 inline ostream &StringWeight<L, S>::Write(ostream &strm) const {
248   int32 size =  Size();
249   WriteType(strm, size);
250   for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next()) {
251     L label = iter.Value();
252     WriteType(strm, label);
253   }
254   return strm;
255 }
256 
257 template <typename L, StringType S>
Member()258 inline bool StringWeight<L, S>::Member() const {
259   if (Size() != 1)
260     return true;
261   StringWeightIterator<L, S> iter(*this);
262   return iter.Value() != kStringBad;
263 }
264 
265 template <typename L, StringType S>
266 inline typename StringWeight<L, S>::ReverseWeight
Reverse()267 StringWeight<L, S>::Reverse() const {
268   ReverseWeight rw;
269   for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next())
270     rw.PushFront(iter.Value());
271   return rw;
272 }
273 
274 template <typename L, StringType S>
Hash()275 inline size_t StringWeight<L, S>::Hash() const {
276   size_t h = 0;
277   for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next())
278     h ^= h<<1 ^ iter.Value();
279   return h;
280 }
281 
282 // NB: This needs to be uncommented only if default fails for this the impl.
283 //
284 // template <typename L, StringType S>
285 // inline StringWeight<L, S>
286 // &StringWeight<L, S>::operator=(const StringWeight<L, S> &w) {
287 //   if (this != &w) {
288 //     Clear();
289 //     for (StringWeightIterator<L, S> iter(w); !iter.Done(); iter.Next())
290 //       PushBack(iter.Value());
291 //   }
292 //   return *this;
293 // }
294 
295 template <typename L, StringType S>
296 inline bool operator==(const StringWeight<L, S> &w1,
297                        const StringWeight<L, S> &w2) {
298   if (w1.Size() != w2.Size())
299     return false;
300 
301   StringWeightIterator<L, S> iter1(w1);
302   StringWeightIterator<L, S> iter2(w2);
303 
304   for (; !iter1.Done() ; iter1.Next(), iter2.Next())
305     if (iter1.Value() != iter2.Value())
306       return false;
307 
308   return true;
309 }
310 
311 template <typename L, StringType S>
312 inline bool operator!=(const StringWeight<L, S> &w1,
313                        const StringWeight<L, S> &w2) {
314   return !(w1 == w2);
315 }
316 
317 template <typename L, StringType S>
318 inline bool ApproxEqual(const StringWeight<L, S> &w1,
319                         const StringWeight<L, S> &w2,
320                         float delta = kDelta) {
321   return w1 == w2;
322 }
323 
324 template <typename L, StringType S>
325 inline ostream &operator<<(ostream &strm, const StringWeight<L, S> &w) {
326   StringWeightIterator<L, S> iter(w);
327   if (iter.Done())
328     return strm << "Epsilon";
329   else if (iter.Value() == kStringInfinity)
330     return strm << "Infinity";
331   else if (iter.Value() == kStringBad)
332     return strm << "BadString";
333   else
334     for (size_t i = 0; !iter.Done(); ++i, iter.Next()) {
335       if (i > 0)
336         strm << kStringSeparator;
337       strm << iter.Value();
338     }
339   return strm;
340 }
341 
342 template <typename L, StringType S>
343 inline istream &operator>>(istream &strm, StringWeight<L, S> &w) {
344   string s;
345   strm >> s;
346   if (s == "Infinity") {
347     w = StringWeight<L, S>::Zero();
348   } else if (s == "Epsilon") {
349     w = StringWeight<L, S>::One();
350   } else {
351     w.Clear();
352     char *p = 0;
353     for (const char *cs = s.c_str(); !p || *p != '\0'; cs = p + 1) {
354       int l = strtoll(cs, &p, 10);
355       if (p == cs || (*p != 0 && *p != kStringSeparator)) {
356         strm.clear(std::ios::badbit);
357         break;
358       }
359       w.PushBack(l);
360     }
361   }
362   return strm;
363 }
364 
365 
366 // Default is for the restricted left and right semirings.  String
367 // equality is required (for non-Zero() input. This restriction
368 // is used in e.g. Determinize to ensure functional input.
369 template <typename L, StringType S>  inline StringWeight<L, S>
Plus(const StringWeight<L,S> & w1,const StringWeight<L,S> & w2)370 Plus(const StringWeight<L, S> &w1,
371      const StringWeight<L, S> &w2) {
372   if (!w1.Member() || !w2.Member())
373     return StringWeight<L, S>::NoWeight();
374   if (w1 == StringWeight<L, S>::Zero())
375     return w2;
376   if (w2 == StringWeight<L, S>::Zero())
377     return w1;
378 
379   if (w1 != w2) {
380     FSTERROR() << "StringWeight::Plus: unequal arguments "
381                << "(non-functional FST?)"
382                << " w1 = " << w1
383                << " w2 = " << w2;
384     return StringWeight<L, S>::NoWeight();
385   }
386 
387   return w1;
388 }
389 
390 
391 // Longest common prefix for left string semiring.
392 template <typename L>  inline StringWeight<L, STRING_LEFT>
Plus(const StringWeight<L,STRING_LEFT> & w1,const StringWeight<L,STRING_LEFT> & w2)393 Plus(const StringWeight<L, STRING_LEFT> &w1,
394      const StringWeight<L, STRING_LEFT> &w2) {
395   if (!w1.Member() || !w2.Member())
396     return StringWeight<L, STRING_LEFT>::NoWeight();
397   if (w1 == StringWeight<L, STRING_LEFT>::Zero())
398     return w2;
399   if (w2 == StringWeight<L, STRING_LEFT>::Zero())
400     return w1;
401 
402   StringWeight<L, STRING_LEFT> sum;
403   StringWeightIterator<L, STRING_LEFT> iter1(w1);
404   StringWeightIterator<L, STRING_LEFT> iter2(w2);
405   for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
406        iter1.Next(), iter2.Next())
407     sum.PushBack(iter1.Value());
408   return sum;
409 }
410 
411 
412 // Longest common suffix for right string semiring.
413 template <typename L>  inline StringWeight<L, STRING_RIGHT>
Plus(const StringWeight<L,STRING_RIGHT> & w1,const StringWeight<L,STRING_RIGHT> & w2)414 Plus(const StringWeight<L, STRING_RIGHT> &w1,
415      const StringWeight<L, STRING_RIGHT> &w2) {
416   if (!w1.Member() || !w2.Member())
417     return StringWeight<L, STRING_RIGHT>::NoWeight();
418   if (w1 == StringWeight<L, STRING_RIGHT>::Zero())
419     return w2;
420   if (w2 == StringWeight<L, STRING_RIGHT>::Zero())
421     return w1;
422 
423   StringWeight<L, STRING_RIGHT> sum;
424   StringWeightReverseIterator<L, STRING_RIGHT> iter1(w1);
425   StringWeightReverseIterator<L, STRING_RIGHT> iter2(w2);
426   for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
427        iter1.Next(), iter2.Next())
428     sum.PushFront(iter1.Value());
429   return sum;
430 }
431 
432 
433 template <typename L, StringType S>
Times(const StringWeight<L,S> & w1,const StringWeight<L,S> & w2)434 inline StringWeight<L, S> Times(const StringWeight<L, S> &w1,
435                              const StringWeight<L, S> &w2) {
436   if (!w1.Member() || !w2.Member())
437     return StringWeight<L, S>::NoWeight();
438   if (w1 == StringWeight<L, S>::Zero() || w2 == StringWeight<L, S>::Zero())
439     return StringWeight<L, S>::Zero();
440 
441   StringWeight<L, S> prod(w1);
442   for (StringWeightIterator<L, S> iter(w2); !iter.Done(); iter.Next())
443     prod.PushBack(iter.Value());
444 
445   return prod;
446 }
447 
448 
449 // Default is for left division in the left string and the
450 // left restricted string semirings.
451 template <typename L, StringType S> inline StringWeight<L, S>
Divide(const StringWeight<L,S> & w1,const StringWeight<L,S> & w2,DivideType typ)452 Divide(const StringWeight<L, S> &w1,
453        const StringWeight<L, S> &w2,
454        DivideType typ) {
455 
456   if (typ != DIVIDE_LEFT) {
457     FSTERROR() << "StringWeight::Divide: only left division is defined "
458                << "for the " << StringWeight<L, S>::Type() << " semiring";
459     return StringWeight<L, S>::NoWeight();
460   }
461 
462   if (!w1.Member() || !w2.Member())
463     return StringWeight<L, S>::NoWeight();
464 
465   if (w2 == StringWeight<L, S>::Zero())
466     return StringWeight<L, S>(kStringBad);
467   else if (w1 == StringWeight<L, S>::Zero())
468     return StringWeight<L, S>::Zero();
469 
470   StringWeight<L, S> div;
471   StringWeightIterator<L, S> iter(w1);
472   for (int i = 0; !iter.Done(); iter.Next(), ++i) {
473     if (i >= w2.Size())
474       div.PushBack(iter.Value());
475   }
476   return div;
477 }
478 
479 
480 // Right division in the right string semiring.
481 template <typename L> inline StringWeight<L, STRING_RIGHT>
Divide(const StringWeight<L,STRING_RIGHT> & w1,const StringWeight<L,STRING_RIGHT> & w2,DivideType typ)482 Divide(const StringWeight<L, STRING_RIGHT> &w1,
483        const StringWeight<L, STRING_RIGHT> &w2,
484        DivideType typ) {
485 
486   if (typ != DIVIDE_RIGHT) {
487     FSTERROR() << "StringWeight::Divide: only right division is defined "
488                << "for the right string semiring";
489     return StringWeight<L, STRING_RIGHT>::NoWeight();
490   }
491 
492   if (!w1.Member() || !w2.Member())
493     return StringWeight<L, STRING_RIGHT>::NoWeight();
494 
495   if (w2 == StringWeight<L, STRING_RIGHT>::Zero())
496     return StringWeight<L, STRING_RIGHT>(kStringBad);
497   else if (w1 == StringWeight<L, STRING_RIGHT>::Zero())
498     return StringWeight<L, STRING_RIGHT>::Zero();
499 
500   StringWeight<L, STRING_RIGHT> div;
501   StringWeightReverseIterator<L, STRING_RIGHT> iter(w1);
502   for (int i = 0; !iter.Done(); iter.Next(), ++i) {
503     if (i >= w2.Size())
504       div.PushFront(iter.Value());
505   }
506   return div;
507 }
508 
509 
510 // Right division in the right restricted string semiring.
511 template <typename L> inline StringWeight<L, STRING_RIGHT_RESTRICT>
Divide(const StringWeight<L,STRING_RIGHT_RESTRICT> & w1,const StringWeight<L,STRING_RIGHT_RESTRICT> & w2,DivideType typ)512 Divide(const StringWeight<L, STRING_RIGHT_RESTRICT> &w1,
513        const StringWeight<L, STRING_RIGHT_RESTRICT> &w2,
514        DivideType typ) {
515 
516   if (typ != DIVIDE_RIGHT) {
517     FSTERROR() << "StringWeight::Divide: only right division is defined "
518                << "for the right restricted string semiring";
519     return StringWeight<L, STRING_RIGHT_RESTRICT>::NoWeight();
520   }
521 
522   if (!w1.Member() || !w2.Member())
523     return StringWeight<L, STRING_RIGHT_RESTRICT>::NoWeight();
524 
525   if (w2 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero())
526     return StringWeight<L, STRING_RIGHT_RESTRICT>(kStringBad);
527   else if (w1 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero())
528     return StringWeight<L, STRING_RIGHT_RESTRICT>::Zero();
529 
530   StringWeight<L, STRING_RIGHT_RESTRICT> div;
531   StringWeightReverseIterator<L, STRING_RIGHT_RESTRICT> iter(w1);
532   for (int i = 0; !iter.Done(); iter.Next(), ++i) {
533     if (i >= w2.Size())
534       div.PushFront(iter.Value());
535   }
536   return div;
537 }
538 
539 
540 // Product of string weight and an arbitray weight.
541 template <class L, class W, StringType S = STRING_LEFT>
542 struct GallicWeight : public ProductWeight<StringWeight<L, S>, W> {
543   typedef GallicWeight<L, typename W::ReverseWeight, REVERSE_STRING_TYPE(S)>
544   ReverseWeight;
545 
GallicWeightGallicWeight546   GallicWeight() {}
547 
GallicWeightGallicWeight548   GallicWeight(StringWeight<L, S> w1, W w2)
549       : ProductWeight<StringWeight<L, S>, W>(w1, w2) {}
550 
551   explicit GallicWeight(const string &s, int *nread = 0)
552       : ProductWeight<StringWeight<L, S>, W>(s, nread) {}
553 
GallicWeightGallicWeight554   GallicWeight(const ProductWeight<StringWeight<L, S>, W> &w)
555       : ProductWeight<StringWeight<L, S>, W>(w) {}
556 };
557 
558 }  // namespace fst
559 
560 #endif  // FST_LIB_STRING_WEIGHT_H__
561