1 // interval-set.h
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Class to represent and operate on sets of intervals.
20
21 #ifndef FST_LIB_INTERVAL_SET_H__
22 #define FST_LIB_INTERVAL_SET_H__
23
24 #include <iostream>
25 #include <vector>
26 using std::vector;
27
28
29 #include <fst/util.h>
30
31
32 namespace fst {
33
34 // Stores and operates on a set of half-open integral intervals [a,b)
35 // of signed integers of type T.
36 template <typename T>
37 class IntervalSet {
38 public:
39 struct Interval {
40 T begin;
41 T end;
42
IntervalInterval43 Interval() : begin(-1), end(-1) {}
44
IntervalInterval45 Interval(T b, T e) : begin(b), end(e) {}
46
47 bool operator<(const Interval &i) const {
48 return begin < i.begin || (begin == i.begin && end > i.end);
49 }
50
51 bool operator==(const Interval &i) const {
52 return begin == i.begin && end == i.end;
53 }
54
55 bool operator!=(const Interval &i) const {
56 return begin != i.begin || end != i.end;
57 }
58
ReadInterval59 istream &Read(istream &strm) {
60 T n;
61 ReadType(strm, &n);
62 begin = n;
63 ReadType(strm, &n);
64 end = n;
65 return strm;
66 }
67
WriteInterval68 ostream &Write(ostream &strm) const {
69 T n = begin;
70 WriteType(strm, n);
71 n = end;
72 WriteType(strm, n);
73 return strm;
74 }
75 };
76
IntervalSet()77 IntervalSet() : count_(-1) {}
78
79 // Returns the interval set as a vector.
Intervals()80 vector<Interval> *Intervals() { return &intervals_; }
81
Intervals()82 const vector<Interval> *Intervals() const { return &intervals_; }
83
Empty()84 bool Empty() const { return intervals_.empty(); }
85
Size()86 T Size() const { return intervals_.size(); }
87
88 // Number of points in the intervals (undefined if not normalized).
Count()89 T Count() const { return count_; }
90
Clear()91 void Clear() {
92 intervals_.clear();
93 count_ = 0;
94 }
95
96 // Adds an interval set to the set. The result may not be normalized.
Union(const IntervalSet<T> & iset)97 void Union(const IntervalSet<T> &iset) {
98 const vector<Interval> *intervals = iset.Intervals();
99 for (typename vector<Interval>::const_iterator it = intervals->begin();
100 it != intervals->end(); ++it)
101 intervals_.push_back(*it);
102 }
103
104 // Requires intervals be normalized.
Member(T value)105 bool Member(T value) const {
106 Interval interval(value, value);
107 typename vector<Interval>::const_iterator lb =
108 lower_bound(intervals_.begin(), intervals_.end(), interval);
109 if (lb == intervals_.begin())
110 return false;
111 return (--lb)->end > value;
112 }
113
114 // Requires intervals be normalized.
115 bool operator==(const IntervalSet<T>& iset) const {
116 return *(iset.Intervals()) == intervals_;
117 }
118
119 // Requires intervals be normalized.
120 bool operator!=(const IntervalSet<T>& iset) const {
121 return *(iset.Intervals()) != intervals_;
122 }
123
Singleton()124 bool Singleton() const {
125 return intervals_.size() == 1 &&
126 intervals_[0].begin + 1 == intervals_[0].end;
127 }
128
129
130 // Sorts; collapses overlapping and adjacent interals; sets count.
131 void Normalize();
132
133 // Intersects an interval set with the set. Requires intervals be
134 // normalized. The result is normalized.
135 void Intersect(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
136
137 // Complements the set w.r.t [0, maxval). Requires intervals be
138 // normalized. The result is normalized.
139 void Complement(T maxval, IntervalSet<T> *oset) const;
140
141 // Subtract an interval set from the set. Requires intervals be
142 // normalized. The result is normalized.
143 void Difference(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
144
145 // Determines if an interval set overlaps with the set. Requires
146 // intervals be normalized.
147 bool Overlaps(const IntervalSet<T> &iset) const;
148
149 // Determines if an interval set overlaps with the set but neither
150 // is contained in the other. Requires intervals be normalized.
151 bool StrictlyOverlaps(const IntervalSet<T> &iset) const;
152
153 // Determines if an interval set is contained within the set. Requires
154 // intervals be normalized.
155 bool Contains(const IntervalSet<T> &iset) const;
156
Read(istream & strm)157 istream &Read(istream &strm) {
158 ReadType(strm, &intervals_);
159 return ReadType(strm, &count_);
160 }
161
Write(ostream & strm)162 ostream &Write(ostream &strm) const {
163 WriteType(strm, intervals_);
164 return WriteType(strm, count_);
165 }
166
167 private:
168 vector<Interval> intervals_;
169 T count_;
170 };
171
172 // Sorts; collapses overlapping and adjacent interavls; sets count.
173 template <typename T>
Normalize()174 void IntervalSet<T>::Normalize() {
175 sort(intervals_.begin(), intervals_.end());
176
177 count_ = 0;
178 T size = 0;
179 for (T i = 0; i < intervals_.size(); ++i) {
180 Interval &inti = intervals_[i];
181 if (inti.begin == inti.end)
182 continue;
183 for (T j = i + 1; j < intervals_.size(); ++j) {
184 Interval &intj = intervals_[j];
185 if (intj.begin > inti.end)
186 break;
187 if (intj.end > inti.end)
188 inti.end = intj.end;
189 ++i;
190 }
191 count_ += inti.end - inti.begin;
192 intervals_[size++] = inti;
193 }
194 intervals_.resize(size);
195 }
196
197 // Intersects an interval set with the set. Requires intervals be normalized.
198 // The result is normalized.
199 template <typename T>
Intersect(const IntervalSet<T> & iset,IntervalSet<T> * oset)200 void IntervalSet<T>::Intersect(const IntervalSet<T> &iset,
201 IntervalSet<T> *oset) const {
202 const vector<Interval> *iintervals = iset.Intervals();
203 vector<Interval> *ointervals = oset->Intervals();
204 typename vector<Interval>::const_iterator it1 = intervals_.begin();
205 typename vector<Interval>::const_iterator it2 = iintervals->begin();
206
207 ointervals->clear();
208 oset->count_ = 0;
209
210 while (it1 != intervals_.end() && it2 != iintervals->end()) {
211 if (it1->end <= it2->begin) {
212 ++it1;
213 } else if (it2->end <= it1->begin) {
214 ++it2;
215 } else {
216 Interval interval;
217 interval.begin = max(it1->begin, it2->begin);
218 interval.end = min(it1->end, it2->end);
219 ointervals->push_back(interval);
220 oset->count_ += interval.end - interval.begin;
221 if (it1->end < it2->end)
222 ++it1;
223 else
224 ++it2;
225 }
226 }
227 }
228
229 // Complements the set w.r.t [0, maxval). Requires intervals be normalized.
230 // The result is normalized.
231 template <typename T>
Complement(T maxval,IntervalSet<T> * oset)232 void IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const {
233 vector<Interval> *ointervals = oset->Intervals();
234 ointervals->clear();
235 oset->count_ = 0;
236
237 Interval interval;
238 interval.begin = 0;
239 for (typename vector<Interval>::const_iterator it = intervals_.begin();
240 it != intervals_.end();
241 ++it) {
242 interval.end = min(it->begin, maxval);
243 if (interval.begin < interval.end) {
244 ointervals->push_back(interval);
245 oset->count_ += interval.end - interval.begin;
246 }
247 interval.begin = it->end;
248 }
249 interval.end = maxval;
250 if (interval.begin < interval.end) {
251 ointervals->push_back(interval);
252 oset->count_ += interval.end - interval.begin;
253 }
254 }
255
256 // Subtract an interval set from the set. Requires intervals be normalized.
257 // The result is normalized.
258 template <typename T>
Difference(const IntervalSet<T> & iset,IntervalSet<T> * oset)259 void IntervalSet<T>::Difference(const IntervalSet<T> &iset,
260 IntervalSet<T> *oset) const {
261 if (intervals_.empty()) {
262 oset->Intervals()->clear();
263 oset->count_ = 0;
264 } else {
265 IntervalSet<T> cset;
266 iset.Complement(intervals_.back().end, &cset);
267 Intersect(cset, oset);
268 }
269 }
270
271 // Determines if an interval set overlaps with the set. Requires
272 // intervals be normalized.
273 template <typename T>
Overlaps(const IntervalSet<T> & iset)274 bool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const {
275 const vector<Interval> *intervals = iset.Intervals();
276 typename vector<Interval>::const_iterator it1 = intervals_.begin();
277 typename vector<Interval>::const_iterator it2 = intervals->begin();
278
279 while (it1 != intervals_.end() && it2 != intervals->end()) {
280 if (it1->end <= it2->begin) {
281 ++it1;
282 } else if (it2->end <= it1->begin) {
283 ++it2;
284 } else {
285 return true;
286 }
287 }
288 return false;
289 }
290
291 // Determines if an interval set overlaps with the set but neither
292 // is contained in the other. Requires intervals be normalized.
293 template <typename T>
StrictlyOverlaps(const IntervalSet<T> & iset)294 bool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const {
295 const vector<Interval> *intervals = iset.Intervals();
296 typename vector<Interval>::const_iterator it1 = intervals_.begin();
297 typename vector<Interval>::const_iterator it2 = intervals->begin();
298 bool only1 = false; // point in intervals_ but not intervals
299 bool only2 = false; // point in intervals but not intervals_
300 bool overlap = false; // point in both intervals_ and intervals
301
302 while (it1 != intervals_.end() && it2 != intervals->end()) {
303 if (it1->end <= it2->begin) { // no overlap - it1 first
304 only1 = true;
305 ++it1;
306 } else if (it2->end <= it1->begin) { // no overlap - it2 first
307 only2 = true;
308 ++it2;
309 } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals
310 overlap = true;
311 ++it1;
312 ++it2;
313 } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2
314 only2 = true;
315 overlap = true;
316 ++it1;
317 } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1
318 only1 = true;
319 overlap = true;
320 ++it2;
321 } else { // strict overlap
322 only1 = true;
323 only2 = true;
324 overlap = true;
325 }
326 if (only1 == true && only2 == true && overlap == true)
327 return true;
328 }
329 if (it1 != intervals_.end())
330 only1 = true;
331 if (it2 != intervals->end())
332 only2 = true;
333
334 return only1 == true && only2 == true && overlap == true;
335 }
336
337 // Determines if an interval set is contained within the set. Requires
338 // intervals be normalized.
339 template <typename T>
Contains(const IntervalSet<T> & iset)340 bool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const {
341 if (iset.Count() > Count())
342 return false;
343
344 const vector<Interval> *intervals = iset.Intervals();
345 typename vector<Interval>::const_iterator it1 = intervals_.begin();
346 typename vector<Interval>::const_iterator it2 = intervals->begin();
347
348 while (it1 != intervals_.end() && it2 != intervals->end()) {
349 if (it1->end <= it2->begin) { // no overlap - it1 first
350 ++it1;
351 } else if (it2->begin < it1->begin || it2->end > it1->end) { // no C
352 return false;
353 } else if (it2->end == it1->end) {
354 ++it1;
355 ++it2;
356 } else {
357 ++it2;
358 }
359 }
360 return it2 == intervals->end();
361 }
362
363 template <typename T>
364 ostream &operator<<(ostream &strm, const IntervalSet<T> &s) {
365 typedef typename IntervalSet<T>::Interval Interval;
366 const vector<Interval> *intervals = s.Intervals();
367 strm << "{";
368 for (typename vector<Interval>::const_iterator it = intervals->begin();
369 it != intervals->end();
370 ++it) {
371 if (it != intervals->begin())
372 strm << ",";
373 strm << "[" << it->begin << "," << it->end << ")";
374 }
375 strm << "}";
376 return strm;
377 }
378
379 } // namespace fst
380
381 #endif // FST_LIB_INTERVAL_SET_H__
382