1# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import print_function
16
17import heapq
18import itertools
19
20
21__all__ = ["RangeSet"]
22
23
24class RangeSet(object):
25  """A RangeSet represents a set of non-overlapping ranges on integers.
26
27  Attributes:
28    monotonic: Whether the input has all its integers in increasing order.
29    extra: A dict that can be used by the caller, e.g. to store info that's
30        only meaningful to caller.
31  """
32
33  def __init__(self, data=None):
34    self.monotonic = False
35    self._extra = {}
36    if isinstance(data, str):
37      self._parse_internal(data)
38    elif data:
39      assert len(data) % 2 == 0
40      self.data = tuple(self._remove_pairs(data))
41      self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:]))
42    else:
43      self.data = ()
44
45  def __iter__(self):
46    for i in range(0, len(self.data), 2):
47      yield self.data[i:i+2]
48
49  def __eq__(self, other):
50    return self.data == other.data
51
52  def __ne__(self, other):
53    return self.data != other.data
54
55  def __nonzero__(self):
56    return bool(self.data)
57
58  def __str__(self):
59    if not self.data:
60      return "empty"
61    else:
62      return self.to_string()
63
64  def __repr__(self):
65    return '<RangeSet("' + self.to_string() + '")>'
66
67  @property
68  def extra(self):
69    return self._extra
70
71  @classmethod
72  def parse(cls, text):
73    """Parses a text string into a RangeSet.
74
75    The input text string consists of a space-separated list of blocks and
76    ranges, e.g. "10-20 30 35-40". Ranges are interpreted to include both their
77    ends (so the above example represents 18 individual blocks). Returns a
78    RangeSet object.
79
80    If the input has all its blocks in increasing order, then the 'monotonic'
81    attribute of the returned RangeSet will be set to True. For example the
82    input "10-20 30" is monotonic, but the input "15-20 30 10-14" is not, even
83    though they represent the same set of blocks (and the two RangeSets will
84    compare equal with ==).
85    """
86    return cls(text)
87
88  @classmethod
89  def parse_raw(cls, text):
90    """Parse a string generated by RangeSet.to_string_raw().
91
92    >>> RangeSet.parse_raw(RangeSet("0-9").to_string_raw())
93    <RangeSet("0-9")>
94    """
95
96    raw = [int(i) for i in text.split(',')]
97    assert raw[0] == len(raw[1:]), "Invalid raw string."
98
99    return cls(data=raw[1:])
100
101  def _parse_internal(self, text):
102    data = []
103    last = -1
104    monotonic = True
105    for p in text.split():
106      if "-" in p:
107        s, e = (int(x) for x in p.split("-"))
108        data.append(s)
109        data.append(e+1)
110        if last <= s <= e:
111          last = e
112        else:
113          monotonic = False
114      else:
115        s = int(p)
116        data.append(s)
117        data.append(s+1)
118        if last <= s:
119          last = s+1
120        else:
121          monotonic = False
122    data.sort()
123    self.data = tuple(self._remove_pairs(data))
124    self.monotonic = monotonic
125
126  @staticmethod
127  def _remove_pairs(source):
128    """Remove consecutive duplicate items to simplify the result.
129
130    [1, 2, 2, 5, 5, 10] will become [1, 10]."""
131    last = None
132    for i in source:
133      if i == last:
134        last = None
135      else:
136        if last is not None:
137          yield last
138        last = i
139    if last is not None:
140      yield last
141
142  def to_string(self):
143    out = []
144    for i in range(0, len(self.data), 2):
145      s, e = self.data[i:i+2]
146      if e == s+1:
147        out.append(str(s))
148      else:
149        out.append(str(s) + "-" + str(e-1))
150    return " ".join(out)
151
152  def to_string_raw(self):
153    assert self.data
154    return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
155
156  def union(self, other):
157    """Return a new RangeSet representing the union of this RangeSet
158    with the argument.
159
160    >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
161    <RangeSet("10-34")>
162    >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
163    <RangeSet("10-19 22 30-34")>
164    """
165    out = []
166    z = 0
167    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
168                            zip(other.data, itertools.cycle((+1, -1)))):
169      if (z == 0 and d == 1) or (z == 1 and d == -1):
170        out.append(p)
171      z += d
172    return RangeSet(data=out)
173
174  def intersect(self, other):
175    """Return a new RangeSet representing the intersection of this
176    RangeSet with the argument.
177
178    >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
179    <RangeSet("18-19 30-32")>
180    >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
181    <RangeSet("")>
182    """
183    out = []
184    z = 0
185    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
186                            zip(other.data, itertools.cycle((+1, -1)))):
187      if (z == 1 and d == 1) or (z == 2 and d == -1):
188        out.append(p)
189      z += d
190    return RangeSet(data=out)
191
192  def subtract(self, other):
193    """Return a new RangeSet representing subtracting the argument
194    from this RangeSet.
195
196    >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
197    <RangeSet("10-17 33-34")>
198    >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
199    <RangeSet("10-19 30-34")>
200    """
201
202    out = []
203    z = 0
204    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
205                            zip(other.data, itertools.cycle((-1, +1)))):
206      if (z == 0 and d == 1) or (z == 1 and d == -1):
207        out.append(p)
208      z += d
209    return RangeSet(data=out)
210
211  def overlaps(self, other):
212    """Returns true if the argument has a nonempty overlap with this
213    RangeSet.
214
215    >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
216    True
217    >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
218    False
219    """
220
221    # This is like intersect, but we can stop as soon as we discover the
222    # output is going to be nonempty.
223    z = 0
224    for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
225                            zip(other.data, itertools.cycle((+1, -1)))):
226      if (z == 1 and d == 1) or (z == 2 and d == -1):
227        return True
228      z += d
229    return False
230
231  def size(self):
232    """Returns the total size of the RangeSet (ie, how many integers
233    are in the set).
234
235    >>> RangeSet("10-19 30-34").size()
236    15
237    """
238
239    total = 0
240    for i, p in enumerate(self.data):
241      if i % 2:
242        total += p
243      else:
244        total -= p
245    return total
246
247  def map_within(self, other):
248    """'other' should be a subset of 'self'.  Returns a RangeSet
249    representing what 'other' would get translated to if the integers
250    of 'self' were translated down to be contiguous starting at zero.
251
252    >>> RangeSet("0-9").map_within(RangeSet("3-4"))
253    <RangeSet("3-4")>
254    >>> RangeSet("10-19").map_within(RangeSet("13-14"))
255    <RangeSet("3-4")>
256    >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
257    <RangeSet("7-12")>
258    >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
259    <RangeSet("2-3 7-12")>
260    """
261
262    out = []
263    offset = 0
264    start = None
265    for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
266                            zip(other.data, itertools.cycle((-1, +1)))):
267      if d == -5:
268        start = p
269      elif d == +5:
270        offset += p-start
271        start = None
272      else:
273        out.append(offset + p - start)
274    return RangeSet(data=out)
275
276  def extend(self, n):
277    """Extend the RangeSet by 'n' blocks.
278
279    The lower bound is guaranteed to be non-negative.
280
281    >>> RangeSet("0-9").extend(1)
282    <RangeSet("0-10")>
283    >>> RangeSet("10-19").extend(15)
284    <RangeSet("0-34")>
285    >>> RangeSet("10-19 30-39").extend(4)
286    <RangeSet("6-23 26-43")>
287    >>> RangeSet("10-19 30-39").extend(10)
288    <RangeSet("0-49")>
289    """
290    out = self
291    for i in range(0, len(self.data), 2):
292      s, e = self.data[i:i+2]
293      s1 = max(0, s - n)
294      e1 = e + n
295      out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
296    return out
297
298  def first(self, n):
299    """Return the RangeSet that contains at most the first 'n' integers.
300
301    >>> RangeSet("0-9").first(1)
302    <RangeSet("0")>
303    >>> RangeSet("10-19").first(5)
304    <RangeSet("10-14")>
305    >>> RangeSet("10-19").first(15)
306    <RangeSet("10-19")>
307    >>> RangeSet("10-19 30-39").first(3)
308    <RangeSet("10-12")>
309    >>> RangeSet("10-19 30-39").first(15)
310    <RangeSet("10-19 30-34")>
311    >>> RangeSet("10-19 30-39").first(30)
312    <RangeSet("10-19 30-39")>
313    >>> RangeSet("0-9").first(0)
314    <RangeSet("")>
315    """
316
317    if self.size() <= n:
318      return self
319
320    out = []
321    for s, e in self:
322      if e - s >= n:
323        out += (s, s+n)
324        break
325      else:
326        out += (s, e)
327        n -= e - s
328    return RangeSet(data=out)
329
330  def next_item(self):
331    """Return the next integer represented by the RangeSet.
332
333    >>> list(RangeSet("0-9").next_item())
334    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
335    >>> list(RangeSet("10-19 3-5").next_item())
336    [3, 4, 5, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
337    >>> list(RangeSet("10-19 3 5 7").next_item())
338    [3, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
339    """
340    for s, e in self:
341      for element in range(s, e):
342        yield element
343
344
345if __name__ == "__main__":
346  import doctest
347  doctest.testmod()
348