1# Copyright (C) 2014 The Android Open Source Project 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14 15from __future__ import print_function 16import heapq 17import itertools 18 19__all__ = ["RangeSet"] 20 21class RangeSet(object): 22 """A RangeSet represents a set of nonoverlapping ranges on the 23 integers (ie, a set of integers, but efficient when the set contains 24 lots of runs.""" 25 26 def __init__(self, data=None): 27 self.monotonic = False 28 if isinstance(data, str): 29 self._parse_internal(data) 30 elif data: 31 assert len(data) % 2 == 0 32 self.data = tuple(self._remove_pairs(data)) 33 self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:])) 34 else: 35 self.data = () 36 37 def __iter__(self): 38 for i in range(0, len(self.data), 2): 39 yield self.data[i:i+2] 40 41 def __eq__(self, other): 42 return self.data == other.data 43 44 def __ne__(self, other): 45 return self.data != other.data 46 47 def __nonzero__(self): 48 return bool(self.data) 49 50 def __str__(self): 51 if not self.data: 52 return "empty" 53 else: 54 return self.to_string() 55 56 def __repr__(self): 57 return '<RangeSet("' + self.to_string() + '")>' 58 59 @classmethod 60 def parse(cls, text): 61 """Parse a text string consisting of a space-separated list of 62 blocks and ranges, eg "10-20 30 35-40". Ranges are interpreted to 63 include both their ends (so the above example represents 18 64 individual blocks. Returns a RangeSet object. 65 66 If the input has all its blocks in increasing order, then returned 67 RangeSet will have an extra attribute 'monotonic' that is set to 68 True. For example the input "10-20 30" is monotonic, but the input 69 "15-20 30 10-14" is not, even though they represent the same set 70 of blocks (and the two RangeSets will compare equal with ==). 71 """ 72 return cls(text) 73 74 def _parse_internal(self, text): 75 data = [] 76 last = -1 77 monotonic = True 78 for p in text.split(): 79 if "-" in p: 80 s, e = (int(x) for x in p.split("-")) 81 data.append(s) 82 data.append(e+1) 83 if last <= s <= e: 84 last = e 85 else: 86 monotonic = False 87 else: 88 s = int(p) 89 data.append(s) 90 data.append(s+1) 91 if last <= s: 92 last = s+1 93 else: 94 monotonic = False 95 data.sort() 96 self.data = tuple(self._remove_pairs(data)) 97 self.monotonic = monotonic 98 99 @staticmethod 100 def _remove_pairs(source): 101 """Remove consecutive duplicate items to simplify the result. 102 103 [1, 2, 2, 5, 5, 10] will become [1, 10].""" 104 last = None 105 for i in source: 106 if i == last: 107 last = None 108 else: 109 if last is not None: 110 yield last 111 last = i 112 if last is not None: 113 yield last 114 115 def to_string(self): 116 out = [] 117 for i in range(0, len(self.data), 2): 118 s, e = self.data[i:i+2] 119 if e == s+1: 120 out.append(str(s)) 121 else: 122 out.append(str(s) + "-" + str(e-1)) 123 return " ".join(out) 124 125 def to_string_raw(self): 126 assert self.data 127 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data) 128 129 def union(self, other): 130 """Return a new RangeSet representing the union of this RangeSet 131 with the argument. 132 133 >>> RangeSet("10-19 30-34").union(RangeSet("18-29")) 134 <RangeSet("10-34")> 135 >>> RangeSet("10-19 30-34").union(RangeSet("22 32")) 136 <RangeSet("10-19 22 30-34")> 137 """ 138 out = [] 139 z = 0 140 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 141 zip(other.data, itertools.cycle((+1, -1)))): 142 if (z == 0 and d == 1) or (z == 1 and d == -1): 143 out.append(p) 144 z += d 145 return RangeSet(data=out) 146 147 def intersect(self, other): 148 """Return a new RangeSet representing the intersection of this 149 RangeSet with the argument. 150 151 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32")) 152 <RangeSet("18-19 30-32")> 153 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28")) 154 <RangeSet("")> 155 """ 156 out = [] 157 z = 0 158 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 159 zip(other.data, itertools.cycle((+1, -1)))): 160 if (z == 1 and d == 1) or (z == 2 and d == -1): 161 out.append(p) 162 z += d 163 return RangeSet(data=out) 164 165 def subtract(self, other): 166 """Return a new RangeSet representing subtracting the argument 167 from this RangeSet. 168 169 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32")) 170 <RangeSet("10-17 33-34")> 171 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28")) 172 <RangeSet("10-19 30-34")> 173 """ 174 175 out = [] 176 z = 0 177 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 178 zip(other.data, itertools.cycle((-1, +1)))): 179 if (z == 0 and d == 1) or (z == 1 and d == -1): 180 out.append(p) 181 z += d 182 return RangeSet(data=out) 183 184 def overlaps(self, other): 185 """Returns true if the argument has a nonempty overlap with this 186 RangeSet. 187 188 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32")) 189 True 190 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28")) 191 False 192 """ 193 194 # This is like intersect, but we can stop as soon as we discover the 195 # output is going to be nonempty. 196 z = 0 197 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 198 zip(other.data, itertools.cycle((+1, -1)))): 199 if (z == 1 and d == 1) or (z == 2 and d == -1): 200 return True 201 z += d 202 return False 203 204 def size(self): 205 """Returns the total size of the RangeSet (ie, how many integers 206 are in the set). 207 208 >>> RangeSet("10-19 30-34").size() 209 15 210 """ 211 212 total = 0 213 for i, p in enumerate(self.data): 214 if i % 2: 215 total += p 216 else: 217 total -= p 218 return total 219 220 def map_within(self, other): 221 """'other' should be a subset of 'self'. Returns a RangeSet 222 representing what 'other' would get translated to if the integers 223 of 'self' were translated down to be contiguous starting at zero. 224 225 >>> RangeSet("0-9").map_within(RangeSet("3-4")) 226 <RangeSet("3-4")> 227 >>> RangeSet("10-19").map_within(RangeSet("13-14")) 228 <RangeSet("3-4")> 229 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32")) 230 <RangeSet("7-12")> 231 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32")) 232 <RangeSet("2-3 7-12")> 233 """ 234 235 out = [] 236 offset = 0 237 start = None 238 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))), 239 zip(other.data, itertools.cycle((-1, +1)))): 240 if d == -5: 241 start = p 242 elif d == +5: 243 offset += p-start 244 start = None 245 else: 246 out.append(offset + p - start) 247 return RangeSet(data=out) 248 249 def extend(self, n): 250 """Extend the RangeSet by 'n' blocks. 251 252 The lower bound is guaranteed to be non-negative. 253 254 >>> RangeSet("0-9").extend(1) 255 <RangeSet("0-10")> 256 >>> RangeSet("10-19").extend(15) 257 <RangeSet("0-34")> 258 >>> RangeSet("10-19 30-39").extend(4) 259 <RangeSet("6-23 26-43")> 260 >>> RangeSet("10-19 30-39").extend(10) 261 <RangeSet("0-49")> 262 """ 263 out = self 264 for i in range(0, len(self.data), 2): 265 s, e = self.data[i:i+2] 266 s1 = max(0, s - n) 267 e1 = e + n 268 out = out.union(RangeSet(str(s1) + "-" + str(e1-1))) 269 return out 270 271 def first(self, n): 272 """Return the RangeSet that contains at most the first 'n' integers. 273 274 >>> RangeSet("0-9").first(1) 275 <RangeSet("0")> 276 >>> RangeSet("10-19").first(5) 277 <RangeSet("10-14")> 278 >>> RangeSet("10-19").first(15) 279 <RangeSet("10-19")> 280 >>> RangeSet("10-19 30-39").first(3) 281 <RangeSet("10-12")> 282 >>> RangeSet("10-19 30-39").first(15) 283 <RangeSet("10-19 30-34")> 284 >>> RangeSet("10-19 30-39").first(30) 285 <RangeSet("10-19 30-39")> 286 >>> RangeSet("0-9").first(0) 287 <RangeSet("")> 288 """ 289 290 if self.size() <= n: 291 return self 292 293 out = [] 294 for s, e in self: 295 if e - s >= n: 296 out += (s, s+n) 297 break 298 else: 299 out += (s, e) 300 n -= e - s 301 return RangeSet(data=out) 302 303 304if __name__ == "__main__": 305 import doctest 306 doctest.testmod() 307