1# Copyright (C) 2014 The Android Open Source Project 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14 15from __future__ import print_function 16import heapq 17import itertools 18 19__all__ = ["RangeSet"] 20 21class RangeSet(object): 22 """A RangeSet represents a set of nonoverlapping ranges on the 23 integers (ie, a set of integers, but efficient when the set contains 24 lots of runs.""" 25 26 def __init__(self, data=None): 27 self.monotonic = False 28 if isinstance(data, str): 29 self._parse_internal(data) 30 elif data: 31 self.data = tuple(self._remove_pairs(data)) 32 else: 33 self.data = () 34 35 def __iter__(self): 36 for i in range(0, len(self.data), 2): 37 yield self.data[i:i+2] 38 39 def __eq__(self, other): 40 return self.data == other.data 41 def __ne__(self, other): 42 return self.data != other.data 43 def __nonzero__(self): 44 return bool(self.data) 45 46 def __str__(self): 47 if not self.data: 48 return "empty" 49 else: 50 return self.to_string() 51 52 def __repr__(self): 53 return '<RangeSet("' + self.to_string() + '")>' 54 55 @classmethod 56 def parse(cls, text): 57 """Parse a text string consisting of a space-separated list of 58 blocks and ranges, eg "10-20 30 35-40". Ranges are interpreted to 59 include both their ends (so the above example represents 18 60 individual blocks. Returns a RangeSet object. 61 62 If the input has all its blocks in increasing order, then returned 63 RangeSet will have an extra attribute 'monotonic' that is set to 64 True. For example the input "10-20 30" is monotonic, but the input 65 "15-20 30 10-14" is not, even though they represent the same set 66 of blocks (and the two RangeSets will compare equal with ==). 67 """ 68 return cls(text) 69 70 def _parse_internal(self, text): 71 data = [] 72 last = -1 73 monotonic = True 74 for p in text.split(): 75 if "-" in p: 76 s, e = p.split("-") 77 data.append(int(s)) 78 data.append(int(e)+1) 79 if last <= s <= e: 80 last = e 81 else: 82 monotonic = False 83 else: 84 s = int(p) 85 data.append(s) 86 data.append(s+1) 87 if last <= s: 88 last = s+1 89 else: 90 monotonic = True 91 data.sort() 92 self.data = tuple(self._remove_pairs(data)) 93 self.monotonic = monotonic 94 95 @staticmethod 96 def _remove_pairs(source): 97 last = None 98 for i in source: 99 if i == last: 100 last = None 101 else: 102 if last is not None: 103 yield last 104 last = i 105 if last is not None: 106 yield last 107 108 def to_string(self): 109 out = [] 110 for i in range(0, len(self.data), 2): 111 s, e = self.data[i:i+2] 112 if e == s+1: 113 out.append(str(s)) 114 else: 115 out.append(str(s) + "-" + str(e-1)) 116 return " ".join(out) 117 118 def to_string_raw(self): 119 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data) 120 121 def union(self, other): 122 """Return a new RangeSet representing the union of this RangeSet 123 with the argument. 124 125 >>> RangeSet("10-19 30-34").union(RangeSet("18-29")) 126 <RangeSet("10-34")> 127 >>> RangeSet("10-19 30-34").union(RangeSet("22 32")) 128 <RangeSet("10-19 22 30-34")> 129 """ 130 out = [] 131 z = 0 132 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 133 zip(other.data, itertools.cycle((+1, -1)))): 134 if (z == 0 and d == 1) or (z == 1 and d == -1): 135 out.append(p) 136 z += d 137 return RangeSet(data=out) 138 139 def intersect(self, other): 140 """Return a new RangeSet representing the intersection of this 141 RangeSet with the argument. 142 143 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32")) 144 <RangeSet("18-19 30-32")> 145 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28")) 146 <RangeSet("")> 147 """ 148 out = [] 149 z = 0 150 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 151 zip(other.data, itertools.cycle((+1, -1)))): 152 if (z == 1 and d == 1) or (z == 2 and d == -1): 153 out.append(p) 154 z += d 155 return RangeSet(data=out) 156 157 def subtract(self, other): 158 """Return a new RangeSet representing subtracting the argument 159 from this RangeSet. 160 161 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32")) 162 <RangeSet("10-17 33-34")> 163 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28")) 164 <RangeSet("10-19 30-34")> 165 """ 166 167 out = [] 168 z = 0 169 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 170 zip(other.data, itertools.cycle((-1, +1)))): 171 if (z == 0 and d == 1) or (z == 1 and d == -1): 172 out.append(p) 173 z += d 174 return RangeSet(data=out) 175 176 def overlaps(self, other): 177 """Returns true if the argument has a nonempty overlap with this 178 RangeSet. 179 180 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32")) 181 True 182 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28")) 183 False 184 """ 185 186 # This is like intersect, but we can stop as soon as we discover the 187 # output is going to be nonempty. 188 z = 0 189 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 190 zip(other.data, itertools.cycle((+1, -1)))): 191 if (z == 1 and d == 1) or (z == 2 and d == -1): 192 return True 193 z += d 194 return False 195 196 def size(self): 197 """Returns the total size of the RangeSet (ie, how many integers 198 are in the set). 199 200 >>> RangeSet("10-19 30-34").size() 201 15 202 """ 203 204 total = 0 205 for i, p in enumerate(self.data): 206 if i % 2: 207 total += p 208 else: 209 total -= p 210 return total 211 212 def map_within(self, other): 213 """'other' should be a subset of 'self'. Returns a RangeSet 214 representing what 'other' would get translated to if the integers 215 of 'self' were translated down to be contiguous starting at zero. 216 217 >>> RangeSet("0-9").map_within(RangeSet("3-4")) 218 <RangeSet("3-4")> 219 >>> RangeSet("10-19").map_within(RangeSet("13-14")) 220 <RangeSet("3-4")> 221 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32")) 222 <RangeSet("7-12")> 223 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32")) 224 <RangeSet("2-3 7-12")> 225 """ 226 227 out = [] 228 offset = 0 229 start = None 230 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))), 231 zip(other.data, itertools.cycle((-1, +1)))): 232 if d == -5: 233 start = p 234 elif d == +5: 235 offset += p-start 236 start = None 237 else: 238 out.append(offset + p - start) 239 return RangeSet(data=out) 240 241 def extend(self, n): 242 """Extend the RangeSet by 'n' blocks. 243 244 The lower bound is guaranteed to be non-negative. 245 246 >>> RangeSet("0-9").extend(1) 247 <RangeSet("0-10")> 248 >>> RangeSet("10-19").extend(15) 249 <RangeSet("0-34")> 250 >>> RangeSet("10-19 30-39").extend(4) 251 <RangeSet("6-23 26-43")> 252 >>> RangeSet("10-19 30-39").extend(10) 253 <RangeSet("0-49")> 254 """ 255 out = self 256 for i in range(0, len(self.data), 2): 257 s, e = self.data[i:i+2] 258 s1 = max(0, s - n) 259 e1 = e + n 260 out = out.union(RangeSet(str(s1) + "-" + str(e1-1))) 261 return out 262 263 264if __name__ == "__main__": 265 import doctest 266 doctest.testmod() 267