1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <stddef.h>
20 
21 #include <string>
22 #include <utility>
23 #include <vector>
24 
25 using Range = std::pair<size_t, size_t>;
26 
27 class RangeSet {
28  public:
RangeSet()29   RangeSet() : blocks_(0) {}
30 
31   explicit RangeSet(std::vector<Range>&& pairs);
32 
33   // Parses the given string into a RangeSet. Returns the parsed RangeSet, or an empty RangeSet on
34   // errors.
35   static RangeSet Parse(const std::string& range_text);
36 
37   // Appends the given Range to the current RangeSet.
38   bool PushBack(Range range);
39 
40   // Clears all the ranges from the RangeSet.
41   void Clear();
42 
43   std::string ToString() const;
44 
45   // Gets the block number for the i-th (starting from 0) block in the RangeSet.
46   size_t GetBlockNumber(size_t idx) const;
47 
48   // Returns whether the current RangeSet overlaps with other. RangeSet has half-closed half-open
49   // bounds. For example, "3,5" contains blocks 3 and 4. So "3,5" and "5,7" are not overlapped.
50   bool Overlaps(const RangeSet& other) const;
51 
52   // Returns a vector of RangeSets that contain the same set of blocks represented by the current
53   // RangeSet. The RangeSets in the vector contain similar number of blocks, with a maximum delta
54   // of 1-block between any two of them. For example, 14 blocks would be split into 4 + 4 + 3 + 3,
55   // as opposed to 4 + 4 + 4 + 2. If the total number of blocks (T) is less than groups, it
56   // returns a vector of T 1-block RangeSets. Otherwise the number of the returned RangeSets must
57   // equal to groups. The current RangeSet remains intact after the split.
58   std::vector<RangeSet> Split(size_t groups) const;
59 
60   // Returns the number of Range's in this RangeSet.
size()61   size_t size() const {
62     return ranges_.size();
63   }
64 
65   // Returns the total number of blocks in this RangeSet.
blocks()66   size_t blocks() const {
67     return blocks_;
68   }
69 
cbegin()70   std::vector<Range>::const_iterator cbegin() const {
71     return ranges_.cbegin();
72   }
73 
cend()74   std::vector<Range>::const_iterator cend() const {
75     return ranges_.cend();
76   }
77 
begin()78   std::vector<Range>::iterator begin() {
79     return ranges_.begin();
80   }
81 
end()82   std::vector<Range>::iterator end() {
83     return ranges_.end();
84   }
85 
begin()86   std::vector<Range>::const_iterator begin() const {
87     return ranges_.begin();
88   }
89 
end()90   std::vector<Range>::const_iterator end() const {
91     return ranges_.end();
92   }
93 
94   // Reverse const iterators for MoveRange().
crbegin()95   std::vector<Range>::const_reverse_iterator crbegin() const {
96     return ranges_.crbegin();
97   }
98 
crend()99   std::vector<Range>::const_reverse_iterator crend() const {
100     return ranges_.crend();
101   }
102 
103   // Returns whether the RangeSet is valid (i.e. non-empty).
104   explicit operator bool() const {
105     return !ranges_.empty();
106   }
107 
108   const Range& operator[](size_t i) const {
109     return ranges_[i];
110   }
111 
112   bool operator==(const RangeSet& other) const {
113     // The orders of Range's matter. "4,1,5,8,10" != "4,8,10,1,5".
114     return (ranges_ == other.ranges_);
115   }
116 
117   bool operator!=(const RangeSet& other) const {
118     return ranges_ != other.ranges_;
119   }
120 
121  protected:
122   // Actual limit for each value and the total number are both INT_MAX.
123   std::vector<Range> ranges_;
124   size_t blocks_;
125 };
126 
127 // The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input
128 // files when we're handling large zip files. Specifically, we can treat the input file as a
129 // continuous RangeSet (i.e. RangeSet("0-99") for a 100 blocks file); and break it down into
130 // several smaller chunks based on the zip entries.
131 
132 // For example, [source: 0-99] can be split into
133 // [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89]
134 // Here "10-29" simply means block 10th to block 29th with respect to the original input file.
135 // Also, note that the split sources should be mutual exclusive, but they don't need to cover
136 // every block in the original source.
137 class SortedRangeSet : public RangeSet {
138  public:
139   // The block size when working with offset and file length.
140   static constexpr size_t kBlockSize = 4096;
141 
SortedRangeSet()142   SortedRangeSet() {}
143 
144   // Ranges in the the set should be mutually exclusive; and they're sorted by the start block.
145   explicit SortedRangeSet(std::vector<Range>&& pairs);
146 
147   void Insert(const Range& to_insert);
148 
149   // Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges.
150   void Insert(const SortedRangeSet& rs);
151 
152   // Compute the block range the file occupies, and insert that range.
153   void Insert(size_t start, size_t len);
154 
155   using RangeSet::Overlaps;
156 
157   bool Overlaps(size_t start, size_t len) const;
158 
159   // Given an offset of the file, checks if the corresponding block (by considering the file as
160   // 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset
161   // within this SortedRangeSet.
162   //
163   // For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte.
164   // The mapped offset within a SortedRangeSet("1-9 15-19") is 10.
165   //
166   // An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th
167   // item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10
168   // + 10) in a range represented by this SortedRangeSet.
169   size_t GetOffsetInRangeSet(size_t old_offset) const;
170 };
171