1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <stddef.h>
20 
21 #include <optional>
22 #include <string>
23 #include <utility>
24 #include <vector>
25 
26 using Range = std::pair<size_t, size_t>;
27 
28 class RangeSet {
29  public:
RangeSet()30   RangeSet() : blocks_(0) {}
31 
32   explicit RangeSet(std::vector<Range>&& pairs);
33 
34   // Parses the given string into a RangeSet. Returns the parsed RangeSet, or an empty RangeSet on
35   // errors.
36   static RangeSet Parse(const std::string& range_text);
37 
38   // Appends the given Range to the current RangeSet.
39   bool PushBack(Range range);
40 
41   // Clears all the ranges from the RangeSet.
42   void Clear();
43 
44   std::string ToString() const;
45 
46   // Gets the block number for the i-th (starting from 0) block in the RangeSet.
47   size_t GetBlockNumber(size_t idx) const;
48 
49   // Returns whether the current RangeSet overlaps with other. RangeSet has half-closed half-open
50   // bounds. For example, "3,5" contains blocks 3 and 4. So "3,5" and "5,7" are not overlapped.
51   bool Overlaps(const RangeSet& other) const;
52 
53   // Returns a subset of ranges starting from |start_index| with respect to the original range. The
54   // output range will have |num_of_blocks| blocks in size. Returns std::nullopt if the input is
55   // invalid. e.g. RangeSet({{0, 5}, {10, 15}}).GetSubRanges(1, 5) returns
56   // RangeSet({{1, 5}, {10, 11}}).
57   std::optional<RangeSet> GetSubRanges(size_t start_index, size_t num_of_blocks) const;
58 
59   // Returns a vector of RangeSets that contain the same set of blocks represented by the current
60   // RangeSet. The RangeSets in the vector contain similar number of blocks, with a maximum delta
61   // of 1-block between any two of them. For example, 14 blocks would be split into 4 + 4 + 3 + 3,
62   // as opposed to 4 + 4 + 4 + 2. If the total number of blocks (T) is less than groups, it
63   // returns a vector of T 1-block RangeSets. Otherwise the number of the returned RangeSets must
64   // equal to groups. The current RangeSet remains intact after the split.
65   std::vector<RangeSet> Split(size_t groups) const;
66 
67   // Returns the number of Range's in this RangeSet.
size()68   size_t size() const {
69     return ranges_.size();
70   }
71 
72   // Returns the total number of blocks in this RangeSet.
blocks()73   size_t blocks() const {
74     return blocks_;
75   }
76 
cbegin()77   std::vector<Range>::const_iterator cbegin() const {
78     return ranges_.cbegin();
79   }
80 
cend()81   std::vector<Range>::const_iterator cend() const {
82     return ranges_.cend();
83   }
84 
begin()85   std::vector<Range>::iterator begin() {
86     return ranges_.begin();
87   }
88 
end()89   std::vector<Range>::iterator end() {
90     return ranges_.end();
91   }
92 
begin()93   std::vector<Range>::const_iterator begin() const {
94     return ranges_.begin();
95   }
96 
end()97   std::vector<Range>::const_iterator end() const {
98     return ranges_.end();
99   }
100 
101   // Reverse const iterators for MoveRange().
crbegin()102   std::vector<Range>::const_reverse_iterator crbegin() const {
103     return ranges_.crbegin();
104   }
105 
crend()106   std::vector<Range>::const_reverse_iterator crend() const {
107     return ranges_.crend();
108   }
109 
110   // Returns whether the RangeSet is valid (i.e. non-empty).
111   explicit operator bool() const {
112     return !ranges_.empty();
113   }
114 
115   const Range& operator[](size_t i) const {
116     return ranges_[i];
117   }
118 
119   bool operator==(const RangeSet& other) const {
120     // The orders of Range's matter. "4,1,5,8,10" != "4,8,10,1,5".
121     return (ranges_ == other.ranges_);
122   }
123 
124   bool operator!=(const RangeSet& other) const {
125     return ranges_ != other.ranges_;
126   }
127 
128  protected:
129   // Actual limit for each value and the total number are both INT_MAX.
130   std::vector<Range> ranges_;
131   size_t blocks_;
132 };
133 
134 // The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input
135 // files when we're handling large zip files. Specifically, we can treat the input file as a
136 // continuous RangeSet (i.e. RangeSet("0-99") for a 100 blocks file); and break it down into
137 // several smaller chunks based on the zip entries.
138 
139 // For example, [source: 0-99] can be split into
140 // [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89]
141 // Here "10-29" simply means block 10th to block 29th with respect to the original input file.
142 // Also, note that the split sources should be mutual exclusive, but they don't need to cover
143 // every block in the original source.
144 class SortedRangeSet : public RangeSet {
145  public:
146   // The block size when working with offset and file length.
147   static constexpr size_t kBlockSize = 4096;
148 
SortedRangeSet()149   SortedRangeSet() {}
150 
151   // Ranges in the the set should be mutually exclusive; and they're sorted by the start block.
152   explicit SortedRangeSet(std::vector<Range>&& pairs);
153 
154   void Insert(const Range& to_insert);
155 
156   // Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges.
157   void Insert(const SortedRangeSet& rs);
158 
159   // Compute the block range the file occupies, and insert that range.
160   void Insert(size_t start, size_t len);
161 
162   using RangeSet::Overlaps;
163 
164   bool Overlaps(size_t start, size_t len) const;
165 
166   // Given an offset of the file, checks if the corresponding block (by considering the file as
167   // 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset
168   // within this SortedRangeSet.
169   //
170   // For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte.
171   // The mapped offset within a SortedRangeSet("1-9 15-19") is 10.
172   //
173   // An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th
174   // item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10
175   // + 10) in a range represented by this SortedRangeSet.
176   size_t GetOffsetInRangeSet(size_t old_offset) const;
177 };
178