1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include <stddef.h> 20 21 #include <optional> 22 #include <string> 23 #include <utility> 24 #include <vector> 25 26 using Range = std::pair<size_t, size_t>; 27 28 class RangeSet { 29 public: RangeSet()30 RangeSet() : blocks_(0) {} 31 32 explicit RangeSet(std::vector<Range>&& pairs); 33 34 // Parses the given string into a RangeSet. Returns the parsed RangeSet, or an empty RangeSet on 35 // errors. 36 static RangeSet Parse(const std::string& range_text); 37 38 // Appends the given Range to the current RangeSet. 39 bool PushBack(Range range); 40 41 // Clears all the ranges from the RangeSet. 42 void Clear(); 43 44 std::string ToString() const; 45 46 // Gets the block number for the i-th (starting from 0) block in the RangeSet. 47 size_t GetBlockNumber(size_t idx) const; 48 49 // Returns whether the current RangeSet overlaps with other. RangeSet has half-closed half-open 50 // bounds. For example, "3,5" contains blocks 3 and 4. So "3,5" and "5,7" are not overlapped. 51 bool Overlaps(const RangeSet& other) const; 52 53 // Returns a subset of ranges starting from |start_index| with respect to the original range. The 54 // output range will have |num_of_blocks| blocks in size. Returns std::nullopt if the input is 55 // invalid. e.g. RangeSet({{0, 5}, {10, 15}}).GetSubRanges(1, 5) returns 56 // RangeSet({{1, 5}, {10, 11}}). 57 std::optional<RangeSet> GetSubRanges(size_t start_index, size_t num_of_blocks) const; 58 59 // Returns a vector of RangeSets that contain the same set of blocks represented by the current 60 // RangeSet. The RangeSets in the vector contain similar number of blocks, with a maximum delta 61 // of 1-block between any two of them. For example, 14 blocks would be split into 4 + 4 + 3 + 3, 62 // as opposed to 4 + 4 + 4 + 2. If the total number of blocks (T) is less than groups, it 63 // returns a vector of T 1-block RangeSets. Otherwise the number of the returned RangeSets must 64 // equal to groups. The current RangeSet remains intact after the split. 65 std::vector<RangeSet> Split(size_t groups) const; 66 67 // Returns the number of Range's in this RangeSet. size()68 size_t size() const { 69 return ranges_.size(); 70 } 71 72 // Returns the total number of blocks in this RangeSet. blocks()73 size_t blocks() const { 74 return blocks_; 75 } 76 cbegin()77 std::vector<Range>::const_iterator cbegin() const { 78 return ranges_.cbegin(); 79 } 80 cend()81 std::vector<Range>::const_iterator cend() const { 82 return ranges_.cend(); 83 } 84 begin()85 std::vector<Range>::iterator begin() { 86 return ranges_.begin(); 87 } 88 end()89 std::vector<Range>::iterator end() { 90 return ranges_.end(); 91 } 92 begin()93 std::vector<Range>::const_iterator begin() const { 94 return ranges_.begin(); 95 } 96 end()97 std::vector<Range>::const_iterator end() const { 98 return ranges_.end(); 99 } 100 101 // Reverse const iterators for MoveRange(). crbegin()102 std::vector<Range>::const_reverse_iterator crbegin() const { 103 return ranges_.crbegin(); 104 } 105 crend()106 std::vector<Range>::const_reverse_iterator crend() const { 107 return ranges_.crend(); 108 } 109 110 // Returns whether the RangeSet is valid (i.e. non-empty). 111 explicit operator bool() const { 112 return !ranges_.empty(); 113 } 114 115 const Range& operator[](size_t i) const { 116 return ranges_[i]; 117 } 118 119 bool operator==(const RangeSet& other) const { 120 // The orders of Range's matter. "4,1,5,8,10" != "4,8,10,1,5". 121 return (ranges_ == other.ranges_); 122 } 123 124 bool operator!=(const RangeSet& other) const { 125 return ranges_ != other.ranges_; 126 } 127 128 protected: 129 // Actual limit for each value and the total number are both INT_MAX. 130 std::vector<Range> ranges_; 131 size_t blocks_; 132 }; 133 134 // The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input 135 // files when we're handling large zip files. Specifically, we can treat the input file as a 136 // continuous RangeSet (i.e. RangeSet("0-99") for a 100 blocks file); and break it down into 137 // several smaller chunks based on the zip entries. 138 139 // For example, [source: 0-99] can be split into 140 // [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89] 141 // Here "10-29" simply means block 10th to block 29th with respect to the original input file. 142 // Also, note that the split sources should be mutual exclusive, but they don't need to cover 143 // every block in the original source. 144 class SortedRangeSet : public RangeSet { 145 public: 146 // The block size when working with offset and file length. 147 static constexpr size_t kBlockSize = 4096; 148 SortedRangeSet()149 SortedRangeSet() {} 150 151 // Ranges in the the set should be mutually exclusive; and they're sorted by the start block. 152 explicit SortedRangeSet(std::vector<Range>&& pairs); 153 154 void Insert(const Range& to_insert); 155 156 // Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges. 157 void Insert(const SortedRangeSet& rs); 158 159 // Compute the block range the file occupies, and insert that range. 160 void Insert(size_t start, size_t len); 161 162 using RangeSet::Overlaps; 163 164 bool Overlaps(size_t start, size_t len) const; 165 166 // Given an offset of the file, checks if the corresponding block (by considering the file as 167 // 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset 168 // within this SortedRangeSet. 169 // 170 // For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte. 171 // The mapped offset within a SortedRangeSet("1-9 15-19") is 10. 172 // 173 // An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th 174 // item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10 175 // + 10) in a range represented by this SortedRangeSet. 176 size_t GetOffsetInRangeSet(size_t old_offset) const; 177 }; 178