1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #pragma once
16 
17 #include <android-base/logging.h>
18 #include <stdint.h>
19 
20 #include <optional>
21 #include <vector>
22 
23 namespace android {
24 namespace snapshot {
25 
26 class DmSnapCowSizeCalculator {
27   public:
DmSnapCowSizeCalculator(unsigned int sector_bytes,unsigned int chunk_sectors)28     DmSnapCowSizeCalculator(unsigned int sector_bytes, unsigned int chunk_sectors)
29         : sector_bytes_(sector_bytes),
30           chunk_sectors_(chunk_sectors),
31           exceptions_per_chunk(chunk_sectors_ * sector_bytes_ / exception_size_bytes) {}
32 
WriteByte(uint64_t address)33     void WriteByte(uint64_t address) { WriteSector(address / sector_bytes_); }
WriteSector(uint64_t sector)34     void WriteSector(uint64_t sector) { WriteChunk(sector / chunk_sectors_); }
WriteChunk(uint64_t chunk_id)35     void WriteChunk(uint64_t chunk_id) {
36         if (!valid_) {
37             return;
38         }
39 
40         if (modified_chunks_.size() <= chunk_id) {
41             if (modified_chunks_.max_size() <= chunk_id) {
42                 LOG(ERROR) << "Invalid COW size, chunk_id is too large.";
43                 valid_ = false;
44                 return;
45             }
46             modified_chunks_.resize(chunk_id + 1, false);
47             if (modified_chunks_.size() <= chunk_id) {
48                 LOG(ERROR) << "Invalid COW size, chunk_id is too large.";
49                 valid_ = false;
50                 return;
51             }
52         }
53 
54         modified_chunks_[chunk_id] = true;
55     }
56 
cow_size_bytes()57     std::optional<uint64_t> cow_size_bytes() const {
58         auto sectors = cow_size_sectors();
59         if (!sectors) {
60             return std::nullopt;
61         }
62         return sectors.value() * sector_bytes_;
63     }
cow_size_sectors()64     std::optional<uint64_t> cow_size_sectors() const {
65         auto chunks = cow_size_chunks();
66         if (!chunks) {
67             return std::nullopt;
68         }
69         return chunks.value() * chunk_sectors_;
70     }
71 
72     /*
73      * The COW device has a precise internal structure as follows:
74      *
75      * - header (1 chunk)
76      * - #0 map and chunks
77      *   - map (1 chunk)
78      *   - chunks addressable by previous map (exceptions_per_chunk)
79      * - #1 map and chunks
80      *   - map (1 chunk)
81      *   - chunks addressable by previous map (exceptions_per_chunk)
82      * ...
83      * - #n: map and chunks
84      *   - map (1 chunk)
85      *   - chunks addressable by previous map (exceptions_per_chunk)
86      * - 1 extra chunk
87      */
cow_size_chunks()88     std::optional<uint64_t> cow_size_chunks() const {
89         if (!valid_) {
90             LOG(ERROR) << "Invalid COW size.";
91             return std::nullopt;
92         }
93 
94         uint64_t modified_chunks_count = 0;
95         uint64_t cow_chunks = 0;
96 
97         for (const auto& c : modified_chunks_) {
98             if (c) {
99                 ++modified_chunks_count;
100             }
101         }
102 
103         /* disk header + padding = 1 chunk */
104         cow_chunks += 1;
105 
106         /* snapshot modified chunks */
107         cow_chunks += modified_chunks_count;
108 
109         /* snapshot chunks index metadata */
110         cow_chunks += 1 + modified_chunks_count / exceptions_per_chunk;
111 
112         return cow_chunks;
113     }
114 
115   private:
116     /*
117      * Size of each sector in bytes.
118      */
119     const uint64_t sector_bytes_;
120 
121     /*
122      * Size of each chunk in sectors.
123      */
124     const uint64_t chunk_sectors_;
125 
126     /*
127      * The COW device stores tables to map the modified chunks. Each table has
128      * the size of exactly 1 chunk.
129      * Each entry of the table is called exception and the number of exceptions
130      * that each table can contain determines the number of data chunks that
131      * separate two consecutive tables. This value is then fundamental to
132      * compute the space overhead introduced by the tables in COW devices.
133      */
134     const uint64_t exceptions_per_chunk;
135 
136     /*
137      * Each row of the table (called exception in the kernel) contains two
138      * 64 bit indices to identify the corresponding chunk, and this 128 bit
139      * pair is constant in size.
140      */
141     static constexpr unsigned int exception_size_bytes = 64 * 2 / 8;
142 
143     /*
144      * Validity check for the container.
145      * It may happen that the caller attempts the write of an invalid chunk
146      * identifier, and this misbehavior is accounted and stored in this value.
147      */
148     bool valid_ = true;
149 
150     /*
151      * |modified_chunks_| is a container that keeps trace of the modified
152      * chunks.
153      * Multiple options were considered when choosing the most appropriate data
154      * structure for this container. Here follows a summary of why vector<bool>
155      * has been chosen, taking as a reference a snapshot partition of 4 GiB and
156      * chunk size of 4 KiB.
157      * - std::set<uint64_t> is very space-efficient for a small number of
158      *   operations, but if the whole snapshot is changed, it would need to
159      *   store
160      *     4 GiB / 4 KiB * (64 bit / 8) = 8 MiB
161      *   just for the data, plus the additional data overhead for the red-black
162      *   tree used for data sorting (if each rb-tree element stores 3 address
163      *   and the word-aligne color, the total size grows to 32 MiB).
164      * - std::bitset<N> is not a good fit because requires a priori knowledge,
165      *   at compile time, of the bitset size.
166      * - std::vector<bool> is a special case of vector, which uses a data
167      *   compression that allows reducing the space utilization of each element
168      *   to 1 bit. In detail, this data structure is composed of a resizable
169      *   array of words, each of them representing a bitmap. On a 64 bit
170      *   device, modifying the whole 4 GiB snapshot grows this container up to
171      *     4 * GiB / 4 KiB / 64 = 64 KiB
172      *   that, even if is the same space requirement to change a single byte at
173      *   the highest address of the snapshot, is a very affordable space
174      *   requirement.
175      */
176     std::vector<bool> modified_chunks_;
177 };
178 
179 }  // namespace snapshot
180 }  // namespace android
181