1 // Copyright (C) 2020 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #pragma once
16 
17 #include <linux/types.h>
18 #include <stdint.h>
19 #include <stdlib.h>
20 #include <sys/mman.h>
21 
22 #include <bitset>
23 #include <condition_variable>
24 #include <csignal>
25 #include <cstring>
26 #include <future>
27 #include <iostream>
28 #include <limits>
29 #include <map>
30 #include <mutex>
31 #include <string>
32 #include <thread>
33 #include <unordered_map>
34 #include <unordered_set>
35 #include <vector>
36 
37 #include <android-base/file.h>
38 #include <android-base/logging.h>
39 #include <android-base/stringprintf.h>
40 #include <android-base/unique_fd.h>
41 #include <libdm/dm.h>
42 #include <libsnapshot/cow_reader.h>
43 #include <libsnapshot/cow_writer.h>
44 #include <libsnapshot/snapuserd_kernel.h>
45 
46 namespace android {
47 namespace snapshot {
48 
49 using android::base::unique_fd;
50 using namespace std::chrono_literals;
51 
52 static constexpr size_t PAYLOAD_SIZE = (1UL << 20);
53 static_assert(PAYLOAD_SIZE >= BLOCK_SZ);
54 
/*
 * Threads used per partition. Four threads gave the best
 * performance when update_verifier reads the partition
 * during boot.
 */
static constexpr int NUM_THREADS_PER_PARTITION{4};
61 
/*
 * States exchanged between the worker threads and the
 * read-ahead thread.
 *
 * How each state is entered and left is described in
 * snapuserd_readahead.cpp.
 */
enum class READ_AHEAD_IO_TRANSITION {
    // A worker thread asks the read-ahead thread to begin reading
    // the copy operations for the next bounded region.
    READ_AHEAD_BEGIN,
    // The read-ahead thread is in flight and reading the copy
    // operations.
    READ_AHEAD_IN_PROGRESS,
    // The merge operation is in progress in the worker threads.
    IO_IN_PROGRESS,
    // All worker threads are done; the read-ahead thread is
    // requested to terminate.
    IO_TERMINATED,
    // The read-ahead thread hit an IO failure while reading from
    // the COW device.
    READ_AHEAD_FAILURE,
};
90 
91 class BufferSink : public IByteSink {
92   public:
93     void Initialize(size_t size);
GetBufPtr()94     void* GetBufPtr() { return buffer_.get(); }
Clear()95     void Clear() { memset(GetBufPtr(), 0, buffer_size_); }
96     void* GetPayloadBuffer(size_t size);
97     void* GetBuffer(size_t requested, size_t* actual) override;
UpdateBufferOffset(size_t size)98     void UpdateBufferOffset(size_t size) { buffer_offset_ += size; }
99     struct dm_user_header* GetHeaderPtr();
ReturnData(void *,size_t)100     bool ReturnData(void*, size_t) override { return true; }
ResetBufferOffset()101     void ResetBufferOffset() { buffer_offset_ = 0; }
102     void* GetPayloadBufPtr();
103 
104   private:
105     std::unique_ptr<uint8_t[]> buffer_;
106     loff_t buffer_offset_;
107     size_t buffer_size_;
108 };
109 
110 class Snapuserd;
111 
112 class ReadAheadThread {
113   public:
114     ReadAheadThread(const std::string& cow_device, const std::string& backing_device,
115                     const std::string& misc_name, std::shared_ptr<Snapuserd> snapuserd);
116     bool RunThread();
117 
118   private:
119     void InitializeIter();
120     bool IterDone();
121     void IterNext();
122     const CowOperation* GetIterOp();
123     void InitializeBuffer();
124 
125     bool InitializeFds();
CloseFds()126     void CloseFds() {
127         cow_fd_ = {};
128         backing_store_fd_ = {};
129     }
130 
131     bool ReadAheadIOStart();
132     void PrepareReadAhead(uint64_t* source_block, int* pending_ops, std::vector<uint64_t>& blocks);
133     bool ReconstructDataFromCow();
134     void CheckOverlap(const CowOperation* cow_op);
135 
136     void* read_ahead_buffer_;
137     void* metadata_buffer_;
138     std::vector<const CowOperation*>::reverse_iterator read_ahead_iter_;
139     std::string cow_device_;
140     std::string backing_store_device_;
141     std::string misc_name_;
142 
143     unique_fd cow_fd_;
144     unique_fd backing_store_fd_;
145 
146     std::shared_ptr<Snapuserd> snapuserd_;
147 
148     std::unordered_set<uint64_t> dest_blocks_;
149     std::unordered_set<uint64_t> source_blocks_;
150     bool overlap_;
151 };
152 
153 class WorkerThread {
154   public:
155     WorkerThread(const std::string& cow_device, const std::string& backing_device,
156                  const std::string& control_device, const std::string& misc_name,
157                  std::shared_ptr<Snapuserd> snapuserd);
158     bool RunThread();
159 
160   private:
161     // Initialization
162     void InitializeBufsink();
163     bool InitializeFds();
164     bool InitReader();
CloseFds()165     void CloseFds() {
166         ctrl_fd_ = {};
167         backing_store_fd_ = {};
168     }
169 
170     // Functions interacting with dm-user
171     bool ReadDmUserHeader();
172     bool DmuserReadRequest();
173     bool DmuserWriteRequest();
174     bool ReadDmUserPayload(void* buffer, size_t size);
175     bool WriteDmUserPayload(size_t size, bool header_response);
176 
177     bool ReadDiskExceptions(chunk_t chunk, size_t size);
178     bool ZerofillDiskExceptions(size_t read_size);
179     void ConstructKernelCowHeader();
180 
181     // IO Path
182     bool ProcessIORequest();
183     int ReadData(sector_t sector, size_t size);
184     int ReadUnalignedSector(sector_t sector, size_t size,
185                             std::vector<std::pair<sector_t, const CowOperation*>>::iterator& it);
186 
187     // Processing COW operations
188     bool ProcessCowOp(const CowOperation* cow_op);
189     bool ProcessReplaceOp(const CowOperation* cow_op);
190     bool ProcessCopyOp(const CowOperation* cow_op);
191     bool ProcessZeroOp();
192 
193     bool ReadFromBaseDevice(const CowOperation* cow_op);
194     bool GetReadAheadPopulatedBuffer(const CowOperation* cow_op);
195 
196     // Merge related functions
197     bool ProcessMergeComplete(chunk_t chunk, void* buffer);
198     loff_t GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer,
199                                int* unmerged_exceptions);
200 
201     int GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
202                              int unmerged_exceptions, bool* copy_op, bool* commit);
203 
ChunkToSector(chunk_t chunk)204     sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
SectorToChunk(sector_t sector)205     chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
206 
207     std::unique_ptr<CowReader> reader_;
208     BufferSink bufsink_;
209 
210     std::string cow_device_;
211     std::string backing_store_device_;
212     std::string control_device_;
213     std::string misc_name_;
214 
215     unique_fd cow_fd_;
216     unique_fd backing_store_fd_;
217     unique_fd ctrl_fd_;
218 
219     std::shared_ptr<Snapuserd> snapuserd_;
220     uint32_t exceptions_per_area_;
221 };
222 
223 class Snapuserd : public std::enable_shared_from_this<Snapuserd> {
224   public:
225     Snapuserd(const std::string& misc_name, const std::string& cow_device,
226               const std::string& backing_device);
227     bool InitCowDevice();
228     bool Start();
GetControlDevicePath()229     const std::string& GetControlDevicePath() { return control_device_; }
GetMiscName()230     const std::string& GetMiscName() { return misc_name_; }
GetNumSectors()231     uint64_t GetNumSectors() { return num_sectors_; }
IsAttached()232     bool IsAttached() const { return attached_; }
AttachControlDevice()233     void AttachControlDevice() { attached_ = true; }
234 
235     void CheckMergeCompletionStatus();
236     bool CommitMerge(int num_merge_ops);
237 
CloseFds()238     void CloseFds() { cow_fd_ = {}; }
FreeResources()239     void FreeResources() {
240         worker_threads_.clear();
241         read_ahead_thread_ = nullptr;
242     }
GetMetadataAreaSize()243     size_t GetMetadataAreaSize() { return vec_.size(); }
GetExceptionBuffer(size_t i)244     void* GetExceptionBuffer(size_t i) { return vec_[i].get(); }
245 
246     bool InitializeWorkers();
GetSharedPtr()247     std::shared_ptr<Snapuserd> GetSharedPtr() { return shared_from_this(); }
248 
GetChunkVec()249     std::vector<std::pair<sector_t, const CowOperation*>>& GetChunkVec() { return chunk_vec_; }
GetMetadataVec()250     const std::vector<std::unique_ptr<uint8_t[]>>& GetMetadataVec() const { return vec_; }
251 
compare(std::pair<sector_t,const CowOperation * > p1,std::pair<sector_t,const CowOperation * > p2)252     static bool compare(std::pair<sector_t, const CowOperation*> p1,
253                         std::pair<sector_t, const CowOperation*> p2) {
254         return p1.first < p2.first;
255     }
256 
257     void UnmapBufferRegion();
258     bool MmapMetadata();
259 
260     // Read-ahead related functions
GetReadAheadOpsVec()261     std::vector<const CowOperation*>& GetReadAheadOpsVec() { return read_ahead_ops_; }
GetReadAheadMap()262     std::unordered_map<uint64_t, void*>& GetReadAheadMap() { return read_ahead_buffer_map_; }
GetMappedAddr()263     void* GetMappedAddr() { return mapped_addr_; }
IsReadAheadFeaturePresent()264     bool IsReadAheadFeaturePresent() { return read_ahead_feature_; }
265     void PrepareReadAhead();
266     void StartReadAhead();
267     void MergeCompleted();
268     bool ReadAheadIOCompleted(bool sync);
269     void ReadAheadIOFailed();
270     bool WaitForMergeToComplete();
271     bool GetReadAheadPopulatedBuffer(uint64_t block, void* buffer);
ReconstructDataFromCow()272     bool ReconstructDataFromCow() { return populate_data_from_cow_; }
ReconstructDataFromCowFinish()273     void ReconstructDataFromCowFinish() { populate_data_from_cow_ = false; }
274     bool WaitForReadAheadToStart();
275 
276     uint64_t GetBufferMetadataOffset();
277     size_t GetBufferMetadataSize();
278     size_t GetBufferDataOffset();
279     size_t GetBufferDataSize();
280 
281     // Final block to be merged in a given read-ahead buffer region
SetFinalBlockMerged(uint64_t x)282     void SetFinalBlockMerged(uint64_t x) { final_block_merged_ = x; }
GetFinalBlockMerged()283     uint64_t GetFinalBlockMerged() { return final_block_merged_; }
284     // Total number of blocks to be merged in a given read-ahead buffer region
SetTotalRaBlocksMerged(int x)285     void SetTotalRaBlocksMerged(int x) { total_ra_blocks_merged_ = x; }
GetTotalRaBlocksMerged()286     int GetTotalRaBlocksMerged() { return total_ra_blocks_merged_; }
287 
288   private:
289     bool IsChunkIdMetadata(chunk_t chunk);
290     chunk_t GetNextAllocatableChunkId(chunk_t chunk_id);
291 
292     bool GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block, void* buffer);
293     bool ReadMetadata();
ChunkToSector(chunk_t chunk)294     sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
SectorToChunk(sector_t sector)295     chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
IsBlockAligned(int read_size)296     bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); }
297     struct BufferState* GetBufferState();
298 
299     std::string cow_device_;
300     std::string backing_store_device_;
301     std::string control_device_;
302     std::string misc_name_;
303 
304     unique_fd cow_fd_;
305 
306     uint32_t exceptions_per_area_;
307     uint64_t num_sectors_;
308 
309     std::unique_ptr<ICowOpIter> cowop_iter_;
310     std::unique_ptr<ICowOpReverseIter> cowop_riter_;
311     std::unique_ptr<CowReader> reader_;
312 
313     // Vector of disk exception which is a
314     // mapping of old-chunk to new-chunk
315     std::vector<std::unique_ptr<uint8_t[]>> vec_;
316 
317     // chunk_vec stores the pseudo mapping of sector
318     // to COW operations.
319     std::vector<std::pair<sector_t, const CowOperation*>> chunk_vec_;
320 
321     std::mutex lock_;
322     std::condition_variable cv;
323 
324     void* mapped_addr_;
325     size_t total_mapped_addr_length_;
326 
327     std::vector<std::unique_ptr<WorkerThread>> worker_threads_;
328     // Read-ahead related
329     std::unordered_map<uint64_t, void*> read_ahead_buffer_map_;
330     std::vector<const CowOperation*> read_ahead_ops_;
331     bool populate_data_from_cow_ = false;
332     bool read_ahead_feature_;
333     uint64_t final_block_merged_;
334     int total_ra_blocks_merged_ = 0;
335     READ_AHEAD_IO_TRANSITION io_state_;
336     std::unique_ptr<ReadAheadThread> read_ahead_thread_;
337 
338     bool merge_initiated_ = false;
339     bool attached_ = false;
340 };
341 
342 }  // namespace snapshot
343 }  // namespace android
344