1 // Copyright (C) 2020 The Android Open Source Project 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #pragma once 16 17 #include <linux/types.h> 18 #include <stdint.h> 19 #include <stdlib.h> 20 #include <sys/mman.h> 21 22 #include <bitset> 23 #include <condition_variable> 24 #include <csignal> 25 #include <cstring> 26 #include <future> 27 #include <iostream> 28 #include <limits> 29 #include <map> 30 #include <mutex> 31 #include <string> 32 #include <thread> 33 #include <unordered_map> 34 #include <unordered_set> 35 #include <vector> 36 37 #include <android-base/file.h> 38 #include <android-base/logging.h> 39 #include <android-base/stringprintf.h> 40 #include <android-base/unique_fd.h> 41 #include <libdm/dm.h> 42 #include <libsnapshot/cow_reader.h> 43 #include <libsnapshot/cow_writer.h> 44 #include <libsnapshot/snapuserd_kernel.h> 45 46 namespace android { 47 namespace snapshot { 48 49 using android::base::unique_fd; 50 using namespace std::chrono_literals; 51 52 static constexpr size_t PAYLOAD_SIZE = (1UL << 20); 53 static_assert(PAYLOAD_SIZE >= BLOCK_SZ); 54 55 /* 56 * With 4 threads, we get optimal performance 57 * when update_verifier reads the partition during 58 * boot. 59 */ 60 static constexpr int NUM_THREADS_PER_PARTITION = 4; 61 62 /* 63 * State transitions between worker threads and read-ahead 64 * threads. 65 * 66 * READ_AHEAD_BEGIN: Worker threads initiates the read-ahead 67 * thread to begin reading the copy operations 68 * for each bounded region. 69 * 70 * READ_AHEAD_IN_PROGRESS: When read ahead thread is in-flight 71 * and reading the copy operations. 72 * 73 * IO_IN_PROGRESS: Merge operation is in-progress by worker threads. 74 * 75 * IO_TERMINATED: When all the worker threads are done, request the 76 * read-ahead thread to terminate 77 * 78 * READ_AHEAD_FAILURE: If there are any IO failures when read-ahead 79 * thread is reading from COW device. 80 * 81 * The transition of each states is described in snapuserd_readahead.cpp 82 */ 83 enum class READ_AHEAD_IO_TRANSITION { 84 READ_AHEAD_BEGIN, 85 READ_AHEAD_IN_PROGRESS, 86 IO_IN_PROGRESS, 87 IO_TERMINATED, 88 READ_AHEAD_FAILURE, 89 }; 90 91 class BufferSink : public IByteSink { 92 public: 93 void Initialize(size_t size); GetBufPtr()94 void* GetBufPtr() { return buffer_.get(); } Clear()95 void Clear() { memset(GetBufPtr(), 0, buffer_size_); } 96 void* GetPayloadBuffer(size_t size); 97 void* GetBuffer(size_t requested, size_t* actual) override; UpdateBufferOffset(size_t size)98 void UpdateBufferOffset(size_t size) { buffer_offset_ += size; } 99 struct dm_user_header* GetHeaderPtr(); ReturnData(void *,size_t)100 bool ReturnData(void*, size_t) override { return true; } ResetBufferOffset()101 void ResetBufferOffset() { buffer_offset_ = 0; } 102 void* GetPayloadBufPtr(); 103 104 private: 105 std::unique_ptr<uint8_t[]> buffer_; 106 loff_t buffer_offset_; 107 size_t buffer_size_; 108 }; 109 110 class Snapuserd; 111 112 class ReadAheadThread { 113 public: 114 ReadAheadThread(const std::string& cow_device, const std::string& backing_device, 115 const std::string& misc_name, std::shared_ptr<Snapuserd> snapuserd); 116 bool RunThread(); 117 118 private: 119 void InitializeIter(); 120 bool IterDone(); 121 void IterNext(); 122 const CowOperation* GetIterOp(); 123 void InitializeBuffer(); 124 125 bool InitializeFds(); CloseFds()126 void CloseFds() { 127 cow_fd_ = {}; 128 backing_store_fd_ = {}; 129 } 130 131 bool ReadAheadIOStart(); 132 void PrepareReadAhead(uint64_t* source_block, int* pending_ops, std::vector<uint64_t>& blocks); 133 bool ReconstructDataFromCow(); 134 void CheckOverlap(const CowOperation* cow_op); 135 136 void* read_ahead_buffer_; 137 void* metadata_buffer_; 138 std::vector<const CowOperation*>::reverse_iterator read_ahead_iter_; 139 std::string cow_device_; 140 std::string backing_store_device_; 141 std::string misc_name_; 142 143 unique_fd cow_fd_; 144 unique_fd backing_store_fd_; 145 146 std::shared_ptr<Snapuserd> snapuserd_; 147 148 std::unordered_set<uint64_t> dest_blocks_; 149 std::unordered_set<uint64_t> source_blocks_; 150 bool overlap_; 151 }; 152 153 class WorkerThread { 154 public: 155 WorkerThread(const std::string& cow_device, const std::string& backing_device, 156 const std::string& control_device, const std::string& misc_name, 157 std::shared_ptr<Snapuserd> snapuserd); 158 bool RunThread(); 159 160 private: 161 // Initialization 162 void InitializeBufsink(); 163 bool InitializeFds(); 164 bool InitReader(); CloseFds()165 void CloseFds() { 166 ctrl_fd_ = {}; 167 backing_store_fd_ = {}; 168 } 169 170 // Functions interacting with dm-user 171 bool ReadDmUserHeader(); 172 bool DmuserReadRequest(); 173 bool DmuserWriteRequest(); 174 bool ReadDmUserPayload(void* buffer, size_t size); 175 bool WriteDmUserPayload(size_t size, bool header_response); 176 177 bool ReadDiskExceptions(chunk_t chunk, size_t size); 178 bool ZerofillDiskExceptions(size_t read_size); 179 void ConstructKernelCowHeader(); 180 181 // IO Path 182 bool ProcessIORequest(); 183 int ReadData(sector_t sector, size_t size); 184 int ReadUnalignedSector(sector_t sector, size_t size, 185 std::vector<std::pair<sector_t, const CowOperation*>>::iterator& it); 186 187 // Processing COW operations 188 bool ProcessCowOp(const CowOperation* cow_op); 189 bool ProcessReplaceOp(const CowOperation* cow_op); 190 bool ProcessCopyOp(const CowOperation* cow_op); 191 bool ProcessZeroOp(); 192 193 bool ReadFromBaseDevice(const CowOperation* cow_op); 194 bool GetReadAheadPopulatedBuffer(const CowOperation* cow_op); 195 196 // Merge related functions 197 bool ProcessMergeComplete(chunk_t chunk, void* buffer); 198 loff_t GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer, 199 int* unmerged_exceptions); 200 201 int GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset, 202 int unmerged_exceptions, bool* copy_op, bool* commit); 203 ChunkToSector(chunk_t chunk)204 sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; } SectorToChunk(sector_t sector)205 chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; } 206 207 std::unique_ptr<CowReader> reader_; 208 BufferSink bufsink_; 209 210 std::string cow_device_; 211 std::string backing_store_device_; 212 std::string control_device_; 213 std::string misc_name_; 214 215 unique_fd cow_fd_; 216 unique_fd backing_store_fd_; 217 unique_fd ctrl_fd_; 218 219 std::shared_ptr<Snapuserd> snapuserd_; 220 uint32_t exceptions_per_area_; 221 }; 222 223 class Snapuserd : public std::enable_shared_from_this<Snapuserd> { 224 public: 225 Snapuserd(const std::string& misc_name, const std::string& cow_device, 226 const std::string& backing_device); 227 bool InitCowDevice(); 228 bool Start(); GetControlDevicePath()229 const std::string& GetControlDevicePath() { return control_device_; } GetMiscName()230 const std::string& GetMiscName() { return misc_name_; } GetNumSectors()231 uint64_t GetNumSectors() { return num_sectors_; } IsAttached()232 bool IsAttached() const { return attached_; } AttachControlDevice()233 void AttachControlDevice() { attached_ = true; } 234 235 void CheckMergeCompletionStatus(); 236 bool CommitMerge(int num_merge_ops); 237 CloseFds()238 void CloseFds() { cow_fd_ = {}; } FreeResources()239 void FreeResources() { 240 worker_threads_.clear(); 241 read_ahead_thread_ = nullptr; 242 } GetMetadataAreaSize()243 size_t GetMetadataAreaSize() { return vec_.size(); } GetExceptionBuffer(size_t i)244 void* GetExceptionBuffer(size_t i) { return vec_[i].get(); } 245 246 bool InitializeWorkers(); GetSharedPtr()247 std::shared_ptr<Snapuserd> GetSharedPtr() { return shared_from_this(); } 248 GetChunkVec()249 std::vector<std::pair<sector_t, const CowOperation*>>& GetChunkVec() { return chunk_vec_; } GetMetadataVec()250 const std::vector<std::unique_ptr<uint8_t[]>>& GetMetadataVec() const { return vec_; } 251 compare(std::pair<sector_t,const CowOperation * > p1,std::pair<sector_t,const CowOperation * > p2)252 static bool compare(std::pair<sector_t, const CowOperation*> p1, 253 std::pair<sector_t, const CowOperation*> p2) { 254 return p1.first < p2.first; 255 } 256 257 void UnmapBufferRegion(); 258 bool MmapMetadata(); 259 260 // Read-ahead related functions GetReadAheadOpsVec()261 std::vector<const CowOperation*>& GetReadAheadOpsVec() { return read_ahead_ops_; } GetReadAheadMap()262 std::unordered_map<uint64_t, void*>& GetReadAheadMap() { return read_ahead_buffer_map_; } GetMappedAddr()263 void* GetMappedAddr() { return mapped_addr_; } IsReadAheadFeaturePresent()264 bool IsReadAheadFeaturePresent() { return read_ahead_feature_; } 265 void PrepareReadAhead(); 266 void StartReadAhead(); 267 void MergeCompleted(); 268 bool ReadAheadIOCompleted(bool sync); 269 void ReadAheadIOFailed(); 270 bool WaitForMergeToComplete(); 271 bool GetReadAheadPopulatedBuffer(uint64_t block, void* buffer); ReconstructDataFromCow()272 bool ReconstructDataFromCow() { return populate_data_from_cow_; } ReconstructDataFromCowFinish()273 void ReconstructDataFromCowFinish() { populate_data_from_cow_ = false; } 274 bool WaitForReadAheadToStart(); 275 276 uint64_t GetBufferMetadataOffset(); 277 size_t GetBufferMetadataSize(); 278 size_t GetBufferDataOffset(); 279 size_t GetBufferDataSize(); 280 281 // Final block to be merged in a given read-ahead buffer region SetFinalBlockMerged(uint64_t x)282 void SetFinalBlockMerged(uint64_t x) { final_block_merged_ = x; } GetFinalBlockMerged()283 uint64_t GetFinalBlockMerged() { return final_block_merged_; } 284 // Total number of blocks to be merged in a given read-ahead buffer region SetTotalRaBlocksMerged(int x)285 void SetTotalRaBlocksMerged(int x) { total_ra_blocks_merged_ = x; } GetTotalRaBlocksMerged()286 int GetTotalRaBlocksMerged() { return total_ra_blocks_merged_; } 287 288 private: 289 bool IsChunkIdMetadata(chunk_t chunk); 290 chunk_t GetNextAllocatableChunkId(chunk_t chunk_id); 291 292 bool GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block, void* buffer); 293 bool ReadMetadata(); ChunkToSector(chunk_t chunk)294 sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; } SectorToChunk(sector_t sector)295 chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; } IsBlockAligned(int read_size)296 bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); } 297 struct BufferState* GetBufferState(); 298 299 std::string cow_device_; 300 std::string backing_store_device_; 301 std::string control_device_; 302 std::string misc_name_; 303 304 unique_fd cow_fd_; 305 306 uint32_t exceptions_per_area_; 307 uint64_t num_sectors_; 308 309 std::unique_ptr<ICowOpIter> cowop_iter_; 310 std::unique_ptr<ICowOpReverseIter> cowop_riter_; 311 std::unique_ptr<CowReader> reader_; 312 313 // Vector of disk exception which is a 314 // mapping of old-chunk to new-chunk 315 std::vector<std::unique_ptr<uint8_t[]>> vec_; 316 317 // chunk_vec stores the pseudo mapping of sector 318 // to COW operations. 319 std::vector<std::pair<sector_t, const CowOperation*>> chunk_vec_; 320 321 std::mutex lock_; 322 std::condition_variable cv; 323 324 void* mapped_addr_; 325 size_t total_mapped_addr_length_; 326 327 std::vector<std::unique_ptr<WorkerThread>> worker_threads_; 328 // Read-ahead related 329 std::unordered_map<uint64_t, void*> read_ahead_buffer_map_; 330 std::vector<const CowOperation*> read_ahead_ops_; 331 bool populate_data_from_cow_ = false; 332 bool read_ahead_feature_; 333 uint64_t final_block_merged_; 334 int total_ra_blocks_merged_ = 0; 335 READ_AHEAD_IO_TRANSITION io_state_; 336 std::unique_ptr<ReadAheadThread> read_ahead_thread_; 337 338 bool merge_initiated_ = false; 339 bool attached_ = false; 340 }; 341 342 } // namespace snapshot 343 } // namespace android 344