/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "snapuserd.h"

#include <csignal>
#include <optional>
#include <set>

#include <libsnapshot/snapuserd_client.h>

namespace android {
namespace snapshot {

using namespace android;
using namespace android::dm;
using android::base::unique_fd;

#define SNAP_LOG(level) LOG(level) << misc_name_ << ": "
#define SNAP_PLOG(level) PLOG(level) << misc_name_ << ": "

Snapuserd::Snapuserd(const std::string& misc_name, const std::string& cow_device,
                     const std::string& backing_device) {
    misc_name_ = misc_name;
    cow_device_ = cow_device;
    backing_store_device_ = backing_device;
    control_device_ = "/dev/dm-user/" + misc_name;
}

bool Snapuserd::InitializeWorkers() {
    for (int i = 0; i < NUM_THREADS_PER_PARTITION; i++) {
        std::unique_ptr<WorkerThread> wt = std::make_unique<WorkerThread>(
                cow_device_, backing_store_device_, control_device_, misc_name_, GetSharedPtr());

        worker_threads_.push_back(std::move(wt));
    }

    read_ahead_thread_ = std::make_unique<ReadAheadThread>(cow_device_, backing_store_device_,
                                                           misc_name_, GetSharedPtr());
    return true;
}

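// Persists the merge progress: bumps num_merge_ops in the mapped COW header
// and, if read-ahead is enabled, marks the scratch buffer state as in-progress,
// then msync()s the header block so the checkpoint survives a crash.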
bool Snapuserd::CommitMerge(int num_merge_ops) {
    struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_);
    ch->num_merge_ops += num_merge_ops;

    if (read_ahead_feature_ && read_ahead_ops_.size() > 0) {
        struct BufferState* ra_state = GetBufferState();
        ra_state->read_ahead_state = kCowReadAheadInProgress;
    }

    int ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC);
    if (ret < 0) {
        PLOG(ERROR) << "msync header failed: " << ret;
        return false;
    }

    merge_initiated_ = true;

    return true;
}

void Snapuserd::PrepareReadAhead() {
    if (!read_ahead_feature_) {
        return;
    }

    struct BufferState* ra_state = GetBufferState();
    // Check if the data has to be re-constructed from COW device
    if (ra_state->read_ahead_state == kCowReadAheadDone) {
        populate_data_from_cow_ = true;
    } else {
        populate_data_from_cow_ = false;
    }

    StartReadAhead();
}

bool Snapuserd::GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block, void* buffer) {
    if (!lock->owns_lock()) {
        SNAP_LOG(ERROR) << "GetRABuffer - Lock not held";
        return false;
    }
    std::unordered_map<uint64_t, void*>::iterator it = read_ahead_buffer_map_.find(block);

    // This will be true only for IOs generated as part of reading the root
    // filesystem. IOs related to merge should always be in the read-ahead cache.
    if (it == read_ahead_buffer_map_.end()) {
        return false;
    }

    // Theoretically, we could send the data back from the read-ahead buffer
    // all the way to the kernel without a memcpy. However, if the IO is
    // un-aligned, the wrapper function would need to touch the read-ahead
    // buffers and the transitions would be a bit more complicated.
    memcpy(buffer, it->second, BLOCK_SZ);
    return true;
}

// ========== State transition functions for read-ahead operations ===========
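//
// A rough summary of the io_state_ transitions implemented below:
//
//   READ_AHEAD_BEGIN        - set by StartReadAhead() when a batch of merge
//                             ops is done and the read-ahead thread should
//                             make forward progress.
//   READ_AHEAD_IN_PROGRESS  - set by WaitForMergeToComplete() once the
//                             read-ahead thread wakes up and starts IO.
//   IO_IN_PROGRESS          - set by ReadAheadIOCompleted(); the read-ahead
//                             cache is populated and can be consumed via
//                             GetReadAheadPopulatedBuffer().
//   READ_AHEAD_FAILURE      - set by ReadAheadIOFailed(); waiters bail out.
//   IO_TERMINATED           - set by MergeCompleted(); tells the read-ahead
//                             thread to exit.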

bool Snapuserd::GetReadAheadPopulatedBuffer(uint64_t block, void* buffer) {
    if (!read_ahead_feature_) {
        return false;
    }

    {
        std::unique_lock<std::mutex> lock(lock_);
        if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) {
            return false;
        }

        if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS) {
            return GetRABuffer(&lock, block, buffer);
        }
    }

    {
        // Read-ahead thread IO is in-progress. Wait for it to complete
        std::unique_lock<std::mutex> lock(lock_);
        while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE ||
                 io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS)) {
            cv.wait(lock);
        }

        return GetRABuffer(&lock, block, buffer);
    }
}

// This is invoked by read-ahead thread waiting for merge IO's
// to complete
bool Snapuserd::WaitForMergeToComplete() {
    {
        std::unique_lock<std::mutex> lock(lock_);
        while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN ||
                 io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED)) {
            cv.wait(lock);
        }

        if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED) {
            return false;
        }

        io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_IN_PROGRESS;
        return true;
    }
}

// This is invoked during the launch of worker threads. We wait
// for the read-ahead thread to be fully up before worker threads
// are launched; otherwise we would have a race between worker threads
// and the read-ahead thread, specifically during re-construction.
bool Snapuserd::WaitForReadAheadToStart() {
    {
        std::unique_lock<std::mutex> lock(lock_);
        while (!(io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS ||
                 io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE)) {
            cv.wait(lock);
        }

        if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) {
            return false;
        }

        return true;
    }
}

// Invoked by worker threads when a sequence of merge operations
// is complete, notifying the read-ahead thread to make forward
// progress.
void Snapuserd::StartReadAhead() {
    {
        std::lock_guard<std::mutex> lock(lock_);
        io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN;
    }

    cv.notify_one();
}

void Snapuserd::MergeCompleted() {
    {
        std::lock_guard<std::mutex> lock(lock_);
        io_state_ = READ_AHEAD_IO_TRANSITION::IO_TERMINATED;
    }

    cv.notify_one();
}

bool Snapuserd::ReadAheadIOCompleted(bool sync) {
    if (sync) {
        // Flush the entire buffer region
        int ret = msync(mapped_addr_, total_mapped_addr_length_, MS_SYNC);
        if (ret < 0) {
            PLOG(ERROR) << "msync failed after ReadAheadIOCompleted: " << ret;
            return false;
        }

        // Metadata and data are synced. Now, update the state.
        // We need to update the state after flushing data; if there is a crash
        // when read-ahead IO is in progress, the state of data in the COW file
        // is unknown. kCowReadAheadDone acts as a checkpoint wherein the data
        // in the scratch space is good and during next reboot, read-ahead thread
        // can safely re-construct the data.
        struct BufferState* ra_state = GetBufferState();
        ra_state->read_ahead_state = kCowReadAheadDone;

        ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC);
        if (ret < 0) {
            PLOG(ERROR) << "msync failed to flush Readahead completion state...";
            return false;
        }
    }

    // Notify the worker threads
    {
        std::lock_guard<std::mutex> lock(lock_);
        io_state_ = READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS;
    }

    cv.notify_all();
    return true;
}

void Snapuserd::ReadAheadIOFailed() {
    {
        std::lock_guard<std::mutex> lock(lock_);
        io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE;
    }

    cv.notify_all();
}

//========== End of state transition functions ====================

bool Snapuserd::IsChunkIdMetadata(chunk_t chunk) {
    uint32_t stride = exceptions_per_area_ + 1;
    lldiv_t divresult = lldiv(chunk, stride);

    return (divresult.rem == NUM_SNAPSHOT_HDR_CHUNKS);
}
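// Example (assuming exceptions_per_area_ == 256 and NUM_SNAPSHOT_HDR_CHUNKS == 1):
// the stride is 257, so chunk-ids 1, 258, 515, ... are metadata pages and are
// skipped by GetNextAllocatableChunkId() below.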

// Find the next free chunk-id to be assigned. Check if the next free
// chunk-id represents a metadata page. If so, skip it.
chunk_t Snapuserd::GetNextAllocatableChunkId(chunk_t chunk) {
    chunk_t next_chunk = chunk + 1;

    if (IsChunkIdMetadata(next_chunk)) {
        next_chunk += 1;
    }
    return next_chunk;
}

void Snapuserd::CheckMergeCompletionStatus() {
    if (!merge_initiated_) {
        SNAP_LOG(INFO) << "Merge was not initiated. Total-data-ops: " << reader_->total_data_ops();
        return;
    }

    struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_);

    SNAP_LOG(INFO) << "Merge-status: Total-Merged-ops: " << ch->num_merge_ops
                   << " Total-data-ops: " << reader_->total_data_ops();
}

/*
 * Read the metadata from the COW device and
 * construct the metadata as required by the kernel.
 *
 * Please see the design of the kernel COW format.
 *
 * 1: Read the metadata from the internal COW device.
 * 2: There are 3 COW operations:
 *     a: Replace op
 *     b: Copy op
 *     c: Zero op
 * 3: For each of the 3 operations, op->new_block
 *    represents the block number in the base device
 *    for which one of the 3 operations has to be applied.
 *    This represents the old_chunk in the kernel COW format.
 * 4: We need to assign a new_chunk for a corresponding old_chunk.
 * 5: The algorithm is similar to how the kernel assigns chunk numbers
 *    while creating exceptions. However, there are a few cases
 *    which need to be addressed here:
 *      a: The kernel scans the metadata pages backwards when a merge is
 *      initiated. Since we need to make sure that the merge ordering
 *      follows our COW format, we read the COW operations backwards and
 *      populate the metadata so that when the kernel starts merging from
 *      the back, those ops correspond to the beginning of our COW format.
 *      b: The kernel can batch-merge successive operations if the two
 *      chunk IDs are contiguous. This can be problematic when there is a
 *      crash during merge, specifically when a merge operation has
 *      dependencies. Such dependencies can only happen with copy operations.
 *
 *      To avoid this problem, we make sure overlapping copy operations
 *      are not batch merged.
 * 6: Use a monotonically increasing chunk number to assign the
 *    new_chunk.
 * 7: Each chunk-id represents either
 *        a: a metadata page or
 *        b: a data page.
 * 8: A chunk-id representing a data page is stored in a map.
 * 9: A chunk-id representing a metadata page is converted into a vector
 *    index. We store this in a vector as the kernel requests metadata in
 *    two stages:
 *       a: When the initial dm-snapshot device is created, the kernel
 *          requests all the metadata and stores it in its internal
 *          data-structures.
 *       b: During merge, the kernel requests the same metadata once again.
 *    In both these cases, a quick lookup based on chunk-id is done.
 * 10: When the chunk number is incremented, we need to check whether it
 *    represents a metadata page and, if so, skip it.
 * 11: Each 4k page will contain 256 disk exceptions. We call this
 *    exceptions_per_area_.
 * 12: The kernel will stop issuing metadata IO requests when the new-chunk
 *    ID is 0.
 */
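/*
 * Illustrative chunk-id layout (assuming 256 exceptions per area):
 *
 *   chunk 0          -> snapshot header
 *   chunk 1          -> metadata page for area 0
 *   chunks 2..257    -> data pages for area 0
 *   chunk 258        -> metadata page for area 1
 *   chunks 259..514  -> data pages for area 1
 *   ...
 */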
bool Snapuserd::ReadMetadata() {
    reader_ = std::make_unique<CowReader>();
    CowHeader header;
    CowOptions options;
    bool metadata_found = false;
    int replace_ops = 0, zero_ops = 0, copy_ops = 0;

    SNAP_LOG(DEBUG) << "ReadMetadata: Parsing cow file";

    if (!reader_->Parse(cow_fd_)) {
        SNAP_LOG(ERROR) << "Failed to parse";
        return false;
    }

    if (!reader_->GetHeader(&header)) {
        SNAP_LOG(ERROR) << "Failed to get header";
        return false;
    }

    if (!(header.block_size == BLOCK_SZ)) {
        SNAP_LOG(ERROR) << "Invalid header block size found: " << header.block_size;
        return false;
    }

    reader_->InitializeMerge();
    SNAP_LOG(DEBUG) << "Merge-ops: " << header.num_merge_ops;

    if (!MmapMetadata()) {
        SNAP_LOG(ERROR) << "mmap failed";
        return false;
    }

    // Initialize the iterator for reading metadata
    cowop_riter_ = reader_->GetRevOpIter();

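    // A "chunk" is CHUNK_SIZE sectors (typically 8 sectors of 512 bytes = 4K);
    // with 16-byte disk_exception entries this yields 256 exceptions per area.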
    exceptions_per_area_ = (CHUNK_SIZE << SECTOR_SHIFT) / sizeof(struct disk_exception);

    // Start from chunk number 2. Chunk 0 represents the header and chunk 1
    // represents the first metadata page.
    chunk_t data_chunk_id = NUM_SNAPSHOT_HDR_CHUNKS + 1;
    size_t num_ops = 0;

    loff_t offset = 0;
    std::unique_ptr<uint8_t[]> de_ptr =
            std::make_unique<uint8_t[]>(exceptions_per_area_ * sizeof(struct disk_exception));

    // This memset is important. The kernel will stop issuing IO when the new-chunk
    // ID is 0. When an area is not filled completely with all 256 exceptions,
    // this memset ensures that the metadata read is completed.
    memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));

    while (!cowop_riter_->Done()) {
        const CowOperation* cow_op = &cowop_riter_->Get();
        struct disk_exception* de =
                reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset);

        if (IsMetadataOp(*cow_op)) {
            cowop_riter_->Next();
            continue;
        }

        metadata_found = true;
        // This loop handles all the replace and zero ops.
        // We will handle the copy ops later as they require special
        // handling when assigning chunk-ids. Furthermore, we make
        // sure that replace/zero and copy ops are not batch merged; hence,
        // the bump of the chunk_id before breaking out of this loop.
        if (cow_op->type == kCowCopyOp) {
            data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
            break;
        }

        if (cow_op->type == kCowReplaceOp) {
            replace_ops++;
        } else if (cow_op->type == kCowZeroOp) {
            zero_ops++;
        }

        // Construct the disk-exception
        de->old_chunk = cow_op->new_block;
        de->new_chunk = data_chunk_id;

        // Store operation pointer.
        chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), cow_op));
        num_ops += 1;
        offset += sizeof(struct disk_exception);
        cowop_riter_->Next();

        SNAP_LOG(DEBUG) << num_ops << ":"
                        << " Old-chunk: " << de->old_chunk << " New-chunk: " << de->new_chunk;

        if (num_ops == exceptions_per_area_) {
            // Store it in the vector at the right index. This maps the chunk-id
            // to the vector index.
            vec_.push_back(std::move(de_ptr));
            offset = 0;
            num_ops = 0;

            // Create buffer for next area
            de_ptr = std::make_unique<uint8_t[]>(exceptions_per_area_ *
                                                 sizeof(struct disk_exception));
            memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));

            if (cowop_riter_->Done()) {
                vec_.push_back(std::move(de_ptr));
            }
        }

        data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
    }

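    // Number of 4K blocks that fit in the read-ahead data region of the scratch
    // space; e.g. with a 2 MB buffer and 8K of metadata this is
    // (2097152 - 8192) / 4096 = 510.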
    int num_ra_ops_per_iter = ((GetBufferDataSize()) / BLOCK_SZ);
    std::optional<chunk_t> prev_id = {};
    std::vector<const CowOperation*> vec;
    std::set<uint64_t> dest_blocks;
    std::set<uint64_t> source_blocks;
    size_t pending_copy_ops = exceptions_per_area_ - num_ops;
    uint64_t total_copy_ops = reader_->total_copy_ops();

    SNAP_LOG(DEBUG) << " Processing copy-ops at Area: " << vec_.size()
                    << " Number of replace/zero ops completed in this area: " << num_ops
                    << " Pending copy ops for this area: " << pending_copy_ops;
    while (!cowop_riter_->Done()) {
        do {
            const CowOperation* cow_op = &cowop_riter_->Get();
            if (IsMetadataOp(*cow_op)) {
                cowop_riter_->Next();
                continue;
            }

            // We have two specific cases to handle here:
            //
            // =====================================================
            // Case 1: Overlapping copy regions
            //
            // Ex:
            //
            // Source -> Destination
            //
            // 1: 15 -> 18
            // 2: 16 -> 19
            // 3: 17 -> 20
            // 4: 18 -> 21
            // 5: 19 -> 22
            // 6: 20 -> 23
            //
            // We have 6 copy operations to be executed in the OTA and there is
            // an overlap. Update-engine will write to the COW file as follows:
            //
            // Op-1: 20 -> 23
            // Op-2: 19 -> 22
            // Op-3: 18 -> 21
            // Op-4: 17 -> 20
            // Op-5: 16 -> 19
            // Op-6: 15 -> 18
            //
            // Note that the block numbers are contiguous. Hence, all 6 copy
            // operations can be batch merged. However, that would be
            // problematic if we have a crash, as blocks 20, 19 and 18 would
            // have been overwritten and hence subsequent recovery may end up
            // with silent data corruption when op-1, op-2 and op-3 are
            // re-executed.
            //
            // To address the above problem, the read-ahead thread will
            // read all the 6 source blocks and cache them in the scratch
            // space of the COW file. During merge, the read-ahead
            // thread will serve the blocks from the read-ahead cache.
            // If there is a crash during merge, then on subsequent reboot
            // the read-ahead thread will recover the data from the
            // scratch space and re-construct it, thereby avoiding any
            // loss of data.
            //
            // Note that we will follow the same order of COW operations
            // as present in the COW file. This will make sure that
            // the merge of operations is done based on the ops present
            // in the file.
            //===========================================================
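            // Stop batching copy ops as soon as the current op touches a block
            // already involved in this batch; overlapping copies must not be
            // batch merged (see the example above).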
            if (prev_id.has_value()) {
                if (dest_blocks.count(cow_op->new_block) || source_blocks.count(cow_op->source)) {
                    break;
                }
            }
            metadata_found = true;
            pending_copy_ops -= 1;
            vec.push_back(cow_op);
            dest_blocks.insert(cow_op->source);
            source_blocks.insert(cow_op->new_block);
            prev_id = cow_op->new_block;
            cowop_riter_->Next();
        } while (!cowop_riter_->Done() && pending_copy_ops);

        data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
        SNAP_LOG(DEBUG) << "Batch Merge copy-ops of size: " << vec.size()
                        << " Area: " << vec_.size() << " Area offset: " << offset
                        << " Pending-copy-ops in this area: " << pending_copy_ops;

        for (size_t i = 0; i < vec.size(); i++) {
            struct disk_exception* de =
                    reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset);
            const CowOperation* cow_op = vec[i];

            de->old_chunk = cow_op->new_block;
            de->new_chunk = data_chunk_id;

            // Store operation pointer.
            chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), cow_op));
            offset += sizeof(struct disk_exception);
            num_ops += 1;
            copy_ops++;
            if (read_ahead_feature_) {
                read_ahead_ops_.push_back(cow_op);
            }

            SNAP_LOG(DEBUG) << num_ops << ":"
                            << " Copy-op: "
                            << " Old-chunk: " << de->old_chunk << " New-chunk: " << de->new_chunk;

            if (num_ops == exceptions_per_area_) {
                // Store it in the vector at the right index. This maps the
                // chunk-id to the vector index.
                vec_.push_back(std::move(de_ptr));
                num_ops = 0;
                offset = 0;

                // Create buffer for next area
                de_ptr = std::make_unique<uint8_t[]>(exceptions_per_area_ *
                                                     sizeof(struct disk_exception));
                memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));

                if (cowop_riter_->Done()) {
                    vec_.push_back(std::move(de_ptr));
                    SNAP_LOG(DEBUG) << "ReadMetadata() completed; Number of Areas: " << vec_.size();
                }

                if (!(pending_copy_ops == 0)) {
                    SNAP_LOG(ERROR)
                            << "Invalid pending_copy_ops: expected: 0 found: " << pending_copy_ops;
                    return false;
                }
                pending_copy_ops = exceptions_per_area_;
            }

            data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
            total_copy_ops -= 1;
            /*
             * Split the number of ops based on the size of the read-ahead buffer
             * region. We need to ensure that the kernel doesn't issue IO on blocks
             * which have not been read by the read-ahead thread.
             */
            if (read_ahead_feature_ && (total_copy_ops % num_ra_ops_per_iter == 0)) {
                data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
            }
        }
        vec.clear();
        dest_blocks.clear();
        source_blocks.clear();
        prev_id.reset();
    }

    // Partially filled area or no metadata at all.
    // If there is no metadata, fill with zeroes so that the kernel
    // is aware that the merge is completed.
    if (num_ops || !metadata_found) {
        vec_.push_back(std::move(de_ptr));
        SNAP_LOG(DEBUG) << "ReadMetadata() completed. Partially filled area num_ops: " << num_ops
                        << " Areas: " << vec_.size();
    }

    chunk_vec_.shrink_to_fit();
    vec_.shrink_to_fit();
    read_ahead_ops_.shrink_to_fit();

    // Sort the vector based on sectors as we need this during un-aligned access
    std::sort(chunk_vec_.begin(), chunk_vec_.end(), compare);

    SNAP_LOG(INFO) << "ReadMetadata completed. Final-chunk-id: " << data_chunk_id
                   << " Num Sector: " << ChunkToSector(data_chunk_id)
                   << " Replace-ops: " << replace_ops << " Zero-ops: " << zero_ops
                   << " Copy-ops: " << copy_ops << " Areas: " << vec_.size()
                   << " Num-ops-merged: " << header.num_merge_ops
                   << " Total-data-ops: " << reader_->total_data_ops();

    // Total number of sectors required for creating dm-user device
    num_sectors_ = ChunkToSector(data_chunk_id);
    merge_initiated_ = false;
    PrepareReadAhead();

    return true;
}

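// Maps the COW header and, for v2 COWs that carry a scratch buffer, the
// read-ahead scratch region as well, so that merge/read-ahead state can be
// persisted with msync() (see CommitMerge() and ReadAheadIOCompleted()).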
bool Snapuserd::MmapMetadata() {
    CowHeader header;
    reader_->GetHeader(&header);

    if (header.major_version >= 2 && header.buffer_size > 0) {
        total_mapped_addr_length_ = header.header_size + BUFFER_REGION_DEFAULT_SIZE;
        read_ahead_feature_ = true;
    } else {
        // mmap the first 4k page - older COW format
        total_mapped_addr_length_ = BLOCK_SZ;
        read_ahead_feature_ = false;
    }

    mapped_addr_ = mmap(NULL, total_mapped_addr_length_, PROT_READ | PROT_WRITE, MAP_SHARED,
                        cow_fd_.get(), 0);
    if (mapped_addr_ == MAP_FAILED) {
        SNAP_LOG(ERROR) << "mmap metadata failed";
        return false;
    }

    return true;
}

void Snapuserd::UnmapBufferRegion() {
    int ret = munmap(mapped_addr_, total_mapped_addr_length_);
    if (ret < 0) {
        SNAP_PLOG(ERROR) << "munmap failed";
    }
}

void MyLogger(android::base::LogId, android::base::LogSeverity severity, const char*, const char*,
              unsigned int, const char* message) {
    if (severity == android::base::ERROR) {
        fprintf(stderr, "%s\n", message);
    } else {
        fprintf(stdout, "%s\n", message);
    }
}

bool Snapuserd::InitCowDevice() {
    cow_fd_.reset(open(cow_device_.c_str(), O_RDWR));
    if (cow_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Open Failed: " << cow_device_;
        return false;
    }

    return ReadMetadata();
}

/*
 * Entry point to launch threads
 */
bool Snapuserd::Start() {
    std::vector<std::future<bool>> threads;
    std::future<bool> ra_thread;
    bool rathread = (read_ahead_feature_ && (read_ahead_ops_.size() > 0));

    // Start the read-ahead thread and wait for it to come up,
    // as the data has to be re-constructed from the COW device.
    if (rathread) {
        ra_thread = std::async(std::launch::async, &ReadAheadThread::RunThread,
                               read_ahead_thread_.get());
        if (!WaitForReadAheadToStart()) {
            SNAP_LOG(ERROR) << "Failed to start Read-ahead thread...";
            return false;
        }

        SNAP_LOG(INFO) << "Read-ahead thread started...";
    }

    // Launch worker threads
    for (int i = 0; i < worker_threads_.size(); i++) {
        threads.emplace_back(
                std::async(std::launch::async, &WorkerThread::RunThread, worker_threads_[i].get()));
    }

    bool ret = true;
    for (auto& t : threads) {
        ret = t.get() && ret;
    }

    if (rathread) {
        // Notify the read-ahead thread that all worker threads
        // are done. We need this explicit notification when
        // there is an IO failure or there was a switch
        // of the dm-user table; this forces the read-ahead
        // thread to wake up.
        MergeCompleted();
        ret = ret && ra_thread.get();
    }

    return ret;
}

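// Accessors for the read-ahead scratch region of the COW file. The layout, as
// implied by the offsets computed below, is roughly:
//
//   [CowHeader][BufferState][read-ahead metadata][read-ahead data]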
uint64_t Snapuserd::GetBufferMetadataOffset() {
    CowHeader header;
    reader_->GetHeader(&header);

    size_t size = header.header_size + sizeof(BufferState);
    return size;
}

/*
 * Metadata for read-ahead is 16 bytes per block. For a 2 MB region, we will
 * end up with 8k (2 pages) worth of metadata. Thus, a 2 MB buffer
 * region is split into:
 *
 * 1: 8k of metadata
 * 2: The remaining space, which holds the read-ahead data
 */
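// For example, with header.buffer_size = 2 MB and 16-byte ScratchMetadata
// entries: (2097152 * 16) / 4096 = 8192 bytes of metadata.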
size_t Snapuserd::GetBufferMetadataSize() {
    CowHeader header;
    reader_->GetHeader(&header);

    size_t metadata_bytes = (header.buffer_size * sizeof(struct ScratchMetadata)) / BLOCK_SZ;
    return metadata_bytes;
}

size_t Snapuserd::GetBufferDataOffset() {
    CowHeader header;
    reader_->GetHeader(&header);

    return (header.header_size + GetBufferMetadataSize());
}

/*
 * (2MB - 8K = 2088960 bytes) will be the buffer region to hold the data.
 */
size_t Snapuserd::GetBufferDataSize() {
    CowHeader header;
    reader_->GetHeader(&header);

    size_t size = header.buffer_size - GetBufferMetadataSize();
    return size;
}

struct BufferState* Snapuserd::GetBufferState() {
    CowHeader header;
    reader_->GetHeader(&header);

    struct BufferState* ra_state =
            reinterpret_cast<struct BufferState*>((char*)mapped_addr_ + header.header_size);
    return ra_state;
}

}  // namespace snapshot
}  // namespace android