/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "snapuserd.h"

#include <csignal>
#include <optional>
#include <set>

#include <libsnapshot/snapuserd_client.h>

namespace android {
namespace snapshot {

using namespace android;
using namespace android::dm;
using android::base::unique_fd;

#define SNAP_LOG(level) LOG(level) << misc_name_ << ": "
#define SNAP_PLOG(level) PLOG(level) << misc_name_ << ": "

Snapuserd::Snapuserd(const std::string& misc_name, const std::string& cow_device,
                     const std::string& backing_device) {
    misc_name_ = misc_name;
    cow_device_ = cow_device;
    backing_store_device_ = backing_device;
    control_device_ = "/dev/dm-user/" + misc_name;
}

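// Create NUM_THREADS_PER_PARTITION worker threads and a single read-ahead
// thread. The thread objects are only constructed here; they are launched
// later from Start().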
bool Snapuserd::InitializeWorkers() {
    for (int i = 0; i < NUM_THREADS_PER_PARTITION; i++) {
        std::unique_ptr<WorkerThread> wt = std::make_unique<WorkerThread>(
                cow_device_, backing_store_device_, control_device_, misc_name_, GetSharedPtr());

        worker_threads_.push_back(std::move(wt));
    }

    read_ahead_thread_ = std::make_unique<ReadAheadThread>(cow_device_, backing_store_device_,
                                                           misc_name_, GetSharedPtr());
    return true;
}

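// Persist the number of completed merge operations in the mapped COW header.
// If read-ahead is enabled and there are read-ahead ops, also flag the scratch
// buffer as kCowReadAheadInProgress before syncing the header page.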
bool Snapuserd::CommitMerge(int num_merge_ops) {
    struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_);
    ch->num_merge_ops += num_merge_ops;

    if (read_ahead_feature_ && read_ahead_ops_.size() > 0) {
        struct BufferState* ra_state = GetBufferState();
        ra_state->read_ahead_state = kCowReadAheadInProgress;
    }

    int ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC);
    if (ret < 0) {
        PLOG(ERROR) << "msync header failed: " << ret;
        return false;
    }

    merge_initiated_ = true;

    return true;
}

void Snapuserd::PrepareReadAhead() {
    if (!read_ahead_feature_) {
        return;
    }

    struct BufferState* ra_state = GetBufferState();
    // Check if the data has to be re-constructed from the COW device
    if (ra_state->read_ahead_state == kCowReadAheadDone) {
        populate_data_from_cow_ = true;
    } else {
        populate_data_from_cow_ = false;
    }

    StartReadAhead();
}

bool Snapuserd::GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block, void* buffer) {
    if (!lock->owns_lock()) {
        SNAP_LOG(ERROR) << "GetRABuffer - Lock not held";
        return false;
    }
    std::unordered_map<uint64_t, void*>::iterator it = read_ahead_buffer_map_.find(block);

    // This will be true only for IOs generated as part of reading the root
    // filesystem. IOs related to merge should always be in the read-ahead cache.
    if (it == read_ahead_buffer_map_.end()) {
        return false;
    }

    // Theoretically, we can send the data back from the read-ahead buffer
    // all the way to the kernel without memcpy. However, if the IO is
    // un-aligned, the wrapper function will need to touch the read-ahead
    // buffers and the transitions will be a bit more complicated.
    memcpy(buffer, it->second, BLOCK_SZ);
    return true;
}

// ========== State transition functions for read-ahead operations ===========

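// Fetch a block from the read-ahead cache. If the read-ahead thread has not
// yet finished populating the cache, wait until it transitions to
// IO_IN_PROGRESS or reports a failure.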
bool Snapuserd::GetReadAheadPopulatedBuffer(uint64_t block, void* buffer) {
    if (!read_ahead_feature_) {
        return false;
    }

    {
        std::unique_lock<std::mutex> lock(lock_);
        if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) {
            return false;
        }

        if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS) {
            return GetRABuffer(&lock, block, buffer);
        }
    }

    {
        // Read-ahead thread IO is in-progress. Wait for it to complete
        std::unique_lock<std::mutex> lock(lock_);
        while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE ||
                 io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS)) {
            cv.wait(lock);
        }

        return GetRABuffer(&lock, block, buffer);
    }
}

// This is invoked by the read-ahead thread waiting for merge IOs
// to complete.
bool Snapuserd::WaitForMergeToComplete() {
    {
        std::unique_lock<std::mutex> lock(lock_);
        while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN ||
                 io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED)) {
            cv.wait(lock);
        }

        if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED) {
            return false;
        }

        io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_IN_PROGRESS;
        return true;
    }
}

// This is invoked during the launch of worker threads. We wait
// for the read-ahead thread to be fully up before worker threads
// are launched; otherwise we will have a race between worker threads
// and the read-ahead thread, specifically during re-construction.
bool Snapuserd::WaitForReadAheadToStart() {
    {
        std::unique_lock<std::mutex> lock(lock_);
        while (!(io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS ||
                 io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE)) {
            cv.wait(lock);
        }

        if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) {
            return false;
        }

        return true;
    }
}

// Invoked by worker threads when a sequence of merge operations
// is complete, notifying the read-ahead thread to make forward
// progress.
void Snapuserd::StartReadAhead() {
    {
        std::lock_guard<std::mutex> lock(lock_);
        io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN;
    }

    cv.notify_one();
}

void Snapuserd::MergeCompleted() {
    {
        std::lock_guard<std::mutex> lock(lock_);
        io_state_ = READ_AHEAD_IO_TRANSITION::IO_TERMINATED;
    }

    cv.notify_one();
}

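// Invoked by the read-ahead thread once a batch of source blocks has been
// copied into the scratch space. When 'sync' is set, the data and the
// kCowReadAheadDone state are flushed to the COW file before the worker
// threads are notified.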
bool Snapuserd::ReadAheadIOCompleted(bool sync) {
    if (sync) {
        // Flush the entire buffer region
        int ret = msync(mapped_addr_, total_mapped_addr_length_, MS_SYNC);
        if (ret < 0) {
            PLOG(ERROR) << "msync failed after ReadAheadIOCompleted: " << ret;
            return false;
        }

        // Metadata and data are synced. Now, update the state.
        // We need to update the state after flushing data; if there is a crash
        // when read-ahead IO is in progress, the state of data in the COW file
        // is unknown. kCowReadAheadDone acts as a checkpoint wherein the data
        // in the scratch space is good and during next reboot, read-ahead thread
        // can safely re-construct the data.
        struct BufferState* ra_state = GetBufferState();
        ra_state->read_ahead_state = kCowReadAheadDone;

        ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC);
        if (ret < 0) {
            PLOG(ERROR) << "msync failed to flush Readahead completion state...";
            return false;
        }
    }

    // Notify the worker threads
    {
        std::lock_guard<std::mutex> lock(lock_);
        io_state_ = READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS;
    }

    cv.notify_all();
    return true;
}

void Snapuserd::ReadAheadIOFailed() {
    {
        std::lock_guard<std::mutex> lock(lock_);
        io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE;
    }

    cv.notify_all();
}

//========== End of state transition functions ====================

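// Chunk IDs repeat in a fixed stride: one metadata chunk followed by
// exceptions_per_area_ data chunks. With 256 exceptions per 4k area, chunk 0
// is the COW header, chunk 1 is the first metadata page, chunks 2..257 hold
// data, chunk 258 is the next metadata page, and so on.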
bool Snapuserd::IsChunkIdMetadata(chunk_t chunk) {
    uint32_t stride = exceptions_per_area_ + 1;
    lldiv_t divresult = lldiv(chunk, stride);

    return (divresult.rem == NUM_SNAPSHOT_HDR_CHUNKS);
}

// Find the next free chunk-id to be assigned. Check if the next free
// chunk-id represents a metadata page. If so, skip it.
chunk_t Snapuserd::GetNextAllocatableChunkId(chunk_t chunk) {
    chunk_t next_chunk = chunk + 1;

    if (IsChunkIdMetadata(next_chunk)) {
        next_chunk += 1;
    }
    return next_chunk;
}

void Snapuserd::CheckMergeCompletionStatus() {
    if (!merge_initiated_) {
        SNAP_LOG(INFO) << "Merge was not initiated. Total-data-ops: " << reader_->total_data_ops();
        return;
    }

    struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_);

    SNAP_LOG(INFO) << "Merge-status: Total-Merged-ops: " << ch->num_merge_ops
                   << " Total-data-ops: " << reader_->total_data_ops();
}

/*
 * Read the metadata from the COW device and
 * construct the metadata as required by the kernel.
 *
 * Please see the design of the kernel COW format.
 *
 * 1: Read the metadata from the internal COW device.
 * 2: There are 3 COW operations:
 *      a: Replace op
 *      b: Copy op
 *      c: Zero op
 * 3: For each of the 3 operations, op->new_block
 *    represents the block number in the base device
 *    for which one of the 3 operations has to be applied.
 *    This represents the old_chunk in the kernel COW format.
 * 4: We need to assign a new_chunk for a corresponding old_chunk.
 * 5: The algorithm is similar to how the kernel assigns chunk numbers
 *    while creating exceptions. However, there are a few cases
 *    which need to be addressed here:
 *      a: During the merge process, the kernel scans the metadata pages
 *         backwards when merge is initiated. Since we need
 *         to make sure that the merge ordering follows our COW format,
 *         we read the COW operations in reverse and populate the
 *         metadata so that when the kernel starts merging from the end,
 *         those ops correspond to the beginning of our COW format.
 *      b: The kernel can merge successive operations if the two chunk IDs
 *         are contiguous. This can be problematic when there is a crash
 *         during merge; specifically when the merge operations have dependencies.
 *         These dependencies can only happen during copy operations.
 *
 *         To avoid this problem, we make sure overlapping copy operations
 *         are not batch merged.
 * 6: Use a monotonically increasing chunk number to assign the
 *    new_chunk.
 * 7: Each chunk-id represents either
 *      a: a metadata page or
 *      b: a data page.
 * 8: A chunk-id representing a data page is stored in a map.
 * 9: A chunk-id representing a metadata page is converted into a vector
 *    index. We store this in a vector as the kernel requests metadata during
 *    two stages:
 *      a: When the initial dm-snapshot device is created, the kernel requests
 *         all the metadata and stores it in its internal data-structures.
 *      b: During merge, the kernel requests the same metadata once again.
 *    In both these cases, a quick lookup based on chunk-id is done.
 * 10: When the chunk number is incremented, we need to check whether the new
 *     chunk represents a metadata page and, if so, skip it.
 * 11: Each 4k page will contain 256 disk exceptions. We call this
 *     exceptions_per_area_.
 * 12: The kernel will stop issuing metadata IO requests when the new-chunk ID is 0.
 */
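// Example of the mapping built below: if a replace op targets new_block 10 and
// is assigned data chunk-id 2, the disk exception records old_chunk = 10 and
// new_chunk = 2. The kernel then redirects IO for origin chunk 10 to COW chunk
// 2, which snapuserd serves from the COW file.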
bool Snapuserd::ReadMetadata() {
    reader_ = std::make_unique<CowReader>();
    CowHeader header;
    CowOptions options;
    bool metadata_found = false;
    int replace_ops = 0, zero_ops = 0, copy_ops = 0;

    SNAP_LOG(DEBUG) << "ReadMetadata: Parsing cow file";

    if (!reader_->Parse(cow_fd_)) {
        SNAP_LOG(ERROR) << "Failed to parse";
        return false;
    }

    if (!reader_->GetHeader(&header)) {
        SNAP_LOG(ERROR) << "Failed to get header";
        return false;
    }

    if (!(header.block_size == BLOCK_SZ)) {
        SNAP_LOG(ERROR) << "Invalid header block size found: " << header.block_size;
        return false;
    }

    reader_->InitializeMerge();
    SNAP_LOG(DEBUG) << "Merge-ops: " << header.num_merge_ops;

    if (!MmapMetadata()) {
        SNAP_LOG(ERROR) << "mmap failed";
        return false;
    }

    // Initialize the iterator for reading metadata
    cowop_riter_ = reader_->GetRevOpIter();

    exceptions_per_area_ = (CHUNK_SIZE << SECTOR_SHIFT) / sizeof(struct disk_exception);

    // Start from chunk number 2. Chunk 0 represents the header and chunk 1
    // represents the first metadata page.
    chunk_t data_chunk_id = NUM_SNAPSHOT_HDR_CHUNKS + 1;
    size_t num_ops = 0;

    loff_t offset = 0;
    std::unique_ptr<uint8_t[]> de_ptr =
            std::make_unique<uint8_t[]>(exceptions_per_area_ * sizeof(struct disk_exception));

    // This memset is important. The kernel will stop issuing IO when the new-chunk
    // ID is 0. When an area is not completely filled with all 256 exceptions,
    // this memset ensures that the metadata read terminates correctly.
    memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));

    while (!cowop_riter_->Done()) {
        const CowOperation* cow_op = &cowop_riter_->Get();
        struct disk_exception* de =
                reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset);

        if (IsMetadataOp(*cow_op)) {
            cowop_riter_->Next();
            continue;
        }

        metadata_found = true;
        // This loop will handle all the replace and zero ops.
        // We will handle the copy ops later as they require special
        // handling when assigning chunk-ids. Furthermore, we make
        // sure that replace/zero and copy ops are not batch merged; hence,
        // the bump in the chunk_id before breaking out of this loop.
        if (cow_op->type == kCowCopyOp) {
            data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
            break;
        }

        if (cow_op->type == kCowReplaceOp) {
            replace_ops++;
        } else if (cow_op->type == kCowZeroOp) {
            zero_ops++;
        }

        // Construct the disk-exception
        de->old_chunk = cow_op->new_block;
        de->new_chunk = data_chunk_id;

        // Store operation pointer.
        chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), cow_op));
        num_ops += 1;
        offset += sizeof(struct disk_exception);
        cowop_riter_->Next();

        SNAP_LOG(DEBUG) << num_ops << ":"
                        << " Old-chunk: " << de->old_chunk << " New-chunk: " << de->new_chunk;

        if (num_ops == exceptions_per_area_) {
            // Store it in the vector at the right index. This maps the chunk-id to
            // the vector index.
            vec_.push_back(std::move(de_ptr));
            offset = 0;
            num_ops = 0;

            // Create buffer for the next area
            de_ptr = std::make_unique<uint8_t[]>(exceptions_per_area_ *
                                                 sizeof(struct disk_exception));
            memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));

            if (cowop_riter_->Done()) {
                vec_.push_back(std::move(de_ptr));
            }
        }

        data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
    }

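    // Process copy ops next. Copy ops are grouped into batches that contain no
    // overlapping source/destination blocks, so that the kernel never batch
    // merges operations whose relative order matters across a crash (see the
    // overlapping-copy example below).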
    int num_ra_ops_per_iter = ((GetBufferDataSize()) / BLOCK_SZ);
    std::optional<chunk_t> prev_id = {};
    std::vector<const CowOperation*> vec;
    std::set<uint64_t> dest_blocks;
    std::set<uint64_t> source_blocks;
    size_t pending_copy_ops = exceptions_per_area_ - num_ops;
    uint64_t total_copy_ops = reader_->total_copy_ops();

    SNAP_LOG(DEBUG) << " Processing copy-ops at Area: " << vec_.size()
                    << " Number of replace/zero ops completed in this area: " << num_ops
                    << " Pending copy ops for this area: " << pending_copy_ops;
    while (!cowop_riter_->Done()) {
        do {
            const CowOperation* cow_op = &cowop_riter_->Get();
            if (IsMetadataOp(*cow_op)) {
                cowop_riter_->Next();
                continue;
            }

            // We have two specific cases:
            //
            // =====================================================
            // Case 1: Overlapping copy regions
            //
            // Ex:
            //
            // Source -> Destination
            //
            // 1: 15 -> 18
            // 2: 16 -> 19
            // 3: 17 -> 20
            // 4: 18 -> 21
            // 5: 19 -> 22
            // 6: 20 -> 23
            //
            // We have 6 copy operations to be executed in the OTA and there is an
            // overlap. Update-engine will write to the COW file as follows:
            //
            // Op-1: 20 -> 23
            // Op-2: 19 -> 22
            // Op-3: 18 -> 21
            // Op-4: 17 -> 20
            // Op-5: 16 -> 19
            // Op-6: 15 -> 18
            //
            // Note that the block numbers are contiguous. Hence, all 6 copy
            // operations can be batch merged. However, that will be
            // problematic if we have a crash, as blocks 20, 19 and 18 would have
            // been overwritten and hence subsequent recovery may end up with
            // silent data corruption when op-1, op-2 and op-3 are
            // re-executed.
            //
            // To address the above problem, the read-ahead thread will
            // read all the 6 source blocks and cache them in the scratch
            // space of the COW file. During merge, the read-ahead
            // thread will serve the blocks from the read-ahead cache.
            // If there is a crash during merge, then on subsequent reboot
            // the read-ahead thread will recover the data from the
            // scratch space and re-construct it, thereby ensuring there
            // is no loss of data.
            //
            // Note that we will follow the same order of COW operations
            // as present in the COW file. This will make sure that
            // the merge of operations is done based on the ops present
            // in the file.
            //===========================================================
            if (prev_id.has_value()) {
                if (dest_blocks.count(cow_op->new_block) || source_blocks.count(cow_op->source)) {
                    break;
                }
            }
            metadata_found = true;
            pending_copy_ops -= 1;
            vec.push_back(cow_op);
            dest_blocks.insert(cow_op->source);
            source_blocks.insert(cow_op->new_block);
            prev_id = cow_op->new_block;
            cowop_riter_->Next();
        } while (!cowop_riter_->Done() && pending_copy_ops);

        data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
        SNAP_LOG(DEBUG) << "Batch Merge copy-ops of size: " << vec.size()
                        << " Area: " << vec_.size() << " Area offset: " << offset
                        << " Pending-copy-ops in this area: " << pending_copy_ops;

        for (size_t i = 0; i < vec.size(); i++) {
            struct disk_exception* de =
                    reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset);
            const CowOperation* cow_op = vec[i];

            de->old_chunk = cow_op->new_block;
            de->new_chunk = data_chunk_id;

            // Store operation pointer.
            chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), cow_op));
            offset += sizeof(struct disk_exception);
            num_ops += 1;
            copy_ops++;
            if (read_ahead_feature_) {
                read_ahead_ops_.push_back(cow_op);
            }

            SNAP_LOG(DEBUG) << num_ops << ":"
                            << " Copy-op: "
                            << " Old-chunk: " << de->old_chunk << " New-chunk: " << de->new_chunk;

            if (num_ops == exceptions_per_area_) {
                // Store it in the vector at the right index. This maps the chunk-id to
                // the vector index.
                vec_.push_back(std::move(de_ptr));
                num_ops = 0;
                offset = 0;

                // Create buffer for the next area
                de_ptr = std::make_unique<uint8_t[]>(exceptions_per_area_ *
                                                     sizeof(struct disk_exception));
                memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));

                if (cowop_riter_->Done()) {
                    vec_.push_back(std::move(de_ptr));
                    SNAP_LOG(DEBUG) << "ReadMetadata() completed; Number of Areas: " << vec_.size();
                }

                if (!(pending_copy_ops == 0)) {
                    SNAP_LOG(ERROR)
                            << "Invalid pending_copy_ops: expected: 0 found: " << pending_copy_ops;
                    return false;
                }
                pending_copy_ops = exceptions_per_area_;
            }

            data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
            total_copy_ops -= 1;
            /*
             * Split the number of ops based on the size of the read-ahead buffer
             * region. We need to ensure that the kernel doesn't issue IO on blocks
             * which are not read by the read-ahead thread.
             */
            if (read_ahead_feature_ && (total_copy_ops % num_ra_ops_per_iter == 0)) {
                data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
            }
        }
        vec.clear();
        dest_blocks.clear();
        source_blocks.clear();
        prev_id.reset();
    }

    // Partially filled area or there is no metadata.
    // If there is no metadata, fill with zeroes so that the kernel
    // is aware that the merge is completed.
    if (num_ops || !metadata_found) {
        vec_.push_back(std::move(de_ptr));
        SNAP_LOG(DEBUG) << "ReadMetadata() completed. Partially filled area num_ops: " << num_ops
                        << " Areas: " << vec_.size();
    }

    chunk_vec_.shrink_to_fit();
    vec_.shrink_to_fit();
    read_ahead_ops_.shrink_to_fit();

    // Sort the vector based on sectors as we need this during un-aligned access
    std::sort(chunk_vec_.begin(), chunk_vec_.end(), compare);

    SNAP_LOG(INFO) << "ReadMetadata completed. Final-chunk-id: " << data_chunk_id
                   << " Num Sector: " << ChunkToSector(data_chunk_id)
                   << " Replace-ops: " << replace_ops << " Zero-ops: " << zero_ops
                   << " Copy-ops: " << copy_ops << " Areas: " << vec_.size()
                   << " Num-ops-merged: " << header.num_merge_ops
                   << " Total-data-ops: " << reader_->total_data_ops();

    // Total number of sectors required for creating the dm-user device
    num_sectors_ = ChunkToSector(data_chunk_id);
    merge_initiated_ = false;
    PrepareReadAhead();

    return true;
}

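// Map the COW header. For v2+ COW images that carry a scratch buffer, the
// mapping also covers the read-ahead buffer region; older COW images map only
// the first 4k page.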
bool Snapuserd::MmapMetadata() {
    CowHeader header;
    reader_->GetHeader(&header);

    if (header.major_version >= 2 && header.buffer_size > 0) {
        total_mapped_addr_length_ = header.header_size + BUFFER_REGION_DEFAULT_SIZE;
        read_ahead_feature_ = true;
    } else {
        // mmap the first 4k page - older COW format
        total_mapped_addr_length_ = BLOCK_SZ;
        read_ahead_feature_ = false;
    }

    mapped_addr_ = mmap(NULL, total_mapped_addr_length_, PROT_READ | PROT_WRITE, MAP_SHARED,
                        cow_fd_.get(), 0);
    if (mapped_addr_ == MAP_FAILED) {
        SNAP_LOG(ERROR) << "mmap metadata failed";
        return false;
    }

    return true;
}

void Snapuserd::UnmapBufferRegion() {
    int ret = munmap(mapped_addr_, total_mapped_addr_length_);
    if (ret < 0) {
        SNAP_PLOG(ERROR) << "munmap failed";
    }
}

void MyLogger(android::base::LogId, android::base::LogSeverity severity, const char*, const char*,
              unsigned int, const char* message) {
    if (severity == android::base::ERROR) {
        fprintf(stderr, "%s\n", message);
    } else {
        fprintf(stdout, "%s\n", message);
    }
}

bool Snapuserd::InitCowDevice() {
    cow_fd_.reset(open(cow_device_.c_str(), O_RDWR));
    if (cow_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Open Failed: " << cow_device_;
        return false;
    }

    return ReadMetadata();
}

/*
 * Entry point to launch threads
 */
bool Snapuserd::Start() {
    std::vector<std::future<bool>> threads;
    std::future<bool> ra_thread;
    bool rathread = (read_ahead_feature_ && (read_ahead_ops_.size() > 0));

    // Start the read-ahead thread and wait
    // for it, as the data has to be re-constructed
    // from the COW device.
    if (rathread) {
        ra_thread = std::async(std::launch::async, &ReadAheadThread::RunThread,
                               read_ahead_thread_.get());
        if (!WaitForReadAheadToStart()) {
            SNAP_LOG(ERROR) << "Failed to start Read-ahead thread...";
            return false;
        }

        SNAP_LOG(INFO) << "Read-ahead thread started...";
    }

    // Launch worker threads
    for (int i = 0; i < worker_threads_.size(); i++) {
        threads.emplace_back(
                std::async(std::launch::async, &WorkerThread::RunThread, worker_threads_[i].get()));
    }

    bool ret = true;
    for (auto& t : threads) {
        ret = t.get() && ret;
    }

    if (rathread) {
        // Notify the read-ahead thread that all worker threads
        // are done. We need this explicit notification when
        // there is an IO failure or there was a switch
        // of the dm-user table, thus forcing the read-ahead
        // thread to wake up.
        MergeCompleted();
        ret = ret && ra_thread.get();
    }

    return ret;
}

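// Offset, from the start of the mapped region, at which the read-ahead
// metadata begins: just past the COW header and the BufferState bookkeeping
// struct.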
uint64_t Snapuserd::GetBufferMetadataOffset() {
    CowHeader header;
    reader_->GetHeader(&header);

    size_t size = header.header_size + sizeof(BufferState);
    return size;
}

/*
 * Metadata for read-ahead is 16 bytes. For a 2 MB region, we will
 * end up with 8k (2 PAGE) worth of metadata. Thus, a 2MB buffer
 * region is split into:
 *
 * 1: 8k metadata
 * 2: Remaining data region
 *
 */
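// Worked example for the 2MB region referenced above (4k block size):
//   metadata = (2097152 * 16) / 4096 = 8192 bytes (two 4k pages)
//   data     = 2097152 - 8192        = 2088960 bytes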
size_t Snapuserd::GetBufferMetadataSize() {
    CowHeader header;
    reader_->GetHeader(&header);

    size_t metadata_bytes = (header.buffer_size * sizeof(struct ScratchMetadata)) / BLOCK_SZ;
    return metadata_bytes;
}

size_t Snapuserd::GetBufferDataOffset() {
    CowHeader header;
    reader_->GetHeader(&header);

    return (header.header_size + GetBufferMetadataSize());
}

/*
 * (2MB - 8K = 2088960 bytes) will be the buffer region to hold the data.
 */
size_t Snapuserd::GetBufferDataSize() {
    CowHeader header;
    reader_->GetHeader(&header);

    size_t size = header.buffer_size - GetBufferMetadataSize();
    return size;
}

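// BufferState lives at the beginning of the scratch region, immediately after
// the COW header in the mapped address space.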
struct BufferState* Snapuserd::GetBufferState() {
    CowHeader header;
    reader_->GetHeader(&header);

    struct BufferState* ra_state =
            reinterpret_cast<struct BufferState*>((char*)mapped_addr_ + header.header_size);
    return ra_state;
}

}  // namespace snapshot
}  // namespace android