1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <libsnapshot/snapshot.h>
16 
17 #include <dirent.h>
18 #include <fcntl.h>
19 #include <math.h>
20 #include <sys/file.h>
21 #include <sys/types.h>
22 #include <sys/unistd.h>
23 
24 #include <filesystem>
25 #include <optional>
26 #include <thread>
27 #include <unordered_set>
28 
29 #include <android-base/file.h>
30 #include <android-base/logging.h>
31 #include <android-base/parseint.h>
32 #include <android-base/properties.h>
33 #include <android-base/strings.h>
34 #include <android-base/unique_fd.h>
35 #include <cutils/sockets.h>
36 #include <ext4_utils/ext4_utils.h>
37 #include <fs_mgr.h>
38 #include <fs_mgr/file_wait.h>
39 #include <fs_mgr_dm_linear.h>
40 #include <fstab/fstab.h>
41 #include <libdm/dm.h>
42 #include <libfiemap/image_manager.h>
43 #include <liblp/liblp.h>
44 
45 #include <android/snapshot/snapshot.pb.h>
46 #include <libsnapshot/snapshot_stats.h>
47 #include "device_info.h"
48 #include "partition_cow_creator.h"
49 #include "snapshot_metadata_updater.h"
50 #include "snapshot_reader.h"
51 #include "utility.h"
52 
53 namespace android {
54 namespace snapshot {
55 
56 using android::base::unique_fd;
57 using android::dm::DeviceMapper;
58 using android::dm::DmDeviceState;
59 using android::dm::DmTable;
60 using android::dm::DmTargetLinear;
61 using android::dm::DmTargetSnapshot;
62 using android::dm::DmTargetUser;
63 using android::dm::kSectorSize;
64 using android::dm::SnapshotStorageMode;
65 using android::fiemap::FiemapStatus;
66 using android::fiemap::IImageManager;
67 using android::fs_mgr::CreateDmTable;
68 using android::fs_mgr::CreateLogicalPartition;
69 using android::fs_mgr::CreateLogicalPartitionParams;
70 using android::fs_mgr::GetPartitionGroupName;
71 using android::fs_mgr::GetPartitionName;
72 using android::fs_mgr::LpMetadata;
73 using android::fs_mgr::MetadataBuilder;
74 using android::fs_mgr::SlotNumberForSlotSuffix;
75 using android::hardware::boot::V1_1::MergeStatus;
76 using chromeos_update_engine::DeltaArchiveManifest;
77 using chromeos_update_engine::Extent;
78 using chromeos_update_engine::FileDescriptor;
79 using chromeos_update_engine::PartitionUpdate;
80 template <typename T>
81 using RepeatedPtrField = google::protobuf::RepeatedPtrField<T>;
82 using std::chrono::duration_cast;
83 using namespace std::chrono_literals;
84 using namespace std::string_literals;
85 
// Indicator file paths under /metadata/ota.
static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
// Two-second interval for update state checks.
static constexpr std::chrono::seconds kUpdateStateCheckInterval{2};
89 
90 // Note: IImageManager is an incomplete type in the header, so the default
91 // destructor doesn't work.
~SnapshotManager()92 SnapshotManager::~SnapshotManager() {}
93 
New(IDeviceInfo * info)94 std::unique_ptr<SnapshotManager> SnapshotManager::New(IDeviceInfo* info) {
95     if (!info) {
96         info = new DeviceInfo();
97     }
98     return std::unique_ptr<SnapshotManager>(new SnapshotManager(info));
99 }
100 
NewForFirstStageMount(IDeviceInfo * info)101 std::unique_ptr<SnapshotManager> SnapshotManager::NewForFirstStageMount(IDeviceInfo* info) {
102     if (!info) {
103         DeviceInfo* impl = new DeviceInfo();
104         impl->set_first_stage_init(true);
105         info = impl;
106     }
107     auto sm = New(info);
108 
109     // The first-stage version of snapuserd is explicitly started by init. Do
110     // not attempt to using it during tests (which run in normal AOSP).
111     if (!sm->device()->IsTestDevice()) {
112         sm->use_first_stage_snapuserd_ = true;
113     }
114     return sm;
115 }
116 
SnapshotManager(IDeviceInfo * device)117 SnapshotManager::SnapshotManager(IDeviceInfo* device) : device_(device) {
118     metadata_dir_ = device_->GetMetadataDir();
119 }
120 
// Returns |snapshot_name| with the "-cow" suffix appended.
static std::string GetCowName(const std::string& snapshot_name) {
    std::string cow_name(snapshot_name);
    cow_name.append("-cow");
    return cow_name;
}
124 
// Returns |snapshot_name| with the "-user-cow" suffix appended.
static std::string GetDmUserCowName(const std::string& snapshot_name) {
    std::string user_cow_name(snapshot_name);
    user_cow_name.append("-user-cow");
    return user_cow_name;
}
128 
// Returns |snapshot_name| with the "-cow-img" suffix appended.
static std::string GetCowImageDeviceName(const std::string& snapshot_name) {
    std::string image_name(snapshot_name);
    image_name.append("-cow-img");
    return image_name;
}
132 
// Returns |partition_name| with the "-base" suffix appended.
static std::string GetBaseDeviceName(const std::string& partition_name) {
    std::string base_name(partition_name);
    base_name.append("-base");
    return base_name;
}
136 
// Returns |partition_name| with the "-src" suffix appended.
static std::string GetSourceDeviceName(const std::string& partition_name) {
    std::string source_name(partition_name);
    source_name.append("-src");
    return source_name;
}
140 
BeginUpdate()141 bool SnapshotManager::BeginUpdate() {
142     bool needs_merge = false;
143     if (!TryCancelUpdate(&needs_merge)) {
144         return false;
145     }
146     if (needs_merge) {
147         LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
148         auto state = ProcessUpdateState();
149         LOG(INFO) << "Merged with state = " << state;
150     }
151 
152     auto file = LockExclusive();
153     if (!file) return false;
154 
155     // Purge the ImageManager just in case there is a corrupt lp_metadata file
156     // lying around. (NB: no need to return false on an error, we can let the
157     // update try to progress.)
158     if (EnsureImageManager()) {
159         images_->RemoveAllImages();
160     }
161 
162     // Clear any cached metadata (this allows re-using one manager across tests).
163     old_partition_metadata_ = nullptr;
164 
165     auto state = ReadUpdateState(file.get());
166     if (state != UpdateState::None) {
167         LOG(ERROR) << "An update is already in progress, cannot begin a new update";
168         return false;
169     }
170     return WriteUpdateState(file.get(), UpdateState::Initiated);
171 }
172 
CancelUpdate()173 bool SnapshotManager::CancelUpdate() {
174     bool needs_merge = false;
175     if (!TryCancelUpdate(&needs_merge)) {
176         return false;
177     }
178     if (needs_merge) {
179         LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
180     }
181     return !needs_merge;
182 }
183 
TryCancelUpdate(bool * needs_merge)184 bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
185     *needs_merge = false;
186 
187     auto file = LockExclusive();
188     if (!file) return false;
189 
190     UpdateState state = ReadUpdateState(file.get());
191     if (state == UpdateState::None) return true;
192 
193     if (state == UpdateState::Initiated) {
194         LOG(INFO) << "Update has been initiated, now canceling";
195         return RemoveAllUpdateState(file.get());
196     }
197 
198     if (state == UpdateState::Unverified) {
199         // We completed an update, but it can still be canceled if we haven't booted into it.
200         auto slot = GetCurrentSlot();
201         if (slot != Slot::Target) {
202             LOG(INFO) << "Canceling previously completed updates (if any)";
203             return RemoveAllUpdateState(file.get());
204         }
205     }
206     *needs_merge = true;
207     return true;
208 }
209 
ReadUpdateSourceSlotSuffix()210 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
211     auto boot_file = GetSnapshotBootIndicatorPath();
212     std::string contents;
213     if (!android::base::ReadFileToString(boot_file, &contents)) {
214         PLOG(WARNING) << "Cannot read " << boot_file;
215         return {};
216     }
217     return contents;
218 }
219 
GetCurrentSlot()220 SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
221     auto contents = ReadUpdateSourceSlotSuffix();
222     if (contents.empty()) {
223         return Slot::Unknown;
224     }
225     if (device_->GetSlotSuffix() == contents) {
226         return Slot::Source;
227     }
228     return Slot::Target;
229 }
230 
GetSnapshotSlotSuffix()231 std::string SnapshotManager::GetSnapshotSlotSuffix() {
232     switch (GetCurrentSlot()) {
233         case Slot::Target:
234             return device_->GetSlotSuffix();
235         default:
236             return device_->GetOtherSlotSuffix();
237     }
238 }
239 
RemoveFileIfExists(const std::string & path)240 static bool RemoveFileIfExists(const std::string& path) {
241     std::string message;
242     if (!android::base::RemoveFileIfExists(path, &message)) {
243         LOG(ERROR) << "Remove failed: " << path << ": " << message;
244         return false;
245     }
246     return true;
247 }
248 
RemoveAllUpdateState(LockedFile * lock,const std::function<bool ()> & prolog)249 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function<bool()>& prolog) {
250     if (prolog && !prolog()) {
251         LOG(WARNING) << "Can't RemoveAllUpdateState: prolog failed.";
252         return false;
253     }
254 
255     LOG(INFO) << "Removing all update state.";
256 
257     if (!RemoveAllSnapshots(lock)) {
258         LOG(ERROR) << "Could not remove all snapshots";
259         return false;
260     }
261 
262     // It's okay if these fail:
263     // - For SnapshotBoot and Rollback, first-stage init performs a deeper check after
264     // reading the indicator file, so it's not a problem if it still exists
265     // after the update completes.
266     // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
267     // matches the incoming update.
268     std::vector<std::string> files = {
269             GetSnapshotBootIndicatorPath(),
270             GetRollbackIndicatorPath(),
271             GetForwardMergeIndicatorPath(),
272             GetOldPartitionMetadataPath(),
273     };
274     for (const auto& file : files) {
275         RemoveFileIfExists(file);
276     }
277 
278     // If this fails, we'll keep trying to remove the update state (as the
279     // device reboots or starts a new update) until it finally succeeds.
280     return WriteUpdateState(lock, UpdateState::None);
281 }
282 
FinishedSnapshotWrites(bool wipe)283 bool SnapshotManager::FinishedSnapshotWrites(bool wipe) {
284     auto lock = LockExclusive();
285     if (!lock) return false;
286 
287     auto update_state = ReadUpdateState(lock.get());
288     if (update_state == UpdateState::Unverified) {
289         LOG(INFO) << "FinishedSnapshotWrites already called before. Ignored.";
290         return true;
291     }
292 
293     if (update_state != UpdateState::Initiated) {
294         LOG(ERROR) << "Can only transition to the Unverified state from the Initiated state.";
295         return false;
296     }
297 
298     if (!EnsureNoOverflowSnapshot(lock.get())) {
299         LOG(ERROR) << "Cannot ensure there are no overflow snapshots.";
300         return false;
301     }
302 
303     if (!UpdateForwardMergeIndicator(wipe)) {
304         return false;
305     }
306 
307     // This file is written on boot to detect whether a rollback occurred. It
308     // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
309     // snapshots too early.
310     if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
311         return false;
312     }
313 
314     // This file acts as both a quick indicator for init (it can use access(2)
315     // to decide how to do first-stage mounts), and it stores the old slot, so
316     // we can tell whether or not we performed a rollback.
317     auto contents = device_->GetSlotSuffix();
318     auto boot_file = GetSnapshotBootIndicatorPath();
319     if (!WriteStringToFileAtomic(contents, boot_file)) {
320         PLOG(ERROR) << "write failed: " << boot_file;
321         return false;
322     }
323     return WriteUpdateState(lock.get(), UpdateState::Unverified);
324 }
325 
CreateSnapshot(LockedFile * lock,PartitionCowCreator * cow_creator,SnapshotStatus * status)326 bool SnapshotManager::CreateSnapshot(LockedFile* lock, PartitionCowCreator* cow_creator,
327                                      SnapshotStatus* status) {
328     CHECK(lock);
329     CHECK(lock->lock_mode() == LOCK_EX);
330     CHECK(status);
331 
332     if (status->name().empty()) {
333         LOG(ERROR) << "SnapshotStatus has no name.";
334         return false;
335     }
336     // Check these sizes. Like liblp, we guarantee the partition size is
337     // respected, which means it has to be sector-aligned. (This guarantee is
338     // useful for locating avb footers correctly). The COW file size, however,
339     // can be arbitrarily larger than specified, so we can safely round it up.
340     if (status->device_size() % kSectorSize != 0) {
341         LOG(ERROR) << "Snapshot " << status->name()
342                    << " device size is not a multiple of the sector size: "
343                    << status->device_size();
344         return false;
345     }
346     if (status->snapshot_size() % kSectorSize != 0) {
347         LOG(ERROR) << "Snapshot " << status->name()
348                    << " snapshot size is not a multiple of the sector size: "
349                    << status->snapshot_size();
350         return false;
351     }
352     if (status->cow_partition_size() % kSectorSize != 0) {
353         LOG(ERROR) << "Snapshot " << status->name()
354                    << " cow partition size is not a multiple of the sector size: "
355                    << status->cow_partition_size();
356         return false;
357     }
358     if (status->cow_file_size() % kSectorSize != 0) {
359         LOG(ERROR) << "Snapshot " << status->name()
360                    << " cow file size is not a multiple of the sector size: "
361                    << status->cow_file_size();
362         return false;
363     }
364 
365     status->set_state(SnapshotState::CREATED);
366     status->set_sectors_allocated(0);
367     status->set_metadata_sectors(0);
368     status->set_compression_enabled(cow_creator->compression_enabled);
369     status->set_compression_algorithm(cow_creator->compression_algorithm);
370 
371     if (!WriteSnapshotStatus(lock, *status)) {
372         PLOG(ERROR) << "Could not write snapshot status: " << status->name();
373         return false;
374     }
375     return true;
376 }
377 
CreateCowImage(LockedFile * lock,const std::string & name)378 Return SnapshotManager::CreateCowImage(LockedFile* lock, const std::string& name) {
379     CHECK(lock);
380     CHECK(lock->lock_mode() == LOCK_EX);
381     if (!EnsureImageManager()) return Return::Error();
382 
383     SnapshotStatus status;
384     if (!ReadSnapshotStatus(lock, name, &status)) {
385         return Return::Error();
386     }
387 
388     // The COW file size should have been rounded up to the nearest sector in CreateSnapshot.
389     if (status.cow_file_size() % kSectorSize != 0) {
390         LOG(ERROR) << "Snapshot " << name << " COW file size is not a multiple of the sector size: "
391                    << status.cow_file_size();
392         return Return::Error();
393     }
394 
395     std::string cow_image_name = GetCowImageDeviceName(name);
396     int cow_flags = IImageManager::CREATE_IMAGE_DEFAULT;
397     return Return(images_->CreateBackingImage(cow_image_name, status.cow_file_size(), cow_flags));
398 }
399 
MapDmUserCow(LockedFile * lock,const std::string & name,const std::string & cow_file,const std::string & base_device,const std::chrono::milliseconds & timeout_ms,std::string * path)400 bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name,
401                                    const std::string& cow_file, const std::string& base_device,
402                                    const std::chrono::milliseconds& timeout_ms, std::string* path) {
403     CHECK(lock);
404 
405     auto& dm = DeviceMapper::Instance();
406 
407     // Use an extra decoration for first-stage init, so we can transition
408     // to a new table entry in second-stage.
409     std::string misc_name = name;
410     if (use_first_stage_snapuserd_) {
411         misc_name += "-init";
412     }
413 
414     if (!EnsureSnapuserdConnected()) {
415         return false;
416     }
417 
418     uint64_t base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device);
419     if (base_sectors == 0) {
420         LOG(ERROR) << "Failed to retrieve base_sectors from Snapuserd";
421         return false;
422     }
423 
424     DmTable table;
425     table.Emplace<DmTargetUser>(0, base_sectors, misc_name);
426     if (!dm.CreateDevice(name, table, path, timeout_ms)) {
427         return false;
428     }
429     if (!WaitForDevice(*path, timeout_ms)) {
430         return false;
431     }
432 
433     auto control_device = "/dev/dm-user/" + misc_name;
434     if (!WaitForDevice(control_device, timeout_ms)) {
435         return false;
436     }
437 
438     return snapuserd_client_->AttachDmUser(misc_name);
439 }
440 
MapSnapshot(LockedFile * lock,const std::string & name,const std::string & base_device,const std::string & cow_device,const std::chrono::milliseconds & timeout_ms,std::string * dev_path)441 bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
442                                   const std::string& base_device, const std::string& cow_device,
443                                   const std::chrono::milliseconds& timeout_ms,
444                                   std::string* dev_path) {
445     CHECK(lock);
446 
447     SnapshotStatus status;
448     if (!ReadSnapshotStatus(lock, name, &status)) {
449         return false;
450     }
451     if (status.state() == SnapshotState::NONE || status.state() == SnapshotState::MERGE_COMPLETED) {
452         LOG(ERROR) << "Should not create a snapshot device for " << name
453                    << " after merging has completed.";
454         return false;
455     }
456 
457     // Validate the block device size, as well as the requested snapshot size.
458     // Note that during first-stage init, we don't have the device paths.
459     if (android::base::StartsWith(base_device, "/")) {
460         unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
461         if (fd < 0) {
462             PLOG(ERROR) << "open failed: " << base_device;
463             return false;
464         }
465         auto dev_size = get_block_device_size(fd);
466         if (!dev_size) {
467             PLOG(ERROR) << "Could not determine block device size: " << base_device;
468             return false;
469         }
470         if (status.device_size() != dev_size) {
471             LOG(ERROR) << "Block device size for " << base_device << " does not match"
472                        << "(expected " << status.device_size() << ", got " << dev_size << ")";
473             return false;
474         }
475     }
476     if (status.device_size() % kSectorSize != 0) {
477         LOG(ERROR) << "invalid blockdev size for " << base_device << ": " << status.device_size();
478         return false;
479     }
480     if (status.snapshot_size() % kSectorSize != 0 ||
481         status.snapshot_size() > status.device_size()) {
482         LOG(ERROR) << "Invalid snapshot size for " << base_device << ": " << status.snapshot_size();
483         return false;
484     }
485     if (status.device_size() != status.snapshot_size()) {
486         LOG(ERROR) << "Device size and snapshot size must be the same (device size = "
487                    << status.device_size() << ", snapshot size = " << status.snapshot_size();
488         return false;
489     }
490 
491     uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
492 
493     auto& dm = DeviceMapper::Instance();
494 
495     // Note that merging is a global state. We do track whether individual devices
496     // have completed merging, but the start of the merge process is considered
497     // atomic.
498     SnapshotStorageMode mode;
499     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
500     switch (update_status.state()) {
501         case UpdateState::MergeCompleted:
502         case UpdateState::MergeNeedsReboot:
503             LOG(ERROR) << "Should not create a snapshot device for " << name
504                        << " after global merging has completed.";
505             return false;
506         case UpdateState::Merging:
507         case UpdateState::MergeFailed:
508             // Note: MergeFailed indicates that a merge is in progress, but
509             // is possibly stalled. We still have to honor the merge.
510             if (DecideMergePhase(status) == update_status.merge_phase()) {
511                 mode = SnapshotStorageMode::Merge;
512             } else {
513                 mode = SnapshotStorageMode::Persistent;
514             }
515             break;
516         default:
517             mode = SnapshotStorageMode::Persistent;
518             break;
519     }
520 
521     DmTable table;
522     table.Emplace<DmTargetSnapshot>(0, snapshot_sectors, base_device, cow_device, mode,
523                                     kSnapshotChunkSize);
524     if (!dm.CreateDevice(name, table, dev_path, timeout_ms)) {
525         LOG(ERROR) << "Could not create snapshot device: " << name;
526         return false;
527     }
528     return true;
529 }
530 
MapCowImage(const std::string & name,const std::chrono::milliseconds & timeout_ms)531 std::optional<std::string> SnapshotManager::MapCowImage(
532         const std::string& name, const std::chrono::milliseconds& timeout_ms) {
533     if (!EnsureImageManager()) return std::nullopt;
534     auto cow_image_name = GetCowImageDeviceName(name);
535 
536     bool ok;
537     std::string cow_dev;
538     if (device_->IsRecovery() || device_->IsFirstStageInit()) {
539         const auto& opener = device_->GetPartitionOpener();
540         ok = images_->MapImageWithDeviceMapper(opener, cow_image_name, &cow_dev);
541     } else {
542         ok = images_->MapImageDevice(cow_image_name, timeout_ms, &cow_dev);
543     }
544 
545     if (ok) {
546         LOG(INFO) << "Mapped " << cow_image_name << " to " << cow_dev;
547         return cow_dev;
548     }
549     LOG(ERROR) << "Could not map image device: " << cow_image_name;
550     return std::nullopt;
551 }
552 
MapSourceDevice(LockedFile * lock,const std::string & name,const std::chrono::milliseconds & timeout_ms,std::string * path)553 bool SnapshotManager::MapSourceDevice(LockedFile* lock, const std::string& name,
554                                       const std::chrono::milliseconds& timeout_ms,
555                                       std::string* path) {
556     CHECK(lock);
557 
558     auto metadata = ReadOldPartitionMetadata(lock);
559     if (!metadata) {
560         LOG(ERROR) << "Could not map source device due to missing or corrupt metadata";
561         return false;
562     }
563 
564     auto old_name = GetOtherPartitionName(name);
565     auto slot_suffix = device_->GetSlotSuffix();
566     auto slot = SlotNumberForSlotSuffix(slot_suffix);
567 
568     CreateLogicalPartitionParams params = {
569             .block_device = device_->GetSuperDevice(slot),
570             .metadata = metadata,
571             .partition_name = old_name,
572             .timeout_ms = timeout_ms,
573             .device_name = GetSourceDeviceName(name),
574             .partition_opener = &device_->GetPartitionOpener(),
575     };
576     if (!CreateLogicalPartition(std::move(params), path)) {
577         LOG(ERROR) << "Could not create source device for snapshot " << name;
578         return false;
579     }
580     return true;
581 }
582 
UnmapSnapshot(LockedFile * lock,const std::string & name)583 bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
584     CHECK(lock);
585 
586     if (!DeleteDeviceIfExists(name)) {
587         LOG(ERROR) << "Could not delete snapshot device: " << name;
588         return false;
589     }
590     return true;
591 }
592 
UnmapCowImage(const std::string & name)593 bool SnapshotManager::UnmapCowImage(const std::string& name) {
594     if (!EnsureImageManager()) return false;
595     return images_->UnmapImageIfExists(GetCowImageDeviceName(name));
596 }
597 
DeleteSnapshot(LockedFile * lock,const std::string & name)598 bool SnapshotManager::DeleteSnapshot(LockedFile* lock, const std::string& name) {
599     CHECK(lock);
600     CHECK(lock->lock_mode() == LOCK_EX);
601     if (!EnsureImageManager()) return false;
602 
603     if (!UnmapCowDevices(lock, name)) {
604         return false;
605     }
606 
607     // We can't delete snapshots in recovery. The only way we'd try is it we're
608     // completing or canceling a merge in preparation for a data wipe, in which
609     // case, we don't care if the file sticks around.
610     if (device_->IsRecovery()) {
611         LOG(INFO) << "Skipping delete of snapshot " << name << " in recovery.";
612         return true;
613     }
614 
615     auto cow_image_name = GetCowImageDeviceName(name);
616     if (images_->BackingImageExists(cow_image_name)) {
617         if (!images_->DeleteBackingImage(cow_image_name)) {
618             return false;
619         }
620     }
621 
622     std::string error;
623     auto file_path = GetSnapshotStatusFilePath(name);
624     if (!android::base::RemoveFileIfExists(file_path, &error)) {
625         LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
626         return false;
627     }
628     return true;
629 }
630 
InitiateMerge()631 bool SnapshotManager::InitiateMerge() {
632     auto lock = LockExclusive();
633     if (!lock) return false;
634 
635     UpdateState state = ReadUpdateState(lock.get());
636     if (state != UpdateState::Unverified) {
637         LOG(ERROR) << "Cannot begin a merge if an update has not been verified";
638         return false;
639     }
640 
641     auto slot = GetCurrentSlot();
642     if (slot != Slot::Target) {
643         LOG(ERROR) << "Device cannot merge while not booting from new slot";
644         return false;
645     }
646 
647     std::vector<std::string> snapshots;
648     if (!ListSnapshots(lock.get(), &snapshots)) {
649         LOG(ERROR) << "Could not list snapshots";
650         return false;
651     }
652 
653     auto other_suffix = device_->GetOtherSlotSuffix();
654 
655     auto& dm = DeviceMapper::Instance();
656     for (const auto& snapshot : snapshots) {
657         if (android::base::EndsWith(snapshot, other_suffix)) {
658             // Allow the merge to continue, but log this unexpected case.
659             LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
660             continue;
661         }
662 
663         // The device has to be mapped, since everything should be merged at
664         // the same time. This is a fairly serious error. We could forcefully
665         // map everything here, but it should have been mapped during first-
666         // stage init.
667         if (dm.GetState(snapshot) == DmDeviceState::INVALID) {
668             LOG(ERROR) << "Cannot begin merge; device " << snapshot << " is not mapped.";
669             return false;
670         }
671     }
672 
673     auto metadata = ReadCurrentMetadata();
674     for (auto it = snapshots.begin(); it != snapshots.end();) {
675         switch (GetMetadataPartitionState(*metadata, *it)) {
676             case MetadataPartitionState::Flashed:
677                 LOG(WARNING) << "Detected re-flashing for partition " << *it
678                              << ". Skip merging it.";
679                 [[fallthrough]];
680             case MetadataPartitionState::None: {
681                 LOG(WARNING) << "Deleting snapshot for partition " << *it;
682                 if (!DeleteSnapshot(lock.get(), *it)) {
683                     LOG(WARNING) << "Cannot delete snapshot for partition " << *it
684                                  << ". Skip merging it anyways.";
685                 }
686                 it = snapshots.erase(it);
687             } break;
688             case MetadataPartitionState::Updated: {
689                 ++it;
690             } break;
691         }
692     }
693 
694     bool compression_enabled = false;
695 
696     std::vector<std::string> first_merge_group;
697 
698     DmTargetSnapshot::Status initial_target_values = {};
699     for (const auto& snapshot : snapshots) {
700         DmTargetSnapshot::Status current_status;
701         if (!QuerySnapshotStatus(snapshot, nullptr, &current_status)) {
702             return false;
703         }
704         initial_target_values.sectors_allocated += current_status.sectors_allocated;
705         initial_target_values.total_sectors += current_status.total_sectors;
706         initial_target_values.metadata_sectors += current_status.metadata_sectors;
707 
708         SnapshotStatus snapshot_status;
709         if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
710             return false;
711         }
712 
713         compression_enabled |= snapshot_status.compression_enabled();
714         if (DecideMergePhase(snapshot_status) == MergePhase::FIRST_PHASE) {
715             first_merge_group.emplace_back(snapshot);
716         }
717     }
718 
719     SnapshotUpdateStatus initial_status = ReadSnapshotUpdateStatus(lock.get());
720     initial_status.set_state(UpdateState::Merging);
721     initial_status.set_sectors_allocated(initial_target_values.sectors_allocated);
722     initial_status.set_total_sectors(initial_target_values.total_sectors);
723     initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
724     initial_status.set_compression_enabled(compression_enabled);
725 
726     // If any partitions shrunk, we need to merge them before we merge any other
727     // partitions (see b/177935716). Otherwise, a merge from another partition
728     // may overwrite the source block of a copy operation.
729     const std::vector<std::string>* merge_group;
730     if (first_merge_group.empty()) {
731         merge_group = &snapshots;
732         initial_status.set_merge_phase(MergePhase::SECOND_PHASE);
733     } else {
734         merge_group = &first_merge_group;
735         initial_status.set_merge_phase(MergePhase::FIRST_PHASE);
736     }
737 
738     // Point of no return - mark that we're starting a merge. From now on every
739     // eligible snapshot must be a merge target.
740     if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
741         return false;
742     }
743 
744     auto reported_code = MergeFailureCode::Ok;
745     for (const auto& snapshot : *merge_group) {
746         // If this fails, we have no choice but to continue. Everything must
747         // be merged. This is not an ideal state to be in, but it is safe,
748         // because we the next boot will try again.
749         auto code = SwitchSnapshotToMerge(lock.get(), snapshot);
750         if (code != MergeFailureCode::Ok) {
751             LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
752             if (reported_code == MergeFailureCode::Ok) {
753                 reported_code = code;
754             }
755         }
756     }
757 
758     // If we couldn't switch everything to a merge target, pre-emptively mark
759     // this merge as failed. It will get acknowledged when WaitForMerge() is
760     // called.
761     if (reported_code != MergeFailureCode::Ok) {
762         WriteUpdateState(lock.get(), UpdateState::MergeFailed, reported_code);
763     }
764 
765     // Return true no matter what, because a merge was initiated.
766     return true;
767 }
768 
SwitchSnapshotToMerge(LockedFile * lock,const std::string & name)769 MergeFailureCode SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
770     SnapshotStatus status;
771     if (!ReadSnapshotStatus(lock, name, &status)) {
772         return MergeFailureCode::ReadStatus;
773     }
774     if (status.state() != SnapshotState::CREATED) {
775         LOG(WARNING) << "Snapshot " << name
776                      << " has unexpected state: " << SnapshotState_Name(status.state());
777     }
778 
779     // After this, we return true because we technically did switch to a merge
780     // target. Everything else we do here is just informational.
781     if (auto code = RewriteSnapshotDeviceTable(name); code != MergeFailureCode::Ok) {
782         return code;
783     }
784 
785     status.set_state(SnapshotState::MERGING);
786 
787     DmTargetSnapshot::Status dm_status;
788     if (!QuerySnapshotStatus(name, nullptr, &dm_status)) {
789         LOG(ERROR) << "Could not query merge status for snapshot: " << name;
790     }
791     status.set_sectors_allocated(dm_status.sectors_allocated);
792     status.set_metadata_sectors(dm_status.metadata_sectors);
793     if (!WriteSnapshotStatus(lock, status)) {
794         LOG(ERROR) << "Could not update status file for snapshot: " << name;
795     }
796     return MergeFailureCode::Ok;
797 }
798 
RewriteSnapshotDeviceTable(const std::string & name)799 MergeFailureCode SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) {
800     auto& dm = DeviceMapper::Instance();
801 
802     std::vector<DeviceMapper::TargetInfo> old_targets;
803     if (!dm.GetTableInfo(name, &old_targets)) {
804         LOG(ERROR) << "Could not read snapshot device table: " << name;
805         return MergeFailureCode::GetTableInfo;
806     }
807     if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
808         LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << name;
809         return MergeFailureCode::UnknownTable;
810     }
811 
812     std::string base_device, cow_device;
813     if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
814         LOG(ERROR) << "Could not derive underlying devices for snapshot: " << name;
815         return MergeFailureCode::GetTableParams;
816     }
817 
818     DmTable table;
819     table.Emplace<DmTargetSnapshot>(0, old_targets[0].spec.length, base_device, cow_device,
820                                     SnapshotStorageMode::Merge, kSnapshotChunkSize);
821     if (!dm.LoadTableAndActivate(name, table)) {
822         LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << name;
823         return MergeFailureCode::ActivateNewTable;
824     }
825     LOG(INFO) << "Successfully switched snapshot device to a merge target: " << name;
826     return MergeFailureCode::Ok;
827 }
828 
// Selects which device-mapper query GetSingleTarget() issues for a device.
enum class TableQuery {
    // Query the device's table (DeviceMapper::GetTableInfo).
    Table = 0,
    // Query the device's status (DeviceMapper::GetTableStatus).
    Status = 1,
};
833 
GetSingleTarget(const std::string & dm_name,TableQuery query,DeviceMapper::TargetInfo * target)834 static bool GetSingleTarget(const std::string& dm_name, TableQuery query,
835                             DeviceMapper::TargetInfo* target) {
836     auto& dm = DeviceMapper::Instance();
837     if (dm.GetState(dm_name) == DmDeviceState::INVALID) {
838         return false;
839     }
840 
841     std::vector<DeviceMapper::TargetInfo> targets;
842     bool result;
843     if (query == TableQuery::Status) {
844         result = dm.GetTableStatus(dm_name, &targets);
845     } else {
846         result = dm.GetTableInfo(dm_name, &targets);
847     }
848     if (!result) {
849         LOG(ERROR) << "Could not query device: " << dm_name;
850         return false;
851     }
852     if (targets.size() != 1) {
853         return false;
854     }
855 
856     *target = std::move(targets[0]);
857     return true;
858 }
859 
IsSnapshotDevice(const std::string & dm_name,TargetInfo * target)860 bool SnapshotManager::IsSnapshotDevice(const std::string& dm_name, TargetInfo* target) {
861     DeviceMapper::TargetInfo snap_target;
862     if (!GetSingleTarget(dm_name, TableQuery::Status, &snap_target)) {
863         return false;
864     }
865     auto type = DeviceMapper::GetTargetType(snap_target.spec);
866     if (type != "snapshot" && type != "snapshot-merge") {
867         return false;
868     }
869     if (target) {
870         *target = std::move(snap_target);
871     }
872     return true;
873 }
874 
QuerySnapshotStatus(const std::string & dm_name,std::string * target_type,DmTargetSnapshot::Status * status)875 bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
876                                           DmTargetSnapshot::Status* status) {
877     DeviceMapper::TargetInfo target;
878     if (!IsSnapshotDevice(dm_name, &target)) {
879         LOG(ERROR) << "Device " << dm_name << " is not a snapshot or snapshot-merge device";
880         return false;
881     }
882     if (!DmTargetSnapshot::ParseStatusText(target.data, status)) {
883         LOG(ERROR) << "Could not parse snapshot status text: " << dm_name;
884         return false;
885     }
886     if (target_type) {
887         *target_type = DeviceMapper::GetTargetType(target.spec);
888     }
889     return true;
890 }
891 
892 // Note that when a merge fails, we will *always* try again to complete the
893 // merge each time the device boots. There is no harm in doing so, and if
894 // the problem was transient, we might manage to get a new outcome.
ProcessUpdateState(const std::function<bool ()> & callback,const std::function<bool ()> & before_cancel)895 UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
896                                                 const std::function<bool()>& before_cancel) {
897     while (true) {
898         auto result = CheckMergeState(before_cancel);
899         LOG(INFO) << "ProcessUpdateState handling state: " << result.state;
900 
901         if (result.state == UpdateState::MergeFailed) {
902             AcknowledgeMergeFailure(result.failure_code);
903         }
904         if (result.state != UpdateState::Merging) {
905             // Either there is no merge, or the merge was finished, so no need
906             // to keep waiting.
907             return result.state;
908         }
909 
910         if (callback && !callback()) {
911             return result.state;
912         }
913 
914         // This wait is not super time sensitive, so we have a relatively
915         // low polling frequency.
916         std::this_thread::sleep_for(kUpdateStateCheckInterval);
917     }
918 }
919 
CheckMergeState(const std::function<bool ()> & before_cancel)920 auto SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) -> MergeResult {
921     auto lock = LockExclusive();
922     if (!lock) {
923         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::AcquireLock);
924     }
925 
926     auto result = CheckMergeState(lock.get(), before_cancel);
927     LOG(INFO) << "CheckMergeState for snapshots returned: " << result.state;
928 
929     if (result.state == UpdateState::MergeCompleted) {
930         // Do this inside the same lock. Failures get acknowledged without the
931         // lock, because flock() might have failed.
932         AcknowledgeMergeSuccess(lock.get());
933     } else if (result.state == UpdateState::Cancelled) {
934         if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) {
935             LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update.";
936         }
937     }
938     return result;
939 }
940 
CheckMergeState(LockedFile * lock,const std::function<bool ()> & before_cancel)941 auto SnapshotManager::CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel)
942         -> MergeResult {
943     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
944     switch (update_status.state()) {
945         case UpdateState::None:
946         case UpdateState::MergeCompleted:
947             // Harmless races are allowed between two callers of WaitForMerge,
948             // so in both of these cases we just propagate the state.
949             return MergeResult(update_status.state());
950 
951         case UpdateState::Merging:
952         case UpdateState::MergeNeedsReboot:
953         case UpdateState::MergeFailed:
954             // We'll poll each snapshot below. Note that for the NeedsReboot
955             // case, we always poll once to give cleanup another opportunity to
956             // run.
957             break;
958 
959         case UpdateState::Unverified:
960             // This is an edge case. Normally cancelled updates are detected
961             // via the merge poll below, but if we never started a merge, we
962             // need to also check here.
963             if (HandleCancelledUpdate(lock, before_cancel)) {
964                 return MergeResult(UpdateState::Cancelled);
965             }
966             return MergeResult(update_status.state());
967 
968         default:
969             return MergeResult(update_status.state());
970     }
971 
972     std::vector<std::string> snapshots;
973     if (!ListSnapshots(lock, &snapshots)) {
974         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ListSnapshots);
975     }
976 
977     auto other_suffix = device_->GetOtherSlotSuffix();
978 
979     bool cancelled = false;
980     bool merging = false;
981     bool needs_reboot = false;
982     bool wrong_phase = false;
983     MergeFailureCode failure_code = MergeFailureCode::Ok;
984     for (const auto& snapshot : snapshots) {
985         if (android::base::EndsWith(snapshot, other_suffix)) {
986             // This will have triggered an error message in InitiateMerge already.
987             LOG(INFO) << "Skipping merge validation of unexpected snapshot: " << snapshot;
988             continue;
989         }
990 
991         auto result = CheckTargetMergeState(lock, snapshot, update_status);
992         LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << result.state;
993 
994         switch (result.state) {
995             case UpdateState::MergeFailed:
996                 // Take the first failure code in case other failures compound.
997                 if (failure_code == MergeFailureCode::Ok) {
998                     failure_code = result.failure_code;
999                 }
1000                 break;
1001             case UpdateState::Merging:
1002                 merging = true;
1003                 break;
1004             case UpdateState::MergeNeedsReboot:
1005                 needs_reboot = true;
1006                 break;
1007             case UpdateState::MergeCompleted:
1008                 break;
1009             case UpdateState::Cancelled:
1010                 cancelled = true;
1011                 break;
1012             case UpdateState::None:
1013                 wrong_phase = true;
1014                 break;
1015             default:
1016                 LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
1017                            << "\"" << result.state << "\"";
1018                 if (failure_code == MergeFailureCode::Ok) {
1019                     failure_code = MergeFailureCode::UnexpectedMergeState;
1020                 }
1021                 break;
1022         }
1023     }
1024 
1025     if (merging) {
1026         // Note that we handle "Merging" before we handle anything else. We
1027         // want to poll until *nothing* is merging if we can, so everything has
1028         // a chance to get marked as completed or failed.
1029         return MergeResult(UpdateState::Merging);
1030     }
1031     if (failure_code != MergeFailureCode::Ok) {
1032         // Note: since there are many drop-out cases for failure, we acknowledge
1033         // it in WaitForMerge rather than here and elsewhere.
1034         return MergeResult(UpdateState::MergeFailed, failure_code);
1035     }
1036     if (wrong_phase) {
1037         // If we got here, no other partitions are being merged, and nothing
1038         // failed to merge. It's safe to move to the next merge phase.
1039         auto code = MergeSecondPhaseSnapshots(lock);
1040         if (code != MergeFailureCode::Ok) {
1041             return MergeResult(UpdateState::MergeFailed, code);
1042         }
1043         return MergeResult(UpdateState::Merging);
1044     }
1045     if (needs_reboot) {
1046         WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
1047         return MergeResult(UpdateState::MergeNeedsReboot);
1048     }
1049     if (cancelled) {
1050         // This is an edge case, that we handle as correctly as we sensibly can.
1051         // The underlying partition has changed behind update_engine, and we've
1052         // removed the snapshot as a result. The exact state of the update is
1053         // undefined now, but this can only happen on an unlocked device where
1054         // partitions can be flashed without wiping userdata.
1055         return MergeResult(UpdateState::Cancelled);
1056     }
1057     return MergeResult(UpdateState::MergeCompleted);
1058 }
1059 
CheckTargetMergeState(LockedFile * lock,const std::string & name,const SnapshotUpdateStatus & update_status)1060 auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name,
1061                                             const SnapshotUpdateStatus& update_status)
1062         -> MergeResult {
1063     SnapshotStatus snapshot_status;
1064     if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
1065         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ReadStatus);
1066     }
1067 
1068     std::unique_ptr<LpMetadata> current_metadata;
1069 
1070     if (!IsSnapshotDevice(name)) {
1071         if (!current_metadata) {
1072             current_metadata = ReadCurrentMetadata();
1073         }
1074 
1075         if (!current_metadata ||
1076             GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
1077             DeleteSnapshot(lock, name);
1078             return MergeResult(UpdateState::Cancelled);
1079         }
1080 
1081         // During a check, we decided the merge was complete, but we were unable to
1082         // collapse the device-mapper stack and perform COW cleanup. If we haven't
1083         // rebooted after this check, the device will still be a snapshot-merge
1084         // target. If we have rebooted, the device will now be a linear target,
1085         // and we can try cleanup again.
1086         if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1087             // NB: It's okay if this fails now, we gave cleanup our best effort.
1088             OnSnapshotMergeComplete(lock, name, snapshot_status);
1089             return MergeResult(UpdateState::MergeCompleted);
1090         }
1091 
1092         LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << name;
1093         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1094     }
1095 
1096     // This check is expensive so it is only enabled for debugging.
1097     DCHECK((current_metadata = ReadCurrentMetadata()) &&
1098            GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated);
1099 
1100     std::string target_type;
1101     DmTargetSnapshot::Status status;
1102     if (!QuerySnapshotStatus(name, &target_type, &status)) {
1103         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
1104     }
1105     if (target_type == "snapshot" &&
1106         DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
1107         update_status.merge_phase() == MergePhase::FIRST_PHASE) {
1108         // The snapshot is not being merged because it's in the wrong phase.
1109         return MergeResult(UpdateState::None);
1110     }
1111     if (target_type != "snapshot-merge") {
1112         // We can get here if we failed to rewrite the target type in
1113         // InitiateMerge(). If we failed to create the target in first-stage
1114         // init, boot would not succeed.
1115         LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
1116         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
1117     }
1118 
1119     // These two values are equal when merging is complete.
1120     if (status.sectors_allocated != status.metadata_sectors) {
1121         if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1122             LOG(ERROR) << "Snapshot " << name << " is merging after being marked merge-complete.";
1123             return MergeResult(UpdateState::MergeFailed,
1124                                MergeFailureCode::UnmergedSectorsAfterCompletion);
1125         }
1126         return MergeResult(UpdateState::Merging);
1127     }
1128 
1129     auto code = CheckMergeConsistency(lock, name, snapshot_status);
1130     if (code != MergeFailureCode::Ok) {
1131         return MergeResult(UpdateState::MergeFailed, code);
1132     }
1133 
1134     // Merging is done. First, update the status file to indicate the merge
1135     // is complete. We do this before calling OnSnapshotMergeComplete, even
1136     // though this means the write is potentially wasted work (since in the
1137     // ideal case we'll immediately delete the file).
1138     //
1139     // This makes it simpler to reason about the next reboot: no matter what
1140     // part of cleanup failed, first-stage init won't try to create another
1141     // snapshot device for this partition.
1142     snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
1143     if (!WriteSnapshotStatus(lock, snapshot_status)) {
1144         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::WriteStatus);
1145     }
1146     if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
1147         return MergeResult(UpdateState::MergeNeedsReboot);
1148     }
1149     return MergeResult(UpdateState::MergeCompleted, MergeFailureCode::Ok);
1150 }
1151 
1152 // This returns the backing device, not the dm-user layer.
GetMappedCowDeviceName(const std::string & snapshot,const SnapshotStatus & status)1153 static std::string GetMappedCowDeviceName(const std::string& snapshot,
1154                                           const SnapshotStatus& status) {
1155     // If no partition was created (the COW exists entirely on /data), the
1156     // device-mapper layering is different than if we had a partition.
1157     if (status.cow_partition_size() == 0) {
1158         return GetCowImageDeviceName(snapshot);
1159     }
1160     return GetCowName(snapshot);
1161 }
1162 
CheckMergeConsistency(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1163 MergeFailureCode SnapshotManager::CheckMergeConsistency(LockedFile* lock, const std::string& name,
1164                                                         const SnapshotStatus& status) {
1165     CHECK(lock);
1166 
1167     if (!status.compression_enabled()) {
1168         // Do not try to verify old-style COWs yet.
1169         return MergeFailureCode::Ok;
1170     }
1171 
1172     auto& dm = DeviceMapper::Instance();
1173 
1174     std::string cow_image_name = GetMappedCowDeviceName(name, status);
1175     std::string cow_image_path;
1176     if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_path)) {
1177         LOG(ERROR) << "Failed to get path for cow device: " << cow_image_name;
1178         return MergeFailureCode::GetCowPathConsistencyCheck;
1179     }
1180 
1181     // First pass, count # of ops.
1182     size_t num_ops = 0;
1183     {
1184         unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_CLOEXEC));
1185         if (fd < 0) {
1186             PLOG(ERROR) << "Failed to open " << cow_image_name;
1187             return MergeFailureCode::OpenCowConsistencyCheck;
1188         }
1189 
1190         CowReader reader;
1191         if (!reader.Parse(std::move(fd))) {
1192             LOG(ERROR) << "Failed to parse cow " << cow_image_path;
1193             return MergeFailureCode::ParseCowConsistencyCheck;
1194         }
1195 
1196         for (auto iter = reader.GetOpIter(); !iter->Done(); iter->Next()) {
1197             if (!IsMetadataOp(iter->Get())) {
1198                 num_ops++;
1199             }
1200         }
1201     }
1202 
1203     // Second pass, try as hard as we can to get the actual number of blocks
1204     // the system thinks is merged.
1205     unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_DIRECT | O_SYNC | O_CLOEXEC));
1206     if (fd < 0) {
1207         PLOG(ERROR) << "Failed to open direct " << cow_image_name;
1208         return MergeFailureCode::OpenCowDirectConsistencyCheck;
1209     }
1210 
1211     void* addr;
1212     size_t page_size = getpagesize();
1213     if (posix_memalign(&addr, page_size, page_size) < 0) {
1214         PLOG(ERROR) << "posix_memalign with page size " << page_size;
1215         return MergeFailureCode::MemAlignConsistencyCheck;
1216     }
1217 
1218     // COWs are always at least 2MB, this is guaranteed in snapshot creation.
1219     std::unique_ptr<void, decltype(&::free)> buffer(addr, ::free);
1220     if (!android::base::ReadFully(fd, buffer.get(), page_size)) {
1221         PLOG(ERROR) << "Direct read failed " << cow_image_name;
1222         return MergeFailureCode::DirectReadConsistencyCheck;
1223     }
1224 
1225     auto header = reinterpret_cast<CowHeader*>(buffer.get());
1226     if (header->num_merge_ops != num_ops) {
1227         LOG(ERROR) << "COW consistency check failed, expected " << num_ops << " to be merged, "
1228                    << "but " << header->num_merge_ops << " were actually recorded.";
1229         LOG(ERROR) << "Aborting merge progress for snapshot " << name
1230                    << ", will try again next boot";
1231         return MergeFailureCode::WrongMergeCountConsistencyCheck;
1232     }
1233 
1234     return MergeFailureCode::Ok;
1235 }
1236 
MergeSecondPhaseSnapshots(LockedFile * lock)1237 MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
1238     std::vector<std::string> snapshots;
1239     if (!ListSnapshots(lock, &snapshots)) {
1240         return MergeFailureCode::ListSnapshots;
1241     }
1242 
1243     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1244     CHECK(update_status.state() == UpdateState::Merging);
1245     CHECK(update_status.merge_phase() == MergePhase::FIRST_PHASE);
1246 
1247     update_status.set_merge_phase(MergePhase::SECOND_PHASE);
1248     if (!WriteSnapshotUpdateStatus(lock, update_status)) {
1249         return MergeFailureCode::WriteStatus;
1250     }
1251 
1252     MergeFailureCode result = MergeFailureCode::Ok;
1253     for (const auto& snapshot : snapshots) {
1254         SnapshotStatus snapshot_status;
1255         if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
1256             return MergeFailureCode::ReadStatus;
1257         }
1258         if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) {
1259             continue;
1260         }
1261         auto code = SwitchSnapshotToMerge(lock, snapshot);
1262         if (code != MergeFailureCode::Ok) {
1263             LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot;
1264             if (result == MergeFailureCode::Ok) {
1265                 result = code;
1266             }
1267         }
1268     }
1269     return result;
1270 }
1271 
GetSnapshotBootIndicatorPath()1272 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
1273     return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
1274 }
1275 
GetRollbackIndicatorPath()1276 std::string SnapshotManager::GetRollbackIndicatorPath() {
1277     return metadata_dir_ + "/" + android::base::Basename(kRollbackIndicatorPath);
1278 }
1279 
GetForwardMergeIndicatorPath()1280 std::string SnapshotManager::GetForwardMergeIndicatorPath() {
1281     return metadata_dir_ + "/allow-forward-merge";
1282 }
1283 
GetOldPartitionMetadataPath()1284 std::string SnapshotManager::GetOldPartitionMetadataPath() {
1285     return metadata_dir_ + "/old-partition-metadata";
1286 }
1287 
AcknowledgeMergeSuccess(LockedFile * lock)1288 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
1289     // It's not possible to remove update state in recovery, so write an
1290     // indicator that cleanup is needed on reboot. If a factory data reset
1291     // was requested, it doesn't matter, everything will get wiped anyway.
1292     // To make testing easier we consider a /data wipe as cleaned up.
1293     if (device_->IsRecovery()) {
1294         WriteUpdateState(lock, UpdateState::MergeCompleted);
1295         return;
1296     }
1297 
1298     RemoveAllUpdateState(lock);
1299 }
1300 
AcknowledgeMergeFailure(MergeFailureCode failure_code)1301 void SnapshotManager::AcknowledgeMergeFailure(MergeFailureCode failure_code) {
1302     // Log first, so worst case, we always have a record of why the calls below
1303     // were being made.
1304     LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
1305 
1306     auto lock = LockExclusive();
1307     if (!lock) return;
1308 
1309     // Since we released the lock in between WaitForMerge and here, it's
1310     // possible (1) the merge successfully completed or (2) was already
1311     // marked as a failure. So make sure to check the state again, and
1312     // only mark as a failure if appropriate.
1313     UpdateState state = ReadUpdateState(lock.get());
1314     if (state != UpdateState::Merging && state != UpdateState::MergeNeedsReboot) {
1315         return;
1316     }
1317 
1318     WriteUpdateState(lock.get(), UpdateState::MergeFailed, failure_code);
1319 }
1320 
OnSnapshotMergeComplete(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1321 bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
1322                                               const SnapshotStatus& status) {
1323     if (IsSnapshotDevice(name)) {
1324         // We are extra-cautious here, to avoid deleting the wrong table.
1325         std::string target_type;
1326         DmTargetSnapshot::Status dm_status;
1327         if (!QuerySnapshotStatus(name, &target_type, &dm_status)) {
1328             return false;
1329         }
1330         if (target_type != "snapshot-merge") {
1331             LOG(ERROR) << "Unexpected target type " << target_type
1332                        << " for snapshot device: " << name;
1333             return false;
1334         }
1335         if (dm_status.sectors_allocated != dm_status.metadata_sectors) {
1336             LOG(ERROR) << "Merge is unexpectedly incomplete for device " << name;
1337             return false;
1338         }
1339         if (!CollapseSnapshotDevice(name, status)) {
1340             LOG(ERROR) << "Unable to collapse snapshot: " << name;
1341             return false;
1342         }
1343         // Note that collapsing is implicitly an Unmap, so we don't need to
1344         // unmap the snapshot.
1345     }
1346 
1347     if (!DeleteSnapshot(lock, name)) {
1348         LOG(ERROR) << "Could not delete snapshot: " << name;
1349         return false;
1350     }
1351     return true;
1352 }
1353 
CollapseSnapshotDevice(const std::string & name,const SnapshotStatus & status)1354 bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
1355                                              const SnapshotStatus& status) {
1356     auto& dm = DeviceMapper::Instance();
1357 
1358     // Verify we have a snapshot-merge device.
1359     DeviceMapper::TargetInfo target;
1360     if (!GetSingleTarget(name, TableQuery::Table, &target)) {
1361         return false;
1362     }
1363     if (DeviceMapper::GetTargetType(target.spec) != "snapshot-merge") {
1364         // This should be impossible, it was checked earlier.
1365         LOG(ERROR) << "Snapshot device has invalid target type: " << name;
1366         return false;
1367     }
1368 
1369     std::string base_device, cow_device;
1370     if (!DmTargetSnapshot::GetDevicesFromParams(target.data, &base_device, &cow_device)) {
1371         LOG(ERROR) << "Could not parse snapshot device " << name << " parameters: " << target.data;
1372         return false;
1373     }
1374 
1375     uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
1376     if (snapshot_sectors * kSectorSize != status.snapshot_size()) {
1377         LOG(ERROR) << "Snapshot " << name
1378                    << " size is not sector aligned: " << status.snapshot_size();
1379         return false;
1380     }
1381 
1382     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1383     // Create a DmTable that is identical to the base device.
1384     CreateLogicalPartitionParams base_device_params{
1385             .block_device = device_->GetSuperDevice(slot),
1386             .metadata_slot = slot,
1387             .partition_name = name,
1388             .partition_opener = &device_->GetPartitionOpener(),
1389     };
1390     DmTable table;
1391     if (!CreateDmTable(base_device_params, &table)) {
1392         LOG(ERROR) << "Could not create a DmTable for partition: " << name;
1393         return false;
1394     }
1395 
1396     if (!dm.LoadTableAndActivate(name, table)) {
1397         return false;
1398     }
1399 
1400     // Attempt to delete the snapshot device if one still exists. Nothing
1401     // should be depending on the device, and device-mapper should have
1402     // flushed remaining I/O. We could in theory replace with dm-zero (or
1403     // re-use the table above), but for now it's better to know why this
1404     // would fail.
1405     if (status.compression_enabled()) {
1406         UnmapDmUserDevice(name);
1407     }
1408     auto base_name = GetBaseDeviceName(name);
1409     if (!DeleteDeviceIfExists(base_name)) {
1410         LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
1411     }
1412 
1413     if (!DeleteDeviceIfExists(GetSourceDeviceName(name), 4000ms)) {
1414         LOG(ERROR) << "Unable to delete source device for snapshot: " << GetSourceDeviceName(name);
1415     }
1416 
1417     return true;
1418 }
1419 
HandleCancelledUpdate(LockedFile * lock,const std::function<bool ()> & before_cancel)1420 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock,
1421                                             const std::function<bool()>& before_cancel) {
1422     auto slot = GetCurrentSlot();
1423     if (slot == Slot::Unknown) {
1424         return false;
1425     }
1426 
1427     // If all snapshots were reflashed, then cancel the entire update.
1428     if (AreAllSnapshotsCancelled(lock)) {
1429         LOG(WARNING) << "Detected re-flashing, cancelling unverified update.";
1430         return RemoveAllUpdateState(lock, before_cancel);
1431     }
1432 
1433     // If update has been rolled back, then cancel the entire update.
1434     // Client (update_engine) is responsible for doing additional cleanup work on its own states
1435     // when ProcessUpdateState() returns UpdateState::Cancelled.
1436     auto current_slot = GetCurrentSlot();
1437     if (current_slot != Slot::Source) {
1438         LOG(INFO) << "Update state is being processed while booting at " << current_slot
1439                   << " slot, taking no action.";
1440         return false;
1441     }
1442 
1443     // current_slot == Source. Attempt to detect rollbacks.
1444     if (access(GetRollbackIndicatorPath().c_str(), F_OK) != 0) {
1445         // This unverified update is not attempted. Take no action.
1446         PLOG(INFO) << "Rollback indicator not detected. "
1447                    << "Update state is being processed before reboot, taking no action.";
1448         return false;
1449     }
1450 
1451     LOG(WARNING) << "Detected rollback, cancelling unverified update.";
1452     return RemoveAllUpdateState(lock, before_cancel);
1453 }
1454 
// Re-points every dm-user backed snapshot COW device for the given init
// |transition|.
//
// For each snapshot whose dm-user COW device exists with a single "user"
// target, the device's table is reloaded with a fresh DmTargetUser of the same
// length, and the function waits for the matching /dev/dm-user/ control node.
// Then:
//  - SELINUX_DETACH: a "misc_name,cow_device,source_device" string is appended
//    to |snapuserd_argv|; no connection to snapuserd is made.
//  - any other transition: the device is handed to the connected snapuserd via
//    InitDmUserCow() followed by AttachDmUser().
//
// Returns true only when every counted dm-user COW device was handled.
// Failures of InitDmUserCow()/AttachDmUser() are logged with LOG(FATAL).
//
// NOTE(review): |snapuserd_argv| is dereferenced unconditionally on the
// SELINUX_DETACH path, so callers presumably must pass a non-null vector for
// that transition - confirm against the declaration/callers.
bool SnapshotManager::PerformInitTransition(InitTransition transition,
                                            std::vector<std::string>* snapuserd_argv) {
    LOG(INFO) << "Performing transition for snapuserd.";

    // Don't use EnsureSnapuserdConnected() because this is called from init,
    // and attempting to do so will deadlock.
    if (!snapuserd_client_ && transition != InitTransition::SELINUX_DETACH) {
        snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
        if (!snapuserd_client_) {
            LOG(ERROR) << "Unable to connect to snapuserd";
            return false;
        }
    }

    auto& dm = DeviceMapper::Instance();

    auto lock = LockExclusive();
    if (!lock) return false;

    std::vector<std::string> snapshots;
    if (!ListSnapshots(lock.get(), &snapshots)) {
        LOG(ERROR) << "Failed to list snapshots.";
        return false;
    }

    // num_cows counts dm-user COW devices that need transitioning; ok_cows
    // counts those that were transitioned successfully. They must match at the
    // end for the call to succeed.
    size_t num_cows = 0;
    size_t ok_cows = 0;
    for (const auto& snapshot : snapshots) {
        // Snapshots without a dm-user COW device are not counted at all.
        std::string user_cow_name = GetDmUserCowName(snapshot);
        if (dm.GetState(user_cow_name) == DmDeviceState::INVALID) {
            continue;
        }

        DeviceMapper::TargetInfo target;
        if (!GetSingleTarget(user_cow_name, TableQuery::Table, &target)) {
            continue;
        }

        auto target_type = DeviceMapper::GetTargetType(target.spec);
        if (target_type != "user") {
            LOG(ERROR) << "Unexpected target type for " << user_cow_name << ": " << target_type;
            continue;
        }

        // From here on, any `continue` leaves ok_cows behind num_cows and
        // therefore makes the whole transition fail.
        num_cows++;

        SnapshotStatus snapshot_status;
        if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
            LOG(ERROR) << "Unable to read snapshot status: " << snapshot;
            continue;
        }

        // The dm-user misc (control) name is the same as the device name.
        auto misc_name = user_cow_name;

        // Load a new dm-user table of the same length over the existing device.
        DmTable table;
        table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
        if (!dm.LoadTableAndActivate(user_cow_name, table)) {
            LOG(ERROR) << "Unable to swap tables for " << misc_name;
            continue;
        }

        std::string source_device;
        if (!dm.GetDmDevicePathByName(GetSourceDeviceName(snapshot), &source_device)) {
            LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
            continue;
        }

        std::string cow_image_name = GetMappedCowDeviceName(snapshot, snapshot_status);

        std::string cow_image_device;
        if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_device)) {
            LOG(ERROR) << "Could not get device path for " << cow_image_name;
            continue;
        }

        // Wait for ueventd to acknowledge and create the control device node.
        std::string control_device = "/dev/dm-user/" + misc_name;
        if (!WaitForDevice(control_device, 10s)) {
            LOG(ERROR) << "dm-user control device no found:  " << misc_name;
            continue;
        }

        if (transition == InitTransition::SELINUX_DETACH) {
            // Record the arguments for the snapuserd instance that will be
            // started later.
            auto message = misc_name + "," + cow_image_device + "," + source_device;
            snapuserd_argv->emplace_back(std::move(message));

            // Do not attempt to connect to the new snapuserd yet, it hasn't
            // been started. We do however want to wait for the misc device
            // to have been created.
            ok_cows++;
            continue;
        }

        uint64_t base_sectors =
                snapuserd_client_->InitDmUserCow(misc_name, cow_image_device, source_device);
        if (base_sectors == 0) {
            // Unrecoverable as metadata reads from cow device failed
            LOG(FATAL) << "Failed to retrieve base_sectors from Snapuserd";
            return false;
        }

        // The size reported by snapuserd must fit in the existing target.
        CHECK(base_sectors <= target.spec.length);

        if (!snapuserd_client_->AttachDmUser(misc_name)) {
            // This error is unrecoverable. We cannot proceed because reads to
            // the underlying device will fail.
            LOG(FATAL) << "Could not initialize snapuserd for " << user_cow_name;
            return false;
        }

        ok_cows++;
    }

    if (ok_cows != num_cows) {
        LOG(ERROR) << "Could not transition all snapuserd consumers.";
        return false;
    }
    return true;
}
1574 
ReadCurrentMetadata()1575 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
1576     const auto& opener = device_->GetPartitionOpener();
1577     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1578     auto super_device = device_->GetSuperDevice(slot);
1579     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1580     if (!metadata) {
1581         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1582         return nullptr;
1583     }
1584     return metadata;
1585 }
1586 
GetMetadataPartitionState(const LpMetadata & metadata,const std::string & name)1587 SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState(
1588         const LpMetadata& metadata, const std::string& name) {
1589     auto partition = android::fs_mgr::FindPartition(metadata, name);
1590     if (!partition) return MetadataPartitionState::None;
1591     if (partition->attributes & LP_PARTITION_ATTR_UPDATED) {
1592         return MetadataPartitionState::Updated;
1593     }
1594     return MetadataPartitionState::Flashed;
1595 }
1596 
AreAllSnapshotsCancelled(LockedFile * lock)1597 bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
1598     std::vector<std::string> snapshots;
1599     if (!ListSnapshots(lock, &snapshots)) {
1600         LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
1601                      << "after applying an update. Assuming no snapshots.";
1602         // Let HandleCancelledUpdate resets UpdateState.
1603         return true;
1604     }
1605 
1606     std::map<std::string, bool> flashing_status;
1607 
1608     if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1609         LOG(WARNING) << "Failed to determine whether partitions have been flashed. Not"
1610                      << "removing update states.";
1611         return false;
1612     }
1613 
1614     bool all_snapshots_cancelled = std::all_of(flashing_status.begin(), flashing_status.end(),
1615                                                [](const auto& pair) { return pair.second; });
1616 
1617     if (all_snapshots_cancelled) {
1618         LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
1619     }
1620     return all_snapshots_cancelled;
1621 }
1622 
GetSnapshotFlashingStatus(LockedFile * lock,const std::vector<std::string> & snapshots,std::map<std::string,bool> * out)1623 bool SnapshotManager::GetSnapshotFlashingStatus(LockedFile* lock,
1624                                                 const std::vector<std::string>& snapshots,
1625                                                 std::map<std::string, bool>* out) {
1626     CHECK(lock);
1627 
1628     auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
1629     if (source_slot_suffix.empty()) {
1630         return false;
1631     }
1632     uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
1633     uint32_t target_slot = (source_slot == 0) ? 1 : 0;
1634 
1635     // Attempt to detect re-flashing on each partition.
1636     // - If all partitions are re-flashed, we can proceed to cancel the whole update.
1637     // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
1638     //   deleted. Caller is responsible for merging the rest of the snapshots.
1639     // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
1640     //
1641     // Note that we use target slot metadata, since if an OTA has been applied
1642     // to the target slot, we can detect the UPDATED flag. Any kind of flash
1643     // operation against dynamic partitions ensures that all copies of the
1644     // metadata are in sync, so flashing all partitions on the source slot will
1645     // remove the UPDATED flag on the target slot as well.
1646     const auto& opener = device_->GetPartitionOpener();
1647     auto super_device = device_->GetSuperDevice(target_slot);
1648     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
1649     if (!metadata) {
1650         return false;
1651     }
1652 
1653     for (const auto& snapshot_name : snapshots) {
1654         if (GetMetadataPartitionState(*metadata, snapshot_name) ==
1655             MetadataPartitionState::Updated) {
1656             out->emplace(snapshot_name, false);
1657         } else {
1658             // Delete snapshots for partitions that are re-flashed after the update.
1659             LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
1660             out->emplace(snapshot_name, true);
1661         }
1662     }
1663     return true;
1664 }
1665 
RemoveAllSnapshots(LockedFile * lock)1666 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
1667     std::vector<std::string> snapshots;
1668     if (!ListSnapshots(lock, &snapshots)) {
1669         LOG(ERROR) << "Could not list snapshots";
1670         return false;
1671     }
1672 
1673     std::map<std::string, bool> flashing_status;
1674     if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1675         LOG(WARNING) << "Failed to get flashing status";
1676     }
1677 
1678     auto current_slot = GetCurrentSlot();
1679     bool ok = true;
1680     bool has_mapped_cow_images = false;
1681     for (const auto& name : snapshots) {
1682         // If booting off source slot, it is okay to unmap and delete all the snapshots.
1683         // If boot indicator is missing, update state is None or Initiated, so
1684         //   it is also okay to unmap and delete all the snapshots.
1685         // If booting off target slot,
1686         //  - should not unmap because:
1687         //    - In Android mode, snapshots are not mapped, but
1688         //      filesystems are mounting off dm-linear targets directly.
1689         //    - In recovery mode, assume nothing is mapped, so it is optional to unmap.
1690         //  - If partition is flashed or unknown, it is okay to delete snapshots.
1691         //    Otherwise (UPDATED flag), only delete snapshots if they are not mapped
1692         //    as dm-snapshot (for example, after merge completes).
1693         bool should_unmap = current_slot != Slot::Target;
1694         bool should_delete = ShouldDeleteSnapshot(flashing_status, current_slot, name);
1695         if (should_unmap && android::base::EndsWith(name, device_->GetSlotSuffix())) {
1696             // Something very unexpected has happened - we want to unmap this
1697             // snapshot, but it's on the wrong slot. We can't unmap an active
1698             // partition. If this is not really a snapshot, skip the unmap
1699             // step.
1700             auto& dm = DeviceMapper::Instance();
1701             if (dm.GetState(name) == DmDeviceState::INVALID || !IsSnapshotDevice(name)) {
1702                 LOG(ERROR) << "Detected snapshot " << name << " on " << current_slot << " slot"
1703                            << " for source partition; removing without unmap.";
1704                 should_unmap = false;
1705             }
1706         }
1707 
1708         bool partition_ok = true;
1709         if (should_unmap && !UnmapPartitionWithSnapshot(lock, name)) {
1710             partition_ok = false;
1711         }
1712         if (partition_ok && should_delete && !DeleteSnapshot(lock, name)) {
1713             partition_ok = false;
1714         }
1715 
1716         if (!partition_ok) {
1717             // Remember whether or not we were able to unmap the cow image.
1718             auto cow_image_device = GetCowImageDeviceName(name);
1719             has_mapped_cow_images |=
1720                     (EnsureImageManager() && images_->IsImageMapped(cow_image_device));
1721 
1722             ok = false;
1723         }
1724     }
1725 
1726     if (ok || !has_mapped_cow_images) {
1727         // Delete any image artifacts as a precaution, in case an update is
1728         // being cancelled due to some corrupted state in an lp_metadata file.
1729         // Note that we do not do this if some cow images are still mapped,
1730         // since we must not remove backing storage if it's in use.
1731         if (!EnsureImageManager() || !images_->RemoveAllImages()) {
1732             LOG(ERROR) << "Could not remove all snapshot artifacts";
1733             return false;
1734         }
1735     }
1736     return ok;
1737 }
1738 
1739 // See comments in RemoveAllSnapshots().
ShouldDeleteSnapshot(const std::map<std::string,bool> & flashing_status,Slot current_slot,const std::string & name)1740 bool SnapshotManager::ShouldDeleteSnapshot(const std::map<std::string, bool>& flashing_status,
1741                                            Slot current_slot, const std::string& name) {
1742     if (current_slot != Slot::Target) {
1743         return true;
1744     }
1745     auto it = flashing_status.find(name);
1746     if (it == flashing_status.end()) {
1747         LOG(WARNING) << "Can't determine flashing status for " << name;
1748         return true;
1749     }
1750     if (it->second) {
1751         // partition flashed, okay to delete obsolete snapshots
1752         return true;
1753     }
1754     return !IsSnapshotDevice(name);
1755 }
1756 
GetUpdateState(double * progress)1757 UpdateState SnapshotManager::GetUpdateState(double* progress) {
1758     // If we've never started an update, the state file won't exist.
1759     auto state_file = GetStateFilePath();
1760     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
1761         return UpdateState::None;
1762     }
1763 
1764     auto lock = LockShared();
1765     if (!lock) {
1766         return UpdateState::None;
1767     }
1768 
1769     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
1770     auto state = update_status.state();
1771     if (progress == nullptr) {
1772         return state;
1773     }
1774 
1775     if (state == UpdateState::MergeCompleted) {
1776         *progress = 100.0;
1777         return state;
1778     }
1779 
1780     *progress = 0.0;
1781     if (state != UpdateState::Merging) {
1782         return state;
1783     }
1784 
1785     // Sum all the snapshot states as if the system consists of a single huge
1786     // snapshots device, then compute the merge completion percentage of that
1787     // device.
1788     std::vector<std::string> snapshots;
1789     if (!ListSnapshots(lock.get(), &snapshots)) {
1790         LOG(ERROR) << "Could not list snapshots";
1791         return state;
1792     }
1793 
1794     DmTargetSnapshot::Status fake_snapshots_status = {};
1795     for (const auto& snapshot : snapshots) {
1796         DmTargetSnapshot::Status current_status;
1797 
1798         if (!IsSnapshotDevice(snapshot)) continue;
1799         if (!QuerySnapshotStatus(snapshot, nullptr, &current_status)) continue;
1800 
1801         fake_snapshots_status.sectors_allocated += current_status.sectors_allocated;
1802         fake_snapshots_status.total_sectors += current_status.total_sectors;
1803         fake_snapshots_status.metadata_sectors += current_status.metadata_sectors;
1804     }
1805 
1806     *progress = DmTargetSnapshot::MergePercent(fake_snapshots_status,
1807                                                update_status.sectors_allocated());
1808 
1809     return state;
1810 }
1811 
UpdateUsesCompression()1812 bool SnapshotManager::UpdateUsesCompression() {
1813     auto lock = LockShared();
1814     if (!lock) return false;
1815     return UpdateUsesCompression(lock.get());
1816 }
1817 
UpdateUsesCompression(LockedFile * lock)1818 bool SnapshotManager::UpdateUsesCompression(LockedFile* lock) {
1819     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1820     return update_status.compression_enabled();
1821 }
1822 
ListSnapshots(LockedFile * lock,std::vector<std::string> * snapshots,const std::string & suffix)1823 bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots,
1824                                     const std::string& suffix) {
1825     CHECK(lock);
1826 
1827     auto dir_path = metadata_dir_ + "/snapshots"s;
1828     std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(dir_path.c_str()), closedir);
1829     if (!dir) {
1830         PLOG(ERROR) << "opendir failed: " << dir_path;
1831         return false;
1832     }
1833 
1834     struct dirent* dp;
1835     while ((dp = readdir(dir.get())) != nullptr) {
1836         if (dp->d_type != DT_REG) continue;
1837 
1838         std::string name(dp->d_name);
1839         if (!suffix.empty() && !android::base::EndsWith(name, suffix)) {
1840             continue;
1841         }
1842         snapshots->emplace_back(std::move(name));
1843     }
1844     return true;
1845 }
1846 
IsSnapshotManagerNeeded()1847 bool SnapshotManager::IsSnapshotManagerNeeded() {
1848     return access(kBootIndicatorPath, F_OK) == 0;
1849 }
1850 
GetGlobalRollbackIndicatorPath()1851 std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
1852     return kRollbackIndicatorPath;
1853 }
1854 
NeedSnapshotsInFirstStageMount()1855 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
1856     // If we fail to read, we'll wind up using CreateLogicalPartitions, which
1857     // will create devices that look like the old slot, except with extra
1858     // content at the end of each device. This will confuse dm-verity, and
1859     // ultimately we'll fail to boot. Why not make it a fatal error and have
1860     // the reason be clearer? Because the indicator file still exists, and
1861     // if this was FATAL, reverting to the old slot would be broken.
1862     auto slot = GetCurrentSlot();
1863 
1864     if (slot != Slot::Target) {
1865         if (slot == Slot::Source) {
1866             // Device is rebooting into the original slot, so mark this as a
1867             // rollback.
1868             auto path = GetRollbackIndicatorPath();
1869             if (!android::base::WriteStringToFile("1", path)) {
1870                 PLOG(ERROR) << "Unable to write rollback indicator: " << path;
1871             } else {
1872                 LOG(INFO) << "Rollback detected, writing rollback indicator to " << path;
1873             }
1874         }
1875         LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
1876         return false;
1877     }
1878 
1879     // If we can't read the update state, it's unlikely anything else will
1880     // succeed, so this is a fatal error. We'll eventually exhaust boot
1881     // attempts and revert to the old slot.
1882     auto lock = LockShared();
1883     if (!lock) {
1884         LOG(FATAL) << "Could not read update state to determine snapshot status";
1885         return false;
1886     }
1887     switch (ReadUpdateState(lock.get())) {
1888         case UpdateState::Unverified:
1889         case UpdateState::Merging:
1890         case UpdateState::MergeFailed:
1891             return true;
1892         default:
1893             return false;
1894     }
1895 }
1896 
CreateLogicalAndSnapshotPartitions(const std::string & super_device,const std::chrono::milliseconds & timeout_ms)1897 bool SnapshotManager::CreateLogicalAndSnapshotPartitions(
1898         const std::string& super_device, const std::chrono::milliseconds& timeout_ms) {
1899     LOG(INFO) << "Creating logical partitions with snapshots as needed";
1900 
1901     auto lock = LockExclusive();
1902     if (!lock) return false;
1903 
1904     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1905     return MapAllPartitions(lock.get(), super_device, slot, timeout_ms);
1906 }
1907 
MapAllPartitions(LockedFile * lock,const std::string & super_device,uint32_t slot,const std::chrono::milliseconds & timeout_ms)1908 bool SnapshotManager::MapAllPartitions(LockedFile* lock, const std::string& super_device,
1909                                        uint32_t slot, const std::chrono::milliseconds& timeout_ms) {
1910     const auto& opener = device_->GetPartitionOpener();
1911     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1912     if (!metadata) {
1913         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1914         return false;
1915     }
1916 
1917     if (!EnsureImageManager()) {
1918         return false;
1919     }
1920 
1921     for (const auto& partition : metadata->partitions) {
1922         if (GetPartitionGroupName(metadata->groups[partition.group_index]) == kCowGroupName) {
1923             LOG(INFO) << "Skip mapping partition " << GetPartitionName(partition) << " in group "
1924                       << kCowGroupName;
1925             continue;
1926         }
1927 
1928         CreateLogicalPartitionParams params = {
1929                 .block_device = super_device,
1930                 .metadata = metadata.get(),
1931                 .partition = &partition,
1932                 .partition_opener = &opener,
1933                 .timeout_ms = timeout_ms,
1934         };
1935         if (!MapPartitionWithSnapshot(lock, std::move(params), SnapshotContext::Mount, nullptr)) {
1936             return false;
1937         }
1938     }
1939 
1940     LOG(INFO) << "Created logical partitions with snapshot.";
1941     return true;
1942 }
1943 
GetRemainingTime(const std::chrono::milliseconds & timeout,const std::chrono::time_point<std::chrono::steady_clock> & begin)1944 static std::chrono::milliseconds GetRemainingTime(
1945         const std::chrono::milliseconds& timeout,
1946         const std::chrono::time_point<std::chrono::steady_clock>& begin) {
1947     // If no timeout is specified, execute all commands without specifying any timeout.
1948     if (timeout.count() == 0) return std::chrono::milliseconds(0);
1949     auto passed_time = std::chrono::steady_clock::now() - begin;
1950     auto remaining_time = timeout - duration_cast<std::chrono::milliseconds>(passed_time);
1951     if (remaining_time.count() <= 0) {
1952         LOG(ERROR) << "MapPartitionWithSnapshot has reached timeout " << timeout.count() << "ms ("
1953                    << remaining_time.count() << "ms remaining)";
1954         // Return min() instead of remaining_time here because 0 is treated as a special value for
1955         // no timeout, where the rest of the commands will still be executed.
1956         return std::chrono::milliseconds::min();
1957     }
1958     return remaining_time;
1959 }
1960 
MapPartitionWithSnapshot(LockedFile * lock,CreateLogicalPartitionParams params,SnapshotContext context,SnapshotPaths * paths)1961 bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
1962                                                CreateLogicalPartitionParams params,
1963                                                SnapshotContext context, SnapshotPaths* paths) {
1964     auto begin = std::chrono::steady_clock::now();
1965 
1966     CHECK(lock);
1967 
1968     if (params.GetPartitionName() != params.GetDeviceName()) {
1969         LOG(ERROR) << "Mapping snapshot with a different name is unsupported: partition_name = "
1970                    << params.GetPartitionName() << ", device_name = " << params.GetDeviceName();
1971         return false;
1972     }
1973 
1974     // Fill out fields in CreateLogicalPartitionParams so that we have more information (e.g. by
1975     // reading super partition metadata).
1976     CreateLogicalPartitionParams::OwnedData params_owned_data;
1977     if (!params.InitDefaults(&params_owned_data)) {
1978         return false;
1979     }
1980 
1981     if (!params.partition->num_extents) {
1982         LOG(INFO) << "Skipping zero-length logical partition: " << params.GetPartitionName();
1983         return true;  // leave path empty to indicate that nothing is mapped.
1984     }
1985 
1986     // Determine if there is a live snapshot for the SnapshotStatus of the partition; i.e. if the
1987     // partition still has a snapshot that needs to be mapped.  If no live snapshot or merge
1988     // completed, live_snapshot_status is set to nullopt.
1989     std::optional<SnapshotStatus> live_snapshot_status;
1990     do {
1991         if (!(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
1992             LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
1993                       << params.GetPartitionName();
1994             break;
1995         }
1996         auto file_path = GetSnapshotStatusFilePath(params.GetPartitionName());
1997         if (access(file_path.c_str(), F_OK) != 0) {
1998             if (errno != ENOENT) {
1999                 PLOG(INFO) << "Can't map snapshot for " << params.GetPartitionName()
2000                            << ": Can't access " << file_path;
2001                 return false;
2002             }
2003             break;
2004         }
2005         live_snapshot_status = std::make_optional<SnapshotStatus>();
2006         if (!ReadSnapshotStatus(lock, params.GetPartitionName(), &*live_snapshot_status)) {
2007             return false;
2008         }
2009         // No live snapshot if merge is completed.
2010         if (live_snapshot_status->state() == SnapshotState::MERGE_COMPLETED) {
2011             live_snapshot_status.reset();
2012         }
2013 
2014         if (live_snapshot_status->state() == SnapshotState::NONE ||
2015             live_snapshot_status->cow_partition_size() + live_snapshot_status->cow_file_size() ==
2016                     0) {
2017             LOG(WARNING) << "Snapshot status for " << params.GetPartitionName()
2018                          << " is invalid, ignoring: state = "
2019                          << SnapshotState_Name(live_snapshot_status->state())
2020                          << ", cow_partition_size = " << live_snapshot_status->cow_partition_size()
2021                          << ", cow_file_size = " << live_snapshot_status->cow_file_size();
2022             live_snapshot_status.reset();
2023         }
2024     } while (0);
2025 
2026     if (live_snapshot_status.has_value()) {
2027         // dm-snapshot requires the base device to be writable.
2028         params.force_writable = true;
2029         // Map the base device with a different name to avoid collision.
2030         params.device_name = GetBaseDeviceName(params.GetPartitionName());
2031     }
2032 
2033     AutoDeviceList created_devices;
2034 
2035     // Create the base device for the snapshot, or if there is no snapshot, the
2036     // device itself. This device consists of the real blocks in the super
2037     // partition that this logical partition occupies.
2038     auto& dm = DeviceMapper::Instance();
2039     std::string base_path;
2040     if (!CreateLogicalPartition(params, &base_path)) {
2041         LOG(ERROR) << "Could not create logical partition " << params.GetPartitionName()
2042                    << " as device " << params.GetDeviceName();
2043         return false;
2044     }
2045     created_devices.EmplaceBack<AutoUnmapDevice>(&dm, params.GetDeviceName());
2046 
2047     if (paths) {
2048         paths->target_device = base_path;
2049     }
2050 
2051     if (!live_snapshot_status.has_value()) {
2052         created_devices.Release();
2053         return true;
2054     }
2055 
2056     // We don't have ueventd in first-stage init, so use device major:minor
2057     // strings instead.
2058     std::string base_device;
2059     if (!dm.GetDeviceString(params.GetDeviceName(), &base_device)) {
2060         LOG(ERROR) << "Could not determine major/minor for: " << params.GetDeviceName();
2061         return false;
2062     }
2063 
2064     auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2065     if (remaining_time.count() < 0) return false;
2066 
2067     std::string cow_name;
2068     CreateLogicalPartitionParams cow_params = params;
2069     cow_params.timeout_ms = remaining_time;
2070     if (!MapCowDevices(lock, cow_params, *live_snapshot_status, &created_devices, &cow_name)) {
2071         return false;
2072     }
2073     std::string cow_device;
2074     if (!GetMappedImageDeviceStringOrPath(cow_name, &cow_device)) {
2075         LOG(ERROR) << "Could not determine major/minor for: " << cow_name;
2076         return false;
2077     }
2078     if (paths) {
2079         paths->cow_device_name = cow_name;
2080     }
2081 
2082     remaining_time = GetRemainingTime(params.timeout_ms, begin);
2083     if (remaining_time.count() < 0) return false;
2084 
2085     if (context == SnapshotContext::Update && live_snapshot_status->compression_enabled()) {
2086         // Stop here, we can't run dm-user yet, the COW isn't built.
2087         created_devices.Release();
2088         return true;
2089     }
2090 
2091     if (live_snapshot_status->compression_enabled()) {
2092         // Get the source device (eg the view of the partition from before it was resized).
2093         std::string source_device_path;
2094         if (!MapSourceDevice(lock, params.GetPartitionName(), remaining_time,
2095                              &source_device_path)) {
2096             LOG(ERROR) << "Could not map source device for: " << cow_name;
2097             return false;
2098         }
2099 
2100         auto source_device = GetSourceDeviceName(params.GetPartitionName());
2101         created_devices.EmplaceBack<AutoUnmapDevice>(&dm, source_device);
2102 
2103         if (!WaitForDevice(source_device_path, remaining_time)) {
2104             return false;
2105         }
2106 
2107         std::string cow_path;
2108         if (!GetMappedImageDevicePath(cow_name, &cow_path)) {
2109             LOG(ERROR) << "Could not determine path for: " << cow_name;
2110             return false;
2111         }
2112         if (!WaitForDevice(cow_path, remaining_time)) {
2113             return false;
2114         }
2115 
2116         auto name = GetDmUserCowName(params.GetPartitionName());
2117 
2118         std::string new_cow_device;
2119         if (!MapDmUserCow(lock, name, cow_path, source_device_path, remaining_time,
2120                           &new_cow_device)) {
2121             LOG(ERROR) << "Could not map dm-user device for partition "
2122                        << params.GetPartitionName();
2123             return false;
2124         }
2125         created_devices.EmplaceBack<AutoUnmapDevice>(&dm, name);
2126 
2127         remaining_time = GetRemainingTime(params.timeout_ms, begin);
2128         if (remaining_time.count() < 0) return false;
2129 
2130         cow_device = new_cow_device;
2131     }
2132 
2133     std::string path;
2134     if (!MapSnapshot(lock, params.GetPartitionName(), base_device, cow_device, remaining_time,
2135                      &path)) {
2136         LOG(ERROR) << "Could not map snapshot for partition: " << params.GetPartitionName();
2137         return false;
2138     }
2139     // No need to add params.GetPartitionName() to created_devices since it is immediately released.
2140 
2141     if (paths) {
2142         paths->snapshot_device = path;
2143     }
2144 
2145     created_devices.Release();
2146 
2147     LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at " << path;
2148     return true;
2149 }
2150 
UnmapPartitionWithSnapshot(LockedFile * lock,const std::string & target_partition_name)2151 bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
2152                                                  const std::string& target_partition_name) {
2153     CHECK(lock);
2154 
2155     if (!UnmapSnapshot(lock, target_partition_name)) {
2156         return false;
2157     }
2158 
2159     if (!UnmapCowDevices(lock, target_partition_name)) {
2160         return false;
2161     }
2162 
2163     auto base_name = GetBaseDeviceName(target_partition_name);
2164     if (!DeleteDeviceIfExists(base_name)) {
2165         LOG(ERROR) << "Cannot delete base device: " << base_name;
2166         return false;
2167     }
2168 
2169     auto source_name = GetSourceDeviceName(target_partition_name);
2170     if (!DeleteDeviceIfExists(source_name)) {
2171         LOG(ERROR) << "Cannot delete source device: " << source_name;
2172         return false;
2173     }
2174 
2175     LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name;
2176 
2177     return true;
2178 }
2179 
MapCowDevices(LockedFile * lock,const CreateLogicalPartitionParams & params,const SnapshotStatus & snapshot_status,AutoDeviceList * created_devices,std::string * cow_name)2180 bool SnapshotManager::MapCowDevices(LockedFile* lock, const CreateLogicalPartitionParams& params,
2181                                     const SnapshotStatus& snapshot_status,
2182                                     AutoDeviceList* created_devices, std::string* cow_name) {
2183     CHECK(lock);
2184     CHECK(snapshot_status.cow_partition_size() + snapshot_status.cow_file_size() > 0);
2185     auto begin = std::chrono::steady_clock::now();
2186 
2187     std::string partition_name = params.GetPartitionName();
2188     std::string cow_image_name = GetCowImageDeviceName(partition_name);
2189     *cow_name = GetCowName(partition_name);
2190 
2191     auto& dm = DeviceMapper::Instance();
2192 
2193     // Map COW image if necessary.
2194     if (snapshot_status.cow_file_size() > 0) {
2195         if (!EnsureImageManager()) return false;
2196         auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2197         if (remaining_time.count() < 0) return false;
2198 
2199         if (!MapCowImage(partition_name, remaining_time).has_value()) {
2200             LOG(ERROR) << "Could not map cow image for partition: " << partition_name;
2201             return false;
2202         }
2203         created_devices->EmplaceBack<AutoUnmapImage>(images_.get(), cow_image_name);
2204 
2205         // If no COW partition exists, just return the image alone.
2206         if (snapshot_status.cow_partition_size() == 0) {
2207             *cow_name = std::move(cow_image_name);
2208             LOG(INFO) << "Mapped COW image for " << partition_name << " at " << *cow_name;
2209             return true;
2210         }
2211     }
2212 
2213     auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2214     if (remaining_time.count() < 0) return false;
2215 
2216     CHECK(snapshot_status.cow_partition_size() > 0);
2217 
2218     // Create the DmTable for the COW device. It is the DmTable of the COW partition plus
2219     // COW image device as the last extent.
2220     CreateLogicalPartitionParams cow_partition_params = params;
2221     cow_partition_params.partition = nullptr;
2222     cow_partition_params.partition_name = *cow_name;
2223     cow_partition_params.device_name.clear();
2224     DmTable table;
2225     if (!CreateDmTable(cow_partition_params, &table)) {
2226         return false;
2227     }
2228     // If the COW image exists, append it as the last extent.
2229     if (snapshot_status.cow_file_size() > 0) {
2230         std::string cow_image_device;
2231         if (!GetMappedImageDeviceStringOrPath(cow_image_name, &cow_image_device)) {
2232             LOG(ERROR) << "Cannot determine major/minor for: " << cow_image_name;
2233             return false;
2234         }
2235         auto cow_partition_sectors = snapshot_status.cow_partition_size() / kSectorSize;
2236         auto cow_image_sectors = snapshot_status.cow_file_size() / kSectorSize;
2237         table.Emplace<DmTargetLinear>(cow_partition_sectors, cow_image_sectors, cow_image_device,
2238                                       0);
2239     }
2240 
2241     // We have created the DmTable now. Map it.
2242     std::string cow_path;
2243     if (!dm.CreateDevice(*cow_name, table, &cow_path, remaining_time)) {
2244         LOG(ERROR) << "Could not create COW device: " << *cow_name;
2245         return false;
2246     }
2247     created_devices->EmplaceBack<AutoUnmapDevice>(&dm, *cow_name);
2248     LOG(INFO) << "Mapped COW device for " << params.GetPartitionName() << " at " << cow_path;
2249     return true;
2250 }
2251 
UnmapCowDevices(LockedFile * lock,const std::string & name)2252 bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) {
2253     CHECK(lock);
2254     if (!EnsureImageManager()) return false;
2255 
2256     if (UpdateUsesCompression(lock) && !UnmapDmUserDevice(name)) {
2257         return false;
2258     }
2259 
2260     if (!DeleteDeviceIfExists(GetCowName(name), 4000ms)) {
2261         LOG(ERROR) << "Cannot unmap: " << GetCowName(name);
2262         return false;
2263     }
2264 
2265     std::string cow_image_name = GetCowImageDeviceName(name);
2266     if (!images_->UnmapImageIfExists(cow_image_name)) {
2267         LOG(ERROR) << "Cannot unmap image " << cow_image_name;
2268         return false;
2269     }
2270     return true;
2271 }
2272 
UnmapDmUserDevice(const std::string & snapshot_name)2273 bool SnapshotManager::UnmapDmUserDevice(const std::string& snapshot_name) {
2274     auto& dm = DeviceMapper::Instance();
2275 
2276     auto dm_user_name = GetDmUserCowName(snapshot_name);
2277     if (dm.GetState(dm_user_name) == DmDeviceState::INVALID) {
2278         return true;
2279     }
2280 
2281     if (!DeleteDeviceIfExists(dm_user_name)) {
2282         LOG(ERROR) << "Cannot unmap " << dm_user_name;
2283         return false;
2284     }
2285 
2286     if (EnsureSnapuserdConnected()) {
2287         if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2288             LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2289             return false;
2290         }
2291     }
2292 
2293     // Ensure the control device is gone so we don't run into ABA problems.
2294     auto control_device = "/dev/dm-user/" + dm_user_name;
2295     if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2296         LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2297         return false;
2298     }
2299     return true;
2300 }
2301 
MapAllSnapshots(const std::chrono::milliseconds & timeout_ms)2302 bool SnapshotManager::MapAllSnapshots(const std::chrono::milliseconds& timeout_ms) {
2303     auto lock = LockExclusive();
2304     if (!lock) return false;
2305 
2306     auto state = ReadUpdateState(lock.get());
2307     if (state == UpdateState::Unverified) {
2308         if (GetCurrentSlot() == Slot::Target) {
2309             LOG(ERROR) << "Cannot call MapAllSnapshots when booting from the target slot.";
2310             return false;
2311         }
2312     } else if (state != UpdateState::Initiated) {
2313         LOG(ERROR) << "Cannot call MapAllSnapshots from update state: " << state;
2314         return false;
2315     }
2316 
2317     std::vector<std::string> snapshots;
2318     if (!ListSnapshots(lock.get(), &snapshots)) {
2319         return false;
2320     }
2321 
2322     const auto& opener = device_->GetPartitionOpener();
2323     auto slot_suffix = device_->GetOtherSlotSuffix();
2324     auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
2325     auto super_device = device_->GetSuperDevice(slot_number);
2326     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot_number);
2327     if (!metadata) {
2328         LOG(ERROR) << "MapAllSnapshots could not read dynamic partition metadata for device: "
2329                    << super_device;
2330         return false;
2331     }
2332 
2333     for (const auto& snapshot : snapshots) {
2334         if (!UnmapPartitionWithSnapshot(lock.get(), snapshot)) {
2335             LOG(ERROR) << "MapAllSnapshots could not unmap snapshot: " << snapshot;
2336             return false;
2337         }
2338 
2339         CreateLogicalPartitionParams params = {
2340                 .block_device = super_device,
2341                 .metadata = metadata.get(),
2342                 .partition_name = snapshot,
2343                 .partition_opener = &opener,
2344                 .timeout_ms = timeout_ms,
2345         };
2346         if (!MapPartitionWithSnapshot(lock.get(), std::move(params), SnapshotContext::Mount,
2347                                       nullptr)) {
2348             LOG(ERROR) << "MapAllSnapshots failed to map: " << snapshot;
2349             return false;
2350         }
2351     }
2352 
2353     LOG(INFO) << "MapAllSnapshots succeeded.";
2354     return true;
2355 }
2356 
UnmapAllSnapshots()2357 bool SnapshotManager::UnmapAllSnapshots() {
2358     auto lock = LockExclusive();
2359     if (!lock) return false;
2360 
2361     return UnmapAllSnapshots(lock.get());
2362 }
2363 
UnmapAllSnapshots(LockedFile * lock)2364 bool SnapshotManager::UnmapAllSnapshots(LockedFile* lock) {
2365     std::vector<std::string> snapshots;
2366     if (!ListSnapshots(lock, &snapshots)) {
2367         return false;
2368     }
2369 
2370     for (const auto& snapshot : snapshots) {
2371         if (!UnmapPartitionWithSnapshot(lock, snapshot)) {
2372             LOG(ERROR) << "Failed to unmap snapshot: " << snapshot;
2373             return false;
2374         }
2375     }
2376 
2377     // Terminate the daemon and release the snapuserd_client_ object.
2378     // If we need to re-connect with the daemon, EnsureSnapuserdConnected()
2379     // will re-create the object and establish the socket connection.
2380     if (snapuserd_client_) {
2381         LOG(INFO) << "Shutdown snapuserd daemon";
2382         snapuserd_client_->DetachSnapuserd();
2383         snapuserd_client_->CloseConnection();
2384         snapuserd_client_ = nullptr;
2385     }
2386 
2387     return true;
2388 }
2389 
OpenFile(const std::string & file,int lock_flags)2390 auto SnapshotManager::OpenFile(const std::string& file, int lock_flags)
2391         -> std::unique_ptr<LockedFile> {
2392     unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2393     if (fd < 0) {
2394         PLOG(ERROR) << "Open failed: " << file;
2395         return nullptr;
2396     }
2397     if (lock_flags != 0 && TEMP_FAILURE_RETRY(flock(fd, lock_flags)) < 0) {
2398         PLOG(ERROR) << "Acquire flock failed: " << file;
2399         return nullptr;
2400     }
2401     // For simplicity, we want to CHECK that lock_mode == LOCK_EX, in some
2402     // calls, so strip extra flags.
2403     int lock_mode = lock_flags & (LOCK_EX | LOCK_SH);
2404     return std::make_unique<LockedFile>(file, std::move(fd), lock_mode);
2405 }
2406 
~LockedFile()2407 SnapshotManager::LockedFile::~LockedFile() {
2408     if (TEMP_FAILURE_RETRY(flock(fd_, LOCK_UN)) < 0) {
2409         PLOG(ERROR) << "Failed to unlock file: " << path_;
2410     }
2411 }
2412 
GetStateFilePath() const2413 std::string SnapshotManager::GetStateFilePath() const {
2414     return metadata_dir_ + "/state"s;
2415 }
2416 
GetMergeStateFilePath() const2417 std::string SnapshotManager::GetMergeStateFilePath() const {
2418     return metadata_dir_ + "/merge_state"s;
2419 }
2420 
GetLockPath() const2421 std::string SnapshotManager::GetLockPath() const {
2422     return metadata_dir_;
2423 }
2424 
OpenLock(int lock_flags)2425 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
2426     auto lock_file = GetLockPath();
2427     return OpenFile(lock_file, lock_flags);
2428 }
2429 
LockShared()2430 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
2431     return OpenLock(LOCK_SH);
2432 }
2433 
LockExclusive()2434 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
2435     return OpenLock(LOCK_EX);
2436 }
2437 
UpdateStateFromString(const std::string & contents)2438 static UpdateState UpdateStateFromString(const std::string& contents) {
2439     if (contents.empty() || contents == "none") {
2440         return UpdateState::None;
2441     } else if (contents == "initiated") {
2442         return UpdateState::Initiated;
2443     } else if (contents == "unverified") {
2444         return UpdateState::Unverified;
2445     } else if (contents == "merging") {
2446         return UpdateState::Merging;
2447     } else if (contents == "merge-completed") {
2448         return UpdateState::MergeCompleted;
2449     } else if (contents == "merge-needs-reboot") {
2450         return UpdateState::MergeNeedsReboot;
2451     } else if (contents == "merge-failed") {
2452         return UpdateState::MergeFailed;
2453     } else if (contents == "cancelled") {
2454         return UpdateState::Cancelled;
2455     } else {
2456         LOG(ERROR) << "Unknown merge state in update state file: \"" << contents << "\"";
2457         return UpdateState::None;
2458     }
2459 }
2460 
operator <<(std::ostream & os,UpdateState state)2461 std::ostream& operator<<(std::ostream& os, UpdateState state) {
2462     switch (state) {
2463         case UpdateState::None:
2464             return os << "none";
2465         case UpdateState::Initiated:
2466             return os << "initiated";
2467         case UpdateState::Unverified:
2468             return os << "unverified";
2469         case UpdateState::Merging:
2470             return os << "merging";
2471         case UpdateState::MergeCompleted:
2472             return os << "merge-completed";
2473         case UpdateState::MergeNeedsReboot:
2474             return os << "merge-needs-reboot";
2475         case UpdateState::MergeFailed:
2476             return os << "merge-failed";
2477         case UpdateState::Cancelled:
2478             return os << "cancelled";
2479         default:
2480             LOG(ERROR) << "Unknown update state: " << static_cast<uint32_t>(state);
2481             return os;
2482     }
2483 }
2484 
ReadUpdateState(LockedFile * lock)2485 UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
2486     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock);
2487     return status.state();
2488 }
2489 
ReadSnapshotUpdateStatus(LockedFile * lock)2490 SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) {
2491     CHECK(lock);
2492 
2493     SnapshotUpdateStatus status = {};
2494     std::string contents;
2495     if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
2496         PLOG(ERROR) << "Read state file failed";
2497         status.set_state(UpdateState::None);
2498         return status;
2499     }
2500 
2501     if (!status.ParseFromString(contents)) {
2502         LOG(WARNING) << "Unable to parse state file as SnapshotUpdateStatus, using the old format";
2503 
2504         // Try to rollback to legacy file to support devices that are
2505         // currently using the old file format.
2506         // TODO(b/147409432)
2507         status.set_state(UpdateStateFromString(contents));
2508     }
2509 
2510     return status;
2511 }
2512 
WriteUpdateState(LockedFile * lock,UpdateState state,MergeFailureCode failure_code)2513 bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state,
2514                                        MergeFailureCode failure_code) {
2515     SnapshotUpdateStatus status;
2516     status.set_state(state);
2517 
2518     switch (state) {
2519         case UpdateState::MergeFailed:
2520             status.set_merge_failure_code(failure_code);
2521             break;
2522         case UpdateState::Initiated:
2523             status.set_source_build_fingerprint(
2524                     android::base::GetProperty("ro.build.fingerprint", ""));
2525             break;
2526         default:
2527             break;
2528     }
2529 
2530     // If we're transitioning between two valid states (eg, we're not beginning
2531     // or ending an OTA), then make sure to propagate the compression bit and
2532     // build fingerprint.
2533     if (!(state == UpdateState::Initiated || state == UpdateState::None)) {
2534         SnapshotUpdateStatus old_status = ReadSnapshotUpdateStatus(lock);
2535         status.set_compression_enabled(old_status.compression_enabled());
2536         status.set_source_build_fingerprint(old_status.source_build_fingerprint());
2537     }
2538     return WriteSnapshotUpdateStatus(lock, status);
2539 }
2540 
WriteSnapshotUpdateStatus(LockedFile * lock,const SnapshotUpdateStatus & status)2541 bool SnapshotManager::WriteSnapshotUpdateStatus(LockedFile* lock,
2542                                                 const SnapshotUpdateStatus& status) {
2543     CHECK(lock);
2544     CHECK(lock->lock_mode() == LOCK_EX);
2545 
2546     std::string contents;
2547     if (!status.SerializeToString(&contents)) {
2548         LOG(ERROR) << "Unable to serialize SnapshotUpdateStatus.";
2549         return false;
2550     }
2551 
2552 #ifdef LIBSNAPSHOT_USE_HAL
2553     auto merge_status = MergeStatus::UNKNOWN;
2554     switch (status.state()) {
2555         // The needs-reboot and completed cases imply that /data and /metadata
2556         // can be safely wiped, so we don't report a merge status.
2557         case UpdateState::None:
2558         case UpdateState::MergeNeedsReboot:
2559         case UpdateState::MergeCompleted:
2560         case UpdateState::Initiated:
2561             merge_status = MergeStatus::NONE;
2562             break;
2563         case UpdateState::Unverified:
2564             merge_status = MergeStatus::SNAPSHOTTED;
2565             break;
2566         case UpdateState::Merging:
2567         case UpdateState::MergeFailed:
2568             merge_status = MergeStatus::MERGING;
2569             break;
2570         default:
2571             // Note that Cancelled flows to here - it is never written, since
2572             // it only communicates a transient state to the caller.
2573             LOG(ERROR) << "Unexpected update status: " << status.state();
2574             break;
2575     }
2576 
2577     bool set_before_write =
2578             merge_status == MergeStatus::SNAPSHOTTED || merge_status == MergeStatus::MERGING;
2579     if (set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2580         return false;
2581     }
2582 #endif
2583 
2584     if (!WriteStringToFileAtomic(contents, GetStateFilePath())) {
2585         PLOG(ERROR) << "Could not write to state file";
2586         return false;
2587     }
2588 
2589 #ifdef LIBSNAPSHOT_USE_HAL
2590     if (!set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2591         return false;
2592     }
2593 #endif
2594     return true;
2595 }
2596 
GetSnapshotStatusFilePath(const std::string & name)2597 std::string SnapshotManager::GetSnapshotStatusFilePath(const std::string& name) {
2598     auto file = metadata_dir_ + "/snapshots/"s + name;
2599     return file;
2600 }
2601 
ReadSnapshotStatus(LockedFile * lock,const std::string & name,SnapshotStatus * status)2602 bool SnapshotManager::ReadSnapshotStatus(LockedFile* lock, const std::string& name,
2603                                          SnapshotStatus* status) {
2604     CHECK(lock);
2605     auto path = GetSnapshotStatusFilePath(name);
2606 
2607     unique_fd fd(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2608     if (fd < 0) {
2609         PLOG(ERROR) << "Open failed: " << path;
2610         return false;
2611     }
2612 
2613     if (!status->ParseFromFileDescriptor(fd.get())) {
2614         PLOG(ERROR) << "Unable to parse " << path << " as SnapshotStatus";
2615         return false;
2616     }
2617 
2618     if (status->name() != name) {
2619         LOG(WARNING) << "Found snapshot status named " << status->name() << " in " << path;
2620         status->set_name(name);
2621     }
2622 
2623     return true;
2624 }
2625 
WriteSnapshotStatus(LockedFile * lock,const SnapshotStatus & status)2626 bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status) {
2627     // The caller must take an exclusive lock to modify snapshots.
2628     CHECK(lock);
2629     CHECK(lock->lock_mode() == LOCK_EX);
2630     CHECK(!status.name().empty());
2631 
2632     auto path = GetSnapshotStatusFilePath(status.name());
2633 
2634     std::string content;
2635     if (!status.SerializeToString(&content)) {
2636         LOG(ERROR) << "Unable to serialize SnapshotStatus for " << status.name();
2637         return false;
2638     }
2639 
2640     if (!WriteStringToFileAtomic(content, path)) {
2641         PLOG(ERROR) << "Unable to write SnapshotStatus to " << path;
2642         return false;
2643     }
2644 
2645     return true;
2646 }
2647 
EnsureImageManager()2648 bool SnapshotManager::EnsureImageManager() {
2649     if (images_) return true;
2650 
2651     images_ = device_->OpenImageManager();
2652     if (!images_) {
2653         LOG(ERROR) << "Could not open ImageManager";
2654         return false;
2655     }
2656     return true;
2657 }
2658 
EnsureSnapuserdConnected()2659 bool SnapshotManager::EnsureSnapuserdConnected() {
2660     if (snapuserd_client_) {
2661         return true;
2662     }
2663 
2664     if (!use_first_stage_snapuserd_ && !EnsureSnapuserdStarted()) {
2665         return false;
2666     }
2667 
2668     snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
2669     if (!snapuserd_client_) {
2670         LOG(ERROR) << "Unable to connect to snapuserd";
2671         return false;
2672     }
2673     return true;
2674 }
2675 
UnmapAndDeleteCowPartition(MetadataBuilder * current_metadata)2676 void SnapshotManager::UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
2677     std::vector<std::string> to_delete;
2678     for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
2679         if (!DeleteDeviceIfExists(existing_cow_partition->name())) {
2680             LOG(WARNING) << existing_cow_partition->name()
2681                          << " cannot be unmapped and its space cannot be reclaimed";
2682             continue;
2683         }
2684         to_delete.push_back(existing_cow_partition->name());
2685     }
2686     for (const auto& name : to_delete) {
2687         current_metadata->RemovePartition(name);
2688     }
2689 }
2690 
AddRequiredSpace(Return orig,const std::map<std::string,SnapshotStatus> & all_snapshot_status)2691 static Return AddRequiredSpace(Return orig,
2692                                const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
2693     if (orig.error_code() != Return::ErrorCode::NO_SPACE) {
2694         return orig;
2695     }
2696     uint64_t sum = 0;
2697     for (auto&& [name, status] : all_snapshot_status) {
2698         sum += status.cow_file_size();
2699     }
2700     return Return::NoSpace(sum);
2701 }
2702 
// Creates the snapshots needed to apply the update described by |manifest|.
//
// Preconditions checked here: the exclusive lock can be taken, the update state is
// Initiated, and overlayfs is not set up on the device.
//
// Steps, in order:
//   1. Build metadata for the current slot and for the target slot (derived from the
//      current super device), drop target-suffixed groups and old COW partitions from the
//      current metadata, and apply the manifest to the target metadata.
//   2. Decide whether compression is used and with which algorithm.
//   3. Create and initialize the snapshots (CreateUpdateSnapshotsInternal /
//      InitializeUpdateSnapshots), then write the target partition table.
//   4. When compression is enabled, save a copy of the current (old) metadata to
//      GetOldPartitionMetadataPath().
//   5. Store the compression bit in the update status.
//
// Returns Return::Ok() on success. Failures from the two snapshot helpers are returned as-is;
// all other failures return Return::Error(). Devices recorded in the local AutoDeviceList are
// only kept (Release()) when every step succeeds.
Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manifest) {
    auto lock = LockExclusive();
    if (!lock) return Return::Error();

    auto update_state = ReadUpdateState(lock.get());
    if (update_state != UpdateState::Initiated) {
        LOG(ERROR) << "Cannot create update snapshots in state " << update_state;
        return Return::Error();
    }

    // TODO(b/134949511): remove this check. Right now, with overlayfs mounted, the scratch
    // partition takes up a big chunk of space in super, causing COW images to be created on
    // retrofit Virtual A/B devices.
    if (device_->IsOverlayfsSetup()) {
        LOG(ERROR) << "Cannot create update snapshots with overlayfs setup. Run `adb enable-verity`"
                   << ", reboot, then try again.";
        return Return::Error();
    }

    // Resolve both slots. Note that both metadata builders below are created from the
    // *current* slot's super device.
    const auto& opener = device_->GetPartitionOpener();
    auto current_suffix = device_->GetSlotSuffix();
    uint32_t current_slot = SlotNumberForSlotSuffix(current_suffix);
    auto target_suffix = device_->GetOtherSlotSuffix();
    uint32_t target_slot = SlotNumberForSlotSuffix(target_suffix);
    auto current_super = device_->GetSuperDevice(current_slot);

    auto current_metadata = MetadataBuilder::New(opener, current_super, current_slot);
    if (current_metadata == nullptr) {
        LOG(ERROR) << "Cannot create metadata builder.";
        return Return::Error();
    }

    auto target_metadata =
            MetadataBuilder::NewForUpdate(opener, current_super, current_slot, target_slot);
    if (target_metadata == nullptr) {
        LOG(ERROR) << "Cannot create target metadata builder.";
        return Return::Error();
    }

    // Delete partitions with target suffix in |current_metadata|. Otherwise,
    // partition_cow_creator recognizes these left-over partitions as used space.
    for (const auto& group_name : current_metadata->ListGroups()) {
        if (android::base::EndsWith(group_name, target_suffix)) {
            current_metadata->RemoveGroupAndPartitions(group_name);
        }
    }

    // Apply the partition layout described by the manifest to the target metadata.
    SnapshotMetadataUpdater metadata_updater(target_metadata.get(), target_slot, manifest);
    if (!metadata_updater.Update()) {
        LOG(ERROR) << "Cannot calculate new metadata.";
        return Return::Error();
    }

    // Delete previous COW partitions in current_metadata so that PartitionCowCreator marks those as
    // free regions.
    UnmapAndDeleteCowPartition(current_metadata.get());

    // Check that all these metadata is not retrofit dynamic partitions. Snapshots on
    // devices with retrofit dynamic partitions does not make sense.
    // This ensures that current_metadata->GetFreeRegions() uses the same device
    // indices as target_metadata (i.e. 0 -> "super").
    // This is also assumed in MapCowDevices() call below.
    CHECK(current_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME &&
          target_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME);

    std::map<std::string, SnapshotStatus> all_snapshot_status;

    // In case of error, automatically delete devices that are created along the way.
    // Note that "lock" is destroyed after "created_devices", so it is safe to use |lock| for
    // these devices.
    AutoDeviceList created_devices;

    // The COW format is treated as unsupported when the manifest's cow_version is lower than
    // the version reported by a default-constructed CowWriter.
    const auto& dap_metadata = manifest.dynamic_partition_metadata();
    CowOptions options;
    CowWriter writer(options);
    bool cow_format_support = true;
    if (dap_metadata.cow_version() < writer.GetCowVersion()) {
        cow_format_support = false;
    }

    LOG(INFO) << " dap_metadata.cow_version(): " << dap_metadata.cow_version()
              << " writer.GetCowVersion(): " << writer.GetCowVersion();

    // Compression requires all of: IsCompressionEnabled(), the manifest's vabc_enabled flag,
    // not running in recovery, and a supported COW format.
    bool use_compression = IsCompressionEnabled() && dap_metadata.vabc_enabled() &&
                           !device_->IsRecovery() && cow_format_support;

    std::string compression_algorithm;
    if (use_compression) {
        compression_algorithm = dap_metadata.vabc_compression_param();
        if (compression_algorithm.empty()) {
            // Older OTAs don't set an explicit compression type, so default to gz.
            compression_algorithm = "gz";
        }
    } else {
        compression_algorithm = "none";
    }

    // |target_partition| and |update| start out null here; they are passed on to
    // CreateUpdateSnapshotsInternal() through |cow_creator|.
    PartitionCowCreator cow_creator{
            .target_metadata = target_metadata.get(),
            .target_suffix = target_suffix,
            .target_partition = nullptr,
            .current_metadata = current_metadata.get(),
            .current_suffix = current_suffix,
            .update = nullptr,
            .extra_extents = {},
            .compression_enabled = use_compression,
            .compression_algorithm = compression_algorithm,
    };

    auto ret = CreateUpdateSnapshotsInternal(lock.get(), manifest, &cow_creator, &created_devices,
                                             &all_snapshot_status);
    if (!ret.is_ok()) return ret;

    auto exported_target_metadata = target_metadata->Export();
    if (exported_target_metadata == nullptr) {
        LOG(ERROR) << "Cannot export target metadata";
        return Return::Error();
    }

    ret = InitializeUpdateSnapshots(lock.get(), target_metadata.get(),
                                    exported_target_metadata.get(), target_suffix,
                                    all_snapshot_status);
    if (!ret.is_ok()) return ret;

    // Write the new partition table to the target slot's super device.
    if (!UpdatePartitionTable(opener, device_->GetSuperDevice(target_slot),
                              *exported_target_metadata, target_slot)) {
        LOG(ERROR) << "Cannot write target metadata";
        return Return::Error();
    }

    // If compression is enabled, we need to retain a copy of the old metadata
    // so we can access original blocks in case they are moved around. We do
    // not want to rely on the old super metadata slot because we don't
    // guarantee its validity after the slot switch is successful.
    if (cow_creator.compression_enabled) {
        auto metadata = current_metadata->Export();
        if (!metadata) {
            LOG(ERROR) << "Could not export current metadata";
            return Return::Error();
        }

        auto path = GetOldPartitionMetadataPath();
        if (!android::fs_mgr::WriteToImageFile(path, *metadata.get())) {
            LOG(ERROR) << "Cannot write old metadata to " << path;
            return Return::Error();
        }
    }

    // Start from the status currently on disk and overwrite only the state (unchanged,
    // still Initiated) and the compression bit before writing it back.
    SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
    status.set_state(update_state);
    status.set_compression_enabled(cow_creator.compression_enabled);
    if (!WriteSnapshotUpdateStatus(lock.get(), status)) {
        LOG(ERROR) << "Unable to write new update state";
        return Return::Error();
    }

    // Everything succeeded: keep the created devices instead of having them deleted
    // automatically when |created_devices| goes out of scope.
    created_devices.Release();
    LOG(INFO) << "Successfully created all snapshots for target slot " << target_suffix;

    return Return::Ok();
}
2864 
CreateUpdateSnapshotsInternal(LockedFile * lock,const DeltaArchiveManifest & manifest,PartitionCowCreator * cow_creator,AutoDeviceList * created_devices,std::map<std::string,SnapshotStatus> * all_snapshot_status)2865 Return SnapshotManager::CreateUpdateSnapshotsInternal(
2866         LockedFile* lock, const DeltaArchiveManifest& manifest, PartitionCowCreator* cow_creator,
2867         AutoDeviceList* created_devices,
2868         std::map<std::string, SnapshotStatus>* all_snapshot_status) {
2869     CHECK(lock);
2870 
2871     auto* target_metadata = cow_creator->target_metadata;
2872     const auto& target_suffix = cow_creator->target_suffix;
2873 
2874     if (!target_metadata->AddGroup(kCowGroupName, 0)) {
2875         LOG(ERROR) << "Cannot add group " << kCowGroupName;
2876         return Return::Error();
2877     }
2878 
2879     std::map<std::string, const PartitionUpdate*> partition_map;
2880     std::map<std::string, std::vector<Extent>> extra_extents_map;
2881     for (const auto& partition_update : manifest.partitions()) {
2882         auto suffixed_name = partition_update.partition_name() + target_suffix;
2883         auto&& [it, inserted] = partition_map.emplace(suffixed_name, &partition_update);
2884         if (!inserted) {
2885             LOG(ERROR) << "Duplicated partition " << partition_update.partition_name()
2886                        << " in update manifest.";
2887             return Return::Error();
2888         }
2889 
2890         auto& extra_extents = extra_extents_map[suffixed_name];
2891         if (partition_update.has_hash_tree_extent()) {
2892             extra_extents.push_back(partition_update.hash_tree_extent());
2893         }
2894         if (partition_update.has_fec_extent()) {
2895             extra_extents.push_back(partition_update.fec_extent());
2896         }
2897     }
2898 
2899     for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
2900         cow_creator->target_partition = target_partition;
2901         cow_creator->update = nullptr;
2902         auto iter = partition_map.find(target_partition->name());
2903         if (iter != partition_map.end()) {
2904             cow_creator->update = iter->second;
2905         } else {
2906             LOG(INFO) << target_partition->name()
2907                       << " isn't included in the payload, skipping the cow creation.";
2908             continue;
2909         }
2910 
2911         cow_creator->extra_extents.clear();
2912         auto extra_extents_it = extra_extents_map.find(target_partition->name());
2913         if (extra_extents_it != extra_extents_map.end()) {
2914             cow_creator->extra_extents = std::move(extra_extents_it->second);
2915         }
2916 
2917         // Compute the device sizes for the partition.
2918         auto cow_creator_ret = cow_creator->Run();
2919         if (!cow_creator_ret.has_value()) {
2920             LOG(ERROR) << "PartitionCowCreator returned no value for " << target_partition->name();
2921             return Return::Error();
2922         }
2923 
2924         LOG(INFO) << "For partition " << target_partition->name()
2925                   << ", device size = " << cow_creator_ret->snapshot_status.device_size()
2926                   << ", snapshot size = " << cow_creator_ret->snapshot_status.snapshot_size()
2927                   << ", cow partition size = "
2928                   << cow_creator_ret->snapshot_status.cow_partition_size()
2929                   << ", cow file size = " << cow_creator_ret->snapshot_status.cow_file_size();
2930 
2931         // Delete any existing snapshot before re-creating one.
2932         if (!DeleteSnapshot(lock, target_partition->name())) {
2933             LOG(ERROR) << "Cannot delete existing snapshot before creating a new one for partition "
2934                        << target_partition->name();
2935             return Return::Error();
2936         }
2937 
2938         // It is possible that the whole partition uses free space in super, and snapshot / COW
2939         // would not be needed. In this case, skip the partition.
2940         bool needs_snapshot = cow_creator_ret->snapshot_status.snapshot_size() > 0;
2941         bool needs_cow = (cow_creator_ret->snapshot_status.cow_partition_size() +
2942                           cow_creator_ret->snapshot_status.cow_file_size()) > 0;
2943         CHECK(needs_snapshot == needs_cow);
2944 
2945         if (!needs_snapshot) {
2946             LOG(INFO) << "Skip creating snapshot for partition " << target_partition->name()
2947                       << "because nothing needs to be snapshotted.";
2948             continue;
2949         }
2950 
2951         // Find the original partition size.
2952         auto name = target_partition->name();
2953         auto old_partition_name =
2954                 name.substr(0, name.size() - target_suffix.size()) + cow_creator->current_suffix;
2955         auto old_partition = cow_creator->current_metadata->FindPartition(old_partition_name);
2956         if (old_partition) {
2957             cow_creator_ret->snapshot_status.set_old_partition_size(old_partition->size());
2958         }
2959 
2960         // Store these device sizes to snapshot status file.
2961         if (!CreateSnapshot(lock, cow_creator, &cow_creator_ret->snapshot_status)) {
2962             return Return::Error();
2963         }
2964         created_devices->EmplaceBack<AutoDeleteSnapshot>(this, lock, target_partition->name());
2965 
2966         // Create the COW partition. That is, use any remaining free space in super partition before
2967         // creating the COW images.
2968         if (cow_creator_ret->snapshot_status.cow_partition_size() > 0) {
2969             CHECK(cow_creator_ret->snapshot_status.cow_partition_size() % kSectorSize == 0)
2970                     << "cow_partition_size == "
2971                     << cow_creator_ret->snapshot_status.cow_partition_size()
2972                     << " is not a multiple of sector size " << kSectorSize;
2973             auto cow_partition = target_metadata->AddPartition(GetCowName(target_partition->name()),
2974                                                                kCowGroupName, 0 /* flags */);
2975             if (cow_partition == nullptr) {
2976                 return Return::Error();
2977             }
2978 
2979             if (!target_metadata->ResizePartition(
2980                         cow_partition, cow_creator_ret->snapshot_status.cow_partition_size(),
2981                         cow_creator_ret->cow_partition_usable_regions)) {
2982                 LOG(ERROR) << "Cannot create COW partition on metadata with size "
2983                            << cow_creator_ret->snapshot_status.cow_partition_size();
2984                 return Return::Error();
2985             }
2986             // Only the in-memory target_metadata is modified; nothing to clean up if there is an
2987             // error in the future.
2988         }
2989 
2990         all_snapshot_status->emplace(target_partition->name(),
2991                                      std::move(cow_creator_ret->snapshot_status));
2992 
2993         LOG(INFO) << "Successfully created snapshot partition for " << target_partition->name();
2994     }
2995 
2996     LOG(INFO) << "Allocating CoW images.";
2997 
2998     for (auto&& [name, snapshot_status] : *all_snapshot_status) {
2999         // Create the backing COW image if necessary.
3000         if (snapshot_status.cow_file_size() > 0) {
3001             auto ret = CreateCowImage(lock, name);
3002             if (!ret.is_ok()) return AddRequiredSpace(ret, *all_snapshot_status);
3003         }
3004 
3005         LOG(INFO) << "Successfully created snapshot for " << name;
3006     }
3007 
3008     return Return::Ok();
3009 }
3010 
InitializeUpdateSnapshots(LockedFile * lock,MetadataBuilder * target_metadata,const LpMetadata * exported_target_metadata,const std::string & target_suffix,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3011 Return SnapshotManager::InitializeUpdateSnapshots(
3012         LockedFile* lock, MetadataBuilder* target_metadata,
3013         const LpMetadata* exported_target_metadata, const std::string& target_suffix,
3014         const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3015     CHECK(lock);
3016 
3017     CreateLogicalPartitionParams cow_params{
3018             .block_device = LP_METADATA_DEFAULT_PARTITION_NAME,
3019             .metadata = exported_target_metadata,
3020             .timeout_ms = std::chrono::milliseconds::max(),
3021             .partition_opener = &device_->GetPartitionOpener(),
3022     };
3023     for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3024         AutoDeviceList created_devices_for_cow;
3025 
3026         if (!UnmapPartitionWithSnapshot(lock, target_partition->name())) {
3027             LOG(ERROR) << "Cannot unmap existing COW devices before re-mapping them for zero-fill: "
3028                        << target_partition->name();
3029             return Return::Error();
3030         }
3031 
3032         auto it = all_snapshot_status.find(target_partition->name());
3033         if (it == all_snapshot_status.end()) continue;
3034         cow_params.partition_name = target_partition->name();
3035         std::string cow_name;
3036         if (!MapCowDevices(lock, cow_params, it->second, &created_devices_for_cow, &cow_name)) {
3037             return Return::Error();
3038         }
3039 
3040         std::string cow_path;
3041         if (!images_->GetMappedImageDevice(cow_name, &cow_path)) {
3042             LOG(ERROR) << "Cannot determine path for " << cow_name;
3043             return Return::Error();
3044         }
3045 
3046         if (it->second.compression_enabled()) {
3047             unique_fd fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3048             if (fd < 0) {
3049                 PLOG(ERROR) << "open " << cow_path << " failed for snapshot "
3050                             << cow_params.partition_name;
3051                 return Return::Error();
3052             }
3053 
3054             CowOptions options;
3055             if (device()->IsTestDevice()) {
3056                 options.scratch_space = false;
3057             }
3058             options.compression = it->second.compression_algorithm();
3059 
3060             CowWriter writer(options);
3061             if (!writer.Initialize(fd) || !writer.Finalize()) {
3062                 LOG(ERROR) << "Could not initialize COW device for " << target_partition->name();
3063                 return Return::Error();
3064             }
3065         } else {
3066             auto ret = InitializeKernelCow(cow_path);
3067             if (!ret.is_ok()) {
3068                 LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": "
3069                            << cow_path;
3070                 return AddRequiredSpace(ret, all_snapshot_status);
3071             }
3072         }
3073         // Let destructor of created_devices_for_cow to unmap the COW devices.
3074     };
3075     return Return::Ok();
3076 }
3077 
MapUpdateSnapshot(const CreateLogicalPartitionParams & params,std::string * snapshot_path)3078 bool SnapshotManager::MapUpdateSnapshot(const CreateLogicalPartitionParams& params,
3079                                         std::string* snapshot_path) {
3080     auto lock = LockShared();
3081     if (!lock) return false;
3082     if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3083         LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3084                    << params.GetPartitionName();
3085         return false;
3086     }
3087 
3088     SnapshotStatus status;
3089     if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3090         return false;
3091     }
3092     if (status.compression_enabled()) {
3093         LOG(ERROR) << "Cannot use MapUpdateSnapshot with compressed snapshots";
3094         return false;
3095     }
3096 
3097     SnapshotPaths paths;
3098     if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3099         return false;
3100     }
3101 
3102     if (!paths.snapshot_device.empty()) {
3103         *snapshot_path = paths.snapshot_device;
3104     } else {
3105         *snapshot_path = paths.target_device;
3106     }
3107     DCHECK(!snapshot_path->empty());
3108     return true;
3109 }
3110 
OpenSnapshotWriter(const android::fs_mgr::CreateLogicalPartitionParams & params,const std::optional<std::string> & source_device)3111 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenSnapshotWriter(
3112         const android::fs_mgr::CreateLogicalPartitionParams& params,
3113         const std::optional<std::string>& source_device) {
3114 #if defined(LIBSNAPSHOT_NO_COW_WRITE)
3115     (void)params;
3116     (void)source_device;
3117 
3118     LOG(ERROR) << "Snapshots cannot be written in first-stage init or recovery";
3119     return nullptr;
3120 #else
3121     // First unmap any existing mapping.
3122     auto lock = LockShared();
3123     if (!lock) return nullptr;
3124     if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3125         LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3126                    << params.GetPartitionName();
3127         return nullptr;
3128     }
3129 
3130     SnapshotPaths paths;
3131     if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3132         return nullptr;
3133     }
3134 
3135     SnapshotStatus status;
3136     if (!paths.cow_device_name.empty()) {
3137         if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3138             return nullptr;
3139         }
3140     } else {
3141         // Currently, partition_cow_creator always creates snapshots. The
3142         // reason is that if partition X shrinks while partition Y grows, we
3143         // cannot bindly write to the newly freed extents in X. This would
3144         // make the old slot unusable. So, the entire size of the target
3145         // partition is currently considered snapshottable.
3146         LOG(ERROR) << "No snapshot available for partition " << params.GetPartitionName();
3147         return nullptr;
3148     }
3149 
3150     if (status.compression_enabled()) {
3151         return OpenCompressedSnapshotWriter(lock.get(), source_device, params.GetPartitionName(),
3152                                             status, paths);
3153     }
3154     return OpenKernelSnapshotWriter(lock.get(), source_device, params.GetPartitionName(), status,
3155                                     paths);
3156 #endif
3157 }
3158 
3159 #if !defined(LIBSNAPSHOT_NO_COW_WRITE)
OpenCompressedSnapshotWriter(LockedFile * lock,const std::optional<std::string> & source_device,const std::string & partition_name,const SnapshotStatus & status,const SnapshotPaths & paths)3160 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenCompressedSnapshotWriter(
3161         LockedFile* lock, const std::optional<std::string>& source_device,
3162         [[maybe_unused]] const std::string& partition_name, const SnapshotStatus& status,
3163         const SnapshotPaths& paths) {
3164     CHECK(lock);
3165 
3166     CowOptions cow_options;
3167     cow_options.compression = status.compression_algorithm();
3168     cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3169     // Disable scratch space for vts tests
3170     if (device()->IsTestDevice()) {
3171         cow_options.scratch_space = false;
3172     }
3173 
3174     // Currently we don't support partial snapshots, since partition_cow_creator
3175     // never creates this scenario.
3176     CHECK(status.snapshot_size() == status.device_size());
3177 
3178     auto writer = std::make_unique<CompressedSnapshotWriter>(cow_options);
3179     if (source_device) {
3180         writer->SetSourceDevice(*source_device);
3181     }
3182 
3183     std::string cow_path;
3184     if (!GetMappedImageDevicePath(paths.cow_device_name, &cow_path)) {
3185         LOG(ERROR) << "Could not determine path for " << paths.cow_device_name;
3186         return nullptr;
3187     }
3188 
3189     unique_fd cow_fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3190     if (cow_fd < 0) {
3191         PLOG(ERROR) << "OpenCompressedSnapshotWriter: open " << cow_path;
3192         return nullptr;
3193     }
3194     if (!writer->SetCowDevice(std::move(cow_fd))) {
3195         LOG(ERROR) << "Could not create COW writer from " << cow_path;
3196         return nullptr;
3197     }
3198 
3199     return writer;
3200 }
3201 
OpenKernelSnapshotWriter(LockedFile * lock,const std::optional<std::string> & source_device,const std::string & partition_name,const SnapshotStatus & status,const SnapshotPaths & paths)3202 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenKernelSnapshotWriter(
3203         LockedFile* lock, const std::optional<std::string>& source_device,
3204         [[maybe_unused]] const std::string& partition_name, const SnapshotStatus& status,
3205         const SnapshotPaths& paths) {
3206     CHECK(lock);
3207 
3208     CowOptions cow_options;
3209     cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3210 
3211     auto writer = std::make_unique<OnlineKernelSnapshotWriter>(cow_options);
3212 
3213     std::string path = paths.snapshot_device.empty() ? paths.target_device : paths.snapshot_device;
3214     unique_fd fd(open(path.c_str(), O_RDWR | O_CLOEXEC));
3215     if (fd < 0) {
3216         PLOG(ERROR) << "open failed: " << path;
3217         return nullptr;
3218     }
3219 
3220     if (source_device) {
3221         writer->SetSourceDevice(*source_device);
3222     }
3223 
3224     uint64_t cow_size = status.cow_partition_size() + status.cow_file_size();
3225     writer->SetSnapshotDevice(std::move(fd), cow_size);
3226 
3227     return writer;
3228 }
3229 #endif  // !defined(LIBSNAPSHOT_NO_COW_WRITE)
3230 
UnmapUpdateSnapshot(const std::string & target_partition_name)3231 bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_name) {
3232     auto lock = LockShared();
3233     if (!lock) return false;
3234     return UnmapPartitionWithSnapshot(lock.get(), target_partition_name);
3235 }
3236 
UnmapAllPartitionsInRecovery()3237 bool SnapshotManager::UnmapAllPartitionsInRecovery() {
3238     auto lock = LockExclusive();
3239     if (!lock) return false;
3240 
3241     const auto& opener = device_->GetPartitionOpener();
3242     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3243     auto super_device = device_->GetSuperDevice(slot);
3244     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
3245     if (!metadata) {
3246         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
3247         return false;
3248     }
3249 
3250     bool ok = true;
3251     for (const auto& partition : metadata->partitions) {
3252         auto partition_name = GetPartitionName(partition);
3253         ok &= UnmapPartitionWithSnapshot(lock.get(), partition_name);
3254     }
3255     return ok;
3256 }
3257 
operator <<(std::ostream & os,SnapshotManager::Slot slot)3258 std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
3259     switch (slot) {
3260         case SnapshotManager::Slot::Unknown:
3261             return os << "unknown";
3262         case SnapshotManager::Slot::Source:
3263             return os << "source";
3264         case SnapshotManager::Slot::Target:
3265             return os << "target";
3266     }
3267 }
3268 
Dump(std::ostream & os)3269 bool SnapshotManager::Dump(std::ostream& os) {
3270     // Don't actually lock. Dump() is for debugging purposes only, so it is okay
3271     // if it is racy.
3272     auto file = OpenLock(0 /* lock flag */);
3273     if (!file) return false;
3274 
3275     std::stringstream ss;
3276 
3277     auto update_status = ReadSnapshotUpdateStatus(file.get());
3278 
3279     ss << "Update state: " << ReadUpdateState(file.get()) << std::endl;
3280     ss << "Compression: " << update_status.compression_enabled() << std::endl;
3281     ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
3282     ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
3283     ss << "Rollback indicator: "
3284        << (access(GetRollbackIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3285        << std::endl;
3286     ss << "Forward merge indicator: "
3287        << (access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3288        << std::endl;
3289     ss << "Source build fingerprint: " << update_status.source_build_fingerprint() << std::endl;
3290 
3291     bool ok = true;
3292     std::vector<std::string> snapshots;
3293     if (!ListSnapshots(file.get(), &snapshots)) {
3294         LOG(ERROR) << "Could not list snapshots";
3295         snapshots.clear();
3296         ok = false;
3297     }
3298     for (const auto& name : snapshots) {
3299         ss << "Snapshot: " << name << std::endl;
3300         SnapshotStatus status;
3301         if (!ReadSnapshotStatus(file.get(), name, &status)) {
3302             ok = false;
3303             continue;
3304         }
3305         ss << "    state: " << SnapshotState_Name(status.state()) << std::endl;
3306         ss << "    device size (bytes): " << status.device_size() << std::endl;
3307         ss << "    snapshot size (bytes): " << status.snapshot_size() << std::endl;
3308         ss << "    cow partition size (bytes): " << status.cow_partition_size() << std::endl;
3309         ss << "    cow file size (bytes): " << status.cow_file_size() << std::endl;
3310         ss << "    allocated sectors: " << status.sectors_allocated() << std::endl;
3311         ss << "    metadata sectors: " << status.metadata_sectors() << std::endl;
3312         ss << "    compression: " << status.compression_algorithm() << std::endl;
3313     }
3314     os << ss.rdbuf();
3315     return ok;
3316 }
3317 
EnsureMetadataMounted()3318 std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() {
3319     if (!device_->IsRecovery()) {
3320         // No need to mount anything in recovery.
3321         LOG(INFO) << "EnsureMetadataMounted does nothing in Android mode.";
3322         return std::unique_ptr<AutoUnmountDevice>(new AutoUnmountDevice());
3323     }
3324     auto ret = AutoUnmountDevice::New(device_->GetMetadataDir());
3325     if (ret == nullptr) return nullptr;
3326 
3327     // In rescue mode, it is possible to erase and format metadata, but /metadata/ota is not
3328     // created to execute snapshot updates. Hence, subsequent calls is likely to fail because
3329     // Lock*() fails. By failing early and returning nullptr here, update_engine_sideload can
3330     // treat this case as if /metadata is not mounted.
3331     if (!LockShared()) {
3332         LOG(WARNING) << "/metadata is mounted, but errors occur when acquiring a shared lock. "
3333                         "Subsequent calls to SnapshotManager will fail. Unmounting /metadata now.";
3334         return nullptr;
3335     }
3336     return ret;
3337 }
3338 
HandleImminentDataWipe(const std::function<void ()> & callback)3339 bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
3340     if (!device_->IsRecovery()) {
3341         LOG(ERROR) << "Data wipes are only allowed in recovery.";
3342         return false;
3343     }
3344 
3345     auto mount = EnsureMetadataMounted();
3346     if (!mount || !mount->HasDevice()) {
3347         // We allow the wipe to continue, because if we can't mount /metadata,
3348         // it is unlikely the device would have booted anyway. If there is no
3349         // metadata partition, then the device predates Virtual A/B.
3350         return true;
3351     }
3352 
3353     // Check this early, so we don't accidentally start trying to populate
3354     // the state file in recovery. Note we don't call GetUpdateState since
3355     // we want errors in acquiring the lock to be propagated, instead of
3356     // returning UpdateState::None.
3357     auto state_file = GetStateFilePath();
3358     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
3359         return true;
3360     }
3361 
3362     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3363     auto super_path = device_->GetSuperDevice(slot_number);
3364     if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3365         LOG(ERROR) << "Unable to map partitions to complete merge.";
3366         return false;
3367     }
3368 
3369     auto process_callback = [&]() -> bool {
3370         if (callback) {
3371             callback();
3372         }
3373         return true;
3374     };
3375 
3376     in_factory_data_reset_ = true;
3377     UpdateState state =
3378             ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
3379     in_factory_data_reset_ = false;
3380 
3381     if (state == UpdateState::MergeFailed) {
3382         return false;
3383     }
3384 
3385     // Nothing should be depending on partitions now, so unmap them all.
3386     if (!UnmapAllPartitionsInRecovery()) {
3387         LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3388     }
3389 
3390     if (state != UpdateState::None) {
3391         auto lock = LockExclusive();
3392         if (!lock) return false;
3393 
3394         // Zap the update state so the bootloader doesn't think we're still
3395         // merging. It's okay if this fails, it's informative only at this
3396         // point.
3397         WriteUpdateState(lock.get(), UpdateState::None);
3398     }
3399     return true;
3400 }
3401 
FinishMergeInRecovery()3402 bool SnapshotManager::FinishMergeInRecovery() {
3403     if (!device_->IsRecovery()) {
3404         LOG(ERROR) << "Data wipes are only allowed in recovery.";
3405         return false;
3406     }
3407 
3408     auto mount = EnsureMetadataMounted();
3409     if (!mount || !mount->HasDevice()) {
3410         return false;
3411     }
3412 
3413     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3414     auto super_path = device_->GetSuperDevice(slot_number);
3415     if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3416         LOG(ERROR) << "Unable to map partitions to complete merge.";
3417         return false;
3418     }
3419 
3420     UpdateState state = ProcessUpdateState();
3421     if (state != UpdateState::MergeCompleted) {
3422         LOG(ERROR) << "Merge returned unexpected status: " << state;
3423         return false;
3424     }
3425 
3426     // Nothing should be depending on partitions now, so unmap them all.
3427     if (!UnmapAllPartitionsInRecovery()) {
3428         LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3429     }
3430     return true;
3431 }
3432 
ProcessUpdateStateOnDataWipe(bool allow_forward_merge,const std::function<bool ()> & callback)3433 UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
3434                                                           const std::function<bool()>& callback) {
3435     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3436     UpdateState state = ProcessUpdateState(callback);
3437     LOG(INFO) << "Update state in recovery: " << state;
3438     switch (state) {
3439         case UpdateState::MergeFailed:
3440             LOG(ERROR) << "Unrecoverable merge failure detected.";
3441             return state;
3442         case UpdateState::Unverified: {
3443             // If an OTA was just applied but has not yet started merging:
3444             //
3445             // - if forward merge is allowed, initiate merge and call
3446             // ProcessUpdateState again.
3447             //
3448             // - if forward merge is not allowed, we
3449             // have no choice but to revert slots, because the current slot will
3450             // immediately become unbootable. Rather than wait for the device
3451             // to reboot N times until a rollback, we proactively disable the
3452             // new slot instead.
3453             //
3454             // Since the rollback is inevitable, we don't treat a HAL failure
3455             // as an error here.
3456             auto slot = GetCurrentSlot();
3457             if (slot == Slot::Target) {
3458                 if (allow_forward_merge &&
3459                     access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
3460                     LOG(INFO) << "Forward merge allowed, initiating merge now.";
3461 
3462                     if (!InitiateMerge()) {
3463                         LOG(ERROR) << "Failed to initiate merge on data wipe.";
3464                         return UpdateState::MergeFailed;
3465                     }
3466                     return ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
3467                 }
3468 
3469                 LOG(ERROR) << "Reverting to old slot since update will be deleted.";
3470                 device_->SetSlotAsUnbootable(slot_number);
3471             } else {
3472                 LOG(INFO) << "Booting from " << slot << " slot, no action is taken.";
3473             }
3474             break;
3475         }
3476         case UpdateState::MergeNeedsReboot:
3477             // We shouldn't get here, because nothing is depending on
3478             // logical partitions.
3479             LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
3480             break;
3481         default:
3482             break;
3483     }
3484     return state;
3485 }
3486 
EnsureNoOverflowSnapshot(LockedFile * lock)3487 bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
3488     CHECK(lock);
3489 
3490     std::vector<std::string> snapshots;
3491     if (!ListSnapshots(lock, &snapshots)) {
3492         LOG(ERROR) << "Could not list snapshots.";
3493         return false;
3494     }
3495 
3496     auto& dm = DeviceMapper::Instance();
3497     for (const auto& snapshot : snapshots) {
3498         SnapshotStatus status;
3499         if (!ReadSnapshotStatus(lock, snapshot, &status)) {
3500             return false;
3501         }
3502         if (status.compression_enabled()) {
3503             continue;
3504         }
3505 
3506         std::vector<DeviceMapper::TargetInfo> targets;
3507         if (!dm.GetTableStatus(snapshot, &targets)) {
3508             LOG(ERROR) << "Could not read snapshot device table: " << snapshot;
3509             return false;
3510         }
3511         if (targets.size() != 1) {
3512             LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << snapshot
3513                        << ", size = " << targets.size();
3514             return false;
3515         }
3516         if (targets[0].IsOverflowSnapshot()) {
3517             LOG(ERROR) << "Detected overflow in snapshot " << snapshot
3518                        << ", CoW device size computation is wrong!";
3519             return false;
3520         }
3521     }
3522 
3523     return true;
3524 }
3525 
RecoveryCreateSnapshotDevices()3526 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices() {
3527     if (!device_->IsRecovery()) {
3528         LOG(ERROR) << __func__ << " is only allowed in recovery.";
3529         return CreateResult::NOT_CREATED;
3530     }
3531 
3532     auto mount = EnsureMetadataMounted();
3533     if (!mount || !mount->HasDevice()) {
3534         LOG(ERROR) << "Couldn't mount Metadata.";
3535         return CreateResult::NOT_CREATED;
3536     }
3537     return RecoveryCreateSnapshotDevices(mount);
3538 }
3539 
RecoveryCreateSnapshotDevices(const std::unique_ptr<AutoDevice> & metadata_device)3540 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices(
3541         const std::unique_ptr<AutoDevice>& metadata_device) {
3542     if (!device_->IsRecovery()) {
3543         LOG(ERROR) << __func__ << " is only allowed in recovery.";
3544         return CreateResult::NOT_CREATED;
3545     }
3546 
3547     if (metadata_device == nullptr || !metadata_device->HasDevice()) {
3548         LOG(ERROR) << "Metadata not mounted.";
3549         return CreateResult::NOT_CREATED;
3550     }
3551 
3552     auto state_file = GetStateFilePath();
3553     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
3554         LOG(ERROR) << "Couldn't access state file.";
3555         return CreateResult::NOT_CREATED;
3556     }
3557 
3558     if (!NeedSnapshotsInFirstStageMount()) {
3559         return CreateResult::NOT_CREATED;
3560     }
3561 
3562     auto slot_suffix = device_->GetOtherSlotSuffix();
3563     auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
3564     auto super_path = device_->GetSuperDevice(slot_number);
3565     if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3566         LOG(ERROR) << "Unable to map partitions.";
3567         return CreateResult::ERROR;
3568     }
3569     return CreateResult::CREATED;
3570 }
3571 
UpdateForwardMergeIndicator(bool wipe)3572 bool SnapshotManager::UpdateForwardMergeIndicator(bool wipe) {
3573     auto path = GetForwardMergeIndicatorPath();
3574 
3575     if (!wipe) {
3576         LOG(INFO) << "Wipe is not scheduled. Deleting forward merge indicator.";
3577         return RemoveFileIfExists(path);
3578     }
3579 
3580     // TODO(b/152094219): Don't forward merge if no CoW file is allocated.
3581 
3582     LOG(INFO) << "Wipe will be scheduled. Allowing forward merge of snapshots.";
3583     if (!android::base::WriteStringToFile("1", path)) {
3584         PLOG(ERROR) << "Unable to write forward merge indicator: " << path;
3585         return false;
3586     }
3587 
3588     return true;
3589 }
3590 
GetSnapshotMergeStatsInstance()3591 ISnapshotMergeStats* SnapshotManager::GetSnapshotMergeStatsInstance() {
3592     return SnapshotMergeStats::GetInstance(*this);
3593 }
3594 
3595 // This is only to be used in recovery or normal Android (not first-stage init).
3596 // We don't guarantee dm paths are available in first-stage init, because ueventd
3597 // isn't running yet.
GetMappedImageDevicePath(const std::string & device_name,std::string * device_path)3598 bool SnapshotManager::GetMappedImageDevicePath(const std::string& device_name,
3599                                                std::string* device_path) {
3600     auto& dm = DeviceMapper::Instance();
3601 
3602     // Try getting the device string if it is a device mapper device.
3603     if (dm.GetState(device_name) != DmDeviceState::INVALID) {
3604         return dm.GetDmDevicePathByName(device_name, device_path);
3605     }
3606 
3607     // Otherwise, get path from IImageManager.
3608     return images_->GetMappedImageDevice(device_name, device_path);
3609 }
3610 
GetMappedImageDeviceStringOrPath(const std::string & device_name,std::string * device_string_or_mapped_path)3611 bool SnapshotManager::GetMappedImageDeviceStringOrPath(const std::string& device_name,
3612                                                        std::string* device_string_or_mapped_path) {
3613     auto& dm = DeviceMapper::Instance();
3614     // Try getting the device string if it is a device mapper device.
3615     if (dm.GetState(device_name) != DmDeviceState::INVALID) {
3616         return dm.GetDeviceString(device_name, device_string_or_mapped_path);
3617     }
3618 
3619     // Otherwise, get path from IImageManager.
3620     if (!images_->GetMappedImageDevice(device_name, device_string_or_mapped_path)) {
3621         return false;
3622     }
3623 
3624     LOG(WARNING) << "Calling GetMappedImageDevice with local image manager; device "
3625                  << (device_string_or_mapped_path ? *device_string_or_mapped_path : "(nullptr)")
3626                  << "may not be available in first stage init! ";
3627     return true;
3628 }
3629 
WaitForDevice(const std::string & device,std::chrono::milliseconds timeout_ms)3630 bool SnapshotManager::WaitForDevice(const std::string& device,
3631                                     std::chrono::milliseconds timeout_ms) {
3632     if (!android::base::StartsWith(device, "/")) {
3633         return true;
3634     }
3635 
3636     // In first-stage init, we rely on init setting a callback which can
3637     // regenerate uevents and populate /dev for us.
3638     if (uevent_regen_callback_) {
3639         if (!uevent_regen_callback_(device)) {
3640             LOG(ERROR) << "Failed to find device after regenerating uevents: " << device;
3641             return false;
3642         }
3643         return true;
3644     }
3645 
3646     // Otherwise, the only kind of device we need to wait for is a dm-user
3647     // misc device. Normal calls to DeviceMapper::CreateDevice() guarantee
3648     // the path has been created.
3649     if (!android::base::StartsWith(device, "/dev/dm-user/")) {
3650         return true;
3651     }
3652 
3653     if (timeout_ms.count() == 0) {
3654         LOG(ERROR) << "No timeout was specified to wait for device: " << device;
3655         return false;
3656     }
3657     if (!android::fs_mgr::WaitForFile(device, timeout_ms)) {
3658         LOG(ERROR) << "Timed out waiting for device to appear: " << device;
3659         return false;
3660     }
3661     return true;
3662 }
3663 
IsSnapuserdRequired()3664 bool SnapshotManager::IsSnapuserdRequired() {
3665     auto lock = LockExclusive();
3666     if (!lock) return false;
3667 
3668     auto status = ReadSnapshotUpdateStatus(lock.get());
3669     return status.state() != UpdateState::None && status.compression_enabled();
3670 }
3671 
DetachSnapuserdForSelinux(std::vector<std::string> * snapuserd_argv)3672 bool SnapshotManager::DetachSnapuserdForSelinux(std::vector<std::string>* snapuserd_argv) {
3673     return PerformInitTransition(InitTransition::SELINUX_DETACH, snapuserd_argv);
3674 }
3675 
PerformSecondStageInitTransition()3676 bool SnapshotManager::PerformSecondStageInitTransition() {
3677     return PerformInitTransition(InitTransition::SECOND_STAGE);
3678 }
3679 
ReadOldPartitionMetadata(LockedFile * lock)3680 const LpMetadata* SnapshotManager::ReadOldPartitionMetadata(LockedFile* lock) {
3681     CHECK(lock);
3682 
3683     if (!old_partition_metadata_) {
3684         auto path = GetOldPartitionMetadataPath();
3685         old_partition_metadata_ = android::fs_mgr::ReadFromImageFile(path);
3686         if (!old_partition_metadata_) {
3687             LOG(ERROR) << "Could not read old partition metadata from " << path;
3688             return nullptr;
3689         }
3690     }
3691     return old_partition_metadata_.get();
3692 }
3693 
DecideMergePhase(const SnapshotStatus & status)3694 MergePhase SnapshotManager::DecideMergePhase(const SnapshotStatus& status) {
3695     if (status.compression_enabled() && status.device_size() < status.old_partition_size()) {
3696         return MergePhase::FIRST_PHASE;
3697     }
3698     return MergePhase::SECOND_PHASE;
3699 }
3700 
UpdateCowStats(ISnapshotMergeStats * stats)3701 void SnapshotManager::UpdateCowStats(ISnapshotMergeStats* stats) {
3702     auto lock = LockExclusive();
3703     if (!lock) return;
3704 
3705     std::vector<std::string> snapshots;
3706     if (!ListSnapshots(lock.get(), &snapshots, GetSnapshotSlotSuffix())) {
3707         LOG(ERROR) << "Could not list snapshots";
3708         return;
3709     }
3710 
3711     uint64_t cow_file_size = 0;
3712     uint64_t total_cow_size = 0;
3713     uint64_t estimated_cow_size = 0;
3714     for (const auto& snapshot : snapshots) {
3715         SnapshotStatus status;
3716         if (!ReadSnapshotStatus(lock.get(), snapshot, &status)) {
3717             return;
3718         }
3719 
3720         cow_file_size += status.cow_file_size();
3721         total_cow_size += status.cow_file_size() + status.cow_partition_size();
3722         estimated_cow_size += status.estimated_cow_size();
3723     }
3724 
3725     stats->set_cow_file_size(cow_file_size);
3726     stats->set_total_cow_size_bytes(total_cow_size);
3727     stats->set_estimated_cow_size_bytes(estimated_cow_size);
3728 }
3729 
DeleteDeviceIfExists(const std::string & name,const std::chrono::milliseconds & timeout_ms)3730 bool SnapshotManager::DeleteDeviceIfExists(const std::string& name,
3731                                            const std::chrono::milliseconds& timeout_ms) {
3732     auto& dm = DeviceMapper::Instance();
3733     auto start = std::chrono::steady_clock::now();
3734     while (true) {
3735         if (dm.DeleteDeviceIfExists(name)) {
3736             return true;
3737         }
3738         auto now = std::chrono::steady_clock::now();
3739         auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
3740         if (elapsed >= timeout_ms) {
3741             break;
3742         }
3743         std::this_thread::sleep_for(400ms);
3744     }
3745 
3746     // Try to diagnose why this failed. First get the actual device path.
3747     std::string full_path;
3748     if (!dm.GetDmDevicePathByName(name, &full_path)) {
3749         LOG(ERROR) << "Unable to diagnose DM_DEV_REMOVE failure.";
3750         return false;
3751     }
3752 
3753     // Check for child dm-devices.
3754     std::string block_name = android::base::Basename(full_path);
3755     std::string sysfs_holders = "/sys/class/block/" + block_name + "/holders";
3756 
3757     std::error_code ec;
3758     std::filesystem::directory_iterator dir_iter(sysfs_holders, ec);
3759     if (auto begin = std::filesystem::begin(dir_iter); begin != std::filesystem::end(dir_iter)) {
3760         LOG(ERROR) << "Child device-mapper device still mapped: " << begin->path();
3761         return false;
3762     }
3763 
3764     // Check for mounted partitions.
3765     android::fs_mgr::Fstab fstab;
3766     android::fs_mgr::ReadFstabFromFile("/proc/mounts", &fstab);
3767     for (const auto& entry : fstab) {
3768         if (android::base::Basename(entry.blk_device) == block_name) {
3769             LOG(ERROR) << "Partition still mounted: " << entry.mount_point;
3770             return false;
3771         }
3772     }
3773 
3774     // Check for detached mounted partitions.
3775     for (const auto& fs : std::filesystem::directory_iterator("/sys/fs", ec)) {
3776         std::string fs_type = android::base::Basename(fs.path().c_str());
3777         if (!(fs_type == "ext4" || fs_type == "f2fs")) {
3778             continue;
3779         }
3780 
3781         std::string path = fs.path().c_str() + "/"s + block_name;
3782         if (access(path.c_str(), F_OK) == 0) {
3783             LOG(ERROR) << "Block device was lazily unmounted and is still in-use: " << full_path
3784                        << "; possibly open file descriptor or attached loop device.";
3785             return false;
3786         }
3787     }
3788 
3789     LOG(ERROR) << "Device-mapper device " << name << "(" << full_path << ")"
3790                << " still in use."
3791                << "  Probably a file descriptor was leaked or held open, or a loop device is"
3792                << " attached.";
3793     return false;
3794 }
3795 
ReadMergeFailureCode()3796 MergeFailureCode SnapshotManager::ReadMergeFailureCode() {
3797     auto lock = LockExclusive();
3798     if (!lock) return MergeFailureCode::AcquireLock;
3799 
3800     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
3801     if (status.state() != UpdateState::MergeFailed) {
3802         return MergeFailureCode::Ok;
3803     }
3804     return status.merge_failure_code();
3805 }
3806 
ReadSourceBuildFingerprint()3807 std::string SnapshotManager::ReadSourceBuildFingerprint() {
3808     auto lock = LockExclusive();
3809     if (!lock) return {};
3810 
3811     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
3812     return status.source_build_fingerprint();
3813 }
3814 
3815 }  // namespace snapshot
3816 }  // namespace android
3817