1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "apexd"
18 
19 #include "apexd_loop.h"
20 
21 #include <mutex>
22 
23 #include <dirent.h>
24 #include <fcntl.h>
25 #include <linux/fs.h>
26 #include <linux/loop.h>
27 #include <sys/ioctl.h>
28 #include <sys/stat.h>
29 #include <sys/statfs.h>
30 #include <sys/types.h>
31 #include <unistd.h>
32 
33 #include <android-base/file.h>
34 #include <android-base/logging.h>
35 #include <android-base/parseint.h>
36 #include <android-base/properties.h>
37 #include <android-base/stringprintf.h>
38 #include <android-base/strings.h>
39 
40 #include "apexd_utils.h"
41 #include "string_log.h"
42 
43 using android::base::Basename;
44 using android::base::ErrnoError;
45 using android::base::Error;
46 using android::base::GetBoolProperty;
47 using android::base::ParseUint;
48 using android::base::Result;
49 using android::base::StartsWith;
50 using android::base::StringPrintf;
51 using android::base::unique_fd;
52 
53 #ifndef LOOP_CONFIGURE
54 // These can be removed whenever we pull in the Linux v5.8 UAPI headers
55 struct loop_config {
56   __u32 fd;
57   __u32 block_size;
58   struct loop_info64 info;
59   __u64 __reserved[8];
60 };
61 #define LOOP_CONFIGURE 0x4C0A
62 #endif
63 
64 namespace android {
65 namespace apex {
66 namespace loop {
67 
// Marker copied into lo_crypt_name of every loop device configured here.
// DestroyLoopDevice() only clears devices whose name starts with it.
static constexpr const char* kApexLoopIdPrefix = "apex:";

// Value written to the loop device's read_ahead_kb sysfs attribute:
// 128 kB of read-ahead, the same amount currently used for /system.
static constexpr const char* kReadAheadKb = "128";

// How many times WaitForDevice() tries to open a loop device node.
//
// TODO(b/122059364): The kernel creating the loop device is not enough;
// ueventd still has to run to create the device node in userspace. The
// proper fix is to listen for uevents on the netlink socket, or to use
// inotify. Until then we simply retry.
static constexpr size_t kLoopDeviceRetryAttempts = 3u;
79 
MaybeCloseBad()80 void LoopbackDeviceUniqueFd::MaybeCloseBad() {
81   if (device_fd.get() != -1) {
82     // Disassociate any files.
83     if (ioctl(device_fd.get(), LOOP_CLR_FD) == -1) {
84       PLOG(ERROR) << "Unable to clear fd for loopback device";
85     }
86   }
87 }
88 
ConfigureReadAhead(const std::string & device_path)89 Result<void> ConfigureReadAhead(const std::string& device_path) {
90   CHECK(StartsWith(device_path, "/dev/"));
91   std::string device_name = Basename(device_path);
92 
93   std::string sysfs_device =
94       StringPrintf("/sys/block/%s/queue/read_ahead_kb", device_name.c_str());
95   unique_fd sysfs_fd(open(sysfs_device.c_str(), O_RDWR | O_CLOEXEC));
96   if (sysfs_fd.get() == -1) {
97     return ErrnoError() << "Failed to open " << sysfs_device;
98   }
99 
100   int ret = TEMP_FAILURE_RETRY(
101       write(sysfs_fd.get(), kReadAheadKb, strlen(kReadAheadKb) + 1));
102   if (ret < 0) {
103     return ErrnoError() << "Failed to write to " << sysfs_device;
104   }
105 
106   return {};
107 }
108 
PreAllocateLoopDevices(size_t num)109 Result<void> PreAllocateLoopDevices(size_t num) {
110   Result<void> loop_ready = WaitForFile("/dev/loop-control", 20s);
111   if (!loop_ready.ok()) {
112     return loop_ready;
113   }
114   unique_fd ctl_fd(
115       TEMP_FAILURE_RETRY(open("/dev/loop-control", O_RDWR | O_CLOEXEC)));
116   if (ctl_fd.get() == -1) {
117     return ErrnoError() << "Failed to open loop-control";
118   }
119 
120   bool found = false;
121   size_t start_id = 0;
122   constexpr const char* kLoopPrefix = "loop";
123   WalkDir("/dev/block", [&](const std::filesystem::directory_entry& entry) {
124     std::string devname = entry.path().filename().string();
125     if (StartsWith(devname, kLoopPrefix)) {
126       size_t id;
127       auto parse_ok = ParseUint(
128           devname.substr(std::char_traits<char>::length(kLoopPrefix)), &id);
129       if (parse_ok && id > start_id) {
130         start_id = id;
131         found = true;
132       }
133     }
134   });
135   if (found) ++start_id;
136 
137   // Assumption: loop device ID [0..num) is valid.
138   // This is because pre-allocation happens during bootstrap.
139   // Anyway Kernel pre-allocated loop devices
140   // as many as CONFIG_BLK_DEV_LOOP_MIN_COUNT,
141   // Within the amount of kernel-pre-allocation,
142   // LOOP_CTL_ADD will fail with EEXIST
143   for (size_t id = start_id; id < num + start_id; ++id) {
144     int ret = ioctl(ctl_fd.get(), LOOP_CTL_ADD, id);
145     if (ret < 0 && errno != EEXIST) {
146       return ErrnoError() << "Failed LOOP_CTL_ADD";
147     }
148   }
149 
150   // Don't wait until the dev nodes are actually created, which
151   // will delay the boot. By simply returing here, the creation of the dev
152   // nodes will be done in parallel with other boot processes, and we
153   // just optimistally hope that they are all created when we actually
154   // access them for activating APEXes. If the dev nodes are not ready
155   // even then, we wait 50ms and warning message will be printed (see below
156   // CreateLoopDevice()).
157   LOG(INFO) << "Pre-allocated " << num << " loopback devices";
158   return {};
159 }
160 
ConfigureLoopDevice(const int device_fd,const std::string & target,const int32_t image_offset,const size_t image_size)161 Result<void> ConfigureLoopDevice(const int device_fd, const std::string& target,
162                                  const int32_t image_offset,
163                                  const size_t image_size) {
164   static bool use_loop_configure;
165   static std::once_flag once_flag;
166   std::call_once(once_flag, [&]() {
167     // LOOP_CONFIGURE is a new ioctl in Linux 5.8 (and backported in Android
168     // common) that allows atomically configuring a loop device. It is a lot
169     // faster than the traditional LOOP_SET_FD/LOOP_SET_STATUS64 combo, but
170     // it may not be available on updating devices, so try once before
171     // deciding.
172     struct loop_config config;
173     memset(&config, 0, sizeof(config));
174     config.fd = -1;
175     if (ioctl(device_fd, LOOP_CONFIGURE, &config) == -1 && errno == EBADF) {
176       // If the IOCTL exists, it will fail with EBADF for the -1 fd
177       use_loop_configure = true;
178     }
179   });
180 
181   /*
182    * Using O_DIRECT will tell the kernel that we want to use Direct I/O
183    * on the underlying file, which we want to do to avoid double caching.
184    * Note that Direct I/O won't be enabled immediately, because the block
185    * size of the underlying block device may not match the default loop
186    * device block size (512); when we call LOOP_SET_BLOCK_SIZE below, the
187    * kernel driver will automatically enable Direct I/O when it sees that
188    * condition is now met.
189    */
190   unique_fd target_fd(open(target.c_str(), O_RDONLY | O_CLOEXEC | O_DIRECT));
191   if (target_fd.get() == -1) {
192     struct statfs stbuf;
193     int saved_errno = errno;
194     // let's give another try with buffered I/O for EROFS and squashfs
195     if (statfs(target.c_str(), &stbuf) != 0 ||
196         (stbuf.f_type != EROFS_SUPER_MAGIC_V1 &&
197          stbuf.f_type != SQUASHFS_MAGIC &&
198          stbuf.f_type != OVERLAYFS_SUPER_MAGIC)) {
199       return Error(saved_errno) << "Failed to open " << target;
200     }
201     LOG(WARNING) << "Fallback to buffered I/O for " << target;
202     target_fd.reset(open(target.c_str(), O_RDONLY | O_CLOEXEC));
203     if (target_fd.get() == -1) {
204       return ErrnoError() << "Failed to open " << target;
205     }
206   }
207 
208   struct loop_info64 li;
209   memset(&li, 0, sizeof(li));
210   strlcpy((char*)li.lo_crypt_name, kApexLoopIdPrefix, LO_NAME_SIZE);
211   li.lo_offset = image_offset;
212   li.lo_sizelimit = image_size;
213   // Automatically free loop device on last close.
214   li.lo_flags |= LO_FLAGS_AUTOCLEAR;
215 
216   if (use_loop_configure) {
217     struct loop_config config;
218     memset(&config, 0, sizeof(config));
219     li.lo_flags |= LO_FLAGS_DIRECT_IO;
220     config.fd = target_fd.get();
221     config.info = li;
222     config.block_size = 4096;
223 
224     if (ioctl(device_fd, LOOP_CONFIGURE, &config) == -1) {
225       return ErrnoError() << "Failed to LOOP_CONFIGURE";
226     }
227 
228     return {};
229   } else {
230     if (ioctl(device_fd, LOOP_SET_FD, target_fd.get()) == -1) {
231       return ErrnoError() << "Failed to LOOP_SET_FD";
232     }
233 
234     if (ioctl(device_fd, LOOP_SET_STATUS64, &li) == -1) {
235       return ErrnoError() << "Failed to LOOP_SET_STATUS64";
236     }
237 
238     if (ioctl(device_fd, BLKFLSBUF, 0) == -1) {
239       // This works around a kernel bug where the following happens.
240       // 1) The device runs with a value of loop.max_part > 0
241       // 2) As part of LOOP_SET_FD above, we do a partition scan, which loads
242       //    the first 2 pages of the underlying file into the buffer cache
243       // 3) When we then change the offset with LOOP_SET_STATUS64, those pages
244       //    are not invalidated from the cache.
245       // 4) When we try to mount an ext4 filesystem on the loop device, the ext4
246       //    code will try to find a superblock by reading 4k at offset 0; but,
247       //    because we still have the old pages at offset 0 lying in the cache,
248       //    those pages will be returned directly. However, those pages contain
249       //    the data at offset 0 in the underlying file, not at the offset that
250       //    we configured
251       // 5) the ext4 driver fails to find a superblock in the (wrong) data, and
252       //    fails to mount the filesystem.
253       //
254       // To work around this, explicitly flush the block device, which will
255       // flush the buffer cache and make sure we actually read the data at the
256       // correct offset.
257       return ErrnoError() << "Failed to flush buffers on the loop device";
258     }
259 
260     // Direct-IO requires the loop device to have the same block size as the
261     // underlying filesystem.
262     if (ioctl(device_fd, LOOP_SET_BLOCK_SIZE, 4096) == -1) {
263       PLOG(WARNING) << "Failed to LOOP_SET_BLOCK_SIZE";
264     }
265   }
266   return {};
267 }
268 
WaitForDevice(int num)269 Result<LoopbackDeviceUniqueFd> WaitForDevice(int num) {
270   std::string opened_device;
271   const std::vector<std::string> candidate_devices = {
272       StringPrintf("/dev/block/loop%d", num),
273       StringPrintf("/dev/loop%d", num),
274   };
275 
276   // apexd-bootstrap runs in parallel with ueventd to optimize boot time. In
277   // rare cases apexd would try attempt to mount an apex before ueventd created
278   // a loop device for it. To work around this we keep polling for loop device
279   // to be created until ueventd's cold boot sequence is done.
280   // See comment on kLoopDeviceRetryAttempts.
281   unique_fd sysfs_fd;
282   bool cold_boot_done = GetBoolProperty("ro.cold_boot_done", false);
283   for (size_t i = 0; i != kLoopDeviceRetryAttempts; ++i) {
284     if (!cold_boot_done) {
285       cold_boot_done = GetBoolProperty("ro.cold_boot_done", false);
286     }
287     for (const auto& device : candidate_devices) {
288       sysfs_fd.reset(open(device.c_str(), O_RDWR | O_CLOEXEC));
289       if (sysfs_fd.get() != -1) {
290         return LoopbackDeviceUniqueFd(std::move(sysfs_fd), device);
291       }
292     }
293     PLOG(WARNING) << "Loopback device " << num << " not ready. Waiting 50ms...";
294     usleep(50000);
295     if (!cold_boot_done) {
296       // ueventd hasn't finished cold boot yet, keep trying.
297       i = 0;
298     }
299   }
300 
301   return Error() << "Faled to open loopback device " << num;
302 }
303 
CreateLoopDevice(const std::string & target,const int32_t image_offset,const size_t image_size)304 Result<LoopbackDeviceUniqueFd> CreateLoopDevice(const std::string& target,
305                                                 const int32_t image_offset,
306                                                 const size_t image_size) {
307   unique_fd ctl_fd(open("/dev/loop-control", O_RDWR | O_CLOEXEC));
308   if (ctl_fd.get() == -1) {
309     return ErrnoError() << "Failed to open loop-control";
310   }
311 
312   static std::mutex mlock;
313   std::lock_guard lock(mlock);
314   int num = ioctl(ctl_fd.get(), LOOP_CTL_GET_FREE);
315   if (num == -1) {
316     return ErrnoError() << "Failed LOOP_CTL_GET_FREE";
317   }
318 
319   Result<LoopbackDeviceUniqueFd> loop_device = WaitForDevice(num);
320   if (!loop_device.ok()) {
321     return loop_device.error();
322   }
323   CHECK_NE(loop_device->device_fd.get(), -1);
324 
325   Result<void> configureStatus = ConfigureLoopDevice(
326       loop_device->device_fd.get(), target, image_offset, image_size);
327   if (!configureStatus.ok()) {
328     return configureStatus.error();
329   }
330 
331   Result<void> read_ahead_status = ConfigureReadAhead(loop_device->name);
332   if (!read_ahead_status.ok()) {
333     return read_ahead_status.error();
334   }
335 
336   return loop_device;
337 }
338 
DestroyLoopDevice(const std::string & path,const DestroyLoopFn & extra)339 void DestroyLoopDevice(const std::string& path, const DestroyLoopFn& extra) {
340   unique_fd fd(open(path.c_str(), O_RDWR | O_CLOEXEC));
341   if (fd.get() == -1) {
342     if (errno != ENOENT) {
343       PLOG(WARNING) << "Failed to open " << path;
344     }
345     return;
346   }
347 
348   struct loop_info64 li;
349   if (ioctl(fd.get(), LOOP_GET_STATUS64, &li) < 0) {
350     if (errno != ENXIO) {
351       PLOG(WARNING) << "Failed to LOOP_GET_STATUS64 " << path;
352     }
353     return;
354   }
355 
356   auto id = std::string((char*)li.lo_crypt_name);
357   if (StartsWith(id, kApexLoopIdPrefix)) {
358     extra(path, id);
359 
360     if (ioctl(fd.get(), LOOP_CLR_FD, 0) < 0) {
361       PLOG(WARNING) << "Failed to LOOP_CLR_FD " << path;
362     }
363   }
364 }
365 
366 }  // namespace loop
367 }  // namespace apex
368 }  // namespace android
369