1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #define LOG_TAG "apexd" 18 19 #include "apexd_loop.h" 20 21 #include <mutex> 22 23 #include <dirent.h> 24 #include <fcntl.h> 25 #include <linux/fs.h> 26 #include <linux/loop.h> 27 #include <sys/ioctl.h> 28 #include <sys/stat.h> 29 #include <sys/statfs.h> 30 #include <sys/types.h> 31 #include <unistd.h> 32 33 #include <android-base/file.h> 34 #include <android-base/logging.h> 35 #include <android-base/parseint.h> 36 #include <android-base/properties.h> 37 #include <android-base/stringprintf.h> 38 #include <android-base/strings.h> 39 40 #include "apexd_utils.h" 41 #include "string_log.h" 42 43 using android::base::Basename; 44 using android::base::ErrnoError; 45 using android::base::Error; 46 using android::base::GetBoolProperty; 47 using android::base::ParseUint; 48 using android::base::Result; 49 using android::base::StartsWith; 50 using android::base::StringPrintf; 51 using android::base::unique_fd; 52 53 #ifndef LOOP_CONFIGURE 54 // These can be removed whenever we pull in the Linux v5.8 UAPI headers 55 struct loop_config { 56 __u32 fd; 57 __u32 block_size; 58 struct loop_info64 info; 59 __u64 __reserved[8]; 60 }; 61 #define LOOP_CONFIGURE 0x4C0A 62 #endif 63 64 namespace android { 65 namespace apex { 66 namespace loop { 67 68 static constexpr const char* kApexLoopIdPrefix = "apex:"; 69 70 // 128 kB read-ahead, which we currently use for /system as well 71 static constexpr const char* kReadAheadKb = "128"; 72 73 // TODO(b/122059364): Even though the kernel has created the loop 74 // device, we still depend on ueventd to run to actually create the 75 // device node in userspace. To solve this properly we should listen on 76 // the netlink socket for uevents, or use inotify. For now, this will 77 // have to do. 78 static constexpr size_t kLoopDeviceRetryAttempts = 3u; 79 80 void LoopbackDeviceUniqueFd::MaybeCloseBad() { 81 if (device_fd.get() != -1) { 82 // Disassociate any files. 83 if (ioctl(device_fd.get(), LOOP_CLR_FD) == -1) { 84 PLOG(ERROR) << "Unable to clear fd for loopback device"; 85 } 86 } 87 } 88 89 Result<void> ConfigureReadAhead(const std::string& device_path) { 90 CHECK(StartsWith(device_path, "/dev/")); 91 std::string device_name = Basename(device_path); 92 93 std::string sysfs_device = 94 StringPrintf("/sys/block/%s/queue/read_ahead_kb", device_name.c_str()); 95 unique_fd sysfs_fd(open(sysfs_device.c_str(), O_RDWR | O_CLOEXEC)); 96 if (sysfs_fd.get() == -1) { 97 return ErrnoError() << "Failed to open " << sysfs_device; 98 } 99 100 int ret = TEMP_FAILURE_RETRY( 101 write(sysfs_fd.get(), kReadAheadKb, strlen(kReadAheadKb) + 1)); 102 if (ret < 0) { 103 return ErrnoError() << "Failed to write to " << sysfs_device; 104 } 105 106 return {}; 107 } 108 109 Result<void> PreAllocateLoopDevices(size_t num) { 110 Result<void> loop_ready = WaitForFile("/dev/loop-control", 20s); 111 if (!loop_ready.ok()) { 112 return loop_ready; 113 } 114 unique_fd ctl_fd( 115 TEMP_FAILURE_RETRY(open("/dev/loop-control", O_RDWR | O_CLOEXEC))); 116 if (ctl_fd.get() == -1) { 117 return ErrnoError() << "Failed to open loop-control"; 118 } 119 120 bool found = false; 121 size_t start_id = 0; 122 constexpr const char* kLoopPrefix = "loop"; 123 WalkDir("/dev/block", [&](const std::filesystem::directory_entry& entry) { 124 std::string devname = entry.path().filename().string(); 125 if (StartsWith(devname, kLoopPrefix)) { 126 size_t id; 127 auto parse_ok = ParseUint( 128 devname.substr(std::char_traits<char>::length(kLoopPrefix)), &id); 129 if (parse_ok && id > start_id) { 130 start_id = id; 131 found = true; 132 } 133 } 134 }); 135 if (found) ++start_id; 136 137 // Assumption: loop device ID [0..num) is valid. 138 // This is because pre-allocation happens during bootstrap. 139 // Anyway Kernel pre-allocated loop devices 140 // as many as CONFIG_BLK_DEV_LOOP_MIN_COUNT, 141 // Within the amount of kernel-pre-allocation, 142 // LOOP_CTL_ADD will fail with EEXIST 143 for (size_t id = start_id; id < num + start_id; ++id) { 144 int ret = ioctl(ctl_fd.get(), LOOP_CTL_ADD, id); 145 if (ret < 0 && errno != EEXIST) { 146 return ErrnoError() << "Failed LOOP_CTL_ADD"; 147 } 148 } 149 150 // Don't wait until the dev nodes are actually created, which 151 // will delay the boot. By simply returing here, the creation of the dev 152 // nodes will be done in parallel with other boot processes, and we 153 // just optimistally hope that they are all created when we actually 154 // access them for activating APEXes. If the dev nodes are not ready 155 // even then, we wait 50ms and warning message will be printed (see below 156 // CreateLoopDevice()). 157 LOG(INFO) << "Pre-allocated " << num << " loopback devices"; 158 return {}; 159 } 160 161 Result<void> ConfigureLoopDevice(const int device_fd, const std::string& target, 162 const int32_t image_offset, 163 const size_t image_size) { 164 static bool use_loop_configure; 165 static std::once_flag once_flag; 166 std::call_once(once_flag, [&]() { 167 // LOOP_CONFIGURE is a new ioctl in Linux 5.8 (and backported in Android 168 // common) that allows atomically configuring a loop device. It is a lot 169 // faster than the traditional LOOP_SET_FD/LOOP_SET_STATUS64 combo, but 170 // it may not be available on updating devices, so try once before 171 // deciding. 172 struct loop_config config; 173 memset(&config, 0, sizeof(config)); 174 config.fd = -1; 175 if (ioctl(device_fd, LOOP_CONFIGURE, &config) == -1 && errno == EBADF) { 176 // If the IOCTL exists, it will fail with EBADF for the -1 fd 177 use_loop_configure = true; 178 } 179 }); 180 181 /* 182 * Using O_DIRECT will tell the kernel that we want to use Direct I/O 183 * on the underlying file, which we want to do to avoid double caching. 184 * Note that Direct I/O won't be enabled immediately, because the block 185 * size of the underlying block device may not match the default loop 186 * device block size (512); when we call LOOP_SET_BLOCK_SIZE below, the 187 * kernel driver will automatically enable Direct I/O when it sees that 188 * condition is now met. 189 */ 190 unique_fd target_fd(open(target.c_str(), O_RDONLY | O_CLOEXEC | O_DIRECT)); 191 if (target_fd.get() == -1) { 192 struct statfs stbuf; 193 int saved_errno = errno; 194 // let's give another try with buffered I/O for EROFS and squashfs 195 if (statfs(target.c_str(), &stbuf) != 0 || 196 (stbuf.f_type != EROFS_SUPER_MAGIC_V1 && 197 stbuf.f_type != SQUASHFS_MAGIC && 198 stbuf.f_type != OVERLAYFS_SUPER_MAGIC)) { 199 return Error(saved_errno) << "Failed to open " << target; 200 } 201 LOG(WARNING) << "Fallback to buffered I/O for " << target; 202 target_fd.reset(open(target.c_str(), O_RDONLY | O_CLOEXEC)); 203 if (target_fd.get() == -1) { 204 return ErrnoError() << "Failed to open " << target; 205 } 206 } 207 208 struct loop_info64 li; 209 memset(&li, 0, sizeof(li)); 210 strlcpy((char*)li.lo_crypt_name, kApexLoopIdPrefix, LO_NAME_SIZE); 211 li.lo_offset = image_offset; 212 li.lo_sizelimit = image_size; 213 // Automatically free loop device on last close. 214 li.lo_flags |= LO_FLAGS_AUTOCLEAR; 215 216 if (use_loop_configure) { 217 struct loop_config config; 218 memset(&config, 0, sizeof(config)); 219 li.lo_flags |= LO_FLAGS_DIRECT_IO; 220 config.fd = target_fd.get(); 221 config.info = li; 222 config.block_size = 4096; 223 224 if (ioctl(device_fd, LOOP_CONFIGURE, &config) == -1) { 225 return ErrnoError() << "Failed to LOOP_CONFIGURE"; 226 } 227 228 return {}; 229 } else { 230 if (ioctl(device_fd, LOOP_SET_FD, target_fd.get()) == -1) { 231 return ErrnoError() << "Failed to LOOP_SET_FD"; 232 } 233 234 if (ioctl(device_fd, LOOP_SET_STATUS64, &li) == -1) { 235 return ErrnoError() << "Failed to LOOP_SET_STATUS64"; 236 } 237 238 if (ioctl(device_fd, BLKFLSBUF, 0) == -1) { 239 // This works around a kernel bug where the following happens. 240 // 1) The device runs with a value of loop.max_part > 0 241 // 2) As part of LOOP_SET_FD above, we do a partition scan, which loads 242 // the first 2 pages of the underlying file into the buffer cache 243 // 3) When we then change the offset with LOOP_SET_STATUS64, those pages 244 // are not invalidated from the cache. 245 // 4) When we try to mount an ext4 filesystem on the loop device, the ext4 246 // code will try to find a superblock by reading 4k at offset 0; but, 247 // because we still have the old pages at offset 0 lying in the cache, 248 // those pages will be returned directly. However, those pages contain 249 // the data at offset 0 in the underlying file, not at the offset that 250 // we configured 251 // 5) the ext4 driver fails to find a superblock in the (wrong) data, and 252 // fails to mount the filesystem. 253 // 254 // To work around this, explicitly flush the block device, which will 255 // flush the buffer cache and make sure we actually read the data at the 256 // correct offset. 257 return ErrnoError() << "Failed to flush buffers on the loop device"; 258 } 259 260 // Direct-IO requires the loop device to have the same block size as the 261 // underlying filesystem. 262 if (ioctl(device_fd, LOOP_SET_BLOCK_SIZE, 4096) == -1) { 263 PLOG(WARNING) << "Failed to LOOP_SET_BLOCK_SIZE"; 264 } 265 } 266 return {}; 267 } 268 269 Result<LoopbackDeviceUniqueFd> WaitForDevice(int num) { 270 std::string opened_device; 271 const std::vector<std::string> candidate_devices = { 272 StringPrintf("/dev/block/loop%d", num), 273 StringPrintf("/dev/loop%d", num), 274 }; 275 276 // apexd-bootstrap runs in parallel with ueventd to optimize boot time. In 277 // rare cases apexd would try attempt to mount an apex before ueventd created 278 // a loop device for it. To work around this we keep polling for loop device 279 // to be created until ueventd's cold boot sequence is done. 280 // See comment on kLoopDeviceRetryAttempts. 281 unique_fd sysfs_fd; 282 bool cold_boot_done = GetBoolProperty("ro.cold_boot_done", false); 283 for (size_t i = 0; i != kLoopDeviceRetryAttempts; ++i) { 284 if (!cold_boot_done) { 285 cold_boot_done = GetBoolProperty("ro.cold_boot_done", false); 286 } 287 for (const auto& device : candidate_devices) { 288 sysfs_fd.reset(open(device.c_str(), O_RDWR | O_CLOEXEC)); 289 if (sysfs_fd.get() != -1) { 290 return LoopbackDeviceUniqueFd(std::move(sysfs_fd), device); 291 } 292 } 293 PLOG(WARNING) << "Loopback device " << num << " not ready. Waiting 50ms..."; 294 usleep(50000); 295 if (!cold_boot_done) { 296 // ueventd hasn't finished cold boot yet, keep trying. 297 i = 0; 298 } 299 } 300 301 return Error() << "Faled to open loopback device " << num; 302 } 303 304 Result<LoopbackDeviceUniqueFd> CreateLoopDevice(const std::string& target, 305 const int32_t image_offset, 306 const size_t image_size) { 307 unique_fd ctl_fd(open("/dev/loop-control", O_RDWR | O_CLOEXEC)); 308 if (ctl_fd.get() == -1) { 309 return ErrnoError() << "Failed to open loop-control"; 310 } 311 312 static std::mutex mlock; 313 std::lock_guard lock(mlock); 314 int num = ioctl(ctl_fd.get(), LOOP_CTL_GET_FREE); 315 if (num == -1) { 316 return ErrnoError() << "Failed LOOP_CTL_GET_FREE"; 317 } 318 319 Result<LoopbackDeviceUniqueFd> loop_device = WaitForDevice(num); 320 if (!loop_device.ok()) { 321 return loop_device.error(); 322 } 323 CHECK_NE(loop_device->device_fd.get(), -1); 324 325 Result<void> configureStatus = ConfigureLoopDevice( 326 loop_device->device_fd.get(), target, image_offset, image_size); 327 if (!configureStatus.ok()) { 328 return configureStatus.error(); 329 } 330 331 Result<void> read_ahead_status = ConfigureReadAhead(loop_device->name); 332 if (!read_ahead_status.ok()) { 333 return read_ahead_status.error(); 334 } 335 336 return loop_device; 337 } 338 339 void DestroyLoopDevice(const std::string& path, const DestroyLoopFn& extra) { 340 unique_fd fd(open(path.c_str(), O_RDWR | O_CLOEXEC)); 341 if (fd.get() == -1) { 342 if (errno != ENOENT) { 343 PLOG(WARNING) << "Failed to open " << path; 344 } 345 return; 346 } 347 348 struct loop_info64 li; 349 if (ioctl(fd.get(), LOOP_GET_STATUS64, &li) < 0) { 350 if (errno != ENXIO) { 351 PLOG(WARNING) << "Failed to LOOP_GET_STATUS64 " << path; 352 } 353 return; 354 } 355 356 auto id = std::string((char*)li.lo_crypt_name); 357 if (StartsWith(id, kApexLoopIdPrefix)) { 358 extra(path, id); 359 360 if (ioctl(fd.get(), LOOP_CLR_FD, 0) < 0) { 361 PLOG(WARNING) << "Failed to LOOP_CLR_FD " << path; 362 } 363 } 364 } 365 366 } // namespace loop 367 } // namespace apex 368 } // namespace android 369