1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 use anyhow::Result;
18 use log::{debug, warn};
19 use std::collections::BTreeMap;
20 use std::convert::TryFrom;
21 use std::ffi::CStr;
22 use std::fs::OpenOptions;
23 use std::io;
24 use std::mem::MaybeUninit;
25 use std::option::Option;
26 use std::os::unix::io::AsRawFd;
27 use std::path::Path;
28 use std::time::Duration;
29 
30 use fuse::filesystem::{
31     Context, DirEntry, DirectoryIterator, Entry, FileSystem, FsOptions, SetattrValid,
32     ZeroCopyReader, ZeroCopyWriter,
33 };
34 use fuse::mount::MountOption;
35 
36 use crate::common::{divide_roundup, ChunkedSizeIter, CHUNK_SIZE};
37 use crate::file::{
38     LocalFileReader, RandomWrite, ReadByChunk, RemoteFileEditor, RemoteFileReader,
39     RemoteMerkleTreeReader,
40 };
41 use crate::fsverity::{VerifiedFileEditor, VerifiedFileReader};
42 
/// Timeout attached to every entry and attribute reply produced by this filesystem (used as
/// both `entry_timeout` and `attr_timeout` in `lookup`, and returned by `getattr`/`setattr`).
const DEFAULT_METADATA_TIMEOUT: std::time::Duration = Duration::from_secs(5);

/// Inode number type. It doubles as the key into the file pool and as the file name exposed by
/// `lookup`.
pub type Inode = u64;
/// File handle type required by the `FileSystem` trait. `open` always returns `None` as the
/// handle, so the value is not used by the operations in this file.
type Handle = u64;
47 
/// `FileConfig` defines the file type supported by AuthFS.
///
/// For the read-only variants, `reader` provides the content chunk by chunk and `file_size` is
/// the size in bytes that is reported in `stat` and used to bound read requests.
pub enum FileConfig {
    /// A file type that is verified against fs-verity signature (thus read-only). The file is
    /// backed by a local file. Debug only.
    LocalVerifiedReadonlyFile {
        reader: VerifiedFileReader<LocalFileReader, LocalFileReader>,
        file_size: u64,
    },
    /// A file type that is a read-only passthrough from a local file. Debug only.
    LocalUnverifiedReadonlyFile { reader: LocalFileReader, file_size: u64 },
    /// A file type that is verified against fs-verity signature (thus read-only). The file is
    /// served from a remote server.
    RemoteVerifiedReadonlyFile {
        reader: VerifiedFileReader<RemoteFileReader, RemoteMerkleTreeReader>,
        file_size: u64,
    },
    /// A file type that is a read-only passthrough from a file on a remote server.
    RemoteUnverifiedReadonlyFile { reader: RemoteFileReader, file_size: u64 },
    /// A file type that is initially empty, and the content is stored on a remote server. File
    /// integrity is guaranteed with private Merkle tree. The current size is obtained from the
    /// editor itself rather than from a stored `file_size`.
    RemoteVerifiedNewFile { editor: VerifiedFileEditor<RemoteFileEditor> },
}
70 
/// State of the AuthFS FUSE filesystem: a static pool of files addressed by inode number, plus
/// the buffer size limit handed to the FUSE worker.
struct AuthFs {
    /// Store `FileConfig`s using the `Inode` number as the search index.
    ///
    /// For further optimization to minimize the search cost, since Inode is integer, we may
    /// consider storing them in a Vec if we can guarantee that the numbers are small and
    /// consecutive.
    file_pool: BTreeMap<Inode, FileConfig>,

    /// Maximum bytes in the write transaction to the FUSE device. This limits the maximum size to
    /// a read request (including FUSE protocol overhead). Returned as-is from
    /// `max_buffer_size`.
    max_write: u32,
}
83 
84 impl AuthFs {
85     pub fn new(file_pool: BTreeMap<Inode, FileConfig>, max_write: u32) -> AuthFs {
86         AuthFs { file_pool, max_write }
87     }
88 
89     fn get_file_config(&self, inode: &Inode) -> io::Result<&FileConfig> {
90         self.file_pool.get(&inode).ok_or_else(|| io::Error::from_raw_os_error(libc::ENOENT))
91     }
92 }
93 
94 fn check_access_mode(flags: u32, mode: libc::c_int) -> io::Result<()> {
95     if (flags & libc::O_ACCMODE as u32) == mode as u32 {
96         Ok(())
97     } else {
98         Err(io::Error::from_raw_os_error(libc::EACCES))
99     }
100 }
101 
102 cfg_if::cfg_if! {
103     if #[cfg(all(target_arch = "aarch64", target_pointer_width = "64"))] {
104         fn blk_size() -> libc::c_int { CHUNK_SIZE as libc::c_int }
105     } else {
106         fn blk_size() -> libc::c_long { CHUNK_SIZE as libc::c_long }
107     }
108 }
109 
/// Permission profile used by `create_stat` when filling in `st_mode`.
enum FileMode {
    /// Regular file readable by the owner only.
    ReadOnly,
    /// Regular file readable and writable by the owner only.
    ReadWrite,
}
114 
115 fn create_stat(ino: libc::ino_t, file_size: u64, file_mode: FileMode) -> io::Result<libc::stat64> {
116     let mut st = unsafe { MaybeUninit::<libc::stat64>::zeroed().assume_init() };
117 
118     st.st_ino = ino;
119     st.st_mode = match file_mode {
120         // Until needed, let's just grant the owner access.
121         FileMode::ReadOnly => libc::S_IFREG | libc::S_IRUSR,
122         FileMode::ReadWrite => libc::S_IFREG | libc::S_IRUSR | libc::S_IWUSR,
123     };
124     st.st_dev = 0;
125     st.st_nlink = 1;
126     st.st_uid = 0;
127     st.st_gid = 0;
128     st.st_rdev = 0;
129     st.st_size = libc::off64_t::try_from(file_size)
130         .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
131     st.st_blksize = blk_size();
132     // Per man stat(2), st_blocks is "Number of 512B blocks allocated".
133     st.st_blocks = libc::c_longlong::try_from(divide_roundup(file_size, 512))
134         .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
135     Ok(st)
136 }
137 
138 fn offset_to_chunk_index(offset: u64) -> u64 {
139     offset / CHUNK_SIZE
140 }
141 
142 fn read_chunks<W: io::Write, T: ReadByChunk>(
143     mut w: W,
144     file: &T,
145     file_size: u64,
146     offset: u64,
147     size: u32,
148 ) -> io::Result<usize> {
149     let remaining = file_size.saturating_sub(offset);
150     let size_to_read = std::cmp::min(size as usize, remaining as usize);
151     let total = ChunkedSizeIter::new(size_to_read, offset, CHUNK_SIZE as usize).try_fold(
152         0,
153         |total, (current_offset, planned_data_size)| {
154             // TODO(victorhsieh): There might be a non-trivial way to avoid this copy. For example,
155             // instead of accepting a buffer, the writer could expose the final destination buffer
156             // for the reader to write to. It might not be generally applicable though, e.g. with
157             // virtio transport, the buffer may not be continuous.
158             let mut buf = [0u8; CHUNK_SIZE as usize];
159             let read_size = file.read_chunk(offset_to_chunk_index(current_offset), &mut buf)?;
160             if read_size < planned_data_size {
161                 return Err(io::Error::from_raw_os_error(libc::ENODATA));
162             }
163 
164             let begin = (current_offset % CHUNK_SIZE) as usize;
165             let end = begin + planned_data_size;
166             let s = w.write(&buf[begin..end])?;
167             if s != planned_data_size {
168                 return Err(io::Error::from_raw_os_error(libc::EIO));
169             }
170             Ok(total + s)
171         },
172     )?;
173 
174     Ok(total)
175 }
176 
// No need to support enumerating directory entries.
/// Directory iterator that never yields an entry. It is the `DirIter` type of `AuthFs`.
struct EmptyDirectoryIterator {}

impl DirectoryIterator for EmptyDirectoryIterator {
    /// Always returns `None`: the directory is presented as empty.
    fn next(&mut self) -> Option<DirEntry> {
        None
    }
}
185 
186 impl FileSystem for AuthFs {
187     type Inode = Inode;
188     type Handle = Handle;
189     type DirIter = EmptyDirectoryIterator;
190 
191     fn max_buffer_size(&self) -> u32 {
192         self.max_write
193     }
194 
195     fn init(&self, _capable: FsOptions) -> io::Result<FsOptions> {
196         // Enable writeback cache for better performance especially since our bandwidth to the
197         // backend service is limited.
198         Ok(FsOptions::WRITEBACK_CACHE)
199     }
200 
201     fn lookup(&self, _ctx: Context, _parent: Inode, name: &CStr) -> io::Result<Entry> {
202         // Only accept file name that looks like an integrer. Files in the pool are simply exposed
203         // by their inode number. Also, there is currently no directory structure.
204         let num = name.to_str().map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?;
205         // Normally, `lookup` is required to increase a reference count for the inode (while
206         // `forget` will decrease it). It is not necessary here since the files are configured to
207         // be static.
208         let inode = num.parse::<Inode>().map_err(|_| io::Error::from_raw_os_error(libc::ENOENT))?;
209         let st = match self.get_file_config(&inode)? {
210             FileConfig::LocalVerifiedReadonlyFile { file_size, .. }
211             | FileConfig::LocalUnverifiedReadonlyFile { file_size, .. }
212             | FileConfig::RemoteUnverifiedReadonlyFile { file_size, .. }
213             | FileConfig::RemoteVerifiedReadonlyFile { file_size, .. } => {
214                 create_stat(inode, *file_size, FileMode::ReadOnly)?
215             }
216             FileConfig::RemoteVerifiedNewFile { editor } => {
217                 create_stat(inode, editor.size(), FileMode::ReadWrite)?
218             }
219         };
220         Ok(Entry {
221             inode,
222             generation: 0,
223             attr: st,
224             entry_timeout: DEFAULT_METADATA_TIMEOUT,
225             attr_timeout: DEFAULT_METADATA_TIMEOUT,
226         })
227     }
228 
229     fn getattr(
230         &self,
231         _ctx: Context,
232         inode: Inode,
233         _handle: Option<Handle>,
234     ) -> io::Result<(libc::stat64, Duration)> {
235         Ok((
236             match self.get_file_config(&inode)? {
237                 FileConfig::LocalVerifiedReadonlyFile { file_size, .. }
238                 | FileConfig::LocalUnverifiedReadonlyFile { file_size, .. }
239                 | FileConfig::RemoteUnverifiedReadonlyFile { file_size, .. }
240                 | FileConfig::RemoteVerifiedReadonlyFile { file_size, .. } => {
241                     create_stat(inode, *file_size, FileMode::ReadOnly)?
242                 }
243                 FileConfig::RemoteVerifiedNewFile { editor } => {
244                     create_stat(inode, editor.size(), FileMode::ReadWrite)?
245                 }
246             },
247             DEFAULT_METADATA_TIMEOUT,
248         ))
249     }
250 
251     fn open(
252         &self,
253         _ctx: Context,
254         inode: Self::Inode,
255         flags: u32,
256     ) -> io::Result<(Option<Self::Handle>, fuse::sys::OpenOptions)> {
257         // Since file handle is not really used in later operations (which use Inode directly),
258         // return None as the handle.
259         match self.get_file_config(&inode)? {
260             FileConfig::LocalVerifiedReadonlyFile { .. }
261             | FileConfig::LocalUnverifiedReadonlyFile { .. }
262             | FileConfig::RemoteVerifiedReadonlyFile { .. }
263             | FileConfig::RemoteUnverifiedReadonlyFile { .. } => {
264                 check_access_mode(flags, libc::O_RDONLY)?;
265             }
266             FileConfig::RemoteVerifiedNewFile { .. } => {
267                 // No need to check access modes since all the modes are allowed to the
268                 // read-writable file.
269             }
270         }
271         // Always cache the file content. There is currently no need to support direct I/O or avoid
272         // the cache buffer. Memory mapping is only possible with cache enabled.
273         Ok((None, fuse::sys::OpenOptions::KEEP_CACHE))
274     }
275 
276     fn read<W: io::Write + ZeroCopyWriter>(
277         &self,
278         _ctx: Context,
279         inode: Inode,
280         _handle: Handle,
281         w: W,
282         size: u32,
283         offset: u64,
284         _lock_owner: Option<u64>,
285         _flags: u32,
286     ) -> io::Result<usize> {
287         match self.get_file_config(&inode)? {
288             FileConfig::LocalVerifiedReadonlyFile { reader, file_size } => {
289                 read_chunks(w, reader, *file_size, offset, size)
290             }
291             FileConfig::LocalUnverifiedReadonlyFile { reader, file_size } => {
292                 read_chunks(w, reader, *file_size, offset, size)
293             }
294             FileConfig::RemoteVerifiedReadonlyFile { reader, file_size } => {
295                 read_chunks(w, reader, *file_size, offset, size)
296             }
297             FileConfig::RemoteUnverifiedReadonlyFile { reader, file_size } => {
298                 read_chunks(w, reader, *file_size, offset, size)
299             }
300             FileConfig::RemoteVerifiedNewFile { editor } => {
301                 // Note that with FsOptions::WRITEBACK_CACHE, it's possible for the kernel to
302                 // request a read even if the file is open with O_WRONLY.
303                 read_chunks(w, editor, editor.size(), offset, size)
304             }
305         }
306     }
307 
308     fn write<R: io::Read + ZeroCopyReader>(
309         &self,
310         _ctx: Context,
311         inode: Self::Inode,
312         _handle: Self::Handle,
313         mut r: R,
314         size: u32,
315         offset: u64,
316         _lock_owner: Option<u64>,
317         _delayed_write: bool,
318         _flags: u32,
319     ) -> io::Result<usize> {
320         match self.get_file_config(&inode)? {
321             FileConfig::RemoteVerifiedNewFile { editor } => {
322                 let mut buf = vec![0; size as usize];
323                 r.read_exact(&mut buf)?;
324                 editor.write_at(&buf, offset)
325             }
326             _ => Err(io::Error::from_raw_os_error(libc::EBADF)),
327         }
328     }
329 
330     fn setattr(
331         &self,
332         _ctx: Context,
333         inode: Inode,
334         attr: libc::stat64,
335         _handle: Option<Handle>,
336         valid: SetattrValid,
337     ) -> io::Result<(libc::stat64, Duration)> {
338         match self.get_file_config(&inode)? {
339             FileConfig::RemoteVerifiedNewFile { editor } => {
340                 // Initialize the default stat.
341                 let mut new_attr = create_stat(inode, editor.size(), FileMode::ReadWrite)?;
342                 // `valid` indicates what fields in `attr` are valid. Update to return correctly.
343                 if valid.contains(SetattrValid::SIZE) {
344                     // st_size is i64, but the cast should be safe since kernel should not give a
345                     // negative size.
346                     debug_assert!(attr.st_size >= 0);
347                     new_attr.st_size = attr.st_size;
348                     editor.resize(attr.st_size as u64)?;
349                 }
350 
351                 if valid.contains(SetattrValid::MODE) {
352                     warn!("Changing st_mode is not currently supported");
353                     return Err(io::Error::from_raw_os_error(libc::ENOSYS));
354                 }
355                 if valid.contains(SetattrValid::UID) {
356                     warn!("Changing st_uid is not currently supported");
357                     return Err(io::Error::from_raw_os_error(libc::ENOSYS));
358                 }
359                 if valid.contains(SetattrValid::GID) {
360                     warn!("Changing st_gid is not currently supported");
361                     return Err(io::Error::from_raw_os_error(libc::ENOSYS));
362                 }
363                 if valid.contains(SetattrValid::CTIME) {
364                     debug!("Ignoring ctime change as authfs does not maintain timestamp currently");
365                 }
366                 if valid.intersects(SetattrValid::ATIME | SetattrValid::ATIME_NOW) {
367                     debug!("Ignoring atime change as authfs does not maintain timestamp currently");
368                 }
369                 if valid.intersects(SetattrValid::MTIME | SetattrValid::MTIME_NOW) {
370                     debug!("Ignoring mtime change as authfs does not maintain timestamp currently");
371                 }
372                 Ok((new_attr, DEFAULT_METADATA_TIMEOUT))
373             }
374             _ => Err(io::Error::from_raw_os_error(libc::EBADF)),
375         }
376     }
377 }
378 
379 /// Mount and start the FUSE instance. This requires CAP_SYS_ADMIN.
380 pub fn loop_forever(
381     file_pool: BTreeMap<Inode, FileConfig>,
382     mountpoint: &Path,
383 ) -> Result<(), fuse::Error> {
384     let max_read: u32 = 65536;
385     let max_write: u32 = 65536;
386     let dev_fuse = OpenOptions::new()
387         .read(true)
388         .write(true)
389         .open("/dev/fuse")
390         .expect("Failed to open /dev/fuse");
391 
392     fuse::mount(
393         mountpoint,
394         "authfs",
395         libc::MS_NOSUID | libc::MS_NODEV,
396         &[
397             MountOption::FD(dev_fuse.as_raw_fd()),
398             MountOption::RootMode(libc::S_IFDIR | libc::S_IXUSR | libc::S_IXGRP | libc::S_IXOTH),
399             MountOption::AllowOther,
400             MountOption::UserId(0),
401             MountOption::GroupId(0),
402             MountOption::MaxRead(max_read),
403         ],
404     )
405     .expect("Failed to mount fuse");
406 
407     fuse::worker::start_message_loop(
408         dev_fuse,
409         max_write,
410         max_read,
411         AuthFs::new(file_pool, max_write),
412     )
413 }
414