1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 use anyhow::{anyhow, bail, Result};
17 use std::collections::HashMap;
18 use std::ffi::{CStr, CString};
19 use std::io;
20 use std::os::unix::ffi::OsStrExt;
21 
22 /// `InodeTable` is a table of `InodeData` indexed by `Inode`.
23 #[derive(Debug)]
24 pub struct InodeTable {
25     table: Vec<InodeData>,
26 }
27 
28 /// `Inode` is the handle (or index in the table) to `InodeData` which represents an inode.
29 pub type Inode = u64;
30 
31 const INVALID: Inode = 0;
32 const ROOT: Inode = 1;
33 
34 /// `InodeData` represents an inode which has metadata about a file or a directory
35 #[derive(Debug)]
36 pub struct InodeData {
37     /// Size of the file that this inode represents. In case when the file is a directory, this
38     // is zero.
39     pub size: u64,
40     /// unix mode of this inode. It may not have `S_IFDIR` and `S_IFREG` in case the original zip
41     /// doesn't have the information in the external_attributes fields. To test if this inode
42     /// is for a regular file or a directory, use `is_dir`.
43     pub mode: u32,
44     data: InodeDataData,
45 }
46 
47 type ZipIndex = usize;
48 
49 /// `InodeDataData` is the actual data (or a means to access the data) of the file or the directory
50 /// that an inode is representing. In case of a directory, this data is the hash table of the
51 /// directory entries. In case of a file, this data is the index of the file in `ZipArchive` which
52 /// can be used to retrieve `ZipFile` that provides access to the content of the file.
53 #[derive(Debug)]
54 enum InodeDataData {
55     Directory(HashMap<CString, DirectoryEntry>),
56     File(ZipIndex),
57 }
58 
59 #[derive(Debug, Clone)]
60 pub struct DirectoryEntry {
61     pub inode: Inode,
62     pub kind: InodeKind,
63 }
64 
65 #[derive(Debug, Clone, PartialEq, Copy)]
66 pub enum InodeKind {
67     Directory,
68     File,
69 }
70 
71 impl InodeData {
72     pub fn is_dir(&self) -> bool {
73         matches!(&self.data, InodeDataData::Directory(_))
74     }
75 
76     pub fn get_directory(&self) -> Option<&HashMap<CString, DirectoryEntry>> {
77         match &self.data {
78             InodeDataData::Directory(hash) => Some(hash),
79             _ => None,
80         }
81     }
82 
83     pub fn get_zip_index(&self) -> Option<ZipIndex> {
84         match &self.data {
85             InodeDataData::File(zip_index) => Some(*zip_index),
86             _ => None,
87         }
88     }
89 
90     // Below methods are used to construct the inode table when initializing the filesystem. Once
91     // the initialization is done, these are not used because this is a read-only filesystem.
92 
93     fn new_dir(mode: u32) -> InodeData {
94         InodeData { mode, size: 0, data: InodeDataData::Directory(HashMap::new()) }
95     }
96 
97     fn new_file(zip_index: ZipIndex, zip_file: &zip::read::ZipFile) -> InodeData {
98         InodeData {
99             mode: zip_file.unix_mode().unwrap_or(0),
100             size: zip_file.size(),
101             data: InodeDataData::File(zip_index),
102         }
103     }
104 
105     fn add_to_directory(&mut self, name: CString, entry: DirectoryEntry) {
106         match &mut self.data {
107             InodeDataData::Directory(hashtable) => {
108                 let existing = hashtable.insert(name, entry);
109                 assert!(existing.is_none());
110             }
111             _ => {
112                 panic!("can't add a directory entry to a file inode");
113             }
114         }
115     }
116 }
117 
118 impl InodeTable {
119     /// Gets `InodeData` at a specific index.
120     pub fn get(&self, inode: Inode) -> Option<&InodeData> {
121         match inode {
122             INVALID => None,
123             _ => self.table.get(inode as usize),
124         }
125     }
126 
127     fn get_mut(&mut self, inode: Inode) -> Option<&mut InodeData> {
128         match inode {
129             INVALID => None,
130             _ => self.table.get_mut(inode as usize),
131         }
132     }
133 
134     fn put(&mut self, data: InodeData) -> Inode {
135         let inode = self.table.len() as Inode;
136         self.table.push(data);
137         inode
138     }
139 
140     /// Finds the inode number of a file named `name` in the `parent` inode. The `parent` inode
141     /// must exist and be a directory.
142     fn find(&self, parent: Inode, name: &CStr) -> Option<Inode> {
143         let data = self.get(parent).unwrap();
144         match data.get_directory().unwrap().get(name) {
145             Some(DirectoryEntry { inode, .. }) => Some(*inode),
146             _ => None,
147         }
148     }
149 
150     // Adds the inode `data` to the inode table and also links it to the `parent` inode as a file
151     // named `name`. The `parent` inode must exist and be a directory.
152     fn add(&mut self, parent: Inode, name: CString, data: InodeData) -> Inode {
153         assert!(self.find(parent, &name).is_none());
154 
155         let kind = if data.is_dir() { InodeKind::Directory } else { InodeKind::File };
156         // Add the inode to the table
157         let inode = self.put(data);
158 
159         // ... and then register it to the directory of the parent inode
160         self.get_mut(parent).unwrap().add_to_directory(name, DirectoryEntry { inode, kind });
161         inode
162     }
163 
164     /// Constructs `InodeTable` from a zip archive `archive`.
165     pub fn from_zip<R: io::Read + io::Seek>(
166         archive: &mut zip::ZipArchive<R>,
167     ) -> Result<InodeTable> {
168         let mut table = InodeTable { table: Vec::new() };
169 
170         // Add the inodes for the invalid and the root directory
171         assert_eq!(INVALID, table.put(InodeData::new_dir(0)));
172         assert_eq!(ROOT, table.put(InodeData::new_dir(0)));
173 
174         // For each zip file in the archive, create an inode and add it to the table. If the file's
175         // parent directories don't have corresponding inodes in the table, handle them too.
176         for i in 0..archive.len() {
177             let file = archive.by_index(i)?;
178             let path = file
179                 .enclosed_name()
180                 .ok_or_else(|| anyhow!("{} is an invalid name", file.name()))?;
181             // TODO(jiyong): normalize this (e.g. a/b/c/../d -> a/b/d). We can't use
182             // fs::canonicalize as this is a non-existing path yet.
183 
184             let mut parent = ROOT;
185             let mut iter = path.iter().peekable();
186             while let Some(name) = iter.next() {
187                 // TODO(jiyong): remove this check by canonicalizing `path`
188                 if name == ".." {
189                     bail!(".. is not allowed");
190                 }
191 
192                 let is_leaf = iter.peek().is_none();
193                 let is_file = file.is_file() && is_leaf;
194 
195                 // The happy path; the inode for `name` is already in the `parent` inode. Move on
196                 // to the next path element.
197                 let name = CString::new(name.as_bytes()).unwrap();
198                 if let Some(found) = table.find(parent, &name) {
199                     parent = found;
200                     // Update the mode if this is a directory leaf.
201                     if !is_file && is_leaf {
202                         let mut inode = table.get_mut(parent).unwrap();
203                         inode.mode = file.unix_mode().unwrap_or(0);
204                     }
205                     continue;
206                 }
207 
208                 const DEFAULT_DIR_MODE: u32 = libc::S_IRUSR | libc::S_IXUSR;
209 
210                 // No inode found. Create a new inode and add it to the inode table.
211                 let inode = if is_file {
212                     InodeData::new_file(i, &file)
213                 } else if is_leaf {
214                     InodeData::new_dir(file.unix_mode().unwrap_or(DEFAULT_DIR_MODE))
215                 } else {
216                     InodeData::new_dir(DEFAULT_DIR_MODE)
217                 };
218                 let new = table.add(parent, name, inode);
219                 parent = new;
220             }
221         }
222         Ok(table)
223     }
224 }
225 
226 #[cfg(test)]
227 mod tests {
228     use crate::inode::*;
229     use std::io::{Cursor, Write};
230     use zip::write::FileOptions;
231 
232     // Creates an in-memory zip buffer, adds some files to it, and converts it to InodeTable
233     fn setup(add: fn(&mut zip::ZipWriter<&mut std::io::Cursor<Vec<u8>>>)) -> InodeTable {
234         let mut buf: Cursor<Vec<u8>> = Cursor::new(Vec::new());
235         let mut writer = zip::ZipWriter::new(&mut buf);
236         add(&mut writer);
237         assert!(writer.finish().is_ok());
238         drop(writer);
239 
240         let zip = zip::ZipArchive::new(buf);
241         assert!(zip.is_ok());
242         let it = InodeTable::from_zip(&mut zip.unwrap());
243         assert!(it.is_ok());
244         it.unwrap()
245     }
246 
247     fn check_dir(it: &InodeTable, parent: Inode, name: &str) -> Inode {
248         let name = CString::new(name.as_bytes()).unwrap();
249         let inode = it.find(parent, &name);
250         assert!(inode.is_some());
251         let inode = inode.unwrap();
252         let inode_data = it.get(inode);
253         assert!(inode_data.is_some());
254         let inode_data = inode_data.unwrap();
255         assert_eq!(0, inode_data.size);
256         assert!(inode_data.is_dir());
257         inode
258     }
259 
260     fn check_file<'a>(it: &'a InodeTable, parent: Inode, name: &str) -> &'a InodeData {
261         let name = CString::new(name.as_bytes()).unwrap();
262         let inode = it.find(parent, &name);
263         assert!(inode.is_some());
264         let inode = inode.unwrap();
265         let inode_data = it.get(inode);
266         assert!(inode_data.is_some());
267         let inode_data = inode_data.unwrap();
268         assert!(!inode_data.is_dir());
269         inode_data
270     }
271 
272     #[test]
273     fn empty_zip_has_two_inodes() {
274         let it = setup(|_| {});
275         assert_eq!(2, it.table.len());
276         assert!(it.get(INVALID).is_none());
277         assert!(it.get(ROOT).is_some());
278     }
279 
280     #[test]
281     fn one_file() {
282         let it = setup(|zip| {
283             zip.start_file("foo", FileOptions::default()).unwrap();
284             zip.write_all(b"0123456789").unwrap();
285         });
286         let inode_data = check_file(&it, ROOT, "foo");
287         assert_eq!(b"0123456789".len() as u64, inode_data.size);
288     }
289 
290     #[test]
291     fn one_dir() {
292         let it = setup(|zip| {
293             zip.add_directory("foo", FileOptions::default()).unwrap();
294         });
295         let inode = check_dir(&it, ROOT, "foo");
296         // The directory doesn't have any entries
297         assert_eq!(0, it.get(inode).unwrap().get_directory().unwrap().len());
298     }
299 
300     #[test]
301     fn one_file_in_subdirs() {
302         let it = setup(|zip| {
303             zip.start_file("a/b/c/d", FileOptions::default()).unwrap();
304             zip.write_all(b"0123456789").unwrap();
305         });
306 
307         assert_eq!(6, it.table.len());
308         let a = check_dir(&it, ROOT, "a");
309         let b = check_dir(&it, a, "b");
310         let c = check_dir(&it, b, "c");
311         let d = check_file(&it, c, "d");
312         assert_eq!(10, d.size);
313     }
314 
315     #[test]
316     fn complex_hierarchy() {
317         // root/
318         //   a/
319         //    b1/
320         //    b2/
321         //      c1 (file)
322         //      c2/
323         //          d1 (file)
324         //          d2 (file)
325         //          d3 (file)
326         //  x/
327         //    y1 (file)
328         //    y2 (file)
329         //    y3/
330         //
331         //  foo (file)
332         //  bar (file)
333         let it = setup(|zip| {
334             let opt = FileOptions::default();
335             zip.add_directory("a/b1", opt).unwrap();
336 
337             zip.start_file("a/b2/c1", opt).unwrap();
338 
339             zip.start_file("a/b2/c2/d1", opt).unwrap();
340             zip.start_file("a/b2/c2/d2", opt).unwrap();
341             zip.start_file("a/b2/c2/d3", opt).unwrap();
342 
343             zip.start_file("x/y1", opt).unwrap();
344             zip.start_file("x/y2", opt).unwrap();
345             zip.add_directory("x/y3", opt).unwrap();
346 
347             zip.start_file("foo", opt).unwrap();
348             zip.start_file("bar", opt).unwrap();
349         });
350 
351         assert_eq!(16, it.table.len()); // 8 files, 6 dirs, and 2 (for root and the invalid inode)
352         let a = check_dir(&it, ROOT, "a");
353         let _b1 = check_dir(&it, a, "b1");
354         let b2 = check_dir(&it, a, "b2");
355         let _c1 = check_file(&it, b2, "c1");
356 
357         let c2 = check_dir(&it, b2, "c2");
358         let _d1 = check_file(&it, c2, "d1");
359         let _d2 = check_file(&it, c2, "d3");
360         let _d3 = check_file(&it, c2, "d3");
361 
362         let x = check_dir(&it, ROOT, "x");
363         let _y1 = check_file(&it, x, "y1");
364         let _y2 = check_file(&it, x, "y2");
365         let _y3 = check_dir(&it, x, "y3");
366 
367         let _foo = check_file(&it, ROOT, "foo");
368         let _bar = check_file(&it, ROOT, "bar");
369     }
370 
371     #[test]
372     fn file_size() {
373         let it = setup(|zip| {
374             let opt = FileOptions::default();
375             zip.start_file("empty", opt).unwrap();
376 
377             zip.start_file("10bytes", opt).unwrap();
378             zip.write_all(&[0; 10]).unwrap();
379 
380             zip.start_file("1234bytes", opt).unwrap();
381             zip.write_all(&[0; 1234]).unwrap();
382 
383             zip.start_file("2^20bytes", opt).unwrap();
384             zip.write_all(&[0; 2 << 20]).unwrap();
385         });
386 
387         let f = check_file(&it, ROOT, "empty");
388         assert_eq!(0, f.size);
389 
390         let f = check_file(&it, ROOT, "10bytes");
391         assert_eq!(10, f.size);
392 
393         let f = check_file(&it, ROOT, "1234bytes");
394         assert_eq!(1234, f.size);
395 
396         let f = check_file(&it, ROOT, "2^20bytes");
397         assert_eq!(2 << 20, f.size);
398     }
399 
400     #[test]
401     fn rejects_invalid_paths() {
402         let invalid_paths = [
403             "a/../../b",   // escapes the root
404             "a/..",        // escapes the root
405             "a/../../b/c", // escape the root
406             "a/b/../c",    // doesn't escape the root, but not normalized
407         ];
408         for path in invalid_paths.iter() {
409             let mut buf: Cursor<Vec<u8>> = Cursor::new(Vec::new());
410             let mut writer = zip::ZipWriter::new(&mut buf);
411             writer.start_file(*path, FileOptions::default()).unwrap();
412             assert!(writer.finish().is_ok());
413             drop(writer);
414 
415             let zip = zip::ZipArchive::new(buf);
416             assert!(zip.is_ok());
417             let it = InodeTable::from_zip(&mut zip.unwrap());
418             assert!(it.is_err());
419         }
420     }
421 }
422