1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 use super::common::{build_fsverity_digest, merkle_tree_height, FsverityError};
18 use crate::common::{divide_roundup, CHUNK_SIZE};
19 use crate::crypto::{CryptoError, Sha256Hash, Sha256Hasher};
20 
21 const HASH_SIZE: usize = Sha256Hasher::HASH_SIZE;
22 const HASH_PER_PAGE: usize = CHUNK_SIZE as usize / HASH_SIZE;
23 
24 /// MerkleLeaves can be used by the class' customer for bookkeeping integrity data for their bytes.
25 /// It can also be used to generate the standard fs-verity digest for the source data.
26 ///
27 /// It's in-memory because for the initial use cases, we don't need to read back an existing file,
28 /// and only need to deal with new files. Also, considering that the output file won't be large at
29 /// the moment, it is sufficient to simply keep the Merkle tree in memory in the trusted world. To
30 /// further simplify the initial implementation, we only need to keep the leaf nodes in memory, and
31 /// generate the tree / root hash when requested.
32 pub struct MerkleLeaves {
33     leaves: Vec<Sha256Hash>,
34     file_size: u64,
35 }
36 
37 fn hash_all_pages(source: &[Sha256Hash]) -> Result<Vec<Sha256Hash>, CryptoError> {
38     source
39         .chunks(HASH_PER_PAGE)
40         .map(|chunk| {
41             let padding_bytes = (HASH_PER_PAGE - chunk.len()) * HASH_SIZE;
42             Sha256Hasher::new()?.update_from(chunk)?.update(&vec![0u8; padding_bytes])?.finalize()
43         })
44         .collect()
45 }
46 
47 impl MerkleLeaves {
48     /// Creates a `MerkleLeaves` instance with empty data.
49     pub fn new() -> Self {
50         Self { leaves: Vec::new(), file_size: 0 }
51     }
52 
53     /// Gets size of the file represented by `MerkleLeaves`.
54     pub fn file_size(&self) -> u64 {
55         self.file_size
56     }
57 
58     /// Grows the `MerkleLeaves` to the new file size. To keep the consistency, if any new leaves
59     /// are added, the leaves/hashes are as if the extended content is all zero.
60     ///
61     /// However, when the change shrinks the leaf number, `MerkleLeaves` does not know if the hash
62     /// of the last chunk has changed, or what the new value should be. As the result, it is up to
63     /// the caller to fix the last leaf if needed.
64     pub fn resize(&mut self, new_file_size: usize) {
65         let new_file_size = new_file_size as u64;
66         let leaves_size = divide_roundup(new_file_size, CHUNK_SIZE);
67         self.leaves.resize(leaves_size as usize, Sha256Hasher::HASH_OF_4096_ZEROS);
68         self.file_size = new_file_size;
69     }
70 
71     /// Updates the hash of the `index`-th leaf, and increase the size to `size_at_least` if the
72     /// current size is smaller.
73     pub fn update_hash(&mut self, index: usize, hash: &Sha256Hash, size_at_least: u64) {
74         // +1 since index is zero-based.
75         if self.leaves.len() < index + 1 {
76             // When resizing, fill in hash of zeros by default. This makes it easy to handle holes
77             // in a file.
78             self.leaves.resize(index + 1, Sha256Hasher::HASH_OF_4096_ZEROS);
79         }
80         self.leaves[index].clone_from_slice(hash);
81 
82         if size_at_least > self.file_size {
83             self.file_size = size_at_least;
84         }
85     }
86 
87     /// Returns whether `index` is within the bound of leaves.
88     pub fn is_index_valid(&self, index: usize) -> bool {
89         index < self.leaves.len()
90     }
91 
92     /// Returns whether the `index`-th hash is consistent to `hash`.
93     pub fn is_consistent(&self, index: usize, hash: &Sha256Hash) -> bool {
94         if let Some(element) = self.leaves.get(index) {
95             element == hash
96         } else {
97             false
98         }
99     }
100 
101     fn calculate_root_hash(&self) -> Result<Sha256Hash, FsverityError> {
102         match self.leaves.len() {
103             // Special cases per fs-verity digest definition.
104             0 => {
105                 debug_assert_eq!(self.file_size, 0);
106                 Ok([0u8; HASH_SIZE])
107             }
108             1 => {
109                 debug_assert!(self.file_size <= CHUNK_SIZE && self.file_size > 0);
110                 Ok(self.leaves[0])
111             }
112             n => {
113                 debug_assert_eq!((self.file_size - 1) / CHUNK_SIZE, n as u64);
114                 let size_for_equivalent = n as u64 * CHUNK_SIZE;
115                 let level = merkle_tree_height(size_for_equivalent).unwrap(); // safe since n > 0
116 
117                 // `leaves` is owned and can't be the initial state below. Here we manually hash it
118                 // first to avoid a copy and to get the type right.
119                 let second_level = hash_all_pages(&self.leaves)?;
120                 let hashes =
121                     (1..=level).try_fold(second_level, |source, _| hash_all_pages(&source))?;
122                 if hashes.len() != 1 {
123                     Err(FsverityError::InvalidState)
124                 } else {
125                     Ok(hashes.into_iter().next().unwrap())
126                 }
127             }
128         }
129     }
130 
131     /// Returns the fs-verity digest based on the current tree and file size.
132     pub fn calculate_fsverity_digest(&self) -> Result<Sha256Hash, FsverityError> {
133         let root_hash = self.calculate_root_hash()?;
134         Ok(build_fsverity_digest(&root_hash, self.file_size)?)
135     }
136 }
137 
138 #[cfg(test)]
139 mod tests {
140     // Test data below can be generated by:
141     //  $ perl -e 'print "\x{00}" x 6000' > foo
142     //  $ perl -e 'print "\x{01}" x 5000' >> foo
143     //  $ fsverity digest foo
144     use super::*;
145     use anyhow::Result;
146 
147     #[test]
148     fn merkle_tree_empty_file() -> Result<()> {
149         assert_eq!(
150             to_u8_vec("3d248ca542a24fc62d1c43b916eae5016878e2533c88238480b26128a1f1af95"),
151             generate_fsverity_digest_sequentially(&Vec::new())?
152         );
153         Ok(())
154     }
155 
156     #[test]
157     fn merkle_tree_file_size_less_than_or_equal_to_4k() -> Result<()> {
158         // Test a file that contains 4096 '\01's.
159         assert_eq!(
160             to_u8_vec("cd0875ca59c7d37e962c5e8f5acd3770750ac80225e2df652ce5672fd34500af"),
161             generate_fsverity_digest_sequentially(&vec![1; 4096])?
162         );
163         Ok(())
164     }
165 
166     #[test]
167     fn merkle_tree_more_sizes() -> Result<()> {
168         // Test files that contains >4096 '\01's.
169 
170         assert_eq!(
171             to_u8_vec("2901b849fda2d91e3929524561c4a47e77bb64734319759507b2029f18b9cc52"),
172             generate_fsverity_digest_sequentially(&vec![1; 4097])?
173         );
174 
175         assert_eq!(
176             to_u8_vec("2a476d58eb80394052a3a783111e1458ac3ecf68a7878183fed86ca0ff47ec0d"),
177             generate_fsverity_digest_sequentially(&vec![1; 8192])?
178         );
179 
180         // Test with max size that still fits in 2 levels.
181         assert_eq!(
182             to_u8_vec("26b7c190a34e19f420808ee7ec233b09fa6c34543b5a9d2950530114c205d14f"),
183             generate_fsverity_digest_sequentially(&vec![1; 524288])?
184         );
185 
186         // Test with data that requires 3 levels.
187         assert_eq!(
188             to_u8_vec("316835d9be1c95b5cd55d07ae7965d651689efad186e26cbf680e40b683a3262"),
189             generate_fsverity_digest_sequentially(&vec![1; 524289])?
190         );
191         Ok(())
192     }
193 
194     #[test]
195     fn merkle_tree_non_sequential() -> Result<()> {
196         let mut tree = MerkleLeaves::new();
197         let hash = Sha256Hasher::new()?.update(&vec![1u8; CHUNK_SIZE as usize])?.finalize()?;
198 
199         // Update hashes of 4 1-blocks.
200         tree.update_hash(1, &hash, CHUNK_SIZE * 2);
201         tree.update_hash(3, &hash, CHUNK_SIZE * 4);
202         tree.update_hash(0, &hash, CHUNK_SIZE);
203         tree.update_hash(2, &hash, CHUNK_SIZE * 3);
204 
205         assert_eq!(
206             to_u8_vec("7d3c0d2e1dc54230b20ed875f5f3a4bd3f9873df601936b3ca8127d4db3548f3"),
207             tree.calculate_fsverity_digest()?
208         );
209         Ok(())
210     }
211 
212     #[test]
213     fn merkle_tree_grow_leaves() -> Result<()> {
214         let mut tree = MerkleLeaves::new();
215         tree.update_hash(0, &[42; HASH_SIZE], CHUNK_SIZE); // fake hash of a CHUNK_SIZE block
216 
217         // Grows the leaves
218         tree.resize(4096 * 3 - 100);
219 
220         assert!(tree.is_index_valid(0));
221         assert!(tree.is_index_valid(1));
222         assert!(tree.is_index_valid(2));
223         assert!(!tree.is_index_valid(3));
224         assert!(tree.is_consistent(1, &Sha256Hasher::HASH_OF_4096_ZEROS));
225         assert!(tree.is_consistent(2, &Sha256Hasher::HASH_OF_4096_ZEROS));
226         Ok(())
227     }
228 
229     #[test]
230     fn merkle_tree_shrink_leaves() -> Result<()> {
231         let mut tree = MerkleLeaves::new();
232         tree.update_hash(0, &[42; HASH_SIZE], CHUNK_SIZE);
233         tree.update_hash(0, &[42; HASH_SIZE], CHUNK_SIZE * 3);
234 
235         // Shrink the leaves
236         tree.resize(CHUNK_SIZE as usize * 2 - 100);
237 
238         assert!(tree.is_index_valid(0));
239         assert!(tree.is_index_valid(1));
240         assert!(!tree.is_index_valid(2));
241         // The second chunk is a hole and full of zero. When shrunk, with zero padding, the hash
242         // happens to be consistent to a full-zero chunk.
243         assert!(tree.is_consistent(1, &Sha256Hasher::HASH_OF_4096_ZEROS));
244         Ok(())
245     }
246 
247     fn generate_fsverity_digest_sequentially(test_data: &[u8]) -> Result<Sha256Hash> {
248         let mut tree = MerkleLeaves::new();
249         for (index, chunk) in test_data.chunks(CHUNK_SIZE as usize).enumerate() {
250             let hash = Sha256Hasher::new()?
251                 .update(&chunk)?
252                 .update(&vec![0u8; CHUNK_SIZE as usize - chunk.len()])?
253                 .finalize()?;
254 
255             tree.update_hash(index, &hash, CHUNK_SIZE * index as u64 + chunk.len() as u64);
256         }
257         Ok(tree.calculate_fsverity_digest()?)
258     }
259 
260     fn to_u8_vec(hex_str: &str) -> Vec<u8> {
261         assert!(hex_str.len() % 2 == 0);
262         (0..hex_str.len())
263             .step_by(2)
264             .map(|i| u8::from_str_radix(&hex_str[i..i + 2], 16).unwrap())
265             .collect()
266     }
267 }
268