/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

use super::common::{build_fsverity_digest, merkle_tree_height, FsverityError};
use crate::common::{divide_roundup, CHUNK_SIZE};
use crate::crypto::{CryptoError, Sha256Hash, Sha256Hasher};

/// Size of a single hash in bytes (SHA-256, per `Sha256Hasher`).
const HASH_SIZE: usize = Sha256Hasher::HASH_SIZE;
/// Number of hashes that fit in one CHUNK_SIZE-byte (4096) page of the Merkle tree.
const HASH_PER_PAGE: usize = CHUNK_SIZE as usize / HASH_SIZE;

/// MerkleLeaves can be used by the class' customer for bookkeeping integrity data for their bytes.
/// It can also be used to generate the standard fs-verity digest for the source data.
///
/// It's in-memory because for the initial use cases, we don't need to read back an existing file,
/// and only need to deal with new files. Also, considering that the output file won't be large at
/// the moment, it is sufficient to simply keep the Merkle tree in memory in the trusted world. To
/// further simplify the initial implementation, we only need to keep the leaf nodes in memory, and
/// generate the tree / root hash when requested.
32 pub struct MerkleLeaves { 33 leaves: Vec<Sha256Hash>, 34 file_size: u64, 35 } 36 37 fn hash_all_pages(source: &[Sha256Hash]) -> Result<Vec<Sha256Hash>, CryptoError> { 38 source 39 .chunks(HASH_PER_PAGE) 40 .map(|chunk| { 41 let padding_bytes = (HASH_PER_PAGE - chunk.len()) * HASH_SIZE; 42 Sha256Hasher::new()?.update_from(chunk)?.update(&vec![0u8; padding_bytes])?.finalize() 43 }) 44 .collect() 45 } 46 47 impl MerkleLeaves { 48 /// Creates a `MerkleLeaves` instance with empty data. 49 pub fn new() -> Self { 50 Self { leaves: Vec::new(), file_size: 0 } 51 } 52 53 /// Gets size of the file represented by `MerkleLeaves`. 54 pub fn file_size(&self) -> u64 { 55 self.file_size 56 } 57 58 /// Grows the `MerkleLeaves` to the new file size. To keep the consistency, if any new leaves 59 /// are added, the leaves/hashes are as if the extended content is all zero. 60 /// 61 /// However, when the change shrinks the leaf number, `MerkleLeaves` does not know if the hash 62 /// of the last chunk has changed, or what the new value should be. As the result, it is up to 63 /// the caller to fix the last leaf if needed. 64 pub fn resize(&mut self, new_file_size: usize) { 65 let new_file_size = new_file_size as u64; 66 let leaves_size = divide_roundup(new_file_size, CHUNK_SIZE); 67 self.leaves.resize(leaves_size as usize, Sha256Hasher::HASH_OF_4096_ZEROS); 68 self.file_size = new_file_size; 69 } 70 71 /// Updates the hash of the `index`-th leaf, and increase the size to `size_at_least` if the 72 /// current size is smaller. 73 pub fn update_hash(&mut self, index: usize, hash: &Sha256Hash, size_at_least: u64) { 74 // +1 since index is zero-based. 75 if self.leaves.len() < index + 1 { 76 // When resizing, fill in hash of zeros by default. This makes it easy to handle holes 77 // in a file. 
78 self.leaves.resize(index + 1, Sha256Hasher::HASH_OF_4096_ZEROS); 79 } 80 self.leaves[index].clone_from_slice(hash); 81 82 if size_at_least > self.file_size { 83 self.file_size = size_at_least; 84 } 85 } 86 87 /// Returns whether `index` is within the bound of leaves. 88 pub fn is_index_valid(&self, index: usize) -> bool { 89 index < self.leaves.len() 90 } 91 92 /// Returns whether the `index`-th hash is consistent to `hash`. 93 pub fn is_consistent(&self, index: usize, hash: &Sha256Hash) -> bool { 94 if let Some(element) = self.leaves.get(index) { 95 element == hash 96 } else { 97 false 98 } 99 } 100 101 fn calculate_root_hash(&self) -> Result<Sha256Hash, FsverityError> { 102 match self.leaves.len() { 103 // Special cases per fs-verity digest definition. 104 0 => { 105 debug_assert_eq!(self.file_size, 0); 106 Ok([0u8; HASH_SIZE]) 107 } 108 1 => { 109 debug_assert!(self.file_size <= CHUNK_SIZE && self.file_size > 0); 110 Ok(self.leaves[0]) 111 } 112 n => { 113 debug_assert_eq!((self.file_size - 1) / CHUNK_SIZE, n as u64); 114 let size_for_equivalent = n as u64 * CHUNK_SIZE; 115 let level = merkle_tree_height(size_for_equivalent).unwrap(); // safe since n > 0 116 117 // `leaves` is owned and can't be the initial state below. Here we manually hash it 118 // first to avoid a copy and to get the type right. 119 let second_level = hash_all_pages(&self.leaves)?; 120 let hashes = 121 (1..=level).try_fold(second_level, |source, _| hash_all_pages(&source))?; 122 if hashes.len() != 1 { 123 Err(FsverityError::InvalidState) 124 } else { 125 Ok(hashes.into_iter().next().unwrap()) 126 } 127 } 128 } 129 } 130 131 /// Returns the fs-verity digest based on the current tree and file size. 132 pub fn calculate_fsverity_digest(&self) -> Result<Sha256Hash, FsverityError> { 133 let root_hash = self.calculate_root_hash()?; 134 Ok(build_fsverity_digest(&root_hash, self.file_size)?) 
135 } 136 } 137 138 #[cfg(test)] 139 mod tests { 140 // Test data below can be generated by: 141 // $ perl -e 'print "\x{00}" x 6000' > foo 142 // $ perl -e 'print "\x{01}" x 5000' >> foo 143 // $ fsverity digest foo 144 use super::*; 145 use anyhow::Result; 146 147 #[test] 148 fn merkle_tree_empty_file() -> Result<()> { 149 assert_eq!( 150 to_u8_vec("3d248ca542a24fc62d1c43b916eae5016878e2533c88238480b26128a1f1af95"), 151 generate_fsverity_digest_sequentially(&Vec::new())? 152 ); 153 Ok(()) 154 } 155 156 #[test] 157 fn merkle_tree_file_size_less_than_or_equal_to_4k() -> Result<()> { 158 // Test a file that contains 4096 '\01's. 159 assert_eq!( 160 to_u8_vec("cd0875ca59c7d37e962c5e8f5acd3770750ac80225e2df652ce5672fd34500af"), 161 generate_fsverity_digest_sequentially(&vec![1; 4096])? 162 ); 163 Ok(()) 164 } 165 166 #[test] 167 fn merkle_tree_more_sizes() -> Result<()> { 168 // Test files that contains >4096 '\01's. 169 170 assert_eq!( 171 to_u8_vec("2901b849fda2d91e3929524561c4a47e77bb64734319759507b2029f18b9cc52"), 172 generate_fsverity_digest_sequentially(&vec![1; 4097])? 173 ); 174 175 assert_eq!( 176 to_u8_vec("2a476d58eb80394052a3a783111e1458ac3ecf68a7878183fed86ca0ff47ec0d"), 177 generate_fsverity_digest_sequentially(&vec![1; 8192])? 178 ); 179 180 // Test with max size that still fits in 2 levels. 181 assert_eq!( 182 to_u8_vec("26b7c190a34e19f420808ee7ec233b09fa6c34543b5a9d2950530114c205d14f"), 183 generate_fsverity_digest_sequentially(&vec![1; 524288])? 184 ); 185 186 // Test with data that requires 3 levels. 187 assert_eq!( 188 to_u8_vec("316835d9be1c95b5cd55d07ae7965d651689efad186e26cbf680e40b683a3262"), 189 generate_fsverity_digest_sequentially(&vec![1; 524289])? 190 ); 191 Ok(()) 192 } 193 194 #[test] 195 fn merkle_tree_non_sequential() -> Result<()> { 196 let mut tree = MerkleLeaves::new(); 197 let hash = Sha256Hasher::new()?.update(&vec![1u8; CHUNK_SIZE as usize])?.finalize()?; 198 199 // Update hashes of 4 1-blocks. 
200 tree.update_hash(1, &hash, CHUNK_SIZE * 2); 201 tree.update_hash(3, &hash, CHUNK_SIZE * 4); 202 tree.update_hash(0, &hash, CHUNK_SIZE); 203 tree.update_hash(2, &hash, CHUNK_SIZE * 3); 204 205 assert_eq!( 206 to_u8_vec("7d3c0d2e1dc54230b20ed875f5f3a4bd3f9873df601936b3ca8127d4db3548f3"), 207 tree.calculate_fsverity_digest()? 208 ); 209 Ok(()) 210 } 211 212 #[test] 213 fn merkle_tree_grow_leaves() -> Result<()> { 214 let mut tree = MerkleLeaves::new(); 215 tree.update_hash(0, &[42; HASH_SIZE], CHUNK_SIZE); // fake hash of a CHUNK_SIZE block 216 217 // Grows the leaves 218 tree.resize(4096 * 3 - 100); 219 220 assert!(tree.is_index_valid(0)); 221 assert!(tree.is_index_valid(1)); 222 assert!(tree.is_index_valid(2)); 223 assert!(!tree.is_index_valid(3)); 224 assert!(tree.is_consistent(1, &Sha256Hasher::HASH_OF_4096_ZEROS)); 225 assert!(tree.is_consistent(2, &Sha256Hasher::HASH_OF_4096_ZEROS)); 226 Ok(()) 227 } 228 229 #[test] 230 fn merkle_tree_shrink_leaves() -> Result<()> { 231 let mut tree = MerkleLeaves::new(); 232 tree.update_hash(0, &[42; HASH_SIZE], CHUNK_SIZE); 233 tree.update_hash(0, &[42; HASH_SIZE], CHUNK_SIZE * 3); 234 235 // Shrink the leaves 236 tree.resize(CHUNK_SIZE as usize * 2 - 100); 237 238 assert!(tree.is_index_valid(0)); 239 assert!(tree.is_index_valid(1)); 240 assert!(!tree.is_index_valid(2)); 241 // The second chunk is a hole and full of zero. When shrunk, with zero padding, the hash 242 // happens to be consistent to a full-zero chunk. 243 assert!(tree.is_consistent(1, &Sha256Hasher::HASH_OF_4096_ZEROS)); 244 Ok(()) 245 } 246 247 fn generate_fsverity_digest_sequentially(test_data: &[u8]) -> Result<Sha256Hash> { 248 let mut tree = MerkleLeaves::new(); 249 for (index, chunk) in test_data.chunks(CHUNK_SIZE as usize).enumerate() { 250 let hash = Sha256Hasher::new()? 251 .update(&chunk)? 252 .update(&vec![0u8; CHUNK_SIZE as usize - chunk.len()])? 
253 .finalize()?; 254 255 tree.update_hash(index, &hash, CHUNK_SIZE * index as u64 + chunk.len() as u64); 256 } 257 Ok(tree.calculate_fsverity_digest()?) 258 } 259 260 fn to_u8_vec(hex_str: &str) -> Vec<u8> { 261 assert!(hex_str.len() % 2 == 0); 262 (0..hex_str.len()) 263 .step_by(2) 264 .map(|i| u8::from_str_radix(&hex_str[i..i + 2], 16).unwrap()) 265 .collect() 266 } 267 } 268