/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 use super::common::{
18     build_fsverity_digest, merkle_tree_height, FsverityError, Sha256Hash, SHA256_HASH_SIZE,
19 };
20 use crate::common::{divide_roundup, CHUNK_SIZE};
21 use openssl::sha::Sha256;
22 
/// Size in bytes of one SHA-256 digest.
const HASH_SIZE: usize = SHA256_HASH_SIZE;
/// Number of child hashes that fit in one CHUNK_SIZE-byte Merkle tree page.
const HASH_PER_PAGE: usize = CHUNK_SIZE as usize / HASH_SIZE;

/// SHA-256 digest of 4096 zero bytes, i.e. the leaf hash of an all-zero chunk.
/// Used as the fill value for holes and for newly grown leaves.
const HASH_OF_4096_ZEROS: Sha256Hash = [
    0xad, 0x7f, 0xac, 0xb2, 0x58, 0x6f, 0xc6, 0xe9, 0x66, 0xc0, 0x04, 0xd7, 0xd1, 0xd1, 0x6b, 0x02,
    0x4f, 0x58, 0x05, 0xff, 0x7c, 0xb4, 0x7c, 0x7a, 0x85, 0xda, 0xbd, 0x8b, 0x48, 0x89, 0x2c, 0xa7,
];
30 
/// MerkleLeaves can be used by the class' customer for bookkeeping integrity data for their bytes.
/// It can also be used to generate the standard fs-verity digest for the source data.
///
/// It's in-memory because for the initial use cases, we don't need to read back an existing file,
/// and only need to deal with new files. Also, considering that the output file won't be large at
/// the moment, it is sufficient to simply keep the Merkle tree in memory in the trusted world. To
/// further simplify the initial implementation, we only need to keep the leaf nodes in memory, and
/// generate the tree / root hash when requested.
pub struct MerkleLeaves {
    // Hash of each CHUNK_SIZE-sized chunk of the file, i.e. the leaf level of the Merkle tree.
    leaves: Vec<Sha256Hash>,
    // Size in bytes of the file the leaves represent (may not be a multiple of CHUNK_SIZE).
    file_size: u64,
}
43 
hash_all_pages(source: &[Sha256Hash]) -> Vec<Sha256Hash>44 fn hash_all_pages(source: &[Sha256Hash]) -> Vec<Sha256Hash> {
45     source
46         .chunks(HASH_PER_PAGE)
47         .map(|chunk| {
48             let padding_bytes = (HASH_PER_PAGE - chunk.len()) * HASH_SIZE;
49             let mut ctx = Sha256::new();
50             for data in chunk {
51                 ctx.update(data.as_ref());
52             }
53             ctx.update(&vec![0u8; padding_bytes]);
54             ctx.finish()
55         })
56         .collect()
57 }
58 
59 impl MerkleLeaves {
60     /// Creates a `MerkleLeaves` instance with empty data.
new() -> Self61     pub fn new() -> Self {
62         Self { leaves: Vec::new(), file_size: 0 }
63     }
64 
65     /// Gets size of the file represented by `MerkleLeaves`.
file_size(&self) -> u6466     pub fn file_size(&self) -> u64 {
67         self.file_size
68     }
69 
70     /// Grows the `MerkleLeaves` to the new file size. To keep the consistency, if any new leaves
71     /// are added, the leaves/hashes are as if the extended content is all zero.
72     ///
73     /// However, when the change shrinks the leaf number, `MerkleLeaves` does not know if the hash
74     /// of the last chunk has changed, or what the new value should be. As the result, it is up to
75     /// the caller to fix the last leaf if needed.
resize(&mut self, new_file_size: usize)76     pub fn resize(&mut self, new_file_size: usize) {
77         let new_file_size = new_file_size as u64;
78         let leaves_size = divide_roundup(new_file_size, CHUNK_SIZE);
79         self.leaves.resize(leaves_size as usize, HASH_OF_4096_ZEROS);
80         self.file_size = new_file_size;
81     }
82 
83     /// Updates the hash of the `index`-th leaf, and increase the size to `size_at_least` if the
84     /// current size is smaller.
update_hash(&mut self, index: usize, hash: &Sha256Hash, size_at_least: u64)85     pub fn update_hash(&mut self, index: usize, hash: &Sha256Hash, size_at_least: u64) {
86         // +1 since index is zero-based.
87         if self.leaves.len() < index + 1 {
88             // When resizing, fill in hash of zeros by default. This makes it easy to handle holes
89             // in a file.
90             self.leaves.resize(index + 1, HASH_OF_4096_ZEROS);
91         }
92         self.leaves[index].clone_from_slice(hash);
93 
94         if size_at_least > self.file_size {
95             self.file_size = size_at_least;
96         }
97     }
98 
99     /// Returns whether `index` is within the bound of leaves.
is_index_valid(&self, index: usize) -> bool100     pub fn is_index_valid(&self, index: usize) -> bool {
101         index < self.leaves.len()
102     }
103 
104     /// Returns whether the `index`-th hash is consistent to `hash`.
is_consistent(&self, index: usize, hash: &Sha256Hash) -> bool105     pub fn is_consistent(&self, index: usize, hash: &Sha256Hash) -> bool {
106         if let Some(element) = self.leaves.get(index) {
107             element == hash
108         } else {
109             false
110         }
111     }
112 
calculate_root_hash(&self) -> Result<Sha256Hash, FsverityError>113     fn calculate_root_hash(&self) -> Result<Sha256Hash, FsverityError> {
114         match self.leaves.len() {
115             // Special cases per fs-verity digest definition.
116             0 => {
117                 debug_assert_eq!(self.file_size, 0);
118                 Ok([0u8; HASH_SIZE])
119             }
120             1 => {
121                 debug_assert!(self.file_size <= CHUNK_SIZE && self.file_size > 0);
122                 Ok(self.leaves[0])
123             }
124             n => {
125                 debug_assert_eq!((self.file_size - 1) / CHUNK_SIZE, n as u64);
126                 let size_for_equivalent = n as u64 * CHUNK_SIZE;
127                 let level = merkle_tree_height(size_for_equivalent).unwrap(); // safe since n > 0
128 
129                 // `leaves` is owned and can't be the initial state below. Here we manually hash it
130                 // first to avoid a copy and to get the type right.
131                 let second_level = hash_all_pages(&self.leaves);
132                 let hashes = (1..=level).fold(second_level, |source, _| hash_all_pages(&source));
133                 if hashes.len() != 1 {
134                     Err(FsverityError::InvalidState)
135                 } else {
136                     Ok(hashes.into_iter().next().unwrap())
137                 }
138             }
139         }
140     }
141 
142     /// Returns the fs-verity digest based on the current tree and file size.
calculate_fsverity_digest(&self) -> Result<Sha256Hash, FsverityError>143     pub fn calculate_fsverity_digest(&self) -> Result<Sha256Hash, FsverityError> {
144         let root_hash = self.calculate_root_hash()?;
145         Ok(build_fsverity_digest(&root_hash, self.file_size))
146     }
147 }
148 
#[cfg(test)]
mod tests {
    // Test data below can be generated by:
    //  $ perl -e 'print "\x{00}" x 6000' > foo
    //  $ perl -e 'print "\x{01}" x 5000' >> foo
    //  $ fsverity digest foo
    use super::*;
    use anyhow::Result;
    use openssl::sha::sha256;

    /// Feeds `test_data` into a `MerkleLeaves` one chunk at a time, in order, and returns the
    /// resulting fs-verity digest.
    fn generate_fsverity_digest_sequentially(test_data: &[u8]) -> Result<Sha256Hash> {
        let mut tree = MerkleLeaves::new();
        for (index, chunk) in test_data.chunks(CHUNK_SIZE as usize).enumerate() {
            let mut ctx = Sha256::new();
            ctx.update(chunk);
            // A trailing partial chunk is zero-padded to the full chunk size before hashing.
            ctx.update(&vec![0u8; CHUNK_SIZE as usize - chunk.len()]);
            tree.update_hash(index, &ctx.finish(), CHUNK_SIZE * index as u64 + chunk.len() as u64);
        }
        Ok(tree.calculate_fsverity_digest()?)
    }

    /// Asserts that `data` produces the fs-verity digest given by `expected_hex`.
    fn assert_digest(expected_hex: &str, data: &[u8]) -> Result<()> {
        assert_eq!(hex::decode(expected_hex)?, generate_fsverity_digest_sequentially(data)?);
        Ok(())
    }

    #[test]
    fn merkle_tree_empty_file() -> Result<()> {
        assert_digest("3d248ca542a24fc62d1c43b916eae5016878e2533c88238480b26128a1f1af95", &[])
    }

    #[test]
    fn merkle_tree_file_size_less_than_or_equal_to_4k() -> Result<()> {
        // Test a file that contains 4096 '\01's.
        assert_digest(
            "cd0875ca59c7d37e962c5e8f5acd3770750ac80225e2df652ce5672fd34500af",
            &vec![1; 4096],
        )
    }

    #[test]
    fn merkle_tree_more_sizes() -> Result<()> {
        // Test files that contain >4096 '\01's.
        let cases = [
            // One byte more than a single chunk.
            ("2901b849fda2d91e3929524561c4a47e77bb64734319759507b2029f18b9cc52", 4097),
            // Exactly two chunks.
            ("2a476d58eb80394052a3a783111e1458ac3ecf68a7878183fed86ca0ff47ec0d", 8192),
            // Max size that still fits in 2 levels.
            ("26b7c190a34e19f420808ee7ec233b09fa6c34543b5a9d2950530114c205d14f", 524288),
            // Data that requires 3 levels.
            ("316835d9be1c95b5cd55d07ae7965d651689efad186e26cbf680e40b683a3262", 524289),
        ];
        for &(digest_hex, size) in cases.iter() {
            assert_digest(digest_hex, &vec![1; size])?;
        }
        Ok(())
    }

    #[test]
    fn merkle_tree_non_sequential() -> Result<()> {
        let mut tree = MerkleLeaves::new();
        let block = vec![1u8; CHUNK_SIZE as usize];
        let hash = sha256(&block);

        // Update hashes of 4 1-blocks, out of order.
        for index in [1u64, 3, 0, 2].iter() {
            tree.update_hash(*index as usize, &hash, CHUNK_SIZE * (index + 1));
        }

        assert_eq!(
            hex::decode("7d3c0d2e1dc54230b20ed875f5f3a4bd3f9873df601936b3ca8127d4db3548f3")?,
            tree.calculate_fsverity_digest()?
        );
        Ok(())
    }

    #[test]
    fn merkle_tree_grow_leaves() -> Result<()> {
        let mut tree = MerkleLeaves::new();
        // Fake hash of a CHUNK_SIZE block.
        tree.update_hash(0, &[42; HASH_SIZE], CHUNK_SIZE);

        // Grows the leaves to 3 chunks, the last one partial.
        tree.resize(4096 * 3 - 100);

        for index in 0..3 {
            assert!(tree.is_index_valid(index));
        }
        assert!(!tree.is_index_valid(3));
        // Newly added leaves are filled with the hash of an all-zero chunk.
        assert!(tree.is_consistent(1, &HASH_OF_4096_ZEROS));
        assert!(tree.is_consistent(2, &HASH_OF_4096_ZEROS));
        Ok(())
    }

    #[test]
    fn merkle_tree_shrink_leaves() -> Result<()> {
        let mut tree = MerkleLeaves::new();
        tree.update_hash(0, &[42; HASH_SIZE], CHUNK_SIZE);
        tree.update_hash(0, &[42; HASH_SIZE], CHUNK_SIZE * 3);

        // Shrink the leaves.
        tree.resize(CHUNK_SIZE as usize * 2 - 100);

        assert!(tree.is_index_valid(0));
        assert!(tree.is_index_valid(1));
        assert!(!tree.is_index_valid(2));
        // The second chunk is a hole and full of zero. When shrunk, with zero padding, the hash
        // happens to be consistent to a full-zero chunk.
        assert!(tree.is_consistent(1, &HASH_OF_4096_ZEROS));
        Ok(())
    }
}
272