1 // Copyright 2018 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 mod qcow_raw_file;
6 mod refcount;
7 mod vec_cache;
8 
9 use base::{
10     error, AsRawDescriptor, AsRawDescriptors, FileAllocate, FileReadWriteAtVolatile,
11     FileReadWriteVolatile, FileSetLen, FileSync, PunchHole, RawDescriptor, SeekHole, WriteZeroesAt,
12 };
13 use data_model::{VolatileMemory, VolatileSlice};
14 use libc::{EINVAL, ENOSPC, ENOTSUP};
15 use remain::sorted;
16 
17 use std::cmp::{max, min};
18 use std::fmt::{self, Display};
19 use std::fs::{File, OpenOptions};
20 use std::io::{self, Read, Seek, SeekFrom, Write};
21 use std::mem::size_of;
22 use std::str;
23 
24 use crate::qcow::qcow_raw_file::QcowRawFile;
25 use crate::qcow::refcount::RefCount;
26 use crate::qcow::vec_cache::{CacheMap, Cacheable, VecCache};
27 use crate::{create_disk_file, DiskFile, DiskGetLen};
28 
29 #[sorted]
30 #[derive(Debug)]
31 pub enum Error {
32     BackingFileIo(io::Error),
33     BackingFileOpen(Box<crate::Error>),
34     BackingFileTooLong(usize),
35     CompressedBlocksNotSupported,
36     EvictingCache(io::Error),
37     FileTooBig(u64),
38     GettingFileSize(io::Error),
39     GettingRefcount(refcount::Error),
40     InvalidBackingFileName(str::Utf8Error),
41     InvalidClusterIndex,
42     InvalidClusterSize,
43     InvalidIndex,
44     InvalidL1TableOffset,
45     InvalidL1TableSize(u32),
46     InvalidMagic,
47     InvalidOffset(u64),
48     InvalidRefcountTableOffset,
49     InvalidRefcountTableSize(u64),
50     NoFreeClusters,
51     NoRefcountClusters,
52     NotEnoughSpaceForRefcounts,
53     OpeningFile(io::Error),
54     ReadingHeader(io::Error),
55     ReadingPointers(io::Error),
56     ReadingRefCountBlock(refcount::Error),
57     ReadingRefCounts(io::Error),
58     RebuildingRefCounts(io::Error),
59     RefcountTableOffEnd,
60     RefcountTableTooLarge,
61     SeekingFile(io::Error),
62     SettingRefcountRefcount(io::Error),
63     SizeTooSmallForNumberOfClusters,
64     TooManyL1Entries(u64),
65     TooManyRefcounts(u64),
66     UnsupportedRefcountOrder,
67     UnsupportedVersion(u32),
68     WritingHeader(io::Error),
69 }
70 
71 pub type Result<T> = std::result::Result<T, Error>;
72 
73 impl Display for Error {
74     #[remain::check]
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result75     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
76         use self::Error::*;
77 
78         #[sorted]
79         match self {
80             BackingFileIo(e) => write!(f, "backing file io error: {}", e),
81             BackingFileOpen(e) => write!(f, "backing file open error: {}", *e),
82             BackingFileTooLong(len) => {
83                 write!(f, "backing file name is too long: {} bytes over", len)
84             }
85             CompressedBlocksNotSupported => write!(f, "compressed blocks not supported"),
86             EvictingCache(e) => write!(f, "failed to evict cache: {}", e),
87             FileTooBig(size) => write!(
88                 f,
89                 "file larger than max of {}: {}",
90                 MAX_QCOW_FILE_SIZE, size
91             ),
92             GettingFileSize(e) => write!(f, "failed to get file size: {}", e),
93             GettingRefcount(e) => write!(f, "failed to get refcount: {}", e),
94             InvalidBackingFileName(e) => write!(f, "failed to parse filename: {}", e),
95             InvalidClusterIndex => write!(f, "invalid cluster index"),
96             InvalidClusterSize => write!(f, "invalid cluster size"),
97             InvalidIndex => write!(f, "invalid index"),
98             InvalidL1TableOffset => write!(f, "invalid L1 table offset"),
99             InvalidL1TableSize(size) => write!(f, "invalid L1 table size {}", size),
100             InvalidMagic => write!(f, "invalid magic"),
101             InvalidOffset(_) => write!(f, "invalid offset"),
102             InvalidRefcountTableOffset => write!(f, "invalid refcount table offset"),
103             InvalidRefcountTableSize(size) => write!(f, "invalid refcount table size: {}", size),
104             NoFreeClusters => write!(f, "no free clusters"),
105             NoRefcountClusters => write!(f, "no refcount clusters"),
106             NotEnoughSpaceForRefcounts => write!(f, "not enough space for refcounts"),
107             OpeningFile(e) => write!(f, "failed to open file: {}", e),
108             ReadingHeader(e) => write!(f, "failed to read header: {}", e),
109             ReadingPointers(e) => write!(f, "failed to read pointers: {}", e),
110             ReadingRefCountBlock(e) => write!(f, "failed to read ref count block: {}", e),
111             ReadingRefCounts(e) => write!(f, "failed to read ref counts: {}", e),
112             RebuildingRefCounts(e) => write!(f, "failed to rebuild ref counts: {}", e),
113             RefcountTableOffEnd => write!(f, "refcount table offset past file end"),
114             RefcountTableTooLarge => write!(f, "too many clusters specified for refcount table"),
115             SeekingFile(e) => write!(f, "failed to seek file: {}", e),
116             SettingRefcountRefcount(e) => write!(f, "failed to set refcount refcount: {}", e),
117             SizeTooSmallForNumberOfClusters => write!(f, "size too small for number of clusters"),
118             TooManyL1Entries(count) => write!(f, "l1 entry table too large: {}", count),
119             TooManyRefcounts(count) => write!(f, "ref count table too large: {}", count),
120             UnsupportedRefcountOrder => write!(f, "unsupported refcount order"),
121             UnsupportedVersion(v) => write!(f, "unsupported version: {}", v),
122             WritingHeader(e) => write!(f, "failed to write header: {}", e),
123         }
124     }
125 }
126 
127 // Maximum data size supported.
128 const MAX_QCOW_FILE_SIZE: u64 = 0x01 << 44; // 16 TB.
129 
130 // QCOW magic constant that starts the header.
131 pub const QCOW_MAGIC: u32 = 0x5146_49fb;
132 // Default to a cluster size of 2^DEFAULT_CLUSTER_BITS
133 const DEFAULT_CLUSTER_BITS: u32 = 16;
134 // Limit clusters to reasonable sizes. Choose the same limits as qemu. Making the clusters smaller
135 // increases the amount of overhead for book keeping.
136 const MIN_CLUSTER_BITS: u32 = 9;
137 const MAX_CLUSTER_BITS: u32 = 21;
138 // The L1 and RefCount table are kept in RAM, only handle files that require less than 35M entries.
139 // This easily covers 1 TB files. When support for bigger files is needed the assumptions made to
140 // keep these tables in RAM needs to be thrown out.
141 const MAX_RAM_POINTER_TABLE_SIZE: u64 = 35_000_000;
142 // Only support 2 byte refcounts, 2^refcount_order bits.
143 const DEFAULT_REFCOUNT_ORDER: u32 = 4;
144 
145 const V3_BARE_HEADER_SIZE: u32 = 104;
146 
147 // bits 0-8 and 56-63 are reserved.
148 const L1_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00;
149 const L2_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00;
150 // Flags
151 const COMPRESSED_FLAG: u64 = 1 << 62;
152 const CLUSTER_USED_FLAG: u64 = 1 << 63;
153 const COMPATIBLE_FEATURES_LAZY_REFCOUNTS: u64 = 1 << 0;
154 
155 // The format supports a "header extension area", that crosvm does not use.
156 const QCOW_EMPTY_HEADER_EXTENSION_SIZE: u32 = 8;
157 
158 // Defined by the specification
159 const MAX_BACKING_FILE_SIZE: u32 = 1023;
160 
161 /// Contains the information from the header of a qcow file.
162 #[derive(Clone, Debug)]
163 pub struct QcowHeader {
164     pub magic: u32,
165     pub version: u32,
166 
167     pub backing_file_offset: u64,
168     pub backing_file_size: u32,
169 
170     pub cluster_bits: u32,
171     pub size: u64,
172     pub crypt_method: u32,
173 
174     pub l1_size: u32,
175     pub l1_table_offset: u64,
176 
177     pub refcount_table_offset: u64,
178     pub refcount_table_clusters: u32,
179 
180     pub nb_snapshots: u32,
181     pub snapshots_offset: u64,
182 
183     // v3 entries
184     pub incompatible_features: u64,
185     pub compatible_features: u64,
186     pub autoclear_features: u64,
187     pub refcount_order: u32,
188     pub header_size: u32,
189 
190     // Post-header entries
191     pub backing_file_path: Option<String>,
192 }
193 
194 // Reads the next u16 from the file.
read_u16_from_file(mut f: &File) -> Result<u16>195 fn read_u16_from_file(mut f: &File) -> Result<u16> {
196     let mut value = [0u8; 2];
197     (&mut f)
198         .read_exact(&mut value)
199         .map_err(Error::ReadingHeader)?;
200     Ok(u16::from_be_bytes(value))
201 }
202 
203 // Reads the next u32 from the file.
read_u32_from_file(mut f: &File) -> Result<u32>204 fn read_u32_from_file(mut f: &File) -> Result<u32> {
205     let mut value = [0u8; 4];
206     (&mut f)
207         .read_exact(&mut value)
208         .map_err(Error::ReadingHeader)?;
209     Ok(u32::from_be_bytes(value))
210 }
211 
212 // Reads the next u64 from the file.
read_u64_from_file(mut f: &File) -> Result<u64>213 fn read_u64_from_file(mut f: &File) -> Result<u64> {
214     let mut value = [0u8; 8];
215     (&mut f)
216         .read_exact(&mut value)
217         .map_err(Error::ReadingHeader)?;
218     Ok(u64::from_be_bytes(value))
219 }
220 
221 impl QcowHeader {
222     /// Creates a QcowHeader from a reference to a file.
new(f: &mut File) -> Result<QcowHeader>223     pub fn new(f: &mut File) -> Result<QcowHeader> {
224         f.seek(SeekFrom::Start(0)).map_err(Error::ReadingHeader)?;
225 
226         let magic = read_u32_from_file(f)?;
227         if magic != QCOW_MAGIC {
228             return Err(Error::InvalidMagic);
229         }
230 
231         let mut header = QcowHeader {
232             magic,
233             version: read_u32_from_file(f)?,
234             backing_file_offset: read_u64_from_file(f)?,
235             backing_file_size: read_u32_from_file(f)?,
236             cluster_bits: read_u32_from_file(f)?,
237             size: read_u64_from_file(f)?,
238             crypt_method: read_u32_from_file(f)?,
239             l1_size: read_u32_from_file(f)?,
240             l1_table_offset: read_u64_from_file(f)?,
241             refcount_table_offset: read_u64_from_file(f)?,
242             refcount_table_clusters: read_u32_from_file(f)?,
243             nb_snapshots: read_u32_from_file(f)?,
244             snapshots_offset: read_u64_from_file(f)?,
245             incompatible_features: read_u64_from_file(f)?,
246             compatible_features: read_u64_from_file(f)?,
247             autoclear_features: read_u64_from_file(f)?,
248             refcount_order: read_u32_from_file(f)?,
249             header_size: read_u32_from_file(f)?,
250             backing_file_path: None,
251         };
252         if header.backing_file_size > MAX_BACKING_FILE_SIZE {
253             return Err(Error::BackingFileTooLong(header.backing_file_size as usize));
254         }
255         if header.backing_file_offset != 0 {
256             f.seek(SeekFrom::Start(header.backing_file_offset))
257                 .map_err(Error::ReadingHeader)?;
258             let mut backing_file_name_bytes = vec![0u8; header.backing_file_size as usize];
259             f.read_exact(&mut backing_file_name_bytes)
260                 .map_err(Error::ReadingHeader)?;
261             header.backing_file_path = Some(
262                 String::from_utf8(backing_file_name_bytes)
263                     .map_err(|err| Error::InvalidBackingFileName(err.utf8_error()))?,
264             );
265         }
266         Ok(header)
267     }
268 
create_for_size_and_path(size: u64, backing_file: Option<&str>) -> Result<QcowHeader>269     pub fn create_for_size_and_path(size: u64, backing_file: Option<&str>) -> Result<QcowHeader> {
270         let cluster_bits: u32 = DEFAULT_CLUSTER_BITS;
271         let cluster_size: u32 = 0x01 << cluster_bits;
272         let max_length: usize =
273             (cluster_size - V3_BARE_HEADER_SIZE - QCOW_EMPTY_HEADER_EXTENSION_SIZE) as usize;
274         if let Some(path) = backing_file {
275             if path.len() > max_length {
276                 return Err(Error::BackingFileTooLong(path.len() - max_length));
277             }
278         }
279         // L2 blocks are always one cluster long. They contain cluster_size/sizeof(u64) addresses.
280         let l2_size: u32 = cluster_size / size_of::<u64>() as u32;
281         let num_clusters: u32 = div_round_up_u64(size, u64::from(cluster_size)) as u32;
282         let num_l2_clusters: u32 = div_round_up_u32(num_clusters, l2_size);
283         let l1_clusters: u32 = div_round_up_u32(num_l2_clusters, cluster_size);
284         let header_clusters = div_round_up_u32(size_of::<QcowHeader>() as u32, cluster_size);
285         Ok(QcowHeader {
286             magic: QCOW_MAGIC,
287             version: 3,
288             backing_file_offset: (if backing_file.is_none() {
289                 0
290             } else {
291                 V3_BARE_HEADER_SIZE + QCOW_EMPTY_HEADER_EXTENSION_SIZE
292             }) as u64,
293             backing_file_size: backing_file.map_or(0, |x| x.len()) as u32,
294             cluster_bits: DEFAULT_CLUSTER_BITS,
295             size,
296             crypt_method: 0,
297             l1_size: num_l2_clusters,
298             l1_table_offset: u64::from(cluster_size),
299             // The refcount table is after l1 + header.
300             refcount_table_offset: u64::from(cluster_size * (l1_clusters + 1)),
301             refcount_table_clusters: {
302                 // Pre-allocate enough clusters for the entire refcount table as it must be
303                 // continuous in the file. Allocate enough space to refcount all clusters, including
304                 // the refcount clusters.
305                 let max_refcount_clusters = max_refcount_clusters(
306                     DEFAULT_REFCOUNT_ORDER,
307                     cluster_size,
308                     num_clusters + l1_clusters + num_l2_clusters + header_clusters,
309                 ) as u32;
310                 // The refcount table needs to store the offset of each refcount cluster.
311                 div_round_up_u32(
312                     max_refcount_clusters * size_of::<u64>() as u32,
313                     cluster_size,
314                 )
315             },
316             nb_snapshots: 0,
317             snapshots_offset: 0,
318             incompatible_features: 0,
319             compatible_features: 0,
320             autoclear_features: 0,
321             refcount_order: DEFAULT_REFCOUNT_ORDER,
322             header_size: V3_BARE_HEADER_SIZE,
323             backing_file_path: backing_file.map(String::from),
324         })
325     }
326 
327     /// Write the header to `file`.
write_to<F: Write + Seek>(&self, file: &mut F) -> Result<()>328     pub fn write_to<F: Write + Seek>(&self, file: &mut F) -> Result<()> {
329         // Writes the next u32 to the file.
330         fn write_u32_to_file<F: Write>(f: &mut F, value: u32) -> Result<()> {
331             f.write_all(&value.to_be_bytes())
332                 .map_err(Error::WritingHeader)
333         }
334 
335         // Writes the next u64 to the file.
336         fn write_u64_to_file<F: Write>(f: &mut F, value: u64) -> Result<()> {
337             f.write_all(&value.to_be_bytes())
338                 .map_err(Error::WritingHeader)
339         }
340 
341         write_u32_to_file(file, self.magic)?;
342         write_u32_to_file(file, self.version)?;
343         write_u64_to_file(file, self.backing_file_offset)?;
344         write_u32_to_file(file, self.backing_file_size)?;
345         write_u32_to_file(file, self.cluster_bits)?;
346         write_u64_to_file(file, self.size)?;
347         write_u32_to_file(file, self.crypt_method)?;
348         write_u32_to_file(file, self.l1_size)?;
349         write_u64_to_file(file, self.l1_table_offset)?;
350         write_u64_to_file(file, self.refcount_table_offset)?;
351         write_u32_to_file(file, self.refcount_table_clusters)?;
352         write_u32_to_file(file, self.nb_snapshots)?;
353         write_u64_to_file(file, self.snapshots_offset)?;
354         write_u64_to_file(file, self.incompatible_features)?;
355         write_u64_to_file(file, self.compatible_features)?;
356         write_u64_to_file(file, self.autoclear_features)?;
357         write_u32_to_file(file, self.refcount_order)?;
358         write_u32_to_file(file, self.header_size)?;
359         write_u32_to_file(file, 0)?; // header extension type: end of header extension area
360         write_u32_to_file(file, 0)?; // length of header extension data: 0
361         if let Some(backing_file_path) = self.backing_file_path.as_ref() {
362             write!(file, "{}", backing_file_path).map_err(Error::WritingHeader)?;
363         }
364 
365         // Set the file length by seeking and writing a zero to the last byte. This avoids needing
366         // a `File` instead of anything that implements seek as the `file` argument.
367         // Zeros out the l1 and refcount table clusters.
368         let cluster_size = 0x01u64 << self.cluster_bits;
369         let refcount_blocks_size = u64::from(self.refcount_table_clusters) * cluster_size;
370         file.seek(SeekFrom::Start(
371             self.refcount_table_offset + refcount_blocks_size - 2,
372         ))
373         .map_err(Error::WritingHeader)?;
374         file.write(&[0u8]).map_err(Error::WritingHeader)?;
375 
376         Ok(())
377     }
378 }
379 
max_refcount_clusters(refcount_order: u32, cluster_size: u32, num_clusters: u32) -> u64380 fn max_refcount_clusters(refcount_order: u32, cluster_size: u32, num_clusters: u32) -> u64 {
381     // Use u64 as the product of the u32 inputs can overflow.
382     let refcount_bytes = (0x01 << refcount_order as u64) / 8;
383     let for_data = div_round_up_u64(num_clusters as u64 * refcount_bytes, cluster_size as u64);
384     let for_refcounts = div_round_up_u64(for_data * refcount_bytes, cluster_size as u64);
385     for_data + for_refcounts
386 }
387 
388 /// Represents a qcow2 file. This is a sparse file format maintained by the qemu project.
389 /// Full documentation of the format can be found in the qemu repository.
390 ///
391 /// # Example
392 ///
393 /// ```
394 /// # use std::io::{Read, Seek, SeekFrom};
395 /// # use disk::QcowFile;
396 /// # fn test(file: std::fs::File) -> std::io::Result<()> {
397 ///     let mut q = QcowFile::from(file).expect("Can't open qcow file");
398 ///     let mut buf = [0u8; 12];
399 ///     q.seek(SeekFrom::Start(10 as u64))?;
400 ///     q.read(&mut buf[..])?;
401 /// #   Ok(())
402 /// # }
403 /// ```
404 #[derive(Debug)]
405 pub struct QcowFile {
406     raw_file: QcowRawFile,
407     header: QcowHeader,
408     l1_table: VecCache<u64>,
409     l2_entries: u64,
410     l2_cache: CacheMap<VecCache<u64>>,
411     refcounts: RefCount,
412     current_offset: u64,
413     unref_clusters: Vec<u64>, // List of freshly unreferenced clusters.
414     // List of unreferenced clusters available to be used. unref clusters become available once the
415     // removal of references to them have been synced to disk.
416     avail_clusters: Vec<u64>,
417     backing_file: Option<Box<dyn DiskFile>>,
418 }
419 
420 impl QcowFile {
421     /// Creates a QcowFile from `file`. File must be a valid qcow2 image.
from(mut file: File) -> Result<QcowFile>422     pub fn from(mut file: File) -> Result<QcowFile> {
423         let header = QcowHeader::new(&mut file)?;
424 
425         // Only v3 files are supported.
426         if header.version != 3 {
427             return Err(Error::UnsupportedVersion(header.version));
428         }
429 
430         // Make sure that the L1 table fits in RAM.
431         if u64::from(header.l1_size) > MAX_RAM_POINTER_TABLE_SIZE {
432             return Err(Error::InvalidL1TableSize(header.l1_size));
433         }
434 
435         let cluster_bits: u32 = header.cluster_bits;
436         if !(MIN_CLUSTER_BITS..=MAX_CLUSTER_BITS).contains(&cluster_bits) {
437             return Err(Error::InvalidClusterSize);
438         }
439         let cluster_size = 0x01u64 << cluster_bits;
440 
441         // Limit the total size of the disk.
442         if header.size > MAX_QCOW_FILE_SIZE {
443             return Err(Error::FileTooBig(header.size));
444         }
445 
446         let backing_file = if let Some(backing_file_path) = header.backing_file_path.as_ref() {
447             let path = backing_file_path.clone();
448             let backing_raw_file = OpenOptions::new()
449                 .read(true)
450                 .open(path)
451                 .map_err(Error::BackingFileIo)?;
452             let backing_file = create_disk_file(backing_raw_file)
453                 .map_err(|e| Error::BackingFileOpen(Box::new(e)))?;
454             Some(backing_file)
455         } else {
456             None
457         };
458 
459         // Only support two byte refcounts.
460         let refcount_bits: u64 = 0x01u64
461             .checked_shl(header.refcount_order)
462             .ok_or(Error::UnsupportedRefcountOrder)?;
463         if refcount_bits != 16 {
464             return Err(Error::UnsupportedRefcountOrder);
465         }
466         let refcount_bytes = (refcount_bits + 7) / 8;
467 
468         // Need at least one refcount cluster
469         if header.refcount_table_clusters == 0 {
470             return Err(Error::NoRefcountClusters);
471         }
472         offset_is_cluster_boundary(header.l1_table_offset, header.cluster_bits)?;
473         offset_is_cluster_boundary(header.snapshots_offset, header.cluster_bits)?;
474         // refcount table must be a cluster boundary, and within the file's virtual or actual size.
475         offset_is_cluster_boundary(header.refcount_table_offset, header.cluster_bits)?;
476         let file_size = file.metadata().map_err(Error::GettingFileSize)?.len();
477         if header.refcount_table_offset > max(file_size, header.size) {
478             return Err(Error::RefcountTableOffEnd);
479         }
480 
481         // The first cluster should always have a non-zero refcount, so if it is 0,
482         // this is an old file with broken refcounts, which requires a rebuild.
483         let mut refcount_rebuild_required = true;
484         file.seek(SeekFrom::Start(header.refcount_table_offset))
485             .map_err(Error::SeekingFile)?;
486         let first_refblock_addr = read_u64_from_file(&file)?;
487         if first_refblock_addr != 0 {
488             file.seek(SeekFrom::Start(first_refblock_addr))
489                 .map_err(Error::SeekingFile)?;
490             let first_cluster_refcount = read_u16_from_file(&file)?;
491             if first_cluster_refcount != 0 {
492                 refcount_rebuild_required = false;
493             }
494         }
495 
496         if (header.compatible_features & COMPATIBLE_FEATURES_LAZY_REFCOUNTS) != 0 {
497             refcount_rebuild_required = true;
498         }
499 
500         let mut raw_file =
501             QcowRawFile::from(file, cluster_size).ok_or(Error::InvalidClusterSize)?;
502         if refcount_rebuild_required {
503             QcowFile::rebuild_refcounts(&mut raw_file, header.clone())?;
504         }
505 
506         let l2_size = cluster_size / size_of::<u64>() as u64;
507         let num_clusters = div_round_up_u64(header.size, cluster_size);
508         let num_l2_clusters = div_round_up_u64(num_clusters, l2_size);
509         let l1_clusters = div_round_up_u64(num_l2_clusters, cluster_size);
510         let header_clusters = div_round_up_u64(size_of::<QcowHeader>() as u64, cluster_size);
511         if num_l2_clusters > MAX_RAM_POINTER_TABLE_SIZE {
512             return Err(Error::TooManyL1Entries(num_l2_clusters));
513         }
514         let l1_table = VecCache::from_vec(
515             raw_file
516                 .read_pointer_table(
517                     header.l1_table_offset,
518                     num_l2_clusters,
519                     Some(L1_TABLE_OFFSET_MASK),
520                 )
521                 .map_err(Error::ReadingHeader)?,
522         );
523 
524         let num_clusters = div_round_up_u64(header.size, cluster_size);
525         let refcount_clusters = max_refcount_clusters(
526             header.refcount_order,
527             cluster_size as u32,
528             (num_clusters + l1_clusters + num_l2_clusters + header_clusters) as u32,
529         );
530         // Check that the given header doesn't have a suspiciously sized refcount table.
531         if u64::from(header.refcount_table_clusters) > 2 * refcount_clusters {
532             return Err(Error::RefcountTableTooLarge);
533         }
534         if l1_clusters + refcount_clusters > MAX_RAM_POINTER_TABLE_SIZE {
535             return Err(Error::TooManyRefcounts(refcount_clusters));
536         }
537         let refcount_block_entries = cluster_size / refcount_bytes;
538         let refcounts = RefCount::new(
539             &mut raw_file,
540             header.refcount_table_offset,
541             refcount_clusters,
542             refcount_block_entries,
543             cluster_size,
544         )
545         .map_err(Error::ReadingRefCounts)?;
546 
547         let l2_entries = cluster_size / size_of::<u64>() as u64;
548 
549         let mut qcow = QcowFile {
550             raw_file,
551             header,
552             l1_table,
553             l2_entries,
554             l2_cache: CacheMap::new(100),
555             refcounts,
556             current_offset: 0,
557             unref_clusters: Vec::new(),
558             avail_clusters: Vec::new(),
559             backing_file,
560         };
561 
562         // Check that the L1 and refcount tables fit in a 64bit address space.
563         qcow.header
564             .l1_table_offset
565             .checked_add(qcow.l1_address_offset(qcow.virtual_size()))
566             .ok_or(Error::InvalidL1TableOffset)?;
567         qcow.header
568             .refcount_table_offset
569             .checked_add(u64::from(qcow.header.refcount_table_clusters) * cluster_size)
570             .ok_or(Error::InvalidRefcountTableOffset)?;
571 
572         qcow.find_avail_clusters()?;
573 
574         Ok(qcow)
575     }
576 
577     /// Creates a new QcowFile at the given path.
new(file: File, virtual_size: u64) -> Result<QcowFile>578     pub fn new(file: File, virtual_size: u64) -> Result<QcowFile> {
579         let header = QcowHeader::create_for_size_and_path(virtual_size, None)?;
580         QcowFile::new_from_header(file, header)
581     }
582 
583     /// Creates a new QcowFile at the given path.
new_from_backing(file: File, backing_file_name: &str) -> Result<QcowFile>584     pub fn new_from_backing(file: File, backing_file_name: &str) -> Result<QcowFile> {
585         let backing_raw_file = OpenOptions::new()
586             .read(true)
587             .open(backing_file_name)
588             .map_err(Error::BackingFileIo)?;
589         let backing_file =
590             create_disk_file(backing_raw_file).map_err(|e| Error::BackingFileOpen(Box::new(e)))?;
591         let size = backing_file.get_len().map_err(Error::BackingFileIo)?;
592         let header = QcowHeader::create_for_size_and_path(size, Some(backing_file_name))?;
593         let mut result = QcowFile::new_from_header(file, header)?;
594         result.backing_file = Some(backing_file);
595         Ok(result)
596     }
597 
new_from_header(mut file: File, header: QcowHeader) -> Result<QcowFile>598     fn new_from_header(mut file: File, header: QcowHeader) -> Result<QcowFile> {
599         file.seek(SeekFrom::Start(0)).map_err(Error::SeekingFile)?;
600         header.write_to(&mut file)?;
601 
602         let mut qcow = Self::from(file)?;
603 
604         // Set the refcount for each refcount table cluster.
605         let cluster_size = 0x01u64 << qcow.header.cluster_bits;
606         let refcount_table_base = qcow.header.refcount_table_offset as u64;
607         let end_cluster_addr =
608             refcount_table_base + u64::from(qcow.header.refcount_table_clusters) * cluster_size;
609 
610         let mut cluster_addr = 0;
611         while cluster_addr < end_cluster_addr {
612             let mut unref_clusters = qcow
613                 .set_cluster_refcount(cluster_addr, 1)
614                 .map_err(Error::SettingRefcountRefcount)?;
615             qcow.unref_clusters.append(&mut unref_clusters);
616             cluster_addr += cluster_size;
617         }
618 
619         Ok(qcow)
620     }
621 
set_backing_file(&mut self, backing: Option<Box<dyn DiskFile>>)622     pub fn set_backing_file(&mut self, backing: Option<Box<dyn DiskFile>>) {
623         self.backing_file = backing;
624     }
625 
626     /// Returns the `QcowHeader` for this file.
header(&self) -> &QcowHeader627     pub fn header(&self) -> &QcowHeader {
628         &self.header
629     }
630 
631     /// Returns the L1 lookup table for this file. This is only useful for debugging.
l1_table(&self) -> &[u64]632     pub fn l1_table(&self) -> &[u64] {
633         &self.l1_table.get_values()
634     }
635 
636     /// Returns an L2_table of cluster addresses, only used for debugging.
l2_table(&mut self, l1_index: usize) -> Result<Option<&[u64]>>637     pub fn l2_table(&mut self, l1_index: usize) -> Result<Option<&[u64]>> {
638         let l2_addr_disk = *self.l1_table.get(l1_index).ok_or(Error::InvalidIndex)?;
639 
640         if l2_addr_disk == 0 {
641             // Reading from an unallocated cluster will return zeros.
642             return Ok(None);
643         }
644 
645         if !self.l2_cache.contains_key(&l1_index) {
646             // Not in the cache.
647             let table = VecCache::from_vec(
648                 Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)
649                     .map_err(Error::ReadingPointers)?,
650             );
651             let l1_table = &self.l1_table;
652             let raw_file = &mut self.raw_file;
653             self.l2_cache
654                 .insert(l1_index, table, |index, evicted| {
655                     raw_file.write_pointer_table(
656                         l1_table[index],
657                         evicted.get_values(),
658                         CLUSTER_USED_FLAG,
659                     )
660                 })
661                 .map_err(Error::EvictingCache)?;
662         }
663 
664         // The index must exist as it was just inserted if it didn't already.
665         Ok(Some(self.l2_cache.get(&l1_index).unwrap().get_values()))
666     }
667 
668     /// Returns the refcount table for this file. This is only useful for debugging.
ref_table(&self) -> &[u64]669     pub fn ref_table(&self) -> &[u64] {
670         &self.refcounts.ref_table()
671     }
672 
673     /// Returns the `index`th refcount block from the file.
refcount_block(&mut self, index: usize) -> Result<Option<&[u16]>>674     pub fn refcount_block(&mut self, index: usize) -> Result<Option<&[u16]>> {
675         self.refcounts
676             .refcount_block(&mut self.raw_file, index)
677             .map_err(Error::ReadingRefCountBlock)
678     }
679 
680     /// Returns the first cluster in the file with a 0 refcount. Used for testing.
first_zero_refcount(&mut self) -> Result<Option<u64>>681     pub fn first_zero_refcount(&mut self) -> Result<Option<u64>> {
682         let file_size = self
683             .raw_file
684             .file_mut()
685             .metadata()
686             .map_err(Error::GettingFileSize)?
687             .len();
688         let cluster_size = 0x01u64 << self.header.cluster_bits;
689 
690         let mut cluster_addr = 0;
691         while cluster_addr < file_size {
692             let cluster_refcount = self
693                 .refcounts
694                 .get_cluster_refcount(&mut self.raw_file, cluster_addr)
695                 .map_err(Error::GettingRefcount)?;
696             if cluster_refcount == 0 {
697                 return Ok(Some(cluster_addr));
698             }
699             cluster_addr += cluster_size;
700         }
701         Ok(None)
702     }
703 
find_avail_clusters(&mut self) -> Result<()>704     fn find_avail_clusters(&mut self) -> Result<()> {
705         let cluster_size = self.raw_file.cluster_size();
706 
707         let file_size = self
708             .raw_file
709             .file_mut()
710             .metadata()
711             .map_err(Error::GettingFileSize)?
712             .len();
713 
714         for i in (0..file_size).step_by(cluster_size as usize) {
715             let refcount = self
716                 .refcounts
717                 .get_cluster_refcount(&mut self.raw_file, i)
718                 .map_err(Error::GettingRefcount)?;
719             if refcount == 0 {
720                 self.avail_clusters.push(i);
721             }
722         }
723 
724         Ok(())
725     }
726 
727     /// Rebuild the reference count tables.
rebuild_refcounts(raw_file: &mut QcowRawFile, header: QcowHeader) -> Result<()>728     fn rebuild_refcounts(raw_file: &mut QcowRawFile, header: QcowHeader) -> Result<()> {
729         fn add_ref(refcounts: &mut [u16], cluster_size: u64, cluster_address: u64) -> Result<()> {
730             let idx = (cluster_address / cluster_size) as usize;
731             if idx >= refcounts.len() {
732                 return Err(Error::InvalidClusterIndex);
733             }
734             refcounts[idx] += 1;
735             Ok(())
736         }
737 
738         // Add a reference to the first cluster (header plus extensions).
739         fn set_header_refcount(refcounts: &mut [u16], cluster_size: u64) -> Result<()> {
740             add_ref(refcounts, cluster_size, 0)
741         }
742 
743         // Add references to the L1 table clusters.
744         fn set_l1_refcounts(
745             refcounts: &mut [u16],
746             header: QcowHeader,
747             cluster_size: u64,
748         ) -> Result<()> {
749             let l1_clusters = div_round_up_u64(header.l1_size as u64, cluster_size);
750             let l1_table_offset = header.l1_table_offset;
751             for i in 0..l1_clusters {
752                 add_ref(refcounts, cluster_size, l1_table_offset + i * cluster_size)?;
753             }
754             Ok(())
755         }
756 
757         // Traverse the L1 and L2 tables to find all reachable data clusters.
758         fn set_data_refcounts(
759             refcounts: &mut [u16],
760             header: QcowHeader,
761             cluster_size: u64,
762             raw_file: &mut QcowRawFile,
763         ) -> Result<()> {
764             let l1_table = raw_file
765                 .read_pointer_table(
766                     header.l1_table_offset,
767                     header.l1_size as u64,
768                     Some(L1_TABLE_OFFSET_MASK),
769                 )
770                 .map_err(Error::ReadingPointers)?;
771             for l1_index in 0..header.l1_size as usize {
772                 let l2_addr_disk = *l1_table.get(l1_index).ok_or(Error::InvalidIndex)?;
773                 if l2_addr_disk != 0 {
774                     // Add a reference to the L2 table cluster itself.
775                     add_ref(refcounts, cluster_size, l2_addr_disk)?;
776 
777                     // Read the L2 table and find all referenced data clusters.
778                     let l2_table = raw_file
779                         .read_pointer_table(
780                             l2_addr_disk,
781                             cluster_size / size_of::<u64>() as u64,
782                             Some(L2_TABLE_OFFSET_MASK),
783                         )
784                         .map_err(Error::ReadingPointers)?;
785                     for data_cluster_addr in l2_table {
786                         if data_cluster_addr != 0 {
787                             add_ref(refcounts, cluster_size, data_cluster_addr)?;
788                         }
789                     }
790                 }
791             }
792 
793             Ok(())
794         }
795 
796         // Add references to the top-level refcount table clusters.
797         fn set_refcount_table_refcounts(
798             refcounts: &mut [u16],
799             header: QcowHeader,
800             cluster_size: u64,
801         ) -> Result<()> {
802             let refcount_table_offset = header.refcount_table_offset;
803             for i in 0..header.refcount_table_clusters as u64 {
804                 add_ref(
805                     refcounts,
806                     cluster_size,
807                     refcount_table_offset + i * cluster_size,
808                 )?;
809             }
810             Ok(())
811         }
812 
813         // Allocate clusters for refblocks.
814         // This needs to be done last so that we have the correct refcounts for all other
815         // clusters.
816         fn alloc_refblocks(
817             refcounts: &mut [u16],
818             cluster_size: u64,
819             refblock_clusters: u64,
820             pointers_per_cluster: u64,
821         ) -> Result<Vec<u64>> {
822             let refcount_table_entries = div_round_up_u64(refblock_clusters, pointers_per_cluster);
823             let mut ref_table = vec![0; refcount_table_entries as usize];
824             let mut first_free_cluster: u64 = 0;
825             for refblock_addr in &mut ref_table {
826                 loop {
827                     if first_free_cluster >= refcounts.len() as u64 {
828                         return Err(Error::NotEnoughSpaceForRefcounts);
829                     }
830                     if refcounts[first_free_cluster as usize] == 0 {
831                         break;
832                     }
833                     first_free_cluster += 1;
834                 }
835 
836                 *refblock_addr = first_free_cluster * cluster_size;
837                 add_ref(refcounts, cluster_size, *refblock_addr)?;
838 
839                 first_free_cluster += 1;
840             }
841 
842             Ok(ref_table)
843         }
844 
845         // Write the updated reference count blocks and reftable.
846         fn write_refblocks(
847             refcounts: &[u16],
848             mut header: QcowHeader,
849             ref_table: &[u64],
850             raw_file: &mut QcowRawFile,
851             refcount_block_entries: u64,
852         ) -> Result<()> {
853             // Rewrite the header with lazy refcounts enabled while we are rebuilding the tables.
854             header.compatible_features |= COMPATIBLE_FEATURES_LAZY_REFCOUNTS;
855             raw_file
856                 .file_mut()
857                 .seek(SeekFrom::Start(0))
858                 .map_err(Error::SeekingFile)?;
859             header.write_to(raw_file.file_mut())?;
860 
861             for (i, refblock_addr) in ref_table.iter().enumerate() {
862                 // Write a block of refcounts to the location indicated by refblock_addr.
863                 let refblock_start = i * (refcount_block_entries as usize);
864                 let refblock_end = min(
865                     refcounts.len(),
866                     refblock_start + refcount_block_entries as usize,
867                 );
868                 let refblock = &refcounts[refblock_start..refblock_end];
869                 raw_file
870                     .write_refcount_block(*refblock_addr, refblock)
871                     .map_err(Error::WritingHeader)?;
872 
873                 // If this is the last (partial) cluster, pad it out to a full refblock cluster.
874                 if refblock.len() < refcount_block_entries as usize {
875                     let refblock_padding =
876                         vec![0u16; refcount_block_entries as usize - refblock.len()];
877                     raw_file
878                         .write_refcount_block(
879                             *refblock_addr + refblock.len() as u64 * 2,
880                             &refblock_padding,
881                         )
882                         .map_err(Error::WritingHeader)?;
883                 }
884             }
885 
886             // Rewrite the top-level refcount table.
887             raw_file
888                 .write_pointer_table(header.refcount_table_offset, &ref_table, 0)
889                 .map_err(Error::WritingHeader)?;
890 
891             // Rewrite the header again, now with lazy refcounts disabled.
892             header.compatible_features &= !COMPATIBLE_FEATURES_LAZY_REFCOUNTS;
893             raw_file
894                 .file_mut()
895                 .seek(SeekFrom::Start(0))
896                 .map_err(Error::SeekingFile)?;
897             header.write_to(raw_file.file_mut())?;
898 
899             Ok(())
900         }
901 
902         let cluster_size = raw_file.cluster_size();
903 
904         let file_size = raw_file
905             .file_mut()
906             .metadata()
907             .map_err(Error::GettingFileSize)?
908             .len();
909 
910         let refcount_bits = 1u64 << header.refcount_order;
911         let refcount_bytes = div_round_up_u64(refcount_bits, 8);
912         let refcount_block_entries = cluster_size / refcount_bytes;
913         let pointers_per_cluster = cluster_size / size_of::<u64>() as u64;
914         let data_clusters = div_round_up_u64(header.size, cluster_size);
915         let l2_clusters = div_round_up_u64(data_clusters, pointers_per_cluster);
916         let l1_clusters = div_round_up_u64(l2_clusters, cluster_size);
917         let header_clusters = div_round_up_u64(size_of::<QcowHeader>() as u64, cluster_size);
918         let max_clusters = data_clusters + l2_clusters + l1_clusters + header_clusters;
919         let mut max_valid_cluster_index = max_clusters;
920         let refblock_clusters = div_round_up_u64(max_valid_cluster_index, refcount_block_entries);
921         let reftable_clusters = div_round_up_u64(refblock_clusters, pointers_per_cluster);
922         // Account for refblocks and the ref table size needed to address them.
923         let refblocks_for_refs = div_round_up_u64(
924             refblock_clusters + reftable_clusters,
925             refcount_block_entries,
926         );
927         let reftable_clusters_for_refs =
928             div_round_up_u64(refblocks_for_refs, refcount_block_entries);
929         max_valid_cluster_index += refblock_clusters + reftable_clusters;
930         max_valid_cluster_index += refblocks_for_refs + reftable_clusters_for_refs;
931 
932         if max_valid_cluster_index > MAX_RAM_POINTER_TABLE_SIZE {
933             return Err(Error::InvalidRefcountTableSize(max_valid_cluster_index));
934         }
935 
936         let max_valid_cluster_offset = max_valid_cluster_index * cluster_size;
937         if max_valid_cluster_offset < file_size - cluster_size {
938             return Err(Error::InvalidRefcountTableSize(max_valid_cluster_offset));
939         }
940 
941         let mut refcounts = vec![0; max_valid_cluster_index as usize];
942 
943         // Find all references clusters and rebuild refcounts.
944         set_header_refcount(&mut refcounts, cluster_size)?;
945         set_l1_refcounts(&mut refcounts, header.clone(), cluster_size)?;
946         set_data_refcounts(&mut refcounts, header.clone(), cluster_size, raw_file)?;
947         set_refcount_table_refcounts(&mut refcounts, header.clone(), cluster_size)?;
948 
949         // Allocate clusters to store the new reference count blocks.
950         let ref_table = alloc_refblocks(
951             &mut refcounts,
952             cluster_size,
953             refblock_clusters,
954             pointers_per_cluster,
955         )?;
956 
957         // Write updated reference counts and point the reftable at them.
958         write_refblocks(
959             &refcounts,
960             header,
961             &ref_table,
962             raw_file,
963             refcount_block_entries,
964         )
965     }
966 
967     // Limits the range so that it doesn't exceed the virtual size of the file.
limit_range_file(&self, address: u64, count: usize) -> usize968     fn limit_range_file(&self, address: u64, count: usize) -> usize {
969         if address.checked_add(count as u64).is_none() || address > self.virtual_size() {
970             return 0;
971         }
972         min(count as u64, self.virtual_size() - address) as usize
973     }
974 
975     // Limits the range so that it doesn't overflow the end of a cluster.
limit_range_cluster(&self, address: u64, count: usize) -> usize976     fn limit_range_cluster(&self, address: u64, count: usize) -> usize {
977         let offset: u64 = self.raw_file.cluster_offset(address);
978         let limit = self.raw_file.cluster_size() - offset;
979         min(count as u64, limit) as usize
980     }
981 
982     // Gets the maximum virtual size of this image.
virtual_size(&self) -> u64983     fn virtual_size(&self) -> u64 {
984         self.header.size
985     }
986 
987     // Gets the offset of `address` in the L1 table.
l1_address_offset(&self, address: u64) -> u64988     fn l1_address_offset(&self, address: u64) -> u64 {
989         let l1_index = self.l1_table_index(address);
990         l1_index * size_of::<u64>() as u64
991     }
992 
993     // Gets the offset of `address` in the L1 table.
l1_table_index(&self, address: u64) -> u64994     fn l1_table_index(&self, address: u64) -> u64 {
995         (address / self.raw_file.cluster_size()) / self.l2_entries
996     }
997 
998     // Gets the offset of `address` in the L2 table.
l2_table_index(&self, address: u64) -> u64999     fn l2_table_index(&self, address: u64) -> u64 {
1000         (address / self.raw_file.cluster_size()) % self.l2_entries
1001     }
1002 
1003     // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters have
1004     // yet to be allocated, return None.
file_offset_read(&mut self, address: u64) -> std::io::Result<Option<u64>>1005     fn file_offset_read(&mut self, address: u64) -> std::io::Result<Option<u64>> {
1006         if address >= self.virtual_size() as u64 {
1007             return Err(std::io::Error::from_raw_os_error(EINVAL));
1008         }
1009 
1010         let l1_index = self.l1_table_index(address) as usize;
1011         let l2_addr_disk = *self
1012             .l1_table
1013             .get(l1_index)
1014             .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1015 
1016         if l2_addr_disk == 0 {
1017             // Reading from an unallocated cluster will return zeros.
1018             return Ok(None);
1019         }
1020 
1021         let l2_index = self.l2_table_index(address) as usize;
1022 
1023         if !self.l2_cache.contains_key(&l1_index) {
1024             // Not in the cache.
1025             let table =
1026                 VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
1027 
1028             let l1_table = &self.l1_table;
1029             let raw_file = &mut self.raw_file;
1030             self.l2_cache.insert(l1_index, table, |index, evicted| {
1031                 raw_file.write_pointer_table(
1032                     l1_table[index],
1033                     evicted.get_values(),
1034                     CLUSTER_USED_FLAG,
1035                 )
1036             })?;
1037         };
1038 
1039         let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
1040         if cluster_addr == 0 {
1041             return Ok(None);
1042         }
1043         Ok(Some(cluster_addr + self.raw_file.cluster_offset(address)))
1044     }
1045 
1046     // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters need
1047     // to be allocated, they will be.
file_offset_write(&mut self, address: u64) -> std::io::Result<u64>1048     fn file_offset_write(&mut self, address: u64) -> std::io::Result<u64> {
1049         if address >= self.virtual_size() as u64 {
1050             return Err(std::io::Error::from_raw_os_error(EINVAL));
1051         }
1052 
1053         let l1_index = self.l1_table_index(address) as usize;
1054         let l2_addr_disk = *self
1055             .l1_table
1056             .get(l1_index)
1057             .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1058         let l2_index = self.l2_table_index(address) as usize;
1059 
1060         let mut set_refcounts = Vec::new();
1061 
1062         if !self.l2_cache.contains_key(&l1_index) {
1063             // Not in the cache.
1064             let l2_table = if l2_addr_disk == 0 {
1065                 // Allocate a new cluster to store the L2 table and update the L1 table to point
1066                 // to the new table.
1067                 let new_addr: u64 = self.get_new_cluster(None)?;
1068                 // The cluster refcount starts at one meaning it is used but doesn't need COW.
1069                 set_refcounts.push((new_addr, 1));
1070                 self.l1_table[l1_index] = new_addr;
1071                 VecCache::new(self.l2_entries as usize)
1072             } else {
1073                 VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?)
1074             };
1075             let l1_table = &self.l1_table;
1076             let raw_file = &mut self.raw_file;
1077             self.l2_cache.insert(l1_index, l2_table, |index, evicted| {
1078                 raw_file.write_pointer_table(
1079                     l1_table[index],
1080                     evicted.get_values(),
1081                     CLUSTER_USED_FLAG,
1082                 )
1083             })?;
1084         }
1085 
1086         let cluster_addr = match self.l2_cache.get(&l1_index).unwrap()[l2_index] {
1087             0 => {
1088                 let initial_data = if let Some(backing) = self.backing_file.as_mut() {
1089                     let cluster_size = self.raw_file.cluster_size();
1090                     let cluster_begin = address - (address % cluster_size);
1091                     let mut cluster_data = vec![0u8; cluster_size as usize];
1092                     let volatile_slice = VolatileSlice::new(&mut cluster_data);
1093                     backing.read_exact_at_volatile(volatile_slice, cluster_begin)?;
1094                     Some(cluster_data)
1095                 } else {
1096                     None
1097                 };
1098                 // Need to allocate a data cluster
1099                 let cluster_addr = self.append_data_cluster(initial_data)?;
1100                 self.update_cluster_addr(l1_index, l2_index, cluster_addr, &mut set_refcounts)?;
1101                 cluster_addr
1102             }
1103             a => a,
1104         };
1105 
1106         for (addr, count) in set_refcounts {
1107             let mut newly_unref = self.set_cluster_refcount(addr, count)?;
1108             self.unref_clusters.append(&mut newly_unref);
1109         }
1110 
1111         Ok(cluster_addr + self.raw_file.cluster_offset(address))
1112     }
1113 
1114     // Updates the l1 and l2 tables to point to the new `cluster_addr`.
update_cluster_addr( &mut self, l1_index: usize, l2_index: usize, cluster_addr: u64, set_refcounts: &mut Vec<(u64, u16)>, ) -> io::Result<()>1115     fn update_cluster_addr(
1116         &mut self,
1117         l1_index: usize,
1118         l2_index: usize,
1119         cluster_addr: u64,
1120         set_refcounts: &mut Vec<(u64, u16)>,
1121     ) -> io::Result<()> {
1122         if !self.l2_cache.get(&l1_index).unwrap().dirty() {
1123             // Free the previously used cluster if one exists. Modified tables are always
1124             // witten to new clusters so the L1 table can be committed to disk after they
1125             // are and L1 never points at an invalid table.
1126             // The index must be valid from when it was insterted.
1127             let addr = self.l1_table[l1_index];
1128             if addr != 0 {
1129                 self.unref_clusters.push(addr);
1130                 set_refcounts.push((addr, 0));
1131             }
1132 
1133             // Allocate a new cluster to store the L2 table and update the L1 table to point
1134             // to the new table. The cluster will be written when the cache is flushed, no
1135             // need to copy the data now.
1136             let new_addr: u64 = self.get_new_cluster(None)?;
1137             // The cluster refcount starts at one indicating it is used but doesn't need
1138             // COW.
1139             set_refcounts.push((new_addr, 1));
1140             self.l1_table[l1_index] = new_addr;
1141         }
1142         // 'unwrap' is OK because it was just added.
1143         self.l2_cache.get_mut(&l1_index).unwrap()[l2_index] = cluster_addr;
1144         Ok(())
1145     }
1146 
1147     // Allocate a new cluster and return its offset within the raw file.
get_new_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64>1148     fn get_new_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64> {
1149         // First use a pre allocated cluster if one is available.
1150         if let Some(free_cluster) = self.avail_clusters.pop() {
1151             if let Some(initial_data) = initial_data {
1152                 self.raw_file.write_cluster(free_cluster, initial_data)?;
1153             } else {
1154                 self.raw_file.zero_cluster(free_cluster)?;
1155             }
1156             return Ok(free_cluster);
1157         }
1158 
1159         let max_valid_cluster_offset = self.refcounts.max_valid_cluster_offset();
1160         if let Some(new_cluster) = self.raw_file.add_cluster_end(max_valid_cluster_offset)? {
1161             if let Some(initial_data) = initial_data {
1162                 self.raw_file.write_cluster(new_cluster, initial_data)?;
1163             }
1164             Ok(new_cluster)
1165         } else {
1166             error!("No free clusters in get_new_cluster()");
1167             Err(std::io::Error::from_raw_os_error(ENOSPC))
1168         }
1169     }
1170 
1171     // Allocate and initialize a new data cluster. Returns the offset of the
1172     // cluster in to the file on success.
append_data_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64>1173     fn append_data_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64> {
1174         let new_addr: u64 = self.get_new_cluster(initial_data)?;
1175         // The cluster refcount starts at one indicating it is used but doesn't need COW.
1176         let mut newly_unref = self.set_cluster_refcount(new_addr, 1)?;
1177         self.unref_clusters.append(&mut newly_unref);
1178         Ok(new_addr)
1179     }
1180 
1181     // Returns true if the cluster containing `address` is already allocated.
cluster_allocated(&mut self, address: u64) -> std::io::Result<bool>1182     fn cluster_allocated(&mut self, address: u64) -> std::io::Result<bool> {
1183         if address >= self.virtual_size() as u64 {
1184             return Err(std::io::Error::from_raw_os_error(EINVAL));
1185         }
1186 
1187         let l1_index = self.l1_table_index(address) as usize;
1188         let l2_addr_disk = *self
1189             .l1_table
1190             .get(l1_index)
1191             .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1192         let l2_index = self.l2_table_index(address) as usize;
1193 
1194         if l2_addr_disk == 0 {
1195             // The whole L2 table for this address is not allocated yet,
1196             // so the cluster must also be unallocated.
1197             return Ok(false);
1198         }
1199 
1200         if !self.l2_cache.contains_key(&l1_index) {
1201             // Not in the cache.
1202             let table =
1203                 VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
1204             let l1_table = &self.l1_table;
1205             let raw_file = &mut self.raw_file;
1206             self.l2_cache.insert(l1_index, table, |index, evicted| {
1207                 raw_file.write_pointer_table(
1208                     l1_table[index],
1209                     evicted.get_values(),
1210                     CLUSTER_USED_FLAG,
1211                 )
1212             })?;
1213         }
1214 
1215         let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
1216         // If cluster_addr != 0, the cluster is allocated.
1217         Ok(cluster_addr != 0)
1218     }
1219 
1220     // Find the first guest address greater than or equal to `address` whose allocation state
1221     // matches `allocated`.
find_allocated_cluster( &mut self, address: u64, allocated: bool, ) -> std::io::Result<Option<u64>>1222     fn find_allocated_cluster(
1223         &mut self,
1224         address: u64,
1225         allocated: bool,
1226     ) -> std::io::Result<Option<u64>> {
1227         let size = self.virtual_size();
1228         if address >= size {
1229             return Ok(None);
1230         }
1231 
1232         // If offset is already within a hole, return it.
1233         if self.cluster_allocated(address)? == allocated {
1234             return Ok(Some(address));
1235         }
1236 
1237         // Skip to the next cluster boundary.
1238         let cluster_size = self.raw_file.cluster_size();
1239         let mut cluster_addr = (address / cluster_size + 1) * cluster_size;
1240 
1241         // Search for clusters with the desired allocation state.
1242         while cluster_addr < size {
1243             if self.cluster_allocated(cluster_addr)? == allocated {
1244                 return Ok(Some(cluster_addr));
1245             }
1246             cluster_addr += cluster_size;
1247         }
1248 
1249         Ok(None)
1250     }
1251 
1252     // Deallocate the storage for the cluster starting at `address`.
1253     // Any future reads of this cluster will return all zeroes (or the backing file, if in use).
deallocate_cluster(&mut self, address: u64) -> std::io::Result<()>1254     fn deallocate_cluster(&mut self, address: u64) -> std::io::Result<()> {
1255         if address >= self.virtual_size() as u64 {
1256             return Err(std::io::Error::from_raw_os_error(EINVAL));
1257         }
1258 
1259         let l1_index = self.l1_table_index(address) as usize;
1260         let l2_addr_disk = *self
1261             .l1_table
1262             .get(l1_index)
1263             .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1264         let l2_index = self.l2_table_index(address) as usize;
1265 
1266         if l2_addr_disk == 0 {
1267             // The whole L2 table for this address is not allocated yet,
1268             // so the cluster must also be unallocated.
1269             return Ok(());
1270         }
1271 
1272         if !self.l2_cache.contains_key(&l1_index) {
1273             // Not in the cache.
1274             let table =
1275                 VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
1276             let l1_table = &self.l1_table;
1277             let raw_file = &mut self.raw_file;
1278             self.l2_cache.insert(l1_index, table, |index, evicted| {
1279                 raw_file.write_pointer_table(
1280                     l1_table[index],
1281                     evicted.get_values(),
1282                     CLUSTER_USED_FLAG,
1283                 )
1284             })?;
1285         }
1286 
1287         let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
1288         if cluster_addr == 0 {
1289             // This cluster is already unallocated; nothing to do.
1290             return Ok(());
1291         }
1292 
1293         // Decrement the refcount.
1294         let refcount = self
1295             .refcounts
1296             .get_cluster_refcount(&mut self.raw_file, cluster_addr)
1297             .map_err(|_| std::io::Error::from_raw_os_error(EINVAL))?;
1298         if refcount == 0 {
1299             return Err(std::io::Error::from_raw_os_error(EINVAL));
1300         }
1301 
1302         let new_refcount = refcount - 1;
1303         let mut newly_unref = self.set_cluster_refcount(cluster_addr, new_refcount)?;
1304         self.unref_clusters.append(&mut newly_unref);
1305 
1306         // Rewrite the L2 entry to remove the cluster mapping.
1307         // unwrap is safe as we just checked/inserted this entry.
1308         self.l2_cache.get_mut(&l1_index).unwrap()[l2_index] = 0;
1309 
1310         if new_refcount == 0 {
1311             let cluster_size = self.raw_file.cluster_size();
1312             // This cluster is no longer in use; deallocate the storage.
1313             // The underlying FS may not support FALLOC_FL_PUNCH_HOLE,
1314             // so don't treat an error as fatal.  Future reads will return zeros anyways.
1315             let _ = self
1316                 .raw_file
1317                 .file_mut()
1318                 .punch_hole(cluster_addr, cluster_size);
1319             self.unref_clusters.push(cluster_addr);
1320         }
1321         Ok(())
1322     }
1323 
1324     // Fill a range of `length` bytes starting at `address` with zeroes.
1325     // Any future reads of this range will return all zeroes.
1326     // If there is no backing file, this will deallocate cluster storage when possible.
zero_bytes(&mut self, address: u64, length: usize) -> std::io::Result<()>1327     fn zero_bytes(&mut self, address: u64, length: usize) -> std::io::Result<()> {
1328         let write_count: usize = self.limit_range_file(address, length);
1329 
1330         let mut nwritten: usize = 0;
1331         while nwritten < write_count {
1332             let curr_addr = address + nwritten as u64;
1333             let count = self.limit_range_cluster(curr_addr, write_count - nwritten);
1334 
1335             if self.backing_file.is_none() && count == self.raw_file.cluster_size() as usize {
1336                 // Full cluster and no backing file in use - deallocate the storage.
1337                 self.deallocate_cluster(curr_addr)?;
1338             } else {
1339                 // Partial cluster - zero out the relevant bytes.
1340                 let offset = if self.backing_file.is_some() {
1341                     // There is a backing file, so we need to allocate a cluster in order to
1342                     // zero out the hole-punched bytes such that the backing file contents do not
1343                     // show through.
1344                     Some(self.file_offset_write(curr_addr)?)
1345                 } else {
1346                     // Any space in unallocated clusters can be left alone, since
1347                     // unallocated clusters already read back as zeroes.
1348                     self.file_offset_read(curr_addr)?
1349                 };
1350                 if let Some(offset) = offset {
1351                     // Partial cluster - zero it out.
1352                     self.raw_file
1353                         .file_mut()
1354                         .write_zeroes_all_at(offset, count)?;
1355                 }
1356             }
1357 
1358             nwritten += count;
1359         }
1360         Ok(())
1361     }
1362 
1363     // Reads an L2 cluster from the disk, returning an error if the file can't be read or if any
1364     // cluster is compressed.
read_l2_cluster(raw_file: &mut QcowRawFile, cluster_addr: u64) -> std::io::Result<Vec<u64>>1365     fn read_l2_cluster(raw_file: &mut QcowRawFile, cluster_addr: u64) -> std::io::Result<Vec<u64>> {
1366         let file_values = raw_file.read_pointer_cluster(cluster_addr, None)?;
1367         if file_values.iter().any(|entry| entry & COMPRESSED_FLAG != 0) {
1368             return Err(std::io::Error::from_raw_os_error(ENOTSUP));
1369         }
1370         Ok(file_values
1371             .iter()
1372             .map(|entry| *entry & L2_TABLE_OFFSET_MASK)
1373             .collect())
1374     }
1375 
1376     // Set the refcount for a cluster with the given address.
1377     // Returns a list of any refblocks that can be reused, this happens when a refblock is moved,
1378     // the old location can be reused.
set_cluster_refcount(&mut self, address: u64, refcount: u16) -> std::io::Result<Vec<u64>>1379     fn set_cluster_refcount(&mut self, address: u64, refcount: u16) -> std::io::Result<Vec<u64>> {
1380         let mut added_clusters = Vec::new();
1381         let mut unref_clusters = Vec::new();
1382         let mut refcount_set = false;
1383         let mut new_cluster = None;
1384 
1385         while !refcount_set {
1386             match self.refcounts.set_cluster_refcount(
1387                 &mut self.raw_file,
1388                 address,
1389                 refcount,
1390                 new_cluster.take(),
1391             ) {
1392                 Ok(None) => {
1393                     refcount_set = true;
1394                 }
1395                 Ok(Some(freed_cluster)) => {
1396                     unref_clusters.push(freed_cluster);
1397                     refcount_set = true;
1398                 }
1399                 Err(refcount::Error::EvictingRefCounts(e)) => {
1400                     return Err(e);
1401                 }
1402                 Err(refcount::Error::InvalidIndex) => {
1403                     return Err(std::io::Error::from_raw_os_error(EINVAL));
1404                 }
1405                 Err(refcount::Error::NeedCluster(addr)) => {
1406                     // Read the address and call set_cluster_refcount again.
1407                     new_cluster = Some((
1408                         addr,
1409                         VecCache::from_vec(self.raw_file.read_refcount_block(addr)?),
1410                     ));
1411                 }
1412                 Err(refcount::Error::NeedNewCluster) => {
1413                     // Allocate the cluster and call set_cluster_refcount again.
1414                     let addr = self.get_new_cluster(None)?;
1415                     added_clusters.push(addr);
1416                     new_cluster = Some((
1417                         addr,
1418                         VecCache::new(self.refcounts.refcounts_per_block() as usize),
1419                     ));
1420                 }
1421                 Err(refcount::Error::ReadingRefCounts(e)) => {
1422                     return Err(e);
1423                 }
1424             }
1425         }
1426 
1427         for addr in added_clusters {
1428             self.set_cluster_refcount(addr, 1)?;
1429         }
1430         Ok(unref_clusters)
1431     }
1432 
sync_caches(&mut self) -> std::io::Result<()>1433     fn sync_caches(&mut self) -> std::io::Result<()> {
1434         // Write out all dirty L2 tables.
1435         for (l1_index, l2_table) in self.l2_cache.iter_mut().filter(|(_k, v)| v.dirty()) {
1436             // The index must be valid from when we insterted it.
1437             let addr = self.l1_table[*l1_index];
1438             if addr != 0 {
1439                 self.raw_file.write_pointer_table(
1440                     addr,
1441                     l2_table.get_values(),
1442                     CLUSTER_USED_FLAG,
1443                 )?;
1444             } else {
1445                 return Err(std::io::Error::from_raw_os_error(EINVAL));
1446             }
1447             l2_table.mark_clean();
1448         }
1449         // Write the modified refcount blocks.
1450         self.refcounts.flush_blocks(&mut self.raw_file)?;
1451         // Make sure metadata(file len) and all data clusters are written.
1452         self.raw_file.file_mut().sync_all()?;
1453 
1454         // Push L1 table and refcount table last as all the clusters they point to are now
1455         // guaranteed to be valid.
1456         let mut sync_required = false;
1457         if self.l1_table.dirty() {
1458             self.raw_file.write_pointer_table(
1459                 self.header.l1_table_offset,
1460                 &self.l1_table.get_values(),
1461                 0,
1462             )?;
1463             self.l1_table.mark_clean();
1464             sync_required = true;
1465         }
1466         sync_required |= self.refcounts.flush_table(&mut self.raw_file)?;
1467         if sync_required {
1468             self.raw_file.file_mut().sync_data()?;
1469         }
1470         Ok(())
1471     }
1472 
1473     // Reads `count` bytes starting at `address`, calling `cb` repeatedly with the data source,
1474     // number of bytes read so far, offset to read from, and number of bytes to read from the file
1475     // in that invocation. If None is given to `cb` in place of the backing file, the `cb` should
1476     // infer zeros would have been read.
read_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize> where F: FnMut(Option<&mut dyn DiskFile>, usize, u64, usize) -> std::io::Result<()>,1477     fn read_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize>
1478     where
1479         F: FnMut(Option<&mut dyn DiskFile>, usize, u64, usize) -> std::io::Result<()>,
1480     {
1481         let read_count: usize = self.limit_range_file(address, count);
1482 
1483         let mut nread: usize = 0;
1484         while nread < read_count {
1485             let curr_addr = address + nread as u64;
1486             let file_offset = self.file_offset_read(curr_addr)?;
1487             let count = self.limit_range_cluster(curr_addr, read_count - nread);
1488 
1489             if let Some(offset) = file_offset {
1490                 cb(Some(self.raw_file.file_mut()), nread, offset, count)?;
1491             } else if let Some(backing) = self.backing_file.as_mut() {
1492                 cb(Some(backing.as_mut()), nread, curr_addr, count)?;
1493             } else {
1494                 cb(None, nread, 0, count)?;
1495             }
1496 
1497             nread += count;
1498         }
1499         Ok(read_count)
1500     }
1501 
1502     // Writes `count` bytes starting at `address`, calling `cb` repeatedly with the backing file,
1503     // number of bytes written so far, and number of bytes to write to the file in that invocation.
write_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize> where F: FnMut(&mut File, usize, usize) -> std::io::Result<()>,1504     fn write_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize>
1505     where
1506         F: FnMut(&mut File, usize, usize) -> std::io::Result<()>,
1507     {
1508         let write_count: usize = self.limit_range_file(address, count);
1509 
1510         let mut nwritten: usize = 0;
1511         while nwritten < write_count {
1512             let curr_addr = address + nwritten as u64;
1513             let offset = self.file_offset_write(curr_addr)?;
1514             let count = self.limit_range_cluster(curr_addr, write_count - nwritten);
1515 
1516             if let Err(e) = self.raw_file.file_mut().seek(SeekFrom::Start(offset)) {
1517                 return Err(e);
1518             }
1519             if let Err(e) = cb(self.raw_file.file_mut(), nwritten, count) {
1520                 return Err(e);
1521             }
1522 
1523             nwritten += count;
1524         }
1525         Ok(write_count)
1526     }
1527 }
1528 
1529 impl Drop for QcowFile {
drop(&mut self)1530     fn drop(&mut self) {
1531         let _ = self.sync_caches();
1532     }
1533 }
1534 
1535 impl AsRawDescriptors for QcowFile {
as_raw_descriptors(&self) -> Vec<RawDescriptor>1536     fn as_raw_descriptors(&self) -> Vec<RawDescriptor> {
1537         let mut descriptors = vec![self.raw_file.file().as_raw_descriptor()];
1538         if let Some(backing) = &self.backing_file {
1539             descriptors.append(&mut backing.as_raw_descriptors());
1540         }
1541         descriptors
1542     }
1543 }
1544 
1545 impl Read for QcowFile {
read(&mut self, buf: &mut [u8]) -> std::io::Result<usize>1546     fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
1547         let len = buf.len();
1548         let slice = VolatileSlice::new(buf);
1549         let read_count = self.read_cb(
1550             self.current_offset,
1551             len,
1552             |file, already_read, offset, count| {
1553                 let sub_slice = slice.get_slice(already_read, count).unwrap();
1554                 match file {
1555                     Some(f) => f.read_exact_at_volatile(sub_slice, offset),
1556                     None => {
1557                         sub_slice.write_bytes(0);
1558                         Ok(())
1559                     }
1560                 }
1561             },
1562         )?;
1563         self.current_offset += read_count as u64;
1564         Ok(read_count)
1565     }
1566 }
1567 
1568 impl Seek for QcowFile {
seek(&mut self, pos: SeekFrom) -> std::io::Result<u64>1569     fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
1570         let new_offset: Option<u64> = match pos {
1571             SeekFrom::Start(off) => Some(off),
1572             SeekFrom::End(off) => {
1573                 if off < 0 {
1574                     0i64.checked_sub(off)
1575                         .and_then(|increment| self.virtual_size().checked_sub(increment as u64))
1576                 } else {
1577                     self.virtual_size().checked_add(off as u64)
1578                 }
1579             }
1580             SeekFrom::Current(off) => {
1581                 if off < 0 {
1582                     0i64.checked_sub(off)
1583                         .and_then(|increment| self.current_offset.checked_sub(increment as u64))
1584                 } else {
1585                     self.current_offset.checked_add(off as u64)
1586                 }
1587             }
1588         };
1589 
1590         if let Some(o) = new_offset {
1591             if o <= self.virtual_size() {
1592                 self.current_offset = o;
1593                 return Ok(o);
1594             }
1595         }
1596         Err(std::io::Error::from_raw_os_error(EINVAL))
1597     }
1598 }
1599 
1600 impl Write for QcowFile {
write(&mut self, buf: &[u8]) -> std::io::Result<usize>1601     fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1602         let write_count =
1603             self.write_cb(self.current_offset, buf.len(), |file, offset, count| {
1604                 file.write_all(&buf[offset..(offset + count)])
1605             })?;
1606         self.current_offset += write_count as u64;
1607         Ok(write_count)
1608     }
1609 
flush(&mut self) -> std::io::Result<()>1610     fn flush(&mut self) -> std::io::Result<()> {
1611         self.sync_caches()?;
1612         self.avail_clusters.append(&mut self.unref_clusters);
1613         Ok(())
1614     }
1615 }
1616 
1617 impl FileReadWriteVolatile for QcowFile {
read_volatile(&mut self, slice: VolatileSlice) -> io::Result<usize>1618     fn read_volatile(&mut self, slice: VolatileSlice) -> io::Result<usize> {
1619         let read_count = self.read_cb(
1620             self.current_offset,
1621             slice.size(),
1622             |file, read, offset, count| {
1623                 let sub_slice = slice.get_slice(read, count).unwrap();
1624                 match file {
1625                     Some(f) => f.read_exact_at_volatile(sub_slice, offset),
1626                     None => {
1627                         sub_slice.write_bytes(0);
1628                         Ok(())
1629                     }
1630                 }
1631             },
1632         )?;
1633         self.current_offset += read_count as u64;
1634         Ok(read_count)
1635     }
1636 
write_volatile(&mut self, slice: VolatileSlice) -> io::Result<usize>1637     fn write_volatile(&mut self, slice: VolatileSlice) -> io::Result<usize> {
1638         let write_count =
1639             self.write_cb(self.current_offset, slice.size(), |file, offset, count| {
1640                 let sub_slice = slice.get_slice(offset, count).unwrap();
1641                 file.write_all_volatile(sub_slice)
1642             })?;
1643         self.current_offset += write_count as u64;
1644         Ok(write_count)
1645     }
1646 }
1647 
1648 impl FileReadWriteAtVolatile for QcowFile {
read_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> io::Result<usize>1649     fn read_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> io::Result<usize> {
1650         self.read_cb(offset, slice.size(), |file, read, offset, count| {
1651             let sub_slice = slice.get_slice(read, count).unwrap();
1652             match file {
1653                 Some(f) => f.read_exact_at_volatile(sub_slice, offset),
1654                 None => {
1655                     sub_slice.write_bytes(0);
1656                     Ok(())
1657                 }
1658             }
1659         })
1660     }
1661 
write_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> io::Result<usize>1662     fn write_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> io::Result<usize> {
1663         self.write_cb(offset, slice.size(), |file, offset, count| {
1664             let sub_slice = slice.get_slice(offset, count).unwrap();
1665             file.write_all_volatile(sub_slice)
1666         })
1667     }
1668 }
1669 
1670 impl FileSync for QcowFile {
fsync(&mut self) -> std::io::Result<()>1671     fn fsync(&mut self) -> std::io::Result<()> {
1672         self.flush()
1673     }
1674 }
1675 
1676 impl FileSetLen for QcowFile {
set_len(&self, _len: u64) -> std::io::Result<()>1677     fn set_len(&self, _len: u64) -> std::io::Result<()> {
1678         Err(std::io::Error::new(
1679             std::io::ErrorKind::Other,
1680             "set_len() not supported for QcowFile",
1681         ))
1682     }
1683 }
1684 
1685 impl DiskGetLen for QcowFile {
get_len(&self) -> io::Result<u64>1686     fn get_len(&self) -> io::Result<u64> {
1687         Ok(self.virtual_size())
1688     }
1689 }
1690 
1691 impl FileAllocate for QcowFile {
allocate(&mut self, offset: u64, len: u64) -> io::Result<()>1692     fn allocate(&mut self, offset: u64, len: u64) -> io::Result<()> {
1693         // Call write_cb with a do-nothing callback, which will have the effect
1694         // of allocating all clusters in the specified range.
1695         self.write_cb(offset, len as usize, |_file, _offset, _count| Ok(()))?;
1696         Ok(())
1697     }
1698 }
1699 
1700 impl PunchHole for QcowFile {
punch_hole(&mut self, offset: u64, length: u64) -> std::io::Result<()>1701     fn punch_hole(&mut self, offset: u64, length: u64) -> std::io::Result<()> {
1702         let mut remaining = length;
1703         let mut offset = offset;
1704         while remaining > 0 {
1705             let chunk_length = min(remaining, std::usize::MAX as u64) as usize;
1706             self.zero_bytes(offset, chunk_length)?;
1707             remaining -= chunk_length as u64;
1708             offset += chunk_length as u64;
1709         }
1710         Ok(())
1711     }
1712 }
1713 
1714 impl WriteZeroesAt for QcowFile {
write_zeroes_at(&mut self, offset: u64, length: usize) -> io::Result<usize>1715     fn write_zeroes_at(&mut self, offset: u64, length: usize) -> io::Result<usize> {
1716         self.punch_hole(offset, length as u64)?;
1717         Ok(length)
1718     }
1719 }
1720 
1721 impl SeekHole for QcowFile {
seek_hole(&mut self, offset: u64) -> io::Result<Option<u64>>1722     fn seek_hole(&mut self, offset: u64) -> io::Result<Option<u64>> {
1723         match self.find_allocated_cluster(offset, false) {
1724             Err(e) => Err(e),
1725             Ok(None) => {
1726                 if offset < self.virtual_size() {
1727                     Ok(Some(self.seek(SeekFrom::End(0))?))
1728                 } else {
1729                     Ok(None)
1730                 }
1731             }
1732             Ok(Some(o)) => {
1733                 self.seek(SeekFrom::Start(o))?;
1734                 Ok(Some(o))
1735             }
1736         }
1737     }
1738 
seek_data(&mut self, offset: u64) -> io::Result<Option<u64>>1739     fn seek_data(&mut self, offset: u64) -> io::Result<Option<u64>> {
1740         match self.find_allocated_cluster(offset, true) {
1741             Err(e) => Err(e),
1742             Ok(None) => Ok(None),
1743             Ok(Some(o)) => {
1744                 self.seek(SeekFrom::Start(o))?;
1745                 Ok(Some(o))
1746             }
1747         }
1748     }
1749 }
1750 
1751 // Returns an Error if the given offset doesn't align to a cluster boundary.
offset_is_cluster_boundary(offset: u64, cluster_bits: u32) -> Result<()>1752 fn offset_is_cluster_boundary(offset: u64, cluster_bits: u32) -> Result<()> {
1753     if offset & ((0x01 << cluster_bits) - 1) != 0 {
1754         return Err(Error::InvalidOffset(offset));
1755     }
1756     Ok(())
1757 }
1758 
1759 // Ceiling of the division of `dividend`/`divisor`.
div_round_up_u64(dividend: u64, divisor: u64) -> u641760 fn div_round_up_u64(dividend: u64, divisor: u64) -> u64 {
1761     dividend / divisor + if dividend % divisor != 0 { 1 } else { 0 }
1762 }
1763 
1764 // Ceiling of the division of `dividend`/`divisor`.
div_round_up_u32(dividend: u32, divisor: u32) -> u321765 fn div_round_up_u32(dividend: u32, divisor: u32) -> u32 {
1766     dividend / divisor + if dividend % divisor != 0 { 1 } else { 0 }
1767 }
1768 
1769 #[cfg(test)]
1770 mod tests {
1771     use super::*;
1772     use base::WriteZeroes;
1773     use std::io::{Read, Seek, SeekFrom, Write};
1774     use tempfile::tempfile;
1775 
valid_header() -> Vec<u8>1776     fn valid_header() -> Vec<u8> {
1777         vec![
1778             0x51u8, 0x46, 0x49, 0xfb, // magic
1779             0x00, 0x00, 0x00, 0x03, // version
1780             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset
1781             0x00, 0x00, 0x00, 0x00, // backing file size
1782             0x00, 0x00, 0x00, 0x10, // cluster_bits
1783             0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, // size
1784             0x00, 0x00, 0x00, 0x00, // crypt method
1785             0x00, 0x00, 0x01, 0x00, // L1 size
1786             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // L1 table offset
1787             0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset
1788             0x00, 0x00, 0x00, 0x03, // refcount table clusters
1789             0x00, 0x00, 0x00, 0x00, // nb snapshots
1790             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // snapshots offset
1791             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible_features
1792             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible_features
1793             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear_features
1794             0x00, 0x00, 0x00, 0x04, // refcount_order
1795             0x00, 0x00, 0x00, 0x68, // header_length
1796         ]
1797     }
1798 
1799     // Test case found by clusterfuzz to allocate excessive memory.
test_huge_header() -> Vec<u8>1800     fn test_huge_header() -> Vec<u8> {
1801         vec![
1802             0x51, 0x46, 0x49, 0xfb, // magic
1803             0x00, 0x00, 0x00, 0x03, // version
1804             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset
1805             0x00, 0x00, 0x00, 0x00, // backing file size
1806             0x00, 0x00, 0x00, 0x09, // cluster_bits
1807             0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, // size
1808             0x00, 0x00, 0x00, 0x00, // crypt method
1809             0x00, 0x00, 0x01, 0x00, // L1 size
1810             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // L1 table offset
1811             0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset
1812             0x00, 0x00, 0x00, 0x03, // refcount table clusters
1813             0x00, 0x00, 0x00, 0x00, // nb snapshots
1814             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // snapshots offset
1815             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible_features
1816             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible_features
1817             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear_features
1818             0x00, 0x00, 0x00, 0x04, // refcount_order
1819             0x00, 0x00, 0x00, 0x68, // header_length
1820         ]
1821     }
1822 
basic_file(header: &[u8]) -> File1823     fn basic_file(header: &[u8]) -> File {
1824         let mut disk_file = tempfile().expect("failed to create tempfile");
1825         disk_file.write_all(&header).unwrap();
1826         disk_file.set_len(0x8000_0000).unwrap();
1827         disk_file.seek(SeekFrom::Start(0)).unwrap();
1828         disk_file
1829     }
1830 
with_basic_file<F>(header: &[u8], mut testfn: F) where F: FnMut(File),1831     fn with_basic_file<F>(header: &[u8], mut testfn: F)
1832     where
1833         F: FnMut(File),
1834     {
1835         testfn(basic_file(header)); // File closed when the function exits.
1836     }
1837 
with_default_file<F>(file_size: u64, mut testfn: F) where F: FnMut(QcowFile),1838     fn with_default_file<F>(file_size: u64, mut testfn: F)
1839     where
1840         F: FnMut(QcowFile),
1841     {
1842         let file = tempfile().expect("failed to create tempfile");
1843         let qcow_file = QcowFile::new(file, file_size).unwrap();
1844 
1845         testfn(qcow_file); // File closed when the function exits.
1846     }
1847 
1848     #[test]
default_header()1849     fn default_header() {
1850         let header = QcowHeader::create_for_size_and_path(0x10_0000, None);
1851         let mut disk_file = tempfile().expect("failed to create tempfile");
1852         header
1853             .expect("Failed to create header.")
1854             .write_to(&mut disk_file)
1855             .expect("Failed to write header to shm.");
1856         disk_file.seek(SeekFrom::Start(0)).unwrap();
1857         QcowFile::from(disk_file).expect("Failed to create Qcow from default Header");
1858     }
1859 
1860     #[test]
header_read()1861     fn header_read() {
1862         with_basic_file(&valid_header(), |mut disk_file: File| {
1863             QcowHeader::new(&mut disk_file).expect("Failed to create Header.");
1864         });
1865     }
1866 
1867     #[test]
header_with_backing()1868     fn header_with_backing() {
1869         let header = QcowHeader::create_for_size_and_path(0x10_0000, Some("/my/path/to/a/file"))
1870             .expect("Failed to create header.");
1871         let mut disk_file = tempfile().expect("failed to create tempfile");
1872         header
1873             .write_to(&mut disk_file)
1874             .expect("Failed to write header to shm.");
1875         disk_file.seek(SeekFrom::Start(0)).unwrap();
1876         let read_header = QcowHeader::new(&mut disk_file).expect("Failed to create header.");
1877         assert_eq!(
1878             header.backing_file_path,
1879             Some(String::from("/my/path/to/a/file"))
1880         );
1881         assert_eq!(read_header.backing_file_path, header.backing_file_path);
1882     }
1883 
1884     #[test]
invalid_magic()1885     fn invalid_magic() {
1886         let invalid_header = vec![0x51u8, 0x46, 0x4a, 0xfb];
1887         with_basic_file(&invalid_header, |mut disk_file: File| {
1888             QcowHeader::new(&mut disk_file).expect_err("Invalid header worked.");
1889         });
1890     }
1891 
1892     #[test]
invalid_refcount_order()1893     fn invalid_refcount_order() {
1894         let mut header = valid_header();
1895         header[99] = 2;
1896         with_basic_file(&header, |disk_file: File| {
1897             QcowFile::from(disk_file).expect_err("Invalid refcount order worked.");
1898         });
1899     }
1900 
1901     #[test]
invalid_cluster_bits()1902     fn invalid_cluster_bits() {
1903         let mut header = valid_header();
1904         header[23] = 3;
1905         with_basic_file(&header, |disk_file: File| {
1906             QcowFile::from(disk_file).expect_err("Failed to create file.");
1907         });
1908     }
1909 
1910     #[test]
test_header_huge_file()1911     fn test_header_huge_file() {
1912         let header = test_huge_header();
1913         with_basic_file(&header, |disk_file: File| {
1914             QcowFile::from(disk_file).expect_err("Failed to create file.");
1915         });
1916     }
1917 
1918     #[test]
test_header_crazy_file_size_rejected()1919     fn test_header_crazy_file_size_rejected() {
1920         let mut header = valid_header();
1921         &mut header[24..32].copy_from_slice(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1e]);
1922         with_basic_file(&header, |disk_file: File| {
1923             QcowFile::from(disk_file).expect_err("Failed to create file.");
1924         });
1925     }
1926 
1927     #[test]
test_huge_l1_table()1928     fn test_huge_l1_table() {
1929         let mut header = valid_header();
1930         header[36] = 0x12;
1931         with_basic_file(&header, |disk_file: File| {
1932             QcowFile::from(disk_file).expect_err("Failed to create file.");
1933         });
1934     }
1935 
1936     #[test]
test_header_1_tb_file_min_cluster()1937     fn test_header_1_tb_file_min_cluster() {
1938         let mut header = test_huge_header();
1939         header[24] = 0;
1940         header[26] = 1;
1941         header[31] = 0;
1942         // 1 TB with the min cluster size makes the arrays too big, it should fail.
1943         with_basic_file(&header, |disk_file: File| {
1944             QcowFile::from(disk_file).expect_err("Failed to create file.");
1945         });
1946     }
1947 
1948     #[test]
test_header_1_tb_file()1949     fn test_header_1_tb_file() {
1950         let mut header = test_huge_header();
1951         // reset to 1 TB size.
1952         header[24] = 0;
1953         header[26] = 1;
1954         header[31] = 0;
1955         // set cluster_bits
1956         header[23] = 16;
1957         with_basic_file(&header, |disk_file: File| {
1958             let mut qcow = QcowFile::from(disk_file).expect("Failed to create file.");
1959             qcow.seek(SeekFrom::Start(0x100_0000_0000 - 8))
1960                 .expect("Failed to seek.");
1961             let value = 0x0000_0040_3f00_ffffu64;
1962             qcow.write_all(&value.to_le_bytes())
1963                 .expect("failed to write data");
1964         });
1965     }
1966 
1967     #[test]
test_header_huge_num_refcounts()1968     fn test_header_huge_num_refcounts() {
1969         let mut header = valid_header();
1970         &mut header[56..60].copy_from_slice(&[0x02, 0x00, 0xe8, 0xff]);
1971         with_basic_file(&header, |disk_file: File| {
1972             QcowFile::from(disk_file).expect_err("Created disk with crazy refcount clusters");
1973         });
1974     }
1975 
1976     #[test]
test_header_huge_refcount_offset()1977     fn test_header_huge_refcount_offset() {
1978         let mut header = valid_header();
1979         &mut header[48..56].copy_from_slice(&[0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x02, 0x00]);
1980         with_basic_file(&header, |disk_file: File| {
1981             QcowFile::from(disk_file).expect_err("Created disk with crazy refcount offset");
1982         });
1983     }
1984 
1985     #[test]
write_read_start()1986     fn write_read_start() {
1987         with_basic_file(&valid_header(), |disk_file: File| {
1988             let mut q = QcowFile::from(disk_file).unwrap();
1989             q.write(b"test first bytes")
1990                 .expect("Failed to write test string.");
1991             let mut buf = [0u8; 4];
1992             q.seek(SeekFrom::Start(0)).expect("Failed to seek.");
1993             q.read(&mut buf).expect("Failed to read.");
1994             assert_eq!(&buf, b"test");
1995         });
1996     }
1997 
1998     #[test]
write_read_start_backing()1999     fn write_read_start_backing() {
2000         let disk_file = basic_file(&valid_header());
2001         let mut backing = QcowFile::from(disk_file).unwrap();
2002         backing
2003             .write(b"test first bytes")
2004             .expect("Failed to write test string.");
2005         let mut buf = [0u8; 4];
2006         let wrapping_disk_file = basic_file(&valid_header());
2007         let mut wrapping = QcowFile::from(wrapping_disk_file).unwrap();
2008         wrapping.set_backing_file(Some(Box::new(backing)));
2009         wrapping.seek(SeekFrom::Start(0)).expect("Failed to seek.");
2010         wrapping.read(&mut buf).expect("Failed to read.");
2011         assert_eq!(&buf, b"test");
2012     }
2013 
2014     #[test]
write_read_start_backing_overlap()2015     fn write_read_start_backing_overlap() {
2016         let disk_file = basic_file(&valid_header());
2017         let mut backing = QcowFile::from(disk_file).unwrap();
2018         backing
2019             .write(b"test first bytes")
2020             .expect("Failed to write test string.");
2021         let wrapping_disk_file = basic_file(&valid_header());
2022         let mut wrapping = QcowFile::from(wrapping_disk_file).unwrap();
2023         wrapping.set_backing_file(Some(Box::new(backing)));
2024         wrapping.seek(SeekFrom::Start(0)).expect("Failed to seek.");
2025         wrapping
2026             .write(b"TEST")
2027             .expect("Failed to write second test string.");
2028         let mut buf = [0u8; 10];
2029         wrapping.seek(SeekFrom::Start(0)).expect("Failed to seek.");
2030         wrapping.read(&mut buf).expect("Failed to read.");
2031         assert_eq!(&buf, b"TEST first");
2032     }
2033 
2034     #[test]
offset_write_read()2035     fn offset_write_read() {
2036         with_basic_file(&valid_header(), |disk_file: File| {
2037             let mut q = QcowFile::from(disk_file).unwrap();
2038             let b = [0x55u8; 0x1000];
2039             q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek.");
2040             q.write(&b).expect("Failed to write test string.");
2041             let mut buf = [0u8; 4];
2042             q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek.");
2043             q.read(&mut buf).expect("Failed to read.");
2044             assert_eq!(buf[0], 0x55);
2045         });
2046     }
2047 
2048     #[test]
write_zeroes_read()2049     fn write_zeroes_read() {
2050         with_basic_file(&valid_header(), |disk_file: File| {
2051             let mut q = QcowFile::from(disk_file).unwrap();
2052             // Write some test data.
2053             let b = [0x55u8; 0x1000];
2054             q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek.");
2055             q.write(&b).expect("Failed to write test string.");
2056             // Overwrite the test data with zeroes.
2057             q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek.");
2058             q.write_zeroes_all(0x200).expect("Failed to write zeroes.");
2059             // Verify that the correct part of the data was zeroed out.
2060             let mut buf = [0u8; 0x1000];
2061             q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek.");
2062             q.read(&mut buf).expect("Failed to read.");
2063             assert_eq!(buf[0], 0);
2064             assert_eq!(buf[0x1FF], 0);
2065             assert_eq!(buf[0x200], 0x55);
2066             assert_eq!(buf[0xFFF], 0x55);
2067         });
2068     }
2069 
2070     #[test]
write_zeroes_full_cluster()2071     fn write_zeroes_full_cluster() {
2072         // Choose a size that is larger than a cluster.
2073         // valid_header uses cluster_bits = 12, which corresponds to a cluster size of 4096.
2074         const CHUNK_SIZE: usize = 4096 * 2 + 512;
2075         with_basic_file(&valid_header(), |disk_file: File| {
2076             let mut q = QcowFile::from(disk_file).unwrap();
2077             // Write some test data.
2078             let b = [0x55u8; CHUNK_SIZE];
2079             q.seek(SeekFrom::Start(0)).expect("Failed to seek.");
2080             q.write(&b).expect("Failed to write test string.");
2081             // Overwrite the full cluster with zeroes.
2082             q.seek(SeekFrom::Start(0)).expect("Failed to seek.");
2083             q.write_zeroes_all(CHUNK_SIZE)
2084                 .expect("Failed to write zeroes.");
2085             // Verify that the data was zeroed out.
2086             let mut buf = [0u8; CHUNK_SIZE];
2087             q.seek(SeekFrom::Start(0)).expect("Failed to seek.");
2088             q.read(&mut buf).expect("Failed to read.");
2089             assert_eq!(buf[0], 0);
2090             assert_eq!(buf[CHUNK_SIZE - 1], 0);
2091         });
2092     }
2093 
2094     #[test]
write_zeroes_backing()2095     fn write_zeroes_backing() {
2096         let disk_file = basic_file(&valid_header());
2097         let mut backing = QcowFile::from(disk_file).unwrap();
2098         // Write some test data.
2099         let b = [0x55u8; 0x1000];
2100         backing
2101             .seek(SeekFrom::Start(0xfff2000))
2102             .expect("Failed to seek.");
2103         backing.write(&b).expect("Failed to write test string.");
2104         let wrapping_disk_file = basic_file(&valid_header());
2105         let mut wrapping = QcowFile::from(wrapping_disk_file).unwrap();
2106         wrapping.set_backing_file(Some(Box::new(backing)));
2107         // Overwrite the test data with zeroes.
2108         // This should allocate new clusters in the wrapping file so that they can be zeroed.
2109         wrapping
2110             .seek(SeekFrom::Start(0xfff2000))
2111             .expect("Failed to seek.");
2112         wrapping
2113             .write_zeroes_all(0x200)
2114             .expect("Failed to write zeroes.");
2115         // Verify that the correct part of the data was zeroed out.
2116         let mut buf = [0u8; 0x1000];
2117         wrapping
2118             .seek(SeekFrom::Start(0xfff2000))
2119             .expect("Failed to seek.");
2120         wrapping.read(&mut buf).expect("Failed to read.");
2121         assert_eq!(buf[0], 0);
2122         assert_eq!(buf[0x1FF], 0);
2123         assert_eq!(buf[0x200], 0x55);
2124         assert_eq!(buf[0xFFF], 0x55);
2125     }
2126 
2127     #[test]
test_header()2128     fn test_header() {
2129         with_basic_file(&valid_header(), |disk_file: File| {
2130             let q = QcowFile::from(disk_file).unwrap();
2131             assert_eq!(q.virtual_size(), 0x20_0000_0000);
2132         });
2133     }
2134 
2135     #[test]
read_small_buffer()2136     fn read_small_buffer() {
2137         with_basic_file(&valid_header(), |disk_file: File| {
2138             let mut q = QcowFile::from(disk_file).unwrap();
2139             let mut b = [5u8; 16];
2140             q.seek(SeekFrom::Start(1000)).expect("Failed to seek.");
2141             q.read(&mut b).expect("Failed to read.");
2142             assert_eq!(0, b[0]);
2143             assert_eq!(0, b[15]);
2144         });
2145     }
2146 
2147     #[test]
replay_ext4()2148     fn replay_ext4() {
2149         with_basic_file(&valid_header(), |disk_file: File| {
2150             let mut q = QcowFile::from(disk_file).unwrap();
2151             const BUF_SIZE: usize = 0x1000;
2152             let mut b = [0u8; BUF_SIZE];
2153 
2154             struct Transfer {
2155                 pub write: bool,
2156                 pub addr: u64,
2157             };
2158 
2159             // Write transactions from mkfs.ext4.
2160             let xfers: Vec<Transfer> = vec![
2161                 Transfer {
2162                     write: false,
2163                     addr: 0xfff0000,
2164                 },
2165                 Transfer {
2166                     write: false,
2167                     addr: 0xfffe000,
2168                 },
2169                 Transfer {
2170                     write: false,
2171                     addr: 0x0,
2172                 },
2173                 Transfer {
2174                     write: false,
2175                     addr: 0x1000,
2176                 },
2177                 Transfer {
2178                     write: false,
2179                     addr: 0xffff000,
2180                 },
2181                 Transfer {
2182                     write: false,
2183                     addr: 0xffdf000,
2184                 },
2185                 Transfer {
2186                     write: false,
2187                     addr: 0xfff8000,
2188                 },
2189                 Transfer {
2190                     write: false,
2191                     addr: 0xffe0000,
2192                 },
2193                 Transfer {
2194                     write: false,
2195                     addr: 0xffce000,
2196                 },
2197                 Transfer {
2198                     write: false,
2199                     addr: 0xffb6000,
2200                 },
2201                 Transfer {
2202                     write: false,
2203                     addr: 0xffab000,
2204                 },
2205                 Transfer {
2206                     write: false,
2207                     addr: 0xffa4000,
2208                 },
2209                 Transfer {
2210                     write: false,
2211                     addr: 0xff8e000,
2212                 },
2213                 Transfer {
2214                     write: false,
2215                     addr: 0xff86000,
2216                 },
2217                 Transfer {
2218                     write: false,
2219                     addr: 0xff84000,
2220                 },
2221                 Transfer {
2222                     write: false,
2223                     addr: 0xff89000,
2224                 },
2225                 Transfer {
2226                     write: false,
2227                     addr: 0xfe7e000,
2228                 },
2229                 Transfer {
2230                     write: false,
2231                     addr: 0x100000,
2232                 },
2233                 Transfer {
2234                     write: false,
2235                     addr: 0x3000,
2236                 },
2237                 Transfer {
2238                     write: false,
2239                     addr: 0x7000,
2240                 },
2241                 Transfer {
2242                     write: false,
2243                     addr: 0xf000,
2244                 },
2245                 Transfer {
2246                     write: false,
2247                     addr: 0x2000,
2248                 },
2249                 Transfer {
2250                     write: false,
2251                     addr: 0x4000,
2252                 },
2253                 Transfer {
2254                     write: false,
2255                     addr: 0x5000,
2256                 },
2257                 Transfer {
2258                     write: false,
2259                     addr: 0x6000,
2260                 },
2261                 Transfer {
2262                     write: false,
2263                     addr: 0x8000,
2264                 },
2265                 Transfer {
2266                     write: false,
2267                     addr: 0x9000,
2268                 },
2269                 Transfer {
2270                     write: false,
2271                     addr: 0xa000,
2272                 },
2273                 Transfer {
2274                     write: false,
2275                     addr: 0xb000,
2276                 },
2277                 Transfer {
2278                     write: false,
2279                     addr: 0xc000,
2280                 },
2281                 Transfer {
2282                     write: false,
2283                     addr: 0xd000,
2284                 },
2285                 Transfer {
2286                     write: false,
2287                     addr: 0xe000,
2288                 },
2289                 Transfer {
2290                     write: false,
2291                     addr: 0x10000,
2292                 },
2293                 Transfer {
2294                     write: false,
2295                     addr: 0x11000,
2296                 },
2297                 Transfer {
2298                     write: false,
2299                     addr: 0x12000,
2300                 },
2301                 Transfer {
2302                     write: false,
2303                     addr: 0x13000,
2304                 },
2305                 Transfer {
2306                     write: false,
2307                     addr: 0x14000,
2308                 },
2309                 Transfer {
2310                     write: false,
2311                     addr: 0x15000,
2312                 },
2313                 Transfer {
2314                     write: false,
2315                     addr: 0x16000,
2316                 },
2317                 Transfer {
2318                     write: false,
2319                     addr: 0x17000,
2320                 },
2321                 Transfer {
2322                     write: false,
2323                     addr: 0x18000,
2324                 },
2325                 Transfer {
2326                     write: false,
2327                     addr: 0x19000,
2328                 },
2329                 Transfer {
2330                     write: false,
2331                     addr: 0x1a000,
2332                 },
2333                 Transfer {
2334                     write: false,
2335                     addr: 0x1b000,
2336                 },
2337                 Transfer {
2338                     write: false,
2339                     addr: 0x1c000,
2340                 },
2341                 Transfer {
2342                     write: false,
2343                     addr: 0x1d000,
2344                 },
2345                 Transfer {
2346                     write: false,
2347                     addr: 0x1e000,
2348                 },
2349                 Transfer {
2350                     write: false,
2351                     addr: 0x1f000,
2352                 },
2353                 Transfer {
2354                     write: false,
2355                     addr: 0x21000,
2356                 },
2357                 Transfer {
2358                     write: false,
2359                     addr: 0x22000,
2360                 },
2361                 Transfer {
2362                     write: false,
2363                     addr: 0x24000,
2364                 },
2365                 Transfer {
2366                     write: false,
2367                     addr: 0x40000,
2368                 },
2369                 Transfer {
2370                     write: false,
2371                     addr: 0x0,
2372                 },
2373                 Transfer {
2374                     write: false,
2375                     addr: 0x3000,
2376                 },
2377                 Transfer {
2378                     write: false,
2379                     addr: 0x7000,
2380                 },
2381                 Transfer {
2382                     write: false,
2383                     addr: 0x0,
2384                 },
2385                 Transfer {
2386                     write: false,
2387                     addr: 0x1000,
2388                 },
2389                 Transfer {
2390                     write: false,
2391                     addr: 0x2000,
2392                 },
2393                 Transfer {
2394                     write: false,
2395                     addr: 0x3000,
2396                 },
2397                 Transfer {
2398                     write: false,
2399                     addr: 0x0,
2400                 },
2401                 Transfer {
2402                     write: false,
2403                     addr: 0x449000,
2404                 },
2405                 Transfer {
2406                     write: false,
2407                     addr: 0x48000,
2408                 },
2409                 Transfer {
2410                     write: false,
2411                     addr: 0x48000,
2412                 },
2413                 Transfer {
2414                     write: false,
2415                     addr: 0x448000,
2416                 },
2417                 Transfer {
2418                     write: false,
2419                     addr: 0x44a000,
2420                 },
2421                 Transfer {
2422                     write: false,
2423                     addr: 0x48000,
2424                 },
2425                 Transfer {
2426                     write: false,
2427                     addr: 0x48000,
2428                 },
2429                 Transfer {
2430                     write: true,
2431                     addr: 0x0,
2432                 },
2433                 Transfer {
2434                     write: true,
2435                     addr: 0x448000,
2436                 },
2437                 Transfer {
2438                     write: true,
2439                     addr: 0x449000,
2440                 },
2441                 Transfer {
2442                     write: true,
2443                     addr: 0x44a000,
2444                 },
2445                 Transfer {
2446                     write: true,
2447                     addr: 0xfff0000,
2448                 },
2449                 Transfer {
2450                     write: true,
2451                     addr: 0xfff1000,
2452                 },
2453                 Transfer {
2454                     write: true,
2455                     addr: 0xfff2000,
2456                 },
2457                 Transfer {
2458                     write: true,
2459                     addr: 0xfff3000,
2460                 },
2461                 Transfer {
2462                     write: true,
2463                     addr: 0xfff4000,
2464                 },
2465                 Transfer {
2466                     write: true,
2467                     addr: 0xfff5000,
2468                 },
2469                 Transfer {
2470                     write: true,
2471                     addr: 0xfff6000,
2472                 },
2473                 Transfer {
2474                     write: true,
2475                     addr: 0xfff7000,
2476                 },
2477                 Transfer {
2478                     write: true,
2479                     addr: 0xfff8000,
2480                 },
2481                 Transfer {
2482                     write: true,
2483                     addr: 0xfff9000,
2484                 },
2485                 Transfer {
2486                     write: true,
2487                     addr: 0xfffa000,
2488                 },
2489                 Transfer {
2490                     write: true,
2491                     addr: 0xfffb000,
2492                 },
2493                 Transfer {
2494                     write: true,
2495                     addr: 0xfffc000,
2496                 },
2497                 Transfer {
2498                     write: true,
2499                     addr: 0xfffd000,
2500                 },
2501                 Transfer {
2502                     write: true,
2503                     addr: 0xfffe000,
2504                 },
2505                 Transfer {
2506                     write: true,
2507                     addr: 0xffff000,
2508                 },
2509             ];
2510 
2511             for xfer in &xfers {
2512                 q.seek(SeekFrom::Start(xfer.addr)).expect("Failed to seek.");
2513                 if xfer.write {
2514                     q.write(&b).expect("Failed to write.");
2515                 } else {
2516                     let read_count: usize = q.read(&mut b).expect("Failed to read.");
2517                     assert_eq!(read_count, BUF_SIZE);
2518                 }
2519             }
2520         });
2521     }
2522 
2523     #[test]
combo_write_read()2524     fn combo_write_read() {
2525         with_default_file(1024 * 1024 * 1024 * 256, |mut qcow_file| {
2526             const NUM_BLOCKS: usize = 555;
2527             const BLOCK_SIZE: usize = 0x1_0000;
2528             const OFFSET: u64 = 0x1_0000_0020;
2529             let data = [0x55u8; BLOCK_SIZE];
2530             let mut readback = [0u8; BLOCK_SIZE];
2531             for i in 0..NUM_BLOCKS {
2532                 let seek_offset = OFFSET + (i as u64) * (BLOCK_SIZE as u64);
2533                 qcow_file
2534                     .seek(SeekFrom::Start(seek_offset))
2535                     .expect("Failed to seek.");
2536                 let nwritten = qcow_file.write(&data).expect("Failed to write test data.");
2537                 assert_eq!(nwritten, BLOCK_SIZE);
2538                 // Read back the data to check it was written correctly.
2539                 qcow_file
2540                     .seek(SeekFrom::Start(seek_offset))
2541                     .expect("Failed to seek.");
2542                 let nread = qcow_file.read(&mut readback).expect("Failed to read.");
2543                 assert_eq!(nread, BLOCK_SIZE);
2544                 for (orig, read) in data.iter().zip(readback.iter()) {
2545                     assert_eq!(orig, read);
2546                 }
2547             }
2548             // Check that address 0 is still zeros.
2549             qcow_file.seek(SeekFrom::Start(0)).expect("Failed to seek.");
2550             let nread = qcow_file.read(&mut readback).expect("Failed to read.");
2551             assert_eq!(nread, BLOCK_SIZE);
2552             for read in readback.iter() {
2553                 assert_eq!(*read, 0);
2554             }
2555             // Check the data again after the writes have happened.
2556             for i in 0..NUM_BLOCKS {
2557                 let seek_offset = OFFSET + (i as u64) * (BLOCK_SIZE as u64);
2558                 qcow_file
2559                     .seek(SeekFrom::Start(seek_offset))
2560                     .expect("Failed to seek.");
2561                 let nread = qcow_file.read(&mut readback).expect("Failed to read.");
2562                 assert_eq!(nread, BLOCK_SIZE);
2563                 for (orig, read) in data.iter().zip(readback.iter()) {
2564                     assert_eq!(orig, read);
2565                 }
2566             }
2567 
2568             assert_eq!(qcow_file.first_zero_refcount().unwrap(), None);
2569         });
2570     }
2571 
seek_cur(file: &mut QcowFile) -> u642572     fn seek_cur(file: &mut QcowFile) -> u64 {
2573         file.seek(SeekFrom::Current(0)).unwrap()
2574     }
2575 
2576     #[test]
seek_data()2577     fn seek_data() {
2578         with_default_file(0x30000, |mut file| {
2579             // seek_data at or after the end of the file should return None
2580             assert_eq!(file.seek_data(0x10000).unwrap(), None);
2581             assert_eq!(seek_cur(&mut file), 0);
2582             assert_eq!(file.seek_data(0x10001).unwrap(), None);
2583             assert_eq!(seek_cur(&mut file), 0);
2584 
2585             // Write some data to [0x10000, 0x20000)
2586             let b = [0x55u8; 0x10000];
2587             file.seek(SeekFrom::Start(0x10000)).unwrap();
2588             file.write_all(&b).unwrap();
2589             assert_eq!(file.seek_data(0).unwrap(), Some(0x10000));
2590             assert_eq!(seek_cur(&mut file), 0x10000);
2591 
2592             // seek_data within data should return the same offset
2593             assert_eq!(file.seek_data(0x10000).unwrap(), Some(0x10000));
2594             assert_eq!(seek_cur(&mut file), 0x10000);
2595             assert_eq!(file.seek_data(0x10001).unwrap(), Some(0x10001));
2596             assert_eq!(seek_cur(&mut file), 0x10001);
2597             assert_eq!(file.seek_data(0x1FFFF).unwrap(), Some(0x1FFFF));
2598             assert_eq!(seek_cur(&mut file), 0x1FFFF);
2599 
2600             assert_eq!(file.seek_data(0).unwrap(), Some(0x10000));
2601             assert_eq!(seek_cur(&mut file), 0x10000);
2602             assert_eq!(file.seek_data(0x1FFFF).unwrap(), Some(0x1FFFF));
2603             assert_eq!(seek_cur(&mut file), 0x1FFFF);
2604             assert_eq!(file.seek_data(0x20000).unwrap(), None);
2605             assert_eq!(seek_cur(&mut file), 0x1FFFF);
2606         });
2607     }
2608 
2609     #[test]
seek_hole()2610     fn seek_hole() {
2611         with_default_file(0x30000, |mut file| {
2612             // File consisting entirely of a hole
2613             assert_eq!(file.seek_hole(0).unwrap(), Some(0));
2614             assert_eq!(seek_cur(&mut file), 0);
2615             assert_eq!(file.seek_hole(0xFFFF).unwrap(), Some(0xFFFF));
2616             assert_eq!(seek_cur(&mut file), 0xFFFF);
2617 
2618             // seek_hole at or after the end of the file should return None
2619             file.seek(SeekFrom::Start(0)).unwrap();
2620             assert_eq!(file.seek_hole(0x30000).unwrap(), None);
2621             assert_eq!(seek_cur(&mut file), 0);
2622             assert_eq!(file.seek_hole(0x30001).unwrap(), None);
2623             assert_eq!(seek_cur(&mut file), 0);
2624 
2625             // Write some data to [0x10000, 0x20000)
2626             let b = [0x55u8; 0x10000];
2627             file.seek(SeekFrom::Start(0x10000)).unwrap();
2628             file.write_all(&b).unwrap();
2629 
2630             // seek_hole within a hole should return the same offset
2631             assert_eq!(file.seek_hole(0).unwrap(), Some(0));
2632             assert_eq!(seek_cur(&mut file), 0);
2633             assert_eq!(file.seek_hole(0xFFFF).unwrap(), Some(0xFFFF));
2634             assert_eq!(seek_cur(&mut file), 0xFFFF);
2635 
2636             // seek_hole within data should return the next hole
2637             file.seek(SeekFrom::Start(0)).unwrap();
2638             assert_eq!(file.seek_hole(0x10000).unwrap(), Some(0x20000));
2639             assert_eq!(seek_cur(&mut file), 0x20000);
2640             file.seek(SeekFrom::Start(0)).unwrap();
2641             assert_eq!(file.seek_hole(0x10001).unwrap(), Some(0x20000));
2642             assert_eq!(seek_cur(&mut file), 0x20000);
2643             file.seek(SeekFrom::Start(0)).unwrap();
2644             assert_eq!(file.seek_hole(0x1FFFF).unwrap(), Some(0x20000));
2645             assert_eq!(seek_cur(&mut file), 0x20000);
2646             file.seek(SeekFrom::Start(0)).unwrap();
2647             assert_eq!(file.seek_hole(0xFFFF).unwrap(), Some(0xFFFF));
2648             assert_eq!(seek_cur(&mut file), 0xFFFF);
2649             file.seek(SeekFrom::Start(0)).unwrap();
2650             assert_eq!(file.seek_hole(0x10000).unwrap(), Some(0x20000));
2651             assert_eq!(seek_cur(&mut file), 0x20000);
2652             file.seek(SeekFrom::Start(0)).unwrap();
2653             assert_eq!(file.seek_hole(0x1FFFF).unwrap(), Some(0x20000));
2654             assert_eq!(seek_cur(&mut file), 0x20000);
2655             file.seek(SeekFrom::Start(0)).unwrap();
2656             assert_eq!(file.seek_hole(0x20000).unwrap(), Some(0x20000));
2657             assert_eq!(seek_cur(&mut file), 0x20000);
2658             file.seek(SeekFrom::Start(0)).unwrap();
2659             assert_eq!(file.seek_hole(0x20001).unwrap(), Some(0x20001));
2660             assert_eq!(seek_cur(&mut file), 0x20001);
2661 
2662             // seek_hole at EOF should return None
2663             file.seek(SeekFrom::Start(0)).unwrap();
2664             assert_eq!(file.seek_hole(0x30000).unwrap(), None);
2665             assert_eq!(seek_cur(&mut file), 0);
2666 
2667             // Write some data to [0x20000, 0x30000)
2668             file.seek(SeekFrom::Start(0x20000)).unwrap();
2669             file.write_all(&b).unwrap();
2670 
2671             // seek_hole within [0x20000, 0x30000) should now find the hole at EOF
2672             assert_eq!(file.seek_hole(0x20000).unwrap(), Some(0x30000));
2673             assert_eq!(seek_cur(&mut file), 0x30000);
2674             file.seek(SeekFrom::Start(0)).unwrap();
2675             assert_eq!(file.seek_hole(0x20001).unwrap(), Some(0x30000));
2676             assert_eq!(seek_cur(&mut file), 0x30000);
2677             file.seek(SeekFrom::Start(0)).unwrap();
2678             assert_eq!(file.seek_hole(0x30000).unwrap(), None);
2679             assert_eq!(seek_cur(&mut file), 0);
2680         });
2681     }
2682 
2683     #[test]
rebuild_refcounts()2684     fn rebuild_refcounts() {
2685         with_basic_file(&valid_header(), |mut disk_file: File| {
2686             let header = QcowHeader::new(&mut disk_file).expect("Failed to create Header.");
2687             let cluster_size = 65536;
2688             let mut raw_file =
2689                 QcowRawFile::from(disk_file, cluster_size).expect("Failed to create QcowRawFile.");
2690             QcowFile::rebuild_refcounts(&mut raw_file, header)
2691                 .expect("Failed to rebuild recounts.");
2692         });
2693     }
2694 }
2695