// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! A safe wrapper around the kernel's KVM interface.

mod cap;

use std::cell::RefCell;
use std::cmp::{min, Ordering};
use std::collections::{BTreeMap, BinaryHeap};
use std::ffi::CString;
use std::fs::File;
use std::mem::size_of;
use std::ops::{Deref, DerefMut};
use std::os::raw::*;
use std::os::unix::prelude::OsStrExt;
use std::path::{Path, PathBuf};
use std::ptr::copy_nonoverlapping;
use std::sync::Arc;
use sync::Mutex;

use base::{AsRawDescriptor, FromRawDescriptor, RawDescriptor};
use data_model::vec_with_array_field;

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use data_model::FlexibleArrayWrapper;

use libc::sigset_t;
use libc::{open, EBUSY, EINVAL, ENOENT, ENOSPC, EOVERFLOW, O_CLOEXEC, O_RDWR};

use kvm_sys::*;

#[allow(unused_imports)]
use base::{
    block_signal, ioctl, ioctl_with_mut_ptr, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref,
    ioctl_with_val, pagesize, signal, unblock_signal, warn, Error, Event, IoctlNr, MappedRegion,
    MemoryMapping, MemoryMappingBuilder, MmapError, Result, SIGRTMIN,
};
use vm_memory::{GuestAddress, GuestMemory};

pub use crate::cap::*;

fn errno_result<T>() -> Result<T> {
    Err(Error::last())
}

unsafe fn set_user_memory_region<F: AsRawDescriptor>(
    fd: &F,
    slot: u32,
    read_only: bool,
    log_dirty_pages: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut flags = if read_only { KVM_MEM_READONLY } else { 0 };
    if log_dirty_pages {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    let region = kvm_userspace_memory_region {
        slot,
        flags,
        guest_phys_addr: guest_addr,
        memory_size,
        userspace_addr: userspace_addr as u64,
    };

    let ret = ioctl_with_ref(fd, KVM_SET_USER_MEMORY_REGION(), &region);
    if ret == 0 {
        Ok(())
    } else {
        errno_result()
    }
}

/// Helper function to determine the size in bytes of a dirty log bitmap for the given memory
/// region size.
///
/// # Arguments
///
/// * `size` - Number of bytes in the memory region being queried.
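///
/// # Examples
///
/// A quick sketch (assuming 4 KiB pages): a 16-page (65536-byte) region needs 16 bits of
/// bitmap, which rounds up to 2 bytes.
///
/// ```ignore
/// let len = dirty_log_bitmap_size(16 * 4096);
/// assert_eq!(len, 2);
/// ```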
pub fn dirty_log_bitmap_size(size: usize) -> usize {
    let page_size = pagesize();
    (((size + page_size - 1) / page_size) + 7) / 8
}

/// A wrapper around opening and using `/dev/kvm`.
///
/// Useful for querying extensions and basic values from the KVM backend. A `Kvm` is required to
/// create a `Vm` object.
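///
/// A minimal usage sketch (assuming `Cap::Irqchip` is one of the capabilities defined in the
/// `cap` module):
///
/// ```ignore
/// let kvm = Kvm::new()?;
/// if kvm.check_extension(Cap::Irqchip) {
///     // An in-kernel irqchip is available for VMs created from this Kvm.
/// }
/// ```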
pub struct Kvm {
    kvm: File,
}

impl Kvm {
    /// Opens `/dev/kvm` and returns a Kvm object on success.
    pub fn new() -> Result<Kvm> {
        Kvm::new_with_path(&PathBuf::from("/dev/kvm"))
    }

    /// Opens a KVM device at `device_path` and returns a Kvm object on success.
    pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
        // Open calls are safe because we give a nul-terminated string and verify the result.
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        let ret = unsafe { open(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        // Safe because we verify that ret is valid and we own the fd.
        Ok(Kvm {
            kvm: unsafe { File::from_raw_descriptor(ret) },
        })
    }

    fn check_extension_int(&self, c: Cap) -> i32 {
        // Safe because we know that our file is a KVM fd and that the extension is one of the ones
        // defined by the kernel.
        unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) }
    }

    /// Checks if a particular `Cap` is available.
    pub fn check_extension(&self, c: Cap) -> bool {
        self.check_extension_int(c) == 1
    }

    /// Gets the size of the mmap required to use a vcpu's `kvm_run` structure.
    pub fn get_vcpu_mmap_size(&self) -> Result<usize> {
        // Safe because we know that our file is a KVM fd and we verify the return result.
        let res = unsafe { ioctl(self, KVM_GET_VCPU_MMAP_SIZE()) };
        if res > 0 {
            Ok(res as usize)
        } else {
            errno_result()
        }
    }

    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn get_cpuid(&self, kind: IoctlNr) -> Result<CpuId> {
        const MAX_KVM_CPUID_ENTRIES: usize = 256;
        let mut cpuid = CpuId::new(MAX_KVM_CPUID_ENTRIES);

        let ret = unsafe {
            // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
            // allocated for the struct. The limit is read from nent, which is set to the allocated
            // size (MAX_KVM_CPUID_ENTRIES) above.
            ioctl_with_mut_ptr(self, kind, cpuid.as_mut_ptr())
        };
        if ret < 0 {
            return errno_result();
        }

        Ok(cpuid)
    }

    /// X86 specific call to get the system supported CPUID values.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_supported_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_SUPPORTED_CPUID())
    }

    /// X86 specific call to get the system emulated CPUID values.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_emulated_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_EMULATED_CPUID())
    }

    /// X86 specific call to get the list of supported MSRs.
    ///
    /// See the documentation for KVM_GET_MSR_INDEX_LIST.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_msr_index_list(&self) -> Result<Vec<u32>> {
        const MAX_KVM_MSR_ENTRIES: usize = 256;

        let mut msr_list = vec_with_array_field::<kvm_msr_list, u32>(MAX_KVM_MSR_ENTRIES);
        msr_list[0].nmsrs = MAX_KVM_MSR_ENTRIES as u32;

        let ret = unsafe {
            // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
            // allocated for the struct. The limit is read from nmsrs, which is set to the
            // allocated size (MAX_KVM_MSR_ENTRIES) above.
            ioctl_with_mut_ref(self, KVM_GET_MSR_INDEX_LIST(), &mut msr_list[0])
        };
        if ret < 0 {
            return errno_result();
        }

        let mut nmsrs = msr_list[0].nmsrs;

        // Mapping the unsized array to a slice is unsafe because the length isn't known. Using
        // the length we originally allocated with eliminates the possibility of overflow.
        let indices: &[u32] = unsafe {
            if nmsrs > MAX_KVM_MSR_ENTRIES as u32 {
                nmsrs = MAX_KVM_MSR_ENTRIES as u32;
            }
            msr_list[0].indices.as_slice(nmsrs as usize)
        };

        Ok(indices.to_vec())
    }
}

impl AsRawDescriptor for Kvm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.kvm.as_raw_descriptor()
    }
}

/// An address either in programmable I/O space or in memory mapped I/O space.
#[derive(Copy, Clone, Debug)]
pub enum IoeventAddress {
    Pio(u64),
    Mmio(u64),
}

/// Used in `Vm::register_ioevent` to indicate a size and optionally a value to match.
pub enum Datamatch {
    AnyLength,
    U8(Option<u8>),
    U16(Option<u16>),
    U32(Option<u32>),
    U64(Option<u64>),
}

/// A source of IRQs in an `IrqRoute`.
pub enum IrqSource {
    Irqchip { chip: u32, pin: u32 },
    Msi { address: u64, data: u32 },
}

/// A single route for an IRQ.
pub struct IrqRoute {
    pub gsi: u32,
    pub source: IrqSource,
}

/// Interrupt controller IDs
pub enum PicId {
    Primary = 0,
    Secondary = 1,
}

/// Number of pins on the IOAPIC.
pub const NUM_IOAPIC_PINS: usize = 24;

// Used to invert the order when stored in a max-heap.
#[derive(Copy, Clone, Eq, PartialEq)]
struct MemSlot(u32);

impl Ord for MemSlot {
    fn cmp(&self, other: &MemSlot) -> Ordering {
        // Notice the order is inverted so the lowest magnitude slot has the highest priority in a
        // max-heap.
        other.0.cmp(&self.0)
    }
}

impl PartialOrd for MemSlot {
    fn partial_cmp(&self, other: &MemSlot) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

/// A wrapper around creating and using a VM.
pub struct Vm {
    vm: File,
    guest_mem: GuestMemory,
    mem_regions: Arc<Mutex<BTreeMap<u32, Box<dyn MappedRegion>>>>,
    mem_slot_gaps: Arc<Mutex<BinaryHeap<MemSlot>>>,
}

impl Vm {
    /// Constructs a new `Vm` using the given `Kvm` instance.
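    ///
    /// A minimal sketch (assuming `GuestMemory::new` takes a slice of `(GuestAddress, size)`
    /// ranges, as in the `vm_memory` crate used here):
    ///
    /// ```ignore
    /// let kvm = Kvm::new()?;
    /// let gm = GuestMemory::new(&[(GuestAddress(0), 0x10000)])?;
    /// let vm = Vm::new(&kvm, gm)?;
    /// ```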
    pub fn new(kvm: &Kvm, guest_mem: GuestMemory) -> Result<Vm> {
        // Safe because we know kvm is a real kvm fd as this module is the only one that can make
        // Kvm objects.
        let ret = unsafe { ioctl(kvm, KVM_CREATE_VM()) };
        if ret >= 0 {
            // Safe because we verify the value of ret and we are the owners of the fd.
            let vm_file = unsafe { File::from_raw_descriptor(ret) };
            guest_mem.with_regions(|index, guest_addr, size, host_addr, _, _| {
                unsafe {
                    // Safe because the guest regions are guaranteed not to overlap.
                    set_user_memory_region(
                        &vm_file,
                        index as u32,
                        false,
                        false,
                        guest_addr.offset() as u64,
                        size as u64,
                        host_addr as *mut u8,
                    )
                }
            })?;

            Ok(Vm {
                vm: vm_file,
                guest_mem,
                mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
                mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            })
        } else {
            errno_result()
        }
    }

    /// Checks if a particular `Cap` is available.
    ///
    /// This is distinct from the `Kvm` version of this method because some extensions depend on
    /// the existence of the particular `Vm`. This method is encouraged by the kernel because it
    /// more accurately reflects the usable capabilities.
    pub fn check_extension(&self, c: Cap) -> bool {
        // Safe because we know that our file is a KVM fd and that the extension is one of the ones
        // defined by the kernel.
        unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) == 1 }
    }

    /// Inserts the given `mem` into the VM's address space at `guest_addr`.
    ///
    /// The slot that was assigned the kvm memory mapping is returned on success. The slot can be
    /// given to `Vm::remove_memory_region` to remove the memory from the VM's address space and
    /// take back ownership of `mem`.
    ///
    /// Note that memory inserted into the VM's address space must not overlap with any other memory
    /// slot's region.
    ///
    /// If `read_only` is true, the guest will be able to read the memory as normal, but attempts to
    /// write will trigger a mmio VM exit, leaving the memory untouched.
    ///
    /// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to
    /// by the guest with `get_dirty_log`.
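    ///
    /// A usage sketch (assuming `mem` is a `MemoryMapping` over one page of host memory):
    ///
    /// ```ignore
    /// let slot = vm.add_memory_region(GuestAddress(0x1000_0000), Box::new(mem), false, false)?;
    /// // ... later, reclaim the mapping:
    /// let mem = vm.remove_memory_region(slot)?;
    /// ```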
    pub fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
    ) -> Result<u32> {
        let size = mem.size() as u64;
        let end_addr = guest_addr
            .checked_add(size)
            .ok_or_else(|| Error::new(EOVERFLOW))?;
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as u32,
        };

        // Safe because we check that the given guest address is valid and has no overlaps. We also
        // know that the pointer and size are correct because the MemoryMapping interface ensures
        // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
        // is removed.
        let res = unsafe {
            set_user_memory_region(
                &self.vm,
                slot,
                read_only,
                log_dirty_pages,
                guest_addr.offset() as u64,
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            gaps.push(MemSlot(slot));
            return Err(e);
        }
        regions.insert(slot, mem);
        Ok(slot)
    }

    /// Removes memory that was previously added at the given slot.
    ///
    /// Ownership of the host memory mapping associated with the given slot is returned on success.
    pub fn remove_memory_region(&mut self, slot: u32) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        // Safe because the slot is checked against the list of memory slots.
        unsafe {
            set_user_memory_region(&self.vm, slot, false, false, 0, 0, std::ptr::null_mut())?;
        }
        self.mem_slot_gaps.lock().push(MemSlot(slot));
        // This remove will always succeed because of the contains_key check above.
        Ok(regions.remove(&slot).unwrap())
    }

    /// Gets the bitmap of dirty pages since the last call to `get_dirty_log` for the memory at
    /// `slot`.
    ///
    /// The size of `dirty_log` must be at least as many bits as there are pages in the memory
    /// region `slot` represents. For example, if the size of `slot` is 16 pages, `dirty_log` must
    /// be 2 bytes or greater.
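    ///
    /// A sketch (assuming `slot` was returned by `add_memory_region` with
    /// `log_dirty_pages = true` for a region of `size` bytes):
    ///
    /// ```ignore
    /// let mut dirty_log = vec![0u8; dirty_log_bitmap_size(size)];
    /// vm.get_dirty_log(slot, &mut dirty_log)?;
    /// ```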
    pub fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> {
        match self.mem_regions.lock().get(&slot) {
            Some(mem) => {
                // Ensures that there are as many bytes in dirty_log as there are pages in the mmap.
                if dirty_log_bitmap_size(mem.size()) > dirty_log.len() {
                    return Err(Error::new(EINVAL));
                }
                let mut dirty_log_kvm = kvm_dirty_log {
                    slot,
                    ..Default::default()
                };
                dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
                // Safe because the `dirty_bitmap` pointer assigned above is guaranteed to be valid
                // (because it's from a slice) and we checked that it will be large enough to hold
                // the entire log.
                let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG(), &dirty_log_kvm) };
                if ret == 0 {
                    Ok(())
                } else {
                    errno_result()
                }
            }
            _ => Err(Error::new(ENOENT)),
        }
    }

    /// Gets a reference to the guest memory owned by this VM.
    ///
    /// Note that `GuestMemory` does not include any mmio memory that may have been added after
    /// this VM was constructed.
    pub fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    /// Sets the address of a one-page region in the VM's address space.
    ///
    /// See the documentation on the KVM_SET_IDENTITY_MAP_ADDR ioctl.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()> {
        // Safe because we know that our file is a VM fd and we verify the return result.
        let ret =
            unsafe { ioctl_with_ref(self, KVM_SET_IDENTITY_MAP_ADDR(), &(addr.offset() as u64)) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the current timestamp of kvmclock as seen by the current guest.
    ///
    /// See the documentation on the KVM_GET_CLOCK ioctl.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_clock(&self) -> Result<kvm_clock_data> {
        // Safe because we know that our file is a VM fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut clock_data = unsafe { std::mem::zeroed() };
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_CLOCK(), &mut clock_data) };
        if ret == 0 {
            Ok(clock_data)
        } else {
            errno_result()
        }
    }

    /// Sets the current timestamp of kvmclock to the specified value.
    ///
    /// See the documentation on the KVM_SET_CLOCK ioctl.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_clock(&self, clock_data: &kvm_clock_data) -> Result<()> {
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_CLOCK(), clock_data) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Creates an in-kernel interrupt controller.
    ///
    /// See the documentation on the KVM_CREATE_IRQCHIP ioctl.
    #[cfg(any(
        target_arch = "x86",
        target_arch = "x86_64",
        target_arch = "arm",
        target_arch = "aarch64"
    ))]
    pub fn create_irq_chip(&self) -> Result<()> {
        // Safe because we know that our file is a VM fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP()) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the state of the given interrupt controller by issuing the KVM_GET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_pic_state(&self, id: PicId) -> Result<kvm_pic_state> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        let ret = unsafe {
            // Safe because we know our file is a VM fd, we know the kernel will only write the
            // correct amount of memory to our pointer, and we verify the return result.
            ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state)
        };
        if ret == 0 {
            Ok(unsafe {
                // Safe as we know that we are retrieving data related to the
                // PIC (primary or secondary) and not the IOAPIC.
                irqchip_state.chip.pic
            })
        } else {
            errno_result()
        }
    }

    /// Sets the state of the given interrupt controller by issuing the KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_pic_state(&self, id: PicId, state: &kvm_pic_state) -> Result<()> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        irqchip_state.chip.pic = *state;
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the state of the IOAPIC by issuing the KVM_GET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_ioapic_state(&self) -> Result<kvm_ioapic_state> {
        let mut irqchip_state = kvm_irqchip {
            // Chip id 2 selects the IOAPIC in the KVM irqchip API.
            chip_id: 2,
            ..Default::default()
        };
        let ret = unsafe {
            // Safe because we know our file is a VM fd, we know the kernel will only write the
            // correct amount of memory to our pointer, and we verify the return result.
            ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), &mut irqchip_state)
        };
        if ret == 0 {
            Ok(unsafe {
                // Safe as we know that we are retrieving data related to the
                // IOAPIC and not the PIC.
                irqchip_state.chip.ioapic
            })
        } else {
            errno_result()
        }
    }

    /// Sets the state of the IOAPIC by issuing the KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_ioapic_state(&self, state: &kvm_ioapic_state) -> Result<()> {
        let mut irqchip_state = kvm_irqchip {
            // Chip id 2 selects the IOAPIC in the KVM irqchip API.
            chip_id: 2,
            ..Default::default()
        };
        irqchip_state.chip.ioapic = *state;
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.
    #[cfg(any(
        target_arch = "x86",
        target_arch = "x86_64",
        target_arch = "arm",
        target_arch = "aarch64"
    ))]
    pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
        let mut irq_level = kvm_irq_level::default();
        irq_level.__bindgen_anon_1.irq = irq;
        irq_level.level = if active { 1 } else { 0 };

        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE(), &irq_level) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Creates a PIT as per the KVM_CREATE_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn create_pit(&self) -> Result<()> {
        let pit_config = kvm_pit_config::default();
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_PIT2(), &pit_config) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the state of the PIT by issuing the KVM_GET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_pit_state(&self) -> Result<kvm_pit_state2> {
        // Safe because we know that our file is a VM fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut pit_state = unsafe { std::mem::zeroed() };
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_PIT2(), &mut pit_state) };
        if ret == 0 {
            Ok(pit_state)
        } else {
            errno_result()
        }
    }

    /// Sets the state of the PIT by issuing the KVM_SET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> {
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_PIT2(), pit_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    /// The `datamatch` parameter can be used to limit signaling `evt` to only the cases where the
    /// value being written is equal to `datamatch`. Note that the size of `datamatch` is important
    /// and must match the expected size of the guest's write.
    ///
    /// In all cases where `evt` is signaled, the ordinary vmexit to userspace that would be
    /// triggered is prevented.
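    ///
    /// For example (a sketch), signal `evt` only when the guest writes the 16-bit value
    /// `0x1234` to I/O port `0x3f8`:
    ///
    /// ```ignore
    /// let evt = Event::new()?;
    /// vm.register_ioevent(&evt, IoeventAddress::Pio(0x3f8), Datamatch::U16(Some(0x1234)))?;
    /// ```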
    pub fn register_ioevent(
        &self,
        evt: &Event,
        addr: IoeventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, false)
    }

    /// Unregisters an event previously registered with `register_ioevent`.
    ///
    /// The `evt`, `addr`, and `datamatch` set must be the same as the ones passed into
    /// `register_ioevent`.
    pub fn unregister_ioevent(
        &self,
        evt: &Event,
        addr: IoeventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, true)
    }

    fn ioeventfd(
        &self,
        evt: &Event,
        addr: IoeventAddress,
        datamatch: Datamatch,
        deassign: bool,
    ) -> Result<()> {
        let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
            Datamatch::AnyLength => (false, 0, 0),
            Datamatch::U8(v) => match v {
                Some(u) => (true, u as u64, 1),
                None => (false, 0, 1),
            },
            Datamatch::U16(v) => match v {
                Some(u) => (true, u as u64, 2),
                None => (false, 0, 2),
            },
            Datamatch::U32(v) => match v {
                Some(u) => (true, u as u64, 4),
                None => (false, 0, 4),
            },
            Datamatch::U64(v) => match v {
                Some(u) => (true, u as u64, 8),
                None => (false, 0, 8),
            },
        };
        let mut flags = 0;
        if deassign {
            flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
        }
        if do_datamatch {
            flags |= 1 << kvm_ioeventfd_flag_nr_datamatch;
        }
        if let IoeventAddress::Pio(_) = addr {
            flags |= 1 << kvm_ioeventfd_flag_nr_pio;
        }
        let ioeventfd = kvm_ioeventfd {
            datamatch: datamatch_value,
            len: datamatch_len,
            addr: match addr {
                IoeventAddress::Pio(p) => p as u64,
                IoeventAddress::Mmio(m) => m,
            },
            fd: evt.as_raw_descriptor(),
            flags,
            ..Default::default()
        };
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD(), &ioeventfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Registers an event that will, when signalled, trigger the `gsi` irq, and `resample_evt`
    /// will get triggered when the irqchip is resampled.
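    ///
    /// A sketch (assuming `evt` and `resample_evt` are `Event`s and GSI 1 is routed):
    ///
    /// ```ignore
    /// vm.register_irqfd_resample(&evt, &resample_evt, 1)?;
    /// // ... later, detach the irqfd:
    /// vm.unregister_irqfd(&evt, 1)?;
    /// ```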
    #[cfg(any(
        target_arch = "x86",
        target_arch = "x86_64",
        target_arch = "arm",
        target_arch = "aarch64"
    ))]
    pub fn register_irqfd_resample(
        &self,
        evt: &Event,
        resample_evt: &Event,
        gsi: u32,
    ) -> Result<()> {
        let irqfd = kvm_irqfd {
            flags: KVM_IRQFD_FLAG_RESAMPLE,
            fd: evt.as_raw_descriptor() as u32,
            resamplefd: resample_evt.as_raw_descriptor() as u32,
            gsi,
            ..Default::default()
        };
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Unregisters an event that was previously registered with
    /// `register_irqfd`/`register_irqfd_resample`.
    ///
    /// The `evt` and `gsi` pair must be the same as the ones passed into
    /// `register_irqfd`/`register_irqfd_resample`.
    #[cfg(any(
        target_arch = "x86",
        target_arch = "x86_64",
        target_arch = "arm",
        target_arch = "aarch64"
    ))]
    pub fn unregister_irqfd(&self, evt: &Event, gsi: u32) -> Result<()> {
        let irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            flags: KVM_IRQFD_FLAG_DEASSIGN,
            ..Default::default()
        };
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Sets the GSI routing table, replacing any table set with previous calls to
    /// `set_gsi_routing`.
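    ///
    /// For example (a sketch; the IOAPIC is irqchip id 2 in the KVM API), route GSI 5 to
    /// IOAPIC pin 5 and GSI 10 to an MSI:
    ///
    /// ```ignore
    /// vm.set_gsi_routing(&[
    ///     IrqRoute { gsi: 5, source: IrqSource::Irqchip { chip: 2, pin: 5 } },
    ///     IrqRoute { gsi: 10, source: IrqSource::Msi { address: 0xfee0_0000, data: 0x4021 } },
    /// ])?;
    /// ```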
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(routes.len());
        irq_routing[0].nr = routes.len() as u32;

        // Safe because we ensured there is enough space in irq_routing to hold the number of
        // route entries.
        let irq_routes = unsafe { irq_routing[0].entries.as_mut_slice(routes.len()) };
        for (route, irq_route) in routes.iter().zip(irq_routes.iter_mut()) {
            irq_route.gsi = route.gsi;
            match route.source {
                IrqSource::Irqchip { chip, pin } => {
                    irq_route.type_ = KVM_IRQ_ROUTING_IRQCHIP;
                    irq_route.u.irqchip = kvm_irq_routing_irqchip { irqchip: chip, pin }
                }
                IrqSource::Msi { address, data } => {
                    irq_route.type_ = KVM_IRQ_ROUTING_MSI;
                    irq_route.u.msi = kvm_irq_routing_msi {
                        address_lo: address as u32,
                        address_hi: (address >> 32) as u32,
                        data,
                        ..Default::default()
                    }
                }
            }
        }

        // Safe because we allocated the structure, sized it for `routes.len()` entries, and
        // verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING(), &irq_routing[0]) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Enable the specified capability.
    ///
    /// See the documentation for KVM_ENABLE_CAP.
    ///
    /// This function is marked as unsafe because `cap` may contain values which are interpreted
    /// as pointers by the kernel.
    pub unsafe fn kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
        // Safe because we allocated the struct and we know the kernel will read exactly the size
        // of the struct.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), cap);
        if ret < 0 {
            errno_result()
        } else {
            Ok(())
        }
    }
}

impl AsRawDescriptor for Vm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}
/// A reason why a VCPU exited. One of these returns every time `Vcpu::run` is called.
#[derive(Debug)]
pub enum VcpuExit {
    /// An out port instruction was run on the given port with the given data.
    IoOut {
        port: u16,
        size: usize,
        data: [u8; 8],
    },
    /// An in port instruction was run on the given port.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    IoIn {
        port: u16,
        size: usize,
    },
    /// A read instruction was run against the given MMIO address.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    MmioRead {
        address: u64,
        size: usize,
    },
    /// A write instruction was run against the given MMIO address with the given data.
    MmioWrite {
        address: u64,
        size: usize,
        data: [u8; 8],
    },
    IoapicEoi {
        vector: u8,
    },
    HypervSynic {
        msr: u32,
        control: u64,
        evt_page: u64,
        msg_page: u64,
    },
    HypervHcall {
        input: u64,
        params: [u64; 2],
    },
    Unknown,
    Exception,
    Hypercall,
    Debug,
    Hlt,
    IrqWindowOpen,
    Shutdown,
    FailEntry {
        hardware_entry_failure_reason: u64,
    },
    Intr,
    SetTpr,
    TprAccess,
    S390Sieic,
    S390Reset,
    Dcr,
    Nmi,
    InternalError,
    Osi,
    PaprHcall,
    S390Ucontrol,
    Watchdog,
    S390Tsch,
    Epr,
    /// The cpu triggered a system level event which is specified by the type field.
    /// The first field is the event type and the second field is flags.
    /// The possible event types are shutdown, reset, or crash. So far there
    /// are not any flags defined.
    SystemEvent(u32 /* event_type */, u64 /* flags */),
}
938
939 /// A wrapper around creating and using a VCPU.
940 /// `Vcpu` provides all functionality except for running. To run, `to_runnable` must be called to
941 /// lock the vcpu to a thread. Then the returned `RunnableVcpu` can be used for running.
942 pub struct Vcpu {
943 vcpu: File,
944 run_mmap: MemoryMapping,
945 }
946
947 pub struct VcpuThread {
948 run: *mut kvm_run,
949 signal_num: Option<c_int>,
950 }
951
952 thread_local!(static VCPU_THREAD: RefCell<Option<VcpuThread>> = RefCell::new(None));
953
954 impl Vcpu {
955 /// Constructs a new VCPU for `vm`.
956 ///
957 /// The `id` argument is the CPU number between [0, max vcpus).
new(id: c_ulong, kvm: &Kvm, vm: &Vm) -> Result<Vcpu>958 pub fn new(id: c_ulong, kvm: &Kvm, vm: &Vm) -> Result<Vcpu> {
959 let run_mmap_size = kvm.get_vcpu_mmap_size()?;
960
961 // Safe because we know that vm a VM fd and we verify the return result.
962 let vcpu_fd = unsafe { ioctl_with_val(vm, KVM_CREATE_VCPU(), id) };
963 if vcpu_fd < 0 {
964 return errno_result();
965 }
966
967 // Wrap the vcpu now in case the following ? returns early. This is safe because we verified
968 // the value of the fd and we own the fd.
969 let vcpu = unsafe { File::from_raw_descriptor(vcpu_fd) };
970
971 let run_mmap = MemoryMappingBuilder::new(run_mmap_size)
972 .from_file(&vcpu)
973 .build()
974 .map_err(|_| Error::new(ENOSPC))?;
975
976 Ok(Vcpu { vcpu, run_mmap })
977 }
978
979 /// Consumes `self` and returns a `RunnableVcpu`. A `RunnableVcpu` is required to run the
980 /// guest.
981 /// Assigns a vcpu to the current thread and stores it in a hash map that can be used by signal
982 /// handlers to call set_local_immediate_exit(). An optional signal number will be temporarily
983 /// blocked while assigning the vcpu to the thread and later blocked when `RunnableVcpu` is
984 /// destroyed.
985 ///
986 /// Returns an error, `EBUSY`, if the current thread already contains a Vcpu.
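    ///
    /// A typical run-loop sketch (assuming `kvm` and `vm` were created as above):
    ///
    /// ```ignore
    /// let vcpu = Vcpu::new(0, &kvm, &vm)?;
    /// let runnable = vcpu.to_runnable(Some(SIGRTMIN() + 0))?;
    /// loop {
    ///     match runnable.run()? {
    ///         VcpuExit::Hlt => break,
    ///         _ => {}
    ///     }
    /// }
    /// ```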
    #[allow(clippy::cast_ptr_alignment)]
    pub fn to_runnable(self, signal_num: Option<c_int>) -> Result<RunnableVcpu> {
        // Block the signal while we add -- if a signal fires (very unlikely,
        // as this means something is trying to pause the vcpu before it has
        // even started) it'll try to grab the read lock while this write
        // lock is grabbed and cause a deadlock.
        // Assuming that a failure to block means it's already blocked.
        let _blocked_signal = signal_num.map(BlockedSignal::new);

        VCPU_THREAD.with(|v| {
            if v.borrow().is_none() {
                *v.borrow_mut() = Some(VcpuThread {
                    run: self.run_mmap.as_ptr() as *mut kvm_run,
                    signal_num,
                });
                Ok(())
            } else {
                Err(Error::new(EBUSY))
            }
        })?;

        Ok(RunnableVcpu {
            vcpu: self,
            phantom: Default::default(),
        })
    }

    /// Sets the data received by a mmio read, ioport in, or hypercall instruction.
    ///
    /// This function should be called after `Vcpu::run` returns a `VcpuExit::IoIn`,
    /// `VcpuExit::MmioRead`, or `VcpuExit::HypervHcall`.
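    ///
    /// For example (a sketch): after a `VcpuExit::MmioRead { size: 4, .. }`, supply the value
    /// the guest's load will observe:
    ///
    /// ```ignore
    /// vcpu.set_data(&42u32.to_ne_bytes())?;
    /// ```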
    #[allow(clippy::cast_ptr_alignment)]
    pub fn set_data(&self, data: &[u8]) -> Result<()> {
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        match run.exit_reason {
            KVM_EXIT_IO => {
                let run_start = run as *mut kvm_run as *mut u8;
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let io = unsafe { run.__bindgen_anon_1.io };
                if io.direction as u32 != KVM_EXIT_IO_IN {
                    return Err(Error::new(EINVAL));
                }
                let data_size = (io.count as usize) * (io.size as usize);
                if data_size != data.len() {
                    return Err(Error::new(EINVAL));
                }
                // The data_offset is defined by the kernel to be some number of bytes into the
                // kvm_run structure, which we have fully mmap'd.
                unsafe {
                    let data_ptr = run_start.offset(io.data_offset as isize);
                    copy_nonoverlapping(data.as_ptr(), data_ptr, data_size);
                }
                Ok(())
            }
            KVM_EXIT_MMIO => {
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
                if mmio.is_write != 0 {
                    return Err(Error::new(EINVAL));
                }
                let len = mmio.len as usize;
                if len != data.len() {
                    return Err(Error::new(EINVAL));
                }
                mmio.data[..len].copy_from_slice(data);
                Ok(())
            }
            KVM_EXIT_HYPERV => {
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let hyperv = unsafe { &mut run.__bindgen_anon_1.hyperv };
                if hyperv.type_ != KVM_EXIT_HYPERV_HCALL {
                    return Err(Error::new(EINVAL));
                }
                let hcall = unsafe { &mut hyperv.u.hcall };
                if data.len() != std::mem::size_of::<u64>() {
                    return Err(Error::new(EINVAL));
                }
                // Store the caller-provided hypercall result where the kernel will read it.
                let mut result = [0u8; size_of::<u64>()];
                result.copy_from_slice(data);
                hcall.result = u64::from_ne_bytes(result);
                Ok(())
            }
            _ => Err(Error::new(EINVAL)),
        }
    }

    /// Sets the bit that requests an immediate exit.
    #[allow(clippy::cast_ptr_alignment)]
    pub fn set_immediate_exit(&self, exit: bool) {
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        run.immediate_exit = if exit { 1 } else { 0 };
    }

    /// Sets/clears the bit for immediate exit for the vcpu on the current thread.
    pub fn set_local_immediate_exit(exit: bool) {
        VCPU_THREAD.with(|v| {
            if let Some(state) = &(*v.borrow()) {
                // Safe because the kvm_run pointer in VCPU_THREAD remains valid while it is
                // registered in the thread-local.
                unsafe {
                    (*state.run).immediate_exit = if exit { 1 } else { 0 };
                };
            }
        });
    }

    /// Gets the VCPU registers.
    #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
    pub fn get_regs(&self) -> Result<kvm_regs> {
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut regs = unsafe { std::mem::zeroed() };
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_REGS(), &mut regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(regs)
    }

    /// Sets the VCPU registers.
    #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
    pub fn set_regs(&self, regs: &kvm_regs) -> Result<()> {
        // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_REGS(), regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Gets the VCPU special registers.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_sregs(&self) -> Result<kvm_sregs> {
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut regs = unsafe { std::mem::zeroed() };
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS(), &mut regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(regs)
    }

    /// Sets the VCPU special registers.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_sregs(&self, sregs: &kvm_sregs) -> Result<()> {
        // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_SREGS(), sregs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Gets the VCPU FPU registers.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_fpu(&self) -> Result<kvm_fpu> {
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut regs = unsafe { std::mem::zeroed() };
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_FPU(), &mut regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(regs)
    }

    /// X86 specific call to setup the FPU.
    ///
    /// See the documentation for KVM_SET_FPU.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_fpu(&self, fpu: &kvm_fpu) -> Result<()> {
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_fpu struct.
            ioctl_with_ref(self, KVM_SET_FPU(), fpu)
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Gets the VCPU debug registers.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_debugregs(&self) -> Result<kvm_debugregs> {
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut regs = unsafe { std::mem::zeroed() };
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS(), &mut regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(regs)
    }

    /// Sets the VCPU debug registers.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_debugregs(&self, dregs: &kvm_debugregs) -> Result<()> {
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_debugregs struct.
            ioctl_with_ref(self, KVM_SET_DEBUGREGS(), dregs)
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Gets the VCPU extended control registers.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_xcrs(&self) -> Result<kvm_xcrs> {
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let mut regs = unsafe { std::mem::zeroed() };
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XCRS(), &mut regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(regs)
    }

    /// Sets the VCPU extended control registers.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()> {
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_xcrs struct.
            ioctl_with_ref(self, KVM_SET_XCRS(), xcrs)
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// X86 specific call to get the MSRs.
    ///
    /// See the documentation for KVM_GET_MSRS.
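    ///
    /// A sketch (assuming MSR index `0x10`, the x86 TSC, is readable on this host):
    ///
    /// ```ignore
    /// let mut msrs = vec![kvm_msr_entry { index: 0x10, ..Default::default() }];
    /// vcpu.get_msrs(&mut msrs)?;
    /// let tsc = msrs[0].data;
    /// ```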
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_msrs(&self, msr_entries: &mut Vec<kvm_msr_entry>) -> Result<()> {
        let mut msrs = vec_with_array_field::<kvm_msrs, kvm_msr_entry>(msr_entries.len());
        unsafe {
            // Mapping the unsized array to a slice is unsafe because the length isn't known.
            // Providing the length used to create the struct guarantees the entire slice is valid.
            let entries: &mut [kvm_msr_entry] = msrs[0].entries.as_mut_slice(msr_entries.len());
            entries.copy_from_slice(&msr_entries);
        }
        msrs[0].nmsrs = msr_entries.len() as u32;
        let ret = unsafe {
            // Here we trust the kernel not to read or write past the end of the kvm_msrs struct.
            ioctl_with_ref(self, KVM_GET_MSRS(), &msrs[0])
        };
        if ret < 0 {
            // KVM_GET_MSRS actually returns the number of msr entries read on success.
            return errno_result();
        }
        unsafe {
            let count = ret as usize;
            assert!(count <= msr_entries.len());
            let entries: &mut [kvm_msr_entry] = msrs[0].entries.as_mut_slice(count);
            msr_entries.truncate(count);
            msr_entries.copy_from_slice(&entries);
        }
        Ok(())
    }

    /// X86 specific call to setup the MSRs.
    ///
    /// See the documentation for KVM_SET_MSRS.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_msrs(&self, msrs: &kvm_msrs) -> Result<()> {
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_msrs struct.
            ioctl_with_ref(self, KVM_SET_MSRS(), msrs)
        };
        if ret < 0 {
            // KVM_SET_MSRS actually returns the number of msr entries written on success.
            return errno_result();
        }
        Ok(())
    }

    /// X86 specific call to setup the CPUID registers.
    ///
    /// See the documentation for KVM_SET_CPUID2.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_cpuid2(&self, cpuid: &CpuId) -> Result<()> {
        let ret = unsafe {
            // Here we trust the kernel not to read past the end of the kvm_cpuid2 struct.
            ioctl_with_ptr(self, KVM_SET_CPUID2(), cpuid.as_ptr())
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// X86 specific call to get the system emulated hyper-v CPUID values.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_hyperv_cpuid(&self) -> Result<CpuId> {
        const MAX_KVM_CPUID_ENTRIES: usize = 256;
        let mut cpuid = CpuId::new(MAX_KVM_CPUID_ENTRIES);

        let ret = unsafe {
            // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
            // allocated for the struct. The limit is read from nent, which is set to the allocated
            // size (MAX_KVM_CPUID_ENTRIES) above.
            ioctl_with_mut_ptr(self, KVM_GET_SUPPORTED_HV_CPUID(), cpuid.as_mut_ptr())
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(cpuid)
    }

    /// X86 specific call to get the state of the "Local Advanced Programmable Interrupt
    /// Controller".
    ///
    /// See the documentation for KVM_GET_LAPIC.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_lapic(&self) -> Result<kvm_lapic_state> {
        let mut klapic: kvm_lapic_state = Default::default();

        let ret = unsafe {
            // The ioctl is unsafe unless you trust the kernel not to write past the end of the
            // local_apic struct.
            ioctl_with_mut_ref(self, KVM_GET_LAPIC(), &mut klapic)
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(klapic)
    }

    /// X86 specific call to set the state of the "Local Advanced Programmable Interrupt
    /// Controller".
    ///
    /// See the documentation for KVM_SET_LAPIC.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()> {
        let ret = unsafe {
            // The ioctl is safe because the kernel will only read from the klapic struct.
            ioctl_with_ref(self, KVM_SET_LAPIC(), klapic)
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Gets the vcpu's current "multiprocessing state".
    ///
    /// See the documentation for KVM_GET_MP_STATE. This call can only succeed after
    /// a call to `Vm::create_irq_chip`.
    ///
    /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
    /// to run crosvm on s390.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
        // Safe because we know that our file is a VCPU fd, we know the kernel will only
        // write the correct amount of memory to our pointer, and we verify the return result.
        let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE(), &mut state) };
        if ret < 0 {
            return errno_result();
        }
        Ok(state)
    }

    /// Sets the vcpu's current "multiprocessing state".
    ///
    /// See the documentation for KVM_SET_MP_STATE. This call can only succeed after
    /// a call to `Vm::create_irq_chip`.
    ///
    /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
    /// to run crosvm on s390.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
        let ret = unsafe {
            // The ioctl is safe because the kernel will only read from the kvm_mp_state struct.
            ioctl_with_ref(self, KVM_SET_MP_STATE(), state)
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Gets the vcpu's currently pending exceptions, interrupts, NMIs, etc.
    ///
    /// See the documentation for KVM_GET_VCPU_EVENTS.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn get_vcpu_events(&self) -> Result<kvm_vcpu_events> {
        // Safe because we know that our file is a VCPU fd, we know the kernel
        // will only write the correct amount of memory to our pointer, and we
        // verify the return result.
        let mut events: kvm_vcpu_events = unsafe { std::mem::zeroed() };
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS(), &mut events) };
        if ret < 0 {
            return errno_result();
        }
        Ok(events)
    }

    /// Sets the vcpu's currently pending exceptions, interrupts, NMIs, etc.
    ///
    /// See the documentation for KVM_SET_VCPU_EVENTS.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn set_vcpu_events(&self, events: &kvm_vcpu_events) -> Result<()> {
        let ret = unsafe {
            // The ioctl is safe because the kernel will only read from the
            // kvm_vcpu_events struct.
            ioctl_with_ref(self, KVM_SET_VCPU_EVENTS(), events)
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Enable the specified capability.
    ///
    /// See the documentation for KVM_ENABLE_CAP.
    ///
    /// This function is marked as unsafe because `cap` may contain values which are interpreted
    /// as pointers by the kernel.
    pub unsafe fn kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
        // Safe because we allocated the struct and we know the kernel will read exactly the size
        // of the struct.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP(), cap);
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Specifies the set of signals that are blocked during execution of KVM_RUN.
    /// Signals that are not blocked will cause KVM_RUN to return with -EINTR.
    ///
    /// See the documentation for KVM_SET_SIGNAL_MASK.
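    ///
    /// For example (a sketch), block `SIGRTMIN() + 1` during `KVM_RUN` so it cannot interrupt
    /// the guest:
    ///
    /// ```ignore
    /// vcpu.set_signal_mask(&[SIGRTMIN() + 1])?;
    /// ```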
    pub fn set_signal_mask(&self, signals: &[c_int]) -> Result<()> {
        let sigset = signal::create_sigset(signals)?;

        let mut kvm_sigmask = vec_with_array_field::<kvm_signal_mask, sigset_t>(1);
        // The Rust definition of sigset_t takes 128 bytes, but the kernel only
        // expects an 8-byte structure, so we can't write
        // kvm_sigmask.len = size_of::<sigset_t>() as u32;
        kvm_sigmask[0].len = 8;
        // Compile-time check that sigset_t is at least 8 bytes, so the 8-byte copy below cannot
        // read out of bounds (the subtraction underflows, and fails to build, otherwise).
        const _ASSERT: usize = size_of::<sigset_t>() - 8usize;

        // Safe as we allocated exactly the needed space.
        unsafe {
            copy_nonoverlapping(
                &sigset as *const sigset_t as *const u8,
                kvm_sigmask[0].sigset.as_mut_ptr(),
                8,
            );
        }

        let ret = unsafe {
            // The ioctl is safe because the kernel will only read from the
            // kvm_signal_mask structure.
            ioctl_with_ref(self, KVM_SET_SIGNAL_MASK(), &kvm_sigmask[0])
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
1461
1462 /// Sets the value of one register on this VCPU. The id of the register is
1463 /// encoded as specified in the kernel documentation for KVM_SET_ONE_REG.
1464 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
set_one_reg(&self, reg_id: u64, data: u64) -> Result<()>1465 pub fn set_one_reg(&self, reg_id: u64, data: u64) -> Result<()> {
1466 let data_ref = &data as *const u64;
1467 let onereg = kvm_one_reg {
1468 id: reg_id,
1469 addr: data_ref as u64,
1470 };
1471 // safe because we allocated the struct and we know the kernel will read
1472 // exactly the size of the struct
1473 let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG(), &onereg) };
1474 if ret < 0 {
1475 return errno_result();
1476 }
1477 Ok(())
1478 }
1479 }
1480
1481 impl AsRawDescriptor for Vcpu {
as_raw_descriptor(&self) -> RawDescriptor1482 fn as_raw_descriptor(&self) -> RawDescriptor {
1483 self.vcpu.as_raw_descriptor()
1484 }
1485 }
1486
1487 /// A Vcpu that has a thread and can be run. Created by calling `to_runnable` on a `Vcpu`.
1488 /// Implements `Deref` to a `Vcpu` so all `Vcpu` methods are usable, with the addition of the `run`
1489 /// function to execute the guest.
1490 pub struct RunnableVcpu {
1491 vcpu: Vcpu,
1492 // vcpus must stay on the same thread once they start.
1493 // Add the PhantomData pointer to ensure RunnableVcpu is not `Send`.
1494 phantom: std::marker::PhantomData<*mut u8>,
1495 }
1496
1497 impl RunnableVcpu {
1498 /// Runs the VCPU until it exits, returning the reason for the exit.
1499 ///
1500 /// Note that the state of the VCPU and associated VM must be setup first for this to do
1501 /// anything useful.
    #[allow(clippy::cast_ptr_alignment)]
    // The pointer is page aligned so casting to a different type is well defined, hence the clippy
    // allow attribute.
    pub fn run(&self) -> Result<VcpuExit> {
        // Safe because we know that our file is a VCPU fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_RUN()) };
        if ret == 0 {
            // Safe because we know we mapped enough memory to hold the kvm_run struct because the
            // kernel told us how large it was.
            let run = unsafe { &*(self.run_mmap.as_ptr() as *const kvm_run) };
            match run.exit_reason {
                KVM_EXIT_IO => {
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let io = unsafe { run.__bindgen_anon_1.io };
                    let port = io.port;
                    let size = (io.count as usize) * (io.size as usize);
                    match io.direction as u32 {
                        KVM_EXIT_IO_IN => Ok(VcpuExit::IoIn { port, size }),
                        KVM_EXIT_IO_OUT => {
                            // This fixed-size buffer captures at most 8 bytes; `size` may be
                            // larger, in which case only the leading bytes are copied out.
                            let mut data = [0; 8];
                            let run_start = run as *const kvm_run as *const u8;
                            // The data_offset is defined by the kernel to be some number of bytes
                            // into the kvm_run structure, which we have fully mmap'd.
                            unsafe {
                                let data_ptr = run_start.offset(io.data_offset as isize);
                                copy_nonoverlapping(
                                    data_ptr,
                                    data.as_mut_ptr(),
                                    min(size, data.len()),
                                );
                            }
                            Ok(VcpuExit::IoOut { port, size, data })
                        }
                        _ => Err(Error::new(EINVAL)),
                    }
                }
                KVM_EXIT_MMIO => {
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let mmio = unsafe { &run.__bindgen_anon_1.mmio };
                    let address = mmio.phys_addr;
                    let size = min(mmio.len as usize, mmio.data.len());
                    if mmio.is_write != 0 {
                        Ok(VcpuExit::MmioWrite {
                            address,
                            size,
                            data: mmio.data,
                        })
                    } else {
                        Ok(VcpuExit::MmioRead { address, size })
                    }
                }
                KVM_EXIT_IOAPIC_EOI => {
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let vector = unsafe { run.__bindgen_anon_1.eoi.vector };
                    Ok(VcpuExit::IoapicEoi { vector })
                }
                KVM_EXIT_HYPERV => {
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let hyperv = unsafe { &run.__bindgen_anon_1.hyperv };
                    match hyperv.type_ as u32 {
                        KVM_EXIT_HYPERV_SYNIC => {
                            let synic = unsafe { &hyperv.u.synic };
                            Ok(VcpuExit::HypervSynic {
                                msr: synic.msr,
                                control: synic.control,
                                evt_page: synic.evt_page,
                                msg_page: synic.msg_page,
                            })
                        }
                        KVM_EXIT_HYPERV_HCALL => {
                            let hcall = unsafe { &hyperv.u.hcall };
                            Ok(VcpuExit::HypervHcall {
                                input: hcall.input,
                                params: hcall.params,
                            })
                        }
                        _ => Err(Error::new(EINVAL)),
                    }
                }
                KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
                KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
                KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
                KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
                KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
                KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
                KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
                KVM_EXIT_FAIL_ENTRY => {
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let hardware_entry_failure_reason = unsafe {
                        run.__bindgen_anon_1
                            .fail_entry
                            .hardware_entry_failure_reason
                    };
                    Ok(VcpuExit::FailEntry {
                        hardware_entry_failure_reason,
                    })
                }
                KVM_EXIT_INTR => Ok(VcpuExit::Intr),
                KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
                KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
                KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
                KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
                KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
                KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
                KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
                KVM_EXIT_OSI => Ok(VcpuExit::Osi),
                KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
                KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
                KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
                KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
                KVM_EXIT_EPR => Ok(VcpuExit::Epr),
                KVM_EXIT_SYSTEM_EVENT => {
                    // Safe because we know the exit reason told us this union
                    // field is valid.
                    let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
                    let event_flags = unsafe { run.__bindgen_anon_1.system_event.flags };
                    Ok(VcpuExit::SystemEvent(event_type, event_flags))
                }
                r => panic!("unknown kvm exit reason: {}", r),
            }
        } else {
            errno_result()
        }
    }
}

impl Deref for RunnableVcpu {
    type Target = Vcpu;
    fn deref(&self) -> &Self::Target {
        &self.vcpu
    }
}

impl DerefMut for RunnableVcpu {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.vcpu
    }
}

impl AsRawDescriptor for RunnableVcpu {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}

impl Drop for RunnableVcpu {
    fn drop(&mut self) {
        VCPU_THREAD.with(|v| {
            // This assumes that a failure in `BlockedSignal::new` means the signal is already
            // blocked and therefore should not be unblocked on exit.
            let _blocked_signal = &(*v.borrow())
                .as_ref()
                .and_then(|state| state.signal_num)
                .map(BlockedSignal::new);

            *v.borrow_mut() = None;
        });
    }
}

/// Wrapper for kvm_cpuid2 which has a zero length array at the end.
/// Hides the zero length array behind a bounds check.
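///
/// # Example
///
/// A sketch of listing the host-supported CPUID entries (assumes a working
/// `/dev/kvm`; error handling is elided):
///
/// ```ignore
/// let kvm = Kvm::new().unwrap();
/// let mut cpuid = kvm.get_supported_cpuid().unwrap();
/// for entry in cpuid.mut_entries_slice() {
///     println!("function {:#x} index {:#x}: eax={:#x}", entry.function, entry.index, entry.eax);
/// }
/// ```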
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub type CpuId = FlexibleArrayWrapper<kvm_cpuid2, kvm_cpuid_entry2>;

// Represents a temporarily blocked signal. It will unblock the signal when dropped.
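//
// Usage sketch (illustrative only; any blockable signal number works here):
//
//     if let Some(_guard) = BlockedSignal::new(SIGRTMIN() + 0) {
//         // The signal remains blocked until `_guard` goes out of scope.
//     }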
struct BlockedSignal {
    signal_num: c_int,
}

impl BlockedSignal {
    // Returns a `BlockedSignal` if the specified signal can be blocked, otherwise None.
    fn new(signal_num: c_int) -> Option<BlockedSignal> {
        if block_signal(signal_num).is_ok() {
            Some(BlockedSignal { signal_num })
        } else {
            None
        }
    }
}

impl Drop for BlockedSignal {
    fn drop(&mut self) {
        unblock_signal(self.signal_num).expect("failed to restore signal mask");
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn dirty_log_size() {
        let page_size = pagesize();
        assert_eq!(dirty_log_bitmap_size(0), 0);
        assert_eq!(dirty_log_bitmap_size(page_size), 1);
        assert_eq!(dirty_log_bitmap_size(page_size * 8), 1);
        assert_eq!(dirty_log_bitmap_size(page_size * 8 + 1), 2);
        assert_eq!(dirty_log_bitmap_size(page_size * 100), 13);
    }

    #[test]
    fn new() {
        Kvm::new().unwrap();
    }

    #[test]
    fn create_vm() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x1000)]).unwrap();
        Vm::new(&kvm, gm).unwrap();
    }

    #[test]
    fn check_extension() {
        let kvm = Kvm::new().unwrap();
        assert!(kvm.check_extension(Cap::UserMemory));
        // I assume nobody is testing this on s390.
        assert!(!kvm.check_extension(Cap::S390UserSigp));
    }

    #[test]
    fn check_vm_extension() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x1000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        assert!(vm.check_extension(Cap::UserMemory));
        // I assume nobody is testing this on s390.
        assert!(!vm.check_extension(Cap::S390UserSigp));
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn get_supported_cpuid() {
        let kvm = Kvm::new().unwrap();
        let mut cpuid = kvm.get_supported_cpuid().unwrap();
        let cpuid_entries = cpuid.mut_entries_slice();
        assert!(!cpuid_entries.is_empty());
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn get_emulated_cpuid() {
        let kvm = Kvm::new().unwrap();
        kvm.get_emulated_cpuid().unwrap();
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn get_msr_index_list() {
        let kvm = Kvm::new().unwrap();
        let msr_list = kvm.get_msr_index_list().unwrap();
        assert!(msr_list.len() >= 2);
    }

    #[test]
    fn add_memory() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![
            (GuestAddress(0), 0x1000),
            (GuestAddress(0x5000), 0x5000),
        ])
        .unwrap();
        let mut vm = Vm::new(&kvm, gm).unwrap();
        let mem_size = 0x1000;
        let mem = MemoryMappingBuilder::new(mem_size).build().unwrap();
        vm.add_memory_region(GuestAddress(0x1000), Box::new(mem), false, false)
            .unwrap();
        let mem = MemoryMappingBuilder::new(mem_size).build().unwrap();
        vm.add_memory_region(GuestAddress(0x10000), Box::new(mem), false, false)
            .unwrap();
    }

    #[test]
    fn add_memory_ro() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x1000)]).unwrap();
        let mut vm = Vm::new(&kvm, gm).unwrap();
        let mem_size = 0x1000;
        let mem = MemoryMappingBuilder::new(mem_size).build().unwrap();
        vm.add_memory_region(GuestAddress(0x1000), Box::new(mem), true, false)
            .unwrap();
    }

    #[test]
    fn remove_memory_region() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x1000)]).unwrap();
        let mut vm = Vm::new(&kvm, gm).unwrap();
        let mem_size = 0x1000;
        let mem = MemoryMappingBuilder::new(mem_size).build().unwrap();
        let mem_ptr = mem.as_ptr();
        let slot = vm
            .add_memory_region(GuestAddress(0x1000), Box::new(mem), false, false)
            .unwrap();
        let removed_mem = vm.remove_memory_region(slot).unwrap();
        assert_eq!(removed_mem.size(), mem_size);
        assert_eq!(removed_mem.as_ptr(), mem_ptr);
    }

    #[test]
    fn remove_invalid_memory() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x1000)]).unwrap();
        let mut vm = Vm::new(&kvm, gm).unwrap();
        assert!(vm.remove_memory_region(0).is_err());
    }

    #[test]
    fn overlap_memory() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let mut vm = Vm::new(&kvm, gm).unwrap();
        let mem_size = 0x2000;
        let mem = MemoryMappingBuilder::new(mem_size).build().unwrap();
        assert!(vm
            .add_memory_region(GuestAddress(0x2000), Box::new(mem), false, false)
            .is_err());
    }

    #[test]
    fn get_memory() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x1000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        let obj_addr = GuestAddress(0xf0);
        vm.get_memory().write_obj_at_addr(67u8, obj_addr).unwrap();
        let read_val: u8 = vm.get_memory().read_obj_from_addr(obj_addr).unwrap();
        assert_eq!(read_val, 67u8);
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn clock_handling() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        let mut clock_data = vm.get_clock().unwrap();
        clock_data.clock += 1000;
        vm.set_clock(&clock_data).unwrap();
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn pic_handling() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        vm.create_irq_chip().unwrap();
        let pic_state = vm.get_pic_state(PicId::Secondary).unwrap();
        vm.set_pic_state(PicId::Secondary, &pic_state).unwrap();
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn ioapic_handling() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        vm.create_irq_chip().unwrap();
        let ioapic_state = vm.get_ioapic_state().unwrap();
        vm.set_ioapic_state(&ioapic_state).unwrap();
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn pit_handling() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        vm.create_irq_chip().unwrap();
        vm.create_pit().unwrap();
        let pit_state = vm.get_pit_state().unwrap();
        vm.set_pit_state(&pit_state).unwrap();
    }

    #[test]
    fn register_ioevent() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        let evtfd = Event::new().unwrap();
        vm.register_ioevent(&evtfd, IoeventAddress::Pio(0xf4), Datamatch::AnyLength)
            .unwrap();
        vm.register_ioevent(&evtfd, IoeventAddress::Mmio(0x1000), Datamatch::AnyLength)
            .unwrap();
        vm.register_ioevent(
            &evtfd,
            IoeventAddress::Pio(0xc1),
            Datamatch::U8(Some(0x7fu8)),
        )
        .unwrap();
        vm.register_ioevent(
            &evtfd,
            IoeventAddress::Pio(0xc2),
            Datamatch::U16(Some(0x1337u16)),
        )
        .unwrap();
        vm.register_ioevent(
            &evtfd,
            IoeventAddress::Pio(0xc4),
            Datamatch::U32(Some(0xdeadbeefu32)),
        )
        .unwrap();
        vm.register_ioevent(
            &evtfd,
            IoeventAddress::Pio(0xc8),
            Datamatch::U64(Some(0xdeadbeefdeadbeefu64)),
        )
        .unwrap();
    }

    #[test]
    fn unregister_ioevent() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        let evtfd = Event::new().unwrap();
        vm.register_ioevent(&evtfd, IoeventAddress::Pio(0xf4), Datamatch::AnyLength)
            .unwrap();
        vm.register_ioevent(&evtfd, IoeventAddress::Mmio(0x1000), Datamatch::AnyLength)
            .unwrap();
        vm.register_ioevent(
            &evtfd,
            IoeventAddress::Mmio(0x1004),
            Datamatch::U8(Some(0x7fu8)),
        )
        .unwrap();
        vm.unregister_ioevent(&evtfd, IoeventAddress::Pio(0xf4), Datamatch::AnyLength)
            .unwrap();
        vm.unregister_ioevent(&evtfd, IoeventAddress::Mmio(0x1000), Datamatch::AnyLength)
            .unwrap();
        vm.unregister_ioevent(
            &evtfd,
            IoeventAddress::Mmio(0x1004),
            Datamatch::U8(Some(0x7fu8)),
        )
        .unwrap();
    }

    #[test]
    fn irqfd_resample() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        let evtfd1 = Event::new().unwrap();
        let evtfd2 = Event::new().unwrap();
        vm.create_irq_chip().unwrap();
        vm.register_irqfd_resample(&evtfd1, &evtfd2, 4).unwrap();
        vm.unregister_irqfd(&evtfd1, 4).unwrap();
        // Ensures the ioctl is actually reading the resamplefd.
        vm.register_irqfd_resample(&evtfd1, unsafe { &Event::from_raw_descriptor(-1) }, 4)
            .unwrap_err();
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn set_gsi_routing() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        vm.create_irq_chip().unwrap();
        vm.set_gsi_routing(&[]).unwrap();
        vm.set_gsi_routing(&[IrqRoute {
            gsi: 1,
            source: IrqSource::Irqchip {
                chip: KVM_IRQCHIP_IOAPIC,
                pin: 3,
            },
        }])
        .unwrap();
        vm.set_gsi_routing(&[IrqRoute {
            gsi: 1,
            source: IrqSource::Msi {
                address: 0xf000000,
                data: 0xa0,
            },
        }])
        .unwrap();
        vm.set_gsi_routing(&[
            IrqRoute {
                gsi: 1,
                source: IrqSource::Irqchip {
                    chip: KVM_IRQCHIP_IOAPIC,
                    pin: 3,
                },
            },
            IrqRoute {
                gsi: 2,
                source: IrqSource::Msi {
                    address: 0xf000000,
                    data: 0xa0,
                },
            },
        ])
        .unwrap();
    }

    #[test]
    fn create_vcpu() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        Vcpu::new(0, &kvm, &vm).unwrap();
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn debugregs() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        let vcpu = Vcpu::new(0, &kvm, &vm).unwrap();
        let mut dregs = vcpu.get_debugregs().unwrap();
        dregs.dr7 = 13;
        vcpu.set_debugregs(&dregs).unwrap();
        let dregs2 = vcpu.get_debugregs().unwrap();
        assert_eq!(dregs.dr7, dregs2.dr7);
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn xcrs() {
        let kvm = Kvm::new().unwrap();
        if !kvm.check_extension(Cap::Xcrs) {
            return;
        }

        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        let vcpu = Vcpu::new(0, &kvm, &vm).unwrap();
        let mut xcrs = vcpu.get_xcrs().unwrap();
        xcrs.xcrs[0].value = 1;
        vcpu.set_xcrs(&xcrs).unwrap();
        let xcrs2 = vcpu.get_xcrs().unwrap();
        assert_eq!(xcrs.xcrs[0].value, xcrs2.xcrs[0].value);
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn get_msrs() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        let vcpu = Vcpu::new(0, &kvm, &vm).unwrap();
        let mut msrs = vec![
            // This one should succeed.
            kvm_msr_entry {
                index: 0x0000011e,
                ..Default::default()
            },
            // This one will fail to fetch.
            kvm_msr_entry {
                index: 0x000003f1,
                ..Default::default()
            },
        ];
        vcpu.get_msrs(&mut msrs).unwrap();
        assert_eq!(msrs.len(), 1);
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn get_hyperv_cpuid() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        let vcpu = Vcpu::new(0, &kvm, &vm).unwrap();
        let cpuid = vcpu.get_hyperv_cpuid();
        // Older kernels don't support this, so tolerate this kind of failure.
        match cpuid {
            Ok(_) => {}
            Err(e) => {
                assert_eq!(e.errno(), EINVAL);
            }
        }
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn enable_feature() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        vm.create_irq_chip().unwrap();
        let vcpu = Vcpu::new(0, &kvm, &vm).unwrap();
        let mut cap: kvm_enable_cap = Default::default();
        cap.cap = kvm_sys::KVM_CAP_HYPERV_SYNIC;
        unsafe { vcpu.kvm_enable_cap(&cap) }.unwrap();
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn mp_state() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        vm.create_irq_chip().unwrap();
        let vcpu = Vcpu::new(0, &kvm, &vm).unwrap();
        let state = vcpu.get_mp_state().unwrap();
        vcpu.set_mp_state(&state).unwrap();
    }

    #[test]
    fn set_signal_mask() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        let vcpu = Vcpu::new(0, &kvm, &vm).unwrap();
        vcpu.set_signal_mask(&[base::SIGRTMIN() + 0]).unwrap();
    }

    #[test]
    fn vcpu_mmap_size() {
        let kvm = Kvm::new().unwrap();
        let mmap_size = kvm.get_vcpu_mmap_size().unwrap();
        let page_size = pagesize();
        assert!(mmap_size >= page_size);
        assert!(mmap_size % page_size == 0);
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn set_identity_map_addr() {
        let kvm = Kvm::new().unwrap();
        let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap();
        let vm = Vm::new(&kvm, gm).unwrap();
        vm.set_identity_map_addr(GuestAddress(0x20000)).unwrap();
    }
}