// Copyright 2020 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 use serde::{Deserialize, Serialize};
6 
7 use base::{error, Result};
8 use bit_field::*;
9 use downcast_rs::impl_downcast;
10 
11 use vm_memory::GuestAddress;
12 
13 use crate::{Hypervisor, IrqRoute, IrqSource, IrqSourceChip, Vcpu, Vm};
14 
15 /// A trait for managing cpuids for an x86_64 hypervisor and for checking its capabilities.
16 pub trait HypervisorX86_64: Hypervisor {
17     /// Get the system supported CPUID values.
get_supported_cpuid(&self) -> Result<CpuId>18     fn get_supported_cpuid(&self) -> Result<CpuId>;
19 
20     /// Get the system emulated CPUID values.
get_emulated_cpuid(&self) -> Result<CpuId>21     fn get_emulated_cpuid(&self) -> Result<CpuId>;
22 
23     /// Gets the list of supported MSRs.
get_msr_index_list(&self) -> Result<Vec<u32>>24     fn get_msr_index_list(&self) -> Result<Vec<u32>>;
25 }
26 
27 /// A wrapper for using a VM on x86_64 and getting/setting its state.
28 pub trait VmX86_64: Vm {
29     /// Gets the `HypervisorX86_64` that created this VM.
get_hypervisor(&self) -> &dyn HypervisorX86_6430     fn get_hypervisor(&self) -> &dyn HypervisorX86_64;
31 
32     /// Create a Vcpu with the specified Vcpu ID.
create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>>33     fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>>;
34 
35     /// Sets the address of the three-page region in the VM's address space.
set_tss_addr(&self, addr: GuestAddress) -> Result<()>36     fn set_tss_addr(&self, addr: GuestAddress) -> Result<()>;
37 
38     /// Sets the address of a one-page region in the VM's address space.
set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>39     fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>;
40 }
41 
42 /// A wrapper around creating and using a VCPU on x86_64.
43 pub trait VcpuX86_64: Vcpu {
44     /// Sets or clears the flag that requests the VCPU to exit when it becomes possible to inject
45     /// interrupts into the guest.
set_interrupt_window_requested(&self, requested: bool)46     fn set_interrupt_window_requested(&self, requested: bool);
47 
48     /// Checks if we can inject an interrupt into the VCPU.
ready_for_interrupt(&self) -> bool49     fn ready_for_interrupt(&self) -> bool;
50 
51     /// Injects interrupt vector `irq` into the VCPU.
interrupt(&self, irq: u32) -> Result<()>52     fn interrupt(&self, irq: u32) -> Result<()>;
53 
54     /// Injects a non-maskable interrupt into the VCPU.
inject_nmi(&self) -> Result<()>55     fn inject_nmi(&self) -> Result<()>;
56 
57     /// Gets the VCPU general purpose registers.
get_regs(&self) -> Result<Regs>58     fn get_regs(&self) -> Result<Regs>;
59 
60     /// Sets the VCPU general purpose registers.
set_regs(&self, regs: &Regs) -> Result<()>61     fn set_regs(&self, regs: &Regs) -> Result<()>;
62 
63     /// Gets the VCPU special registers.
get_sregs(&self) -> Result<Sregs>64     fn get_sregs(&self) -> Result<Sregs>;
65 
66     /// Sets the VCPU special registers.
set_sregs(&self, sregs: &Sregs) -> Result<()>67     fn set_sregs(&self, sregs: &Sregs) -> Result<()>;
68 
69     /// Gets the VCPU FPU registers.
get_fpu(&self) -> Result<Fpu>70     fn get_fpu(&self) -> Result<Fpu>;
71 
72     /// Sets the VCPU FPU registers.
set_fpu(&self, fpu: &Fpu) -> Result<()>73     fn set_fpu(&self, fpu: &Fpu) -> Result<()>;
74 
75     /// Gets the VCPU debug registers.
get_debugregs(&self) -> Result<DebugRegs>76     fn get_debugregs(&self) -> Result<DebugRegs>;
77 
78     /// Sets the VCPU debug registers.
set_debugregs(&self, debugregs: &DebugRegs) -> Result<()>79     fn set_debugregs(&self, debugregs: &DebugRegs) -> Result<()>;
80 
81     /// Gets the VCPU extended control registers.
get_xcrs(&self) -> Result<Vec<Register>>82     fn get_xcrs(&self) -> Result<Vec<Register>>;
83 
84     /// Sets the VCPU extended control registers.
set_xcrs(&self, xcrs: &[Register]) -> Result<()>85     fn set_xcrs(&self, xcrs: &[Register]) -> Result<()>;
86 
87     /// Gets the model-specific registers.  `msrs` specifies the MSR indexes to be queried, and
88     /// on success contains their indexes and values.
get_msrs(&self, msrs: &mut Vec<Register>) -> Result<()>89     fn get_msrs(&self, msrs: &mut Vec<Register>) -> Result<()>;
90 
91     /// Sets the model-specific registers.
set_msrs(&self, msrs: &[Register]) -> Result<()>92     fn set_msrs(&self, msrs: &[Register]) -> Result<()>;
93 
94     /// Sets up the data returned by the CPUID instruction.
set_cpuid(&self, cpuid: &CpuId) -> Result<()>95     fn set_cpuid(&self, cpuid: &CpuId) -> Result<()>;
96 
97     /// Gets the system emulated hyper-v CPUID values.
get_hyperv_cpuid(&self) -> Result<CpuId>98     fn get_hyperv_cpuid(&self) -> Result<CpuId>;
99 
100     /// Sets up debug registers and configure vcpu for handling guest debug events.
set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>101     fn set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>;
102 }
103 
104 impl_downcast!(VcpuX86_64);
105 
/// A CpuId Entry contains supported feature information for the given processor.
/// This can be modified by the hypervisor to pass additional information to the guest kernel
/// about the hypervisor or vm. Information is returned in the eax, ebx, ecx and edx registers
/// by the cpu for a given function and index/subfunction (passed into the cpu via the eax and ecx
/// register respectively).
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct CpuIdEntry {
    /// CPUID function (leaf), passed to the cpu in eax.
    pub function: u32,
    /// CPUID index (subfunction), passed to the cpu in ecx.
    pub index: u32,
    // flags is needed for KVM.  We store it on CpuIdEntry to preserve the flags across
    // get_supported_cpuids() -> kvm_cpuid2 -> CpuId -> kvm_cpuid2 -> set_cpuid().
    pub flags: u32,
    /// Value returned in eax.
    pub eax: u32,
    /// Value returned in ebx.
    pub ebx: u32,
    /// Value returned in ecx.
    pub ecx: u32,
    /// Value returned in edx.
    pub edx: u32,
}
124 
125 /// A container for the list of cpu id entries for the hypervisor and underlying cpu.
126 pub struct CpuId {
127     pub cpu_id_entries: Vec<CpuIdEntry>,
128 }
129 
130 impl CpuId {
131     /// Constructs a new CpuId, with space allocated for `initial_capacity` CpuIdEntries.
new(initial_capacity: usize) -> Self132     pub fn new(initial_capacity: usize) -> Self {
133         CpuId {
134             cpu_id_entries: Vec::with_capacity(initial_capacity),
135         }
136     }
137 }
138 
139 #[bitfield]
140 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
141 pub enum DestinationMode {
142     Physical = 0,
143     Logical = 1,
144 }
145 
146 #[bitfield]
147 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
148 pub enum TriggerMode {
149     Edge = 0,
150     Level = 1,
151 }
152 
153 #[bitfield]
154 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
155 pub enum DeliveryMode {
156     Fixed = 0b000,
157     Lowest = 0b001,
158     SMI = 0b010,        // System management interrupt
159     RemoteRead = 0b011, // This is no longer supported by intel.
160     NMI = 0b100,        // Non maskable interrupt
161     Init = 0b101,
162     Startup = 0b110,
163     External = 0b111,
164 }
165 
166 // These MSI structures are for Intel's implementation of MSI.  The PCI spec defines most of MSI,
167 // but the Intel spec defines the format of messages for raising interrupts.  The PCI spec defines
168 // three u32s -- the address, address_high, and data -- but Intel only makes use of the address and
169 // data.  The Intel portion of the specification is in Volume 3 section 10.11.
170 #[bitfield]
171 #[derive(Clone, Copy, PartialEq, Eq)]
172 pub struct MsiAddressMessage {
173     pub reserved: BitField2,
174     #[bits = 1]
175     pub destination_mode: DestinationMode,
176     pub redirection_hint: BitField1,
177     pub reserved_2: BitField8,
178     pub destination_id: BitField8,
179     // According to Intel's implementation of MSI, these bits must always be 0xfee.
180     pub always_0xfee: BitField12,
181 }
182 
183 #[bitfield]
184 #[derive(Clone, Copy, PartialEq, Eq)]
185 pub struct MsiDataMessage {
186     pub vector: BitField8,
187     #[bits = 3]
188     pub delivery_mode: DeliveryMode,
189     pub reserved: BitField3,
190     #[bits = 1]
191     pub level: Level,
192     #[bits = 1]
193     pub trigger: TriggerMode,
194     pub reserved2: BitField16,
195 }
196 
197 #[bitfield]
198 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
199 pub enum DeliveryStatus {
200     Idle = 0,
201     Pending = 1,
202 }
203 
204 /// The level of a level-triggered interrupt: asserted or deasserted.
205 #[bitfield]
206 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
207 pub enum Level {
208     Deassert = 0,
209     Assert = 1,
210 }
211 
212 /// Represents a IOAPIC redirection table entry.
213 #[bitfield]
214 #[derive(Clone, Copy, Default, PartialEq, Eq)]
215 pub struct IoapicRedirectionTableEntry {
216     vector: BitField8,
217     #[bits = 3]
218     delivery_mode: DeliveryMode,
219     #[bits = 1]
220     dest_mode: DestinationMode,
221     #[bits = 1]
222     delivery_status: DeliveryStatus,
223     polarity: BitField1,
224     remote_irr: bool,
225     #[bits = 1]
226     trigger_mode: TriggerMode,
227     interrupt_mask: bool, // true iff interrupts are masked.
228     reserved: BitField39,
229     dest_id: BitField8,
230 }
231 
/// Number of pins on the IOAPIC.
pub const NUM_IOAPIC_PINS: usize = 24;
234 
235 /// Represents the state of the IOAPIC.
236 #[repr(C)]
237 #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
238 pub struct IoapicState {
239     /// base_address is the memory base address for this IOAPIC. It cannot be changed.
240     pub base_address: u64,
241     /// ioregsel register. Used for selecting which entry of the redirect table to read/write.
242     pub ioregsel: u8,
243     /// ioapicid register. Bits 24 - 27 contain the APIC ID for this device.
244     pub ioapicid: u32,
245     /// current_interrupt_level_bitmap represents a bitmap of the state of all of the irq lines
246     pub current_interrupt_level_bitmap: u32,
247     /// redirect_table contains the irq settings for each irq line
248     pub redirect_table: [IoapicRedirectionTableEntry; 24],
249 }
250 
/// Selects one of the two PICs: the primary (0) or the secondary (1).
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PicSelect {
    Primary = 0,
    Secondary = 1,
}
257 
258 #[repr(C)]
259 #[derive(enumn::N, Debug, Clone, Copy, PartialEq, Eq)]
260 pub enum PicInitState {
261     Icw1 = 0,
262     Icw2 = 1,
263     Icw3 = 2,
264     Icw4 = 3,
265 }
266 
267 /// Convenience implementation for converting from a u8
268 impl From<u8> for PicInitState {
from(item: u8) -> Self269     fn from(item: u8) -> Self {
270         PicInitState::n(item).unwrap_or_else(|| {
271             error!("Invalid PicInitState {}, setting to 0", item);
272             PicInitState::Icw1
273         })
274     }
275 }
276 
277 impl Default for PicInitState {
default() -> Self278     fn default() -> Self {
279         PicInitState::Icw1
280     }
281 }
282 
283 /// Represents the state of the PIC.
284 #[repr(C)]
285 #[derive(Clone, Copy, Default, Debug, PartialEq, Eq)]
286 pub struct PicState {
287     /// Edge detection.
288     pub last_irr: u8,
289     /// Interrupt Request Register.
290     pub irr: u8,
291     /// Interrupt Mask Register.
292     pub imr: u8,
293     /// Interrupt Service Register.
294     pub isr: u8,
295     /// Highest priority, for priority rotation.
296     pub priority_add: u8,
297     pub irq_base: u8,
298     pub read_reg_select: bool,
299     pub poll: bool,
300     pub special_mask: bool,
301     pub init_state: PicInitState,
302     pub auto_eoi: bool,
303     pub rotate_on_auto_eoi: bool,
304     pub special_fully_nested_mode: bool,
305     /// PIC takes either 3 or 4 bytes of initialization command word during
306     /// initialization. use_4_byte_icw is true if 4 bytes of ICW are needed.
307     pub use_4_byte_icw: bool,
308     /// "Edge/Level Control Registers", for edge trigger selection.
309     /// When a particular bit is set, the corresponding IRQ is in level-triggered mode. Otherwise it
310     /// is in edge-triggered mode.
311     pub elcr: u8,
312     pub elcr_mask: u8,
313 }
314 
/// The LapicState represents the state of an x86 CPU's Local APIC.
/// The Local APIC consists of 64 128-bit registers, but only the first 32-bits of each register
/// can be used, so this structure only stores the first 32-bits of each register.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct LapicState {
    /// The low 32 bits of each of the 64 Local APIC registers.
    pub regs: [LapicRegister; 64],
}

/// The stored (32-bit) portion of a single Local APIC register.
pub type LapicRegister = u32;
325 
326 // rust arrays longer than 32 need custom implementations of Debug
327 impl std::fmt::Debug for LapicState {
fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result328     fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
329         self.regs[..].fmt(formatter)
330     }
331 }
332 
333 // rust arrays longer than 32 need custom implementations of PartialEq
334 impl PartialEq for LapicState {
eq(&self, other: &LapicState) -> bool335     fn eq(&self, other: &LapicState) -> bool {
336         self.regs[..] == other.regs[..]
337     }
338 }
339 
340 // Lapic equality is reflexive, so we impl Eq
341 impl Eq for LapicState {}
342 
343 /// The PitState represents the state of the PIT (aka the Programmable Interval Timer).
344 /// The state is simply the state of it's three channels.
345 #[repr(C)]
346 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
347 pub struct PitState {
348     pub channels: [PitChannelState; 3],
349     /// Hypervisor-specific flags for setting the pit state.
350     pub flags: u32,
351 }
352 
353 /// The PitRWMode enum represents the access mode of a PIT channel.
354 /// Reads and writes to the Pit happen over Port-mapped I/O, which happens one byte at a time,
355 /// but the count values and latch values are two bytes. So the access mode controls which of the
356 /// two bytes will be read when.
357 #[repr(C)]
358 #[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq)]
359 pub enum PitRWMode {
360     /// None mode means that no access mode has been set.
361     None = 0,
362     /// Least mode means all reads/writes will read/write the least significant byte.
363     Least = 1,
364     /// Most mode means all reads/writes will read/write the most significant byte.
365     Most = 2,
366     /// Both mode means first the least significant byte will be read/written, then the
367     /// next read/write will read/write the most significant byte.
368     Both = 3,
369 }
370 
371 /// Convenience implementation for converting from a u8
372 impl From<u8> for PitRWMode {
from(item: u8) -> Self373     fn from(item: u8) -> Self {
374         PitRWMode::n(item).unwrap_or_else(|| {
375             error!("Invalid PitRWMode value {}, setting to 0", item);
376             PitRWMode::None
377         })
378     }
379 }
380 
381 /// The PitRWState enum represents the state of reading to or writing from a channel.
382 /// This is related to the PitRWMode, it mainly gives more detail about the state of the channel
383 /// with respect to PitRWMode::Both.
384 #[repr(C)]
385 #[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq)]
386 pub enum PitRWState {
387     /// None mode means that no access mode has been set.
388     None = 0,
389     /// LSB means that the channel is in PitRWMode::Least access mode.
390     LSB = 1,
391     /// MSB means that the channel is in PitRWMode::Most access mode.
392     MSB = 2,
393     /// Word0 means that the channel is in PitRWMode::Both mode, and the least sginificant byte
394     /// has not been read/written yet.
395     Word0 = 3,
396     /// Word1 means that the channel is in PitRWMode::Both mode and the least significant byte
397     /// has already been read/written, and the next byte to be read/written will be the most
398     /// significant byte.
399     Word1 = 4,
400 }
401 
402 /// Convenience implementation for converting from a u8
403 impl From<u8> for PitRWState {
from(item: u8) -> Self404     fn from(item: u8) -> Self {
405         PitRWState::n(item).unwrap_or_else(|| {
406             error!("Invalid PitRWState value {}, setting to 0", item);
407             PitRWState::None
408         })
409     }
410 }
411 
412 /// The PitChannelState represents the state of one of the PIT's three counters.
413 #[repr(C)]
414 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
415 pub struct PitChannelState {
416     /// The starting value for the counter.
417     pub count: u32,
418     /// Stores the channel count from the last time the count was latched.
419     pub latched_count: u16,
420     /// Indicates the PitRWState state of reading the latch value.
421     pub count_latched: PitRWState,
422     /// Indicates whether ReadBack status has been latched.
423     pub status_latched: bool,
424     /// Stores the channel status from the last time the status was latched. The status contains
425     /// information about the access mode of this channel, but changing those bits in the status
426     /// will not change the behavior of the pit.
427     pub status: u8,
428     /// Indicates the PitRWState state of reading the counter.
429     pub read_state: PitRWState,
430     /// Indicates the PitRWState state of writing the counter.
431     pub write_state: PitRWState,
432     /// Stores the value with which the counter was initialized. Counters are 16-
433     /// bit values with an effective range of 1-65536 (65536 represented by 0).
434     pub reload_value: u16,
435     /// The command access mode of this channel.
436     pub rw_mode: PitRWMode,
437     /// The operation mode of this channel.
438     pub mode: u8,
439     /// Whether or not we are in bcd mode. Not supported by KVM or crosvm's PIT implementation.
440     pub bcd: bool,
441     /// Value of the gate input pin. This only applies to channel 2.
442     pub gate: bool,
443     /// Nanosecond timestamp of when the count value was loaded.
444     pub count_load_time: u64,
445 }
446 
447 // Convenience constructors for IrqRoutes
448 impl IrqRoute {
ioapic_irq_route(irq_num: u32) -> IrqRoute449     pub fn ioapic_irq_route(irq_num: u32) -> IrqRoute {
450         IrqRoute {
451             gsi: irq_num,
452             source: IrqSource::Irqchip {
453                 chip: IrqSourceChip::Ioapic,
454                 pin: irq_num,
455             },
456         }
457     }
458 
pic_irq_route(id: IrqSourceChip, irq_num: u32) -> IrqRoute459     pub fn pic_irq_route(id: IrqSourceChip, irq_num: u32) -> IrqRoute {
460         IrqRoute {
461             gsi: irq_num,
462             source: IrqSource::Irqchip {
463                 chip: id,
464                 pin: irq_num % 8,
465             },
466         }
467     }
468 }
469 
/// State of a VCPU's general purpose registers.
///
/// Holds the sixteen 64-bit general purpose registers plus `rip` and `rflags`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone)]
pub struct Regs {
    pub rax: u64,
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub rsi: u64,
    pub rdi: u64,
    pub rsp: u64,
    pub rbp: u64,
    pub r8: u64,
    pub r9: u64,
    pub r10: u64,
    pub r11: u64,
    pub r12: u64,
    pub r13: u64,
    pub r14: u64,
    pub r15: u64,
    pub rip: u64,
    pub rflags: u64,
}
493 
/// State of a memory segment.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone)]
pub struct Segment {
    pub base: u64,
    pub limit: u32,
    pub selector: u16,
    // Named `type_` because `type` is a reserved word in Rust.
    pub type_: u8,
    pub present: u8,
    pub dpl: u8,
    pub db: u8,
    pub s: u8,
    pub l: u8,
    pub g: u8,
    pub avl: u8,
}
510 
/// State of a global descriptor table or interrupt descriptor table.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone)]
pub struct DescriptorTable {
    pub base: u64,
    pub limit: u16,
}
518 
519 /// State of a VCPU's special registers.
520 #[repr(C)]
521 #[derive(Debug, Default, Copy, Clone)]
522 pub struct Sregs {
523     pub cs: Segment,
524     pub ds: Segment,
525     pub es: Segment,
526     pub fs: Segment,
527     pub gs: Segment,
528     pub ss: Segment,
529     pub tr: Segment,
530     pub ldt: Segment,
531     pub gdt: DescriptorTable,
532     pub idt: DescriptorTable,
533     pub cr0: u64,
534     pub cr2: u64,
535     pub cr3: u64,
536     pub cr4: u64,
537     pub cr8: u64,
538     pub efer: u64,
539     pub apic_base: u64,
540 
541     /// A bitmap of pending external interrupts.  At most one bit may be set.  This interrupt has
542     /// been acknowledged by the APIC but not yet injected into the cpu core.
543     pub interrupt_bitmap: [u64; 4usize],
544 }
545 
/// State of a VCPU's floating point unit.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone)]
pub struct Fpu {
    /// Eight 16-byte register slots.
    pub fpr: [[u8; 16usize]; 8usize],
    pub fcw: u16,
    pub fsw: u16,
    pub ftwx: u8,
    pub last_opcode: u16,
    pub last_ip: u64,
    pub last_dp: u64,
    /// Sixteen 16-byte register slots.
    pub xmm: [[u8; 16usize]; 16usize],
    pub mxcsr: u32,
}
560 
/// State of a VCPU's debug registers.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone)]
pub struct DebugRegs {
    /// Four 64-bit debug register values.
    pub db: [u64; 4usize],
    pub dr6: u64,
    pub dr7: u64,
}
569 
570 /// State of one VCPU register.  Currently used for MSRs and XCRs.
571 #[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
572 pub struct Register {
573     pub id: u32,
574     pub value: u64,
575 }
576