1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::fmt::{self, Display};
6 use std::{mem, result};
7 
8 use base::{self, warn};
9 use hypervisor::{Fpu, Register, Regs, Sregs, VcpuX86_64};
10 use vm_memory::{GuestAddress, GuestMemory};
11 
12 use crate::gdt;
13 
/// Errors that can occur while configuring a vcpu's registers or while
/// writing boot structures (GDT, IDT, page tables) into guest memory.
#[derive(Debug)]
pub enum Error {
    /// Setting up msrs failed.
    MsrIoctlFailed(base::Error),
    /// Failed to configure the FPU.
    FpuIoctlFailed(base::Error),
    /// Failed to get sregs for this cpu.
    GetSRegsIoctlFailed(base::Error),
    /// Failed to set base registers for this cpu.
    SettingRegistersIoctl(base::Error),
    /// Failed to set sregs for this cpu.
    SetSRegsIoctlFailed(base::Error),
    /// Writing the GDT to RAM failed.
    WriteGDTFailure,
    /// Writing the IDT to RAM failed.
    WriteIDTFailure,
    /// Writing PML4 to RAM failed.
    WritePML4Address,
    /// Writing PDPTE to RAM failed.
    WritePDPTEAddress,
    /// Writing PDE to RAM failed.
    WritePDEAddress,
}
/// Shorthand result type for the register/memory setup operations in this module.
pub type Result<T> = result::Result<T, Error>;
38 
39 impl std::error::Error for Error {}
40 
41 impl Display for Error {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result42     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
43         use self::Error::*;
44 
45         match self {
46             MsrIoctlFailed(e) => write!(f, "setting up msrs failed: {}", e),
47             FpuIoctlFailed(e) => write!(f, "failed to configure the FPU: {}", e),
48             GetSRegsIoctlFailed(e) => write!(f, "failed to get sregs for this cpu: {}", e),
49             SettingRegistersIoctl(e) => {
50                 write!(f, "failed to set base registers for this cpu: {}", e)
51             }
52             SetSRegsIoctlFailed(e) => write!(f, "failed to set sregs for this cpu: {}", e),
53             WriteGDTFailure => write!(f, "writing the GDT to RAM failed"),
54             WriteIDTFailure => write!(f, "writing the IDT to RAM failed"),
55             WritePML4Address => write!(f, "writing PML4 to RAM failed"),
56             WritePDPTEAddress => write!(f, "writing PDPTE to RAM failed"),
57             WritePDEAddress => write!(f, "writing PDE to RAM failed"),
58         }
59     }
60 }
61 
// IA32 MTRR constants; see the Intel SDM, "Memory Type Range Registers".
const MTRR_MEMTYPE_UC: u8 = 0x0; // Memory type: uncacheable.
const MTRR_MEMTYPE_WB: u8 = 0x6; // Memory type: write-back.
const MTRR_VAR_VALID: u64 = 0x800; // Valid bit in a variable IA32_MTRR_PHYSMASKn MSR.
const MTRR_ENABLE: u64 = 0x800; // MTRR enable bit in IA32_MTRR_DEF_TYPE.
const MTRR_PHYS_BASE_MSR: u32 = 0x200; // MSR index of IA32_MTRR_PHYSBASE0.
const MTRR_PHYS_MASK_MSR: u32 = 0x201; // MSR index of IA32_MTRR_PHYSMASK0.
const VAR_MTRR_NUM_MASK: u64 = 0xFF; // VCNT field (low byte) of IA32_MTRRCAP.
69 
// Returns the value of the highest bit in a 64-bit value. Equivalent to
// 1 << HighBitSet(x)
fn get_power_of_two(data: u64) -> u64 {
    // `leading_zeros` locates the most significant set bit; shift a one back
    // into that position. The caller must pass a non-zero value, otherwise the
    // shift amount underflows.
    let high_bit = 63 - data.leading_zeros();
    1u64 << high_bit
}
75 
// Returns the max length which suitable for mtrr setting based on the
// specified (base, len)
fn get_max_len(base: u64, len: u64) -> u64 {
    // Start from the largest power of two not exceeding `len` (the value of
    // the highest set bit of `len`), then halve until the result also divides
    // `base`: an MTRR range must be aligned to at least its own size.
    let mut candidate = 1u64 << (63 - len.leading_zeros());

    while base % candidate != 0 {
        candidate /= 2;
    }

    candidate
}
87 
88 // For the specified (Base, Len), returns (base, len) pair which could be
89 // set into mtrr register. mtrr requires: the base-address alignment value can't be
90 // less than its length
get_mtrr_pairs(base: u64, len: u64) -> Vec<(u64, u64)>91 fn get_mtrr_pairs(base: u64, len: u64) -> Vec<(u64, u64)> {
92     let mut vecs = Vec::new();
93 
94     let mut remains = len;
95     let mut new = base;
96     while remains != 0 {
97         let max = get_max_len(new, remains);
98         vecs.push((new, max));
99         remains -= max;
100         new += max;
101     }
102 
103     vecs
104 }
105 
append_mtrr_entries(vpu: &dyn VcpuX86_64, pci_start: u64, entries: &mut Vec<Register>)106 fn append_mtrr_entries(vpu: &dyn VcpuX86_64, pci_start: u64, entries: &mut Vec<Register>) {
107     // Get VAR MTRR num from MSR_MTRRcap
108     let mut msrs = vec![Register {
109         id: crate::msr_index::MSR_MTRRcap,
110         ..Default::default()
111     }];
112     if vpu.get_msrs(&mut msrs).is_err() {
113         warn!("get msrs fail, guest with pass through device may be very slow");
114         return;
115     }
116     let var_num = msrs[0].value & VAR_MTRR_NUM_MASK;
117 
118     // Set pci_start .. 4G as UC
119     // all others are set to default WB
120     let pci_len = (1 << 32) - pci_start;
121     let vecs = get_mtrr_pairs(pci_start, pci_len);
122     if vecs.len() as u64 > var_num {
123         warn!(
124             "mtrr fail for pci mmio, please check pci_start addr,
125               guest with pass through device may be very slow"
126         );
127         return;
128     }
129 
130     let phys_mask: u64 = (1 << crate::cpuid::phy_max_address_bits()) - 1;
131     for (idx, (base, len)) in vecs.iter().enumerate() {
132         let reg_idx = idx as u32 * 2;
133         entries.push(Register {
134             id: MTRR_PHYS_BASE_MSR + reg_idx,
135             value: base | MTRR_MEMTYPE_UC as u64,
136         });
137         let mask: u64 = len.wrapping_neg() & phys_mask | MTRR_VAR_VALID;
138         entries.push(Register {
139             id: MTRR_PHYS_MASK_MSR + reg_idx,
140             value: mask,
141         });
142     }
143     // Disable fixed MTRRs and enable variable MTRRs, set default type as WB
144     entries.push(Register {
145         id: crate::msr_index::MSR_MTRRdefType,
146         value: MTRR_ENABLE | MTRR_MEMTYPE_WB as u64,
147     });
148 }
149 
create_msr_entries(vcpu: &dyn VcpuX86_64, pci_start: u64) -> Vec<Register>150 fn create_msr_entries(vcpu: &dyn VcpuX86_64, pci_start: u64) -> Vec<Register> {
151     let mut entries = vec![
152         Register {
153             id: crate::msr_index::MSR_IA32_SYSENTER_CS,
154             value: 0x0,
155         },
156         Register {
157             id: crate::msr_index::MSR_IA32_SYSENTER_ESP,
158             value: 0x0,
159         },
160         Register {
161             id: crate::msr_index::MSR_IA32_SYSENTER_EIP,
162             value: 0x0,
163         },
164         // x86_64 specific msrs, we only run on x86_64 not x86
165         Register {
166             id: crate::msr_index::MSR_STAR,
167             value: 0x0,
168         },
169         Register {
170             id: crate::msr_index::MSR_CSTAR,
171             value: 0x0,
172         },
173         Register {
174             id: crate::msr_index::MSR_KERNEL_GS_BASE,
175             value: 0x0,
176         },
177         Register {
178             id: crate::msr_index::MSR_SYSCALL_MASK,
179             value: 0x0,
180         },
181         Register {
182             id: crate::msr_index::MSR_LSTAR,
183             value: 0x0,
184         },
185         // end of x86_64 specific code
186         Register {
187             id: crate::msr_index::MSR_IA32_TSC,
188             value: 0x0,
189         },
190         Register {
191             id: crate::msr_index::MSR_IA32_MISC_ENABLE,
192             value: crate::msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64,
193         },
194     ];
195     append_mtrr_entries(vcpu, pci_start, &mut entries);
196     entries
197 }
198 
199 /// Configure Model specific registers for x86
200 ///
201 /// # Arguments
202 ///
203 /// * `vcpu` - Structure for the vcpu that holds the vcpu fd.
setup_msrs(vcpu: &dyn VcpuX86_64, pci_start: u64) -> Result<()>204 pub fn setup_msrs(vcpu: &dyn VcpuX86_64, pci_start: u64) -> Result<()> {
205     let msrs = create_msr_entries(vcpu, pci_start);
206     vcpu.set_msrs(&msrs).map_err(Error::MsrIoctlFailed)
207 }
208 
209 /// Configure FPU registers for x86
210 ///
211 /// # Arguments
212 ///
213 /// * `vcpu` - Structure for the vcpu that holds the vcpu fd.
setup_fpu(vcpu: &dyn VcpuX86_64) -> Result<()>214 pub fn setup_fpu(vcpu: &dyn VcpuX86_64) -> Result<()> {
215     let fpu = Fpu {
216         fcw: 0x37f,
217         mxcsr: 0x1f80,
218         ..Default::default()
219     };
220 
221     vcpu.set_fpu(&fpu).map_err(Error::FpuIoctlFailed)
222 }
223 
224 /// Configure base registers for x86
225 ///
226 /// # Arguments
227 ///
228 /// * `vcpu` - Structure for the vcpu that holds the vcpu fd.
229 /// * `boot_ip` - Starting instruction pointer.
230 /// * `boot_sp` - Starting stack pointer.
231 /// * `boot_si` - Must point to zero page address per Linux ABI.
setup_regs(vcpu: &dyn VcpuX86_64, boot_ip: u64, boot_sp: u64, boot_si: u64) -> Result<()>232 pub fn setup_regs(vcpu: &dyn VcpuX86_64, boot_ip: u64, boot_sp: u64, boot_si: u64) -> Result<()> {
233     let regs = Regs {
234         rflags: 0x0000000000000002u64,
235         rip: boot_ip,
236         rsp: boot_sp,
237         rbp: boot_sp,
238         rsi: boot_si,
239         ..Default::default()
240     };
241 
242     vcpu.set_regs(&regs).map_err(Error::SettingRegistersIoctl)
243 }
244 
// Control-register and EFER bits used to bring the vcpu into 64-bit mode.
const X86_CR0_PE: u64 = 0x1; // CR0: protected mode enable.
const X86_CR0_PG: u64 = 0x80000000; // CR0: paging enable.
const X86_CR4_PAE: u64 = 0x20; // CR4: physical address extension.

const EFER_LME: u64 = 0x100; // EFER: long mode enable.
const EFER_LMA: u64 = 0x400; // EFER: long mode active.

// Guest-physical addresses where the boot GDT and IDT are written.
const BOOT_GDT_OFFSET: u64 = 0x500;
const BOOT_IDT_OFFSET: u64 = 0x520;

// Number of entries in the boot GDT (null, code, data, TSS).
const BOOT_GDT_MAX: usize = 4;
256 
write_gdt_table(table: &[u64], guest_mem: &GuestMemory) -> Result<()>257 fn write_gdt_table(table: &[u64], guest_mem: &GuestMemory) -> Result<()> {
258     let boot_gdt_addr = GuestAddress(BOOT_GDT_OFFSET);
259     for (index, entry) in table.iter().enumerate() {
260         let addr = guest_mem
261             .checked_offset(boot_gdt_addr, (index * mem::size_of::<u64>()) as u64)
262             .ok_or(Error::WriteGDTFailure)?;
263         guest_mem
264             .write_obj_at_addr(*entry, addr)
265             .map_err(|_| Error::WriteGDTFailure)?;
266     }
267     Ok(())
268 }
269 
write_idt_value(val: u64, guest_mem: &GuestMemory) -> Result<()>270 fn write_idt_value(val: u64, guest_mem: &GuestMemory) -> Result<()> {
271     let boot_idt_addr = GuestAddress(BOOT_IDT_OFFSET);
272     guest_mem
273         .write_obj_at_addr(val, boot_idt_addr)
274         .map_err(|_| Error::WriteIDTFailure)
275 }
276 
configure_segments_and_sregs(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()>277 fn configure_segments_and_sregs(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
278     let gdt_table: [u64; BOOT_GDT_MAX as usize] = [
279         gdt::gdt_entry(0, 0, 0),            // NULL
280         gdt::gdt_entry(0xa09b, 0, 0xfffff), // CODE
281         gdt::gdt_entry(0xc093, 0, 0xfffff), // DATA
282         gdt::gdt_entry(0x808b, 0, 0xfffff), // TSS
283     ];
284 
285     let code_seg = gdt::segment_from_gdt(gdt_table[1], 1);
286     let data_seg = gdt::segment_from_gdt(gdt_table[2], 2);
287     let tss_seg = gdt::segment_from_gdt(gdt_table[3], 3);
288 
289     // Write segments
290     write_gdt_table(&gdt_table[..], mem)?;
291     sregs.gdt.base = BOOT_GDT_OFFSET as u64;
292     sregs.gdt.limit = mem::size_of_val(&gdt_table) as u16 - 1;
293 
294     write_idt_value(0, mem)?;
295     sregs.idt.base = BOOT_IDT_OFFSET as u64;
296     sregs.idt.limit = mem::size_of::<u64>() as u16 - 1;
297 
298     sregs.cs = code_seg;
299     sregs.ds = data_seg;
300     sregs.es = data_seg;
301     sregs.fs = data_seg;
302     sregs.gs = data_seg;
303     sregs.ss = data_seg;
304     sregs.tr = tss_seg;
305 
306     /* 64-bit protected mode */
307     sregs.cr0 |= X86_CR0_PE;
308     sregs.efer |= EFER_LME;
309 
310     Ok(())
311 }
312 
/// Writes a minimal identity-mapped page table hierarchy into guest memory
/// (one PML4 entry -> one PDPTE -> 512 2MB PDEs covering guest-physical
/// [0..1GB)) and enables PAE paging and long mode via `sregs`.
///
/// # Arguments
///
/// * `mem` - Guest memory to write the page tables into.
/// * `sregs` - Special registers to update (cr0/cr3/cr4/efer).
fn setup_page_tables(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
    // Puts PML4 right after zero page but aligned to 4k.
    let boot_pml4_addr = GuestAddress(0x9000);
    let boot_pdpte_addr = GuestAddress(0xa000);
    let boot_pde_addr = GuestAddress(0xb000);

    // Entry covering VA [0..512GB)
    // Low bits 0x03 = present | writable.
    mem.write_obj_at_addr(boot_pdpte_addr.offset() as u64 | 0x03, boot_pml4_addr)
        .map_err(|_| Error::WritePML4Address)?;

    // Entry covering VA [0..1GB)
    mem.write_obj_at_addr(boot_pde_addr.offset() as u64 | 0x03, boot_pdpte_addr)
        .map_err(|_| Error::WritePDPTEAddress)?;

    // 512 2MB entries together covering VA [0..1GB). Note we are assuming
    // CPU supports 2MB pages (/proc/cpuinfo has 'pse'). All modern CPUs do.
    // Each entry maps guest-physical i << 21; 0x83 = present | writable | PS
    // (large page).
    for i in 0..512 {
        mem.write_obj_at_addr((i << 21) + 0x83u64, boot_pde_addr.unchecked_add(i * 8))
            .map_err(|_| Error::WritePDEAddress)?;
    }
    // Point CR3 at the PML4 and turn paging on.
    sregs.cr3 = boot_pml4_addr.offset() as u64;
    sregs.cr4 |= X86_CR4_PAE;
    sregs.cr0 |= X86_CR0_PG;
    sregs.efer |= EFER_LMA; // Long mode is active. Must be auto-enabled with CR0_PG.
    Ok(())
}
339 
340 /// Configures the segment registers and system page tables for a given CPU.
341 ///
342 /// # Arguments
343 ///
344 /// * `mem` - The memory that will be passed to the guest.
345 /// * `vcpu` - The VCPU to configure registers on.
setup_sregs(mem: &GuestMemory, vcpu: &dyn VcpuX86_64) -> Result<()>346 pub fn setup_sregs(mem: &GuestMemory, vcpu: &dyn VcpuX86_64) -> Result<()> {
347     let mut sregs = vcpu.get_sregs().map_err(Error::GetSRegsIoctlFailed)?;
348 
349     configure_segments_and_sregs(mem, &mut sregs)?;
350     setup_page_tables(mem, &mut sregs)?; // TODO(dgreid) - Can this be done once per system instead?
351 
352     vcpu.set_sregs(&sregs).map_err(Error::SetSRegsIoctlFailed)?;
353 
354     Ok(())
355 }
356 
#[cfg(test)]
mod tests {
    use super::*;
    use vm_memory::{GuestAddress, GuestMemory};

    // Creates a 64 KiB guest memory block at guest-physical address 0 — large
    // enough to cover the GDT/IDT offsets and the page tables at 0x9000-0xbfff.
    fn create_guest_mem() -> GuestMemory {
        GuestMemory::new(&[(GuestAddress(0), 0x10000)]).unwrap()
    }

    // Reads back the u64 the code under test wrote at guest-physical `offset`.
    fn read_u64(gm: &GuestMemory, offset: u64) -> u64 {
        let read_addr = GuestAddress(offset);
        gm.read_obj_from_addr(read_addr).unwrap()
    }

    #[test]
    fn segments_and_sregs() {
        let mut sregs = Default::default();
        let gm = create_guest_mem();
        configure_segments_and_sregs(&gm, &mut sregs).unwrap();

        // Raw descriptor encodings expected from the gdt_entry() calls in
        // configure_segments_and_sregs (NULL, CODE, DATA, TSS), plus the
        // single zeroed IDT entry.
        assert_eq!(0x0, read_u64(&gm, BOOT_GDT_OFFSET));
        assert_eq!(0xaf9b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 8));
        assert_eq!(0xcf93000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 16));
        assert_eq!(0x8f8b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 24));
        assert_eq!(0x0, read_u64(&gm, BOOT_IDT_OFFSET));

        // Spot-check the segment register fields derived from the descriptors.
        assert_eq!(0, sregs.cs.base);
        assert_eq!(0xfffff, sregs.ds.limit);
        assert_eq!(0x10, sregs.es.selector);
        assert_eq!(1, sregs.fs.present);
        assert_eq!(1, sregs.gs.g);
        assert_eq!(0, sregs.ss.avl);
        assert_eq!(0, sregs.tr.base);
        assert_eq!(0xfffff, sregs.tr.limit);
        assert_eq!(0, sregs.tr.avl);
        assert_eq!(X86_CR0_PE, sregs.cr0);
        assert_eq!(EFER_LME, sregs.efer);
    }

    #[test]
    fn page_tables() {
        let mut sregs = Default::default();
        let gm = create_guest_mem();
        setup_page_tables(&gm, &mut sregs).unwrap();

        // PML4[0] -> 0xa000 and PDPTE[0] -> 0xb000, both with low bits 0x03.
        assert_eq!(0xa003, read_u64(&gm, 0x9000));
        assert_eq!(0xb003, read_u64(&gm, 0xa000));
        // The 512 2MB identity-mapped PDEs written by setup_page_tables.
        for i in 0..512 {
            assert_eq!((i << 21) + 0x83u64, read_u64(&gm, 0xb000 + i * 8));
        }

        assert_eq!(0x9000, sregs.cr3);
        assert_eq!(X86_CR4_PAE, sregs.cr4);
        assert_eq!(X86_CR0_PG, sregs.cr0);
    }
}
413