// Copyright 2018 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::alloc::Layout;
use std::cell::{Cell, RefCell};
use std::cmp::{self, min, Ord, PartialEq, PartialOrd};
use std::collections::btree_set::BTreeSet;
use std::fs::File;
use std::io::{Read, Write};
use std::mem;
use std::sync::{Arc, RwLock};

use libc::{EINVAL, ENOENT, ENOTTY, EPERM, EPIPE, EPROTO};

use protobuf::Message;

use assertions::const_assert;
use base::{error, LayoutAllocation};
use data_model::DataInit;
use kvm::{CpuId, Vcpu};
use kvm_sys::{
    kvm_debugregs, kvm_enable_cap, kvm_fpu, kvm_lapic_state, kvm_mp_state, kvm_msr_entry, kvm_msrs,
    kvm_regs, kvm_sregs, kvm_vcpu_events, kvm_xcrs, KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
};
use protobuf::stream::CodedOutputStream;
use protos::plugin::*;
use sync::Mutex;

use super::*;

/// Identifier for an address space in the VM.
#[derive(Copy, Clone, PartialEq)]
pub enum IoSpace {
    Ioport,
    Mmio,
}

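// A reservation of `.1` bytes starting at address `.0`; `.2` marks the region as async-write.
// Equality and ordering consider only the start address, so a `BTreeSet<Range>` can be probed
// with an address alone.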
#[derive(Debug, Copy, Clone)]
struct Range(u64, u64, bool);

impl Eq for Range {}

impl PartialEq for Range {
    fn eq(&self, other: &Range) -> bool {
        self.0 == other.0
    }
}

impl Ord for Range {
    fn cmp(&self, other: &Range) -> cmp::Ordering {
        self.0.cmp(&other.0)
    }
}

impl PartialOrd for Range {
    fn partial_cmp(&self, other: &Range) -> Option<cmp::Ordering> {
        self.0.partial_cmp(&other.0)
    }
}

// Wrapper types to make the kvm register structs DataInit. The kvm_sys structs are plain old
// data, so any byte pattern is a valid value for them.
#[derive(Copy, Clone)]
struct VcpuRegs(kvm_regs);
unsafe impl DataInit for VcpuRegs {}
#[derive(Copy, Clone)]
struct VcpuSregs(kvm_sregs);
unsafe impl DataInit for VcpuSregs {}
#[derive(Copy, Clone)]
struct VcpuFpu(kvm_fpu);
unsafe impl DataInit for VcpuFpu {}
#[derive(Copy, Clone)]
struct VcpuDebugregs(kvm_debugregs);
unsafe impl DataInit for VcpuDebugregs {}
#[derive(Copy, Clone)]
struct VcpuXcregs(kvm_xcrs);
unsafe impl DataInit for VcpuXcregs {}
#[derive(Copy, Clone)]
struct VcpuLapicState(kvm_lapic_state);
unsafe impl DataInit for VcpuLapicState {}
#[derive(Copy, Clone)]
struct VcpuMpState(kvm_mp_state);
unsafe impl DataInit for VcpuMpState {}
#[derive(Copy, Clone)]
struct VcpuEvents(kvm_vcpu_events);
unsafe impl DataInit for VcpuEvents {}

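// Reads the requested set of VCPU state as the raw bytes of the corresponding kvm struct.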
fn get_vcpu_state(vcpu: &Vcpu, state_set: VcpuRequest_StateSet) -> SysResult<Vec<u8>> {
    Ok(match state_set {
        VcpuRequest_StateSet::REGS => VcpuRegs(vcpu.get_regs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::SREGS => VcpuSregs(vcpu.get_sregs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::FPU => VcpuFpu(vcpu.get_fpu()?).as_slice().to_vec(),
        VcpuRequest_StateSet::DEBUGREGS => VcpuDebugregs(vcpu.get_debugregs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::XCREGS => VcpuXcregs(vcpu.get_xcrs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::LAPIC => VcpuLapicState(vcpu.get_lapic()?).as_slice().to_vec(),
        VcpuRequest_StateSet::MP => VcpuMpState(vcpu.get_mp_state()?).as_slice().to_vec(),
        VcpuRequest_StateSet::EVENTS => VcpuEvents(vcpu.get_vcpu_events()?).as_slice().to_vec(),
    })
}

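// Sets a VCPU state set from raw bytes, failing with EINVAL if the byte length doesn't match the
// corresponding kvm struct.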
fn set_vcpu_state(vcpu: &Vcpu, state_set: VcpuRequest_StateSet, state: &[u8]) -> SysResult<()> {
    match state_set {
        VcpuRequest_StateSet::REGS => {
            vcpu.set_regs(&VcpuRegs::from_slice(state).ok_or(SysError::new(EINVAL))?.0)
        }
        VcpuRequest_StateSet::SREGS => {
            vcpu.set_sregs(&VcpuSregs::from_slice(state).ok_or(SysError::new(EINVAL))?.0)
        }
        VcpuRequest_StateSet::FPU => {
            vcpu.set_fpu(&VcpuFpu::from_slice(state).ok_or(SysError::new(EINVAL))?.0)
        }
        VcpuRequest_StateSet::DEBUGREGS => vcpu.set_debugregs(
            &VcpuDebugregs::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::XCREGS => vcpu.set_xcrs(
            &VcpuXcregs::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::LAPIC => vcpu.set_lapic(
            &VcpuLapicState::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::MP => vcpu.set_mp_state(
            &VcpuMpState::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::EVENTS => vcpu.set_vcpu_events(
            &VcpuEvents::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
    }
}

pub struct CallHintDetails {
    pub match_rax: bool,
    pub match_rbx: bool,
    pub match_rcx: bool,
    pub match_rdx: bool,
    pub rax: u64,
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub send_sregs: bool,
    pub send_debugregs: bool,
}

pub struct CallHint {
    io_space: IoSpace,
    addr: u64,
    on_write: bool,
    regs: Vec<CallHintDetails>,
}

/// State shared by every VCPU, grouped together to make edits to the state coherent across VCPUs.
#[derive(Default)]
pub struct SharedVcpuState {
    ioport_regions: BTreeSet<Range>,
    mmio_regions: BTreeSet<Range>,
    hint: Option<CallHint>,
}

impl SharedVcpuState {
    /// Reserves the given range for handling by the plugin process.
    ///
    /// This will reject any reservation that overlaps with an existing reservation.
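    ///
    /// A minimal sketch of the overlap semantics (`SharedVcpuState` is crate-internal, so this
    /// is illustrative rather than a doctest):
    ///
    /// ```ignore
    /// let mut state = SharedVcpuState::default();
    /// state.reserve_range(IoSpace::Ioport, 0x100, 0x10, false).unwrap();
    /// // Overlaps [0x100, 0x110), so this is rejected with EPERM.
    /// state.reserve_range(IoSpace::Ioport, 0x108, 0x10, false).unwrap_err();
    /// // Zero-length reservations are rejected with EINVAL.
    /// state.reserve_range(IoSpace::Ioport, 0x200, 0, false).unwrap_err();
    /// ```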
    pub fn reserve_range(
        &mut self,
        space: IoSpace,
        start: u64,
        length: u64,
        async_write: bool,
    ) -> SysResult<()> {
        if length == 0 {
            return Err(SysError::new(EINVAL));
        }

        // Reject all cases where this reservation is part of another reservation.
        if self.is_reserved(space, start) {
            return Err(SysError::new(EPERM));
        }

        let last_address = match start.checked_add(length) {
            Some(end) => end - 1,
            None => return Err(SysError::new(EINVAL)),
        };

        let space = match space {
            IoSpace::Ioport => &mut self.ioport_regions,
            IoSpace::Mmio => &mut self.mmio_regions,
        };

        match space
            .range(..Range(last_address, 0, false))
            .next_back()
            .cloned()
        {
            Some(Range(existing_start, _, _)) if existing_start >= start => {
                Err(SysError::new(EPERM))
            }
            _ => {
                space.insert(Range(start, length, async_write));
                Ok(())
            }
        }
    }

    /// Releases a reservation previously made at `start` in the given `space`.
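    ///
    /// The reservation is looked up by its exact `start` address; for example (illustrative):
    ///
    /// ```ignore
    /// state.reserve_range(IoSpace::Mmio, 0x1000, 0x100, false).unwrap();
    /// state.unreserve_range(IoSpace::Mmio, 0x1000).unwrap(); // removes the reservation
    /// state.unreserve_range(IoSpace::Mmio, 0x1000).unwrap_err(); // ENOENT: nothing left there
    /// ```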
    pub fn unreserve_range(&mut self, space: IoSpace, start: u64) -> SysResult<()> {
        let range = Range(start, 0, false);
        let space = match space {
            IoSpace::Ioport => &mut self.ioport_regions,
            IoSpace::Mmio => &mut self.mmio_regions,
        };
        if space.remove(&range) {
            Ok(())
        } else {
            Err(SysError::new(ENOENT))
        }
    }

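    /// Sets the call hint: accesses to `addr` in `space` that match `on_write` will include
    /// register state in the wait response. Passing an `addr` of 0 clears the hint.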
    pub fn set_hint(
        &mut self,
        space: IoSpace,
        addr: u64,
        on_write: bool,
        regs: Vec<CallHintDetails>,
    ) {
        if addr == 0 {
            self.hint = None;
        } else {
            let hint = CallHint {
                io_space: space,
                addr,
                on_write,
                regs,
            };
            self.hint = Some(hint);
        }
    }

    fn is_reserved(&self, space: IoSpace, addr: u64) -> bool {
        if let Some(Range(start, len, _)) = self.first_before(space, addr) {
            let offset = addr - start;
            if offset < len {
                return true;
            }
        }
        false
    }

    fn first_before(&self, io_space: IoSpace, addr: u64) -> Option<Range> {
        let space = match io_space {
            IoSpace::Ioport => &self.ioport_regions,
            IoSpace::Mmio => &self.mmio_regions,
        };

        match addr.checked_add(1) {
            Some(next_addr) => space
                .range(..Range(next_addr, 0, false))
                .next_back()
                .cloned(),
            None => None,
        }
    }

    fn matches_hint(&self, io_space: IoSpace, addr: u64, is_write: bool) -> bool {
        if let Some(hint) = &self.hint {
            return io_space == hint.io_space && addr == hint.addr && is_write == hint.on_write;
        }
        false
    }

    fn check_hint_details(&self, regs: &kvm_regs) -> (bool, bool) {
        if let Some(hint) = &self.hint {
            for entry in hint.regs.iter() {
                if (!entry.match_rax || entry.rax == regs.rax)
                    && (!entry.match_rbx || entry.rbx == regs.rbx)
                    && (!entry.match_rcx || entry.rcx == regs.rcx)
                    && (!entry.match_rdx || entry.rdx == regs.rdx)
                {
                    return (entry.send_sregs, entry.send_debugregs);
                }
            }
        }
        (false, false)
    }
}

/// State specific to a VCPU, grouped so that each `PluginVcpu` object will share a canonical
/// version.
#[derive(Default)]
pub struct PerVcpuState {
    pause_request: Option<u64>,
}

impl PerVcpuState {
    /// Indicates that a VCPU should wait until the plugin process resumes the VCPU.
    ///
    /// This method will not cause a VCPU to pause immediately. Instead, the VCPU thread will
    /// continue running until it is interrupted, at which point it will check for a pending pause.
    /// If there is another call to `request_pause` for this VCPU before that happens, the previous
    /// pause request's `data` will be overwritten with the most recent `data`.
    ///
    /// To get an immediate pause after calling `request_pause`, send a signal (with a registered
    /// handler) to the thread handling the VCPU corresponding to this state. This should interrupt
    /// the running VCPU, which should check for a pause with `PluginVcpu::pre_run`.
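    ///
    /// A sketch of the intended pattern (signal delivery is platform-specific and elided):
    ///
    /// ```ignore
    /// let per_vcpu_state = Arc::new(Mutex::new(PerVcpuState::default()));
    /// per_vcpu_state.lock().request_pause(0xdead_beef);
    /// // Now interrupt the VCPU thread (e.g. with a registered signal) so that its next
    /// // `PluginVcpu::pre_run` call observes the pause request.
    /// ```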
    pub fn request_pause(&mut self, data: u64) {
        self.pause_request = Some(data);
    }
}

enum VcpuRunData<'a> {
    Read(&'a mut [u8]),
    Write(&'a [u8]),
}

impl<'a> VcpuRunData<'a> {
    fn is_write(&self) -> bool {
        matches!(self, VcpuRunData::Write(_))
    }

    fn as_slice(&self) -> &[u8] {
        match self {
            VcpuRunData::Read(s) => s,
            VcpuRunData::Write(s) => s,
        }
    }

    fn copy_from_slice(&mut self, data: &[u8]) {
        if let VcpuRunData::Read(s) = self {
            // Copy only the overlapping prefix; slice `copy_from_slice` panics if the two
            // lengths differ.
            let copy_size = min(s.len(), data.len());
            s[..copy_size].copy_from_slice(&data[..copy_size]);
        }
    }
}

/// State object for a VCPU's connection with the plugin process.
///
/// This is used by a VCPU thread to allow the plugin process to handle vmexits. Each method may
/// block indefinitely while the plugin process is handling requests. In order to cleanly shut
/// down during these blocking calls, close the `read_pipe` and `write_pipe`; this will end the
/// blocking calls.
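///
/// A sketch of the per-VCPU loop this type supports (vmexit decoding depends on the kvm wrapper
/// and is elided):
///
/// ```ignore
/// plugin_vcpu.init(&vcpu)?;
/// loop {
///     plugin_vcpu.pre_run(&vcpu)?;
///     // Run the VCPU, then dispatch the resulting vmexit:
///     //   io port reads   -> plugin_vcpu.io_read(addr, data, &vcpu)
///     //   io port writes  -> plugin_vcpu.io_write(addr, data, &vcpu)
///     //   mmio reads      -> plugin_vcpu.mmio_read(addr, data, &vcpu)
///     //   mmio writes     -> plugin_vcpu.mmio_write(addr, data, &vcpu)
/// }
/// ```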
pub struct PluginVcpu {
    shared_vcpu_state: Arc<RwLock<SharedVcpuState>>,
    per_vcpu_state: Arc<Mutex<PerVcpuState>>,
    read_pipe: File,
    write_pipe: File,
    wait_reason: Cell<Option<VcpuResponse_Wait>>,
    request_buffer: RefCell<Vec<u8>>,
    response_buffer: RefCell<Vec<u8>>,
}

impl PluginVcpu {
    /// Creates the plugin state and connection container for a VCPU thread.
    pub fn new(
        shared_vcpu_state: Arc<RwLock<SharedVcpuState>>,
        per_vcpu_state: Arc<Mutex<PerVcpuState>>,
        read_pipe: File,
        write_pipe: File,
    ) -> PluginVcpu {
        PluginVcpu {
            shared_vcpu_state,
            per_vcpu_state,
            read_pipe,
            write_pipe,
            wait_reason: Default::default(),
            request_buffer: Default::default(),
            response_buffer: Default::default(),
        }
    }

    /// Tells the plugin process to initialize this VCPU.
    ///
    /// This should be called for each VCPU before the first run of any of the VCPUs in the VM.
    pub fn init(&self, vcpu: &Vcpu) -> SysResult<()> {
        let mut wait_reason = VcpuResponse_Wait::new();
        wait_reason.mut_init();
        self.wait_reason.set(Some(wait_reason));
        self.handle_until_resume(vcpu)?;
        Ok(())
    }

    /// The VCPU thread should call this before rerunning a VM in order to handle pending requests
    /// to this VCPU.
    pub fn pre_run(&self, vcpu: &Vcpu) -> SysResult<()> {
        let request = {
            let mut lock = self.per_vcpu_state.lock();
            lock.pause_request.take()
        };

        if let Some(user_data) = request {
            let mut wait_reason = VcpuResponse_Wait::new();
            wait_reason.mut_user().user = user_data;
            self.wait_reason.set(Some(wait_reason));
            self.handle_until_resume(vcpu)?;
        }
        Ok(())
    }

    fn process(&self, io_space: IoSpace, addr: u64, mut data: VcpuRunData, vcpu: &Vcpu) -> bool {
        let vcpu_state_lock = match self.shared_vcpu_state.read() {
            Ok(l) => l,
            Err(e) => {
                error!("error read locking shared cpu state: {}", e);
                return false;
            }
        };

        let first_before_addr = vcpu_state_lock.first_before(io_space, addr);

        match first_before_addr {
            Some(Range(start, len, async_write)) => {
                let offset = addr - start;
                if offset >= len {
                    return false;
                }
                if async_write && !data.is_write() {
                    return false;
                }

                let mut wait_reason = VcpuResponse_Wait::new();
                let io = wait_reason.mut_io();
                io.space = match io_space {
                    IoSpace::Ioport => AddressSpace::IOPORT,
                    IoSpace::Mmio => AddressSpace::MMIO,
                };
                io.address = addr;
                io.is_write = data.is_write();
                io.data = data.as_slice().to_vec();
                io.no_resume = async_write;
                if !async_write && vcpu_state_lock.matches_hint(io_space, addr, io.is_write) {
                    if let Ok(regs) = vcpu.get_regs() {
                        let (has_sregs, has_debugregs) = vcpu_state_lock.check_hint_details(&regs);
                        io.regs = VcpuRegs(regs).as_slice().to_vec();
                        if has_sregs {
                            if let Ok(state) = get_vcpu_state(vcpu, VcpuRequest_StateSet::SREGS) {
                                io.sregs = state;
                            }
                        }
                        if has_debugregs {
                            if let Ok(state) = get_vcpu_state(vcpu, VcpuRequest_StateSet::DEBUGREGS)
                            {
                                io.debugregs = state;
                            }
                        }
                    }
                }
                // don't hold lock while blocked in `handle_until_resume`.
                drop(vcpu_state_lock);

                if async_write {
                    let mut response = VcpuResponse::new();
                    response.set_wait(wait_reason);

                    let mut response_buffer = self.response_buffer.borrow_mut();
                    response_buffer.clear();
                    let mut stream = CodedOutputStream::vec(&mut response_buffer);
                    match response.write_length_delimited_to(&mut stream) {
                        Ok(_) => {
                            match stream.flush() {
                                Ok(_) => {}
                                Err(e) => error!("failed to flush to vec: {}", e),
                            }
                            let mut write_pipe = &self.write_pipe;
                            // Use write_all so a short write can't silently truncate the
                            // response message.
                            match write_pipe.write_all(&response_buffer[..]) {
                                Ok(()) => {}
                                Err(e) => error!("failed to write to pipe: {}", e),
                            }
                        }
                        Err(e) => error!("failed to write to buffer: {}", e),
                    }
                } else {
                    self.wait_reason.set(Some(wait_reason));
                    match self.handle_until_resume(vcpu) {
                        Ok(resume_data) => data.copy_from_slice(&resume_data),
                        Err(e) if e.errno() == EPIPE => {}
                        Err(e) => error!("failed to process vcpu requests: {}", e),
                    }
                }
                true
            }
            None => false,
        }
    }

    /// Has the plugin process handle an IO port read.
    pub fn io_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Ioport, addr, VcpuRunData::Read(data), vcpu)
    }

    /// Has the plugin process handle an IO port write.
    pub fn io_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Ioport, addr, VcpuRunData::Write(data), vcpu)
    }

    /// Has the plugin process handle an MMIO read.
    pub fn mmio_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Mmio, addr, VcpuRunData::Read(data), vcpu)
    }

    /// Has the plugin process handle an MMIO write.
    pub fn mmio_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Mmio, addr, VcpuRunData::Write(data), vcpu)
    }

    /// Has the plugin process handle a Hyper-V call.
    pub fn hyperv_call(&self, input: u64, params: [u64; 2], data: &mut [u8], vcpu: &Vcpu) -> bool {
        let mut wait_reason = VcpuResponse_Wait::new();
        let hv = wait_reason.mut_hyperv_call();
        hv.input = input;
        hv.params0 = params[0];
        hv.params1 = params[1];

        self.wait_reason.set(Some(wait_reason));
        match self.handle_until_resume(vcpu) {
            Ok(resume_data) => {
                // Copy only the overlapping prefix so a short or oversized reply from the plugin
                // can't cause a length-mismatch panic.
                let copy_size = min(data.len(), resume_data.len());
                data[..copy_size].copy_from_slice(&resume_data[..copy_size]);
                true
            }
            Err(e) if e.errno() == EPIPE => false,
            Err(e) => {
                error!("failed to process hyperv call request: {}", e);
                false
            }
        }
    }

    /// Has the plugin process handle a Hyper-V SynIC config change.
    pub fn hyperv_synic(
        &self,
        msr: u32,
        control: u64,
        evt_page: u64,
        msg_page: u64,
        vcpu: &Vcpu,
    ) -> bool {
        let mut wait_reason = VcpuResponse_Wait::new();
        let hv = wait_reason.mut_hyperv_synic();
        hv.msr = msr;
        hv.control = control;
        hv.evt_page = evt_page;
        hv.msg_page = msg_page;
        self.wait_reason.set(Some(wait_reason));
        match self.handle_until_resume(vcpu) {
            Ok(_resume_data) => true,
            Err(e) if e.errno() == EPIPE => false,
            Err(e) => {
                error!("failed to process hyperv synic request: {}", e);
                false
            }
        }
    }

    fn handle_request(&self, vcpu: &Vcpu) -> SysResult<Option<Vec<u8>>> {
        let mut wait_reason = self.wait_reason.take();
        let mut do_recv = true;
        let mut resume_data = None;
        let mut response = VcpuResponse::new();
        let mut send_response = true;

        // Typically, a response is sent for every request received. The exception (yet a common
        // case) is a resume request: no resume reply is sent immediately; instead, the VM runs
        // and a wait response message is sent later. This block checks whether a wait reason is
        // already pending (excluding first-time init, which must first receive a wait request
        // from the plugin) and, if so, sends it as the reply instead of receiving the next
        // request. In that case this function sends the reply and does nothing else--the
        // expectation is that handle_until_resume() is the only caller, so this function will
        // immediately be called again, and the second call will no longer see a pending wait
        // reason and will do a recv() for the next message.
        if let Some(reason) = wait_reason {
            if reason.has_init() {
                wait_reason = Some(reason);
            } else {
                response.set_wait(reason);
                do_recv = false;
                wait_reason = None;
            }
        }

        if do_recv {
            let mut request_buffer = self.request_buffer.borrow_mut();
            request_buffer.resize(MAX_VCPU_DATAGRAM_SIZE, 0);

            let mut read_pipe = &self.read_pipe;
            let msg_size = read_pipe.read(&mut request_buffer).map_err(io_to_sys_err)?;

            let mut request =
                protobuf::parse_from_bytes::<VcpuRequest>(&request_buffer[..msg_size])
                    .map_err(proto_to_sys_err)?;

            let res = if request.has_wait() {
                match wait_reason {
                    Some(wait_reason) => {
                        response.set_wait(wait_reason);
                        Ok(())
                    }
                    None => Err(SysError::new(EPROTO)),
                }
            } else if wait_reason.is_some() {
                // Any request other than getting the wait_reason while there is one pending is
                // invalid.
                self.wait_reason.set(wait_reason);
                Err(SysError::new(EPROTO))
            } else if request.has_resume() {
                send_response = false;
                let resume = request.get_resume();
                if !resume.get_regs().is_empty() {
                    set_vcpu_state(vcpu, VcpuRequest_StateSet::REGS, resume.get_regs())?;
                }
                if !resume.get_sregs().is_empty() {
                    set_vcpu_state(vcpu, VcpuRequest_StateSet::SREGS, resume.get_sregs())?;
                }
                if !resume.get_debugregs().is_empty() {
                    set_vcpu_state(
                        vcpu,
                        VcpuRequest_StateSet::DEBUGREGS,
                        resume.get_debugregs(),
                    )?;
                }
                resume_data = Some(request.take_resume().take_data());
                Ok(())
            } else if request.has_get_state() {
                let response_state = response.mut_get_state();
                match get_vcpu_state(vcpu, request.get_get_state().set) {
                    Ok(state) => {
                        response_state.state = state;
                        Ok(())
                    }
                    Err(e) => Err(e),
                }
            } else if request.has_set_state() {
                response.mut_set_state();
                let set_state = request.get_set_state();
                set_vcpu_state(vcpu, set_state.set, set_state.get_state())
            } else if request.has_get_hyperv_cpuid() {
                let cpuid_response = &mut response.mut_get_hyperv_cpuid().entries;
                match vcpu.get_hyperv_cpuid() {
                    Ok(mut cpuid) => {
                        for entry in cpuid.mut_entries_slice() {
                            cpuid_response.push(cpuid_kvm_to_proto(entry));
                        }
                        Ok(())
                    }
                    Err(e) => Err(e),
                }
            } else if request.has_get_msrs() {
                let entry_data = &mut response.mut_get_msrs().entry_data;
                let entry_indices = &request.get_get_msrs().entry_indices;
                let mut msr_entries = Vec::with_capacity(entry_indices.len());
                for &index in entry_indices {
                    msr_entries.push(kvm_msr_entry {
                        index,
                        ..Default::default()
                    });
                }
                match vcpu.get_msrs(&mut msr_entries) {
                    Ok(()) => {
                        for msr_entry in msr_entries {
                            entry_data.push(msr_entry.data);
                        }
                        Ok(())
                    }
                    Err(e) => Err(e),
                }
            } else if request.has_set_msrs() {
                const SIZE_OF_MSRS: usize = mem::size_of::<kvm_msrs>();
                const SIZE_OF_ENTRY: usize = mem::size_of::<kvm_msr_entry>();
                const ALIGN_OF_MSRS: usize = mem::align_of::<kvm_msrs>();
                const ALIGN_OF_ENTRY: usize = mem::align_of::<kvm_msr_entry>();
                const_assert!(ALIGN_OF_MSRS >= ALIGN_OF_ENTRY);

                response.mut_set_msrs();
                let request_entries = &request.get_set_msrs().entries;

                let size = SIZE_OF_MSRS + request_entries.len() * SIZE_OF_ENTRY;
                let layout =
                    Layout::from_size_align(size, ALIGN_OF_MSRS).expect("impossible layout");
                let mut allocation = LayoutAllocation::zeroed(layout);

                // Safe to obtain an exclusive reference because there are no other
                // references to the allocation yet and all-zero is a valid bit
                // pattern.
                let kvm_msrs = unsafe { allocation.as_mut::<kvm_msrs>() };

                unsafe {
                    // Mapping the unsized array to a slice is unsafe because the length isn't
                    // known. Providing the length used to create the struct guarantees the
                    // entire slice is valid.
                    let kvm_msr_entries: &mut [kvm_msr_entry] =
                        kvm_msrs.entries.as_mut_slice(request_entries.len());
                    for (msr_entry, entry) in kvm_msr_entries.iter_mut().zip(request_entries) {
                        msr_entry.index = entry.index;
                        msr_entry.data = entry.data;
                    }
                }
                kvm_msrs.nmsrs = request_entries.len() as u32;
                vcpu.set_msrs(&kvm_msrs)
            } else if request.has_set_cpuid() {
                response.mut_set_cpuid();
                let request_entries = &request.get_set_cpuid().entries;
                let mut cpuid = CpuId::new(request_entries.len());
                let cpuid_entries = cpuid.mut_entries_slice();
                for (request_entry, cpuid_entry) in request_entries.iter().zip(cpuid_entries) {
                    cpuid_entry.function = request_entry.function;
                    if request_entry.has_index {
                        cpuid_entry.index = request_entry.index;
                        cpuid_entry.flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                    }
                    cpuid_entry.eax = request_entry.eax;
                    cpuid_entry.ebx = request_entry.ebx;
                    cpuid_entry.ecx = request_entry.ecx;
                    cpuid_entry.edx = request_entry.edx;
                }
                vcpu.set_cpuid2(&cpuid)
            } else if request.has_enable_capability() {
                response.mut_enable_capability();
                let capability = request.get_enable_capability().capability;
                if capability != kvm_sys::KVM_CAP_HYPERV_SYNIC
                    && capability != kvm_sys::KVM_CAP_HYPERV_SYNIC2
                {
                    Err(SysError::new(EINVAL))
                } else {
                    let mut cap: kvm_enable_cap = Default::default();
                    cap.cap = capability;
                    // Safe because the allowed capabilities don't take pointer arguments.
                    unsafe { vcpu.kvm_enable_cap(&cap) }
                }
            } else if request.has_shutdown() {
                return Err(SysError::new(EPIPE));
            } else {
                Err(SysError::new(ENOTTY))
            };

            if let Err(e) = res {
                response.errno = e.errno();
            }
        }

        // Send the response, except if it's a resume response (in which case
        // we'll go run the VM and afterwards send a wait response message).
        if send_response {
            let mut response_buffer = self.response_buffer.borrow_mut();
            response_buffer.clear();
            let mut stream = CodedOutputStream::vec(&mut response_buffer);
            response
                .write_length_delimited_to(&mut stream)
                .map_err(proto_to_sys_err)?;
            stream.flush().map_err(proto_to_sys_err)?;
            let mut write_pipe = &self.write_pipe;
            // Use write_all so a short write can't silently truncate the response message.
            write_pipe
                .write_all(&response_buffer[..])
                .map_err(io_to_sys_err)?;
        }

        Ok(resume_data)
    }

    fn handle_until_resume(&self, vcpu: &Vcpu) -> SysResult<Vec<u8>> {
        loop {
            if let Some(resume_data) = self.handle_request(vcpu)? {
                return Ok(resume_data);
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn shared_vcpu_reserve() {
        let mut shared_vcpu_state = SharedVcpuState::default();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0x10, false)
            .unwrap();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x0f, 0x10, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0x10, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0x15, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x12, 0x15, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x12, 0x01, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x0, 0x20, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x20, 0x05, false)
            .unwrap();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x25, 0x05, false)
            .unwrap();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x0, 0x10, false)
            .unwrap();
    }
}