// Copyright 2018 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::alloc::Layout;
use std::cell::{Cell, RefCell};
use std::cmp::min;
use std::cmp::{self, Ord, PartialEq, PartialOrd};
use std::collections::btree_set::BTreeSet;
use std::io::{Read, Write};
use std::mem;
use std::sync::{Arc, RwLock};

use libc::{EINVAL, ENOENT, ENOTTY, EPERM, EPIPE, EPROTO};

use protobuf::Message;

use assertions::const_assert;
use base::{error, LayoutAllocation};
use data_model::DataInit;
use kvm::{CpuId, Vcpu};
use kvm_sys::{
    kvm_debugregs, kvm_enable_cap, kvm_fpu, kvm_lapic_state, kvm_mp_state, kvm_msr_entry, kvm_msrs,
    kvm_regs, kvm_sregs, kvm_vcpu_events, kvm_xcrs, KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
};
use protobuf::stream::CodedOutputStream;
use protos::plugin::*;
use sync::Mutex;

use super::*;

/// Identifier for an address space in the VM.
#[derive(Copy, Clone, PartialEq)]
pub enum IoSpace {
    Ioport,
    Mmio,
}

// A reserved region: `Range(start, length, async_write)`. Equality and ordering consider only
// the start address, so a `BTreeSet<Range>` can be searched by address alone.
#[derive(Debug, Copy, Clone)]
struct Range(u64, u64, bool);

impl Eq for Range {}

impl PartialEq for Range {
    fn eq(&self, other: &Range) -> bool {
        self.0 == other.0
    }
}

impl Ord for Range {
    fn cmp(&self, other: &Range) -> cmp::Ordering {
        self.0.cmp(&other.0)
    }
}

impl PartialOrd for Range {
    fn partial_cmp(&self, other: &Range) -> Option<cmp::Ordering> {
        self.0.partial_cmp(&other.0)
    }
}

// Wrapper types to make the kvm register structs DataInit.
#[derive(Copy, Clone)]
struct VcpuRegs(kvm_regs);
unsafe impl DataInit for VcpuRegs {}
#[derive(Copy, Clone)]
struct VcpuSregs(kvm_sregs);
unsafe impl DataInit for VcpuSregs {}
#[derive(Copy, Clone)]
struct VcpuFpu(kvm_fpu);
unsafe impl DataInit for VcpuFpu {}
#[derive(Copy, Clone)]
struct VcpuDebugregs(kvm_debugregs);
unsafe impl DataInit for VcpuDebugregs {}
#[derive(Copy, Clone)]
struct VcpuXcregs(kvm_xcrs);
unsafe impl DataInit for VcpuXcregs {}
#[derive(Copy, Clone)]
struct VcpuLapicState(kvm_lapic_state);
unsafe impl DataInit for VcpuLapicState {}
#[derive(Copy, Clone)]
struct VcpuMpState(kvm_mp_state);
unsafe impl DataInit for VcpuMpState {}
#[derive(Copy, Clone)]
struct VcpuEvents(kvm_vcpu_events);
unsafe impl DataInit for VcpuEvents {}

fn get_vcpu_state(vcpu: &Vcpu, state_set: VcpuRequest_StateSet) -> SysResult<Vec<u8>> {
    Ok(match state_set {
        VcpuRequest_StateSet::REGS => VcpuRegs(vcpu.get_regs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::SREGS => VcpuSregs(vcpu.get_sregs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::FPU => VcpuFpu(vcpu.get_fpu()?).as_slice().to_vec(),
        VcpuRequest_StateSet::DEBUGREGS => VcpuDebugregs(vcpu.get_debugregs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::XCREGS => VcpuXcregs(vcpu.get_xcrs()?).as_slice().to_vec(),
        VcpuRequest_StateSet::LAPIC => VcpuLapicState(vcpu.get_lapic()?).as_slice().to_vec(),
        VcpuRequest_StateSet::MP => VcpuMpState(vcpu.get_mp_state()?).as_slice().to_vec(),
        VcpuRequest_StateSet::EVENTS => VcpuEvents(vcpu.get_vcpu_events()?).as_slice().to_vec(),
    })
}

fn set_vcpu_state(vcpu: &Vcpu, state_set: VcpuRequest_StateSet, state: &[u8]) -> SysResult<()> {
    match state_set {
        VcpuRequest_StateSet::REGS => {
            vcpu.set_regs(&VcpuRegs::from_slice(state).ok_or(SysError::new(EINVAL))?.0)
        }
        VcpuRequest_StateSet::SREGS => {
            vcpu.set_sregs(&VcpuSregs::from_slice(state).ok_or(SysError::new(EINVAL))?.0)
        }
        VcpuRequest_StateSet::FPU => {
            vcpu.set_fpu(&VcpuFpu::from_slice(state).ok_or(SysError::new(EINVAL))?.0)
        }
        VcpuRequest_StateSet::DEBUGREGS => vcpu.set_debugregs(
            &VcpuDebugregs::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::XCREGS => vcpu.set_xcrs(
            &VcpuXcregs::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::LAPIC => vcpu.set_lapic(
            &VcpuLapicState::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::MP => vcpu.set_mp_state(
            &VcpuMpState::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
        VcpuRequest_StateSet::EVENTS => vcpu.set_vcpu_events(
            &VcpuEvents::from_slice(state)
                .ok_or(SysError::new(EINVAL))?
                .0,
        ),
    }
}

pub struct CallHintDetails {
    pub match_rax: bool,
    pub match_rbx: bool,
    pub match_rcx: bool,
    pub match_rdx: bool,
    pub rax: u64,
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub send_sregs: bool,
    pub send_debugregs: bool,
}

pub struct CallHint {
    io_space: IoSpace,
    addr: u64,
    on_write: bool,
    regs: Vec<CallHintDetails>,
}

/// State shared by every VCPU, grouped together to make edits to the state coherent across VCPUs.
#[derive(Default)]
pub struct SharedVcpuState {
    ioport_regions: BTreeSet<Range>,
    mmio_regions: BTreeSet<Range>,
    hint: Option<CallHint>,
}

impl SharedVcpuState {
    /// Reserves the given range for handling by the plugin process.
    ///
    /// This will reject any reservation that overlaps with an existing reservation.
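    ///
    /// A minimal sketch of the overlap rules (mirroring the unit test at the bottom of this
    /// file; addresses are arbitrary):
    ///
    /// ```ignore
    /// let mut state = SharedVcpuState::default();
    /// state.reserve_range(IoSpace::Ioport, 0x10, 0x10, false).unwrap();
    /// // Overlaps [0x10, 0x20), so this is rejected with EPERM.
    /// state.reserve_range(IoSpace::Ioport, 0x18, 0x10, false).unwrap_err();
    /// // Zero-length reservations are rejected with EINVAL.
    /// state.reserve_range(IoSpace::Ioport, 0x30, 0, false).unwrap_err();
    /// ```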
    pub fn reserve_range(
        &mut self,
        space: IoSpace,
        start: u64,
        length: u64,
        async_write: bool,
    ) -> SysResult<()> {
        if length == 0 {
            return Err(SysError::new(EINVAL));
        }

        // Reject all cases where this reservation is part of another reservation.
        if self.is_reserved(space, start) {
            return Err(SysError::new(EPERM));
        }

        let last_address = match start.checked_add(length) {
            Some(end) => end - 1,
            None => return Err(SysError::new(EINVAL)),
        };

        let space = match space {
            IoSpace::Ioport => &mut self.ioport_regions,
            IoSpace::Mmio => &mut self.mmio_regions,
        };

        match space
            .range(..Range(last_address, 0, false))
            .next_back()
            .cloned()
        {
            Some(Range(existing_start, _, _)) if existing_start >= start => {
                Err(SysError::new(EPERM))
            }
            _ => {
                space.insert(Range(start, length, async_write));
                Ok(())
            }
        }
    }

    /// Releases a reservation previously made at `start` in the given `space`.
    pub fn unreserve_range(&mut self, space: IoSpace, start: u64) -> SysResult<()> {
        let range = Range(start, 0, false);
        let space = match space {
            IoSpace::Ioport => &mut self.ioport_regions,
            IoSpace::Mmio => &mut self.mmio_regions,
        };
        if space.remove(&range) {
            Ok(())
        } else {
            Err(SysError::new(ENOENT))
        }
    }

    /// Sets the call hint for `addr` in `space`, or clears any existing hint if `addr` is 0.
    pub fn set_hint(
        &mut self,
        space: IoSpace,
        addr: u64,
        on_write: bool,
        regs: Vec<CallHintDetails>,
    ) {
        if addr == 0 {
            self.hint = None;
        } else {
            let hint = CallHint {
                io_space: space,
                addr,
                on_write,
                regs,
            };
            self.hint = Some(hint);
        }
    }

    fn is_reserved(&self, space: IoSpace, addr: u64) -> bool {
        if let Some(Range(start, len, _)) = self.first_before(space, addr) {
            let offset = addr - start;
            if offset < len {
                return true;
            }
        }
        false
    }

    fn first_before(&self, io_space: IoSpace, addr: u64) -> Option<Range> {
        let space = match io_space {
            IoSpace::Ioport => &self.ioport_regions,
            IoSpace::Mmio => &self.mmio_regions,
        };

        match addr.checked_add(1) {
            Some(next_addr) => space
                .range(..Range(next_addr, 0, false))
                .next_back()
                .cloned(),
            None => None,
        }
    }

    fn matches_hint(&self, io_space: IoSpace, addr: u64, is_write: bool) -> bool {
        if let Some(hint) = &self.hint {
            return io_space == hint.io_space && addr == hint.addr && is_write == hint.on_write;
        }
        false
    }

    fn check_hint_details(&self, regs: &kvm_regs) -> (bool, bool) {
        if let Some(hint) = &self.hint {
            for entry in hint.regs.iter() {
                if (!entry.match_rax || entry.rax == regs.rax)
                    && (!entry.match_rbx || entry.rbx == regs.rbx)
                    && (!entry.match_rcx || entry.rcx == regs.rcx)
                    && (!entry.match_rdx || entry.rdx == regs.rdx)
                {
                    return (entry.send_sregs, entry.send_debugregs);
                }
            }
        }
        (false, false)
    }
}

/// State specific to a VCPU, grouped so that each `PluginVcpu` object will share a canonical
/// version.
#[derive(Default)]
pub struct PerVcpuState {
    pause_request: Option<u64>,
}

impl PerVcpuState {
    /// Indicates that a VCPU should wait until the plugin process resumes the VCPU.
    ///
    /// This method will not cause a VCPU to pause immediately. Instead, the VCPU thread will
    /// continue running until interrupted, at which point it will check for a pending pause. If
    /// there is another call to `request_pause` for this VCPU before that happens, the last pause
    /// request's `data` will be overwritten with the most recent `data`.
    ///
    /// To get an immediate pause after calling `request_pause`, send a signal (with a registered
    /// handler) to the thread handling the VCPU corresponding to this state. This should interrupt
    /// the running VCPU, which should check for a pause with `PluginVcpu::pre_run`.
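    ///
    /// A minimal sketch, assuming `per_vcpu_state` is the `Arc<Mutex<PerVcpuState>>` shared with
    /// the VCPU thread and `interrupt_vcpu_thread` is a hypothetical helper that delivers such a
    /// signal:
    ///
    /// ```ignore
    /// per_vcpu_state.lock().request_pause(0xdead_beef);
    /// interrupt_vcpu_thread(); // hypothetical; makes the VCPU reach pre_run promptly
    /// ```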
    pub fn request_pause(&mut self, data: u64) {
        self.pause_request = Some(data);
    }
}

enum VcpuRunData<'a> {
    Read(&'a mut [u8]),
    Write(&'a [u8]),
}

impl<'a> VcpuRunData<'a> {
    fn is_write(&self) -> bool {
        matches!(self, VcpuRunData::Write(_))
    }

    fn as_slice(&self) -> &[u8] {
        match self {
            VcpuRunData::Read(s) => s,
            VcpuRunData::Write(s) => s,
        }
    }

    fn copy_from_slice(&mut self, data: &[u8]) {
        if let VcpuRunData::Read(s) = self {
            // Copy only the overlapping prefix; `copy_from_slice` panics on a length mismatch.
            let copy_size = min(s.len(), data.len());
            s[..copy_size].copy_from_slice(&data[..copy_size]);
        }
    }
}


/// State object for a VCPU's connection with the plugin process.
///
/// This is used by a VCPU thread to allow the plugin process to handle vmexits. Each method may
/// block indefinitely while the plugin process is handling requests. In order to cleanly shut down
/// during these blocking calls, the `connection` socket should be shut down. This will end the
/// blocking calls.
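///
/// A minimal construction sketch, assuming `to_plugin` and `from_plugin` are the two `File`
/// halves of an already-connected pipe to the plugin process:
///
/// ```ignore
/// let shared = Arc::new(RwLock::new(SharedVcpuState::default()));
/// let per_vcpu = Arc::new(Mutex::new(PerVcpuState::default()));
/// let plugin_vcpu = PluginVcpu::new(shared, per_vcpu, from_plugin, to_plugin);
/// ```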
pub struct PluginVcpu {
    shared_vcpu_state: Arc<RwLock<SharedVcpuState>>,
    per_vcpu_state: Arc<Mutex<PerVcpuState>>,
    read_pipe: File,
    write_pipe: File,
    wait_reason: Cell<Option<VcpuResponse_Wait>>,
    request_buffer: RefCell<Vec<u8>>,
    response_buffer: RefCell<Vec<u8>>,
}

impl PluginVcpu {
    /// Creates the plugin state and connection container for a VCPU thread.
    pub fn new(
        shared_vcpu_state: Arc<RwLock<SharedVcpuState>>,
        per_vcpu_state: Arc<Mutex<PerVcpuState>>,
        read_pipe: File,
        write_pipe: File,
    ) -> PluginVcpu {
        PluginVcpu {
            shared_vcpu_state,
            per_vcpu_state,
            read_pipe,
            write_pipe,
            wait_reason: Default::default(),
            request_buffer: Default::default(),
            response_buffer: Default::default(),
        }
    }

    /// Tells the plugin process to initialize this VCPU.
    ///
    /// This should be called for each VCPU before the first run of any of the VCPUs in the VM.
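    ///
    /// A sketch of the expected per-VCPU thread flow; `vcpu` and the vmexit dispatch loop are
    /// assumed to live in the caller, as elsewhere in this module:
    ///
    /// ```ignore
    /// plugin_vcpu.init(&vcpu)?;
    /// loop {
    ///     plugin_vcpu.pre_run(&vcpu)?;
    ///     // Run the VCPU, then route IO/MMIO exits to io_read/io_write/mmio_read/mmio_write.
    /// }
    /// ```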
    pub fn init(&self, vcpu: &Vcpu) -> SysResult<()> {
        let mut wait_reason = VcpuResponse_Wait::new();
        wait_reason.mut_init();
        self.wait_reason.set(Some(wait_reason));
        self.handle_until_resume(vcpu)?;
        Ok(())
    }

    /// The VCPU thread should call this before rerunning a VM in order to handle pending requests
    /// to this VCPU.
    pub fn pre_run(&self, vcpu: &Vcpu) -> SysResult<()> {
        let request = {
            let mut lock = self.per_vcpu_state.lock();
            lock.pause_request.take()
        };

        if let Some(user_data) = request {
            let mut wait_reason = VcpuResponse_Wait::new();
            wait_reason.mut_user().user = user_data;
            self.wait_reason.set(Some(wait_reason));
            self.handle_until_resume(vcpu)?;
        }
        Ok(())
    }

    fn process(&self, io_space: IoSpace, addr: u64, mut data: VcpuRunData, vcpu: &Vcpu) -> bool {
        let vcpu_state_lock = match self.shared_vcpu_state.read() {
            Ok(l) => l,
            Err(e) => {
                error!("error read-locking shared cpu state: {}", e);
                return false;
            }
        };

        let first_before_addr = vcpu_state_lock.first_before(io_space, addr);

        match first_before_addr {
            Some(Range(start, len, async_write)) => {
                let offset = addr - start;
                if offset >= len {
                    return false;
                }
                if async_write && !data.is_write() {
                    return false;
                }

                let mut wait_reason = VcpuResponse_Wait::new();
                let io = wait_reason.mut_io();
                io.space = match io_space {
                    IoSpace::Ioport => AddressSpace::IOPORT,
                    IoSpace::Mmio => AddressSpace::MMIO,
                };
                io.address = addr;
                io.is_write = data.is_write();
                io.data = data.as_slice().to_vec();
                io.no_resume = async_write;
                if !async_write && vcpu_state_lock.matches_hint(io_space, addr, io.is_write) {
                    if let Ok(regs) = vcpu.get_regs() {
                        let (has_sregs, has_debugregs) = vcpu_state_lock.check_hint_details(&regs);
                        io.regs = VcpuRegs(regs).as_slice().to_vec();
                        if has_sregs {
                            if let Ok(state) = get_vcpu_state(vcpu, VcpuRequest_StateSet::SREGS) {
                                io.sregs = state;
                            }
                        }
                        if has_debugregs {
                            if let Ok(state) = get_vcpu_state(vcpu, VcpuRequest_StateSet::DEBUGREGS)
                            {
                                io.debugregs = state;
                            }
                        }
                    }
                }
                // Don't hold the lock while blocked in `handle_until_resume`.
                drop(vcpu_state_lock);

                if async_write {
                    let mut response = VcpuResponse::new();
                    response.set_wait(wait_reason);

                    let mut response_buffer = self.response_buffer.borrow_mut();
                    response_buffer.clear();
                    let mut stream = CodedOutputStream::vec(&mut response_buffer);
                    match response.write_length_delimited_to(&mut stream) {
                        Ok(_) => {
                            match stream.flush() {
                                Ok(_) => {}
                                Err(e) => error!("failed to flush to vec: {}", e),
                            }
                            let mut write_pipe = &self.write_pipe;
                            match write_pipe.write(&response_buffer[..]) {
                                Ok(_) => {}
                                Err(e) => error!("failed to write to pipe: {}", e),
                            }
                        }
                        Err(e) => error!("failed to write to buffer: {}", e),
                    }
                } else {
                    self.wait_reason.set(Some(wait_reason));
                    match self.handle_until_resume(vcpu) {
                        Ok(resume_data) => data.copy_from_slice(&resume_data),
                        Err(e) if e.errno() == EPIPE => {}
                        Err(e) => error!("failed to process vcpu requests: {}", e),
                    }
                }
                true
            }
            None => false,
        }
    }

    /// Has the plugin process handle an IO port read.
    pub fn io_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Ioport, addr, VcpuRunData::Read(data), vcpu)
    }

    /// Has the plugin process handle an IO port write.
    pub fn io_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Ioport, addr, VcpuRunData::Write(data), vcpu)
    }

    /// Has the plugin process handle an MMIO read.
    pub fn mmio_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Mmio, addr, VcpuRunData::Read(data), vcpu)
    }

    /// Has the plugin process handle an MMIO write.
    pub fn mmio_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool {
        self.process(IoSpace::Mmio, addr, VcpuRunData::Write(data), vcpu)
    }

    /// Has the plugin process handle a Hyper-V call.
    pub fn hyperv_call(&self, input: u64, params: [u64; 2], data: &mut [u8], vcpu: &Vcpu) -> bool {
        let mut wait_reason = VcpuResponse_Wait::new();
        let hv = wait_reason.mut_hyperv_call();
        hv.input = input;
        hv.params0 = params[0];
        hv.params1 = params[1];

        self.wait_reason.set(Some(wait_reason));
        match self.handle_until_resume(vcpu) {
            Ok(resume_data) => {
                data.copy_from_slice(&resume_data);
                true
            }
            Err(e) if e.errno() == EPIPE => false,
            Err(e) => {
                error!("failed to process hyperv call request: {}", e);
                false
            }
        }
    }

    /// Has the plugin process handle a SynIC configuration change.
    pub fn hyperv_synic(
        &self,
        msr: u32,
        control: u64,
        evt_page: u64,
        msg_page: u64,
        vcpu: &Vcpu,
    ) -> bool {
        let mut wait_reason = VcpuResponse_Wait::new();
        let hv = wait_reason.mut_hyperv_synic();
        hv.msr = msr;
        hv.control = control;
        hv.evt_page = evt_page;
        hv.msg_page = msg_page;
        self.wait_reason.set(Some(wait_reason));
        match self.handle_until_resume(vcpu) {
            Ok(_resume_data) => true,
            Err(e) if e.errno() == EPIPE => false,
            Err(e) => {
                error!("failed to process hyperv synic request: {}", e);
                false
            }
        }
    }

    fn handle_request(&self, vcpu: &Vcpu) -> SysResult<Option<Vec<u8>>> {
        let mut wait_reason = self.wait_reason.take();
        let mut do_recv = true;
        let mut resume_data = None;
        let mut response = VcpuResponse::new();
        let mut send_response = true;

        // Typically a response is sent for every request received. The odd (yet common) case is
        // when a resume request is received. This function skips sending a resume reply; instead,
        // we go run the VM and later reply with a wait response message. This block checks
        // whether a wait reason is pending (where the wait reason isn't the first-time init,
        // since init needs to first receive a wait request from the plugin) and, if so, sends it
        // as a reply before doing a recv() for the next request. Note that if a wait reply is
        // pending, this function sends the reply and does nothing else. The expectation is that
        // `handle_until_resume` is the only caller of this function, so the function will
        // immediately be called again; the second call will no longer see a pending wait reason
        // and will do a recv() for the next message.
        if let Some(reason) = wait_reason {
            if reason.has_init() {
                wait_reason = Some(reason);
            } else {
                response.set_wait(reason);
                do_recv = false;
                wait_reason = None;
            }
        }

        if do_recv {
            let mut request_buffer = self.request_buffer.borrow_mut();
            request_buffer.resize(MAX_VCPU_DATAGRAM_SIZE, 0);

            let mut read_pipe = &self.read_pipe;
            let msg_size = read_pipe.read(&mut request_buffer).map_err(io_to_sys_err)?;

            let mut request =
                protobuf::parse_from_bytes::<VcpuRequest>(&request_buffer[..msg_size])
                    .map_err(proto_to_sys_err)?;

            let res = if request.has_wait() {
                match wait_reason {
                    Some(wait_reason) => {
                        response.set_wait(wait_reason);
                        Ok(())
                    }
                    None => Err(SysError::new(EPROTO)),
                }
            } else if wait_reason.is_some() {
                // Any request other than getting the wait_reason while there is one pending is
                // invalid.
                self.wait_reason.set(wait_reason);
                Err(SysError::new(EPROTO))
            } else if request.has_resume() {
                send_response = false;
                let resume = request.get_resume();
                if !resume.get_regs().is_empty() {
                    set_vcpu_state(vcpu, VcpuRequest_StateSet::REGS, resume.get_regs())?;
                }
                if !resume.get_sregs().is_empty() {
                    set_vcpu_state(vcpu, VcpuRequest_StateSet::SREGS, resume.get_sregs())?;
                }
                if !resume.get_debugregs().is_empty() {
                    set_vcpu_state(
                        vcpu,
                        VcpuRequest_StateSet::DEBUGREGS,
                        resume.get_debugregs(),
                    )?;
                }
                resume_data = Some(request.take_resume().take_data());
                Ok(())
            } else if request.has_get_state() {
                let response_state = response.mut_get_state();
                match get_vcpu_state(vcpu, request.get_get_state().set) {
                    Ok(state) => {
                        response_state.state = state;
                        Ok(())
                    }
                    Err(e) => Err(e),
                }
            } else if request.has_set_state() {
                response.mut_set_state();
                let set_state = request.get_set_state();
                set_vcpu_state(vcpu, set_state.set, set_state.get_state())
            } else if request.has_get_hyperv_cpuid() {
                let cpuid_response = &mut response.mut_get_hyperv_cpuid().entries;
                match vcpu.get_hyperv_cpuid() {
                    Ok(mut cpuid) => {
                        for entry in cpuid.mut_entries_slice() {
                            cpuid_response.push(cpuid_kvm_to_proto(entry));
                        }
                        Ok(())
                    }
                    Err(e) => Err(e),
                }
            } else if request.has_get_msrs() {
                let entry_data = &mut response.mut_get_msrs().entry_data;
                let entry_indices = &request.get_get_msrs().entry_indices;
                let mut msr_entries = Vec::with_capacity(entry_indices.len());
                for &index in entry_indices {
                    msr_entries.push(kvm_msr_entry {
                        index,
                        ..Default::default()
                    });
                }
                match vcpu.get_msrs(&mut msr_entries) {
                    Ok(()) => {
                        for msr_entry in msr_entries {
                            entry_data.push(msr_entry.data);
                        }
                        Ok(())
                    }
                    Err(e) => Err(e),
                }
            } else if request.has_set_msrs() {
                const SIZE_OF_MSRS: usize = mem::size_of::<kvm_msrs>();
                const SIZE_OF_ENTRY: usize = mem::size_of::<kvm_msr_entry>();
                const ALIGN_OF_MSRS: usize = mem::align_of::<kvm_msrs>();
                const ALIGN_OF_ENTRY: usize = mem::align_of::<kvm_msr_entry>();
                const_assert!(ALIGN_OF_MSRS >= ALIGN_OF_ENTRY);

                response.mut_set_msrs();
                let request_entries = &request.get_set_msrs().entries;

                let size = SIZE_OF_MSRS + request_entries.len() * SIZE_OF_ENTRY;
                let layout =
                    Layout::from_size_align(size, ALIGN_OF_MSRS).expect("impossible layout");
                let mut allocation = LayoutAllocation::zeroed(layout);

                // Safe to obtain an exclusive reference because there are no other
                // references to the allocation yet and all-zero is a valid bit
                // pattern.
                let kvm_msrs = unsafe { allocation.as_mut::<kvm_msrs>() };

                unsafe {
                    // Mapping the unsized array to a slice is unsafe because the length isn't
                    // known. Providing the length used to create the struct guarantees the
                    // entire slice is valid.
                    let kvm_msr_entries: &mut [kvm_msr_entry] =
                        kvm_msrs.entries.as_mut_slice(request_entries.len());
                    for (msr_entry, entry) in kvm_msr_entries.iter_mut().zip(request_entries) {
                        msr_entry.index = entry.index;
                        msr_entry.data = entry.data;
                    }
                }
                kvm_msrs.nmsrs = request_entries.len() as u32;
                vcpu.set_msrs(&kvm_msrs)
            } else if request.has_set_cpuid() {
                response.mut_set_cpuid();
                let request_entries = &request.get_set_cpuid().entries;
                let mut cpuid = CpuId::new(request_entries.len());
                let cpuid_entries = cpuid.mut_entries_slice();
                for (request_entry, cpuid_entry) in request_entries.iter().zip(cpuid_entries) {
                    cpuid_entry.function = request_entry.function;
                    if request_entry.has_index {
                        cpuid_entry.index = request_entry.index;
                        cpuid_entry.flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                    }
                    cpuid_entry.eax = request_entry.eax;
                    cpuid_entry.ebx = request_entry.ebx;
                    cpuid_entry.ecx = request_entry.ecx;
                    cpuid_entry.edx = request_entry.edx;
                }
                vcpu.set_cpuid2(&cpuid)
            } else if request.has_enable_capability() {
                response.mut_enable_capability();
                let capability = request.get_enable_capability().capability;
                if capability != kvm_sys::KVM_CAP_HYPERV_SYNIC
                    && capability != kvm_sys::KVM_CAP_HYPERV_SYNIC2
                {
                    Err(SysError::new(EINVAL))
                } else {
                    let mut cap: kvm_enable_cap = Default::default();
                    cap.cap = capability;
                    // Safe because the allowed capabilities don't take pointer arguments.
                    unsafe { vcpu.kvm_enable_cap(&cap) }
                }
            } else if request.has_shutdown() {
                return Err(SysError::new(EPIPE));
            } else {
                Err(SysError::new(ENOTTY))
            };

            if let Err(e) = res {
                response.errno = e.errno();
            }
        }

        // Send the response, except when it's a resume response (in which case we'll go run the
        // VM and afterwards send a wait response message).
        if send_response {
            let mut response_buffer = self.response_buffer.borrow_mut();
            response_buffer.clear();
            let mut stream = CodedOutputStream::vec(&mut response_buffer);
            response
                .write_length_delimited_to(&mut stream)
                .map_err(proto_to_sys_err)?;
            stream.flush().map_err(proto_to_sys_err)?;
            let mut write_pipe = &self.write_pipe;
            write_pipe
                .write(&response_buffer[..])
                .map_err(io_to_sys_err)?;
        }

        Ok(resume_data)
    }

    fn handle_until_resume(&self, vcpu: &Vcpu) -> SysResult<Vec<u8>> {
        loop {
            if let Some(resume_data) = self.handle_request(vcpu)? {
                return Ok(resume_data);
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn shared_vcpu_reserve() {
        let mut shared_vcpu_state = SharedVcpuState::default();
        // Zero-length reservations are invalid.
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0x10, false)
            .unwrap();
        // Each of these overlaps the reserved [0x10, 0x20) range and must fail.
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x0f, 0x10, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0x10, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x10, 0x15, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x12, 0x15, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x12, 0x01, false)
            .unwrap_err();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x0, 0x20, false)
            .unwrap_err();
        // Adjacent, non-overlapping ranges are fine.
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x20, 0x05, false)
            .unwrap();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x25, 0x05, false)
            .unwrap();
        shared_vcpu_state
            .reserve_range(IoSpace::Ioport, 0x0, 0x10, false)
            .unwrap();
    }
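
    // A small additional check of the hint helpers; addresses and values are arbitrary.
    #[test]
    fn shared_vcpu_hint() {
        let mut shared_vcpu_state = SharedVcpuState::default();
        shared_vcpu_state.set_hint(IoSpace::Mmio, 0x1000, true, Vec::new());
        assert!(shared_vcpu_state.matches_hint(IoSpace::Mmio, 0x1000, true));
        assert!(!shared_vcpu_state.matches_hint(IoSpace::Mmio, 0x1000, false));
        assert!(!shared_vcpu_state.matches_hint(IoSpace::Ioport, 0x1000, true));
        // Setting the address to 0 clears the hint.
        shared_vcpu_state.set_hint(IoSpace::Mmio, 0, true, Vec::new());
        assert!(!shared_vcpu_state.matches_hint(IoSpace::Mmio, 0x1000, true));
    }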
}