1 // Copyright 2023, The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 //! This module contains the functions to start, stop and communicate with the
16 //! Service VM.
17
18 use android_system_virtualizationservice::{
19 aidl::android::system::virtualizationservice::{
20 CpuTopology::CpuTopology, DiskImage::DiskImage,
21 IVirtualizationService::IVirtualizationService, Partition::Partition,
22 PartitionType::PartitionType, VirtualMachineConfig::VirtualMachineConfig,
23 VirtualMachineRawConfig::VirtualMachineRawConfig,
24 },
25 binder::ParcelFileDescriptor,
26 };
27 use anyhow::{anyhow, ensure, Context, Result};
28 use lazy_static::lazy_static;
29 use log::{info, warn};
30 use service_vm_comm::{Request, Response, ServiceVmRequest, VmType};
31 use std::fs::{self, File, OpenOptions};
32 use std::io::{self, BufRead, BufReader, BufWriter, Read, Write};
33 use std::path::{Path, PathBuf};
34 use std::sync::{Condvar, Mutex};
35 use std::thread;
36 use std::time::Duration;
37 use vmclient::{DeathReason, VmInstance};
38 use vsock::{VsockListener, VsockStream, VMADDR_CID_HOST};
39
/// Directory that holds the service VM's instance image and instance ID file.
const VIRT_DATA_DIR: &str = "/data/misc/apexdata/com.android.virt";
/// Path of the Rialto binary that is passed to the VM as its bootloader.
const RIALTO_PATH: &str = "/apex/com.android.virt/etc/rialto.bin";
/// File name of the instance image inside `VIRT_DATA_DIR`.
const INSTANCE_IMG_NAME: &str = "service_vm_instance.img";
/// File name of the persisted instance ID inside `VIRT_DATA_DIR`.
const INSTANCE_ID_FILENAME: &str = "service_vm_instance_id";
/// Size requested when a new instance image partition is initialized.
const INSTANCE_IMG_SIZE_BYTES: i64 = 1 << 20; // 1MB
/// Value used for the `memoryMib` field of the VM configuration.
const MEMORY_MB: i32 = 300;
/// Capacity of the buffered writer used to send requests over vsock.
const WRITE_BUFFER_CAPACITY: usize = 512;
/// Read timeout applied to the vsock stream connected to the service VM.
const READ_TIMEOUT: Duration = Duration::from_secs(10);
/// Write timeout applied to the vsock stream connected to the service VM.
const WRITE_TIMEOUT: Duration = Duration::from_secs(10);
lazy_static! {
    // Number of `process_request` calls that are currently in flight.
    static ref PENDING_REQUESTS: AtomicCounter = AtomicCounter::default();
    // The running service VM, or `None` when no VM has been started or it has been shut down.
    static ref SERVICE_VM: Mutex<Option<ServiceVm>> = Mutex::new(None);
    // Notified after `SERVICE_VM` has been reset to `None` by the idle check.
    static ref SERVICE_VM_SHUTDOWN: Condvar = Condvar::new();
}
54
/// A mutex-protected counter paired with a condition variable, so that a caller
/// can block (up to a timeout) until the count becomes positive.
#[derive(Debug, Default)]
struct AtomicCounter {
    /// Current value of the counter.
    num: Mutex<usize>,
    /// Signalled every time the counter is incremented.
    num_increased: Condvar,
}

impl AtomicCounter {
    /// Blocks for at most `timeout` while the counter is zero and reports whether
    /// it is positive once the wait is over.
    fn is_positive_within_timeout(&self, timeout: Duration) -> bool {
        let count = self.num.lock().unwrap();
        let (count, _wait_result) =
            self.num_increased.wait_timeout_while(count, timeout, |n| *n == 0).unwrap();
        *count != 0
    }

    /// Adds one to the counter and wakes up every waiter.
    ///
    /// Panics if the counter overflows.
    fn increment(&self) {
        let mut count = self.num.lock().unwrap();
        let next = count.checked_add(1).unwrap();
        *count = next;
        self.num_increased.notify_all();
    }

    /// Subtracts one from the counter.
    ///
    /// Panics if the counter is already zero.
    fn decrement(&self) {
        let mut count = self.num.lock().unwrap();
        let previous = count.checked_sub(1).unwrap();
        *count = previous;
    }
}
84
85 /// Processes the request in the service VM.
process_request(request: Request) -> Result<Response>86 pub fn process_request(request: Request) -> Result<Response> {
87 PENDING_REQUESTS.increment();
88 let result = process_request_in_service_vm(request);
89 PENDING_REQUESTS.decrement();
90 thread::spawn(stop_service_vm_if_idle);
91 result
92 }
93
process_request_in_service_vm(request: Request) -> Result<Response>94 fn process_request_in_service_vm(request: Request) -> Result<Response> {
95 let mut service_vm = SERVICE_VM.lock().unwrap();
96 if service_vm.is_none() {
97 *service_vm = Some(ServiceVm::start()?);
98 }
99 service_vm.as_mut().unwrap().process_request(request)
100 }
101
stop_service_vm_if_idle()102 fn stop_service_vm_if_idle() {
103 if PENDING_REQUESTS.is_positive_within_timeout(Duration::from_secs(1)) {
104 info!("Service VM has pending requests, keeping it running.");
105 } else {
106 info!("Service VM is idle, shutting it down.");
107 *SERVICE_VM.lock().unwrap() = None;
108 SERVICE_VM_SHUTDOWN.notify_all();
109 }
110 }
111
112 /// Waits until the service VM shuts down.
113 /// This function is only used in tests.
wait_until_service_vm_shuts_down() -> Result<()>114 pub fn wait_until_service_vm_shuts_down() -> Result<()> {
115 const WAIT_FOR_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5);
116
117 let (_guard, wait_result) = SERVICE_VM_SHUTDOWN
118 .wait_timeout_while(SERVICE_VM.lock().unwrap(), WAIT_FOR_SHUTDOWN_TIMEOUT, |x| x.is_some())
119 .unwrap();
120 ensure!(!wait_result.timed_out(), "Service VM didn't shut down within the timeout");
121 Ok(())
122 }
123
/// Service VM.
///
/// Bundles the VM instance with the vsock stream that carries the requests sent
/// to it and the responses read back from it.
pub struct ServiceVm {
    /// Connection accepted from the VM; requests are written to it and responses read from it.
    vsock_stream: VsockStream,
    /// VmInstance will be dropped when ServiceVm goes out of scope, which will kill the VM.
    vm: VmInstance,
}
130
131 impl ServiceVm {
132 /// Starts the service VM and returns its instance.
133 /// The same instance image is used for different VMs.
134 /// TODO(b/27593612): Remove instance image usage for Service VM.
start() -> Result<Self>135 pub fn start() -> Result<Self> {
136 let instance_img_path = Path::new(VIRT_DATA_DIR).join(INSTANCE_IMG_NAME);
137 let vm = protected_vm_instance(instance_img_path)?;
138
139 let vm = Self::start_vm(vm, VmType::ProtectedVm)?;
140 Ok(vm)
141 }
142
143 /// Starts the given VM instance and sets up the vsock connection with it.
144 /// Returns a `ServiceVm` instance.
145 /// This function is exposed for testing.
start_vm(vm: VmInstance, vm_type: VmType) -> Result<Self>146 pub fn start_vm(vm: VmInstance, vm_type: VmType) -> Result<Self> {
147 // Sets up the vsock server on the host.
148 let vsock_listener = VsockListener::bind_with_cid_port(VMADDR_CID_HOST, vm_type.port())?;
149
150 // Starts the service VM.
151 vm.start().context("Failed to start service VM")?;
152 info!("Service VM started");
153
154 // Accepts the connection from the service VM.
155 // TODO(b/299427101): Introduce a timeout for the accept.
156 let (vsock_stream, peer_addr) = vsock_listener.accept().context("Failed to accept")?;
157 info!("Accepted connection {:?}", vsock_stream);
158 ensure!(
159 peer_addr.cid() == u32::try_from(vm.cid()).unwrap(),
160 "The CID of the peer address {} doesn't match the service VM CID {}",
161 peer_addr,
162 vm.cid()
163 );
164 vsock_stream.set_read_timeout(Some(READ_TIMEOUT))?;
165 vsock_stream.set_write_timeout(Some(WRITE_TIMEOUT))?;
166
167 Ok(Self { vsock_stream, vm })
168 }
169
170 /// Processes the request in the service VM.
process_request(&mut self, request: Request) -> Result<Response>171 pub fn process_request(&mut self, request: Request) -> Result<Response> {
172 self.write_request(&ServiceVmRequest::Process(request))?;
173 self.read_response()
174 }
175
176 /// Sends the request to the service VM.
write_request(&mut self, request: &ServiceVmRequest) -> Result<()>177 fn write_request(&mut self, request: &ServiceVmRequest) -> Result<()> {
178 let mut buffer = BufWriter::with_capacity(WRITE_BUFFER_CAPACITY, &mut self.vsock_stream);
179 ciborium::into_writer(request, &mut buffer)?;
180 buffer.flush().context("Failed to flush the buffer")?;
181 info!("Sent request to the service VM.");
182 Ok(())
183 }
184
185 /// Reads the response from the service VM.
read_response(&mut self) -> Result<Response>186 fn read_response(&mut self) -> Result<Response> {
187 let response: Response = ciborium::from_reader(&mut self.vsock_stream)
188 .context("Failed to read the response from the service VM")?;
189 info!("Received response from the service VM.");
190 Ok(response)
191 }
192
193 /// Shuts down the service VM.
shutdown(&mut self) -> Result<DeathReason>194 fn shutdown(&mut self) -> Result<DeathReason> {
195 self.write_request(&ServiceVmRequest::Shutdown)?;
196 self.vm
197 .wait_for_death_with_timeout(Duration::from_secs(10))
198 .ok_or_else(|| anyhow!("Timed out to exit the service VM"))
199 }
200 }
201
202 impl Drop for ServiceVm {
drop(&mut self)203 fn drop(&mut self) {
204 // Wait till the service VM finishes releasing all the resources.
205 match self.shutdown() {
206 Ok(reason) => info!("Exit the service VM successfully: {reason:?}"),
207 Err(e) => warn!("Service VM shutdown request failed '{e:?}', killing it."),
208 }
209 }
210 }
211
212 /// Returns a `VmInstance` of a protected VM with the instance image from the given path.
protected_vm_instance(instance_img_path: PathBuf) -> Result<VmInstance>213 pub fn protected_vm_instance(instance_img_path: PathBuf) -> Result<VmInstance> {
214 let virtmgr = vmclient::VirtualizationService::new().context("Failed to spawn VirtMgr")?;
215 let service = virtmgr.connect().context("Failed to connect to VirtMgr")?;
216 info!("Connected to VirtMgr for service VM");
217
218 let instance_img = instance_img(service.as_ref(), instance_img_path)?;
219 let writable_partitions = vec![Partition {
220 label: "vm-instance".to_owned(),
221 image: Some(instance_img),
222 writable: true,
223 }];
224 let rialto = File::open(RIALTO_PATH).context("Failed to open Rialto kernel binary")?;
225 let instance_id_file = Path::new(VIRT_DATA_DIR).join(INSTANCE_ID_FILENAME);
226 let instance_id = get_or_allocate_instance_id(service.as_ref(), instance_id_file)?;
227 let config = VirtualMachineConfig::RawConfig(VirtualMachineRawConfig {
228 name: String::from("Service VM"),
229 bootloader: Some(ParcelFileDescriptor::new(rialto)),
230 disks: vec![DiskImage { image: None, partitions: writable_partitions, writable: true }],
231 instanceId: instance_id,
232 protectedVm: true,
233 memoryMib: MEMORY_MB,
234 cpuTopology: CpuTopology::ONE_CPU,
235 platformVersion: "~1.0".to_string(),
236 gdbPort: 0, // No gdb
237 ..Default::default()
238 });
239 let console_out = Some(android_log_fd()?);
240 let console_in = None;
241 let log = Some(android_log_fd()?);
242 let callback = None;
243 VmInstance::create(service.as_ref(), &config, console_out, console_in, log, callback)
244 .context("Failed to create service VM")
245 }
246
247 /// TODO(b/291213394): Reuse this method in other places such as vm and compos.
get_or_allocate_instance_id( service: &dyn IVirtualizationService, instance_id_file: PathBuf, ) -> Result<[u8; 64]>248 fn get_or_allocate_instance_id(
249 service: &dyn IVirtualizationService,
250 instance_id_file: PathBuf,
251 ) -> Result<[u8; 64]> {
252 let mut instance_id = [0; 64];
253 if instance_id_file.exists() {
254 let mut file = File::open(&instance_id_file)?;
255 file.read_exact(&mut instance_id)?;
256 } else {
257 info!("Allocating a new instance ID for the Service VM");
258 instance_id = service.allocateInstanceId()?;
259 fs::write(instance_id_file, instance_id)?;
260 }
261 Ok(instance_id)
262 }
263
264 /// Returns the file descriptor of the instance image at the given path.
instance_img( service: &dyn IVirtualizationService, instance_img_path: PathBuf, ) -> Result<ParcelFileDescriptor>265 fn instance_img(
266 service: &dyn IVirtualizationService,
267 instance_img_path: PathBuf,
268 ) -> Result<ParcelFileDescriptor> {
269 if instance_img_path.exists() {
270 // TODO(b/298174584): Try to recover if the service VM is triggered by rkpd.
271 return Ok(OpenOptions::new()
272 .read(true)
273 .write(true)
274 .open(instance_img_path)
275 .map(ParcelFileDescriptor::new)?);
276 }
277 let instance_img = OpenOptions::new()
278 .create(true)
279 .truncate(true)
280 .read(true)
281 .write(true)
282 .open(instance_img_path)
283 .map(ParcelFileDescriptor::new)?;
284 service.initializeWritablePartition(
285 &instance_img,
286 INSTANCE_IMG_SIZE_BYTES,
287 PartitionType::ANDROID_VM_INSTANCE,
288 )?;
289 Ok(instance_img)
290 }
291
292 /// This function is only exposed for testing.
android_log_fd() -> io::Result<File>293 pub fn android_log_fd() -> io::Result<File> {
294 let (reader_fd, writer_fd) = nix::unistd::pipe()?;
295
296 let reader = File::from(reader_fd);
297 let writer = File::from(writer_fd);
298
299 thread::spawn(|| {
300 for line in BufReader::new(reader).lines() {
301 match line {
302 Ok(l) => info!("{}", l),
303 Err(e) => {
304 warn!("Failed to read line: {e:?}");
305 break;
306 }
307 }
308 }
309 });
310 Ok(writer)
311 }
312