1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! Small system utility modules for usage by other modules.
6 
7 mod alloc;
8 #[cfg(target_os = "android")]
9 mod android;
10 #[cfg(target_os = "android")]
11 use android as target_os;
12 #[cfg(target_os = "linux")]
13 mod linux;
14 #[cfg(target_os = "linux")]
15 use linux as target_os;
16 #[macro_use]
17 pub mod handle_eintr;
18 #[macro_use]
19 pub mod ioctl;
20 #[macro_use]
21 pub mod syslog;
22 mod capabilities;
23 mod clock;
24 mod descriptor;
25 mod descriptor_reflection;
26 mod errno;
27 mod eventfd;
28 mod external_mapping;
29 mod file_flags;
30 pub mod file_traits;
31 mod fork;
32 mod mmap;
33 pub mod net;
34 mod passwd;
35 mod poll;
36 mod priority;
37 pub mod rand;
38 mod raw_fd;
39 pub mod sched;
40 pub mod scoped_path;
41 pub mod scoped_signal_handler;
42 mod seek_hole;
43 mod shm;
44 pub mod signal;
45 mod signalfd;
46 mod sock_ctrl_msg;
47 mod struct_util;
48 mod terminal;
49 mod timerfd;
50 pub mod vsock;
51 mod write_zeroes;
52 
53 pub use crate::alloc::LayoutAllocation;
54 pub use crate::capabilities::drop_capabilities;
55 pub use crate::clock::{Clock, FakeClock};
56 pub use crate::descriptor::*;
57 pub use crate::errno::{errno_result, Error, Result};
58 pub use crate::eventfd::*;
59 pub use crate::external_mapping::*;
60 pub use crate::file_flags::*;
61 pub use crate::fork::*;
62 pub use crate::ioctl::*;
63 pub use crate::mmap::*;
64 pub use crate::passwd::*;
65 pub use crate::poll::*;
66 pub use crate::priority::*;
67 pub use crate::raw_fd::*;
68 pub use crate::sched::*;
69 pub use crate::scoped_signal_handler::*;
70 pub use crate::shm::*;
71 pub use crate::signal::*;
72 pub use crate::signalfd::*;
73 pub use crate::sock_ctrl_msg::*;
74 pub use crate::struct_util::*;
75 pub use crate::terminal::*;
76 pub use crate::timerfd::*;
77 pub use descriptor_reflection::{
78     deserialize_with_descriptors, with_as_descriptor, with_raw_descriptor, FileSerdeWrapper,
79     SerializeDescriptors,
80 };
81 pub use poll_token_derive::*;
82 
83 pub use crate::external_mapping::Error as ExternalMappingError;
84 pub use crate::external_mapping::Result as ExternalMappingResult;
85 pub use crate::file_traits::{
86     AsRawFds, FileAllocate, FileGetLen, FileReadWriteAtVolatile, FileReadWriteVolatile, FileSetLen,
87     FileSync,
88 };
89 pub use crate::mmap::Error as MmapError;
90 pub use crate::seek_hole::SeekHole;
91 pub use crate::signalfd::Error as SignalFdError;
92 pub use crate::write_zeroes::{PunchHole, WriteZeroes, WriteZeroesAt};
93 
94 use std::cell::Cell;
95 use std::ffi::CStr;
96 use std::fs::{remove_file, File};
97 use std::mem;
98 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
99 use std::os::unix::net::UnixDatagram;
100 use std::ptr;
101 use std::time::Duration;
102 
103 use libc::{
104     c_int, c_long, fcntl, pipe2, syscall, sysconf, waitpid, SYS_getpid, SYS_gettid, F_GETFL,
105     F_SETFL, O_CLOEXEC, SIGKILL, WNOHANG, _SC_IOV_MAX, _SC_PAGESIZE,
106 };
107 
/// Process ID type, re-exported from libc because it is part of this crate's API.
pub type Pid = libc::pid_t;
/// User ID type, re-exported from libc because it is part of this crate's API.
pub type Uid = libc::uid_t;
/// Group ID type, re-exported from libc because it is part of this crate's API.
pub type Gid = libc::gid_t;
112 
/// Used to mark types as !Sync.
///
/// `Cell<usize>` is not `Sync`, so a struct holding this zero-sized `PhantomData` field is not
/// `Sync` either.
pub type UnsyncMarker = std::marker::PhantomData<Cell<usize>>;
115 
116 /// Safe wrapper for `sysconf(_SC_PAGESIZE)`.
117 #[inline(always)]
pagesize() -> usize118 pub fn pagesize() -> usize {
119     // Trivially safe
120     unsafe { sysconf(_SC_PAGESIZE) as usize }
121 }
122 
123 /// Safe wrapper for `sysconf(_SC_IOV_MAX)`.
iov_max() -> usize124 pub fn iov_max() -> usize {
125     // Trivially safe
126     unsafe { sysconf(_SC_IOV_MAX) as usize }
127 }
128 
129 /// Uses the system's page size in bytes to round the given value up to the nearest page boundary.
130 #[inline(always)]
round_up_to_page_size(v: usize) -> usize131 pub fn round_up_to_page_size(v: usize) -> usize {
132     let page_mask = pagesize() - 1;
133     (v + page_mask) & !page_mask
134 }
135 
136 /// This bypasses `libc`'s caching `getpid(2)` wrapper which can be invalid if a raw clone was used
137 /// elsewhere.
138 #[inline(always)]
getpid() -> Pid139 pub fn getpid() -> Pid {
140     // Safe because this syscall can never fail and we give it a valid syscall number.
141     unsafe { syscall(SYS_getpid as c_long) as Pid }
142 }
143 
144 /// Safe wrapper for the gettid Linux systemcall.
gettid() -> Pid145 pub fn gettid() -> Pid {
146     // Calling the gettid() sycall is always safe.
147     unsafe { syscall(SYS_gettid as c_long) as Pid }
148 }
149 
150 /// Safe wrapper for `getsid(2)`.
getsid(pid: Option<Pid>) -> Result<Pid>151 pub fn getsid(pid: Option<Pid>) -> Result<Pid> {
152     // Calling the getsid() sycall is always safe.
153     let ret = unsafe { libc::getsid(pid.unwrap_or(0)) } as Pid;
154 
155     if ret < 0 {
156         errno_result()
157     } else {
158         Ok(ret)
159     }
160 }
161 
162 /// Wrapper for `setsid(2)`.
setsid() -> Result<Pid>163 pub fn setsid() -> Result<Pid> {
164     // Safe because the return code is checked.
165     let ret = unsafe { libc::setsid() as Pid };
166 
167     if ret < 0 {
168         errno_result()
169     } else {
170         Ok(ret)
171     }
172 }
173 
174 /// Safe wrapper for `geteuid(2)`.
175 #[inline(always)]
geteuid() -> Uid176 pub fn geteuid() -> Uid {
177     // trivially safe
178     unsafe { libc::geteuid() }
179 }
180 
181 /// Safe wrapper for `getegid(2)`.
182 #[inline(always)]
getegid() -> Gid183 pub fn getegid() -> Gid {
184     // trivially safe
185     unsafe { libc::getegid() }
186 }
187 
188 /// Safe wrapper for chown(2).
189 #[inline(always)]
chown(path: &CStr, uid: Uid, gid: Gid) -> Result<()>190 pub fn chown(path: &CStr, uid: Uid, gid: Gid) -> Result<()> {
191     // Safe since we pass in a valid string pointer and check the return value.
192     let ret = unsafe { libc::chown(path.as_ptr(), uid, gid) };
193 
194     if ret < 0 {
195         errno_result()
196     } else {
197         Ok(())
198     }
199 }
200 
/// The operation to perform with `flock`.
pub enum FlockOperation {
    /// Translated to `libc::LOCK_SH` by `flock`.
    LockShared,
    /// Translated to `libc::LOCK_EX` by `flock`.
    LockExclusive,
    /// Translated to `libc::LOCK_UN` by `flock`.
    Unlock,
}
207 
208 /// Safe wrapper for flock(2) with the operation `op` and optionally `nonblocking`. The lock will be
209 /// dropped automatically when `file` is dropped.
210 #[inline(always)]
flock(file: &dyn AsRawFd, op: FlockOperation, nonblocking: bool) -> Result<()>211 pub fn flock(file: &dyn AsRawFd, op: FlockOperation, nonblocking: bool) -> Result<()> {
212     let mut operation = match op {
213         FlockOperation::LockShared => libc::LOCK_SH,
214         FlockOperation::LockExclusive => libc::LOCK_EX,
215         FlockOperation::Unlock => libc::LOCK_UN,
216     };
217 
218     if nonblocking {
219         operation |= libc::LOCK_NB;
220     }
221 
222     // Safe since we pass in a valid fd and flock operation, and check the return value.
223     let ret = unsafe { libc::flock(file.as_raw_fd(), operation) };
224 
225     if ret < 0 {
226         errno_result()
227     } else {
228         Ok(())
229     }
230 }
231 
/// The operation to perform with `fallocate`.
pub enum FallocateMode {
    /// Translated to `libc::FALLOC_FL_PUNCH_HOLE` by `fallocate`.
    PunchHole,
    /// Translated to `libc::FALLOC_FL_ZERO_RANGE` by `fallocate`.
    ZeroRange,
    /// Translated to a mode of 0 (no operation flag) by `fallocate`.
    Allocate,
}
238 
239 /// Safe wrapper for `fallocate()`.
fallocate( file: &dyn AsRawFd, mode: FallocateMode, keep_size: bool, offset: u64, len: u64, ) -> Result<()>240 pub fn fallocate(
241     file: &dyn AsRawFd,
242     mode: FallocateMode,
243     keep_size: bool,
244     offset: u64,
245     len: u64,
246 ) -> Result<()> {
247     let offset = if offset > libc::off64_t::max_value() as u64 {
248         return Err(Error::new(libc::EINVAL));
249     } else {
250         offset as libc::off64_t
251     };
252 
253     let len = if len > libc::off64_t::max_value() as u64 {
254         return Err(Error::new(libc::EINVAL));
255     } else {
256         len as libc::off64_t
257     };
258 
259     let mut mode = match mode {
260         FallocateMode::PunchHole => libc::FALLOC_FL_PUNCH_HOLE,
261         FallocateMode::ZeroRange => libc::FALLOC_FL_ZERO_RANGE,
262         FallocateMode::Allocate => 0,
263     };
264 
265     if keep_size {
266         mode |= libc::FALLOC_FL_KEEP_SIZE;
267     }
268 
269     // Safe since we pass in a valid fd and fallocate mode, validate offset and len,
270     // and check the return value.
271     let ret = unsafe { libc::fallocate64(file.as_raw_fd(), mode, offset, len) };
272     if ret < 0 {
273         errno_result()
274     } else {
275         Ok(())
276     }
277 }
278 
279 /// Reaps a child process that has terminated.
280 ///
281 /// Returns `Ok(pid)` where `pid` is the process that was reaped or `Ok(0)` if none of the children
282 /// have terminated. An `Error` is with `errno == ECHILD` if there are no children left to reap.
283 ///
284 /// # Examples
285 ///
286 /// Reaps all child processes until there are no terminated children to reap.
287 ///
288 /// ```
289 /// fn reap_children() {
290 ///     loop {
291 ///         match sys_util::reap_child() {
292 ///             Ok(0) => println!("no children ready to reap"),
293 ///             Ok(pid) => {
294 ///                 println!("reaped {}", pid);
295 ///                 continue
296 ///             },
297 ///             Err(e) if e.errno() == libc::ECHILD => println!("no children left"),
298 ///             Err(e) => println!("error reaping children: {}", e),
299 ///         }
300 ///         break
301 ///     }
302 /// }
303 /// ```
reap_child() -> Result<Pid>304 pub fn reap_child() -> Result<Pid> {
305     // Safe because we pass in no memory, prevent blocking with WNOHANG, and check for error.
306     let ret = unsafe { waitpid(-1, ptr::null_mut(), WNOHANG) };
307     if ret == -1 {
308         errno_result()
309     } else {
310         Ok(ret)
311     }
312 }
313 
314 /// Kill all processes in the current process group.
315 ///
316 /// On success, this kills all processes in the current process group, including the current
317 /// process, meaning this will not return. This is equivalent to a call to `kill(0, SIGKILL)`.
kill_process_group() -> Result<()>318 pub fn kill_process_group() -> Result<()> {
319     unsafe { kill(0, SIGKILL) }?;
320     // Kill succeeded, so this process never reaches here.
321     unreachable!();
322 }
323 
324 /// Spawns a pipe pair where the first pipe is the read end and the second pipe is the write end.
325 ///
326 /// If `close_on_exec` is true, the `O_CLOEXEC` flag will be set during pipe creation.
pipe(close_on_exec: bool) -> Result<(File, File)>327 pub fn pipe(close_on_exec: bool) -> Result<(File, File)> {
328     let flags = if close_on_exec { O_CLOEXEC } else { 0 };
329     let mut pipe_fds = [-1; 2];
330     // Safe because pipe2 will only write 2 element array of i32 to the given pointer, and we check
331     // for error.
332     let ret = unsafe { pipe2(&mut pipe_fds[0], flags) };
333     if ret == -1 {
334         errno_result()
335     } else {
336         // Safe because both fds must be valid for pipe2 to have returned sucessfully and we have
337         // exclusive ownership of them.
338         Ok(unsafe {
339             (
340                 File::from_raw_fd(pipe_fds[0]),
341                 File::from_raw_fd(pipe_fds[1]),
342             )
343         })
344     }
345 }
346 
347 /// Sets the pipe signified with fd to `size`.
348 ///
349 /// Returns the new size of the pipe or an error if the OS fails to set the pipe size.
set_pipe_size(fd: RawFd, size: usize) -> Result<usize>350 pub fn set_pipe_size(fd: RawFd, size: usize) -> Result<usize> {
351     // Safe because fcntl with the `F_SETPIPE_SZ` arg doesn't touch memory.
352     let ret = unsafe { fcntl(fd, libc::F_SETPIPE_SZ, size as c_int) };
353     if ret < 0 {
354         return errno_result();
355     }
356     Ok(ret as usize)
357 }
358 
359 /// Test-only function used to create a pipe that is full. The pipe is created, has its size set to
360 /// the minimum and then has that much data written to it. Use `new_pipe_full` to test handling of
361 /// blocking `write` calls in unit tests.
new_pipe_full() -> Result<(File, File)>362 pub fn new_pipe_full() -> Result<(File, File)> {
363     use std::io::Write;
364 
365     let (rx, mut tx) = pipe(true)?;
366     // The smallest allowed size of a pipe is the system page size on linux.
367     let page_size = set_pipe_size(tx.as_raw_fd(), round_up_to_page_size(1))?;
368 
369     // Fill the pipe with page_size zeros so the next write call will block.
370     let buf = vec![0u8; page_size];
371     tx.write_all(&buf)?;
372 
373     Ok((rx, tx))
374 }
375 
376 /// Used to attempt to clean up a named pipe after it is no longer used.
377 pub struct UnlinkUnixDatagram(pub UnixDatagram);
378 impl AsRef<UnixDatagram> for UnlinkUnixDatagram {
as_ref(&self) -> &UnixDatagram379     fn as_ref(&self) -> &UnixDatagram {
380         &self.0
381     }
382 }
383 impl Drop for UnlinkUnixDatagram {
drop(&mut self)384     fn drop(&mut self) {
385         if let Ok(addr) = self.0.local_addr() {
386             if let Some(path) = addr.as_pathname() {
387                 if let Err(e) = remove_file(path) {
388                     warn!("failed to remove control socket file: {}", e);
389                 }
390             }
391         }
392     }
393 }
394 
395 /// Verifies that |raw_fd| is actually owned by this process and duplicates it to ensure that
396 /// we have a unique handle to it.
validate_raw_fd(raw_fd: RawFd) -> Result<RawFd>397 pub fn validate_raw_fd(raw_fd: RawFd) -> Result<RawFd> {
398     // Checking that close-on-exec isn't set helps filter out FDs that were opened by
399     // crosvm as all crosvm FDs are close on exec.
400     // Safe because this doesn't modify any memory and we check the return value.
401     let flags = unsafe { libc::fcntl(raw_fd, libc::F_GETFD) };
402     if flags < 0 || (flags & libc::FD_CLOEXEC) != 0 {
403         return Err(Error::new(libc::EBADF));
404     }
405 
406     // Duplicate the fd to ensure that we don't accidentally close an fd previously
407     // opened by another subsystem.  Safe because this doesn't modify any memory and
408     // we check the return value.
409     let dup_fd = unsafe { libc::fcntl(raw_fd, libc::F_DUPFD_CLOEXEC, 0) };
410     if dup_fd < 0 {
411         return Err(Error::last());
412     }
413     Ok(dup_fd as RawFd)
414 }
415 
416 /// Utility function that returns true if the given FD is readable without blocking.
417 ///
418 /// On an error, such as an invalid or incompatible FD, this will return false, which can not be
419 /// distinguished from a non-ready to read FD.
poll_in(fd: &dyn AsRawFd) -> bool420 pub fn poll_in(fd: &dyn AsRawFd) -> bool {
421     let mut fds = libc::pollfd {
422         fd: fd.as_raw_fd(),
423         events: libc::POLLIN,
424         revents: 0,
425     };
426     // Safe because we give a valid pointer to a list (of 1) FD and check the return value.
427     let ret = unsafe { libc::poll(&mut fds, 1, 0) };
428     // An error probably indicates an invalid FD, or an FD that can't be polled. Returning false in
429     // that case is probably correct as such an FD is unlikely to be readable, although there are
430     // probably corner cases in which that is wrong.
431     if ret == -1 {
432         return false;
433     }
434     fds.revents & libc::POLLIN != 0
435 }
436 
437 /// Returns the file flags set for the given `RawFD`
438 ///
439 /// Returns an error if the OS indicates the flags can't be retrieved.
get_fd_flags(fd: RawFd) -> Result<c_int>440 fn get_fd_flags(fd: RawFd) -> Result<c_int> {
441     // Safe because no third parameter is expected and we check the return result.
442     let ret = unsafe { fcntl(fd, F_GETFL) };
443     if ret < 0 {
444         return errno_result();
445     }
446     Ok(ret)
447 }
448 
449 /// Sets the file flags set for the given `RawFD`.
450 ///
451 /// Returns an error if the OS indicates the flags can't be retrieved.
set_fd_flags(fd: RawFd, flags: c_int) -> Result<()>452 fn set_fd_flags(fd: RawFd, flags: c_int) -> Result<()> {
453     // Safe because we supply the third parameter and we check the return result.
454     // fcntlt is trusted not to modify the memory of the calling process.
455     let ret = unsafe { fcntl(fd, F_SETFL, flags) };
456     if ret < 0 {
457         return errno_result();
458     }
459     Ok(())
460 }
461 
462 /// Performs a logical OR of the given flags with the FD's flags, setting the given bits for the
463 /// FD.
464 ///
465 /// Returns an error if the OS indicates the flags can't be retrieved or set.
add_fd_flags(fd: RawFd, set_flags: c_int) -> Result<()>466 pub fn add_fd_flags(fd: RawFd, set_flags: c_int) -> Result<()> {
467     let start_flags = get_fd_flags(fd)?;
468     set_fd_flags(fd, start_flags | set_flags)
469 }
470 
471 /// Clears the given flags in the FD's flags.
472 ///
473 /// Returns an error if the OS indicates the flags can't be retrieved or set.
clear_fd_flags(fd: RawFd, clear_flags: c_int) -> Result<()>474 pub fn clear_fd_flags(fd: RawFd, clear_flags: c_int) -> Result<()> {
475     let start_flags = get_fd_flags(fd)?;
476     set_fd_flags(fd, start_flags & !clear_flags)
477 }
478 
479 /// Return a timespec filed with the specified Duration `duration`.
duration_to_timespec(duration: Duration) -> libc::timespec480 pub fn duration_to_timespec(duration: Duration) -> libc::timespec {
481     // Safe because we are zero-initializing a struct with only primitive member fields.
482     let mut ts: libc::timespec = unsafe { mem::zeroed() };
483 
484     ts.tv_sec = duration.as_secs() as libc::time_t;
485     // nsec always fits in i32 because subsec_nanos is defined to be less than one billion.
486     let nsec = duration.subsec_nanos() as i32;
487     ts.tv_nsec = libc::c_long::from(nsec);
488     ts
489 }
490 
491 /// Return the maximum Duration that can be used with libc::timespec.
max_timeout() -> Duration492 pub fn max_timeout() -> Duration {
493     Duration::new(libc::time_t::max_value() as u64, 999999999)
494 }
495 
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::*;

    /// Checks that `new_pipe_full` hands back a pipe with no room left for further writes.
    #[test]
    fn pipe_size_and_fill() {
        let (_reader, mut writer) = new_pipe_full().expect("Failed to pipe");

        // With the write end switched to non-blocking mode, a write to a full pipe reports an
        // error instead of blocking, which is what shows that the pipe was filled.
        let writer_fd = writer.as_raw_fd();
        add_fd_flags(writer_fd, libc::O_NONBLOCK).expect("Failed to set tx non blocking");

        let extra = [0u8; 8];
        writer
            .write(&extra)
            .expect_err("Write after fill didn't fail");
    }
}
513