// Copyright 2022, The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! High-level FDT functions.
use crate::bootargs::BootArgsIterator;
use crate::device_assignment::{self, DeviceAssignmentInfo, VmDtbo};
use crate::helpers::GUEST_PAGE_SIZE;
use crate::Box;
use crate::RebootReason;
use alloc::collections::BTreeMap;
use alloc::ffi::CString;
use alloc::format;
use alloc::vec::Vec;
use core::cmp::max;
use core::cmp::min;
use core::ffi::CStr;
use core::fmt;
use core::mem::size_of;
use core::ops::Range;
use cstr::cstr;
use fdtpci::PciMemoryFlags;
use fdtpci::PciRangeType;
use libfdt::AddressRange;
use libfdt::CellIterator;
use libfdt::Fdt;
use libfdt::FdtError;
use libfdt::FdtNode;
use libfdt::FdtNodeMut;
use libfdt::Phandle;
use log::debug;
use log::error;
use log::info;
use log::warn;
use static_assertions::const_assert;
use tinyvec::ArrayVec;
use vmbase::fdt::SwiotlbInfo;
use vmbase::hyp;
use vmbase::layout::{crosvm::MEM_START, MAX_VIRT_ADDR};
use vmbase::memory::SIZE_4KB;
use vmbase::util::flatten;
use vmbase::util::RangeExt as _;
use zerocopy::AsBytes as _;
/// An enumeration of errors that can occur during the FDT validation.
#[derive(Clone, Debug)]
pub enum FdtValidationError {
/// Invalid CPU count.
InvalidCpuCount(usize),
/// Invalid VCpufreq Range.
InvalidVcpufreq(u64, u64),
/// Forbidden /avf/untrusted property.
ForbiddenUntrustedProp(&'static CStr),
}
impl fmt::Display for FdtValidationError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::InvalidCpuCount(num_cpus) => write!(f, "Invalid CPU count: {num_cpus}"),
Self::InvalidVcpufreq(addr, size) => {
write!(f, "Invalid vcpufreq region: ({addr:#x}, {size:#x})")
}
Self::ForbiddenUntrustedProp(name) => {
write!(f, "Forbidden /avf/untrusted property '{name:?}'")
}
}
}
}
/// Extract from /config the address range containing the pre-loaded kernel. Absence of /config is
/// not an error.
fn read_kernel_range_from(fdt: &Fdt) -> libfdt::Result>> {
let addr = cstr!("kernel-address");
let size = cstr!("kernel-size");
if let Some(config) = fdt.node(cstr!("/config"))? {
if let (Some(addr), Some(size)) = (config.getprop_u32(addr)?, config.getprop_u32(size)?) {
let addr = addr as usize;
let size = size as usize;
return Ok(Some(addr..(addr + size)));
}
}
Ok(None)
}
/// Extract from /chosen the address range containing the pre-loaded ramdisk. Absence is not an
/// error as there can be initrd-less VM.
fn read_initrd_range_from(fdt: &Fdt) -> libfdt::Result >> {
let start = cstr!("linux,initrd-start");
let end = cstr!("linux,initrd-end");
if let Some(chosen) = fdt.chosen()? {
if let (Some(start), Some(end)) = (chosen.getprop_u32(start)?, chosen.getprop_u32(end)?) {
return Ok(Some((start as usize)..(end as usize)));
}
}
Ok(None)
}
fn patch_initrd_range(fdt: &mut Fdt, initrd_range: &Range) -> libfdt::Result<()> {
let start = u32::try_from(initrd_range.start).unwrap();
let end = u32::try_from(initrd_range.end).unwrap();
let mut node = fdt.chosen_mut()?.ok_or(FdtError::NotFound)?;
node.setprop(cstr!("linux,initrd-start"), &start.to_be_bytes())?;
node.setprop(cstr!("linux,initrd-end"), &end.to_be_bytes())?;
Ok(())
}
fn read_bootargs_from(fdt: &Fdt) -> libfdt::Result> {
if let Some(chosen) = fdt.chosen()? {
if let Some(bootargs) = chosen.getprop_str(cstr!("bootargs"))? {
// We need to copy the string to heap because the original fdt will be invalidated
// by the templated DT
let copy = CString::new(bootargs.to_bytes()).map_err(|_| FdtError::BadValue)?;
return Ok(Some(copy));
}
}
Ok(None)
}
fn patch_bootargs(fdt: &mut Fdt, bootargs: &CStr) -> libfdt::Result<()> {
let mut node = fdt.chosen_mut()?.ok_or(FdtError::NotFound)?;
// This function is called before the verification is done. So, we just copy the bootargs to
// the new FDT unmodified. This will be filtered again in the modify_for_next_stage function
// if the VM is not debuggable.
node.setprop(cstr!("bootargs"), bootargs.to_bytes_with_nul())
}
/// Reads and validates the memory range in the DT.
///
/// Only one memory range is expected with the crosvm setup for now.
fn read_and_validate_memory_range(fdt: &Fdt) -> Result, RebootReason> {
let mut memory = fdt.memory().map_err(|e| {
error!("Failed to read memory range from DT: {e}");
RebootReason::InvalidFdt
})?;
let range = memory.next().ok_or_else(|| {
error!("The /memory node in the DT contains no range.");
RebootReason::InvalidFdt
})?;
if memory.next().is_some() {
warn!(
"The /memory node in the DT contains more than one memory range, \
while only one is expected."
);
}
let base = range.start;
if base != MEM_START {
error!("Memory base address {:#x} is not {:#x}", base, MEM_START);
return Err(RebootReason::InvalidFdt);
}
let size = range.len();
if size % GUEST_PAGE_SIZE != 0 {
error!("Memory size {:#x} is not a multiple of page size {:#x}", size, GUEST_PAGE_SIZE);
return Err(RebootReason::InvalidFdt);
}
if size == 0 {
error!("Memory size is 0");
return Err(RebootReason::InvalidFdt);
}
Ok(range)
}
fn patch_memory_range(fdt: &mut Fdt, memory_range: &Range) -> libfdt::Result<()> {
let addr = u64::try_from(MEM_START).unwrap();
let size = u64::try_from(memory_range.len()).unwrap();
fdt.node_mut(cstr!("/memory"))?
.ok_or(FdtError::NotFound)?
.setprop_inplace(cstr!("reg"), [addr.to_be(), size.to_be()].as_bytes())
}
#[derive(Debug, Default)]
struct CpuInfo {
opptable_info: Option>,
cpu_capacity: Option,
}
impl CpuInfo {
const MAX_OPPTABLES: usize = 20;
}
fn read_opp_info_from(
opp_node: FdtNode,
) -> libfdt::Result> {
let mut table = ArrayVec::new();
let mut opp_nodes = opp_node.subnodes()?;
for subnode in opp_nodes.by_ref().take(table.capacity()) {
let prop = subnode.getprop_u64(cstr!("opp-hz"))?.ok_or(FdtError::NotFound)?;
table.push(prop);
}
if opp_nodes.next().is_some() {
warn!("OPP table has more than {} entries: discarding extra nodes.", table.capacity());
}
Ok(table)
}
#[derive(Debug, Default)]
struct ClusterTopology {
// TODO: Support multi-level clusters & threads.
cores: [Option; ClusterTopology::MAX_CORES_PER_CLUSTER],
}
impl ClusterTopology {
const MAX_CORES_PER_CLUSTER: usize = 10;
}
#[derive(Debug, Default)]
struct CpuTopology {
// TODO: Support sockets.
clusters: [Option; CpuTopology::MAX_CLUSTERS],
}
impl CpuTopology {
const MAX_CLUSTERS: usize = 3;
}
fn read_cpu_map_from(fdt: &Fdt) -> libfdt::Result>> {
let Some(cpu_map) = fdt.node(cstr!("/cpus/cpu-map"))? else {
return Ok(None);
};
let mut topology = BTreeMap::new();
for n in 0..CpuTopology::MAX_CLUSTERS {
let name = CString::new(format!("cluster{n}")).unwrap();
let Some(cluster) = cpu_map.subnode(&name)? else {
break;
};
for m in 0..ClusterTopology::MAX_CORES_PER_CLUSTER {
let name = CString::new(format!("core{m}")).unwrap();
let Some(core) = cluster.subnode(&name)? else {
break;
};
let cpu = core.getprop_u32(cstr!("cpu"))?.ok_or(FdtError::NotFound)?;
let prev = topology.insert(cpu.try_into()?, (n, m));
if prev.is_some() {
return Err(FdtError::BadValue);
}
}
}
Ok(Some(topology))
}
fn read_cpu_info_from(
fdt: &Fdt,
) -> libfdt::Result<(ArrayVec<[CpuInfo; DeviceTreeInfo::MAX_CPUS]>, Option)> {
let mut cpus = ArrayVec::new();
let cpu_map = read_cpu_map_from(fdt)?;
let mut topology: CpuTopology = Default::default();
let mut cpu_nodes = fdt.compatible_nodes(cstr!("arm,armv8"))?;
for (idx, cpu) in cpu_nodes.by_ref().take(cpus.capacity()).enumerate() {
let cpu_capacity = cpu.getprop_u32(cstr!("capacity-dmips-mhz"))?;
let opp_phandle = cpu.getprop_u32(cstr!("operating-points-v2"))?;
let opptable_info = if let Some(phandle) = opp_phandle {
let phandle = phandle.try_into()?;
let node = fdt.node_with_phandle(phandle)?.ok_or(FdtError::NotFound)?;
Some(read_opp_info_from(node)?)
} else {
None
};
let info = CpuInfo { opptable_info, cpu_capacity };
cpus.push(info);
if let Some(ref cpu_map) = cpu_map {
let phandle = cpu.get_phandle()?.ok_or(FdtError::NotFound)?;
let (cluster, core_idx) = cpu_map.get(&phandle).ok_or(FdtError::BadValue)?;
let cluster = topology.clusters[*cluster].get_or_insert(Default::default());
if cluster.cores[*core_idx].is_some() {
return Err(FdtError::BadValue);
}
cluster.cores[*core_idx] = Some(idx);
}
}
if cpu_nodes.next().is_some() {
warn!("DT has more than {} CPU nodes: discarding extra nodes.", cpus.capacity());
}
Ok((cpus, cpu_map.map(|_| topology)))
}
fn validate_cpu_info(cpus: &[CpuInfo]) -> Result<(), FdtValidationError> {
if cpus.is_empty() {
return Err(FdtValidationError::InvalidCpuCount(0));
}
Ok(())
}
fn read_vcpufreq_info(fdt: &Fdt) -> libfdt::Result> {
let mut nodes = fdt.compatible_nodes(cstr!("virtual,android-v-only-cpufreq"))?;
let Some(node) = nodes.next() else {
return Ok(None);
};
if nodes.next().is_some() {
warn!("DT has more than 1 cpufreq node: discarding extra nodes.");
}
let mut regs = node.reg()?.ok_or(FdtError::NotFound)?;
let reg = regs.next().ok_or(FdtError::NotFound)?;
let size = reg.size.ok_or(FdtError::NotFound)?;
Ok(Some(VcpufreqInfo { addr: reg.addr, size }))
}
fn validate_vcpufreq_info(
vcpufreq_info: &VcpufreqInfo,
cpus: &[CpuInfo],
) -> Result<(), FdtValidationError> {
const VCPUFREQ_BASE_ADDR: u64 = 0x1040000;
const VCPUFREQ_SIZE_PER_CPU: u64 = 0x8;
let base = vcpufreq_info.addr;
let size = vcpufreq_info.size;
let expected_size = VCPUFREQ_SIZE_PER_CPU * cpus.len() as u64;
if (base, size) != (VCPUFREQ_BASE_ADDR, expected_size) {
return Err(FdtValidationError::InvalidVcpufreq(base, size));
}
Ok(())
}
fn patch_opptable(
node: FdtNodeMut,
opptable: Option>,
) -> libfdt::Result<()> {
let oppcompat = cstr!("operating-points-v2");
let next = node.next_compatible(oppcompat)?.ok_or(FdtError::NoSpace)?;
let Some(opptable) = opptable else {
return next.nop();
};
let mut next_subnode = next.first_subnode()?;
for entry in opptable {
let mut subnode = next_subnode.ok_or(FdtError::NoSpace)?;
subnode.setprop_inplace(cstr!("opp-hz"), &entry.to_be_bytes())?;
next_subnode = subnode.next_subnode()?;
}
while let Some(current) = next_subnode {
next_subnode = current.delete_and_next_subnode()?;
}
Ok(())
}
// TODO(ptosi): Rework FdtNodeMut and replace this function.
fn get_nth_compatible<'a>(
fdt: &'a mut Fdt,
n: usize,
compat: &CStr,
) -> libfdt::Result>> {
let mut node = fdt.root_mut().next_compatible(compat)?;
for _ in 0..n {
node = node.ok_or(FdtError::NoSpace)?.next_compatible(compat)?;
}
Ok(node)
}
fn patch_cpus(
fdt: &mut Fdt,
cpus: &[CpuInfo],
topology: &Option,
) -> libfdt::Result<()> {
const COMPAT: &CStr = cstr!("arm,armv8");
let mut cpu_phandles = Vec::new();
for (idx, cpu) in cpus.iter().enumerate() {
let mut cur = get_nth_compatible(fdt, idx, COMPAT)?.ok_or(FdtError::NoSpace)?;
let phandle = cur.as_node().get_phandle()?.unwrap();
cpu_phandles.push(phandle);
if let Some(cpu_capacity) = cpu.cpu_capacity {
cur.setprop_inplace(cstr!("capacity-dmips-mhz"), &cpu_capacity.to_be_bytes())?;
}
patch_opptable(cur, cpu.opptable_info)?;
}
let mut next = get_nth_compatible(fdt, cpus.len(), COMPAT)?;
while let Some(current) = next {
next = current.delete_and_next_compatible(COMPAT)?;
}
if let Some(topology) = topology {
for (n, cluster) in topology.clusters.iter().enumerate() {
let path = CString::new(format!("/cpus/cpu-map/cluster{n}")).unwrap();
let cluster_node = fdt.node_mut(&path)?.unwrap();
if let Some(cluster) = cluster {
let mut iter = cluster_node.first_subnode()?;
for core in cluster.cores {
let mut core_node = iter.unwrap();
iter = if let Some(core_idx) = core {
let phandle = *cpu_phandles.get(core_idx).unwrap();
let value = u32::from(phandle).to_be_bytes();
core_node.setprop_inplace(cstr!("cpu"), &value)?;
core_node.next_subnode()?
} else {
core_node.delete_and_next_subnode()?
};
}
assert!(iter.is_none());
} else {
cluster_node.nop()?;
}
}
} else {
fdt.node_mut(cstr!("/cpus/cpu-map"))?.unwrap().nop()?;
}
Ok(())
}
/// Reads the /avf/untrusted DT node, which the host can use to pass properties (no subnodes) to
/// the guest that don't require being validated by pvmfw.
fn parse_untrusted_props(fdt: &Fdt) -> libfdt::Result>> {
let mut props = BTreeMap::new();
if let Some(node) = fdt.node(cstr!("/avf/untrusted"))? {
for property in node.properties()? {
let name = property.name()?;
let value = property.value()?;
props.insert(CString::from(name), value.to_vec());
}
if node.subnodes()?.next().is_some() {
warn!("Discarding unexpected /avf/untrusted subnodes.");
}
}
Ok(props)
}
/// Read candidate properties' names from DT which could be overlaid
fn parse_vm_ref_dt(fdt: &Fdt) -> libfdt::Result>> {
let mut property_map = BTreeMap::new();
if let Some(avf_node) = fdt.node(cstr!("/avf"))? {
for property in avf_node.properties()? {
let name = property.name()?;
let value = property.value()?;
property_map.insert(
CString::new(name.to_bytes()).map_err(|_| FdtError::BadValue)?,
value.to_vec(),
);
}
}
Ok(property_map)
}
fn validate_untrusted_props(props: &BTreeMap>) -> Result<(), FdtValidationError> {
const FORBIDDEN_PROPS: &[&CStr] =
&[cstr!("compatible"), cstr!("linux,phandle"), cstr!("phandle")];
for name in FORBIDDEN_PROPS {
if props.contains_key(*name) {
return Err(FdtValidationError::ForbiddenUntrustedProp(name));
}
}
Ok(())
}
/// Overlay VM reference DT into VM DT based on the props_info. Property is overlaid in vm_dt only
/// when it exists both in vm_ref_dt and props_info. If the values mismatch, it returns error.
fn validate_vm_ref_dt(
vm_dt: &mut Fdt,
vm_ref_dt: &Fdt,
props_info: &BTreeMap>,
) -> libfdt::Result<()> {
let root_vm_dt = vm_dt.root_mut();
let mut avf_vm_dt = root_vm_dt.add_subnode(cstr!("avf"))?;
// TODO(b/318431677): Validate nodes beyond /avf.
let avf_node = vm_ref_dt.node(cstr!("/avf"))?.ok_or(FdtError::NotFound)?;
for (name, value) in props_info.iter() {
if let Some(ref_value) = avf_node.getprop(name)? {
if value != ref_value {
error!(
"Property mismatches while applying overlay VM reference DT. \
Name:{:?}, Value from host as hex:{:x?}, Value from VM reference DT as hex:{:x?}",
name, value, ref_value
);
return Err(FdtError::BadValue);
}
avf_vm_dt.setprop(name, ref_value)?;
}
}
Ok(())
}
#[derive(Debug)]
struct PciInfo {
ranges: [PciAddrRange; 2],
irq_masks: ArrayVec<[PciIrqMask; PciInfo::MAX_IRQS]>,
irq_maps: ArrayVec<[PciIrqMap; PciInfo::MAX_IRQS]>,
}
impl PciInfo {
const IRQ_MASK_CELLS: usize = 4;
const IRQ_MAP_CELLS: usize = 10;
const MAX_IRQS: usize = 16;
}
type PciAddrRange = AddressRange<(u32, u64), u64, u64>;
type PciIrqMask = [u32; PciInfo::IRQ_MASK_CELLS];
type PciIrqMap = [u32; PciInfo::IRQ_MAP_CELLS];
/// Iterator that takes N cells as a chunk
struct CellChunkIterator<'a, const N: usize> {
cells: CellIterator<'a>,
}
impl<'a, const N: usize> CellChunkIterator<'a, N> {
fn new(cells: CellIterator<'a>) -> Self {
Self { cells }
}
}
impl<'a, const N: usize> Iterator for CellChunkIterator<'a, N> {
type Item = [u32; N];
fn next(&mut self) -> Option {
let mut ret: Self::Item = [0; N];
for i in ret.iter_mut() {
*i = self.cells.next()?;
}
Some(ret)
}
}
/// Read pci host controller ranges, irq maps, and irq map masks from DT
fn read_pci_info_from(fdt: &Fdt) -> libfdt::Result {
let node =
fdt.compatible_nodes(cstr!("pci-host-cam-generic"))?.next().ok_or(FdtError::NotFound)?;
let mut ranges = node.ranges::<(u32, u64), u64, u64>()?.ok_or(FdtError::NotFound)?;
let range0 = ranges.next().ok_or(FdtError::NotFound)?;
let range1 = ranges.next().ok_or(FdtError::NotFound)?;
let irq_masks = node.getprop_cells(cstr!("interrupt-map-mask"))?.ok_or(FdtError::NotFound)?;
let mut chunks = CellChunkIterator::<{ PciInfo::IRQ_MASK_CELLS }>::new(irq_masks);
let irq_masks = (&mut chunks).take(PciInfo::MAX_IRQS).collect();
if chunks.next().is_some() {
warn!("Input DT has more than {} PCI entries!", PciInfo::MAX_IRQS);
return Err(FdtError::NoSpace);
}
let irq_maps = node.getprop_cells(cstr!("interrupt-map"))?.ok_or(FdtError::NotFound)?;
let mut chunks = CellChunkIterator::<{ PciInfo::IRQ_MAP_CELLS }>::new(irq_maps);
let irq_maps = (&mut chunks).take(PciInfo::MAX_IRQS).collect();
if chunks.next().is_some() {
warn!("Input DT has more than {} PCI entries!", PciInfo::MAX_IRQS);
return Err(FdtError::NoSpace);
}
Ok(PciInfo { ranges: [range0, range1], irq_masks, irq_maps })
}
fn validate_pci_info(pci_info: &PciInfo, memory_range: &Range) -> Result<(), RebootReason> {
for range in pci_info.ranges.iter() {
validate_pci_addr_range(range, memory_range)?;
}
for irq_mask in pci_info.irq_masks.iter() {
validate_pci_irq_mask(irq_mask)?;
}
for (idx, irq_map) in pci_info.irq_maps.iter().enumerate() {
validate_pci_irq_map(irq_map, idx)?;
}
Ok(())
}
fn validate_pci_addr_range(
range: &PciAddrRange,
memory_range: &Range,
) -> Result<(), RebootReason> {
let mem_flags = PciMemoryFlags(range.addr.0);
let range_type = mem_flags.range_type();
let prefetchable = mem_flags.prefetchable();
let bus_addr = range.addr.1;
let cpu_addr = range.parent_addr;
let size = range.size;
if range_type != PciRangeType::Memory64 {
error!("Invalid range type {:?} for bus address {:#x} in PCI node", range_type, bus_addr);
return Err(RebootReason::InvalidFdt);
}
if prefetchable {
error!("PCI bus address {:#x} in PCI node is prefetchable", bus_addr);
return Err(RebootReason::InvalidFdt);
}
// Enforce ID bus-to-cpu mappings, as used by crosvm.
if bus_addr != cpu_addr {
error!("PCI bus address: {:#x} is different from CPU address: {:#x}", bus_addr, cpu_addr);
return Err(RebootReason::InvalidFdt);
}
let Some(bus_end) = bus_addr.checked_add(size) else {
error!("PCI address range size {:#x} overflows", size);
return Err(RebootReason::InvalidFdt);
};
if bus_end > MAX_VIRT_ADDR.try_into().unwrap() {
error!("PCI address end {:#x} is outside of translatable range", bus_end);
return Err(RebootReason::InvalidFdt);
}
let memory_start = memory_range.start.try_into().unwrap();
let memory_end = memory_range.end.try_into().unwrap();
if max(bus_addr, memory_start) < min(bus_end, memory_end) {
error!(
"PCI address range {:#x}-{:#x} overlaps with main memory range {:#x}-{:#x}",
bus_addr, bus_end, memory_start, memory_end
);
return Err(RebootReason::InvalidFdt);
}
Ok(())
}
fn validate_pci_irq_mask(irq_mask: &PciIrqMask) -> Result<(), RebootReason> {
const IRQ_MASK_ADDR_HI: u32 = 0xf800;
const IRQ_MASK_ADDR_ME: u32 = 0x0;
const IRQ_MASK_ADDR_LO: u32 = 0x0;
const IRQ_MASK_ANY_IRQ: u32 = 0x7;
const EXPECTED: PciIrqMask =
[IRQ_MASK_ADDR_HI, IRQ_MASK_ADDR_ME, IRQ_MASK_ADDR_LO, IRQ_MASK_ANY_IRQ];
if *irq_mask != EXPECTED {
error!("Invalid PCI irq mask {:#?}", irq_mask);
return Err(RebootReason::InvalidFdt);
}
Ok(())
}
fn validate_pci_irq_map(irq_map: &PciIrqMap, idx: usize) -> Result<(), RebootReason> {
const PCI_DEVICE_IDX: usize = 11;
const PCI_IRQ_ADDR_ME: u32 = 0;
const PCI_IRQ_ADDR_LO: u32 = 0;
const PCI_IRQ_INTC: u32 = 1;
const AARCH64_IRQ_BASE: u32 = 4; // from external/crosvm/aarch64/src/lib.rs
const GIC_SPI: u32 = 0;
const IRQ_TYPE_LEVEL_HIGH: u32 = 4;
let pci_addr = (irq_map[0], irq_map[1], irq_map[2]);
let pci_irq_number = irq_map[3];
let _controller_phandle = irq_map[4]; // skipped.
let gic_addr = (irq_map[5], irq_map[6]); // address-cells is <2> for GIC
// interrupt-cells is <3> for GIC
let gic_peripheral_interrupt_type = irq_map[7];
let gic_irq_number = irq_map[8];
let gic_irq_type = irq_map[9];
let phys_hi: u32 = (0x1 << PCI_DEVICE_IDX) * (idx + 1) as u32;
let expected_pci_addr = (phys_hi, PCI_IRQ_ADDR_ME, PCI_IRQ_ADDR_LO);
if pci_addr != expected_pci_addr {
error!("PCI device address {:#x} {:#x} {:#x} in interrupt-map is different from expected address \
{:#x} {:#x} {:#x}",
pci_addr.0, pci_addr.1, pci_addr.2, expected_pci_addr.0, expected_pci_addr.1, expected_pci_addr.2);
return Err(RebootReason::InvalidFdt);
}
if pci_irq_number != PCI_IRQ_INTC {
error!(
"PCI INT# {:#x} in interrupt-map is different from expected value {:#x}",
pci_irq_number, PCI_IRQ_INTC
);
return Err(RebootReason::InvalidFdt);
}
if gic_addr != (0, 0) {
error!(
"GIC address {:#x} {:#x} in interrupt-map is different from expected address \
{:#x} {:#x}",
gic_addr.0, gic_addr.1, 0, 0
);
return Err(RebootReason::InvalidFdt);
}
if gic_peripheral_interrupt_type != GIC_SPI {
error!("GIC peripheral interrupt type {:#x} in interrupt-map is different from expected value \
{:#x}", gic_peripheral_interrupt_type, GIC_SPI);
return Err(RebootReason::InvalidFdt);
}
let irq_nr: u32 = AARCH64_IRQ_BASE + (idx as u32);
if gic_irq_number != irq_nr {
error!(
"GIC irq number {:#x} in interrupt-map is unexpected. Expected {:#x}",
gic_irq_number, irq_nr
);
return Err(RebootReason::InvalidFdt);
}
if gic_irq_type != IRQ_TYPE_LEVEL_HIGH {
error!(
"IRQ type in {:#x} is invalid. Must be LEVEL_HIGH {:#x}",
gic_irq_type, IRQ_TYPE_LEVEL_HIGH
);
return Err(RebootReason::InvalidFdt);
}
Ok(())
}
fn patch_pci_info(fdt: &mut Fdt, pci_info: &PciInfo) -> libfdt::Result<()> {
let mut node =
fdt.root_mut().next_compatible(cstr!("pci-host-cam-generic"))?.ok_or(FdtError::NotFound)?;
let irq_masks_size = pci_info.irq_masks.len() * size_of::();
node.trimprop(cstr!("interrupt-map-mask"), irq_masks_size)?;
let irq_maps_size = pci_info.irq_maps.len() * size_of::();
node.trimprop(cstr!("interrupt-map"), irq_maps_size)?;
node.setprop_inplace(
cstr!("ranges"),
flatten(&[pci_info.ranges[0].to_cells(), pci_info.ranges[1].to_cells()]),
)
}
#[derive(Default, Debug)]
struct SerialInfo {
addrs: ArrayVec<[u64; Self::MAX_SERIALS]>,
}
impl SerialInfo {
const MAX_SERIALS: usize = 4;
}
fn read_serial_info_from(fdt: &Fdt) -> libfdt::Result {
let mut addrs = ArrayVec::new();
let mut serial_nodes = fdt.compatible_nodes(cstr!("ns16550a"))?;
for node in serial_nodes.by_ref().take(addrs.capacity()) {
let reg = node.first_reg()?;
addrs.push(reg.addr);
}
if serial_nodes.next().is_some() {
warn!("DT has more than {} UART nodes: discarding extra nodes.", addrs.capacity());
}
Ok(SerialInfo { addrs })
}
/// Patch the DT by deleting the ns16550a compatible nodes whose address are unknown
fn patch_serial_info(fdt: &mut Fdt, serial_info: &SerialInfo) -> libfdt::Result<()> {
let name = cstr!("ns16550a");
let mut next = fdt.root_mut().next_compatible(name);
while let Some(current) = next? {
let reg =
current.as_node().reg()?.ok_or(FdtError::NotFound)?.next().ok_or(FdtError::NotFound)?;
next = if !serial_info.addrs.contains(®.addr) {
current.delete_and_next_compatible(name)
} else {
current.next_compatible(name)
}
}
Ok(())
}
fn validate_swiotlb_info(
swiotlb_info: &SwiotlbInfo,
memory: &Range,
) -> Result<(), RebootReason> {
let size = swiotlb_info.size;
let align = swiotlb_info.align;
if size == 0 || (size % GUEST_PAGE_SIZE) != 0 {
error!("Invalid swiotlb size {:#x}", size);
return Err(RebootReason::InvalidFdt);
}
if let Some(align) = align.filter(|&a| a % GUEST_PAGE_SIZE != 0) {
error!("Invalid swiotlb alignment {:#x}", align);
return Err(RebootReason::InvalidFdt);
}
if let Some(addr) = swiotlb_info.addr {
if addr.checked_add(size).is_none() {
error!("Invalid swiotlb range: addr:{addr:#x} size:{size:#x}");
return Err(RebootReason::InvalidFdt);
}
}
if let Some(range) = swiotlb_info.fixed_range() {
if !range.is_within(memory) {
error!("swiotlb range {range:#x?} not part of memory range {memory:#x?}");
return Err(RebootReason::InvalidFdt);
}
}
Ok(())
}
fn patch_swiotlb_info(fdt: &mut Fdt, swiotlb_info: &SwiotlbInfo) -> libfdt::Result<()> {
let mut node =
fdt.root_mut().next_compatible(cstr!("restricted-dma-pool"))?.ok_or(FdtError::NotFound)?;
if let Some(range) = swiotlb_info.fixed_range() {
node.setprop_addrrange_inplace(
cstr!("reg"),
range.start.try_into().unwrap(),
range.len().try_into().unwrap(),
)?;
node.nop_property(cstr!("size"))?;
node.nop_property(cstr!("alignment"))?;
} else {
node.nop_property(cstr!("reg"))?;
node.setprop_inplace(cstr!("size"), &swiotlb_info.size.to_be_bytes())?;
node.setprop_inplace(cstr!("alignment"), &swiotlb_info.align.unwrap().to_be_bytes())?;
}
Ok(())
}
fn patch_gic(fdt: &mut Fdt, num_cpus: usize) -> libfdt::Result<()> {
let node = fdt.compatible_nodes(cstr!("arm,gic-v3"))?.next().ok_or(FdtError::NotFound)?;
let mut ranges = node.reg()?.ok_or(FdtError::NotFound)?;
let range0 = ranges.next().ok_or(FdtError::NotFound)?;
let mut range1 = ranges.next().ok_or(FdtError::NotFound)?;
let addr = range0.addr;
// `read_cpu_info_from()` guarantees that we have at most MAX_CPUS.
const_assert!(DeviceTreeInfo::gic_patched_size(DeviceTreeInfo::MAX_CPUS).is_some());
let size = u64::try_from(DeviceTreeInfo::gic_patched_size(num_cpus).unwrap()).unwrap();
// range1 is just below range0
range1.addr = addr - size;
range1.size = Some(size);
let (addr0, size0) = range0.to_cells();
let (addr1, size1) = range1.to_cells();
let value = [addr0, size0.unwrap(), addr1, size1.unwrap()];
let mut node =
fdt.root_mut().next_compatible(cstr!("arm,gic-v3"))?.ok_or(FdtError::NotFound)?;
node.setprop_inplace(cstr!("reg"), flatten(&value))
}
fn patch_timer(fdt: &mut Fdt, num_cpus: usize) -> libfdt::Result<()> {
const NUM_INTERRUPTS: usize = 4;
const CELLS_PER_INTERRUPT: usize = 3;
let node = fdt.compatible_nodes(cstr!("arm,armv8-timer"))?.next().ok_or(FdtError::NotFound)?;
let interrupts = node.getprop_cells(cstr!("interrupts"))?.ok_or(FdtError::NotFound)?;
let mut value: ArrayVec<[u32; NUM_INTERRUPTS * CELLS_PER_INTERRUPT]> =
interrupts.take(NUM_INTERRUPTS * CELLS_PER_INTERRUPT).collect();
let num_cpus: u32 = num_cpus.try_into().unwrap();
let cpu_mask: u32 = (((0x1 << num_cpus) - 1) & 0xff) << 8;
for v in value.iter_mut().skip(2).step_by(CELLS_PER_INTERRUPT) {
*v |= cpu_mask;
}
for v in value.iter_mut() {
*v = v.to_be();
}
let value = value.into_inner();
let mut node =
fdt.root_mut().next_compatible(cstr!("arm,armv8-timer"))?.ok_or(FdtError::NotFound)?;
node.setprop_inplace(cstr!("interrupts"), value.as_bytes())
}
fn patch_untrusted_props(fdt: &mut Fdt, props: &BTreeMap>) -> libfdt::Result<()> {
let avf_node = if let Some(node) = fdt.node_mut(cstr!("/avf"))? {
node
} else {
fdt.root_mut().add_subnode(cstr!("avf"))?
};
// The node shouldn't already be present; if it is, return the error.
let mut node = avf_node.add_subnode(cstr!("untrusted"))?;
for (name, value) in props {
node.setprop(name, value)?;
}
Ok(())
}
#[derive(Debug)]
struct VcpufreqInfo {
addr: u64,
size: u64,
}
fn patch_vcpufreq(fdt: &mut Fdt, vcpufreq_info: &Option) -> libfdt::Result<()> {
let mut node = fdt.node_mut(cstr!("/cpufreq"))?.unwrap();
if let Some(info) = vcpufreq_info {
node.setprop_addrrange_inplace(cstr!("reg"), info.addr, info.size)
} else {
node.nop()
}
}
#[derive(Debug)]
pub struct DeviceTreeInfo {
pub kernel_range: Option>,
pub initrd_range: Option>,
pub memory_range: Range,
bootargs: Option,
cpus: ArrayVec<[CpuInfo; DeviceTreeInfo::MAX_CPUS]>,
cpu_topology: Option,
pci_info: PciInfo,
serial_info: SerialInfo,
pub swiotlb_info: SwiotlbInfo,
device_assignment: Option,
untrusted_props: BTreeMap>,
vm_ref_dt_props_info: BTreeMap>,
vcpufreq_info: Option,
}
impl DeviceTreeInfo {
const MAX_CPUS: usize = 16;
const fn gic_patched_size(num_cpus: usize) -> Option {
const GIC_REDIST_SIZE_PER_CPU: usize = 32 * SIZE_4KB;
GIC_REDIST_SIZE_PER_CPU.checked_mul(num_cpus)
}
}
pub fn sanitize_device_tree(
fdt: &mut [u8],
vm_dtbo: Option<&mut [u8]>,
vm_ref_dt: Option<&[u8]>,
) -> Result {
let fdt = Fdt::from_mut_slice(fdt).map_err(|e| {
error!("Failed to load FDT: {e}");
RebootReason::InvalidFdt
})?;
let vm_dtbo = match vm_dtbo {
Some(vm_dtbo) => Some(VmDtbo::from_mut_slice(vm_dtbo).map_err(|e| {
error!("Failed to load VM DTBO: {e}");
RebootReason::InvalidFdt
})?),
None => None,
};
let info = parse_device_tree(fdt, vm_dtbo.as_deref())?;
// SAFETY: We trust that the template (hardcoded in our RO data) is a valid DT.
let fdt_template = unsafe { Fdt::unchecked_from_slice(pvmfw_fdt_template::RAW) };
fdt.clone_from(fdt_template).map_err(|e| {
error!("Failed to instantiate FDT from the template DT: {e}");
RebootReason::InvalidFdt
})?;
fdt.unpack().map_err(|e| {
error!("Failed to unpack DT for patching: {e}");
RebootReason::InvalidFdt
})?;
if let Some(device_assignment_info) = &info.device_assignment {
let vm_dtbo = vm_dtbo.unwrap();
device_assignment_info.filter(vm_dtbo).map_err(|e| {
error!("Failed to filter VM DTBO: {e}");
RebootReason::InvalidFdt
})?;
// SAFETY: Damaged VM DTBO isn't used in this API after this unsafe block.
// VM DTBO can't be reused in any way as Fdt nor VmDtbo outside of this API because
// it can only be instantiated after validation.
unsafe {
fdt.apply_overlay(vm_dtbo.as_mut()).map_err(|e| {
error!("Failed to apply filtered VM DTBO: {e}");
RebootReason::InvalidFdt
})?;
}
}
if let Some(vm_ref_dt) = vm_ref_dt {
let vm_ref_dt = Fdt::from_slice(vm_ref_dt).map_err(|e| {
error!("Failed to load VM reference DT: {e}");
RebootReason::InvalidFdt
})?;
validate_vm_ref_dt(fdt, vm_ref_dt, &info.vm_ref_dt_props_info).map_err(|e| {
error!("Failed to apply VM reference DT: {e}");
RebootReason::InvalidFdt
})?;
}
patch_device_tree(fdt, &info)?;
// TODO(b/317201360): Ensure no overlapping in among devices
fdt.pack().map_err(|e| {
error!("Failed to unpack DT after patching: {e}");
RebootReason::InvalidFdt
})?;
Ok(info)
}
fn parse_device_tree(fdt: &Fdt, vm_dtbo: Option<&VmDtbo>) -> Result {
let kernel_range = read_kernel_range_from(fdt).map_err(|e| {
error!("Failed to read kernel range from DT: {e}");
RebootReason::InvalidFdt
})?;
let initrd_range = read_initrd_range_from(fdt).map_err(|e| {
error!("Failed to read initrd range from DT: {e}");
RebootReason::InvalidFdt
})?;
let memory_range = read_and_validate_memory_range(fdt)?;
let bootargs = read_bootargs_from(fdt).map_err(|e| {
error!("Failed to read bootargs from DT: {e}");
RebootReason::InvalidFdt
})?;
let (cpus, cpu_topology) = read_cpu_info_from(fdt).map_err(|e| {
error!("Failed to read CPU info from DT: {e}");
RebootReason::InvalidFdt
})?;
validate_cpu_info(&cpus).map_err(|e| {
error!("Failed to validate CPU info from DT: {e}");
RebootReason::InvalidFdt
})?;
let vcpufreq_info = read_vcpufreq_info(fdt).map_err(|e| {
error!("Failed to read vcpufreq info from DT: {e}");
RebootReason::InvalidFdt
})?;
if let Some(ref info) = vcpufreq_info {
validate_vcpufreq_info(info, &cpus).map_err(|e| {
error!("Failed to validate vcpufreq info from DT: {e}");
RebootReason::InvalidFdt
})?;
}
let pci_info = read_pci_info_from(fdt).map_err(|e| {
error!("Failed to read pci info from DT: {e}");
RebootReason::InvalidFdt
})?;
validate_pci_info(&pci_info, &memory_range)?;
let serial_info = read_serial_info_from(fdt).map_err(|e| {
error!("Failed to read serial info from DT: {e}");
RebootReason::InvalidFdt
})?;
let swiotlb_info = SwiotlbInfo::new_from_fdt(fdt).map_err(|e| {
error!("Failed to read swiotlb info from DT: {e}");
RebootReason::InvalidFdt
})?;
validate_swiotlb_info(&swiotlb_info, &memory_range)?;
let device_assignment = match vm_dtbo {
Some(vm_dtbo) => {
if let Some(hypervisor) = hyp::get_device_assigner() {
DeviceAssignmentInfo::parse(fdt, vm_dtbo, hypervisor).map_err(|e| {
error!("Failed to parse device assignment from DT and VM DTBO: {e}");
RebootReason::InvalidFdt
})?
} else {
warn!(
"Device assignment is ignored because device assigning hypervisor is missing"
);
None
}
}
None => None,
};
let untrusted_props = parse_untrusted_props(fdt).map_err(|e| {
error!("Failed to read untrusted properties: {e}");
RebootReason::InvalidFdt
})?;
validate_untrusted_props(&untrusted_props).map_err(|e| {
error!("Failed to validate untrusted properties: {e}");
RebootReason::InvalidFdt
})?;
let vm_ref_dt_props_info = parse_vm_ref_dt(fdt).map_err(|e| {
error!("Failed to read names of properties under /avf from DT: {e}");
RebootReason::InvalidFdt
})?;
Ok(DeviceTreeInfo {
kernel_range,
initrd_range,
memory_range,
bootargs,
cpus,
cpu_topology,
pci_info,
serial_info,
swiotlb_info,
device_assignment,
untrusted_props,
vm_ref_dt_props_info,
vcpufreq_info,
})
}
fn patch_device_tree(fdt: &mut Fdt, info: &DeviceTreeInfo) -> Result<(), RebootReason> {
if let Some(initrd_range) = &info.initrd_range {
patch_initrd_range(fdt, initrd_range).map_err(|e| {
error!("Failed to patch initrd range to DT: {e}");
RebootReason::InvalidFdt
})?;
}
patch_memory_range(fdt, &info.memory_range).map_err(|e| {
error!("Failed to patch memory range to DT: {e}");
RebootReason::InvalidFdt
})?;
if let Some(bootargs) = &info.bootargs {
patch_bootargs(fdt, bootargs.as_c_str()).map_err(|e| {
error!("Failed to patch bootargs to DT: {e}");
RebootReason::InvalidFdt
})?;
}
patch_cpus(fdt, &info.cpus, &info.cpu_topology).map_err(|e| {
error!("Failed to patch cpus to DT: {e}");
RebootReason::InvalidFdt
})?;
patch_vcpufreq(fdt, &info.vcpufreq_info).map_err(|e| {
error!("Failed to patch vcpufreq info to DT: {e}");
RebootReason::InvalidFdt
})?;
patch_pci_info(fdt, &info.pci_info).map_err(|e| {
error!("Failed to patch pci info to DT: {e}");
RebootReason::InvalidFdt
})?;
patch_serial_info(fdt, &info.serial_info).map_err(|e| {
error!("Failed to patch serial info to DT: {e}");
RebootReason::InvalidFdt
})?;
patch_swiotlb_info(fdt, &info.swiotlb_info).map_err(|e| {
error!("Failed to patch swiotlb info to DT: {e}");
RebootReason::InvalidFdt
})?;
patch_gic(fdt, info.cpus.len()).map_err(|e| {
error!("Failed to patch gic info to DT: {e}");
RebootReason::InvalidFdt
})?;
patch_timer(fdt, info.cpus.len()).map_err(|e| {
error!("Failed to patch timer info to DT: {e}");
RebootReason::InvalidFdt
})?;
if let Some(device_assignment) = &info.device_assignment {
// Note: We patch values after VM DTBO is overlaid because patch may require more space
// then VM DTBO's underlying slice is allocated.
device_assignment.patch(fdt).map_err(|e| {
error!("Failed to patch device assignment info to DT: {e}");
RebootReason::InvalidFdt
})?;
} else {
device_assignment::clean(fdt).map_err(|e| {
error!("Failed to clean pre-polulated DT nodes for device assignment: {e}");
RebootReason::InvalidFdt
})?;
}
patch_untrusted_props(fdt, &info.untrusted_props).map_err(|e| {
error!("Failed to patch untrusted properties: {e}");
RebootReason::InvalidFdt
})?;
Ok(())
}
/// Modifies the input DT according to the fields of the configuration.
pub fn modify_for_next_stage(
fdt: &mut Fdt,
bcc: &[u8],
new_instance: bool,
strict_boot: bool,
debug_policy: Option<&[u8]>,
debuggable: bool,
kaslr_seed: u64,
) -> libfdt::Result<()> {
if let Some(debug_policy) = debug_policy {
let backup = Vec::from(fdt.as_slice());
fdt.unpack()?;
let backup_fdt = Fdt::from_slice(backup.as_slice()).unwrap();
if apply_debug_policy(fdt, backup_fdt, debug_policy)? {
info!("Debug policy applied.");
} else {
// apply_debug_policy restored fdt to backup_fdt so unpack it again.
fdt.unpack()?;
}
} else {
info!("No debug policy found.");
fdt.unpack()?;
}
patch_dice_node(fdt, bcc.as_ptr() as usize, bcc.len())?;
if let Some(mut chosen) = fdt.chosen_mut()? {
empty_or_delete_prop(&mut chosen, cstr!("avf,strict-boot"), strict_boot)?;
empty_or_delete_prop(&mut chosen, cstr!("avf,new-instance"), new_instance)?;
chosen.setprop_inplace(cstr!("kaslr-seed"), &kaslr_seed.to_be_bytes())?;
};
if !debuggable {
if let Some(bootargs) = read_bootargs_from(fdt)? {
filter_out_dangerous_bootargs(fdt, &bootargs)?;
}
}
fdt.pack()?;
Ok(())
}
/// Patch the "google,open-dice"-compatible reserved-memory node to point to the bcc range
fn patch_dice_node(fdt: &mut Fdt, addr: usize, size: usize) -> libfdt::Result<()> {
// We reject DTs with missing reserved-memory node as validation should have checked that the
// "swiotlb" subnode (compatible = "restricted-dma-pool") was present.
let node = fdt.node_mut(cstr!("/reserved-memory"))?.ok_or(libfdt::FdtError::NotFound)?;
let mut node = node.next_compatible(cstr!("google,open-dice"))?.ok_or(FdtError::NotFound)?;
let addr: u64 = addr.try_into().unwrap();
let size: u64 = size.try_into().unwrap();
node.setprop_inplace(cstr!("reg"), flatten(&[addr.to_be_bytes(), size.to_be_bytes()]))
}
fn empty_or_delete_prop(
fdt_node: &mut FdtNodeMut,
prop_name: &CStr,
keep_prop: bool,
) -> libfdt::Result<()> {
if keep_prop {
fdt_node.setprop_empty(prop_name)
} else {
fdt_node
.delprop(prop_name)
.or_else(|e| if e == FdtError::NotFound { Ok(()) } else { Err(e) })
}
}
/// Apply the debug policy overlay to the guest DT.
///
/// Returns Ok(true) on success, Ok(false) on recovered failure and Err(_) on corruption of the DT.
fn apply_debug_policy(
fdt: &mut Fdt,
backup_fdt: &Fdt,
debug_policy: &[u8],
) -> libfdt::Result {
let mut debug_policy = Vec::from(debug_policy);
let overlay = match Fdt::from_mut_slice(debug_policy.as_mut_slice()) {
Ok(overlay) => overlay,
Err(e) => {
warn!("Corrupted debug policy found: {e}. Not applying.");
return Ok(false);
}
};
// SAFETY: on failure, the corrupted DT is restored using the backup.
if let Err(e) = unsafe { fdt.apply_overlay(overlay) } {
warn!("Failed to apply debug policy: {e}. Recovering...");
fdt.clone_from(backup_fdt)?;
// A successful restoration is considered success because an invalid debug policy
// shouldn't DOS the pvmfw
Ok(false)
} else {
Ok(true)
}
}
fn has_common_debug_policy(fdt: &Fdt, debug_feature_name: &CStr) -> libfdt::Result {
if let Some(node) = fdt.node(cstr!("/avf/guest/common"))? {
if let Some(value) = node.getprop_u32(debug_feature_name)? {
return Ok(value == 1);
}
}
Ok(false) // if the policy doesn't exist or not 1, don't enable the debug feature
}
fn filter_out_dangerous_bootargs(fdt: &mut Fdt, bootargs: &CStr) -> libfdt::Result<()> {
let has_crashkernel = has_common_debug_policy(fdt, cstr!("ramdump"))?;
let has_console = has_common_debug_policy(fdt, cstr!("log"))?;
let accepted: &[(&str, Box) -> bool>)] = &[
("panic", Box::new(|v| if let Some(v) = v { v == "=-1" } else { false })),
("crashkernel", Box::new(|_| has_crashkernel)),
("console", Box::new(|_| has_console)),
];
// parse and filter out unwanted
let mut filtered = Vec::new();
for arg in BootArgsIterator::new(bootargs).map_err(|e| {
info!("Invalid bootarg: {e}");
FdtError::BadValue
})? {
match accepted.iter().find(|&t| t.0 == arg.name()) {
Some((_, pred)) if pred(arg.value()) => filtered.push(arg),
_ => debug!("Rejected bootarg {}", arg.as_ref()),
}
}
// flatten into a new C-string
let mut new_bootargs = Vec::new();
for (i, arg) in filtered.iter().enumerate() {
if i != 0 {
new_bootargs.push(b' '); // separator
}
new_bootargs.extend_from_slice(arg.as_ref().as_bytes());
}
new_bootargs.push(b'\0');
let mut node = fdt.chosen_mut()?.ok_or(FdtError::NotFound)?;
node.setprop(cstr!("bootargs"), new_bootargs.as_slice())
}