From 323b3640ecff1e00ad45a26c59dbcc11ae476dc7 Mon Sep 17 00:00:00 2001 From: "Panagiotis \"Ivory\" Vasilopoulos" Date: Tue, 25 Jun 2024 15:35:35 +0200 Subject: [PATCH] Implement ASLR for uhyve. - Kernel is loaded to a random physical address - Pagetables are created for the kernel region instead of just the first gigabyte Fixes #719. Co-authored-by: Jonathan --- Cargo.lock | 6 +- Cargo.toml | 7 +- src/arch/aarch64/mod.rs | 170 +++++++++++++++-------- src/arch/x86_64/mod.rs | 60 ++++---- src/arch/x86_64/paging/mod.rs | 252 ++++++++++++++++++++++------------ src/bin/uhyve.rs | 6 + src/consts.rs | 25 ++-- src/hypercall.rs | 13 +- src/linux/gdb/breakpoints.rs | 9 +- src/linux/gdb/mod.rs | 10 +- src/linux/x86_64/kvm_cpu.rs | 28 ++-- src/macos/aarch64/vcpu.rs | 43 ++++-- src/paging.rs | 49 +++++++ src/params.rs | 4 + src/vm.rs | 145 ++++++++++++++----- tests/gdb.rs | 2 - 16 files changed, 586 insertions(+), 243 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 50529a2c..1c1ccf3c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -644,9 +644,9 @@ checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" [[package]] name = "hermit-entry" -version = "0.10.2" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3489d14d0767c3d6a151e6e08effa39270356b7c6fe589775da8676a57c4d4cd" +checksum = "c92a8deb1f5da66f858a5e86cf441575cd6ebdc60d41a6be232f0f9c8e18cecc" dependencies = [ "align-address", "const_parse", @@ -1487,6 +1487,7 @@ dependencies = [ name = "uhyve" version = "0.4.0" dependencies = [ + "align-address", "assert_fs", "bitflags 2.8.0", "burst", @@ -1507,6 +1508,7 @@ dependencies = [ "mac_address", "memory_addresses", "nix 0.29.0", + "rand", "raw-cpuid 11.2.0", "rftrace", "rftrace-frontend", diff --git a/Cargo.toml b/Cargo.toml index 8bce08a9..7b865dea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,8 @@ path = "benches/benchmarks.rs" harness = false [features] -default = [] +default = ["aslr"] +aslr = ["dep:rand"] instrument = ["rftrace", "rftrace-frontend"] [dependencies] @@ -48,7 +49,7 @@ either = "1.13" env_logger = "0.11" gdbstub = "0.7" gdbstub_arch = "0.3" -hermit-entry = { version = "0.10", features = ["loader"] } +hermit-entry = { version = "0.10.3", features = ["loader"] } libc = "0.2" log = "0.4" mac_address = "1.1" @@ -59,12 +60,14 @@ uhyve-interface = { version = "0.1.1", path = "uhyve-interface", features = ["st virtio-bindings = { version = "0.2", features = ["virtio-v4_14_0"] } rftrace = { version = "0.1", optional = true } rftrace-frontend = { version = "0.1", optional = true } +rand = { version = "0.8.5", optional = true } shell-words = "1" sysinfo = { version = "0.33.1", default-features = false, features = ["system"] } vm-fdt = "0.3" tempfile = "3.15.0" uuid = { version = "1.12.1", features = ["fast-rng", "v4"]} clean-path = "0.2.1" +align-address = "0.3.0" [target.'cfg(target_os = "linux")'.dependencies] kvm-bindings = "0.11" diff --git a/src/arch/aarch64/mod.rs b/src/arch/aarch64/mod.rs index b3542b3b..6c6c5104 100644 --- a/src/arch/aarch64/mod.rs +++ b/src/arch/aarch64/mod.rs @@ -1,20 +1,29 @@ use std::mem::size_of; +use align_address::Align; use bitflags::bitflags; use uhyve_interface::{GuestPhysAddr, GuestVirtAddr}; use crate::{ - consts::{BOOT_INFO_ADDR, BOOT_PGT}, + consts::{PAGETABLES_END, PAGETABLES_OFFSET, PGT_OFFSET}, mem::MmapMemory, - paging::PagetableError, + paging::{BumpAllocator, PagetableError}, }; -pub const RAM_START: GuestPhysAddr = GuestPhysAddr::new(0x00); +pub(crate) const RAM_START: 
GuestPhysAddr = GuestPhysAddr::new(0x00);
 
-pub const PT_DEVICE: u64 = 0x707;
-pub const PT_PT: u64 = 0x713;
-pub const PT_MEM: u64 = 0x713;
-pub const PT_MEM_CD: u64 = 0x70F;
+const SIZE_4KIB: u64 = 0x1000;
+
+// PageTableEntry Flags
+/// Present + 4KiB + device memory + inner_shareable + accessed
+pub const PT_DEVICE: u64 = 0b11100000111;
+/// Present + 4KiB + normal + inner_shareable + accessed
+pub const PT_PT: u64 = 0b11100010011;
+/// Present + 4KiB + normal + inner_shareable + accessed
+pub const PT_MEM: u64 = 0b11100010011;
+/// Present + 4KiB + device + inner_shareable + accessed
+pub const PT_MEM_CD: u64 = 0b11100001111;
+/// Self reference flag
 pub const PT_SELF: u64 = 1 << 55;
 
 /*
@@ -115,7 +124,7 @@ fn is_valid_address(virtual_address: GuestVirtAddr) -> bool {
 pub fn virt_to_phys(
 	addr: GuestVirtAddr,
 	mem: &MmapMemory,
-	pagetable_l0: GuestPhysAddr,
+	pgt: GuestPhysAddr,
 ) -> Result<GuestPhysAddr, PagetableError> {
 	if !is_valid_address(addr) {
 		return Err(PagetableError::InvalidAddress);
 	}
@@ -132,9 +141,7 @@ pub fn virt_to_phys(
 	// - Our indices can't be larger than 512, so we stay in the borders of the page.
 	// - We are page_aligned, and thus also PageTableEntry aligned.
 	let mut pagetable: &[PageTableEntry] = unsafe {
-		std::mem::transmute::<&[u8], &[PageTableEntry]>(
-			mem.slice_at(pagetable_l0, PAGE_SIZE).unwrap(),
-		)
+		std::mem::transmute::<&[u8], &[PageTableEntry]>(mem.slice_at(pgt, PAGE_SIZE).unwrap())
 	};
 	// TODO: Depending on the virtual address length and granule (defined in TCR register by TG and TxSZ), we could reduce the number of pagetable walks. Hermit doesn't do this at the moment.
 	for level in 0..3 {
@@ -155,71 +162,124 @@ pub fn virt_to_phys(
 	Ok(pte.address())
 }
 
-pub fn init_guest_mem(mem: &mut [u8]) {
+pub fn init_guest_mem(mem: &mut [u8], guest_address: GuestPhysAddr, length: u64) {
+	warn!("aarch64 pagetable initialization is untested!");
+
 	let mem_addr = std::ptr::addr_of_mut!(mem[0]);
-	assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 512 * size_of::<u64>());
-	let pgt_slice = unsafe {
-		std::slice::from_raw_parts_mut(mem_addr.offset(BOOT_PGT.as_u64() as isize) as *mut u64, 512)
-	};
-	pgt_slice.fill(0);
-	pgt_slice[0] = BOOT_PGT.as_u64() + 0x1000 + PT_PT;
-	pgt_slice[511] = BOOT_PGT.as_u64() + PT_PT + PT_SELF;
+	assert!(mem.len() >= PGT_OFFSET as usize + 512 * size_of::<u64>());
 
-	assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x1000 + 512 * size_of::<u64>());
 	let pgt_slice = unsafe {
-		std::slice::from_raw_parts_mut(
-			mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x1000) as *mut u64,
-			512,
-		)
+		std::slice::from_raw_parts_mut(mem_addr.offset(PGT_OFFSET as isize) as *mut u64, 512)
 	};
 	pgt_slice.fill(0);
-	pgt_slice[0] = BOOT_PGT.as_u64() + 0x2000 + PT_PT;
+	pgt_slice[511] = (guest_address + PGT_OFFSET).as_u64() | PT_PT | PT_SELF;
 
-	assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x2000 + 512 * size_of::<u64>());
-	let pgt_slice = unsafe {
-		std::slice::from_raw_parts_mut(
-			mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x2000) as *mut u64,
-			512,
-		)
-	};
-	pgt_slice.fill(0);
-	pgt_slice[0] = BOOT_PGT.as_u64() + 0x3000 + PT_PT;
-	pgt_slice[1] = BOOT_PGT.as_u64() + 0x4000 + PT_PT;
-	pgt_slice[2] = BOOT_PGT.as_u64() + 0x5000 + PT_PT;
+	let mut boot_frame_allocator = BumpAllocator::<SIZE_4KIB>::new(
+		guest_address + PAGETABLES_OFFSET,
+		(PAGETABLES_END - PAGETABLES_OFFSET) / SIZE_4KIB,
+	);
 
-	assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x3000 + 512 * size_of::<u64>());
-	let pgt_slice = unsafe {
+	// Hypercalls are MMIO reads/writes in the lowest 4KiB of address space. Thus, we need to provide pagetable entries for this region.
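+	// What follows is a hand-rolled 4-level walk (named pgd/pud/pmd in analogy
+	// to the x86_64 layout): each table is 512 u64 descriptors, taken from the
+	// bump allocator and linked to its parent with PT_PT.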
+	let pgd0_addr = boot_frame_allocator.allocate().unwrap().as_u64();
+	pgt_slice[0] = pgd0_addr | PT_PT;
+	let pgd0_slice = unsafe {
 		std::slice::from_raw_parts_mut(
-			mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x3000) as *mut u64,
+			mem_addr.offset((pgd0_addr - guest_address.as_u64()) as isize) as *mut u64,
 			512,
 		)
 	};
-	pgt_slice.fill(0);
-	// map Uhyve ports into the virtual address space
-	pgt_slice[0] = PT_MEM_CD;
-	// map BootInfo into the virtual address space
-	pgt_slice[BOOT_INFO_ADDR.as_u64() as usize / PAGE_SIZE] = BOOT_INFO_ADDR.as_u64() + PT_MEM;
+	pgd0_slice.fill(0);
+	let pud0_addr = boot_frame_allocator.allocate().unwrap().as_u64();
+	pgd0_slice[0] = pud0_addr | PT_PT;
 
-	assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x4000 + 512 * size_of::<u64>());
-	let pgt_slice = unsafe {
+	let pud0_slice = unsafe {
 		std::slice::from_raw_parts_mut(
-			mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x4000) as *mut u64,
+			mem_addr.offset((pud0_addr - guest_address.as_u64()) as isize) as *mut u64,
 			512,
 		)
 	};
-	for (idx, i) in pgt_slice.iter_mut().enumerate() {
-		*i = 0x200000u64 + (idx * PAGE_SIZE) as u64 + PT_MEM;
-	}
+	pud0_slice.fill(0);
+	let pmd0_addr = boot_frame_allocator.allocate().unwrap().as_u64();
+	pud0_slice[0] = pmd0_addr | PT_PT;
 
-	assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x5000 + 512 * size_of::<u64>());
-	let pgt_slice = unsafe {
+	let pmd0_slice = unsafe {
 		std::slice::from_raw_parts_mut(
-			mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x5000) as *mut u64,
+			mem_addr.offset((pmd0_addr - guest_address.as_u64()) as isize) as *mut u64,
 			512,
 		)
 	};
-	for (idx, i) in pgt_slice.iter_mut().enumerate() {
-		*i = 0x400000u64 + (idx * PAGE_SIZE) as u64 + PT_MEM;
+	pmd0_slice.fill(0);
+	// Hypercall/IO mapping
+	pmd0_slice[0] = PT_MEM;
+
+	for frame_addr in (guest_address.align_down(SIZE_4KIB).as_u64()
+		..(guest_address + length).align_up(SIZE_4KIB).as_u64())
+		.step_by(SIZE_4KIB as usize)
+	{
+		let idx_l4 = (frame_addr as usize / (0x80_0000_0000)) & (0x1FF);
+		let idx_l3 = (frame_addr as usize / (0x4000_0000)) & (0x1FF);
+		let idx_l2 = (frame_addr as usize / (0x20_0000)) & (0x1FF);
+		let idx_l1 = (frame_addr as usize / (0x1000)) & (0x1FF);
+		debug!("mapping frame {frame_addr:x} to pagetable {idx_l4}-{idx_l3}-{idx_l2}-{idx_l1}");
+
+		let (pgd_addr, new) = if pgt_slice[idx_l4] == 0 {
+			(boot_frame_allocator.allocate().unwrap().as_u64(), true)
+		} else {
+			(
+				PageTableEntry::from(pgt_slice[idx_l4]).address().as_u64(),
+				false,
+			)
+		};
+		let pgd_slice = unsafe {
+			std::slice::from_raw_parts_mut(
+				mem_addr.offset((pgd_addr - guest_address.as_u64()) as isize) as *mut u64,
+				512,
+			)
+		};
+		if new {
+			pgd_slice.fill(0);
+			pgt_slice[idx_l4] = pgd_addr | PT_PT;
+		}
+
+		let (pud_addr, new) = if pgd_slice[idx_l3] == 0 {
+			(boot_frame_allocator.allocate().unwrap().as_u64(), true)
+		} else {
+			(
+				PageTableEntry::from(pgd_slice[idx_l3]).address().as_u64(),
+				false,
+			)
+		};
+		let pud_slice = unsafe {
+			std::slice::from_raw_parts_mut(
+				mem_addr.offset((pud_addr - guest_address.as_u64()) as isize) as *mut u64,
+				512,
+			)
+		};
+		if new {
+			pud_slice.fill(0);
+			pgd_slice[idx_l3] = pud_addr | PT_PT;
+		}
+
+		let (pmd_addr, new) = if pud_slice[idx_l2] == 0 {
+			(boot_frame_allocator.allocate().unwrap().as_u64(), true)
+		} else {
+			(
+				PageTableEntry::from(pud_slice[idx_l2]).address().as_u64(),
+				false,
+			)
+		};
+		let pmd_slice = unsafe {
+			std::slice::from_raw_parts_mut(
+				mem_addr.offset((pmd_addr - guest_address.as_u64()) as isize) as *mut u64,
+				512,
+			)
+		};
+		if new {
+			pmd_slice.fill(0);
+			pud_slice[idx_l2] = pmd_addr | PT_PT;
+		}
+
+		pmd_slice[idx_l1] = frame_addr | PT_MEM;
 	}
 }
diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs
index b8d69896..629356d5 100644
--- a/src/arch/x86_64/mod.rs
+++ b/src/arch/x86_64/mod.rs
@@ -16,7 +16,7 @@ pub const RAM_START: GuestPhysAddr = GuestPhysAddr::new(0x00);
 pub fn virt_to_phys(
 	addr: GuestVirtAddr,
 	mem: &MmapMemory,
-	pagetable_l0: GuestPhysAddr,
+	pml4: GuestPhysAddr,
 ) -> Result<GuestPhysAddr, PagetableError> {
 	/// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN).
 	pub const PAGE_BITS: u64 = 12;
@@ -25,7 +25,7 @@ pub fn virt_to_phys(
 	pub const PAGE_MAP_BITS: usize = 9;
 
 	let mut page_table =
-		unsafe { (mem.host_address(pagetable_l0).unwrap() as *mut PageTable).as_mut() }.unwrap();
+		unsafe { (mem.host_address(pml4).unwrap() as *mut PageTable).as_mut() }.unwrap();
 	let mut page_bits = 39;
 	let mut entry = PageTableEntry::new();
 
@@ -53,9 +53,14 @@ pub fn virt_to_phys(
 	Ok((entry.addr() + (addr.as_u64() & !((!0u64) << PAGE_BITS))).into())
 }
 
-pub fn init_guest_mem(mem: &mut [u8]) {
+pub fn init_guest_mem(
+	mem: &mut [u8],
+	guest_address: GuestPhysAddr,
+	length: u64,
+	legacy_mapping: bool,
+) {
 	// TODO: we should maybe return an error on failure (e.g., the memory is too small)
-	initialize_pagetables(mem);
+	initialize_pagetables(mem, guest_address, length, legacy_mapping);
 }
 
 #[cfg(test)]
@@ -63,7 +68,7 @@ mod tests {
 	use x86_64::structures::paging::PageTableFlags;
 
 	use super::*;
-	use crate::consts::{BOOT_PDE, BOOT_PDPTE, BOOT_PML4};
+	use crate::consts::{MIN_PHYSMEM_SIZE, PAGETABLES_END, PAGETABLES_OFFSET, PML4_OFFSET};
 
 	#[test]
 	fn test_virt_to_phys() {
@@ -72,38 +77,45 @@ mod tests {
 			.is_test(true)
 			.try_init();
 
-		let mem = MmapMemory::new(
-			0,
-			align_up!(paging::MIN_PHYSMEM_SIZE * 2, 0x20_0000),
-			GuestPhysAddr::zero(),
-			true,
-			true,
+		let guest_address = GuestPhysAddr::new(0x11111000);
+
+		let mem = MmapMemory::new(0, MIN_PHYSMEM_SIZE * 2, guest_address, true, true);
+		println!("mmap memory created {mem:x?}");
+
+		init_guest_mem(
+			unsafe { mem.as_slice_mut() }.try_into().unwrap(),
+			guest_address,
+			MIN_PHYSMEM_SIZE as u64 * 2,
+			false,
 		);
-		println!("mmap memory created {mem:?}");
-		initialize_pagetables(unsafe { mem.as_slice_mut() }.try_into().unwrap());
 
 		// Get the address of the first entry in PML4 (the address of the PML4 itself)
 		let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFFFF000);
-		let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap();
-		assert_eq!(p_addr, BOOT_PML4);
+		let p_addr = virt_to_phys(virt_addr, &mem, guest_address + PML4_OFFSET).unwrap();
+		assert_eq!(p_addr, guest_address + PML4_OFFSET);
 
 		// The last entry on the PML4 is the address of the PML4 with flags
 		let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFFFF000 | (4096 - 8));
-		let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap();
+		let p_addr = virt_to_phys(virt_addr, &mem, guest_address + PML4_OFFSET).unwrap();
 		assert_eq!(
 			mem.read::<u64>(p_addr).unwrap(),
-			BOOT_PML4.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits()
+			(guest_address + PML4_OFFSET).as_u64()
+				| (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits()
 		);
 
 		// the first entry on the 3rd level entry in the pagetables is the address of the boot pdpte
 		let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFE00000);
-		let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap();
-		assert_eq!(p_addr, BOOT_PDPTE);
-
-		// the first entry on the 2rd level entry in the pagetables is the address of the boot pde
-		let virt_addr = GuestVirtAddr::new(0xFFFFFFFFC0000000);
-		let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap();
-		assert_eq!(p_addr, BOOT_PDE);
+		let p_addr = virt_to_phys(virt_addr, &mem, guest_address + PML4_OFFSET).unwrap();
+		assert!(p_addr.as_u64() - guest_address.as_u64() >= PAGETABLES_OFFSET);
+		assert!(p_addr.as_u64() - guest_address.as_u64() <= PAGETABLES_END);
+
+		// the idx2 entry on the 2nd level of the pagetables is the address of the boot pde
+		let idx2 = GuestVirtAddr::new(guest_address.as_u64()).p2_index();
+		let virt_addr = GuestVirtAddr::new(0xFFFFFFFFC0000000)
+			+ u64::from(idx2) * size_of::<u64>() as u64;
+		let p_addr = virt_to_phys(virt_addr, &mem, guest_address + PML4_OFFSET).unwrap();
+		assert!(p_addr.as_u64() - guest_address.as_u64() >= PAGETABLES_OFFSET);
+		assert!(p_addr.as_u64() - guest_address.as_u64() <= PAGETABLES_END);
 
 		// That address points to a huge page
 		assert!(
 			PageTableFlags::from_bits_truncate(mem.read::<u64>(p_addr).unwrap()).contains(
diff --git a/src/arch/x86_64/paging/mod.rs b/src/arch/x86_64/paging/mod.rs
index 8b19bd6c..f74ebc4b 100644
--- a/src/arch/x86_64/paging/mod.rs
+++ b/src/arch/x86_64/paging/mod.rs
@@ -1,9 +1,13 @@
+use uhyve_interface::GuestPhysAddr;
 use x86_64::{
-	structures::paging::{Page, PageTable, PageTableFlags, Size2MiB},
-	PhysAddr,
+	structures::paging::{
+		mapper::PageTableFrameMapping, FrameAllocator, MappedPageTable, Mapper, Page, PageSize,
+		PageTable, PageTableFlags, PageTableIndex, PhysFrame, Size2MiB, Size4KiB,
+	},
+	VirtAddr,
 };
 
-use crate::consts::*;
+use crate::{consts::*, paging::BumpAllocator};
 
 // Constructor for a conventional segment GDT (or LDT) entry
 pub fn create_gdt_entry(flags: u64, base: u64, limit: u64) -> u64 {
@@ -14,7 +18,28 @@ pub fn create_gdt_entry(flags: u64, base: u64, limit: u64) -> u64 {
 		| (limit & 0x0000ffffu64)
 }
 
-pub const MIN_PHYSMEM_SIZE: usize = BOOT_PDE.as_u64() as usize + 0x1000;
+unsafe impl FrameAllocator<Size4KiB> for BumpAllocator<{ Size4KiB::SIZE }> {
+	fn allocate_frame(&mut self) -> Option<PhysFrame<Size4KiB>> {
+		self.allocate()
+			// Safety: pa is only valid in the guest and is frame-aligned by the `BumpAllocator`
+			.map(|pa| unsafe { PhysFrame::from_start_address_unchecked(pa.into()) })
+	}
+}
+
+/// A mapper that does not need to run inside the system that is being mapped.
+/// Attention: This must be used in an empty or correctly mapped system with
+/// `mem` of sufficient size and `guest_address` being the correct guest-
+/// physical address of `mem`. Otherwise this will corrupt memory and lead to UB.
+struct UhyvePageTableFrameMapper<'a> {
+	mem: &'a mut [u8],
+	guest_address: GuestPhysAddr,
+}
+unsafe impl PageTableFrameMapping for UhyvePageTableFrameMapper<'_> {
+	fn frame_to_pointer(&self, frame: PhysFrame) -> *mut PageTable {
+		let rel_addr = frame.start_address().as_u64() - self.guest_address.as_u64();
+		unsafe { self.mem.as_ptr().add(rel_addr as usize) as *mut PageTable }
+	}
+}
 
 /// Creates the pagetables and the GDT in the guest memory space.
 ///
@@ -22,70 +47,89 @@ pub const MIN_PHYSMEM_SIZE: usize = BOOT_PDE.as_u64() as usize + 0x1000;
 /// Also, the memory `mem` needs to be zeroed for [`PAGE_SIZE`] bytes at the
 /// offsets [`BOOT_PML4`] and [`BOOT_PDPTE`], otherwise the integrity of the
 /// pagetables and thus the integrity of the guest's memory is not ensured
-pub fn initialize_pagetables(mem: &mut [u8]) {
+/// `mem` and `guest_address` must be 2 MiB aligned.
+/// `length` is the size of the identity-mapped region in bytes.
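+///
+/// A minimal usage sketch, mirroring the unit test below (the guest address is
+/// a hypothetical example value; `MmapMemory` provides the zeroed backing
+/// memory):
+///
+/// ```ignore
+/// let guest_address = GuestPhysAddr::new(0x2000_0000);
+/// let mem = MmapMemory::new(0, MIN_PHYSMEM_SIZE * 2, guest_address, true, true);
+/// initialize_pagetables(
+/// 	unsafe { mem.slice_at_mut(guest_address, MIN_PHYSMEM_SIZE * 2).unwrap() },
+/// 	guest_address,
+/// 	0x20_0000 * 4,
+/// 	false,
+/// );
+/// ```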
+pub fn initialize_pagetables(
+	mem: &mut [u8],
+	guest_address: GuestPhysAddr,
+	length: u64,
+	// TODO: deprecate the legacy_mapping option once pre-0.10.0 Hermit kernels are no longer supported.
+	legacy_mapping: bool,
+) {
 	assert!(mem.len() >= MIN_PHYSMEM_SIZE);
 	let mem_addr = std::ptr::addr_of_mut!(mem[0]);
 
-	let (gdt_entry, pml4, pdpte, pde);
+	let (gdt_entry, pml4);
 	// Safety:
 	// We only operate in `mem`, which is plain bytes and we have ownership of
 	// these and it is asserted to be large enough.
 	unsafe {
 		gdt_entry = mem_addr
-			.add(BOOT_GDT.as_u64() as usize)
+			.add(GDT_OFFSET as usize)
 			.cast::<[u64; 3]>()
 			.as_mut()
 			.unwrap();
 
 		pml4 = mem_addr
-			.add(BOOT_PML4.as_u64() as usize)
-			.cast::<PageTable>()
-			.as_mut()
-			.unwrap();
-		pdpte = mem_addr
-			.add(BOOT_PDPTE.as_u64() as usize)
-			.cast::<PageTable>()
-			.as_mut()
-			.unwrap();
-		pde = mem_addr
-			.add(BOOT_PDE.as_u64() as usize)
+			.add(PML4_OFFSET as usize)
 			.cast::<PageTable>()
 			.as_mut()
 			.unwrap();
-
-		/* For simplicity we currently use 2MB pages and only a single
-		PML4/PDPTE/PDE. */
-
-		// per default is the memory zeroed, which we allocate by the system
-		// call mmap, so the following is not necessary:
-		/*libc::memset(pml4 as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);
-		libc::memset(pdpte as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);
-		libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/
 	}
+
+	// initialize GDT
 	gdt_entry[BOOT_GDT_NULL] = 0;
 	gdt_entry[BOOT_GDT_CODE] = create_gdt_entry(0xA09B, 0, 0xFFFFF);
 	gdt_entry[BOOT_GDT_DATA] = create_gdt_entry(0xC093, 0, 0xFFFFF);
 
-	pml4[0].set_addr(
-		BOOT_PDPTE.into(),
-		PageTableFlags::PRESENT | PageTableFlags::WRITABLE,
-	);
+	// recursive pagetable setup
 	pml4[511].set_addr(
-		BOOT_PML4.into(),
+		(guest_address + PML4_OFFSET).into(),
 		PageTableFlags::PRESENT | PageTableFlags::WRITABLE,
 	);
-	pdpte[0].set_addr(
-		BOOT_PDE.into(),
-		PageTableFlags::PRESENT | PageTableFlags::WRITABLE,
+
+	let mut boot_frame_allocator = BumpAllocator::new(
+		guest_address + PAGETABLES_OFFSET,
+		(PAGETABLES_END - PAGETABLES_OFFSET) / Size4KiB::SIZE,
 	);
+	let page_mapper = UhyvePageTableFrameMapper { mem, guest_address };
+	// Safety: pml4 is zero initialized and page_mapper operates in a correct environment
+	let mut pagetable_mapping = unsafe { MappedPageTable::new(pml4, page_mapper) };
 
-	for i in 0..512 {
-		let addr = PhysAddr::new(i as u64 * Page::<Size2MiB>::SIZE);
-		pde[i].set_addr(
-			addr,
-			PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE,
+	let mapping_range = if legacy_mapping {
+		debug!("Legacy mapping");
+		let start_page = guest_address;
+		let kernel_start = VirtAddr::new((guest_address + KERNEL_OFFSET).as_u64());
+		let end_page = Page::from_page_table_indices_2mib(
+			kernel_start.p4_index(),
+			kernel_start.p3_index(),
+			PageTableIndex::new(511),
+		);
+		let end = u64::max(
+			end_page.start_address().as_u64(),
+			guest_address.as_u64() + length,
 		);
+		start_page.as_u64()..=end
+	} else {
+		guest_address.as_u64()..=guest_address.as_u64() + length
+	};
+
+	// Map the kernel
+	debug!(
+		"identity mapping from {guest_address:?} to {:?}",
+		guest_address + length
+	);
+	for addr in mapping_range.step_by(Size2MiB::SIZE as usize) {
+		let ga = GuestPhysAddr::new(addr);
+		let _ = unsafe {
+			pagetable_mapping
+				.identity_map(
+					PhysFrame::<Size2MiB>::from_start_address_unchecked(ga.into()),
+					PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE,
+					&mut boot_frame_allocator,
+				)
+				.unwrap()
+		};
 	}
 }
 
@@ -112,10 +156,12 @@ fn pretty_print_pagetable(pt: &PageTable) {
 
 #[cfg(test)]
 mod tests {
+	
use uhyve_interface::GuestVirtAddr; + use super::*; use crate::{ - consts::{BOOT_PDE, BOOT_PDPTE, BOOT_PML4}, - mem::HugePageAlignedMem, + consts::{GDT_OFFSET, PAGETABLES_END, PAGETABLES_OFFSET, PML4_OFFSET}, + mem::MmapMemory, }; #[test] @@ -125,51 +171,89 @@ mod tests { .is_test(true) .try_init(); - let aligned_mem = HugePageAlignedMem::::new(); - initialize_pagetables((aligned_mem.mem).try_into().unwrap()); + let gaddrs = [ + GuestPhysAddr::new(0x0), + GuestPhysAddr::new(0x11120000), + GuestPhysAddr::new(0x111ff000), + GuestPhysAddr::new(0xe1120000), + ]; + for &guest_address in gaddrs.iter() { + println!("\n\n---------------------------------------"); + println!("testing guest address {guest_address:?}"); + let mem = MmapMemory::new(0, MIN_PHYSMEM_SIZE * 2, guest_address, true, true); + initialize_pagetables( + unsafe { + mem.slice_at_mut(guest_address, MIN_PHYSMEM_SIZE * 2) + .unwrap() + }, + guest_address, + 0x20_0000 * 4, + false, + ); - // Test pagetable setup - let addr_pdpte = u64::from_le_bytes( - aligned_mem.mem[(BOOT_PML4.as_u64() as usize)..(BOOT_PML4.as_u64() as usize + 8)] - .try_into() - .unwrap(), - ); - assert_eq!( - addr_pdpte, - BOOT_PDPTE.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() - ); - let addr_pde = u64::from_le_bytes( - aligned_mem.mem[(BOOT_PDPTE.as_u64() as usize)..(BOOT_PDPTE.as_u64() as usize + 8)] - .try_into() - .unwrap(), - ); - assert_eq!( - addr_pde, - BOOT_PDE.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() - ); + /// Checks if `address` is in the pagetables. + fn check_and_print( + address: GuestVirtAddr, + phys_addr_offset: GuestPhysAddr, + mem: &MmapMemory, + ) { + let idx4 = address.p4_index(); + let idx3 = address.p3_index(); + let idx2 = address.p2_index(); + debug!( + "address: {address:#x}: {}-{}-{}", + u16::from(idx4), + u16::from(idx3), + u16::from(idx2) + ); + let pml4 = unsafe { mem.get_ref(phys_addr_offset + PML4_OFFSET).unwrap() }; + pretty_print_pagetable(pml4); - for i in (0..4096).step_by(8) { - let addr = BOOT_PDE.as_u64() as usize + i; - let entry = u64::from_le_bytes(aligned_mem.mem[addr..(addr + 8)].try_into().unwrap()); - assert!( - PageTableFlags::from_bits_truncate(entry) - .difference( - PageTableFlags::PRESENT - | PageTableFlags::WRITABLE - | PageTableFlags::HUGE_PAGE - ) - .is_empty(), - "Pagetable bits at {addr:#x} are incorrect" - ) - } + // Check PDPTE address + let addr_pdpte = &pml4[idx4]; + debug!("addr_ptpde: {addr_pdpte:?}"); + assert!( + addr_pdpte.addr().as_u64() - phys_addr_offset.as_u64() >= PAGETABLES_OFFSET + ); + assert!(addr_pdpte.addr().as_u64() - phys_addr_offset.as_u64() <= PAGETABLES_END); + assert!(addr_pdpte + .flags() + .contains(PageTableFlags::PRESENT | PageTableFlags::WRITABLE)); + + let pdpte = unsafe { mem.get_ref(addr_pdpte.addr().into()).unwrap() }; + pretty_print_pagetable(pdpte); + let addr_pde = &pdpte[idx3]; + assert!(addr_pde.addr().as_u64() - phys_addr_offset.as_u64() >= PAGETABLES_OFFSET); + assert!(addr_pde.addr().as_u64() - phys_addr_offset.as_u64() <= PAGETABLES_END); + assert!(addr_pde + .flags() + .contains(PageTableFlags::PRESENT | PageTableFlags::WRITABLE)); + + let pde = unsafe { mem.get_ref(addr_pde.addr().into()).unwrap() }; + pretty_print_pagetable(pde); + assert_eq!(pde[idx2].addr().as_u64(), address.as_u64()); + } + + check_and_print( + GuestVirtAddr::new(guest_address.as_u64()), + guest_address, + &mem, + ); + check_and_print( + GuestVirtAddr::new(guest_address.as_u64() + 3 * 0x20_0000), + guest_address, + &mem, + ); - // Test 
GDT - let gdt_results = [0x0, 0xAF9B000000FFFF, 0xCF93000000FFFF]; - for (i, res) in gdt_results.iter().enumerate() { - let gdt_addr = BOOT_GDT.as_u64() as usize + i * 8; - let gdt_entry = - u64::from_le_bytes(aligned_mem.mem[gdt_addr..gdt_addr + 8].try_into().unwrap()); - assert_eq!(*res, gdt_entry); + // Test GDT + let gdt_results = [0x0, 0xAF9B000000FFFF, 0xCF93000000FFFF]; + for (i, res) in gdt_results.iter().enumerate() { + let gdt_addr = guest_address + GDT_OFFSET as usize + i * 8; + let gdt_entry = u64::from_le_bytes(unsafe { + mem.slice_at(gdt_addr, 8).unwrap().try_into().unwrap() + }); + assert_eq!(*res, gdt_entry); + } } } } diff --git a/src/bin/uhyve.rs b/src/bin/uhyve.rs index f9a74b58..c0a942dd 100644 --- a/src/bin/uhyve.rs +++ b/src/bin/uhyve.rs @@ -93,6 +93,10 @@ struct MemoryArgs { #[clap(short = 'm', long, default_value_t, env = "HERMIT_MEMORY_SIZE")] memory_size: GuestMemorySize, + /// Disable ASLR + #[clap(long)] + no_aslr: bool, + /// Transparent Hugepages /// /// Advise the kernel to enable Transparent Hugepages [THP] on the virtual RAM. @@ -252,6 +256,7 @@ impl From for Params { memory_args: MemoryArgs { memory_size, + no_aslr, #[cfg(target_os = "linux")] thp, #[cfg(target_os = "linux")] @@ -279,6 +284,7 @@ impl From for Params { thp, #[cfg(target_os = "linux")] ksm, + aslr: !no_aslr, cpu_count, #[cfg(target_os = "linux")] pit, diff --git a/src/consts.rs b/src/consts.rs index c5869733..567d8b94 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -1,20 +1,27 @@ -use uhyve_interface::GuestPhysAddr; - pub const PAGE_SIZE: usize = 0x1000; pub const GDT_KERNEL_CODE: u16 = 1; pub const GDT_KERNEL_DATA: u16 = 2; pub const APIC_DEFAULT_BASE: u64 = 0xfee00000; -pub const BOOT_GDT: GuestPhysAddr = GuestPhysAddr::new(0x1000); + pub const BOOT_GDT_NULL: usize = 0; pub const BOOT_GDT_CODE: usize = 1; pub const BOOT_GDT_DATA: usize = 2; pub const BOOT_GDT_MAX: usize = 3; -pub const BOOT_PML4: GuestPhysAddr = GuestPhysAddr::new(0x10000); -pub const BOOT_PGT: GuestPhysAddr = BOOT_PML4; -pub const BOOT_PDPTE: GuestPhysAddr = GuestPhysAddr::new(0x11000); -pub const BOOT_PDE: GuestPhysAddr = GuestPhysAddr::new(0x12000); -pub const FDT_ADDR: GuestPhysAddr = GuestPhysAddr::new(0x5000); -pub const BOOT_INFO_ADDR: GuestPhysAddr = GuestPhysAddr::new(0x9000); + +// guest_address + OFFSET +pub const GDT_OFFSET: u64 = 0x1000; +pub const FDT_OFFSET: u64 = 0x5000; +pub const BOOT_INFO_OFFSET: u64 = 0x9000; +pub const PML4_OFFSET: u64 = 0x10000; +pub const PGT_OFFSET: u64 = 0x10000; +pub const PAGETABLES_OFFSET: u64 = 0x11000; +pub const PAGETABLES_END: u64 = 0x30000; +pub const KERNEL_OFFSET: u64 = 0x40000; + +// The offset of the kernel in the memory. +// Must be larger than BOOT_INFO_OFFSET + KERNEL_STACK_SIZE +pub const MIN_PHYSMEM_SIZE: usize = 0x43000; + pub const EFER_SCE: u64 = 1; /* System Call Extensions */ pub const EFER_LME: u64 = 1 << 8; /* Long mode enable */ pub const EFER_LMA: u64 = 1 << 10; /* Long mode active (read-only) */ diff --git a/src/hypercall.rs b/src/hypercall.rs index 740b90f2..5f927fc6 100644 --- a/src/hypercall.rs +++ b/src/hypercall.rs @@ -7,7 +7,6 @@ use std::{ use uhyve_interface::{parameters::*, GuestPhysAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC}; use crate::{ - consts::BOOT_PML4, isolation::filemap::UhyveFileMap, mem::{MemoryError, MmapMemory}, virt_to_phys, @@ -148,11 +147,11 @@ pub fn close(sysclose: &mut CloseParams) { } /// Handles an read syscall on the host. 
-pub fn read(mem: &MmapMemory, sysread: &mut ReadParams) { +pub fn read(mem: &MmapMemory, sysread: &mut ReadParams, root_pt: GuestPhysAddr) { unsafe { let bytes_read = libc::read( sysread.fd, - mem.host_address(virt_to_phys(sysread.buf, mem, BOOT_PML4).unwrap()) + mem.host_address(virt_to_phys(sysread.buf, mem, root_pt).unwrap()) .unwrap() as *mut libc::c_void, sysread.len, ); @@ -165,13 +164,17 @@ pub fn read(mem: &MmapMemory, sysread: &mut ReadParams) { } /// Handles an write syscall on the host. -pub fn write(peripherals: &VmPeripherals, syswrite: &WriteParams) -> io::Result<()> { +pub fn write( + peripherals: &VmPeripherals, + syswrite: &WriteParams, + root_pt: GuestPhysAddr, +) -> io::Result<()> { let mut bytes_written: usize = 0; while bytes_written != syswrite.len { let guest_phys_addr = virt_to_phys( syswrite.buf + bytes_written as u64, &peripherals.mem, - BOOT_PML4, + root_pt, ) .unwrap(); diff --git a/src/linux/gdb/breakpoints.rs b/src/linux/gdb/breakpoints.rs index 0252594c..c73414cc 100644 --- a/src/linux/gdb/breakpoints.rs +++ b/src/linux/gdb/breakpoints.rs @@ -4,10 +4,7 @@ use gdbstub::target::{self, ext::breakpoints::WatchKind, TargetResult}; use uhyve_interface::GuestVirtAddr; use super::GdbUhyve; -use crate::{ - arch::x86_64::{registers, virt_to_phys}, - consts::BOOT_PML4, -}; +use crate::arch::x86_64::{registers, virt_to_phys}; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct SwBreakpoint { addr: u64, @@ -58,7 +55,7 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { virt_to_phys( GuestVirtAddr::new(addr), &self.vm.peripherals.mem, - BOOT_PML4, + self.vm.vcpus[0].get_root_pagetable(), ) .map_err(|_err| ())?, kind, @@ -83,7 +80,7 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { virt_to_phys( GuestVirtAddr::new(addr), &self.vm.peripherals.mem, - BOOT_PML4, + self.vm.vcpus[0].get_root_pagetable(), ) .map_err(|_err| ())?, kind, diff --git a/src/linux/gdb/mod.rs b/src/linux/gdb/mod.rs index 6c8891d7..d1357f3e 100644 --- a/src/linux/gdb/mod.rs +++ b/src/linux/gdb/mod.rs @@ -23,7 +23,6 @@ use x86_64::registers::debug::Dr6Flags; use self::breakpoints::SwBreakpoints; use crate::{ arch::x86_64::{registers::debug::HwBreakpoints, virt_to_phys}, - consts::BOOT_PML4, linux::{x86_64::kvm_cpu::KvmVm, KickSignal}, vcpu::{VcpuStopReason, VirtualCPU}, vm::UhyveVm, @@ -127,7 +126,12 @@ impl SingleThreadBase for GdbUhyve { // Safety: mem is copied to data before mem can be modified. 
let src = unsafe { self.vm.peripherals.mem.slice_at( - virt_to_phys(guest_addr, &self.vm.peripherals.mem, BOOT_PML4).map_err(|_err| ())?, + virt_to_phys( + guest_addr, + &self.vm.peripherals.mem, + self.vm.vcpus[0].get_root_pagetable(), + ) + .map_err(|_err| ())?, data.len(), ) } @@ -143,7 +147,7 @@ impl SingleThreadBase for GdbUhyve { virt_to_phys( GuestVirtAddr::new(start_addr), &self.vm.peripherals.mem, - BOOT_PML4, + self.vm.vcpus[0].get_root_pagetable(), ) .map_err(|_err| ())?, data.len(), diff --git a/src/linux/x86_64/kvm_cpu.rs b/src/linux/x86_64/kvm_cpu.rs index c988b178..17fbff59 100644 --- a/src/linux/x86_64/kvm_cpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -276,6 +276,7 @@ impl KvmCpu { &self, entry_point: GuestPhysAddr, stack_address: GuestPhysAddr, + guest_address: GuestPhysAddr, cpu_id: u32, ) -> Result<(), kvm_ioctls::Error> { //debug!("Setup long mode"); @@ -288,7 +289,7 @@ impl KvmCpu { | Cr0Flags::PAGING; sregs.cr0 = cr0.bits(); - sregs.cr3 = BOOT_PML4.as_u64(); + sregs.cr3 = (guest_address + PML4_OFFSET).as_u64(); let cr4 = Cr4Flags::PHYSICAL_ADDRESS_EXTENSION; sregs.cr4 = cr4.bits(); @@ -319,7 +320,7 @@ impl KvmCpu { sregs.ss = seg; //sregs.fs = seg; //sregs.gs = seg; - sregs.gdt.base = BOOT_GDT.as_u64(); + sregs.gdt.base = (guest_address + GDT_OFFSET).as_u64(); sregs.gdt.limit = ((std::mem::size_of::() * BOOT_GDT_MAX) - 1) as u16; self.vcpu.set_sregs(&sregs)?; @@ -327,7 +328,7 @@ impl KvmCpu { let mut regs = self.vcpu.get_regs()?; regs.rflags = 2; regs.rip = entry_point.as_u64(); - regs.rdi = BOOT_INFO_ADDR.as_u64(); + regs.rdi = (guest_address + BOOT_INFO_OFFSET).as_u64(); regs.rsi = cpu_id.into(); regs.rsp = stack_address.as_u64(); @@ -344,10 +345,15 @@ impl KvmCpu { &self.vcpu } + pub fn get_root_pagetable(&self) -> GuestPhysAddr { + GuestPhysAddr::new(self.vcpu.get_sregs().unwrap().cr3) + } + fn init(&mut self, cpu_id: u32) -> HypervisorResult<()> { self.setup_long_mode( self.kernel_info.entry_point, self.kernel_info.stack_address, + self.kernel_info.guest_address, cpu_id, )?; self.setup_cpuid()?; @@ -464,12 +470,16 @@ impl VirtualCPU for KvmCpu { sysopen, &mut self.peripherals.file_mapping.lock().unwrap(), ), - Hypercall::FileRead(sysread) => { - hypercall::read(&self.peripherals.mem, sysread) - } - Hypercall::FileWrite(syswrite) => { - hypercall::write(&self.peripherals, syswrite)? 
- } + Hypercall::FileRead(sysread) => hypercall::read( + &self.peripherals.mem, + sysread, + self.get_root_pagetable(), + ), + Hypercall::FileWrite(syswrite) => hypercall::write( + &self.peripherals, + syswrite, + self.get_root_pagetable(), + )?, Hypercall::FileUnlink(sysunlink) => hypercall::unlink( &self.peripherals.mem, sysunlink, diff --git a/src/macos/aarch64/vcpu.rs b/src/macos/aarch64/vcpu.rs index d7505367..ae62ab67 100644 --- a/src/macos/aarch64/vcpu.rs +++ b/src/macos/aarch64/vcpu.rs @@ -14,7 +14,7 @@ use crate::{ mair, tcr_size, MT_DEVICE_nGnRE, MT_DEVICE_nGnRnE, MT_DEVICE_GRE, MT_NORMAL, MT_NORMAL_NC, PSR, TCR_FLAGS, TCR_TG1_4K, VA_BITS, }, - consts::*, + consts::{PGT_OFFSET, *}, hypercall::{self, copy_argv, copy_env}, params::Params, stats::{CpuStats, VmExit}, @@ -49,7 +49,12 @@ impl VirtualizationBackendInternal for XhyveVm { None }, }; - vcpu.init(kernel_info.entry_point, kernel_info.stack_address, id)?; + vcpu.init( + kernel_info.entry_point, + kernel_info.stack_address, + kernel_info.guest_address, + id, + )?; Ok(vcpu) } @@ -87,6 +92,7 @@ impl XhyveCpu { &mut self, entry_point: GuestPhysAddr, stack_address: GuestPhysAddr, + guest_address: GuestPhysAddr, cpu_id: u32, ) -> HypervisorResult<()> { debug!("Initialize VirtualCPU"); @@ -99,7 +105,7 @@ impl XhyveCpu { self.vcpu .write_system_register(SystemRegister::SP_EL1, stack_address.as_u64())?; self.vcpu - .write_register(Register::X0, BOOT_INFO_ADDR.as_u64())?; + .write_register(Register::X0, (guest_address + BOOT_INFO_OFFSET).as_u64())?; self.vcpu.write_register(Register::X1, cpu_id.into())?; /* @@ -150,8 +156,10 @@ impl XhyveCpu { // Load TTBRx self.vcpu .write_system_register(SystemRegister::TTBR1_EL1, 0)?; - self.vcpu - .write_system_register(SystemRegister::TTBR0_EL1, BOOT_PGT.as_u64())?; + self.vcpu.write_system_register( + SystemRegister::TTBR0_EL1, + (guest_address + PGT_OFFSET).as_u64(), + )?; /* * Prepare system control register (SCTRL) @@ -196,6 +204,14 @@ impl XhyveCpu { Ok(()) } + + pub fn get_root_pagetable(&self) -> GuestPhysAddr { + GuestPhysAddr::new( + self.vcpu + .read_system_register(SystemRegister::TTBR0_EL1) + .unwrap(), + ) + } } impl VirtualCPU for XhyveCpu { @@ -265,12 +281,17 @@ impl VirtualCPU for XhyveCpu { sysopen, &mut self.peripherals.file_mapping.lock().unwrap(), ), - Hypercall::FileRead(sysread) => { - hypercall::read(&self.peripherals.mem, sysread) - } - Hypercall::FileWrite(syswrite) => { - hypercall::write(&self.peripherals, syswrite).unwrap() - } + Hypercall::FileRead(sysread) => hypercall::read( + &self.peripherals.mem, + sysread, + self.get_root_pagetable(), + ), + Hypercall::FileWrite(syswrite) => hypercall::write( + &self.peripherals, + syswrite, + self.get_root_pagetable(), + ) + .unwrap(), Hypercall::FileUnlink(sysunlink) => hypercall::unlink( &self.peripherals.mem, sysunlink, diff --git a/src/paging.rs b/src/paging.rs index a8d27925..06b0cecf 100644 --- a/src/paging.rs +++ b/src/paging.rs @@ -1,8 +1,57 @@ //! General paging related code +use align_address::Align; use thiserror::Error; +use uhyve_interface::GuestPhysAddr; #[derive(Error, Debug)] pub enum PagetableError { #[error("The accessed virtual address is not mapped")] InvalidAddress, } + +/// A simple bump allocator for initial boot paging frame allocations. +/// Only intended for the initial memory creation. If used incorrectly, this leads to undefined behaviour! 
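+///
+/// A usage sketch, mirroring the unit test below: a 4-frame allocator hands
+/// out consecutive 4KiB frames until its capacity is exhausted.
+///
+/// ```ignore
+/// let mut ba = BumpAllocator::<0x1000>::new(GuestPhysAddr::new(0x40_0000), 4);
+/// assert_eq!(ba.allocate(), Some(GuestPhysAddr::new(0x40_0000)));
+/// assert_eq!(ba.allocate(), Some(GuestPhysAddr::new(0x40_1000)));
+/// // ...two more frames succeed, then `allocate()` returns `None`.
+/// ```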
+pub(crate) struct BumpAllocator<const FRAMESIZE: u64> {
+	start: GuestPhysAddr,
+	length: u64,
+	cnt: u64,
+}
+impl<const FRAMESIZE: u64> BumpAllocator<FRAMESIZE> {
+	/// Create a new allocator at `start` with `length` frames as capacity.
+	/// `start` must be aligned to `FRAMESIZE`.
+	pub(crate) fn new(start: GuestPhysAddr, length: u64) -> Self {
+		assert!(start.as_u64().is_aligned_to(FRAMESIZE));
+		Self {
+			start,
+			length,
+			cnt: 0,
+		}
+	}
+
+	pub(crate) fn allocate(&mut self) -> Option<GuestPhysAddr> {
+		if self.cnt < self.length {
+			let f = self.start + self.cnt * FRAMESIZE;
+			self.cnt += 1;
+			Some(f)
+		} else {
+			None
+		}
+	}
+}
+
+#[cfg(test)]
+mod tests {
+	use uhyve_interface::GuestPhysAddr;
+
+	use super::*;
+
+	#[test]
+	fn test_bump_frame_allocator() {
+		let mut ba = BumpAllocator::<0x1000>::new(GuestPhysAddr::new(0x40_0000), 4);
+		assert_eq!(ba.allocate(), Some(GuestPhysAddr::new(0x40_0000)));
+		assert_eq!(ba.allocate(), Some(GuestPhysAddr::new(0x40_1000)));
+		assert_eq!(ba.allocate(), Some(GuestPhysAddr::new(0x40_2000)));
+		assert_eq!(ba.allocate(), Some(GuestPhysAddr::new(0x40_3000)));
+		assert_eq!(ba.allocate(), None);
+	}
+}
diff --git a/src/params.rs b/src/params.rs
index 10b85856..2aa4f2ff 100644
--- a/src/params.rs
+++ b/src/params.rs
@@ -46,6 +46,9 @@ pub struct Params {
 
 	/// Collect run statistics
 	pub stats: bool,
+
+	/// Load the kernel to a random address
+	pub aslr: bool,
 }
 
 #[allow(clippy::derivable_impls)]
@@ -66,6 +69,7 @@ impl Default for Params {
 			kernel_args: Default::default(),
 			output: Default::default(),
 			stats: false,
+			aslr: true,
 		}
 	}
 }
diff --git a/src/vm.rs b/src/vm.rs
index 03990523..18f19a0f 100644
--- a/src/vm.rs
+++ b/src/vm.rs
@@ -1,6 +1,5 @@
 use std::{
 	env, fmt, fs, io,
-	mem::MaybeUninit,
 	num::NonZeroU32,
 	os::unix::prelude::JoinHandleExt,
 	path::PathBuf,
@@ -13,9 +12,11 @@ use core_affinity::CoreId;
 use hermit_entry::{
 	boot_info::{BootInfo, HardwareInfo, LoadInfo, PlatformInfo, RawBootInfo, SerialPortBase},
 	elf::{KernelObject, LoadedKernel, ParseKernelError},
+	HermitVersion,
 };
 use internal::VirtualizationBackendInternal;
 use log::error;
+use rand::Rng;
 use thiserror::Error;
 use uhyve_interface::GuestPhysAddr;
 
@@ -48,6 +49,17 @@ pub enum LoadKernelError {
 
 type LoadKernelResult<T> = Result<T, LoadKernelError>;
 
+/// Generates a random guest address for Uhyve's virtualized memory.
+/// This function is invoked when a new `UhyveVm` is created, provided that the kernel object is relocatable.
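+/// The result is aligned down to a 2 MiB boundary and bounded so that
+/// `guest_address + KERNEL_OFFSET + object_mem_size` still fits below
+/// 0xCFF0_0000 (mappings beyond the 32-bit gap are not used yet, see the
+/// TODO below).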
+fn generate_address(object_mem_size: usize) -> GuestPhysAddr { + let mut rng = rand::thread_rng(); + // TODO: Also allow mappings beyond the 32 Bit gap + let start_address_upper_bound: u64 = + 0x0000_0000_CFF0_0000 - object_mem_size as u64 - KERNEL_OFFSET; + + GuestPhysAddr::new(rng.gen_range(0x0..start_address_upper_bound) & !(0x20_0000 - 1)) +} + #[cfg(target_os = "linux")] pub type DefaultBackend = crate::linux::x86_64::kvm_cpu::KvmVm; #[cfg(target_os = "macos")] @@ -106,6 +118,7 @@ unsafe impl Sync for VmPeripherals {} /// static information that does not change during execution #[derive(Debug)] pub(crate) struct KernelInfo { + /// The first instruction after boot pub entry_point: GuestPhysAddr, /// The starting position of the image in physical memory #[cfg_attr(target_os = "macos", allow(dead_code))] // currently only needed in gdb @@ -113,6 +126,8 @@ pub(crate) struct KernelInfo { pub params: Params, pub path: PathBuf, pub stack_address: GuestPhysAddr, + /// The location of the whole guest in the physical address space + pub guest_address: GuestPhysAddr, } pub struct UhyveVm { @@ -122,34 +137,73 @@ pub struct UhyveVm { } impl UhyveVm { pub fn new(kernel_path: PathBuf, params: Params) -> HypervisorResult> { + let mut guest_address = arch::RAM_START; let memory_size = params.memory_size.get(); + let elf = fs::read(&kernel_path)?; + let object: KernelObject<'_> = + KernelObject::parse(&elf).map_err(LoadKernelError::ParseKernelError)?; + + let hermit_version = object.hermit_version(); + info!( + "Loading a {} Kernel", + if let Some(version) = hermit_version { + format!("Hermit v{version}") + } else { + format!("pre Hermit v0.10.0") + } + ); + + let kernel_address = if let Some(start_addr) = object.start_addr() { + if params.aslr { + warn!("ASLR is enabled but kernel is not relocatable - disabling ASLR"); + } + start_addr + } else { + guest_address = if params.aslr { + generate_address(object.mem_size()) + } else { + GuestPhysAddr::zero() + }; + (guest_address + KERNEL_OFFSET).as_u64() + } + .into(); + + debug!("Kernel gets loaded to {kernel_address:#x}"); + + #[cfg(target_os = "linux")] + #[cfg(target_arch = "x86_64")] + let mut mem = MmapMemory::new(0, memory_size, guest_address, params.thp, params.ksm); + + // TODO: guest_address is only taken into account on Linux platforms. 
+ // TODO: Before changing this, fix init_guest_mem in `src/arch/aarch64/mod.rs` #[cfg(target_os = "linux")] - let mem = MmapMemory::new(0, memory_size, arch::RAM_START, params.thp, params.ksm); + #[cfg(not(target_arch = "x86_64"))] + let mut mem = MmapMemory::new(0, memory_size, guest_address, params.thp, params.ksm); + #[cfg(not(target_os = "linux"))] - let mem = MmapMemory::new(0, memory_size, arch::RAM_START, false, false); + let mut mem = MmapMemory::new(0, memory_size, guest_address, false, false); let ( LoadedKernel { load_info, entry_point, }, - kernel_address, - ) = load_kernel_to_mem(&kernel_path, unsafe { mem.as_slice_uninit_mut() }) + kernel_end_address, + ) = load_kernel_to_mem(&object, &mut mem, kernel_address) .expect("Unable to load Kernel {kernel_path}"); - let stack_address = GuestPhysAddr::new( - kernel_address - .as_u64() - .checked_sub(KERNEL_STACK_SIZE) - .expect( - "there should be enough space for the boot stack before the kernel start address", - ), + assert!( + kernel_address.as_u64() > KERNEL_STACK_SIZE, + "there should be enough space for the boot stack before the kernel start address", ); + let stack_address = kernel_address - KERNEL_STACK_SIZE; + debug!("Stack starts at {stack_address:#x}"); let kernel_info = Arc::new(KernelInfo { entry_point: entry_point.into(), kernel_address, + guest_address: mem.guest_address, path: kernel_path, params, stack_address, @@ -202,8 +256,22 @@ impl UhyveVm { write_fdt_into_mem(&peripherals.mem, &kernel_info.params, freq); write_boot_info_to_mem(&peripherals.mem, load_info, cpu_count as u64, freq); + let legacy_mapping = if let Some(version) = hermit_version { + // actually, all versions that have the tag in the elf are valid, but an explicit check doesn't hurt + version + < HermitVersion { + major: 0, + minor: 10, + patch: 0, + } + } else { + true + }; init_guest_mem( unsafe { peripherals.mem.as_slice_mut() }, // slice only lives during this fn call + peripherals.mem.guest_address, + kernel_end_address - guest_address, + legacy_mapping, ); debug!("VM initialization complete"); @@ -314,6 +382,7 @@ impl fmt::Debug for UhyveVm { f.debug_struct(&format!("UhyveVm<{}>", VirtIf::BACKEND::NAME)) .field("entry_point", &self.kernel_info.entry_point) .field("stack_address", &self.kernel_info.stack_address) + .field("guest_address", &self.kernel_info.guest_address) .field("mem", &self.peripherals.mem) .field("path", &self.kernel_info.path) .field("virtio_device", &self.peripherals.virtio_device) @@ -324,11 +393,21 @@ impl fmt::Debug for UhyveVm { } /// Initialize the page tables for the guest -fn init_guest_mem(mem: &mut [u8]) { +/// `memory_size` is the length of the memory from the start of the physical +/// memory till the end of the kernel in bytes. 
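+/// `legacy_mapping` selects the mapping layout expected by pre-0.10.0 Hermit
+/// kernels (see the `HermitVersion` check in `UhyveVm::new`).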
+fn init_guest_mem( + mem: &mut [u8], + guest_addr: GuestPhysAddr, + memory_size: u64, + legacy_mapping: bool, +) { debug!("Initialize guest memory"); crate::arch::init_guest_mem( mem.try_into() .expect("Guest memory is not large enough for pagetables"), + guest_addr, + memory_size, + legacy_mapping, ); } @@ -356,9 +435,9 @@ fn write_fdt_into_mem(mem: &MmapMemory, params: &Params, cpu_freq: Option, ) { - debug!("Writing BootInfo to memory"); + debug!( + "Writing BootInfo to {:?}", + mem.guest_address + BOOT_INFO_OFFSET + ); let boot_info = BootInfo { hardware_info: HardwareInfo { phys_addr_range: mem.guest_address.as_u64() @@ -377,7 +459,11 @@ fn write_boot_info_to_mem( serial_port_base: SerialPortBase::new( (uhyve_interface::HypercallAddress::Uart as u16).into(), ), - device_tree: Some(FDT_ADDR.as_u64().try_into().unwrap()), + device_tree: Some( + (mem.guest_address.as_u64() + FDT_OFFSET) + .try_into() + .unwrap(), + ), }, load_info, platform_info: PlatformInfo::Uhyve { @@ -388,34 +474,31 @@ fn write_boot_info_to_mem( }, }; unsafe { - let raw_boot_info_ptr = - mem.host_address.add(BOOT_INFO_ADDR.as_u64() as usize) as *mut RawBootInfo; + let raw_boot_info_ptr = mem.host_address.add(BOOT_INFO_OFFSET as usize) as *mut RawBootInfo; *raw_boot_info_ptr = RawBootInfo::from(boot_info); } } /// loads the kernel image into `mem`. `offset` is the start address of `mem`. +/// Returns the loaded kernel marker and the address of the kernel's end address. fn load_kernel_to_mem( - kernel_path: &PathBuf, - mem: &mut [MaybeUninit], + object: &KernelObject<'_>, + mem: &mut MmapMemory, + offset: GuestPhysAddr, ) -> LoadKernelResult<(LoadedKernel, GuestPhysAddr)> { - let elf = fs::read(kernel_path)?; - let object = KernelObject::parse(&elf).map_err(LoadKernelError::ParseKernelError)?; - - // TODO: should be a random start address, if we have a relocatable executable - let kernel_address = GuestPhysAddr::new(object.start_addr().unwrap_or(0x400000)); - let kernel_end_address = kernel_address + object.mem_size(); + let kernel_end_address = offset + object.mem_size(); - if kernel_end_address.as_u64() > mem.len() as u64 - arch::RAM_START.as_u64() { + if kernel_end_address.as_u64() > mem.memory_size as u64 + mem.guest_address.as_u64() { return Err(LoadKernelError::InsufficientMemory); } Ok(( object.load_kernel( // Safety: Slice only lives during this fn call, so no aliasing happens - &mut mem[kernel_address.as_u64() as usize..kernel_end_address.as_u64() as usize], - kernel_address.as_u64(), + &mut unsafe { mem.as_slice_uninit_mut() } + [KERNEL_OFFSET as usize..object.mem_size() + KERNEL_OFFSET as usize], + offset.as_u64(), ), - kernel_address, + kernel_end_address, )) } diff --git a/tests/gdb.rs b/tests/gdb.rs index d547619b..69eb4a59 100644 --- a/tests/gdb.rs +++ b/tests/gdb.rs @@ -46,7 +46,6 @@ fn gdb() -> io::Result<()> { write!( &mut command_file, "target remote :{port} -symbol-file {bin_path} -o 0x400000 break gdb::main continue @@ -88,7 +87,6 @@ pipe print _x|cat >> {output_path} continue ", port = port, - bin_path = bin_path.display(), output_path = output_path.display() )?;